/**
 * Runs all field extractors registered for a source field and caches the result,
 * keyed by the extractor's target ordinal. An empty string is stored when no match is found.
 * @param ordinal ordinal of the source field in the record
 * @param data    raw field value to run the extractors against
 */
private void findExtractedFields(int ordinal, String data) {
    List<FieldExtractor> extractors = schema.getEntity().getExtractorsForField(ordinal);
    for (FieldExtractor extractor : extractors) {
        String extField = extrtactedFields.get(extractor.getOrdinal());
        if (null == extField || extField.isEmpty()) {
            // nothing extracted yet for this target ordinal
            String match = extractor.findMatch(data);
            if (null == match) {
                match = "";
            }
            extrtactedFields.put(extractor.getOrdinal(), match);
        }
    }
}
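// Illustrative sketch only (the extractor configuration below is hypothetical, not taken from
// this class): assuming the schema attaches a pattern-based FieldExtractor to the street
// address ordinal that pulls out an apartment number, a call like
//
//     findExtractedFields(addressOrdinal, "34 Elm St Apt 7");
//
// would leave "Apt 7" in extrtactedFields under the extractor's target ordinal, and "" when
// the pattern finds no match in the data.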
/* (non-Javadoc)
 * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN, org.apache.hadoop.mapreduce.Mapper.Context)
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String[] items = value.toString().split(fieldDelimRegex);
    itemList.clear();
    for (int i = 0; i < items.length; ++i) {
        String item = items[i];
        Field field = schema.getEntity().getFieldByOrdinal(i);
        if (null != field && field.getDataType().equals(Field.DATA_TYPE_TEXT)) {
            // format text fields according to their sub type
            String format = field.getTextDataSubTypeFormat();
            if (field.getDataSubType().equals(Field.TEXT_TYPE_PERSON_NAME)) {
                item = countryFormat.personNameFormat(item);
            } else if (field.getDataSubType().equals(Field.TEXT_TYPE_STREET_ADDRESS)) {
                item = countryFormat.caseFormat(item, format);
                item = countryFormat.streetAddressFormat(item);
            } else if (field.getDataSubType().equals(Field.TEXT_TYPE_STREET_ADDRESS_ONE)) {
                item = countryFormat.caseFormat(item, format);
                item = countryFormat.streetAddressOneFormat(item);
            } else if (field.getDataSubType().equals(Field.TEXT_TYPE_STREET_ADDRESS_TWO)) {
                item = countryFormat.caseFormat(item, format);
                item = countryFormat.streetAddressTwoFormat(item);
            } else if (field.getDataSubType().equals(Field.TEXT_TYPE_CITY)) {
                item = countryFormat.caseFormat(item, format);
            } else if (field.getDataSubType().equals(Field.TEXT_TYPE_STATE)) {
                item = countryFormat.stateFormat(item);
            } else if (field.getDataSubType().equals(Field.TEXT_TYPE_ZIP)) {
                item = countryFormat.caseFormat(item, format);
            } else if (field.getDataSubType().equals(Field.TEXT_TYPE_COUNTRY)) {
                item = countryFormat.caseFormat(item, format);
            } else if (field.getDataSubType().equals(Field.TEXT_TYPE_EMAIL_ADDR)) {
                item = countryFormat.emailFormat(item, format);
            } else if (field.getDataSubType().equals(Field.TEXT_TYPE_PHONE_NUM)) {
                item = countryFormat.phoneNumFormat(item, format);
            } else {
                // any other text field gets analyzed
                item = tokenize(item);
            }
        }
        itemList.add(item);
    }

    // build output record
    valueHolder.set(org.chombo.util.Utility.join(itemList, fieldDelim));
    context.write(NullWritable.get(), valueHolder);
}
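// Illustrative example only (the record below is made up, assuming "[]" as the configured
// delimiter and person name / street address / city sub types on the first three fields):
//
//     in  : "jOHN dOE[]12 oak STREET[]new york"
//     out : "John Doe[]12 Oak Street[]New York"
//
// The exact casing and abbreviation handling depends on the countryFormat implementation.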
/* (non-Javadoc)
 * @see org.apache.hadoop.mapreduce.Mapper#setup(org.apache.hadoop.mapreduce.Mapper.Context)
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    fieldDelim = context.getConfiguration().get("field.delim", "[]");
    fieldDelimRegex = context.getConfiguration().get("field.delim.regex", "\\[\\]");
    consolidateFields = context.getConfiguration().getBoolean("consolidate.field", false);

    // ordinals of fields to be treated as free form text
    String textFields = context.getConfiguration().get("text.field.ordinals", "");
    if (!textFields.isEmpty()) {
        String[] items = textFields.split(",");
        for (int i = 0; i < items.length; ++i) {
            textFieldOrdinals.add(Integer.parseInt(items[i]));
        }
    }
    analyzer = new StandardAnalyzer(Version.LUCENE_35);

    // load raw schema from HDFS
    Configuration conf = context.getConfiguration();
    String filePath = conf.get("raw.schema.file.path");
    FileSystem dfs = FileSystem.get(conf);
    Path src = new Path(filePath);
    FSDataInputStream fs = dfs.open(src);
    ObjectMapper mapper = new ObjectMapper();
    schema = mapper.readValue(fs, SingleTypeSchema.class);
    for (Field field : schema.getEntity().getFields()) {
        retainedFieldOrdinals.add(field.getOrdinal());
    }
}
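// Minimal driver sketch (the class names and paths below are hypothetical; only the
// configuration keys match what setup() reads above):
//
//     Configuration conf = new Configuration();
//     conf.set("field.delim", "[]");
//     conf.set("field.delim.regex", "\\[\\]");
//     conf.set("text.field.ordinals", "5,6");
//     conf.set("raw.schema.file.path", "/path/to/raw/schema.json");
//     Job job = new Job(conf, "data formatter");
//     job.setJarByClass(DataFormatter.class);                   // hypothetical driver class
//     job.setMapperClass(DataFormatter.FormatterMapper.class);  // this mapper, name assumed
//     job.setNumReduceTasks(0);                                 // map only job
//     FileInputFormat.addInputPath(job, new Path(args[0]));
//     FileOutputFormat.setOutputPath(job, new Path(args[1]));
//     job.waitForCompletion(true);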