Пример #1
0
    /**
     * Loads the entity schema and caches the job settings this reducer reads from the configuration.
     *
     * <p>The schema file named by the {@code schema.file.path} property is opened through the
     * configured {@code FileSystem} and deserialized into a {@code MixedTypeSchema}. Field counts,
     * id/class-attribute ordinals, field lists and the distance/text-similarity strategies are then
     * derived from the schema, and the remaining options are read with these defaults:
     * {@code distance.scale}=1000, {@code field.delim.regex}="\\[\\]", {@code field.delim}=",",
     * {@code sim.output.verbose}=true, {@code identify.with.file.prefix}=false.
     *
     * @param context task context supplying the job configuration
     * @throws IOException if the schema file cannot be opened or read
     * @throws InterruptedException declared by the overridden signature
     * @see org.apache.hadoop.mapreduce.Reducer#setup(org.apache.hadoop.mapreduce.Reducer.Context)
     */
    protected void setup(Context context) throws IOException, InterruptedException {
      Configuration conf = context.getConfiguration();

      // load schema
      String filePath = conf.get("schema.file.path");
      FileSystem dfs = FileSystem.get(conf);
      Path src = new Path(filePath);
      ObjectMapper mapper = new ObjectMapper();
      // Close the input stream even when parsing fails. Only the stream is closed here; the
      // FileSystem handle is deliberately left open, as in the original code.
      try (FSDataInputStream fs = dfs.open(src)) {
        schema = mapper.readValue(fs, MixedTypeSchema.class);
      }

      firstTypeSize = schema.getEntityByType(0).getFieldCount();
      firstIdOrdinal = schema.getEntityByType(0).getIdField().getOrdinal();
      secondIdOrdinal = schema.getEntityByType(1).getIdField().getOrdinal();
      Field field = schema.getEntityByType(0).getClassAttributeField();
      if (null != field) {
        firstClassAttrOrdinal = field.getOrdinal();
        // NOTE(review): this reads entity type 0 again, whereas secondIdOrdinal above uses entity
        // type 1. Possibly a copy-paste slip; behavior kept as-is until confirmed against the schema.
        secondClassAttrOrdinal = schema.getEntityByType(0).getClassAttributeField().getOrdinal();
      }

      fields = schema.getEntityByType(0).getFields();
      targetFields = schema.getEntityByType(1).getFields();
      scale = conf.getInt("distance.scale", 1000);
      distStrategy = schema.createDistanceStrategy(scale);
      fieldDelimRegex = conf.get("field.delim.regex", "\\[\\]");
      fieldDelim = conf.get("field.delim", ",");
      textSimStrategy = schema.createTextSimilarityStrategy();
      outputVerbose = conf.getBoolean("sim.output.verbose", true);
      identifyWithFilePrefix = conf.getBoolean("identify.with.file.prefix", false);

      System.out.println(
          "firstTypeSize: "
              + firstTypeSize
              + " firstIdOrdinal:"
              + firstIdOrdinal
              + " secondIdOrdinal:"
              + secondIdOrdinal
              + " Source field count:"
              + fields.size()
              + " Target field count:"
              + targetFields.size());
    }