/* (non-Javadoc) * @see org.apache.hadoop.mapreduce.Reducer#setup(org.apache.hadoop.mapreduce.Reducer.Context) */ protected void setup(Context context) throws IOException, InterruptedException { // load schema Configuration conf = context.getConfiguration(); String filePath = conf.get("schema.file.path"); FileSystem dfs = FileSystem.get(conf); Path src = new Path(filePath); FSDataInputStream fs = dfs.open(src); ObjectMapper mapper = new ObjectMapper(); schema = mapper.readValue(fs, MixedTypeSchema.class); firstTypeSize = schema.getEntityByType(0).getFieldCount(); firstIdOrdinal = schema.getEntityByType(0).getIdField().getOrdinal(); secondIdOrdinal = schema.getEntityByType(1).getIdField().getOrdinal(); Field field = schema.getEntityByType(0).getClassAttributeField(); if (null != field) { firstClassAttrOrdinal = field.getOrdinal(); secondClassAttrOrdinal = schema.getEntityByType(0).getClassAttributeField().getOrdinal(); } fields = schema.getEntityByType(0).getFields(); targetFields = schema.getEntityByType(1).getFields(); scale = context.getConfiguration().getInt("distance.scale", 1000); distStrategy = schema.createDistanceStrategy(scale); fieldDelimRegex = context.getConfiguration().get("field.delim.regex", "\\[\\]"); fieldDelim = context.getConfiguration().get("field.delim", ","); textSimStrategy = schema.createTextSimilarityStrategy(); outputVerbose = context.getConfiguration().getBoolean("sim.output.verbose", true); identifyWithFilePrefix = context.getConfiguration().getBoolean("identify.with.file.prefix", false); System.out.println( "firstTypeSize: " + firstTypeSize + " firstIdOrdinal:" + firstIdOrdinal + " secondIdOrdinal:" + secondIdOrdinal + " Source field count:" + fields.size() + " Target field count:" + targetFields.size()); }