/**
 * Dumps a tab-separated "docId, key" listing from the sequence files found under {@code input}.
 *
 * <p>Every entry of {@code input} accepted by {@code DataPathFilter} is opened as a
 * {@code SequenceFile}; for each record one line of the form {@code docId<TAB>key<NEWLINE>}
 * is written, where {@code docId} comes from the {@code DocumentMapping} value.
 *
 * <p>Output is always encoded as UTF-8, whether it goes to a file or to standard output.
 * When writing to standard output the stream is flushed but deliberately left open, so the
 * caller can keep printing after this method returns.
 *
 * @param conf       Hadoop configuration used to resolve the file system and open the readers
 * @param input      directory (or path) whose entries are listed and read
 * @param outputFile local file to write to, or {@code null} to write to standard output
 * @throws IOException            if listing, reading or writing fails
 * @throws InstantiationException if the key class of a sequence file cannot be instantiated
 * @throws IllegalAccessException if the key class constructor is not accessible
 */
public static void run(Configuration conf, Path input, String outputFile)
    throws IOException, InstantiationException, IllegalAccessException {
  final boolean toStdout = outputFile == null;
  final Charset utf8 = Charset.forName("UTF-8");

  Writer writer;
  if (toStdout) {
    // Same encoding as the file branch; the platform default would make output host-dependent.
    writer = new OutputStreamWriter(System.out, utf8);
  } else {
    writer = new OutputStreamWriter(new FileOutputStream(new File(outputFile)), utf8);
  }

  try {
    FileSystem fs = input.getFileSystem(conf);
    for (FileStatus fst : fs.listStatus(input, new DataPathFilter())) {
      Path dataPath = fst.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, dataPath, conf);
      try {
        // The key type is taken from the file header; it must be Text or a subclass of it.
        Text key = reader.getKeyClass().asSubclass(Text.class).newInstance();
        DocumentMapping value = new DocumentMapping();
        while (reader.next(key, value)) {
          String docId = value.getDocId();
          writer.write(docId + "\t" + key + "\n");
        }
      } finally {
        reader.close();
      }
    }
  } finally {
    if (toStdout) {
      // Closing the writer would close System.out for the whole JVM; only push out buffered data.
      writer.flush();
    } else {
      writer.close();
    }
  }
}
@Override protected void map(Text key, DocumentMapping value, Context context) throws IOException, InterruptedException { // no need vector for this MR double distance = value.getDistance(); Vector vector = value.getVector(); // map output MapWritable m = new MapWritable(); m.put(ClusterEvaluatorMR.DOC_VECTOR_KEY, new VectorWritable(vector)); m.put(ClusterEvaluatorMR.DISTANCE_KEY, new DoubleWritable(distance)); m.put(ClusterEvaluatorMR.SQUARED_DISTANCE_KEY, new DoubleWritable(distance * distance)); m.put(ClusterEvaluatorMR.COUNT_KEY, new LongWritable(1)); context.write(key, m); }