/**
 * Emits a single record for the given input pair: a {@link StringTuple} constructed from the
 * key's string form, with the value's string form added to it, written together with
 * {@code ONE}.
 *
 * @param key     text whose string form is passed to the tuple constructor
 * @param value   text whose string form is added to the tuple afterwards
 * @param context destination the tuple and {@code ONE} are written to
 * @throws IOException          declared by the signature; would come from the context write
 * @throws InterruptedException declared by the signature; would come from the context write
 */
@Override
protected void map(Text key, Text value, Context context)
    throws IOException, InterruptedException {
  String keyText = key.toString();
  StringTuple pair = new StringTuple(keyText);
  pair.add(value.toString());
  context.write(pair, ONE);
}
/**
 * Classifies one document and emits the pairing of its correct label with the label chosen
 * by the classifier.
 *
 * <p>The value text is broken into n-grams of size {@code gramSize}, the n-grams are handed to
 * {@code classifier} along with {@code defaultCategory}, and a tuple of
 * {@code BayesConstants.CLASSIFIER_TUPLE}, the correct label and the classified label is
 * collected with {@code ONE}.
 *
 * @param key      the correct label of the document
 * @param value    the document's features, from which the n-grams are generated
 * @param output   collector that receives the resulting tuple
 * @param reporter Hadoop status reporter; not used by this method
 * @throws IOException if collecting the output fails, or wrapping an
 *                     {@code InvalidDatastoreException} raised while classifying
 */
@Override
public void map(Text key,
                Text value,
                OutputCollector<StringTuple, DoubleWritable> output,
                Reporter reporter) throws IOException {
  NGrams generator = new NGrams(value.toString(), gramSize);
  List<String> grams = generator.generateNGramsWithoutLabel();
  String[] document = grams.toArray(new String[grams.size()]);
  String expected = key.toString();

  try {
    ClassifierResult outcome = classifier.classifyDocument(document, defaultCategory);
    String predicted = outcome.getLabel();

    StringTuple labels = new StringTuple(BayesConstants.CLASSIFIER_TUPLE);
    labels.add(expected);
    labels.add(predicted);

    output.collect(labels, ONE);
  } catch (InvalidDatastoreException e) {
    // Surface datastore failures through the only checked type this method declares.
    throw new IOException(e);
  }
}
/**
 * Drives {@code VectorDistanceMapper.map} with one input vector and two named seed vectors,
 * and verifies against a mocked context that one (seed name, "123") tuple is written per seed
 * with the expected distance value.
 */
@Test
public void testVectorDistanceMapper() throws Exception {
  Mapper<WritableComparable<?>, VectorWritable, StringTuple, DoubleWritable>.Context context =
      EasyMock.createMock(Mapper.Context.class);

  // Record phase: the write expected for seed "foo".
  StringTuple expectedForFoo = new StringTuple();
  expectedForFoo.add("foo");
  expectedForFoo.add("123");
  context.write(expectedForFoo, new DoubleWritable(Math.sqrt(2.0)));

  // Record phase: the write expected for seed "foo2".
  StringTuple expectedForFoo2 = new StringTuple();
  expectedForFoo2.add("foo2");
  expectedForFoo2.add("123");
  context.write(expectedForFoo2, new DoubleWritable(1));

  EasyMock.replay(context);

  // Input vector (2, 2).
  Vector input = new RandomAccessSparseVector(2);
  input.set(0, 2);
  input.set(1, 2);

  // Seed "foo" at (1, 1) and seed "foo2" at (2, 1).
  Vector fooSeed = new RandomAccessSparseVector(2);
  fooSeed.set(0, 1);
  fooSeed.set(1, 1);
  Vector foo2Seed = new RandomAccessSparseVector(2);
  foo2Seed.set(0, 2);
  foo2Seed.set(1, 1);

  List<NamedVector> seeds = new ArrayList<NamedVector>();
  seeds.add(new NamedVector(fooSeed, "foo"));
  seeds.add(new NamedVector(foo2Seed, "foo2"));

  // Inject the mapper's collaborators through the setField helper.
  VectorDistanceMapper mapper = new VectorDistanceMapper();
  setField(mapper, "measure", new EuclideanDistanceMeasure());
  setField(mapper, "seedVectors", seeds);

  mapper.map(new IntWritable(123), new VectorWritable(input), context);

  EasyMock.verify(context);
}