/**
 * Splits the incoming text into tokens and writes one tuple per token.
 *
 * <p>The shared {@code tuple} is created on first use from intermediate schema 0 of the
 * job configuration and is reused for every write. Field 1 is set to {@code 1} once per
 * call; field 0 receives each token in turn. Tokens are delimited by the default
 * {@link StringTokenizer} delimiters (whitespace).
 *
 * @param key       input key (not read by this method)
 * @param value     text to tokenize
 * @param context   supplies the intermediate schema used to build the tuple
 * @param collector sink that receives one write per token
 */
@Override
public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
    throws IOException, InterruptedException {
  // Lazily build the reusable output tuple.
  if (tuple == null) {
    tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema(0));
  }

  final String line = value.toString();
  final StringTokenizer words = new StringTokenizer(line);

  // Field 1 is constant for every token emitted by this call.
  tuple.set(1, 1);

  for (; words.hasMoreTokens(); ) {
    final String word = words.nextToken();
    tuple.set(0, word);
    collector.write(tuple);
  }
}
/**
 * Compares two single-field tuples (values 1 and 2) under two ascending criteria:
 * one that attaches {@code revIntComp} to the field and one that does not.
 *
 * <p>Expectation encoded by the assertions: with {@code revIntComp} the comparison of
 * (1, 2) is positive; without it the comparison is negative.
 */
@Test
public void testCompare() {
  final ArrayList<Field> schemaFields = new ArrayList<Field>();
  schemaFields.add(Field.create("int", Field.Type.INT));
  final Schema schema = new Schema("schema", schemaFields);

  // Same field and order in both criteria; only the custom comparator differs.
  final Criteria customCriteria =
      new Criteria(new OrderBy().add("int", Order.ASC, revIntComp).getElements());
  final Criteria plainCriteria =
      new Criteria(new OrderBy().add("int", Order.ASC).getElements());

  final Tuple first = new Tuple(schema);
  final Tuple second = new Tuple(schema);
  final int[] fieldIndexes = {0};
  first.set("int", 1);
  second.set("int", 2);

  final SortComparator comparator = new SortComparator();
  assertPositive(
      comparator.compare(schema, customCriteria, first, fieldIndexes, second, fieldIndexes));
  assertNegative(
      comparator.compare(schema, plainCriteria, first, fieldIndexes, second, fieldIndexes));
}
/**
 * Ignores its input and writes a fixed data set: three user tuples followed by two
 * country tuples. The shared {@code user} and {@code country} tuples are mutated and
 * reused for every write.
 *
 * @param key       input key (not read)
 * @param value     input value (not read)
 * @param context   job context (not read)
 * @param collector sink that receives the five tuples, in the order listed below
 */
@Override
public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
    throws IOException, InterruptedException {
  writeUser(collector, "Pere", 100, "ES");
  writeUser(collector, "Iván", 50, "ES");
  writeUser(collector, "Eric", 150, "FR");

  writeCountry(collector, "ES", 1000);
  writeCountry(collector, "FR", 1500);
}

/** Fills the shared {@code user} tuple with the given values and writes it. */
private void writeUser(Collector collector, String name, int money, String countryCode)
    throws IOException, InterruptedException {
  user.set("name", name);
  user.set("money", money);
  user.set("my_country", countryCode);
  collector.write(user);
}

/** Fills the shared {@code country} tuple with the given values and writes it. */
private void writeCountry(Collector collector, String countryCode, int averageSalary)
    throws IOException, InterruptedException {
  country.set("country", countryCode);
  country.set("averageSalary", averageSalary);
  collector.write(country);
}
@Test public void test() throws Exception { trash(OUTPUT); Configuration conf = new Configuration(); createInput(INPUT, conf); ToolRunner.run(getConf(), new TopicFingerprint(), new String[] {INPUT, OUTPUT, 2 + ""}); Path outPath = new Path(OUTPUT + "/part-r-00000"); FileSystem fs = FileSystem.get(outPath.toUri(), conf); TupleFile.Reader reader = new TupleFile.Reader(fs, conf, outPath); Tuple tuple = new Tuple(reader.getSchema()); // The order in the output file is deterministic (we have sorted by topic, count) reader.next(tuple); assertEquals(1, tuple.get("topic")); assertEquals("a", tuple.get("word").toString()); reader.next(tuple); assertEquals(1, tuple.get("topic")); assertEquals("c", tuple.get("word").toString()); reader.next(tuple); assertEquals(2, tuple.get("topic")); assertEquals("a", tuple.get("word").toString()); reader.next(tuple); assertEquals(2, tuple.get("topic")); assertEquals("b", tuple.get("word").toString()); // Check the named output reader.close(); outPath = new Path(OUTPUT + "/" + TopicFingerprint.OUTPUT_TOTALCOUNT + "/" + "part-r-00000"); reader = new TupleFile.Reader(fs, conf, outPath); tuple = new Tuple(reader.getSchema()); reader.next(tuple); assertEquals(1, tuple.get("topic")); assertEquals(15, tuple.get("totalcount")); reader.next(tuple); assertEquals(2, tuple.get("topic")); assertEquals(19, tuple.get("totalcount")); reader.close(); trash(INPUT, OUTPUT); }