private static <T extends WritableComparable> Path writePartitionFile(
    String testname, JobConf conf, T[] splits) throws IOException {
  final FileSystem fs = FileSystem.getLocal(conf);
  final Path testdir =
      new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(fs);
  Path p = new Path(testdir, testname + "/_partition.lst");
  TotalOrderPartitioner.setPartitionFile(conf, p);
  conf.setNumReduceTasks(splits.length + 1);
  SequenceFile.Writer w = null;
  try {
    NullWritable nw = NullWritable.get();
    w =
        SequenceFile.createWriter(
            fs,
            conf,
            p,
            splits[0].getClass(),
            NullWritable.class,
            SequenceFile.CompressionType.NONE);
    for (int i = 0; i < splits.length; ++i) {
      w.append(splits[i], nw);
    }
  } finally {
    if (null != w) {
      w.close();
    }
  }
  return p;
}
/** Called for every record in the data. */
@Override
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  // Skip enormous documents, due to memory problems and because the regex cannot handle them.
  if (value.getLength() > MAX_DOC_SIZE_IN_BYTES) {
    context.getCounter(ProcessingTime.SKIPPED).increment(1);
    return;
  }

  // Parse the document and measure the time spent.
  t1 = System.nanoTime();
  Spinn3rDocument d = new Spinn3rDocument(value.toString());
  t2 = System.nanoTime();
  context.getCounter(ProcessingTime.PARSING).increment(t2 - t1);

  // Keep only those documents that satisfy the search conditions.
  t1 = System.nanoTime();
  t = filter.documentSatisfies(d);
  t2 = System.nanoTime();
  context.getCounter(ProcessingTime.FILTERING).increment(t2 - t1);

  // Output the document if it satisfies the filter.
  if (t) {
    if (cmdMap.hasOption("formatF5")) {
      context.write(new Text(d.toStringF5()), NullWritable.get());
    } else {
      context.write(new Text(d.toString()), NullWritable.get());
    }
  }
}
@Override
public void reduce(Text key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  for (Text text : values) {
    mos.write("text", NullWritable.get(), text, "reduce/");
    mos.write("sequence", NullWritable.get(), text, "reducesequence/");
  }
}
protected void reduce(Text key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  for (Text value : values) {
    if (key.toString().startsWith("node")) {
      multipleOutputs.write("nodes", NullWritable.get(), value);
    } else {
      multipleOutputs.write("wayparts", NullWritable.get(), value);
    }
  }
}
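// The two reducers above rely on a MultipleOutputs instance (mos / multipleOutputs) created
// elsewhere. Below is a minimal sketch of the usual wiring, assuming named outputs "nodes" and
// "wayparts" with Text values; only those names come from the snippet above, the class name,
// imports, and driver lines are assumptions, not the original project's code.
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public static class SplitReducer extends Reducer<Text, Text, NullWritable, Text> {
  private MultipleOutputs<NullWritable, Text> multipleOutputs;

  @Override
  protected void setup(Context context) {
    // One MultipleOutputs instance per task; it wraps the task's context.
    multipleOutputs = new MultipleOutputs<NullWritable, Text>(context);
  }

  // reduce(...) as shown above

  @Override
  protected void cleanup(Context context) throws IOException, InterruptedException {
    // Must be closed, otherwise the named-output files are not flushed.
    multipleOutputs.close();
  }
}

// Driver-side registration of the named outputs (hypothetical job setup):
// MultipleOutputs.addNamedOutput(job, "nodes", TextOutputFormat.class, NullWritable.class, Text.class);
// MultipleOutputs.addNamedOutput(job, "wayparts", TextOutputFormat.class, NullWritable.class, Text.class);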
@Override
public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException {
  if (rands.nextFloat() < filterPercentage) {
    context.write(NullWritable.get(), value);
  }
}
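// The sampling mapper above uses fields (rands, filterPercentage) that are initialized
// elsewhere. This is a self-contained sketch of one plausible setup: the configuration key
// "filter.percentage", the default value, and the class name are assumptions.
import java.io.IOException;
import java.util.Random;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public static class SampleMapper extends Mapper<Object, Text, NullWritable, Text> {
  private final Random rands = new Random();
  private float filterPercentage;

  @Override
  protected void setup(Context context) {
    // Fraction of records to keep, e.g. 0.01f keeps roughly 1% of the input.
    filterPercentage = context.getConfiguration().getFloat("filter.percentage", 0.01f);
  }

  @Override
  public void map(Object key, Text value, Context context)
      throws IOException, InterruptedException {
    if (rands.nextFloat() < filterPercentage) {
      context.write(NullWritable.get(), value);
    }
  }
}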
@Override
protected void reduce(LongWritable k2, Iterable<Text> v2s, Context context)
    throws IOException, InterruptedException {
  for (Text v2 : v2s) {
    context.write(NullWritable.get(), v2);
  }
}
/** Request new key from proxied RR. */
@SuppressWarnings("unchecked")
public K createKey() {
  if (keyclass != null) {
    return (K) ReflectionUtils.newInstance(keyclass, conf);
  }
  return (K) NullWritable.get();
}
@org.testng.annotations.Test(groups = {"fast"})
public void sortUsagesTest() throws ParseException {
  mapReduceDriver.addInput(new Text(RES1_ID), new LongWritable(COUNT1));
  mapReduceDriver.addInput(new Text(RES2_ID), new LongWritable(COUNT2));
  mapReduceDriver.addInput(new Text(RES3_ID), new LongWritable(COUNT3));

  MostPopularProtos.MostPopularStats.Builder statsBuilder =
      MostPopularProtos.MostPopularStats.newBuilder();
  statsBuilder.setTimestamp(new SimpleDateFormat("yyyy-MM-dd").parse(TEST_DATE).getTime());

  MostPopularProtos.ResourceStat.Builder resourceStatBuilder =
      MostPopularProtos.ResourceStat.newBuilder();
  resourceStatBuilder.setCounter(COUNT3);
  resourceStatBuilder.setResourceId(RES3_ID);
  statsBuilder.addStat(resourceStatBuilder);

  resourceStatBuilder = MostPopularProtos.ResourceStat.newBuilder();
  resourceStatBuilder.setCounter(COUNT1);
  resourceStatBuilder.setResourceId(RES1_ID);
  statsBuilder.addStat(resourceStatBuilder);

  BytesWritable expectedOutput = new BytesWritable(statsBuilder.build().toByteArray());
  mapReduceDriver.addOutput(NullWritable.get(), expectedOutput);
  mapReduceDriver.runTest();
}
/** REDUCER */
public static class Join extends Reducer<Text, Text, NullWritable, Text> {
  private NullWritable NULL = NullWritable.get();
  private Text OUT = new Text();

  @Override
  public void reduce(Text key, Iterable<Text> values, Context context)
      throws IOException, InterruptedException {
    // For each value, figure out which file it's from and store it accordingly.
    List<String> first = new ArrayList<String>();
    List<String> second = new ArrayList<String>();
    for (Text value : values) {
      if (value.charAt(0) == '1') {
        first.add(value.toString().substring(1));
      } else {
        second.add(value.toString().substring(1));
      }
      context.setStatus("OK");
    }
    context.setStatus("OK");

    if (first.size() == 0) {
      return;
    }
    if (second.size() == 0) {
      second.add(null);
    }

    // Do the cross product.
    for (String s1 : first) {
      for (String s2 : second) {
        if (s2 == null) {
          OUT.set(key.toString() + "\t" + s1 + "\t\t");
        } else {
          OUT.set(key.toString() + "\t" + s1 + "\t" + key.toString() + "\t" + s2);
        }
        context.write(NULL, OUT);
      }
    }
  }
}
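// The Join reducer above expects each value to carry a leading '1' or '2' marking which input
// it came from. This is a sketch of one possible tagging mapper, assuming the two inputs can
// be told apart by file name and that records are tab-separated with the join key first; the
// class name, file-name check, and record layout are all assumptions.
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public static class TaggingMapper extends Mapper<LongWritable, Text, Text, Text> {
  private final Text outKey = new Text();
  private final Text outValue = new Text();
  private String tag;

  @Override
  protected void setup(Context context) {
    // Tag records from the first dataset with '1' and everything else with '2'.
    String fileName = ((FileSplit) context.getInputSplit()).getPath().getName();
    tag = fileName.startsWith("first") ? "1" : "2";
  }

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // Assume tab-separated records whose first field is the join key.
    String[] fields = value.toString().split("\t", 2);
    if (fields.length < 2) {
      return;
    }
    outKey.set(fields[0]);
    outValue.set(tag + fields[1]);
    context.write(outKey, outValue);
  }
}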
public void reduce(
    Text key, Iterator<Text> values, OutputCollector<Text, NullWritable> output,
    Reporter reporter) throws IOException {
  // Convert a.b.c into a^Ab^Ac for easier import into Hive.
  String classC = key.toString();
  String asFields = classC.replace(INPUT_FIELD_SEP, OUTPUT_FIELD_SEP);
  Text outKey = new Text(asFields);

  Set<Integer> seenOctets = new HashSet<Integer>();
  while (values.hasNext()) {
    Text val = values.next();
    try {
      Integer lastOctet = Integer.valueOf(val.toString());
      if (!seenOctets.contains(lastOctet)) {
        // We have not seen this a.b.c.d before. Emit one output entry for
        // the a.b.c, and memorize the d so we don't do this again for the
        // same IP. This is ok to buffer because there will be at most 256
        // unique entries.
        output.collect(outKey, NullWritable.get());
        seenOctets.add(lastOctet);
      }
    } catch (NumberFormatException nfe) {
      // Ignore malformed input; just continue.
    }
  }
}
/**
 * Write a partition file for the given job, using the Sampler provided. Queries the sampler for
 * a sample keyset, sorts by the output key comparator, selects the keys for each rank, and
 * writes to the destination returned from {@link TotalOrderPartitioner#getPartitionFile}.
 */
@SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = job.getConfiguration();
  final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
  int numPartitions = job.getNumReduceTasks();
  K[] samples = sampler.getSample(inf, job);
  RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
  Arrays.sort(samples, comparator);
  Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
  FileSystem fs = dst.getFileSystem(conf);
  if (fs.exists(dst)) {
    fs.delete(dst, false);
  }
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(), NullWritable.class);
  NullWritable nullValue = NullWritable.get();
  float stepSize = samples.length / (float) numPartitions;
  int last = -1;
  for (int i = 1; i < numPartitions; ++i) {
    int k = Math.round(stepSize * i);
    while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
      ++k;
    }
    writer.append(samples[k], nullValue);
    last = k;
  }
  writer.close();
}
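// A sketch of how writePartitionFile is typically driven from a job setup: sample the input,
// write the partition file, and plug in TotalOrderPartitioner. The key/value types, sampler
// parameters, and method name below are illustrative assumptions, not taken from the original.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public static void configureTotalOrderSort(Job job, Path partitionFile) throws Exception {
  Configuration conf = job.getConfiguration();

  // Tell the partitioner where to find the split points.
  TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
  job.setPartitionerClass(TotalOrderPartitioner.class);

  // Sample up to 10,000 keys from at most 10 splits, picking each key with probability 0.01.
  InputSampler.Sampler<Text, Text> sampler =
      new InputSampler.RandomSampler<Text, Text>(0.01, 10000, 10);
  InputSampler.writePartitionFile(job, sampler);
}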
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  for (Pair<Integer, Integer> pair : linkToPage) {
    context.write(
        NullWritable.get(), new IntArrayWritable(new Integer[] {pair.first, pair.second}));
  }
}
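// IntArrayWritable (and the TextArrayWritable used in later snippets) are not standard Hadoop
// classes. A common way to define them is as thin ArrayWritable subclasses; this is a sketch
// of one plausible implementation, not the original project's code.
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

public static class IntArrayWritable extends ArrayWritable {
  public IntArrayWritable() {
    super(IntWritable.class);
  }

  public IntArrayWritable(Integer[] numbers) {
    super(IntWritable.class);
    // Box each int into an IntWritable so the array can be serialized.
    Writable[] values = new Writable[numbers.length];
    for (int i = 0; i < numbers.length; i++) {
      values[i] = new IntWritable(numbers[i]);
    }
    set(values);
  }
}

// TextArrayWritable would follow the same pattern with Text.class and Text values.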
@Override
public Object getCurrentKey() {
  if (dataset.size() > 0 && seen < total) {
    return this.dataset.get((int) seen);
  }
  return NullWritable.get();
}
@Test
public void test() throws Exception {
  createTable(TABLE_NAME, getBasicSchema(), getBasicCreateTableOptions());

  KuduTableOutputFormat output = new KuduTableOutputFormat();
  Configuration conf = new Configuration();
  conf.set(KuduTableOutputFormat.MASTER_ADDRESSES_KEY, getMasterAddresses());
  conf.set(KuduTableOutputFormat.OUTPUT_TABLE_KEY, TABLE_NAME);
  output.setConf(conf);

  String multitonKey = conf.get(KuduTableOutputFormat.MULTITON_KEY);
  KuduTable table = KuduTableOutputFormat.getKuduTable(multitonKey);
  assertNotNull(table);

  Insert insert = table.newInsert();
  PartialRow row = insert.getRow();
  row.addInt(0, 1);
  row.addInt(1, 2);
  row.addInt(2, 3);
  row.addString(3, "a string");
  row.addBoolean(4, true);

  RecordWriter<NullWritable, Operation> rw = output.getRecordWriter(null);
  rw.write(NullWritable.get(), insert);
  rw.close(null);

  AsyncKuduScanner.AsyncKuduScannerBuilder builder = client.newScannerBuilder(table);
  assertEquals(1, countRowsInScan(builder.build()));
}
/**
 * Write out a SequenceFile that can be read by TotalOrderPartitioner that contains the split
 * points in startKeys.
 *
 * <p>This method was copied from HFileOutputFormat in hbase-0.90.1-cdh3u0. I had to copy it
 * because it's private.
 *
 * @param conf The job configuration.
 * @param partitionsPath output path for SequenceFile.
 * @param startKeys the region start keys to use as the partitions.
 * @throws IOException If there is an error.
 */
private static void writePartitionFile(
    Configuration conf, Path partitionsPath, List<HFileKeyValue> startKeys) throws IOException {
  if (startKeys.isEmpty()) {
    throw new IllegalArgumentException("No regions passed");
  }

  // We're generating a list of split points, and we don't ever
  // have keys < the first region (which has an empty start key)
  // so we need to remove it. Otherwise we would end up with an
  // empty reducer with index 0.
  TreeSet<HFileKeyValue> sorted = new TreeSet<HFileKeyValue>();
  sorted.addAll(startKeys);

  HFileKeyValue first = sorted.first();
  if (0 != first.getRowKey().length) {
    throw new IllegalArgumentException(
        "First region of table should have empty start row key. Instead has: "
            + Bytes.toStringBinary(first.getRowKey()));
  }
  sorted.remove(first);

  // Write the actual file.
  final SequenceFile.Writer writer =
      KijiMRPlatformBridge.get()
          .newSeqFileWriter(conf, partitionsPath, HFileKeyValue.class, NullWritable.class);
  try {
    for (HFileKeyValue startKey : sorted) {
      writer.append(startKey, NullWritable.get());
    }
  } finally {
    writer.close();
  }
}
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  StringBuilder builder = new StringBuilder();
  String rightText = null;
  List<String> zeros = new ArrayList<String>();

  for (Text item : values) {
    String valueItem = item.toString();
    String[] tokens = valueItem.split(":");
    int side;
    try {
      side = Integer.parseInt(tokens[1]);
    } catch (NumberFormatException nfe) {
      throw new NumberFormatException("valueItem: " + valueItem);
    }
    if (side == 1) {
      rightText = tokens[0];
    } else {
      zeros.add(tokens[0]);
    }
  }

  for (String item : zeros) {
    builder.append(item);
    if (rightText != null) {
      builder.append('\t');
      builder.append(rightText);
    }
    context.write(new Text(builder.toString()), NullWritable.get());
    builder.setLength(0);
  }
}
@SuppressWarnings("unchecked") public U createValue() { if (valueclass != null) { return (U) ReflectionUtils.newInstance(valueclass, conf); } return (U) NullWritable.get(); }
@Override
public void map(
    final NullWritable key,
    final FaunusVertex value,
    final Mapper<NullWritable, FaunusVertex, WritableComparable, LongWritable>.Context context)
    throws IOException, InterruptedException {
  if (this.isVertex) {
    if (value.hasPaths()) {
      this.map.incr(ElementPicker.getProperty(value, this.property), value.pathCount());
      context.getCounter(Counters.PROPERTIES_COUNTED).increment(1l);
    }
  } else {
    for (final Edge e : value.getEdges(Direction.OUT)) {
      final FaunusEdge edge = (FaunusEdge) e;
      if (edge.hasPaths()) {
        this.map.incr(ElementPicker.getProperty(edge, this.property), edge.pathCount());
        context.getCounter(Counters.PROPERTIES_COUNTED).increment(1l);
      }
    }
  }

  // Protect against memory explosion.
  if (this.map.size() > Tokens.MAP_SPILL_OVER) {
    this.dischargeMap(context);
  }

  this.outputs.write(Tokens.GRAPH, NullWritable.get(), value);
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  for (Pair<Integer, Integer> item : linksMap) {
    Integer[] items = {item.second, item.first};
    IntArrayWritable val = new IntArrayWritable(items);
    context.write(NullWritable.get(), val);
  }
}
@Override
public NullWritable getEdgeValue(IntWritable targetVertexId) {
  if (neighbors.contains(targetVertexId.get())) {
    return NullWritable.get();
  } else {
    return null;
  }
}
public void reduce(Text key, Iterable<IntWritable> values, Context context)
    throws IOException, InterruptedException {
  int sum = 0;
  for (IntWritable val : values) {
    sum++;
  }
  context.write(new IntWritable(sum), NullWritable.get());
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  val.setSeed(r.nextLong());
  while (factory.next(null, val)) {
    context.write(NullWritable.get(), val);
    val.setSeed(r.nextLong());
  }
}
@Override
protected void reduce(
    LongWritable ignored, Iterable<Text> records,
    Reducer<LongWritable, Text, NullWritable, Text>.Context ctx)
    throws IOException, InterruptedException {
  for (Text rec : records) {
    ctx.write(NullWritable.get(), rec);
  }
}
@Override
public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException {
  Matcher matcher = pattern.matcher(value.toString());
  if (matcher.find()) {
    context.write(NullWritable.get(), value);
  }
}
@Override
protected void reduce(LongWritable key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  for (Text value : values) {
    context.write(NullWritable.get(), value);
  }
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  for (Pair<Integer, String> item : countToWordMap) {
    String[] strings = {item.second, item.first.toString()};
    TextArrayWritable val = new TextArrayWritable(strings);
    context.write(NullWritable.get(), val);
  }
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  for (Pair<Integer, String> entry : sortedWordCount) {
    String[] strings = {entry.second, entry.first.toString()};
    TextArrayWritable val = new TextArrayWritable(strings);
    context.write(NullWritable.get(), val);
  }
}
@Override
protected void map(TKey key, TValue value, Context context)
    throws IOException, InterruptedException {
  if (this.tracing) {
    LOG.trace("Key = {}", key);
  }
  context.write(NullWritable.get(), key);
}
@Override
public void map(Text key, Text value, Context context)
    throws IOException, InterruptedException {
  String[] st = new String[2];
  st[0] = key.toString();
  st[1] = value.toString();
  context.write(NullWritable.get(), new TextArrayWritable(st));
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  // TODO
  for (Pair<Integer, Integer> item : countTopLinkMap) {
    Integer[] numbers = {item.second, item.first};
    IntArrayWritable val = new IntArrayWritable(numbers);
    context.write(NullWritable.get(), val);
  }
}