/**
 * Writes the given split points to a partition file on the local file system and registers that
 * file with {@link TotalOrderPartitioner} through {@code setPartitionFile}.
 *
 * <p>The file is named {@code _partition.lst} and lives in a directory named after
 * {@code testname}, below a base directory read from a system property (default {@code /tmp}).
 * The reduce-task count on {@code conf} is set to {@code splits.length + 1}.
 *
 * <p>NOTE(review): this snippet looks damaged by extraction. The system property name is an empty
 * string, the right-hand side of {@code w =} is missing, and several closing braces are absent.
 * Restore it from the original source before compiling.
 *
 * @param testname name of the sub-directory that holds the partition file
 * @param conf job configuration that receives the partition file path and the reduce-task count
 * @param splits keys appended to the file in array order, each paired with a {@link NullWritable}
 * @return the path of the partition file
 * @throws IOException declared for the file-system and writer calls
 */
private static <T extends WritableComparable> Path writePartitionFile(
     String testname, JobConf conf, T[] splits) throws IOException {
   final FileSystem fs = FileSystem.getLocal(conf);
   // NOTE(review): the property name is empty here, so this lookup cannot work as written;
   // the real key was presumably lost -- confirm against the original source.
   final Path testdir = new Path(System.getProperty("", "/tmp")).makeQualified(fs);
   Path p = new Path(testdir, testname + "/_partition.lst");
   TotalOrderPartitioner.setPartitionFile(conf, p);
   // One more reducer than there are split points.
   conf.setNumReduceTasks(splits.length + 1);
   SequenceFile.Writer w = null;
   try {
     // Not used by the visible code: the append below calls NullWritable.get() again.
     NullWritable nw = NullWritable.get();
     // NOTE(review): the writer construction is missing after the assignment operator.
     w =
     for (int i = 0; i < splits.length; ++i) {
       w.append(splits[i], NullWritable.get());
   } finally {
     // Close the writer only if it was actually created.
     if (null != w) w.close();
   return p;
  /**
   * Called for every record in the data. Parses the record text into a {@code Spinn3rDocument},
   * tests it with {@code filter.documentSatisfies}, and writes matching documents as the output
   * key with a {@link NullWritable} value.
   *
   * <p>The elapsed nanoseconds of the parsing and filtering steps are added to the
   * {@code ProcessingTime.PARSING} and {@code ProcessingTime.FILTERING} counters.
   *
   * <p>NOTE(review): {@code t1}, {@code t2}, {@code t}, {@code filter} and {@code cmdMap} are
   * declared outside this snippet. The body of the size guard and all closing braces are missing.
   *
   * @param key input key; not read by the visible code
   * @param value raw text of one document
   * @param context used for counters and output
   */
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // Skip enormous documents, due to memory problems and since regex cannot handle them.
    // NOTE(review): the statements inside this guard are missing from the snippet.
    if (value.getLength() > MAX_DOC_SIZE_IN_BYTES) {

    // Parse document and measure time.
    t1 = System.nanoTime();
    Spinn3rDocument d = new Spinn3rDocument(value.toString());
    t2 = System.nanoTime();
    context.getCounter(ProcessingTime.PARSING).increment(t2 - t1);

    // Keep only those documents that satisfy the search conditions; time the check as well.
    t1 = System.nanoTime();
    t = filter.documentSatisfies(d);
    t2 = System.nanoTime();
    context.getCounter(ProcessingTime.FILTERING).increment(t2 - t1);

    // Output if satisfies: the "formatF5" option selects toStringF5() over toString().
    if (t) {
      if (cmdMap.hasOption("formatF5")) {
        context.write(new Text(d.toStringF5()), NullWritable.get());
      } else {
        context.write(new Text(d.toString()), NullWritable.get());
Exemple #3
 /**
  * Writes every value twice through {@code mos}: once to the named output {@code "text"} with
  * base path {@code "reduce/"} and once to the named output {@code "sequence"} with base path
  * {@code "reducesequence/"}. Both writes use a {@link NullWritable} key.
  *
  * <p>NOTE(review): {@code mos} is declared outside this snippet (presumably a MultipleOutputs
  * instance -- confirm). Closing braces are missing.
  *
  * @param key grouping key; not read by the visible code
  * @param values values to write
  * @param context not used by the visible code
  */
 public void reduce(Text key, Iterable<Text> values, Context context)
     throws IOException, InterruptedException {
   for (Text text : values) {
     mos.write("text", NullWritable.get(), text, "reduce/");
     mos.write("sequence", NullWritable.get(), text, "reducesequence/");
Exemple #4
 /**
  * Routes each value to one of two named outputs based on the key: keys whose string form starts
  * with {@code "node"} go to {@code "nodes"}, all others go to {@code "wayparts"}. Every record
  * is written with a {@link NullWritable} key.
  *
  * <p>NOTE(review): {@code multipleOutputs} is declared outside this snippet. Closing braces are
  * missing.
  *
  * @param key decides the target output
  * @param values values to write
  * @param context not used by the visible code
  */
 protected void reduce(Text key, Iterable<Text> values, Context context)
     throws IOException, InterruptedException {
   for (Text value : values) {
     // The key is re-tested for every value; it does not change inside the loop.
     if (key.toString().startsWith("node"))
       multipleOutputs.write("nodes", NullWritable.get(), value);
     else multipleOutputs.write("wayparts", NullWritable.get(), value);
 /**
  * Emits the input value with a {@link NullWritable} key when a freshly drawn float from
  * {@code rands} is below {@code filterPercentage}; otherwise emits nothing.
  *
  * <p>NOTE(review): {@code rands} and {@code filterPercentage} are declared outside this snippet;
  * presumably {@code rands} is a {@code java.util.Random} and the threshold is a fraction in
  * [0, 1] -- confirm. Closing braces are missing.
  *
  * @param key input key; not read
  * @param value record that may be passed through
  * @param context receives the sampled records
  */
 public void map(Object key, Text value, Context context)
     throws IOException, InterruptedException {
   if (rands.nextFloat() < filterPercentage) {
     context.write(NullWritable.get(), value);
Exemple #6
 /**
  * Writes every value of the group unchanged, replacing the key with {@link NullWritable}.
  *
  * <p>NOTE(review): closing braces are missing from this snippet.
  *
  * @param k2 grouping key; not read
  * @param v2s values to pass through
  * @param context receives the output records
  */
 protected void reduce(LongWritable k2, Iterable<Text> v2s, Context context)
     throws IOException, InterruptedException {
   for (Text v2 : v2s) {
     context.write(NullWritable.get(), v2);
 /**
  * Request new key from proxied RR.
  *
  * <p>When {@code keyclass} is non-null, a new instance of it is created reflectively with
  * {@code conf}; the trailing statement returns the {@link NullWritable} singleton cast to
  * {@code K}.
  *
  * <p>NOTE(review): {@code keyclass} and {@code conf} are declared outside this snippet. The
  * closing brace of the {@code if} is missing, so the fallback return reads as if it were inside
  * the branch. Both casts are unchecked.
  *
  * @return a new key instance, or the {@code NullWritable} singleton
  */
 public K createKey() {
   if (keyclass != null) {
     return (K) ReflectionUtils.newInstance(keyclass, conf);
   return (K) NullWritable.get();
Exemple #8
  /**
   * Feeds three (resource id, count) pairs into {@code mapReduceDriver} and registers one
   * expected output record keyed by {@link NullWritable}.
   *
   * <p>NOTE(review): this snippet is heavily truncated. The initialisers of {@code statsBuilder},
   * {@code resourceStatBuilder} and {@code expectedOutput} are missing, as are any statements
   * that fill the builders or run the driver. The closing brace is also missing.
   *
   * @throws ParseException if {@code TEST_DATE} does not match the {@code yyyy-MM-dd} pattern
   */
  @org.testng.annotations.Test(groups = {"fast"})
  public void sortUsagesTest() throws ParseException {
    mapReduceDriver.addInput(new Text(RES1_ID), new LongWritable(COUNT1));
    mapReduceDriver.addInput(new Text(RES2_ID), new LongWritable(COUNT2));
    mapReduceDriver.addInput(new Text(RES3_ID), new LongWritable(COUNT3));

    // NOTE(review): the builder construction is missing after the assignment operator.
    MostPopularProtos.MostPopularStats.Builder statsBuilder =
    // The timestamp is TEST_DATE parsed as yyyy-MM-dd, in epoch milliseconds.
    statsBuilder.setTimestamp(new SimpleDateFormat("yyyy-MM-dd").parse(TEST_DATE).getTime());

    // NOTE(review): initialiser missing here as well.
    MostPopularProtos.ResourceStat.Builder resourceStatBuilder =

    resourceStatBuilder = MostPopularProtos.ResourceStat.newBuilder();

    // NOTE(review): the constructor argument is missing.
    BytesWritable expectedOutput = new BytesWritable(;

    mapReduceDriver.addOutput(NullWritable.get(), expectedOutput);

Exemple #9
  /**
   * REDUCER. Splits the values of one key into two lists by their leading tag character and
   * writes one tab-separated line per pair from the two lists.
   *
   * <p>NOTE(review): the statement inside the {@code '1'} branch is missing (presumably it adds
   * the untagged value to {@code first} -- confirm). Closing braces are missing too.
   */
  public static class Join extends Reducer<Text, Text, NullWritable, Text> {

    // Reused for every output record.
    private NullWritable NULL = NullWritable.get();
    private Text OUT = new Text();

    /**
     * Emits the cross product of the two value lists for {@code key}.
     *
     * <p>Nothing is written when {@code first} is empty. When {@code second} is empty, a single
     * {@code null} placeholder is added so each entry of {@code first} is still written once,
     * followed by two tabs instead of the second key/value columns.
     *
     * @param key join key, written in front of each side's value
     * @param values tagged values; the first character selects the list
     * @param context receives the joined lines with a {@code NullWritable} key
     */
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      // For each value, figure out which file it's from and store it
      // accordingly.
      List<String> first = new ArrayList<String>();
      List<String> second = new ArrayList<String>();

      for (Text value : values) {
        // NOTE(review): the body of the '1' branch is missing from the snippet.
        if (value.charAt(0) == '1') {
        // substring(1) drops the tag character.
        } else second.add(value.toString().substring(1));


      if (first.size() == 0) return;
      // Placeholder so the loop below runs once per first-side entry.
      if (second.size() == 0) second.add(null);

      // Do the cross product
      for (String s1 : first) {
        for (String s2 : second) {
          if (s2 == null) OUT.set(key.toString() + "\t" + s1 + "\t\t");
          else OUT.set(key.toString() + "\t" + s1 + "\t" + key.toString() + "\t" + s2);
          context.write(NULL, OUT);
    /**
     * Rewrites the key by replacing {@code INPUT_FIELD_SEP} with {@code OUTPUT_FIELD_SEP}, then
     * emits that rewritten key with a {@link NullWritable} value for each integer value not yet
     * present in {@code seenOctets}.
     *
     * <p>NOTE(review): the right-hand side of {@code Text val =} is missing, and no statement
     * that adds to {@code seenOctets} is visible even though the inline comment describes
     * memorising the value. Closing braces are missing too.
     *
     * @param key the a.b.c part of an address, per the inline comment
     * @param values strings expected to parse as integers; unparseable ones are skipped
     * @param output receives the rewritten key
     * @param reporter not used by the visible code
     */
    public void reduce(
        Text key,
        Iterator<Text> values,
        OutputCollector<Text, NullWritable> output,
        Reporter reporter)
        throws IOException {

      // convert a.b.c into a^Ab^Ac for easier import into Hive.
      String classC = key.toString();
      String asFields = classC.replace(INPUT_FIELD_SEP, OUTPUT_FIELD_SEP);

      Text outKey = new Text(asFields);

      Set<Integer> seenOctets = new HashSet<Integer>();

      while (values.hasNext()) {
        // NOTE(review): the expression that fetches the next value is missing.
        Text val =;
        try {
          Integer lastOctet = new Integer(val.toString());
          if (!seenOctets.contains(lastOctet)) {
            // we have not seen this a.b.c.d before. emit one output entry for
            // the a.b.c, and memorize the d so we don't do this again for the
            // same IP. This is ok to buffer because there will be at most 256
            // unique entries.
            output.collect(outKey, NullWritable.get());
        } catch (NumberFormatException nfe) {
          // ignore malformed input; just continue.
 /**
  * Write a partition file for the given job, using the Sampler provided. Queries the sampler for a
  * sample keyset, sorts by the output key comparator, selects the keys for each rank, and writes
  * to the destination returned from {@link TotalOrderPartitioner#getPartitionFile}.
  */
 @SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
 public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
     throws IOException, ClassNotFoundException, InterruptedException {
   // NOTE(review): this snippet is damaged by extraction. The inner loop condition is garbled
   // and closing braces are missing. Restore it from the original source before compiling.
   Configuration conf = job.getConfiguration();
   final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
   int numPartitions = job.getNumReduceTasks();
   // Sample the input, then order the samples with the job's sort comparator.
   K[] samples = sampler.getSample(inf, job);
   RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
   Arrays.sort(samples, comparator);
   // Replace any existing partition file at the configured destination.
   Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
   FileSystem fs = dst.getFileSystem(conf);
   if (fs.exists(dst)) {
     fs.delete(dst, false);
   SequenceFile.Writer writer =
       SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(), NullWritable.class);
   NullWritable nullValue = NullWritable.get();
   // Distance between chosen samples; the loop writes numPartitions - 1 keys.
   float stepSize = samples.length / (float) numPartitions;
   int last = -1;
   for (int i = 1; i < numPartitions; ++i) {
     int k = Math.round(stepSize * i);
     // NOTE(review): garbled condition; it appears to compare samples[last] with samples[k],
     // presumably through the comparator, and the loop body is missing.
     while (last >= k &&[last], samples[k]) == 0) {
     writer.append(samples[k], nullValue);
     last = k;
 /**
  * Iterates over {@code linkToPage} and, for each pair, builds an {@code IntArrayWritable}
  * holding {@code pair.first} followed by {@code pair.second}, paired with a
  * {@link NullWritable}.
  *
  * <p>NOTE(review): the start of the call that takes these two arguments is missing (presumably
  * {@code context.write(} -- confirm). {@code linkToPage} is declared outside this snippet, and
  * closing braces are missing.
  *
  * @param context task context; not referenced in the visible code
  */
 protected void cleanup(Context context) throws IOException, InterruptedException {
   for (Pair<Integer, Integer> pair : linkToPage) {
         NullWritable.get(), new IntArrayWritable(new Integer[] {pair.first, pair.second}));
 /**
  * Returns the element of {@code dataset} at index {@code seen} while {@code dataset} is
  * non-empty and {@code seen < total}; the trailing statement returns the {@link NullWritable}
  * singleton.
  *
  * <p>NOTE(review): {@code dataset}, {@code seen} and {@code total} are declared outside this
  * snippet. The closing brace of the {@code if} is missing, so the fallback return reads as if it
  * were inside the branch.
  *
  * @return the current dataset element, or the {@code NullWritable} singleton
  */
 public Object getCurrentKey() {
   if (dataset.size() > 0 && seen < total) {
     // seen is narrowed to int to index the list.
     return this.dataset.get((int) seen);
   return NullWritable.get();
  /**
   * Creates a table, writes a single insert through a {@code KuduTableOutputFormat} record
   * writer, and asserts on the row count seen by a scan.
   *
   * <p>NOTE(review): the visible code never hands {@code conf} to {@code output}, so a line such
   * as a {@code setConf} call was presumably lost -- confirm. The argument of
   * {@code countRowsInScan} is missing, and so is the closing brace.
   *
   * @throws Exception declared for the table, writer and scan calls
   */
  public void test() throws Exception {
    createTable(TABLE_NAME, getBasicSchema(), getBasicCreateTableOptions());

    KuduTableOutputFormat output = new KuduTableOutputFormat();
    Configuration conf = new Configuration();
    conf.set(KuduTableOutputFormat.MASTER_ADDRESSES_KEY, getMasterAddresses());
    conf.set(KuduTableOutputFormat.OUTPUT_TABLE_KEY, TABLE_NAME);

    // The table handle is looked up through the multiton key stored in the configuration.
    String multitonKey = conf.get(KuduTableOutputFormat.MULTITON_KEY);
    KuduTable table = KuduTableOutputFormat.getKuduTable(multitonKey);

    // One row: three ints, a string and a boolean, addressed by column index.
    Insert insert = table.newInsert();
    PartialRow row = insert.getRow();
    row.addInt(0, 1);
    row.addInt(1, 2);
    row.addInt(2, 3);
    row.addString(3, "a string");
    row.addBoolean(4, true);

    // The record writer is requested with a null task context.
    RecordWriter<NullWritable, Operation> rw = output.getRecordWriter(null);
    rw.write(NullWritable.get(), insert);
    AsyncKuduScanner.AsyncKuduScannerBuilder builder = client.newScannerBuilder(table);
    // NOTE(review): the scanner argument is missing from this call.
    assertEquals(1, countRowsInScan(;
  /**
   * Write out a SequenceFile that can be read by TotalOrderPartitioner that contains the split
   * points in startKeys.
   *
   * <p>This method was copied from HFileOutputFormat in hbase-0.90.1-cdh3u0. I had to copy it
   * because it's private.
   *
   * @param conf The job configuration.
   * @param partitionsPath output path for SequenceFile.
   * @param startKeys the region start keys to use as the partitions.
   * @throws IOException If there is an error.
   */
  private static void writePartitionFile(
      Configuration conf, Path partitionsPath, List<HFileKeyValue> startKeys) throws IOException {
    // NOTE(review): this snippet is damaged by extraction. Nothing visible adds startKeys to
    // `sorted` or removes the first entry, the receiver of newSeqFileWriter is missing, and the
    // finally body plus closing braces are absent. Restore from the original before compiling.
    if (startKeys.isEmpty()) {
      throw new IllegalArgumentException("No regions passed");

    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0.
    TreeSet<HFileKeyValue> sorted = new TreeSet<HFileKeyValue>();

    // The smallest key must have a zero-length row key; anything else is rejected.
    HFileKeyValue first = sorted.first();
    if (0 != first.getRowKey().length) {
      throw new IllegalArgumentException(
          "First region of table should have empty start row key. Instead has: "
              + Bytes.toStringBinary(first.getRowKey()));

    // Write the actual file
    final SequenceFile.Writer writer =
            .newSeqFileWriter(conf, partitionsPath, HFileKeyValue.class, NullWritable.class);

    try {
      // Keys are written in the TreeSet's sorted order, each with a NullWritable value.
      for (HFileKeyValue startKey : sorted) {
        writer.append(startKey, NullWritable.get());
    } finally {
 /**
  * Splits each value on {@code ":"}, reads the second token as an integer side marker, and keeps
  * the first token of a side-1 value in {@code rightText}. A final loop over {@code zeros} writes
  * the content of {@code builder} as the output key with a {@link NullWritable} value.
  *
  * <p>NOTE(review): the {@code else} branch, the statements that fill {@code zeros} and
  * {@code builder}, and all closing braces are missing from this snippet.
  *
  * @param key grouping key; not read by the visible code
  * @param values strings of the form {@code text:side}
  * @param context receives the output
  * @throws NumberFormatException if the side token is not an integer; the message carries the
  *     whole offending value
  */
 protected void reduce(Text key, Iterable<Text> values, Context context)
     throws IOException, InterruptedException {
   StringBuilder builder = new StringBuilder();
   String rightText = null;
   List<String> zeros = new ArrayList<String>();
   for (Text item : values) {
     String valueItem = item.toString();
     String[] tokens = valueItem.split(":");
     int side;
     try {
       side = Integer.parseInt(tokens[1]);
     } catch (NumberFormatException nfe) {
       // Rethrown with the full value for context; the original exception is not chained.
       throw new NumberFormatException("valueItem: " + valueItem);
     if (side == 1) {
       rightText = tokens[0];
     } else {
   for (String item : zeros) {
     if (rightText != null) {
     context.write(new Text(builder.toString()), NullWritable.get());
 /**
  * Creates a value instance: when {@code valueclass} is non-null, a new instance of it is built
  * reflectively with {@code conf}; the trailing statement returns the {@link NullWritable}
  * singleton cast to {@code U}.
  *
  * <p>NOTE(review): {@code valueclass} and {@code conf} are declared outside this snippet. The
  * closing brace of the {@code if} is missing, so the fallback return reads as if it were inside
  * the branch. Both casts are unchecked.
  *
  * @return a new value instance, or the {@code NullWritable} singleton
  */
 public U createValue() {
   if (valueclass != null) {
     return (U) ReflectionUtils.newInstance(valueclass, conf);
   return (U) NullWritable.get();
    /**
     * Depending on {@code isVertex}, inspects either the vertex itself or each of its outgoing
     * edges for paths, then writes the vertex to the {@code Tokens.GRAPH} named output with a
     * {@link NullWritable} key.
     *
     * <p>NOTE(review): this snippet is heavily damaged. The calls that consume
     * {@code pathCount()}, the left operand of the spill-over comparison and the guarded
     * statements are missing, as are all closing braces. {@code isVertex} and {@code outputs}
     * are declared outside this snippet.
     *
     * @param key input key; not read by the visible code
     * @param value vertex being processed
     * @param context mapper context; not referenced in the visible code
     */
    public void map(
        final NullWritable key,
        final FaunusVertex value,
        final Mapper<NullWritable, FaunusVertex, WritableComparable, LongWritable>.Context context)
        throws IOException, InterruptedException {

      if (this.isVertex) {
        if (value.hasPaths()) {
          // NOTE(review): the start of the next statement is missing.
,, value.pathCount());
      } else {
        // Only outgoing edges are examined.
        for (final Edge e : value.getEdges(Direction.OUT)) {
          final FaunusEdge edge = (FaunusEdge) e;
          if (edge.hasPaths()) {
            // NOTE(review): the start of the next statement is missing.
  ,, edge.pathCount());

      // protected against memory explosion
      // NOTE(review): the left operand of this comparison is missing.
      if ( > Tokens.MAP_SPILL_OVER) {

      this.outputs.write(Tokens.GRAPH, NullWritable.get(), value);
 /**
  * Writes one record per entry of {@code linksMap}: an {@code IntArrayWritable} holding the
  * pair's {@code second} followed by its {@code first}, with a {@link NullWritable} key.
  *
  * <p>NOTE(review): {@code linksMap} is declared outside this snippet. Closing braces are
  * missing.
  *
  * @param context receives the output records
  */
 protected void cleanup(Context context) throws IOException, InterruptedException {
   for (Pair<Integer, Integer> item : linksMap) {
     // Order is swapped on output: second comes before first.
     Integer[] items = {item.second, item.first};
     IntArrayWritable val = new IntArrayWritable(items);
     context.write(NullWritable.get(), val);
 /**
  * Returns the {@link NullWritable} singleton when {@code neighbors} contains the target vertex
  * id, and {@code null} otherwise.
  *
  * <p>NOTE(review): {@code neighbors} is declared outside this snippet. Closing braces are
  * missing.
  *
  * @param targetVertexId id of the vertex at the other end of the edge
  * @return the {@code NullWritable} singleton if the id is in {@code neighbors}, else
  *     {@code null}
  */
 public NullWritable getEdgeValue(IntWritable targetVertexId) {
   if (neighbors.contains(targetVertexId.get())) {
     return NullWritable.get();
   } else {
     return null;
Exemple #21
 /**
  * Writes {@code sum} as an {@code IntWritable} output key with a {@link NullWritable} value.
  *
  * <p>NOTE(review): the loop body is missing from this snippet (presumably it adds each value to
  * {@code sum} -- confirm), and closing braces are absent, so the write reads as if it were
  * inside the loop.
  *
  * @param key grouping key; not read by the visible code
  * @param values values of the group
  * @param context receives the output record
  */
 public void reduce(Text key, Iterable<IntWritable> values, Context context)
     throws IOException, InterruptedException {
   int sum = 0;
   for (IntWritable val : values) {
   context.write(new IntWritable(sum), NullWritable.get());
Exemple #22
 /**
  * Writes {@code val} with a {@link NullWritable} key for as long as the loop condition holds.
  *
  * <p>NOTE(review): the loop condition is garbled -- the call and its first argument are missing.
  * {@code val} is declared outside this snippet, and closing braces are absent.
  *
  * @param context receives the output records
  */
 protected void cleanup(Context context) throws IOException, InterruptedException {
   while (, val)) {
     context.write(NullWritable.get(), val);
Exemple #23
 /**
  * Writes every record of the group unchanged, replacing the key with {@link NullWritable}.
  *
  * <p>NOTE(review): the closing brace of the method is missing from this snippet.
  *
  * @param ignored grouping key; not read
  * @param records values to pass through
  * @param ctx receives the output records
  */
 protected void reduce(
     LongWritable ignored,
     Iterable<Text> records,
     Reducer<LongWritable, Text, NullWritable, Text>.Context ctx)
     throws IOException, InterruptedException {
   for (Text rec : records) ctx.write(NullWritable.get(), rec);
 /**
  * Emits the input value with a {@link NullWritable} key when {@code pattern} is found anywhere
  * in the value's text; otherwise emits nothing.
  *
  * <p>NOTE(review): {@code pattern} is declared outside this snippet. Closing braces are missing.
  *
  * @param key input key; not read
  * @param value line of text to test
  * @param context receives the matching lines
  */
 public void map(Object key, Text value, Context context)
     throws IOException, InterruptedException {
   Matcher matcher = pattern.matcher(value.toString());
   // find() looks for a match anywhere in the text; it does not require a whole-line match.
   if (matcher.find()) {
     context.write(NullWritable.get(), value);
    /**
     * Writes every value of the group unchanged, replacing the key with {@link NullWritable}.
     *
     * <p>NOTE(review): closing braces are missing from this snippet.
     *
     * @param key grouping key; not read
     * @param values values to pass through
     * @param context receives the output records
     */
    protected void reduce(LongWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

      for (Text value : values) {
        context.write(NullWritable.get(), value);
Exemple #26
 /**
  * Writes one record per entry of {@code countToWordMap}: a {@code TextArrayWritable} holding
  * the pair's {@code second} followed by the string form of its {@code first}, with a
  * {@link NullWritable} key.
  *
  * <p>NOTE(review): {@code countToWordMap} is declared outside this snippet. Closing braces are
  * missing.
  *
  * @param context receives the output records
  */
 protected void cleanup(Context context) throws IOException, InterruptedException {
   for (Pair<Integer, String> item : countToWordMap) {
     // The String component is written first, then the Integer rendered as text.
     String[] strings = {item.second, item.first.toString()};
     TextArrayWritable val = new TextArrayWritable(strings);
     context.write(NullWritable.get(), val);
 /**
  * Writes one record per entry of {@code sortedWordCount}: a {@code TextArrayWritable} holding
  * the pair's {@code second} followed by the string form of its {@code first}, with a
  * {@link NullWritable} key.
  *
  * <p>NOTE(review): {@code sortedWordCount} is declared outside this snippet. Closing braces are
  * missing.
  *
  * @param context receives the output records
  */
 protected void cleanup(Context context) throws IOException, InterruptedException {
   for (Pair<Integer, String> entry : sortedWordCount) {
     // The String component is written first, then the Integer rendered as text.
     String[] strings = {entry.second, entry.first.toString()};
     TextArrayWritable val = new TextArrayWritable(strings);
     context.write(NullWritable.get(), val);
 /**
  * Emits the input key as the output value, paired with a {@link NullWritable} key. The key is
  * logged at trace level first when {@code tracing} is set.
  *
  * <p>NOTE(review): {@code tracing} and {@code LOG} are declared outside this snippet. The
  * closing brace of the {@code if} is missing, so the write reads as if it were inside the
  * branch.
  *
  * @param key written as the output value
  * @param value not read by the visible code
  * @param context receives the output record
  */
 protected void map(TKey key, TValue value, Context context)
     throws IOException, InterruptedException {
   if (this.tracing) {
     LOG.trace("Key = {}", key);
   context.write(NullWritable.get(), key);
 /**
  * Packs the string forms of the input key and value into a two-element
  * {@code TextArrayWritable}, key first, and writes it with a {@link NullWritable} key.
  *
  * <p>NOTE(review): the closing brace of the method is missing from this snippet.
  *
  * @param key becomes element 0 of the output array
  * @param value becomes element 1 of the output array
  * @param context receives the output record
  */
 public void map(Text key, Text value, Context context)
     throws IOException, InterruptedException {
   String[] st = new String[2];
   st[0] = key.toString();
   st[1] = value.toString();
   context.write(NullWritable.get(), new TextArrayWritable(st));
 /**
  * Writes one record per entry of {@code countTopLinkMap}: an {@code IntArrayWritable} holding
  * the pair's {@code second} followed by its {@code first}, with a {@link NullWritable} key.
  *
  * <p>NOTE(review): {@code countTopLinkMap} is declared outside this snippet. Closing braces are
  * missing.
  *
  * @param context receives the output records
  */
 protected void cleanup(Context context) throws IOException, InterruptedException {
   // TODO
   for (Pair<Integer, Integer> item : countTopLinkMap) {
     // Order is swapped on output: second comes before first.
     Integer[] numbers = {item.second, item.first};
     IntArrayWritable val = new IntArrayWritable(numbers);
     context.write(NullWritable.get(), val);