/**
 * Iterates over every record of {@code input} and emits one output pair each time the
 * reader position is observed to have changed between two consecutive records.
 *
 * <p>For each such change the emitted key is the docno of the preceding record, and the
 * emitted value is the position sampled just before that preceding record was read,
 * followed by a tab and {@code fileno}. Every emission also increments the
 * {@code Blocks.Total} counter by one.
 *
 * <p>NOTE(review): the comparison only runs when a following record exists, so a position
 * change observed while reading the very last record is never emitted. Confirm this is
 * intended.
 *
 * @param input reader supplying (docno, page) records and its current position
 * @param output collector receiving the (docno, "position\tfileno") pairs
 * @param reporter used to increment the {@code Blocks.Total} counter
 * @throws IOException propagated from the reader or the collector
 */
public void run(
        RecordReader<IntWritable, WikipediaPage> input,
        OutputCollector<IntWritable, Text> output,
        Reporter reporter)
        throws IOException {
      final IntWritable docnoKey = new IntWritable();
      final WikipediaPage page = new WikipediaPage();

      // State carried over from the record read on the previous iteration.
      long lastOffset = -1; // -1 means "no record has been read yet"
      int lastDocno = 0;

      // Position sampled before the upcoming call to next().
      long offset = input.getPos();

      while (input.next(docnoKey, page)) {
        // True when reading the previous record moved the reader position.
        final boolean offsetMoved = lastOffset != -1 && lastOffset != offset;
        if (offsetMoved) {
          LOG.info(
              "- beginning of block at " + lastOffset + ", docno:" + lastDocno + ", file:" + fileno);
          keyOut.set(lastDocno);
          valOut.set(lastOffset + "\t" + fileno);
          output.collect(keyOut, valOut);
          reporter.incrCounter(Blocks.Total, 1);
        }

        // Shift the window: the record just read becomes the "previous" one.
        lastOffset = offset;
        offset = input.getPos();
        lastDocno = docnoKey.get();
      }
    }
Exemple #2
0
 /**
  * Returns the position reported by the wrapped {@code recordReader}.
  *
  * @return the value of {@code recordReader.getPos()}
  * @throws IOException propagated from {@code recordReader.getPos()}
  */
 public long getPos() throws IOException {
   final long delegatePosition = recordReader.getPos();
   return delegatePosition;
 }
 /**
  * Requests the current position from the proxied RR ({@code rr}) and returns it unchanged.
  *
  * @return the value of {@code rr.getPos()}
  * @throws IOException propagated from {@code rr.getPos()}
  */
 public long getPos() throws IOException {
   final long proxiedPosition = rr.getPos();
   return proxiedPosition;
 }