Пример #1
0
    /**
     * Reads up to {@code MAX_LINE_COUNT} non-empty lines from {@code lineRecord} and concatenates
     * the first tab-separated field of each one into {@code value}, each field followed by a
     * single space.
     *
     * <p>Empty lines are skipped and do not count toward the limit. {@code key} is not modified.
     *
     * @param key not used by this method
     * @param value cleared first, then filled with the collected fields
     * @return {@code true} if {@code lineRecord} delivered at least one line (even an empty one),
     *     {@code false} if it delivered none
     * @throws IOException propagated from {@code lineRecord.next}
     */
    public synchronized boolean next(LongWritable key, Text value) throws IOException {
      final byte[] space = {' '};
      boolean gotsomething = false;
      int counter = 0;
      value.clear();

      do {
        if (!lineRecord.next(lineKey, lineValue)) {
          break;
        }
        gotsomething = true;

        String ln = lineValue.toString();
        if (ln.length() > 0) {
          // Keep only the text before the first tab, e.g. "4847570" from "4847570<TAB>-1".
          // indexOf is used rather than split("\t")[0]: split discards trailing empty strings, so
          // a line consisting only of tabs yields an empty array and [0] would throw.
          int tab = ln.indexOf('\t');
          lineValue.set(tab < 0 ? ln : ln.substring(0, tab));
          value.append(lineValue.getBytes(), 0, lineValue.getLength());
          value.append(space, 0, 1);
          counter++;
        }
      } while (counter < MAX_LINE_COUNT);

      return gotsomething;
    }
Пример #2
0
    /**
     * Collects up to {@code MAX_LINE_COUNT} non-empty lines from {@code lineRecord} into
     * {@code value}, appending a single space after each line.
     *
     * <p>Empty lines are skipped and do not count toward the limit. {@code key} is not modified.
     *
     * @param key not used by this method
     * @param value cleared first, then filled with the collected lines
     * @return {@code true} if {@code lineRecord} delivered at least one line (even an empty one),
     *     {@code false} if it delivered none
     * @throws IOException propagated from {@code lineRecord.next}
     */
    public synchronized boolean next(LongWritable key, Text value) throws IOException {
      final byte[] separator = {' '};
      boolean sawLine = false;
      int appended = 0;
      value.clear();

      while (lineRecord.next(lineKey, lineValue)) {
        sawLine = true;
        if (!lineValue.toString().isEmpty()) {
          value.append(lineValue.getBytes(), 0, lineValue.getLength());
          value.append(separator, 0, 1);
          appended++;
        }
        // Checked after the read, so at least one read is always attempted.
        if (appended >= MAX_LINE_COUNT) {
          break;
        }
      }

      return sawLine;
    }
 /**
  * Reads the next line from {@code lineReader} and hands it out as the value, paired with the
  * fixed {@code lineKey}.
  *
  * @param key set to {@code lineKey} when a line was read
  * @param value set to the line that was read
  * @return {@code true} if a line was read, {@code false} if {@code lineReader} had none left
  * @throws IOException propagated from {@code lineReader.next}
  */
 @Override
 public boolean next(Text key, Text value) throws IOException {
   boolean hasLine = lineReader.next(lineReaderKey, lineValue);
   if (hasLine) {
     key.set(lineKey);
     value.set(lineValue);
   }
   return hasLine;
 }
 /**
  * Reads the next line from {@code in} and splits it into a key of {@code KEY_LENGTH} bytes and
  * a value holding the remaining bytes.
  *
  * <p>A line shorter than {@code KEY_LENGTH} becomes the key as a whole and the value is
  * cleared.
  *
  * @param key receives the leading {@code KEY_LENGTH} bytes, or the whole short line
  * @param value receives the bytes after the key, or is cleared for a short line
  * @return {@code true} if a line was read, {@code false} otherwise
  * @throws IOException propagated from {@code in.next}
  */
 public boolean next(Text key, Text value) throws IOException {
   if (!in.next(junk, line)) {
     return false;
   }

   int lineLength = line.getLength();
   if (lineLength >= KEY_LENGTH) {
     byte[] raw = line.getBytes();
     key.set(raw, 0, KEY_LENGTH);
     value.set(raw, KEY_LENGTH, lineLength - KEY_LENGTH);
   } else {
     key.set(line);
     value.clear();
   }
   return true;
 }
  /**
   * Reads every remaining line from {@code recordReader} and stores them in {@code value} as one
   * record, each non-empty line followed by {@code "\n"}. Empty lines are dropped.
   *
   * <p>Returns {@code false} once {@code recordReader} has nothing left, so a caller that loops
   * until {@code false} terminates after the single combined record.
   *
   * @param key passed through to {@code recordReader.next}, which may update it
   * @param value cleared first, then set to the concatenated lines
   * @return {@code true} if at least one line was read during this call, {@code false} otherwise
   * @throws IOException propagated from {@code recordReader.next}
   */
  public synchronized boolean next(LongWritable key, Text value) throws IOException {
    Text line = new Text();
    StringBuilder result = new StringBuilder();
    boolean readAny = false;

    value.clear();

    // Only look at `line` when the read succeeded; after a failed read its content is stale.
    while (recordReader.next(key, line)) {
      readAny = true;
      String lineValue = line.toString();
      if (!lineValue.isEmpty()) {
        result.append(lineValue).append('\n');
      }
    }

    value.set(result.toString());
    return readAny;
  }
  /**
   * Finds the next sentence and sets it as the value. A sentence is assumed to end at the first
   * period ({@code '.'}).
   *
   * <p>Text following the period on the line that completed a sentence is kept in
   * {@code leftovers} and used at the start of the next call. If that carried-over text already
   * contains a period, the sentence is served from it without reading another line; {@code key}
   * is left untouched in that case.
   *
   * <p>Each line read is appended with a leading space. When the input ends in the middle of a
   * sentence, the unterminated text is returned as a final record instead of being dropped.
   *
   * @param key passed through to {@code recordReader.next}, which may update it
   * @param value cleared first, then set to the sentence (or the trailing text at end of input)
   * @return {@code true} if {@code value} holds a sentence or non-blank trailing text,
   *     {@code false} once the input is exhausted and nothing is left to return
   * @throws IOException propagated from {@code recordReader.next}
   */
  public synchronized boolean next(LongWritable key, Text value) throws IOException {
    value.clear();

    StringBuilder sentence = new StringBuilder(leftovers);
    leftovers = "";

    // A single line may hold several sentences; serve the ones still waiting in the carried-over
    // text before reading more input, otherwise they would be merged with the following line.
    int carriedEnd = sentence.indexOf(".");
    if (carriedEnd != -1) {
      leftovers = sentence.substring(carriedEnd + 1);
      value.set(sentence.substring(0, carriedEnd + 1));
      return true;
    }

    Text line = new Text();
    while (recordReader.next(key, line)) {
      String lineValue = line.toString();

      // here, we assume sentences run until the period.
      int endOfSentence = lineValue.indexOf('.');

      if (endOfSentence == -1) {
        sentence.append(' ').append(lineValue);
      } else {
        sentence.append(' ').append(lineValue, 0, endOfSentence + 1);
        leftovers = lineValue.substring(endOfSentence + 1);
        value.set(sentence.toString());
        return true;
      }
    }

    // End of input: report the unterminated tail as a record if it has any real content.
    String remainder = sentence.toString();
    value.set(remainder);
    return !remainder.trim().isEmpty();
  }
Пример #7
0
 /**
  * Closes this reader by delegating to {@code lineRecord.close()}.
  *
  * @throws IOException propagated from {@code lineRecord.close()}
  */
 public void close() throws IOException {
   lineRecord.close();
 }
 /**
  * Returns the progress value reported by the wrapped reader {@code in}.
  *
  * @return the result of {@code in.getProgress()}
  * @throws IOException propagated from {@code in.getProgress()}
  */
 public float getProgress() throws IOException {
   return in.getProgress();
 }
 /**
  * Returns the position reported by the wrapped reader {@code in}.
  *
  * @return the result of {@code in.getPos()}
  * @throws IOException propagated from {@code in.getPos()}
  */
 public long getPos() throws IOException {
   return in.getPos();
 }
 /**
  * Closes this reader by delegating to {@code in.close()}.
  *
  * @throws IOException propagated from {@code in.close()}
  */
 public void close() throws IOException {
   in.close();
 }
 /**
  * Creates a key object by delegating to {@code recordReader.createKey()}.
  *
  * @return the key object produced by the wrapped reader
  */
 public LongWritable createKey() {
   return recordReader.createKey();
 }
Пример #12
0
 public DocumentRecordReader(JobConf conf, FileSplit split) throws IOException {
   lineReader = new LineRecordReader(conf, split);
   lineKey = new Text(split.getPath().getName());
   lineValue = lineReader.createValue();
   lineReaderKey = lineReader.createKey();
 }
Пример #13
0
 /**
  * Returns the position reported by the wrapped reader {@code lineRecord}.
  *
  * @return the result of {@code lineRecord.getPos()}
  * @throws IOException propagated from {@code lineRecord.getPos()}
  */
 public long getPos() throws IOException {
   return lineRecord.getPos();
 }
 /**
  * Creates a value object by delegating to {@code recordReader.createValue()}.
  *
  * @return the value object produced by the wrapped reader
  */
 public Text createValue() {
   return recordReader.createValue();
 }
Пример #15
0
 /**
  * Returns the progress value reported by the wrapped reader {@code lineRecord}.
  *
  * <p>This delegates to {@code getProgress()} rather than {@code getPos()}: the position is an
  * offset into the input, not a progress value.
  *
  * @return the result of {@code lineRecord.getProgress()}
  * @throws IOException propagated from {@code lineRecord.getProgress()}
  */
 public float getProgress() throws IOException {
   return lineRecord.getProgress();
 }
 /**
  * Closes this reader by delegating to {@code recordReader.close()}.
  *
  * @throws IOException propagated from {@code recordReader.close()}
  */
 public void close() throws IOException {
   recordReader.close();
 }
Пример #17
0
 public ParagraphRecordReader(JobConf conf, FileSplit split) throws IOException {
   lineRecord = new LineRecordReader(conf, split);
   lineKey = lineRecord.createKey();
   lineValue = lineRecord.createValue();
 }
 /**
  * Returns the position reported by the wrapped reader {@code recordReader}.
  *
  * @return the result of {@code recordReader.getPos()}
  * @throws IOException propagated from {@code recordReader.getPos()}
  */
 public long getPos() throws IOException {
   return recordReader.getPos();
 }
 /**
  * Returns the progress value reported by the wrapped reader {@code recordReader}.
  *
  * @return the result of {@code recordReader.getProgress()}
  */
 public float getProgress() {
   return recordReader.getProgress();
 }