public synchronized boolean next(LongWritable key, Text value) throws IOException { boolean gotsomething; boolean retval; byte space[] = {' '}; int counter = 0; String ln = null; value.clear(); gotsomething = false; do { retval = lineRecord.next(lineKey, lineValue); if (retval) { if (lineValue.toString().length() > 0) { ln = lineValue.toString(); lineValue.set( ln.split(" ")[ 0]); // here we basically get the first element from a KV such as '4847570 -1' byte[] rawline = lineValue.getBytes(); int rawlinelen = lineValue.getLength(); value.append(rawline, 0, rawlinelen); value.append(space, 0, 1); counter++; } gotsomething = true; } else { break; } } while (counter < MAX_LINE_COUNT); // System.out.println("ParagraphRecordReader::next() returns "+gotsomething+" after setting // value to: ["+value.toString()+"]"); return gotsomething; }
/**
 * Concatenates up to {@code MAX_LINE_COUNT} non-empty lines into a single record.
 *
 * <p>Every non-empty line read from {@code lineRecord} is appended to {@code value} followed by
 * one space. Empty lines are consumed without being appended or counted. The {@code key}
 * argument is not modified by this method.
 *
 * @param key   caller-supplied key; left untouched
 * @param value cleared, then filled with the space-terminated lines
 * @return {@code true} if at least one line (empty or not) was read from {@code lineRecord}
 * @throws IOException if the underlying reader fails
 */
public synchronized boolean next(LongWritable key, Text value) throws IOException {
  final byte[] separator = {' '};
  boolean sawLine = false;
  int appended = 0;

  value.clear();
  for (;;) {
    if (!lineRecord.next(lineKey, lineValue)) {
      break;
    }
    sawLine = true;

    if (!lineValue.toString().isEmpty()) {
      value.append(lineValue.getBytes(), 0, lineValue.getLength());
      value.append(separator, 0, 1);
      appended++;
    }

    // Checked at the bottom so that at least one read is always attempted.
    if (appended >= MAX_LINE_COUNT) {
      break;
    }
  }
  return sawLine;
}
/**
 * Reads the next line from {@code lineReader} and exposes it under the fixed {@code lineKey}.
 *
 * @param key   set to {@code lineKey} when a line was read
 * @param value set to the line just read
 * @return {@code true} if a line was read, {@code false} if the underlying reader had none
 * @throws IOException if the underlying reader fails
 */
@Override
public boolean next(Text key, Text value) throws IOException {
  boolean hasLine = lineReader.next(lineReaderKey, lineValue);
  if (hasLine) {
    key.set(lineKey);
    value.set(lineValue);
  }
  return hasLine;
}
/**
 * Reads one line from {@code in} and splits it into a fixed-width key and the remaining value.
 *
 * <p>The first {@code KEY_LENGTH} bytes become the key and the rest becomes the value. A line
 * shorter than {@code KEY_LENGTH} is used as the key in full and the value is cleared.
 *
 * @param key   receives the leading {@code KEY_LENGTH} bytes (or the whole short line)
 * @param value receives the bytes after the key, or is cleared for a short line
 * @return {@code true} if a line was read, {@code false} otherwise
 * @throws IOException if the underlying reader fails
 */
public boolean next(Text key, Text value) throws IOException {
  if (!in.next(junk, line)) {
    return false;
  }

  int length = line.getLength();
  if (length >= KEY_LENGTH) {
    byte[] raw = line.getBytes();
    key.set(raw, 0, KEY_LENGTH);
    value.set(raw, KEY_LENGTH, length - KEY_LENGTH);
  } else {
    key.set(line);
    value.clear();
  }
  return true;
}
/**
 * Drains {@code recordReader} and sets everything it yields as a single value.
 *
 * <p>Every non-empty line is appended to the result followed by {@code "\n"}; empty lines are
 * skipped. {@code key} is passed straight through to {@code recordReader.next}, so it holds
 * whatever that reader last stored in it.
 *
 * @param key   forwarded to the underlying reader on every read
 * @param value cleared, then set to the concatenated lines
 * @return {@code true} if at least one line was read; {@code false} once the underlying reader
 *     is exhausted, so that callers looping on this method terminate
 * @throws IOException if the underlying reader fails
 */
public synchronized boolean next(LongWritable key, Text value) throws IOException {
  Text line = new Text();
  StringBuilder contents = new StringBuilder();
  boolean readAny = false;

  value.clear();
  // Only look at 'line' after a successful read; its content after a failed read is not
  // something this method can rely on.
  while (recordReader.next(key, line)) {
    readAny = true;
    String lineValue = line.toString();
    if (!lineValue.isEmpty()) {
      contents.append(lineValue).append('\n');
    }
  }

  value.set(contents.toString());
  return readAny;
}
/* Finds a full sentence and sets it as the value. * If the sentence is shorter than the full line, the rest is stored to use later. */ public synchronized boolean next(LongWritable key, Text value) throws IOException { Text line = new Text(); boolean getMore = true; boolean retrieved = false; String result = leftovers; leftovers = ""; value.clear(); while (getMore) { retrieved = recordReader.next(key, line); if (retrieved) { String lineValue = line.toString(); // here, we assume sentences run until the period. int endOfSentence = lineValue.indexOf('.'); if (endOfSentence == -1) { result += " " + lineValue; } else { result += " " + lineValue.substring(0, endOfSentence + 1); leftovers = lineValue.substring(endOfSentence + 1); getMore = false; } } else { getMore = false; value.set(result); return false; } } value.set(result); return true; }
/**
 * Closes the wrapped {@code lineRecord} reader.
 *
 * @throws IOException if closing the underlying reader fails
 */
public void close() throws IOException {
  lineRecord.close();
}
/**
 * Reports progress as given by the wrapped {@code in} reader.
 *
 * @return the value of {@code in.getProgress()}
 * @throws IOException if the underlying reader fails
 */
public float getProgress() throws IOException {
  final float progress = in.getProgress();
  return progress;
}
/**
 * Reports the current position as given by the wrapped {@code in} reader.
 *
 * @return the value of {@code in.getPos()}
 * @throws IOException if the underlying reader fails
 */
public long getPos() throws IOException {
  final long position = in.getPos();
  return position;
}
/**
 * Closes the wrapped {@code in} reader.
 *
 * @throws IOException if closing the underlying reader fails
 */
public void close() throws IOException {
  in.close();
}
/**
 * Creates a key object by delegating to the wrapped {@code recordReader}.
 *
 * @return the key produced by {@code recordReader.createKey()}
 */
public LongWritable createKey() {
  final LongWritable freshKey = recordReader.createKey();
  return freshKey;
}
/**
 * Creates a reader over {@code split} that is backed by a {@link LineRecordReader}.
 *
 * <p>The key handed out for every record is the file name of the split's path; the line
 * reader's own key and value objects are created once here and reused as scratch buffers.
 *
 * @param conf  job configuration passed to the line reader
 * @param split file split to read
 * @throws IOException if the line reader cannot be created
 */
public DocumentRecordReader(JobConf conf, FileSplit split) throws IOException {
  lineReader = new LineRecordReader(conf, split);

  // Scratch objects owned by the underlying line reader.
  lineReaderKey = lineReader.createKey();
  lineValue = lineReader.createValue();

  // Every record of this split is keyed by the name of the file it came from.
  lineKey = new Text(split.getPath().getName());
}
/**
 * Reports the current position as given by the wrapped {@code lineRecord} reader.
 *
 * @return the value of {@code lineRecord.getPos()}
 * @throws IOException if the underlying reader fails
 */
public long getPos() throws IOException {
  final long position = lineRecord.getPos();
  return position;
}
/**
 * Creates a value object by delegating to the wrapped {@code recordReader}.
 *
 * @return the value produced by {@code recordReader.createValue()}
 */
public Text createValue() {
  final Text freshValue = recordReader.createValue();
  return freshValue;
}
/**
 * Reports progress as given by the wrapped {@code lineRecord} reader.
 *
 * <p>This delegates to {@code getProgress()} rather than {@code getPos()}: the position is an
 * offset into the input, not a progress value, and must not be returned here.
 *
 * @return the value of {@code lineRecord.getProgress()}
 * @throws IOException if the underlying reader fails
 */
public float getProgress() throws IOException {
  return lineRecord.getProgress();
}
/**
 * Closes the wrapped {@code recordReader}.
 *
 * @throws IOException if closing the underlying reader fails
 */
public void close() throws IOException {
  recordReader.close();
}
/**
 * Creates a reader over {@code split} that is backed by a {@link LineRecordReader}.
 *
 * <p>The line reader's key and value objects are created once here and kept in
 * {@code lineKey} and {@code lineValue}.
 *
 * @param conf  job configuration passed to the line reader
 * @param split file split to read
 * @throws IOException if the line reader cannot be created
 */
public ParagraphRecordReader(JobConf conf, FileSplit split) throws IOException {
  lineRecord = new LineRecordReader(conf, split);

  // Scratch objects owned by the underlying line reader.
  lineValue = lineRecord.createValue();
  lineKey = lineRecord.createKey();
}
/**
 * Reports the current position as given by the wrapped {@code recordReader}.
 *
 * @return the value of {@code recordReader.getPos()}
 * @throws IOException if the underlying reader fails
 */
public long getPos() throws IOException {
  final long position = recordReader.getPos();
  return position;
}
/**
 * Reports progress as given by the wrapped {@code recordReader}.
 *
 * @return the value of {@code recordReader.getProgress()}
 */
public float getProgress() {
  final float progress = recordReader.getProgress();
  return progress;
}