@Test public void testMRMaxLine() throws Exception { final int MAXPOS = 1024 * 1024; final int MAXLINE = 10 * 1024; final int BUF = 64 * 1024; final InputStream infNull = new InputStream() { int position = 0; final int MAXPOSBUF = 1024 * 1024 + BUF; // max LRR pos + LineReader buf @Override public int read() { ++position; return 0; } @Override public int read(byte[] b) { assertTrue("Read too many bytes from the stream", position < MAXPOSBUF); Arrays.fill(b, (byte) 0); position += b.length; return b.length; } }; final LongWritable key = new LongWritable(); final Text val = new Text(); LOG.info("Reading a line from /dev/null"); final Configuration conf = new Configuration(false); conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, MAXLINE); conf.setInt("io.file.buffer.size", BUF); // used by LRR final LineRecordReader lrr = new LineRecordReader(infNull, 0, MAXPOS, conf); assertFalse("Read a line from null", lrr.next(key, val)); }
/**
 * Finds a full file and sets it as the value.
 *
 * Drains the wrapped recordReader, concatenating every non-empty line it
 * yields (each followed by "\n") into a single value.
 *
 * @param key   passed through to the wrapped reader, which updates it on each read
 * @param value cleared, then set to the concatenated lines read during this call
 * @return true if at least one record was read from the wrapped reader during
 *         this call; false if the wrapped reader was already exhausted, so that
 *         callers looping on next() terminate
 * @throws IOException if the wrapped reader fails
 */
public synchronized boolean next(LongWritable key, Text value) throws IOException {
  final Text line = new Text();
  final StringBuilder contents = new StringBuilder();
  boolean readAny = false;

  value.clear();
  // Only consume `line` after a successful read: a reader that reports EOF may
  // leave the previous line's content in the buffer, and it must not be
  // appended a second time.
  while (recordReader.next(key, line)) {
    readAny = true;
    final String lineValue = line.toString();
    if (!lineValue.isEmpty()) {
      contents.append(lineValue).append('\n');
    }
  }

  value.set(contents.toString());
  return readAny;
}
/* Finds a full sentence and sets it as the value. * If the sentence is shorter than the full line, the rest is stored to use later. */ public synchronized boolean next(LongWritable key, Text value) throws IOException { Text line = new Text(); boolean getMore = true; boolean retrieved = false; String result = leftovers; leftovers = ""; value.clear(); while (getMore) { retrieved = recordReader.next(key, line); if (retrieved) { String lineValue = line.toString(); // here, we assume sentences run until the period. int endOfSentence = lineValue.indexOf('.'); if (endOfSentence == -1) { result += " " + lineValue; } else { result += " " + lineValue.substring(0, endOfSentence + 1); leftovers = lineValue.substring(endOfSentence + 1); getMore = false; } } else { getMore = false; value.set(result); return false; } } value.set(result); return true; }
/**
 * Hands the configuration to the superclass, then picks up NUM_ATTRIBUTES from
 * it when that key is present. When the key is absent, numAttributes keeps its
 * current value.
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);

  final boolean attributeCountMissing = conf.get(NUM_ATTRIBUTES) == null;
  if (attributeCountMissing) {
    return;
  }
  numAttributes = conf.getInt(NUM_ATTRIBUTES, -1);
}
/**
 * Runs the superclass initialization for the given configuration and split,
 * then picks up NUM_ATTRIBUTES from the configuration when that key is
 * present. When the key is absent, numAttributes keeps its current value.
 */
@Override
public void initialize(Configuration conf, InputSplit split) throws IOException, InterruptedException {
  super.initialize(conf, split);

  final boolean attributeCountMissing = conf.get(NUM_ATTRIBUTES) == null;
  if (attributeCountMissing) {
    return;
  }
  numAttributes = conf.getInt(NUM_ATTRIBUTES, -1);
}
/** Returns the progress value reported by the wrapped recordReader. */
public float getProgress() {
  final float delegateProgress = recordReader.getProgress();
  return delegateProgress;
}
/**
 * Returns the position reported by the wrapped recordReader.
 *
 * @throws IOException if the wrapped reader fails to report its position
 */
public long getPos() throws IOException {
  final long delegatePos = recordReader.getPos();
  return delegatePos;
}
/** Returns a value object obtained from the wrapped recordReader. */
public Text createValue() {
  final Text freshValue = recordReader.createValue();
  return freshValue;
}
/** Returns a key object obtained from the wrapped recordReader. */
public LongWritable createKey() {
  final LongWritable freshKey = recordReader.createKey();
  return freshKey;
}
/**
 * Closes the wrapped recordReader.
 *
 * @throws IOException if closing the wrapped reader fails
 */
public void close() throws IOException { recordReader.close(); }