public void nextRawValue(DataInputBuffer value) throws IOException {
  final DataInputBuffer vb = kvIter.getValue();
  final int vp = vb.getPosition();
  final int vlen = vb.getLength() - vp;
  value.reset(vb.getData(), vp, vlen);
  bytesRead += vlen;
}
private static void rawValueToTextBytes(
    DataOutputBuffer dataBuffer, DataInputBuffer inputBuffer, TextBytes textOut)
    throws IOException {
  inputBuffer.reset(dataBuffer.getData(), dataBuffer.getLength());
  int newLength = WritableUtils.readVInt(inputBuffer);
  textOut.set(inputBuffer.getData(), inputBuffer.getPosition(), newLength);
}
@Test
public void testReadWriteReplicaState() {
  try {
    DataOutputBuffer out = new DataOutputBuffer();
    DataInputBuffer in = new DataInputBuffer();
    for (HdfsServerConstants.ReplicaState repState : HdfsServerConstants.ReplicaState.values()) {
      repState.write(out);
      in.reset(out.getData(), out.getLength());
      HdfsServerConstants.ReplicaState result = HdfsServerConstants.ReplicaState.read(in);
      assertTrue("read/write mismatch for ReplicaState " + repState, repState == result);
      out.reset();
      in.reset();
    }
  } catch (Exception ex) {
    fail("unexpected exception while reading/writing ReplicaState: " + ex);
  }
}
public DataInputBuffer getKey() throws IOException {
  final int kvoff = offsetFor(current);
  keybuf.reset(
      kvbuffer,
      kvmeta.get(kvoff + KEYSTART),
      kvmeta.get(kvoff + VALSTART) - kvmeta.get(kvoff + KEYSTART));
  return keybuf;
}
public DataInputBuffer getKey() throws IOException {
  final int kvoff = kvoffsets[current % kvoffsets.length];
  keybuf.reset(
      kvbuffer,
      kvindices[kvoff + KEYSTART],
      kvindices[kvoff + VALSTART] - kvindices[kvoff + KEYSTART]);
  return keybuf;
}
/** Writes an IFile by reading key/value pairs from the input CSV file. */
private void createIFile(WriterOptions options, KV_TRAIT trait) throws IOException {
  Writer writer = IFile2.createWriter(options);
  FSDataInputStream in = fs.open(inputCSVFile);
  BufferedReader reader = new BufferedReader(new InputStreamReader(in));
  String line;
  while ((line = reader.readLine()) != null) {
    Iterator<String> it = Splitter.on(",").omitEmptyStrings().split(line).iterator();
    String k = it.next();
    String v = it.next();
    byte[] keyBytes = k.getBytes();
    byte[] valueBytes = v.getBytes();
    key.reset(keyBytes, keyBytes.length);
    value.reset(valueBytes, valueBytes.length);
    writer.append(key, value);
  }
  reader.close();
  writer.close();
}
/**
 * Input/output of simple records.
 *
 * @throws Exception if the test fails
 */
@SuppressWarnings("unchecked")
@Test
public void simple_record() throws Exception {
  ModelLoader loader = generate();
  Class<?> type = loader.modelType("Simple");
  assertThat(type.isAnnotationPresent(ModelInputLocation.class), is(true));
  assertThat(type.isAnnotationPresent(ModelOutputLocation.class), is(true));

  ModelWrapper object = loader.newModel("Simple");
  DataOutputBuffer output = new DataOutputBuffer();
  ModelOutput<Object> modelOut =
      (ModelOutput<Object>) type.getAnnotation(ModelOutputLocation.class)
          .value()
          .getDeclaredConstructor(RecordEmitter.class)
          .newInstance(new TsvEmitter(new OutputStreamWriter(output, "UTF-8")));

  object.set("sid", 1L);
  object.set("value", new Text("hello"));
  modelOut.write(object.unwrap());
  object.set("sid", 2L);
  object.set("value", new Text("world"));
  modelOut.write(object.unwrap());
  object.set("sid", 3L);
  object.set("value", null);
  modelOut.write(object.unwrap());
  modelOut.close();

  DataInputBuffer input = new DataInputBuffer();
  input.reset(output.getData(), output.getLength());
  ModelInput<Object> modelIn =
      (ModelInput<Object>) type.getAnnotation(ModelInputLocation.class)
          .value()
          .getDeclaredConstructor(RecordParser.class)
          .newInstance(new TsvParser(new InputStreamReader(input, "UTF-8")));

  ModelWrapper copy = loader.newModel("Simple");
  modelIn.readTo(copy.unwrap());
  assertThat(copy.get("sid"), is((Object) 1L));
  assertThat(copy.get("value"), is((Object) new Text("hello")));
  modelIn.readTo(copy.unwrap());
  assertThat(copy.get("sid"), is((Object) 2L));
  assertThat(copy.get("value"), is((Object) new Text("world")));
  modelIn.readTo(copy.unwrap());
  assertThat(copy.get("sid"), is((Object) 3L));
  assertThat(copy.getOption("value").isNull(), is(true));
  assertThat(input.read(), is(-1));
  modelIn.close();
}
public boolean nextRawKey(DataInputBuffer key) throws IOException {
  if (kvIter.next()) {
    final DataInputBuffer kb = kvIter.getKey();
    final int kp = kb.getPosition();
    final int klen = kb.getLength() - kp;
    key.reset(kb.getData(), kp, klen);
    bytesRead += klen;
    return true;
  }
  return false;
}
static <T extends Writable> T read(T writable, byte[] bytes) {
  DataInputBuffer buffer = new DataInputBuffer();
  buffer.reset(bytes, bytes.length);
  try {
    writable.readFields(buffer);
    assertThat("End of stream", buffer.read(), is(-1));
  } catch (IOException e) {
    throw new AssertionError(e);
  }
  return writable;
}
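// Companion usage sketch, not part of the original snippet set: serialize a sample Text with a
// DataOutputBuffer, trim the backing array to the written length, and feed it through the
// read(...) helper above. Assumes org.apache.hadoop.io.{DataOutputBuffer, Text},
// java.util.Arrays, and Hamcrest's assertThat/is are available.
static void readHelperUsageExample() throws IOException {
  DataOutputBuffer out = new DataOutputBuffer();
  new Text("hello").write(out);                                  // serialize a sample value
  byte[] bytes = Arrays.copyOf(out.getData(), out.getLength());  // getData() may be over-allocated
  Text restored = read(new Text(), bytes);                       // helper also asserts end-of-stream
  assertThat(restored.toString(), is("hello"));
}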
@Override
@SuppressWarnings("unchecked")
public void run(final JobConf job, final TaskUmbilicalProtocol umbilical) throws IOException {
  final Reporter reporter = getReporter(umbilical);

  // start thread that will handle communication with parent
  startCommunicationThread(umbilical);

  int numReduceTasks = conf.getNumReduceTasks();
  LOG.info("numReduceTasks: " + numReduceTasks);
  MapOutputCollector collector = null;
  if (numReduceTasks > 0) {
    collector = new MapOutputBuffer(umbilical, job, reporter);
  } else {
    collector = new DirectMapOutputCollector(umbilical, job, reporter);
  }

  // reinstantiate the split
  try {
    instantiatedSplit =
        (InputSplit) ReflectionUtils.newInstance(job.getClassByName(splitClass), job);
  } catch (ClassNotFoundException exp) {
    IOException wrap = new IOException("Split class " + splitClass + " not found");
    wrap.initCause(exp);
    throw wrap;
  }
  DataInputBuffer splitBuffer = new DataInputBuffer();
  splitBuffer.reset(split.get(), 0, split.getSize());
  instantiatedSplit.readFields(splitBuffer);

  // if it is a file split, we can give more details
  if (instantiatedSplit instanceof FileSplit) {
    FileSplit fileSplit = (FileSplit) instantiatedSplit;
    job.set("map.input.file", fileSplit.getPath().toString());
    job.setLong("map.input.start", fileSplit.getStart());
    job.setLong("map.input.length", fileSplit.getLength());
  }

  // open input
  RecordReader rawIn = job.getInputFormat().getRecordReader(instantiatedSplit, job, reporter);
  RecordReader in = new TrackedRecordReader(rawIn, getCounters());

  MapRunnable runner = (MapRunnable) ReflectionUtils.newInstance(job.getMapRunnerClass(), job);

  try {
    runner.run(in, collector, reporter);
    collector.flush();
  } finally {
    // close input and collector
    in.close();
    collector.close();
  }
  done(umbilical);
}
private FileStatus getFileStatus(FileStatus fileStatus) throws IOException {
  FileStatus status = new FileStatus();
  buffer.reset();
  DataOutputStream out = new DataOutputStream(buffer);
  fileStatus.write(out);
  in.reset(buffer.toByteArray(), 0, buffer.size());
  status.readFields(in);
  return status;
}
@Deprecated
private void readFields(byte[] bytes, int offset, int len) throws IOException {
  if (bytes == null || len <= 0) {
    throw new IllegalArgumentException("Can't build a writable with empty bytes array");
  }
  DataInputBuffer in = new DataInputBuffer();
  try {
    in.reset(bytes, offset, len);
    this.readFields(in);
  } finally {
    in.close();
  }
}
@Override
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
  try {
    final int ret;
    di.reset(b1, s1, l1);
    final int x1 = WritableUtils.readVInt(di);
    di.reset(b2, s2, l2);
    final int x2 = WritableUtils.readVInt(di);
    final int t1 = b1[s1 + x1];
    final int t2 = b2[s2 + x2];
    if (t1 == GridmixKey.REDUCE_SPEC || t2 == GridmixKey.REDUCE_SPEC) {
      ret = t1 - t2;
    } else {
      assert t1 == GridmixKey.DATA;
      assert t2 == GridmixKey.DATA;
      ret = WritableComparator.compareBytes(b1, s1, x1, b2, s2, x2);
    }
    di.reset(reset, 0, 0);
    return ret;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
/**
 * Input/output of all primitive types.
 *
 * @throws Exception if the test fails
 */
@SuppressWarnings("unchecked")
@Test
public void primitives() throws Exception {
  ModelLoader loader = generate();
  Class<?> type = loader.modelType("Primitives");
  assertThat(type.isAnnotationPresent(ModelInputLocation.class), is(true));
  assertThat(type.isAnnotationPresent(ModelOutputLocation.class), is(true));

  ModelWrapper object = loader.newModel("Primitives");
  object.set("type_boolean", true);
  object.set("type_byte", (byte) 64);
  object.set("type_short", (short) 256);
  object.set("type_int", 100);
  object.set("type_long", 200L);
  object.set("type_float", 300.f);
  object.set("type_double", 400.d);
  object.set("type_decimal", new BigDecimal("1234.567"));
  object.set("type_text", new Text("Hello, world!"));
  object.set("type_date", new Date(2011, 3, 31));
  object.set("type_datetime", new DateTime(2011, 3, 31, 23, 30, 1));

  DataOutputBuffer output = new DataOutputBuffer();
  ModelOutput<Object> modelOut =
      (ModelOutput<Object>) type.getAnnotation(ModelOutputLocation.class)
          .value()
          .getDeclaredConstructor(RecordEmitter.class)
          .newInstance(new TsvEmitter(new OutputStreamWriter(output, "UTF-8")));
  modelOut.write(object.unwrap());
  modelOut.write(object.unwrap());
  modelOut.write(object.unwrap());
  modelOut.close();

  DataInputBuffer input = new DataInputBuffer();
  input.reset(output.getData(), output.getLength());
  ModelInput<Object> modelIn =
      (ModelInput<Object>) type.getAnnotation(ModelInputLocation.class)
          .value()
          .getDeclaredConstructor(RecordParser.class)
          .newInstance(new TsvParser(new InputStreamReader(input, "UTF-8")));
  ModelWrapper copy = loader.newModel("Primitives");
  modelIn.readTo(copy.unwrap());
  assertThat(object.unwrap(), equalTo(copy.unwrap()));
  assertThat(input.read(), is(-1));
  modelIn.close();
}
public void reset(byte[] buffer, int start, int length) {
  this.buffer = buffer;
  this.start = start;
  this.length = length;

  // If the value wraps around the end of the circular buffer, copy the tail
  // (start..bufvoid) followed by the head (0..remainder) into a contiguous array.
  if (start + length > bufvoid) {
    this.buffer = new byte[this.length];
    final int taillen = bufvoid - start;
    System.arraycopy(buffer, start, this.buffer, 0, taillen);
    System.arraycopy(buffer, 0, this.buffer, taillen, length - taillen);
    this.start = 0;
  }

  super.reset(this.buffer, this.start, this.length);
}
protected static void assertSerializable(HadoopSerialization ser, ITuple tuple, boolean debug)
    throws IOException {
  DataInputBuffer input = new DataInputBuffer();
  DataOutputBuffer output = new DataOutputBuffer();
  DatumWrapper<ITuple> wrapper = new DatumWrapper<ITuple>(tuple);
  ser.ser(wrapper, output);

  input.reset(output.getData(), 0, output.getLength());
  DatumWrapper<ITuple> wrapper2 = new DatumWrapper<ITuple>();
  wrapper2 = ser.deser(wrapper2, input);
  if (debug) {
    System.out.println("D:" + wrapper2.datum());
  }
  assertEquals(tuple, wrapper2.datum());
}
/** Used by child copy constructors. */
protected synchronized void copy(Writable other) {
  if (other != null) {
    try {
      DataOutputBuffer out = new DataOutputBuffer();
      other.write(out);
      DataInputBuffer in = new DataInputBuffer();
      in.reset(out.getData(), out.getLength());
      readFields(in);
    } catch (IOException e) {
      throw new IllegalArgumentException("map cannot be copied: " + e.getMessage());
    }
  } else {
    throw new IllegalArgumentException("source map cannot be null");
  }
}
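// Minimal stand-alone sketch, not taken from the source above: it illustrates the same
// write -> reset -> readFields round trip that copy(Writable) relies on, using a Text value.
// Assumes org.apache.hadoop.io.{DataOutputBuffer, DataInputBuffer, Text} and java.io.IOException.
public static Text roundTripCopy(Text original) throws IOException {
  DataOutputBuffer out = new DataOutputBuffer();
  original.write(out);                       // serialize into the output buffer
  DataInputBuffer in = new DataInputBuffer();
  in.reset(out.getData(), out.getLength());  // point the input buffer at the serialized bytes
  Text copy = new Text();
  copy.readFields(in);                       // deserialize into a fresh instance
  return copy;
}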
public int read() throws IOException {
  int ret;
  if (null == inbuf || -1 == (ret = inbuf.read())) {
    // The current buffer is exhausted: pull the next record and render it as "key\tvalue\n".
    if (!r.next(key, val)) {
      return -1;
    }
    byte[] tmp = key.toString().getBytes();
    outbuf.write(tmp, 0, tmp.length);
    outbuf.write('\t');
    tmp = val.toString().getBytes();
    outbuf.write(tmp, 0, tmp.length);
    outbuf.write('\n');
    inbuf.reset(outbuf.getData(), outbuf.getLength());
    outbuf.reset();
    ret = inbuf.read();
  }
  return ret;
}
/**
 * Parses all the HRegionInfo instances from the passed-in stream until EOF. Presumes the
 * HRegionInfos were serialized to the stream with {@link #toDelimitedByteArray()}.
 *
 * @param bytes serialized bytes
 * @param offset the start offset into the byte[] buffer
 * @param length how far we should read into the byte[] buffer
 * @return all the HRegionInfos in the byte array; keeps reading until the end is reached
 */
public static List<HRegionInfo> parseDelimitedFrom(
    final byte[] bytes, final int offset, final int length) throws IOException {
  if (bytes == null) {
    throw new IllegalArgumentException("Can't build an object with empty bytes array");
  }
  DataInputBuffer in = new DataInputBuffer();
  List<HRegionInfo> hris = new ArrayList<HRegionInfo>();
  try {
    in.reset(bytes, offset, length);
    while (in.available() > 0) {
      HRegionInfo hri = parseFrom(in);
      hris.add(hri);
    }
  } finally {
    in.close();
  }
  return hris;
}
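// Illustrative companion sketch, not from the source: 'a' and 'b' stand for hypothetical
// HRegionInfo instances. Their delimited serialized forms are concatenated and then parsed
// back in a single pass with parseDelimitedFrom(...).
static List<HRegionInfo> roundTripDelimited(HRegionInfo a, HRegionInfo b) throws IOException {
  byte[] b1 = a.toDelimitedByteArray();
  byte[] b2 = b.toDelimitedByteArray();
  byte[] all = new byte[b1.length + b2.length];
  System.arraycopy(b1, 0, all, 0, b1.length);
  System.arraycopy(b2, 0, all, b1.length, b2.length);
  return parseDelimitedFrom(all, 0, all.length); // expect the two regions back, in order
}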
protected static void assertSerializable(
    TupleSerializer ser, TupleDeserializer deser, DatumWrapper<ITuple> tuple, boolean debug)
    throws IOException {
  DataOutputBuffer output = new DataOutputBuffer();
  ser.open(output);
  ser.serialize(tuple);
  ser.close();

  DataInputBuffer input = new DataInputBuffer();
  input.reset(output.getData(), 0, output.getLength());
  DatumWrapper<ITuple> deserializedTuple = new DatumWrapper<ITuple>();
  deser.open(input);
  deserializedTuple = deser.deserialize(deserializedTuple);
  deser.close();

  if (debug) {
    System.out.println("D:" + deserializedTuple.datum());
  }
  assertEquals(tuple.datum(), deserializedTuple.datum());
}
private void sortAndSpill() throws IOException {
  // approximate the length of the output file to be the length of the
  // buffer + header lengths for the partitions
  long size =
      (bufend >= bufstart ? bufend - bufstart : (bufvoid - bufend) + bufstart)
          + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  FSDataOutputStream indexOut = null;
  try {
    // create spill file
    Path filename = mapOutputFile.getSpillFileForWrite(getTaskID(), numSpills, size);
    out = localFs.create(filename);
    // create spill index
    Path indexFilename =
        mapOutputFile.getSpillIndexFileForWrite(
            getTaskID(), numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
    indexOut = localFs.create(indexFilename);

    final int endPosition = (kvend > kvstart) ? kvend : kvoffsets.length + kvend;
    sorter.sort(MapOutputBuffer.this, kvstart, endPosition, reporter);
    int spindex = kvstart;
    InMemValBytes value = new InMemValBytes();
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer<K, V> writer = null;
      try {
        long segmentStart = out.getPos();
        writer = new Writer<K, V>(job, out, keyClass, valClass, codec);
        if (null == combinerClass) {
          // spill directly
          DataInputBuffer key = new DataInputBuffer();
          while (spindex < endPosition
              && kvindices[kvoffsets[spindex % kvoffsets.length] + PARTITION] == i) {
            final int kvoff = kvoffsets[spindex % kvoffsets.length];
            getVBytesForOffset(kvoff, value);
            key.reset(
                kvbuffer,
                kvindices[kvoff + KEYSTART],
                (kvindices[kvoff + VALSTART] - kvindices[kvoff + KEYSTART]));
            writer.append(key, value);
            ++spindex;
          }
        } else {
          int spstart = spindex;
          while (spindex < endPosition
              && kvindices[kvoffsets[spindex % kvoffsets.length] + PARTITION] == i) {
            ++spindex;
          }
          // Note: we would like to avoid the combiner if we've fewer
          // than some threshold of records for a partition
          if (spstart != spindex) {
            combineCollector.setWriter(writer);
            RawKeyValueIterator kvIter = new MRResultIterator(spstart, spindex);
            combineAndSpill(kvIter, combineInputCounter);
          }
        }

        // close the writer
        writer.close();

        // write the index as <offset, raw-length, compressed-length>
        writeIndexRecord(indexOut, out, segmentStart, writer);
        writer = null;
      } finally {
        if (null != writer) writer.close();
      }
    }
    LOG.info("Finished spill " + numSpills);
    ++numSpills;
  } finally {
    if (out != null) out.close();
    if (indexOut != null) indexOut.close();
  }
}
/**
 * Creates in-memory segments.
 *
 * @return a list of in-memory segments
 * @throws IOException on I/O error
 */
public List<TezMerger.Segment> createInMemStreams() throws IOException {
  int numberOfStreams = Math.max(2, rnd.nextInt(10));
  LOG.info("No of streams : " + numberOfStreams);

  SerializationFactory serializationFactory = new SerializationFactory(conf);
  Serializer keySerializer = serializationFactory.getSerializer(keyClass);
  Serializer valueSerializer = serializationFactory.getSerializer(valClass);

  LocalDirAllocator localDirAllocator =
      new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
  InputContext context = createTezInputContext();
  MergeManager mergeManager =
      new MergeManager(
          conf, fs, localDirAllocator, context, null, null, null, null, null,
          1024 * 1024 * 10, null, false, -1);

  DataOutputBuffer keyBuf = new DataOutputBuffer();
  DataOutputBuffer valBuf = new DataOutputBuffer();
  DataInputBuffer keyIn = new DataInputBuffer();
  DataInputBuffer valIn = new DataInputBuffer();
  keySerializer.open(keyBuf);
  valueSerializer.open(valBuf);

  List<TezMerger.Segment> segments = new LinkedList<TezMerger.Segment>();
  for (int i = 0; i < numberOfStreams; i++) {
    BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(1024 * 1024);
    InMemoryWriter writer = new InMemoryWriter(bout);
    Map<Writable, Writable> data = createData();
    // write data
    for (Map.Entry<Writable, Writable> entry : data.entrySet()) {
      keySerializer.serialize(entry.getKey());
      valueSerializer.serialize(entry.getValue());
      keyIn.reset(keyBuf.getData(), 0, keyBuf.getLength());
      valIn.reset(valBuf.getData(), 0, valBuf.getLength());
      writer.append(keyIn, valIn);
      originalData.put(entry.getKey(), entry.getValue());
      keyBuf.reset();
      valBuf.reset();
      keyIn.reset();
      valIn.reset();
    }
    IFile.Reader reader =
        new InMemoryReader(mergeManager, null, bout.getBuffer(), 0, bout.getBuffer().length);
    segments.add(new TezMerger.Segment(reader, true));
    data.clear();
    writer.close();
  }
  return segments;
}
private static void rawValueToWritable(
    RawRecordValue rawValue, DataInputBuffer inputBuffer, Writable typeOut) throws IOException {
  inputBuffer.reset(rawValue.data.getData(), rawValue.data.getLength());
  typeOut.readFields(inputBuffer);
}
@Test
public void testBuiltInGzipDecompressorExceptions() {
  BuiltInGzipDecompressor decompresser = new BuiltInGzipDecompressor();
  try {
    decompresser.setInput(null, 0, 1);
  } catch (NullPointerException ex) {
    // expected
  } catch (Exception ex) {
    fail("unexpected exception for null input (expected NullPointerException): " + ex);
  }

  try {
    decompresser.setInput(new byte[] {0}, 0, -1);
  } catch (ArrayIndexOutOfBoundsException ex) {
    // expected
  } catch (Exception ex) {
    fail("unexpected exception for negative length (expected ArrayIndexOutOfBoundsException): " + ex);
  }

  assertTrue("decompresser.getBytesRead error", decompresser.getBytesRead() == 0);
  assertTrue("decompresser.getRemaining error", decompresser.getRemaining() == 0);
  decompresser.reset();
  decompresser.end();

  InputStream decompStream = null;
  try {
    // invalid bytes 0 and 1: the gzip magic must be 31, -117
    int buffSize = 1 * 1024;
    byte[] buffer = new byte[buffSize];
    Decompressor decompressor = new BuiltInGzipDecompressor();
    DataInputBuffer gzbuf = new DataInputBuffer();
    decompStream = new DecompressorStream(gzbuf, decompressor);
    gzbuf.reset(new byte[] {0, 0, 1, 1, 1, 1, 11, 1, 1, 1, 1}, 11);
    decompStream.read(buffer);
  } catch (IOException ioex) {
    // expected
  } catch (Exception ex) {
    fail("unexpected exception for invalid bytes 0 and 1 in gzip stream: " + ex);
  }

  // invalid byte 2: the compression method must be 8
  try {
    int buffSize = 1 * 1024;
    byte[] buffer = new byte[buffSize];
    Decompressor decompressor = new BuiltInGzipDecompressor();
    DataInputBuffer gzbuf = new DataInputBuffer();
    decompStream = new DecompressorStream(gzbuf, decompressor);
    gzbuf.reset(new byte[] {31, -117, 7, 1, 1, 1, 1, 11, 1, 1, 1, 1}, 11);
    decompStream.read(buffer);
  } catch (IOException ioex) {
    // expected
  } catch (Exception ex) {
    fail("unexpected exception for invalid byte 2 in gzip stream: " + ex);
  }

  // invalid byte 3 (flags)
  try {
    int buffSize = 1 * 1024;
    byte[] buffer = new byte[buffSize];
    Decompressor decompressor = new BuiltInGzipDecompressor();
    DataInputBuffer gzbuf = new DataInputBuffer();
    decompStream = new DecompressorStream(gzbuf, decompressor);
    gzbuf.reset(new byte[] {31, -117, 8, -32, 1, 1, 1, 11, 1, 1, 1, 1}, 11);
    decompStream.read(buffer);
  } catch (IOException ioex) {
    // expected
  } catch (Exception ex) {
    fail("unexpected exception for invalid byte 3 in gzip stream: " + ex);
  }

  // byte 3 sets the extra-field flag (hasExtraField)
  try {
    int buffSize = 1 * 1024;
    byte[] buffer = new byte[buffSize];
    Decompressor decompressor = new BuiltInGzipDecompressor();
    DataInputBuffer gzbuf = new DataInputBuffer();
    decompStream = new DecompressorStream(gzbuf, decompressor);
    gzbuf.reset(new byte[] {31, -117, 8, 4, 1, 1, 1, 11, 1, 1, 1, 1}, 11);
    decompStream.read(buffer);
  } catch (IOException ioex) {
    // expected
  } catch (Exception ex) {
    fail("unexpected exception for byte 3 with hasExtraField set: " + ex);
  }
}
protected void spill(int mstart, int mend) throws IOException, InterruptedException {
  // approximate the length of the output file to be the length of the
  // buffer + header lengths for the partitions
  final long size =
      (bufend >= bufstart ? bufend - bufstart : (bufvoid - bufend) + bufstart)
          + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // create spill file
    final TezSpillRecord spillRec = new TezSpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
    out = rfs.create(filename);

    int spindex = mstart;
    final InMemValBytes value = createInMemValBytes();
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer writer = null;
      try {
        long segmentStart = out.getPos();
        writer = new Writer(conf, out, keyClass, valClass, codec, spilledRecordsCounter, null);
        if (combiner == null) {
          // spill directly
          DataInputBuffer key = new DataInputBuffer();
          while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
            final int kvoff = offsetFor(spindex);
            int keystart = kvmeta.get(kvoff + KEYSTART);
            int valstart = kvmeta.get(kvoff + VALSTART);
            key.reset(kvbuffer, keystart, valstart - keystart);
            getVBytesForOffset(kvoff, value);
            writer.append(key, value);
            ++spindex;
          }
        } else {
          int spstart = spindex;
          while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
            ++spindex;
          }
          // Note: we would like to avoid the combiner if we've fewer
          // than some threshold of records for a partition
          if (spstart != spindex) {
            TezRawKeyValueIterator kvIter = new MRResultIterator(spstart, spindex);
            if (LOG.isDebugEnabled()) {
              LOG.debug("Running combine processor");
            }
            runCombineProcessor(kvIter, writer);
          }
        }

        // close the writer
        writer.close();
        if (numSpills > 0) {
          additionalSpillBytesWritten.increment(writer.getCompressedLength());
          numAdditionalSpills.increment(1);
          // Reset; the value will be set during the final merge.
          outputBytesWithOverheadCounter.setValue(0);
        } else {
          // Set this up for the first write only. Subsequent ones will be handled in the final
          // merge.
          outputBytesWithOverheadCounter.increment(writer.getRawLength());
        }

        // record offsets
        final TezIndexRecord rec =
            new TezIndexRecord(segmentStart, writer.getRawLength(), writer.getCompressedLength());
        spillRec.putIndex(rec, i);
        writer = null;
      } finally {
        if (null != writer) writer.close();
      }
    }

    if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
      // create spill index file
      Path indexFilename =
          mapOutputFile.getSpillIndexFileForWrite(
              numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, conf);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    LOG.info("Finished spill " + numSpills);
    ++numSpills;
  } finally {
    if (out != null) out.close();
  }
}