Example #1
 public void nextRawValue(DataInputBuffer value) throws IOException {
   final DataInputBuffer vb = kvIter.getValue();
   final int vp = vb.getPosition();
   final int vlen = vb.getLength() - vp;
   value.reset(vb.getData(), vp, vlen);
   bytesRead += vlen;
 }
 private static void rawValueToTextBytes(
     DataOutputBuffer dataBuffer, DataInputBuffer inputBuffer, TextBytes textOut)
     throws IOException {
   inputBuffer.reset(dataBuffer.getData(), dataBuffer.getLength());
   int newLength = WritableUtils.readVInt(inputBuffer);
   textOut.set(inputBuffer.getData(), inputBuffer.getPosition(), newLength);
 }
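The helper above decodes a value framed as a vint length prefix followed by the raw bytes. A minimal standalone sketch of that framing (hypothetical class name, not from the original source):

  import java.io.IOException;
  import org.apache.hadoop.io.DataInputBuffer;
  import org.apache.hadoop.io.DataOutputBuffer;
  import org.apache.hadoop.io.WritableUtils;

  public class VIntFramingSketch {
    public static void main(String[] args) throws IOException {
      byte[] payload = "hello".getBytes("UTF-8");

      // Encode: vint length prefix, then the payload bytes.
      DataOutputBuffer out = new DataOutputBuffer();
      WritableUtils.writeVInt(out, payload.length);
      out.write(payload, 0, payload.length);

      // Decode: reset() points the input buffer at the backing array without copying.
      DataInputBuffer in = new DataInputBuffer();
      in.reset(out.getData(), out.getLength());
      int len = WritableUtils.readVInt(in);
      System.out.println("decoded length = " + len); // prints 5
    }
  }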
  /**
   * Inputs and outputs simple records.
   *
   * @throws Exception if the test fails
   */
  @SuppressWarnings("unchecked")
  @Test
  public void simple_record() throws Exception {
    ModelLoader loader = generate();

    Class<?> type = loader.modelType("Simple");
    assertThat(type.isAnnotationPresent(ModelInputLocation.class), is(true));
    assertThat(type.isAnnotationPresent(ModelOutputLocation.class), is(true));

    ModelWrapper object = loader.newModel("Simple");
    DataOutputBuffer output = new DataOutputBuffer();
    ModelOutput<Object> modelOut =
        (ModelOutput<Object>)
            type.getAnnotation(ModelOutputLocation.class)
                .value()
                .getDeclaredConstructor(RecordEmitter.class)
                .newInstance(new TsvEmitter(new OutputStreamWriter(output, "UTF-8")));

    object.set("sid", 1L);
    object.set("value", new Text("hello"));
    modelOut.write(object.unwrap());

    object.set("sid", 2L);
    object.set("value", new Text("world"));
    modelOut.write(object.unwrap());

    object.set("sid", 3L);
    object.set("value", null);
    modelOut.write(object.unwrap());
    modelOut.close();

    DataInputBuffer input = new DataInputBuffer();
    input.reset(output.getData(), output.getLength());
    ModelInput<Object> modelIn =
        (ModelInput<Object>)
            type.getAnnotation(ModelInputLocation.class)
                .value()
                .getDeclaredConstructor(RecordParser.class)
                .newInstance(new TsvParser(new InputStreamReader(input, "UTF-8")));
    ModelWrapper copy = loader.newModel("Simple");

    modelIn.readTo(copy.unwrap());
    assertThat(copy.get("sid"), is((Object) 1L));
    assertThat(copy.get("value"), is((Object) new Text("hello")));

    modelIn.readTo(copy.unwrap());
    assertThat(copy.get("sid"), is((Object) 2L));
    assertThat(copy.get("value"), is((Object) new Text("world")));

    modelIn.readTo(copy.unwrap());
    assertThat(copy.get("sid"), is((Object) 3L));
    assertThat(copy.getOption("value").isNull(), is(true));

    assertThat(input.read(), is(-1));
    modelIn.close();
  }
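A note on the pattern this test relies on: DataOutputBuffer.getData() exposes the internal byte array, which usually has slack capacity beyond the valid data, so it must always be paired with getLength() when handed to DataInputBuffer.reset(). A minimal sketch (hypothetical class name) of the same write-then-read round trip using plain Writables:

  import java.io.IOException;
  import org.apache.hadoop.io.DataInputBuffer;
  import org.apache.hadoop.io.DataOutputBuffer;
  import org.apache.hadoop.io.Text;

  public class RoundTripSketch {
    public static void main(String[] args) throws IOException {
      DataOutputBuffer out = new DataOutputBuffer();
      new Text("hello").write(out);
      new Text("world").write(out);

      // Only the first getLength() bytes of getData() are valid.
      DataInputBuffer in = new DataInputBuffer();
      in.reset(out.getData(), out.getLength());

      Text t = new Text();
      t.readFields(in); // "hello"
      t.readFields(in); // "world"
      System.out.println(t + ", next read: " + in.read()); // "world, next read: -1"
    }
  }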
Example #4
 public boolean nextRawKey(DataInputBuffer key) throws IOException {
   if (kvIter.next()) {
     final DataInputBuffer kb = kvIter.getKey();
     final int kp = kb.getPosition();
     final int klen = kb.getLength() - kp;
     key.reset(kb.getData(), kp, klen);
     bytesRead += klen;
     return true;
   }
   return false;
 }
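One subtlety both of the raw-key/raw-value snippets above account for: after reset(data, start, length), DataInputBuffer.getLength() returns the end offset of the valid region (start + length), not the slice size, which is why the length is computed as getLength() - getPosition(). A standalone sketch (hypothetical class name) illustrating this:

  import java.io.IOException;
  import org.apache.hadoop.io.DataInputBuffer;

  public class SliceViewSketch {
    public static void main(String[] args) throws IOException {
      byte[] data = {10, 20, 30, 40, 50};
      DataInputBuffer view = new DataInputBuffer();
      view.reset(data, 1, 3); // zero-copy view of bytes 20, 30, 40

      System.out.println(view.read());        // 20
      System.out.println(view.getPosition()); // 2 (absolute offset into data)
      System.out.println(view.getLength());   // 4 (= start + length, not 3)
    }
  }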
Example #5
 static <T extends Writable> T read(T writable, byte[] bytes) {
   DataInputBuffer buffer = new DataInputBuffer();
   buffer.reset(bytes, bytes.length);
   try {
     writable.readFields(buffer);
     assertThat("Enf of Stream", buffer.read(), is(-1));
   } catch (IOException e) {
     throw new AssertionError(e);
   }
   return writable;
 }
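Hypothetical usage of the helper above (assuming it is in scope): because the helper asserts that the whole array is consumed, the DataOutputBuffer's backing array must be trimmed to its valid length first.

  DataOutputBuffer out = new DataOutputBuffer();
  new Text("payload").write(out);
  // getData() has slack capacity beyond getLength(), and read(...) asserts
  // end-of-stream, so copy out exactly the valid region.
  byte[] bytes = java.util.Arrays.copyOf(out.getData(), out.getLength());
  Text restored = read(new Text(), bytes);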
Example #6
  @Override
  @SuppressWarnings("unchecked")
  public void run(final JobConf job, final TaskUmbilicalProtocol umbilical) throws IOException {

    final Reporter reporter = getReporter(umbilical);

    // start thread that will handle communication with parent
    startCommunicationThread(umbilical);

    int numReduceTasks = conf.getNumReduceTasks();
    LOG.info("numReduceTasks: " + numReduceTasks);
    MapOutputCollector collector = null;
    if (numReduceTasks > 0) {
      collector = new MapOutputBuffer(umbilical, job, reporter);
    } else {
      collector = new DirectMapOutputCollector(umbilical, job, reporter);
    }
    // reinstantiate the split
    try {
      instantiatedSplit =
          (InputSplit) ReflectionUtils.newInstance(job.getClassByName(splitClass), job);
    } catch (ClassNotFoundException exp) {
      IOException wrap = new IOException("Split class " + splitClass + " not found");
      wrap.initCause(exp);
      throw wrap;
    }
    DataInputBuffer splitBuffer = new DataInputBuffer();
    splitBuffer.reset(split.get(), 0, split.getSize());
    instantiatedSplit.readFields(splitBuffer);

    // if it is a file split, we can give more details
    if (instantiatedSplit instanceof FileSplit) {
      FileSplit fileSplit = (FileSplit) instantiatedSplit;
      job.set("map.input.file", fileSplit.getPath().toString());
      job.setLong("map.input.start", fileSplit.getStart());
      job.setLong("map.input.length", fileSplit.getLength());
    }

    RecordReader rawIn = // open input
        job.getInputFormat().getRecordReader(instantiatedSplit, job, reporter);
    RecordReader in = new TrackedRecordReader(rawIn, getCounters());

    MapRunnable runner = (MapRunnable) ReflectionUtils.newInstance(job.getMapRunnerClass(), job);

    try {
      runner.run(in, collector, reporter);
      collector.flush();
    } finally {
      // close
      in.close(); // close input
      collector.close();
    }
    done(umbilical);
  }
Example #7
 @Deprecated
 private void readFields(byte[] bytes, int offset, int len) throws IOException {
   if (bytes == null || len <= 0) {
     throw new IllegalArgumentException("Can't build a writable with empty " + "bytes array");
   }
   DataInputBuffer in = new DataInputBuffer();
   try {
     in.reset(bytes, offset, len);
     this.readFields(in);
   } finally {
     in.close();
   }
 }
  /**
   * All primitive types.
   *
   * @throws Exception if the test fails
   */
  @SuppressWarnings("unchecked")
  @Test
  public void primitives() throws Exception {
    ModelLoader loader = generate();

    Class<?> type = loader.modelType("Primitives");
    assertThat(type.isAnnotationPresent(ModelInputLocation.class), is(true));
    assertThat(type.isAnnotationPresent(ModelOutputLocation.class), is(true));

    ModelWrapper object = loader.newModel("Primitives");

    object.set("type_boolean", true);
    object.set("type_byte", (byte) 64);
    object.set("type_short", (short) 256);
    object.set("type_int", 100);
    object.set("type_long", 200L);
    object.set("type_float", 300.f);
    object.set("type_double", 400.d);
    object.set("type_decimal", new BigDecimal("1234.567"));
    object.set("type_text", new Text("Hello, world!"));
    object.set("type_date", new Date(2011, 3, 31));
    object.set("type_datetime", new DateTime(2011, 3, 31, 23, 30, 1));

    DataOutputBuffer output = new DataOutputBuffer();
    ModelOutput<Object> modelOut =
        (ModelOutput<Object>)
            type.getAnnotation(ModelOutputLocation.class)
                .value()
                .getDeclaredConstructor(RecordEmitter.class)
                .newInstance(new TsvEmitter(new OutputStreamWriter(output, "UTF-8")));
    modelOut.write(object.unwrap());
    modelOut.write(object.unwrap());
    modelOut.write(object.unwrap());
    modelOut.close();

    DataInputBuffer input = new DataInputBuffer();
    input.reset(output.getData(), output.getLength());
    ModelInput<Object> modelIn =
        (ModelInput<Object>)
            type.getAnnotation(ModelInputLocation.class)
                .value()
                .getDeclaredConstructor(RecordParser.class)
                .newInstance(new TsvParser(new InputStreamReader(input, "UTF-8")));
    ModelWrapper copy = loader.newModel("Primitives");
    modelIn.readTo(copy.unwrap());
    assertThat(object.unwrap(), equalTo(copy.unwrap()));
    assertThat(input.read(), is(-1));
    modelIn.close();
  }
Example #9
  protected static void assertSerializable(HadoopSerialization ser, ITuple tuple, boolean debug)
      throws IOException {
    DataInputBuffer input = new DataInputBuffer();
    DataOutputBuffer output = new DataOutputBuffer();
    DatumWrapper<ITuple> wrapper = new DatumWrapper<ITuple>(tuple);
    ser.ser(wrapper, output);

    input.reset(output.getData(), 0, output.getLength());
    DatumWrapper<ITuple> wrapper2 = new DatumWrapper<ITuple>();

    wrapper2 = ser.deser(wrapper2, input);
    if (debug) {
      System.out.println("D:" + wrapper2.datum());
    }
    assertEquals(tuple, wrapper2.datum());
  }
Example #10
  /** Used by child copy constructors. */
  protected synchronized void copy(Writable other) {
    if (other != null) {
      try {
        DataOutputBuffer out = new DataOutputBuffer();
        other.write(out);
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        readFields(in);

      } catch (IOException e) {
        throw new IllegalArgumentException("map cannot be copied: " + e.getMessage());
      }

    } else {
      throw new IllegalArgumentException("source map cannot be null");
    }
  }
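For comparison, Hadoop ships a stock helper that performs the same serialize-then-deserialize deep copy; a minimal sketch (hypothetical class name; requires the Writable to have an accessible no-arg constructor):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.io.WritableUtils;

  public class CloneSketch {
    public static void main(String[] args) {
      Text original = new Text("to copy");
      // WritableUtils.clone round-trips through an internal buffer,
      // much like the copy(Writable) method above.
      Text copy = WritableUtils.clone(original, new Configuration());
      System.out.println(copy.equals(original)); // true
    }
  }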
Example #11
 public DataInputBuffer getKey() throws IOException {
   final int kvoff = kvoffsets[current % kvoffsets.length];
   keybuf.reset(
       kvbuffer,
       kvindices[kvoff + KEYSTART],
       kvindices[kvoff + VALSTART] - kvindices[kvoff + KEYSTART]);
   return keybuf;
 }
Example #12
 public DataInputBuffer getKey() throws IOException {
   final int kvoff = offsetFor(current);
   keybuf.reset(
       kvbuffer,
       kvmeta.get(kvoff + KEYSTART),
       kvmeta.get(kvoff + VALSTART) - kvmeta.get(kvoff + KEYSTART));
   return keybuf;
 }
Example #13
 @Test
 public void testReadWriteReplicaState() {
   try {
     DataOutputBuffer out = new DataOutputBuffer();
     DataInputBuffer in = new DataInputBuffer();
     for (HdfsServerConstants.ReplicaState repState : HdfsServerConstants.ReplicaState.values()) {
       repState.write(out);
       in.reset(out.getData(), out.getLength());
       HdfsServerConstants.ReplicaState result = HdfsServerConstants.ReplicaState.read(in);
       assertTrue("testReadWrite error !!!", repState == result);
       out.reset();
       in.reset();
     }
   } catch (Exception ex) {
     fail("testReadWrite ex error ReplicaState");
   }
 }
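The same write/reset/read cycle can be expressed with WritableUtils' generic enum helpers; a minimal sketch (hypothetical class name) using java.util.concurrent.TimeUnit as a stand-in enum:

  import java.io.IOException;
  import java.util.concurrent.TimeUnit;
  import org.apache.hadoop.io.DataInputBuffer;
  import org.apache.hadoop.io.DataOutputBuffer;
  import org.apache.hadoop.io.WritableUtils;

  public class EnumRoundTripSketch {
    public static void main(String[] args) throws IOException {
      DataOutputBuffer out = new DataOutputBuffer();
      DataInputBuffer in = new DataInputBuffer();
      for (TimeUnit unit : TimeUnit.values()) {
        WritableUtils.writeEnum(out, unit);
        in.reset(out.getData(), out.getLength());
        TimeUnit back = WritableUtils.readEnum(in, TimeUnit.class);
        if (back != unit) {
          throw new AssertionError("round trip failed for " + unit);
        }
        out.reset(); // rewind the output buffer before the next value
      }
    }
  }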
Example #14
 /**
  * Parses all the HRegionInfo instances from the passed-in stream until EOF. Presumes the
  * HRegionInfos were serialized to the stream with {@link #toDelimitedByteArray()}.
  *
  * @param bytes serialized bytes
  * @param offset the start offset into the byte[] buffer
  * @param length how far we should read into the byte[] buffer
  * @return all the HRegionInfos in the byte array; keeps reading until the end
  */
 public static List<HRegionInfo> parseDelimitedFrom(
     final byte[] bytes, final int offset, final int length) throws IOException {
   if (bytes == null) {
     throw new IllegalArgumentException("Can't build an object with empty bytes array");
   }
   DataInputBuffer in = new DataInputBuffer();
   List<HRegionInfo> hris = new ArrayList<HRegionInfo>();
   try {
     in.reset(bytes, offset, length);
     while (in.available() > 0) {
       HRegionInfo hri = parseFrom(in);
       hris.add(hri);
     }
   } finally {
     in.close();
   }
   return hris;
 }
Example #15
  /** Writes an IFile by reading data from inputCSVFile. */
  private void createIFile(WriterOptions options, KV_TRAIT trait) throws IOException {
    Writer writer = IFile2.createWriter(options);
    FSDataInputStream in = fs.open(inputCSVFile);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    while (reader.ready()) {
      String line = reader.readLine();
      // Splitter drops empty segments between consecutive delimiters
      Iterator<String> it = Splitter.on(",").omitEmptyStrings().split(line).iterator();
      String k = it.next();
      String v = it.next();

      key.reset(k.getBytes(), k.getBytes().length);
      value.reset(v.getBytes(), v.getBytes().length);
      writer.append(key, value);
    }
    reader.close();
    writer.close();
  }
Example #16
 public int read() throws IOException {
   int ret;
   if (null == inbuf || -1 == (ret = inbuf.read())) {
     if (!r.next(key, val)) {
       return -1;
     }
     byte[] tmp = key.toString().getBytes();
     outbuf.write(tmp, 0, tmp.length);
     outbuf.write('\t');
     tmp = val.toString().getBytes();
     outbuf.write(tmp, 0, tmp.length);
     outbuf.write('\n');
     inbuf.reset(outbuf.getData(), outbuf.getLength());
     outbuf.reset();
     ret = inbuf.read();
   }
   return ret;
 }
Example #17
    @SuppressWarnings("unchecked")
    protected boolean lessThan(Object a, Object b) {
      DataInputBuffer key1 = ((Segment<K, V>) a).getKey();
      DataInputBuffer key2 = ((Segment<K, V>) b).getKey();
      int s1 = key1.getPosition();
      int l1 = key1.getLength() - s1;
      int s2 = key2.getPosition();
      int l2 = key2.getLength() - s2;

      return comparator.compare(key1.getData(), s1, l1, key2.getData(), s2, l2) < 0;
    }
  private FileStatus getFileStatus(FileStatus fileStatus) throws IOException {
    FileStatus status = new FileStatus();

    buffer.reset();
    DataOutputStream out = new DataOutputStream(buffer);
    fileStatus.write(out);

    in.reset(buffer.toByteArray(), 0, buffer.size());
    status.readFields(in);
    return status;
  }
Example #19
  protected static void assertSerializable(
      TupleSerializer ser, TupleDeserializer deser, DatumWrapper<ITuple> tuple, boolean debug)
      throws IOException {
    DataOutputBuffer output = new DataOutputBuffer();
    ser.open(output);
    ser.serialize(tuple);
    ser.close();

    DataInputBuffer input = new DataInputBuffer();
    input.reset(output.getData(), 0, output.getLength());
    DatumWrapper<ITuple> deserializedTuple = new DatumWrapper<ITuple>();

    deser.open(input);
    deserializedTuple = deser.deserialize(deserializedTuple);
    deser.close();

    if (debug) {
      System.out.println("D:" + deserializedTuple.datum());
    }
    assertEquals(tuple.datum(), deserializedTuple.datum());
  }
Example #21
      public void reset(byte[] buffer, int start, int length) {
        this.buffer = buffer;
        this.start = start;
        this.length = length;

        if (start + length > bufvoid) {
          this.buffer = new byte[this.length];
          final int taillen = bufvoid - start;
          System.arraycopy(buffer, start, this.buffer, 0, taillen);
          System.arraycopy(buffer, 0, this.buffer, taillen, length - taillen);
          this.start = 0;
        }

        super.reset(this.buffer, this.start, this.length);
      }
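This reset handles a record that wraps past the end of the circular collection buffer (bufvoid) by copying it into a fresh contiguous array. A standalone sketch of just that copy, with hypothetical values:

  public class WrapCopySketch {
    public static void main(String[] args) {
      byte[] ring = {'d', 'e', 'a', 'b', 'c'}; // "abcde" stored with wrap at index 5
      int start = 2, length = 5, bufvoid = ring.length;

      byte[] contiguous = new byte[length];
      int taillen = bufvoid - start; // bytes from start up to the wrap point
      System.arraycopy(ring, start, contiguous, 0, taillen);
      System.arraycopy(ring, 0, contiguous, taillen, length - taillen);
      System.out.println(new String(contiguous)); // abcde
    }
  }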
Example #22
  public static class SpecGroupingComparator implements RawComparator<GridmixKey> {
    private final DataInputBuffer di = new DataInputBuffer();
    private final byte[] reset = di.getData();

    @Override
    public int compare(GridmixKey g1, GridmixKey g2) {
      final byte t1 = g1.getType();
      final byte t2 = g2.getType();
      if (t1 == GridmixKey.REDUCE_SPEC || t2 == GridmixKey.REDUCE_SPEC) {
        return t1 - t2;
      }
      assert t1 == GridmixKey.DATA;
      assert t2 == GridmixKey.DATA;
      return g1.compareTo(g2);
    }

    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
      try {
        final int ret;
        di.reset(b1, s1, l1);
        final int x1 = WritableUtils.readVInt(di);
        di.reset(b2, s2, l2);
        final int x2 = WritableUtils.readVInt(di);
        final int t1 = b1[s1 + x1];
        final int t2 = b2[s2 + x2];
        if (t1 == GridmixKey.REDUCE_SPEC || t2 == GridmixKey.REDUCE_SPEC) {
          ret = t1 - t2;
        } else {
          assert t1 == GridmixKey.DATA;
          assert t2 == GridmixKey.DATA;
          ret = WritableComparator.compareBytes(b1, s1, x1, b2, s2, x2);
        }
        // re-point di at its original empty array so the comparator doesn't pin b1/b2
        di.reset(reset, 0, 0);
        return ret;
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  }
Example #23
    private void sortAndSpill() throws IOException {
      // approximate the length of the output file to be the length of the
      // buffer + header lengths for the partitions
      long size =
          (bufend >= bufstart ? bufend - bufstart : (bufvoid - bufend) + bufstart)
              + partitions * APPROX_HEADER_LENGTH;
      FSDataOutputStream out = null;
      FSDataOutputStream indexOut = null;
      try {
        // create spill file
        Path filename = mapOutputFile.getSpillFileForWrite(getTaskID(), numSpills, size);
        out = localFs.create(filename);
        // create spill index
        Path indexFilename =
            mapOutputFile.getSpillIndexFileForWrite(
                getTaskID(), numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
        indexOut = localFs.create(indexFilename);
        final int endPosition = (kvend > kvstart) ? kvend : kvoffsets.length + kvend;
        sorter.sort(MapOutputBuffer.this, kvstart, endPosition, reporter);
        int spindex = kvstart;
        InMemValBytes value = new InMemValBytes();
        for (int i = 0; i < partitions; ++i) {
          IFile.Writer<K, V> writer = null;
          try {
            long segmentStart = out.getPos();
            writer = new Writer<K, V>(job, out, keyClass, valClass, codec);
            if (null == combinerClass) {
              // spill directly
              DataInputBuffer key = new DataInputBuffer();
              while (spindex < endPosition
                  && kvindices[kvoffsets[spindex % kvoffsets.length] + PARTITION] == i) {
                final int kvoff = kvoffsets[spindex % kvoffsets.length];
                getVBytesForOffset(kvoff, value);
                key.reset(
                    kvbuffer,
                    kvindices[kvoff + KEYSTART],
                    (kvindices[kvoff + VALSTART] - kvindices[kvoff + KEYSTART]));
                writer.append(key, value);
                ++spindex;
              }
            } else {
              int spstart = spindex;
              while (spindex < endPosition
                  && kvindices[kvoffsets[spindex % kvoffsets.length] + PARTITION] == i) {
                ++spindex;
              }
              // Note: we would like to avoid the combiner if we've fewer
              // than some threshold of records for a partition
              if (spstart != spindex) {
                combineCollector.setWriter(writer);
                RawKeyValueIterator kvIter = new MRResultIterator(spstart, spindex);
                combineAndSpill(kvIter, combineInputCounter);
              }
            }

            // close the writer
            writer.close();

            // write the index as <offset, raw-length, compressed-length>
            writeIndexRecord(indexOut, out, segmentStart, writer);
            writer = null;
          } finally {
            if (null != writer) writer.close();
          }
        }
        LOG.info("Finished spill " + numSpills);
        ++numSpills;
      } finally {
        if (out != null) out.close();
        if (indexOut != null) indexOut.close();
      }
    }
  @Test
  public void testBuiltInGzipDecompressorExceptions() {
    BuiltInGzipDecompressor decompresser = new BuiltInGzipDecompressor();
    try {
      decompresser.setInput(null, 0, 1);
    } catch (NullPointerException ex) {
      // expected
    } catch (Exception ex) {
      fail("testBuiltInGzipDecompressorExceptions npe error " + ex);
    }

    try {
      decompresser.setInput(new byte[] {0}, 0, -1);
    } catch (ArrayIndexOutOfBoundsException ex) {
      // expected
    } catch (Exception ex) {
      fail("testBuiltInGzipDecompressorExceptions aioob error" + ex);
    }

    assertTrue("decompresser.getBytesRead error", decompresser.getBytesRead() == 0);
    assertTrue("decompresser.getRemaining error", decompresser.getRemaining() == 0);
    decompresser.reset();
    decompresser.end();

    InputStream decompStream = null;
    try {
      // invalid bytes 0 and 1: must be 31, -117
      int buffSize = 1 * 1024;
      byte buffer[] = new byte[buffSize];
      Decompressor decompressor = new BuiltInGzipDecompressor();
      DataInputBuffer gzbuf = new DataInputBuffer();
      decompStream = new DecompressorStream(gzbuf, decompressor);
      gzbuf.reset(new byte[] {0, 0, 1, 1, 1, 1, 11, 1, 1, 1, 1}, 11);
      decompStream.read(buffer);
    } catch (IOException ioex) {
      // expected
    } catch (Exception ex) {
      fail("invalid 0 and 1 byte in gzip stream" + ex);
    }

    // invalid 2 byte, must be 8
    try {
      int buffSize = 1 * 1024;
      byte buffer[] = new byte[buffSize];
      Decompressor decompressor = new BuiltInGzipDecompressor();
      DataInputBuffer gzbuf = new DataInputBuffer();
      decompStream = new DecompressorStream(gzbuf, decompressor);
      gzbuf.reset(new byte[] {31, -117, 7, 1, 1, 1, 1, 11, 1, 1, 1, 1}, 11);
      decompStream.read(buffer);
    } catch (IOException ioex) {
      // expected
    } catch (Exception ex) {
      fail("invalid 2 byte in gzip stream" + ex);
    }

    try {
      int buffSize = 1 * 1024;
      byte buffer[] = new byte[buffSize];
      Decompressor decompressor = new BuiltInGzipDecompressor();
      DataInputBuffer gzbuf = new DataInputBuffer();
      decompStream = new DecompressorStream(gzbuf, decompressor);
      gzbuf.reset(new byte[] {31, -117, 8, -32, 1, 1, 1, 11, 1, 1, 1, 1}, 11);
      decompStream.read(buffer);
    } catch (IOException ioex) {
      // expected
    } catch (Exception ex) {
      fail("invalid 3 byte in gzip stream" + ex);
    }
    try {
      int buffSize = 1 * 1024;
      byte buffer[] = new byte[buffSize];
      Decompressor decompressor = new BuiltInGzipDecompressor();
      DataInputBuffer gzbuf = new DataInputBuffer();
      decompStream = new DecompressorStream(gzbuf, decompressor);
      gzbuf.reset(new byte[] {31, -117, 8, 4, 1, 1, 1, 11, 1, 1, 1, 1}, 11);
      decompStream.read(buffer);
    } catch (IOException ioex) {
      // expected
    } catch (Exception ex) {
      fail("invalid 3 byte make hasExtraField" + ex);
    }
  }
Example #25
  /**
   * Creates in-memory segments.
   *
   * @return the created segments
   * @throws IOException if an I/O error occurs
   */
  public List<TezMerger.Segment> createInMemStreams() throws IOException {
    int numberOfStreams = Math.max(2, rnd.nextInt(10));
    LOG.info("No of streams : " + numberOfStreams);

    SerializationFactory serializationFactory = new SerializationFactory(conf);
    Serializer keySerializer = serializationFactory.getSerializer(keyClass);
    Serializer valueSerializer = serializationFactory.getSerializer(valClass);

    LocalDirAllocator localDirAllocator =
        new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext context = createTezInputContext();
    MergeManager mergeManager =
        new MergeManager(
            conf,
            fs,
            localDirAllocator,
            context,
            null,
            null,
            null,
            null,
            null,
            1024 * 1024 * 10,
            null,
            false,
            -1);

    DataOutputBuffer keyBuf = new DataOutputBuffer();
    DataOutputBuffer valBuf = new DataOutputBuffer();
    DataInputBuffer keyIn = new DataInputBuffer();
    DataInputBuffer valIn = new DataInputBuffer();
    keySerializer.open(keyBuf);
    valueSerializer.open(valBuf);

    List<TezMerger.Segment> segments = new LinkedList<TezMerger.Segment>();
    for (int i = 0; i < numberOfStreams; i++) {
      BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(1024 * 1024);
      InMemoryWriter writer = new InMemoryWriter(bout);
      Map<Writable, Writable> data = createData();
      // write data
      for (Map.Entry<Writable, Writable> entry : data.entrySet()) {
        keySerializer.serialize(entry.getKey());
        valueSerializer.serialize(entry.getValue());
        keyIn.reset(keyBuf.getData(), 0, keyBuf.getLength());
        valIn.reset(valBuf.getData(), 0, valBuf.getLength());
        writer.append(keyIn, valIn);
        originalData.put(entry.getKey(), entry.getValue());
        keyBuf.reset();
        valBuf.reset();
        keyIn.reset();
        valIn.reset();
      }
      IFile.Reader reader =
          new InMemoryReader(mergeManager, null, bout.getBuffer(), 0, bout.getBuffer().length);
      segments.add(new TezMerger.Segment(reader, true));

      data.clear();
      writer.close();
    }
    return segments;
  }
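The inner loop above leans on a detail worth calling out: DataOutputBuffer.reset() only rewinds the write position and reuses the backing array, so the DataInputBuffers must be re-pointed at the fresh bytes on every iteration. A minimal sketch (hypothetical class name) of that serialize-into-buffer pattern with Hadoop's default WritableSerialization:

  import java.io.IOException;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.io.DataOutputBuffer;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.io.serializer.SerializationFactory;
  import org.apache.hadoop.io.serializer.Serializer;

  public class SerializerBufferSketch {
    public static void main(String[] args) throws IOException {
      SerializationFactory factory = new SerializationFactory(new Configuration());
      Serializer<Text> serializer = factory.getSerializer(Text.class);

      DataOutputBuffer buf = new DataOutputBuffer();
      serializer.open(buf);

      serializer.serialize(new Text("k1"));
      System.out.println("bytes written: " + buf.getLength());
      buf.reset(); // rewinds the write position; the backing array is reused

      serializer.serialize(new Text("k2"));
      System.out.println("bytes written: " + buf.getLength());
      serializer.close();
    }
  }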
Example #26
  protected void spill(int mstart, int mend) throws IOException, InterruptedException {

    // approximate the length of the output file to be the length of the
    // buffer + header lengths for the partitions
    final long size =
        (bufend >= bufstart ? bufend - bufstart : (bufvoid - bufend) + bufstart)
            + partitions * APPROX_HEADER_LENGTH;
    FSDataOutputStream out = null;
    try {
      // create spill file
      final TezSpillRecord spillRec = new TezSpillRecord(partitions);
      final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
      out = rfs.create(filename);

      int spindex = mstart;
      final InMemValBytes value = createInMemValBytes();
      for (int i = 0; i < partitions; ++i) {
        IFile.Writer writer = null;
        try {
          long segmentStart = out.getPos();
          writer = new Writer(conf, out, keyClass, valClass, codec, spilledRecordsCounter, null);
          if (combiner == null) {
            // spill directly
            DataInputBuffer key = new DataInputBuffer();
            while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
              final int kvoff = offsetFor(spindex);
              int keystart = kvmeta.get(kvoff + KEYSTART);
              int valstart = kvmeta.get(kvoff + VALSTART);
              key.reset(kvbuffer, keystart, valstart - keystart);
              getVBytesForOffset(kvoff, value);
              writer.append(key, value);
              ++spindex;
            }
          } else {
            int spstart = spindex;
            while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
              ++spindex;
            }
            // Note: we would like to avoid the combiner if we've fewer
            // than some threshold of records for a partition
            if (spstart != spindex) {
              TezRawKeyValueIterator kvIter = new MRResultIterator(spstart, spindex);
              if (LOG.isDebugEnabled()) {
                LOG.debug("Running combine processor");
              }
              runCombineProcessor(kvIter, writer);
            }
          }

          // close the writer
          writer.close();
          if (numSpills > 0) {
            additionalSpillBytesWritten.increment(writer.getCompressedLength());
            numAdditionalSpills.increment(1);
            // Reset the value will be set during the final merge.
            outputBytesWithOverheadCounter.setValue(0);
          } else {
            // Set this up for the first write only. Subsequent ones will be handled in the final
            // merge.
            outputBytesWithOverheadCounter.increment(writer.getRawLength());
          }
          // record offsets
          final TezIndexRecord rec =
              new TezIndexRecord(segmentStart, writer.getRawLength(), writer.getCompressedLength());
          spillRec.putIndex(rec, i);

          writer = null;
        } finally {
          if (null != writer) writer.close();
        }
      }

      if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
        // create spill index file
        Path indexFilename =
            mapOutputFile.getSpillIndexFileForWrite(
                numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
        spillRec.writeToFile(indexFilename, conf);
      } else {
        indexCacheList.add(spillRec);
        totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
      }
      LOG.info("Finished spill " + numSpills);
      ++numSpills;
    } finally {
      if (out != null) out.close();
    }
  }
 private static void rawValueToWritable(
     RawRecordValue rawValue, DataInputBuffer inputBuffer, Writable typeOut) throws IOException {
   inputBuffer.reset(rawValue.data.getData(), rawValue.data.getLength());
   typeOut.readFields(inputBuffer);
 }