    /**
     * Emits one output record for an entry whose extension is listed in {@code Extensions}.
     *
     * <p>For a matching entry the visible code calls {@code encodeHex} for the "sha1" and "md5"
     * fields, copies the in-memory content buffer into {@code Vid} when the content is not on
     * HDFS, copies the key bytes into {@code OutKey}, and writes {@code (OutKey, Fields)}.
     *
     * <p>NOTE(review): closing braces (and possibly whole statements) are missing from this
     * listing, so the exact nesting of the statements below cannot be confirmed from here.
     *
     * @param key raw key whose bytes are copied into {@code OutKey}
     * @param value file-system entry being examined
     * @param context sink for the output record
     */
    public void map(ImmutableHexWritable key, FsEntry value, Context context)
        throws IOException, InterruptedException {
      if (Extensions.contains(value.extension())) {

        encodeHex(Sha, value, "sha1");
        encodeHex(Md5, value, "md5");

        // NOTE(review): the HDFS branch is empty in this listing; its body appears to be elided.
        if (value.isContentHDFS()) {
        } else {
          final byte[] buf = value.getContentBuffer();
          if (buf == null) {
            LOG.warn(value.fullPath() + " didn't have a content buffer, skipping.");
          // NOTE(review): as listed, nothing separates the null check above from this
          // dereference of buf; presumably a return and closing brace were elided - confirm.
          Vid.set(buf, 0, buf.length);
        byte[] keybytes = key.get();
        OutKey.set(keybytes, 0, keybytes.length);
        context.write(OutKey, Fields);
 public boolean next(BytesWritable k, BytesWritable v) throws IOException {
   if (nextKeyValue()) {
     k.set(new BytesWritable(getCurrentKey().getBytes()));
     v.set(new BytesWritable(getCurrentValue()));
     return true;
   return false;
  /**
   * Drains {@code zipIn} into memory and stores the bytes in {@code value}.
   *
   * <p>{@code Text} values receive the bytes decoded with {@code encoding}; {@code BytesWritable}
   * values receive the raw bytes. Any other value class is logged as an error and {@code key} is
   * set to {@code null}.
   *
   * <p>NOTE(review): closing braces are missing from this listing, so the exact nesting of the
   * statements cannot be confirmed from here.
   *
   * @param length initial capacity for the in-memory buffer when positive; otherwise the default
   *     capacity is used
   * @throws IOException if reading from {@code zipIn} fails
   */
  protected void setValue(long length) throws IOException {
    ByteArrayOutputStream baos;
    if (length > 0) {
      // NOTE(review): length is narrowed from long to int here; values above
      // Integer.MAX_VALUE would not survive the cast - confirm callers never pass them.
      baos = new ByteArrayOutputStream((int) length);
    } else {
      baos = new ByteArrayOutputStream();

    int size;
    // read() returns -1 at end of stream; until then copy each chunk into the buffer.
    while ((size = zipIn.read(buf, 0, buf.length)) != -1) {
      baos.write(buf, 0, size);
    if (value instanceof Text) {
      ((Text) value).set(baos.toString(encoding));
    } else if (value instanceof BytesWritable) {
      if (batchSize > 1) {
        // Copy data since XCC won't do it when Content is created.
        value = (VALUEIN) new BytesWritable();
      ((BytesWritable) value).set(baos.toByteArray(), 0, baos.size());
    } else {
      String error = "Unsupported input value class: " + value.getClass();
      LOG.error(error, new UnsupportedOperationException(error));
      key = null;
    /**
     * Prefixes the payload of {@code w} with {@code signature}, encodes the result with
     * {@code base64.encode} and stores the encoded bytes in {@code bytesWritable}.
     *
     * <p>NOTE(review): nothing in the visible lines emits {@code bytesWritable} afterwards;
     * presumably the tail of the method is elided from this listing - confirm.
     *
     * @param w a {@code Text} or (asserted) {@code BytesWritable} holding the payload
     * @throws IOException declared by the signature
     */
    public void write(Writable w) throws IOException {

      // Get input data
      byte[] input;
      int inputLength;
      if (w instanceof Text) {
        // Only the first inputLength bytes of the returned array are copied below.
        input = ((Text) w).getBytes();
        inputLength = ((Text) w).getLength();
      } else {
        // Only checked when assertions are enabled; otherwise the cast below fails instead.
        assert (w instanceof BytesWritable);
        input = ((BytesWritable) w).get();
        inputLength = ((BytesWritable) w).getSize();

      // Add signature
      byte[] wrapped = new byte[signature.length + inputLength];
      for (int i = 0; i < signature.length; i++) {
        wrapped[i] = signature[i];
      for (int i = 0; i < inputLength; i++) {
        wrapped[i + signature.length] = input[i];

      // Encode
      byte[] output = base64.encode(wrapped);
      bytesWritable.set(output, 0, output.length);

  public BytesWritable toBytes() // throws IOException
    sbuffer[0] = (byte) (m_isRC ? 1 : 0);

    sbuffer[1] = (byte) ((m_refID & 0xFF000000) >> 24);
    sbuffer[2] = (byte) ((m_refID & 0x00FF0000) >> 16);
    sbuffer[3] = (byte) ((m_refID & 0x0000FF00) >> 8);
    sbuffer[4] = (byte) ((m_refID & 0x000000FF));

    sbuffer[5] = (byte) ((m_refStart & 0xFF000000) >> 24);
    sbuffer[6] = (byte) ((m_refStart & 0x00FF0000) >> 16);
    sbuffer[7] = (byte) ((m_refStart & 0x0000FF00) >> 8);
    sbuffer[8] = (byte) ((m_refStart & 0x000000FF));

    sbuffer[9] = (byte) ((m_refEnd & 0xFF000000) >> 24);
    sbuffer[10] = (byte) ((m_refEnd & 0x00FF0000) >> 16);
    sbuffer[11] = (byte) ((m_refEnd & 0x0000FF00) >> 8);
    sbuffer[12] = (byte) ((m_refEnd & 0x000000FF));

    sbuffer[13] = (byte) ((m_differences & 0xFF000000) >> 24);
    sbuffer[14] = (byte) ((m_differences & 0x00FF0000) >> 16);
    sbuffer[15] = (byte) ((m_differences & 0x0000FF00) >> 8);
    sbuffer[16] = (byte) ((m_differences & 0x000000FF));

    bytes.set(sbuffer, 0, 17);
    return bytes;
  /**
   * Loads the whole file behind {@code split} into {@code currentValue} as a single record.
   *
   * <p>NOTE(review): closing braces and the body of the {@code finally} block are missing from
   * this listing.
   *
   * @return {@code false} if the split path is a directory or the file was already processed;
   *     {@code true} after the file content has been read
   * @throws IOException if the file system lookup, open or read fails
   * @throws InterruptedException declared by the signature; not thrown by the visible code
   */
  public boolean nextKeyValue() throws IOException, InterruptedException {
    FileSystem fileSystem = FileSystem.get(configuration);

    if (fileSystem.isDirectory(split.getPath())) {
      return false;

    // Each split yields exactly one record, so a second call reports end of input.
    if (fileProcessed) {
      return false;

    // NOTE(review): the split length is narrowed to int, so the whole file must fit in a
    // single byte array; larger files would not survive the cast - confirm this is intended.
    int fileLength = (int) split.getLength();
    byte[] result = new byte[fileLength];

    FSDataInputStream inputStream = null;

    try {
      inputStream = fileSystem.open(split.getPath());
      IOUtils.readFully(inputStream, result, 0, fileLength);
      currentValue.set(result, 0, fileLength);
    } finally {
    // NOTE(review): the finally body is not visible here; presumably it closes inputStream.
    fileProcessed = true;
    return true;
Пример #7
 public Writable create(Object value, TypeConverter typeConverter, Holder<Integer> size) {
   BytesWritable writable = new BytesWritable();
   ByteBuffer bb = (ByteBuffer) value;
   writable.set(bb.array(), 0, bb.array().length);
   size.value = bb.array().length;
   return writable;
 public BytesWritable readBytes(BytesWritable bw) throws IOException {
   byte[] bytes = in.readBytes();
   if (bw == null) {
     bw = new BytesWritable(bytes);
   } else {
     bw.set(bytes, 0, bytes.length);
   return bw;
    /**
     * Turns one "word count" text line into a table row and emits it under a sort key.
     *
     * <p>The visible code splits the line on a single space, stores the word in column 0 and the
     * parsed count in column 1 of {@code tupleRow}, builds a {@code userKey} tuple according to
     * {@code sortKey}, converts it with {@code BasicTableOutputFormat.getSortKey} and collects
     * {@code (bytesKey, tupleRow)}.
     *
     * <p>NOTE(review): closing braces, the delimiters of a block comment and several statements
     * are missing from this listing, so the exact control flow cannot be confirmed from here.
     *
     * @param key input key; not read by the visible code
     * @param value text line expected to contain "word count"
     * @param output collector receiving the row
     * @param reporter not used by the visible code
     */
    public void map(
        LongWritable key,
        Text value,
        OutputCollector<BytesWritable, Tuple> output,
        Reporter reporter)
        throws IOException {
      // value should contain "word count"
      String[] wdct = value.toString().split(" ");
      if (wdct.length != 2) {
        // LOG the error

      // NOTE(review): getBytes() and new String(byte[]) below use the platform default charset.
      byte[] word = wdct[0].getBytes();
      bytesKey.set(word, 0, word.length);
      System.out.println("word: " + new String(word));
      tupleRow.set(0, new String(word));
      tupleRow.set(1, Integer.parseInt(wdct[1]));
      System.out.println("count:  " + Integer.parseInt(wdct[1]));

      // This key has to be created by user
       * Tuple userKey = new DefaultTuple(); userKey.append(new String(word));
       * userKey.append(Integer.parseInt(wdct[1]));
      System.out.println("in map, sortkey: " + sortKey);
      Tuple userKey = new ZebraTuple();
      if (sortKey.equalsIgnoreCase("word,count")) {
        userKey.append(new String(word));

      if (sortKey.equalsIgnoreCase("count")) {

      if (sortKey.equalsIgnoreCase("word")) {
        userKey.append(new String(word));

      try {

        /* New M/R Interface */
        /* Converts user key to zebra BytesWritable key */
        /* using sort key expr tree */
        /* Returns a java base object */
        /* Done for each user key */

        // Replaces the bytesKey reference that was filled with the raw word bytes above.
        bytesKey = BasicTableOutputFormat.getSortKey(javaObj, userKey);
      } catch (Exception e) {


      output.collect(bytesKey, tupleRow);
Пример #10
 @JRubyMethod(name = "ruby=", required = 1)
 public IRubyObject ruby_set(final ThreadContext ctx, IRubyObject arg) {
   RubyString string;
   try {
     string = arg.convertToString();
   } catch (RaiseException re) {
     throw newTypeError(ctx.runtime, arg.getMetaClass(), "String");
   ByteList bytes = string.getByteList();
   value.set(bytes.getUnsafeBytes(), bytes.getBegin(), bytes.getRealSize());
   return arg;
Пример #11
    /**
     * Aggregates the time series received for one roll-up key and forwards the aggregate only
     * if every metric reaches its configured threshold.
     *
     * <p>For each metric the per-window values of the aggregate series are summed; the key
     * passes when, for all metrics, that total is at least {@code metricThreshold / 100} times
     * the corresponding entry of {@code metricSums}.
     *
     * <p>NOTE(review): closing braces and parts of several statements are missing from this
     * listing (for example the initializer of {@code wrapper} and the step that folds each
     * {@code series} into {@code aggregateSeries}), so the description above covers only what
     * is visible.
     *
     * @param topkRollupKey serialized roll-up key, written back unchanged on success
     * @param timeSeriesIterable serialized {@code MetricTimeSeries} values for the key
     * @param context sink for the output record
     */
    public void reduce(
        BytesWritable topkRollupKey, Iterable<BytesWritable> timeSeriesIterable, Context context)
        throws IOException, InterruptedException {

      TopKRollupPhaseOneMapOutputKey wrapper =
          "DimensionName {} DimensionValue {}",

      MetricTimeSeries aggregateSeries = new MetricTimeSeries(metricSchema);
      for (BytesWritable writable : timeSeriesIterable) {
        // copyBytes() is used so the array passed on is exactly as long as the valid data.
        MetricTimeSeries series = MetricTimeSeries.fromBytes(writable.copyBytes(), metricSchema);

      // Start every metric total at zero so the accumulation below never sees a missing key.
      Map<String, Long> metricValues = new HashMap<String, Long>();
      for (MetricSpec metricSpec : starTreeConfig.getMetrics()) {
        metricValues.put(metricSpec.getName(), 0L);
      for (Long time : aggregateSeries.getTimeWindowSet()) {
        for (MetricSpec metricSpec : starTreeConfig.getMetrics()) {
          String metricName = metricSpec.getName();
          long metricValue = aggregateSeries.get(time, metricName).longValue();
          metricValues.put(metricName, metricValues.get(metricName) + metricValue);

      boolean aboveThreshold = true;
      for (MetricSpec metricSpec : starTreeConfig.getMetrics()) {
        String metricName = metricSpec.getName();

        // NOTE(review): these unbox map lookups; a metric missing from metricSums or
        // metricThresholds would raise a NullPointerException here.
        long metricValue = metricValues.get(metricName);
        long metricSum = metricSums.get(metricName);
        double metricThreshold = metricThresholds.get(metricName);

        LOGGER.info("metricValue : {} metricSum : {}", metricValue, metricSum);
        // The threshold is divided by 100 before being applied to the sum.
        if (metricValue < (metricThreshold / 100) * metricSum) {
          aboveThreshold = false;

      if (aboveThreshold) {
        LOGGER.info("Passed threshold");
        // NOTE(review): toBytes() is invoked twice here; presumably each call re-serializes
        // the series, so a local variable would avoid the second pass.
        valWritable.set(aggregateSeries.toBytes(), 0, aggregateSeries.toBytes().length);
        context.write(topkRollupKey, valWritable);
  /**
   * Serializes a struct row into the shared {@code serializeBytesWritable}.
   *
   * <p>NOTE(review): the loop body that writes each column into {@code outputByteBuffer} is only
   * partly visible in this listing, and closing braces are missing.
   *
   * @param obj the row object
   * @param objInspector inspector for {@code obj}; cast to {@code StructObjectInspector}
   * @return the shared {@code serializeBytesWritable}, set to the first
   *     {@code outputByteBuffer.getLength()} bytes of {@code outputByteBuffer.getData()}
   * @throws SerDeException declared by the signature
   */
  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    StructObjectInspector soi = (StructObjectInspector) objInspector;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();

    // Iterates by columnNames.size() while indexing into fields by the same position.
    for (int i = 0; i < columnNames.size(); i++) {
          soi.getStructFieldData(obj, fields.get(i)),

    // The same writable instance is returned on every call, so callers that keep the result
    // across calls need their own copy.
    serializeBytesWritable.set(outputByteBuffer.getData(), 0, outputByteBuffer.getLength());
    return serializeBytesWritable;
Пример #13
 /**
  * Converts {@code value} to an {@link InputStream}, reads it fully into memory and wraps the
  * bytes in a new {@code BytesWritable}.
  *
  * <p>NOTE(review): the body of the {@code finally} block and the closing braces are missing
  * from this listing; presumably the stream is closed there.
  *
  * @param value object convertible to an {@link InputStream}
  * @param typeConverter converter used to obtain the stream
  * @param size receives the number of bytes read
  * @return a new {@code BytesWritable} holding the stream content
  * @throws RuntimeCamelException wrapping any {@link IOException} raised while copying
  */
 public Writable create(Object value, TypeConverter typeConverter, Holder<Integer> size) {
   InputStream is = null;
   try {
     is = typeConverter.convertTo(InputStream.class, value);
     ByteArrayOutputStream bos = new ByteArrayOutputStream();
     // The last argument (false) is passed as the close flag of copyBytes.
     IOUtils.copyBytes(is, bos, HdfsConstants.DEFAULT_BUFFERSIZE, false);
     BytesWritable writable = new BytesWritable();
     // NOTE(review): ByteArrayOutputStream.toByteArray() allocates a fresh copy on every call
     // and is invoked three times below; a single local would avoid two copies.
     writable.set(bos.toByteArray(), 0, bos.toByteArray().length);
     size.value = bos.toByteArray().length;
     return writable;
   } catch (IOException ex) {
     throw new RuntimeCamelException(ex);
   } finally {
Пример #14
  protected boolean get(KafkaETLKey key, BytesWritable value) throws IOException {
    if (_messageIt != null && _messageIt.hasNext()) {
      Message msg = _messageIt.next();

      ByteBuffer buf = msg.payload();
      int origSize = buf.remaining();
      byte[] bytes = new byte[origSize];
      buf.get(bytes, buf.position(), origSize);
      value.set(bytes, 0, origSize);

      key.set(_index, _offset, msg.checksum());

      _offset += MessageSet.entrySize(msg); // increase offset
      _count++; // increase count

      return true;
    } else return false;
Пример #15
    public void map(BytesWritable dimensionKeyBytes, BytesWritable aggSeries, Context context)
        throws IOException, InterruptedException {

      DimensionKey dimensionKey = DimensionKey.fromBytes(dimensionKeyBytes.getBytes());
      String[] dimensionValues = dimensionKey.getDimensionValues();

      for (String dimensionName : config.getDimensionNames()) {

        String dimensionValue = dimensionValues[dimensionNameToIndexMapping.get(dimensionName)];

        TopKRollupPhaseOneMapOutputKey keyWrapper;
        keyWrapper = new TopKRollupPhaseOneMapOutputKey(dimensionName, dimensionValue);
        byte[] keyBytes = keyWrapper.toBytes();
        keyWritable.set(keyBytes, 0, keyBytes.length);

        context.write(keyWritable, aggSeries);
  /**
   * Converts one Avro {@code DocumentMetadata} record into a serialized entity and writes it
   * out as {@code (docIdWritable, docMetaWritable)}.
   *
   * <p>Any exception raised while processing the record is logged (with the document id when
   * it is already known) rather than propagated by the visible code.
   *
   * <p>NOTE(review): the expression that builds {@code entity}, any statement that fills
   * {@code docIdWritable}, and the closing braces are missing from this listing.
   *
   * @param avro wrapper around the document metadata record
   * @param ignore unused value
   * @param context sink for the output record
   */
  protected void map(AvroKey<DocumentMetadata> avro, NullWritable ignore, Context context)
      throws IOException, InterruptedException {
    // Kept outside the try block so the catch clause can include it in the log message.
    String docId = null;
    try {
      // TODO MiconCodeReview: I would extract a method 'private static String
      // getDocId(AvroKey<DocumentMetadata> fromAvro)'
      docId = new DocEntityId(avro.datum().getId().toString()).toString();

      // TODO MiconCodeReview: I would extract a method 'private static MatchableEntity
      // getMatchableEntity(AvroKey<DocumentMetadata> fromAvro)'
      MatchableEntity entity =
              docId, Util.avroBasicMetadataToProtoBuf(avro.datum().getBasicMetadata()));
      byte[] metaBytes = entity.data().toByteArray();
      docMetaWritable.set(metaBytes, 0, metaBytes.length);

      context.write(docIdWritable, docMetaWritable);
    } catch (Exception e) {
      log.error("Error" + (docId != null ? " while processing document " + docId : ""), e);
  /**
   * Recursively decodes one value of the given type from {@code buffer}.
   *
   * <p>A leading marker byte is read first: 0 means the value is null; otherwise the marker is
   * asserted to be 1 and decoding continues according to {@code type.getCategory()}
   * (PRIMITIVE, LIST, MAP, STRUCT or UNION).
   *
   * <p>NOTE(review): closing braces and a number of statements (for example the final
   * {@code r.set(...)} of several numeric cases and the loop exits of the string scans) are
   * missing from this listing, so the comments below describe only what is visible.
   *
   * @param buffer source of the encoded bytes
   * @param type type of the value to decode
   * @param invert passed through to every {@code buffer.read} call
   * @param reuse optional object of the matching class to fill instead of allocating a new
   *     one; the BINARY case, map keys/values and union members do not reuse it
   * @return the decoded object, or {@code null} for a null marker or a VOID primitive
   * @throws IOException if reading from {@code buffer} fails
   */
  static Object deserialize(InputByteBuffer buffer, TypeInfo type, boolean invert, Object reuse)
      throws IOException {

    // Is this field a null?
    byte isNull = buffer.read(invert);
    if (isNull == 0) {
      return null;
    assert (isNull == 1);

    switch (type.getCategory()) {
      case PRIMITIVE:
          PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type;
          switch (ptype.getPrimitiveCategory()) {
            case VOID:
                return null;
            case BOOLEAN:
                BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
                // Booleans are stored as 1 (false) or 2 (true).
                byte b = buffer.read(invert);
                assert (b == 1 || b == 2);
                r.set(b == 2);
                return r;
            case BYTE:
                ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
                // XOR with 0x80 flips the top bit of the stored byte.
                r.set((byte) (buffer.read(invert) ^ 0x80));
                return r;
            case SHORT:
                ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
                // First byte has its top bit flipped; the second byte is appended unsigned.
                int v = buffer.read(invert) ^ 0x80;
                v = (v << 8) + (buffer.read(invert) & 0xff);
                r.set((short) v);
                return r;
            case INT:
                IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
                // Four bytes, most significant first, top bit of the first byte flipped.
                int v = buffer.read(invert) ^ 0x80;
                for (int i = 0; i < 3; i++) {
                  v = (v << 8) + (buffer.read(invert) & 0xff);
                return r;
            case LONG:
                LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
                // Eight bytes, most significant first, top bit of the first byte flipped.
                long v = buffer.read(invert) ^ 0x80;
                for (int i = 0; i < 7; i++) {
                  v = (v << 8) + (buffer.read(invert) & 0xff);
                return r;
            case FLOAT:
                FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
                // Assemble the four stored bytes into an int, most significant byte first.
                int v = 0;
                for (int i = 0; i < 4; i++) {
                  v = (v << 8) + (buffer.read(invert) & 0xff);
                if ((v & (1 << 31)) == 0) {
                  // negative number, flip all bits
                  v = ~v;
                } else {
                  // positive number, flip the first bit
                  v = v ^ (1 << 31);
                return r;
            case DOUBLE:
                DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
                // Assemble the eight stored bytes into a long, most significant byte first.
                long v = 0;
                for (int i = 0; i < 8; i++) {
                  v = (v << 8) + (buffer.read(invert) & 0xff);
                if ((v & (1L << 63)) == 0) {
                  // negative number, flip all bits
                  v = ~v;
                } else {
                  // positive number, flip the first bit
                  v = v ^ (1L << 63);
                return r;
            case STRING:
                Text r = reuse == null ? new Text() : (Text) reuse;
                // Get the actual length first
                int start = buffer.tell();
                int length = 0;
                do {
                  byte b = buffer.read(invert);
                  if (b == 0) {
                    // end of string
                  if (b == 1) {
                    // the last char is an escape char. read the actual char
                } while (true);

                if (length == buffer.tell() - start) {
                  // No escaping happened, so we are already done.
                  r.set(buffer.getData(), start, length);
                } else {
                  // Escaping happened, we need to copy byte-by-byte.
                  // 1. Set the length first.
                  r.set(buffer.getData(), start, length);
                  // 2. Reset the pointer.
                  // 3. Copy the data.
                  byte[] rdata = r.getBytes();
                  for (int i = 0; i < length; i++) {
                    byte b = buffer.read(invert);
                    if (b == 1) {
                      // The last char is an escape char, read the actual char.
                      // The serialization format escape \0 to \1, and \1 to \2,
                      // to make sure the string is null-terminated.
                      b = (byte) (buffer.read(invert) - 1);
                    rdata[i] = b;
                  // 4. Read the null terminator.
                  byte b = buffer.read(invert);
                  assert (b == 0);
                return r;

            case BINARY:
                // Unlike the other primitive cases, a new writable is always allocated here.
                BytesWritable bw = new BytesWritable();
                // Get the actual length first
                int start = buffer.tell();
                int length = 0;
                do {
                  byte b = buffer.read(invert);
                  if (b == 0) {
                    // end of string
                  if (b == 1) {
                    // the last char is an escape char. read the actual char
                } while (true);

                if (length == buffer.tell() - start) {
                  // No escaping happened, so we are already done.
                  bw.set(buffer.getData(), start, length);
                } else {
                  // Escaping happened, we need to copy byte-by-byte.
                  // 1. Set the length first.
                  bw.set(buffer.getData(), start, length);
                  // 2. Reset the pointer.
                  // 3. Copy the data.
                  byte[] rdata = bw.getBytes();
                  for (int i = 0; i < length; i++) {
                    byte b = buffer.read(invert);
                    if (b == 1) {
                      // The last char is an escape char, read the actual char.
                      // The serialization format escape \0 to \1, and \1 to \2,
                      // to make sure the string is null-terminated.
                      b = (byte) (buffer.read(invert) - 1);
                    rdata[i] = b;
                  // 4. Read the null terminator.
                  byte b = buffer.read(invert);
                  assert (b == 0);
                return bw;

            case DATE:
                DateWritable d = reuse == null ? new DateWritable() : (DateWritable) reuse;
                // Read with the same eight-byte layout as the LONG case above.
                long v = buffer.read(invert) ^ 0x80;
                for (int i = 0; i < 7; i++) {
                  v = (v << 8) + (buffer.read(invert) & 0xff);
                return d;

            case TIMESTAMP:
              TimestampWritable t =
                  (reuse == null ? new TimestampWritable() : (TimestampWritable) reuse);
              // Eight raw bytes are handed to the writable, which decodes them itself.
              byte[] bytes = new byte[8];

              for (int i = 0; i < bytes.length; i++) {
                bytes[i] = buffer.read(invert);
              t.setBinarySortable(bytes, 0);
              return t;

                throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory());
      case LIST:
          ListTypeInfo ltype = (ListTypeInfo) type;
          TypeInfo etype = ltype.getListElementTypeInfo();

          // Create the list if needed
          ArrayList<Object> r = reuse == null ? new ArrayList<Object>() : (ArrayList<Object>) reuse;

          // Read the list
          int size = 0;
          while (true) {
            int more = buffer.read(invert);
            if (more == 0) {
              // \0 to terminate
            // \1 followed by each element
            assert (more == 1);
            if (size == r.size()) {
            // The existing element at this index is offered for reuse to the recursive call.
            r.set(size, deserialize(buffer, etype, invert, r.get(size)));
          // Remove additional elements if the list is reused
          while (r.size() > size) {
            r.remove(r.size() - 1);
          return r;
      case MAP:
          MapTypeInfo mtype = (MapTypeInfo) type;
          TypeInfo ktype = mtype.getMapKeyTypeInfo();
          TypeInfo vtype = mtype.getMapValueTypeInfo();

          // Create the map if needed
          Map<Object, Object> r;
          if (reuse == null) {
            r = new HashMap<Object, Object>();
          } else {
            r = (HashMap<Object, Object>) reuse;

          while (true) {
            int more = buffer.read(invert);
            if (more == 0) {
              // \0 to terminate
            // \1 followed by each key and then each value
            assert (more == 1);
            // Keys and values are always freshly allocated (reuse is passed as null).
            Object k = deserialize(buffer, ktype, invert, null);
            Object v = deserialize(buffer, vtype, invert, null);
            r.put(k, v);
          return r;
      case STRUCT:
          StructTypeInfo stype = (StructTypeInfo) type;
          List<TypeInfo> fieldTypes = stype.getAllStructFieldTypeInfos();
          int size = fieldTypes.size();
          // Create the struct if needed
          ArrayList<Object> r =
              reuse == null ? new ArrayList<Object>(size) : (ArrayList<Object>) reuse;
          assert (r.size() <= size);
          // Set the size of the struct
          while (r.size() < size) {
          // Read one field by one field
          for (int eid = 0; eid < size; eid++) {
            r.set(eid, deserialize(buffer, fieldTypes.get(eid), invert, r.get(eid)));
          return r;
      case UNION:
          UnionTypeInfo utype = (UnionTypeInfo) type;
          StandardUnion r = reuse == null ? new StandardUnion() : (StandardUnion) reuse;
          // Read the tag
          byte tag = buffer.read(invert);
              deserialize(buffer, utype.getAllUnionObjectTypeInfos().get(tag), invert, null));
          return r;
          throw new RuntimeException("Unrecognized type: " + type.getCategory());
Пример #18
  /**
   * Cross-checks the fast binary-sortable writer/reader against the regular SerDe for a set of
   * rows.
   *
   * <p>Phases visible in this listing:
   *
   * <ol>
   *   <li>serialize every row with {@code BinarySortableSerializeWrite} and verify that
   *       consecutive serialized rows compare in the requested order;
   *   <li>read those bytes back with {@code BinarySortableDeserializeRead}, and again with the
   *       last byte clipped, expecting an {@code EOFException} on the last written field;
   *   <li>deserialize the same bytes with the SerDe and compare values with {@code equals};
   *   <li>serialize the rows with the SerDe and compare the bytes with the fast writer's
   *       output, reporting per-field differences on mismatch;
   *   <li>read the SerDe-produced bytes back with {@code BinarySortableDeserializeRead}.
   * </ol>
   *
   * <p>NOTE(review): closing braces and the heads of many calls (verification helpers, failure
   * reporting) are missing from this listing; only their argument lines remain.
   *
   * @param rows the rows under test, one array of column objects per row
   * @param ascending expected ordering of consecutive serialized rows
   * @param useIncludeColumns when true, a random subset of columns is selected for reading
   * @param doWriteFewerColumns when true, only the columns of {@code writeRowOI} are written and
   *     {@code serde_fewer} is used in place of {@code serde}
   * @param r random source used to choose the included columns
   */
  private void testBinarySortableFast(
      SerdeRandomRowSource source,
      Object[][] rows,
      boolean[] columnSortOrderIsDesc,
      byte[] columnNullMarker,
      byte[] columnNotNullMarker,
      AbstractSerDe serde,
      StructObjectInspector rowOI,
      AbstractSerDe serde_fewer,
      StructObjectInspector writeRowOI,
      boolean ascending,
      PrimitiveTypeInfo[] primitiveTypeInfos,
      boolean useIncludeColumns,
      boolean doWriteFewerColumns,
      Random r)
      throws Throwable {

    int rowCount = rows.length;
    int columnCount = primitiveTypeInfos.length;

    // Stays null unless useIncludeColumns is set; then each column is included at random.
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
      columnsToInclude = new boolean[columnCount];
      for (int i = 0; i < columnCount; i++) {
        columnsToInclude[i] = r.nextBoolean();

    int writeColumnCount = columnCount;
    if (doWriteFewerColumns) {
      writeColumnCount = writeRowOI.getAllStructFieldRefs().size();

    BinarySortableSerializeWrite binarySortableSerializeWrite =
        new BinarySortableSerializeWrite(
            columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);

    // Try to serialize

    // One Writable per row.
    BytesWritable serializeWriteBytes[] = new BytesWritable[rowCount];

    // For each row, the cumulative output length recorded after each written field.
    int[][] perFieldWriteLengthsArray = new int[rowCount][];
    for (int i = 0; i < rowCount; i++) {
      Object[] row = rows[i];
      Output output = new Output();

      int[] perFieldWriteLengths = new int[columnCount];
      for (int index = 0; index < writeColumnCount; index++) {

        Writable writable = (Writable) row[index];

            binarySortableSerializeWrite, primitiveTypeInfos[index], writable);
        perFieldWriteLengths[index] = output.getLength();
      perFieldWriteLengthsArray[i] = perFieldWriteLengths;

      BytesWritable bytesWritable = new BytesWritable();
      bytesWritable.set(output.getData(), 0, output.getLength());
      serializeWriteBytes[i] = bytesWritable;
      if (i > 0) {
        // The previous row must not sort after this one when ascending (or before it when
        // descending).
        int compareResult = serializeWriteBytes[i - 1].compareTo(serializeWriteBytes[i]);
        if ((compareResult < 0 && !ascending) || (compareResult > 0 && ascending)) {
              "Test failed in "
                  + (ascending ? "ascending" : "descending")
                  + " order with "
                  + (i - 1)
                  + " and "
                  + i);
              "serialized data ["
                  + (i - 1)
                  + "] = "
                  + TestBinarySortableSerDe.hexString(serializeWriteBytes[i - 1]));
              "serialized data ["
                  + i
                  + "] = "
                  + TestBinarySortableSerDe.hexString(serializeWriteBytes[i]));
          fail("Sort order of serialized " + (i - 1) + " and " + i + " are reversed!");

    // Try to deserialize using DeserializeRead our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
      Object[] row = rows[i];
      BinarySortableDeserializeRead binarySortableDeserializeRead =
          new BinarySortableDeserializeRead(
              primitiveTypeInfos, /* useExternalBuffer */ false, columnSortOrderIsDesc);

      BytesWritable bytesWritable = serializeWriteBytes[i];
      binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());

      for (int index = 0; index < columnCount; index++) {
        if (useIncludeColumns && !columnsToInclude[index]) {
        } else if (index >= writeColumnCount) {
          // Should come back a null.
              binarySortableDeserializeRead, primitiveTypeInfos[index], null);
        } else {
          Writable writable = (Writable) row[index];
              binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
      if (writeColumnCount == columnCount) {

       * Clip off one byte and expect to get an EOFException on the write field.
      BinarySortableDeserializeRead binarySortableDeserializeRead2 =
          new BinarySortableDeserializeRead(
              primitiveTypeInfos, /* useExternalBuffer */ false, columnSortOrderIsDesc);

          bytesWritable.getBytes(), 0, bytesWritable.getLength() - 1); // One fewer byte.

      for (int index = 0; index < writeColumnCount; index++) {
        Writable writable = (Writable) row[index];
        if (index == writeColumnCount - 1) {
          // Only the last written field is expected to run off the end of the clipped buffer.
          boolean threw = false;
          try {
                binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
          } catch (EOFException e) {
            //          debugDetailedReadPositionString =
            // binarySortableDeserializeRead2.getDetailedReadPositionString();
            //          debugStackTrace = e.getStackTrace();
            threw = true;
        } else {
          if (useIncludeColumns && !columnsToInclude[index]) {
          } else {
                binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);

    // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
      BytesWritable bytesWritable = serializeWriteBytes[i];

      // Note that regular SerDe doesn't tolerate fewer columns.
      List<Object> deserializedRow;
      if (doWriteFewerColumns) {
        deserializedRow = (List<Object>) serde_fewer.deserialize(bytesWritable);
      } else {
        deserializedRow = (List<Object>) serde.deserialize(bytesWritable);

      Object[] row = rows[i];
      for (int index = 0; index < writeColumnCount; index++) {
        Object expected = row[index];
        Object object = deserializedRow.get(index);
        // Both sides must be null together; otherwise they are compared with equals().
        if (expected == null || object == null) {
          if (expected != null || object != null) {
            fail("SerDe deserialized NULL column mismatch");
        } else {
          if (!object.equals(expected)) {
                "SerDe deserialized value does not match (expected "
                    + expected.getClass().getName()
                    + " "
                    + expected.toString()
                    + ", actual "
                    + object.getClass().getName()
                    + " "
                    + object.toString()
                    + ")");

    // One Writable per row.
    BytesWritable serdeBytes[] = new BytesWritable[rowCount];

    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    for (int i = 0; i < rowCount; i++) {
      Object[] row = rows[i];

      // Since SerDe reuses memory, we will need to make a copy.
      BytesWritable serialized;
      if (doWriteFewerColumns) {
        serialized = (BytesWritable) serde_fewer.serialize(row, rowOI);
      } else {
        serialized = (BytesWritable) serde.serialize(row, rowOI);
      // NOTE(review): as listed, nothing copies `serialized` into the new bytesWritable before
      // its bytes are read; presumably that statement was elided from this listing - confirm.
      BytesWritable bytesWritable = new BytesWritable();
      byte[] serDeOutput =
          Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());

      byte[] serializeWriteExpected =
              serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
      if (!Arrays.equals(serDeOutput, serializeWriteExpected)) {
        int mismatchPos = -1;
        if (serDeOutput.length != serializeWriteExpected.length) {
          // Scan the common prefix of the two arrays for a differing byte.
          for (int b = 0; b < Math.min(serDeOutput.length, serializeWriteExpected.length); b++) {
            if (serDeOutput[b] != serializeWriteExpected[b]) {
              mismatchPos = b;
              "Different byte array lengths: serDeOutput.length "
                  + serDeOutput.length
                  + ", serializeWriteExpected.length "
                  + serializeWriteExpected.length
                  + " mismatchPos "
                  + mismatchPos
                  + " perFieldWriteLengths "
                  + Arrays.toString(perFieldWriteLengthsArray[i]));
        List<Integer> differentPositions = new ArrayList();
        for (int b = 0; b < serDeOutput.length; b++) {
          if (serDeOutput[b] != serializeWriteExpected[b]) {
        if (differentPositions.size() > 0) {
          // Split both byte arrays at the recorded per-field end offsets for the report.
          List<String> serializeWriteExpectedFields = new ArrayList<String>();
          List<String> serDeFields = new ArrayList<String>();
          int f = 0;
          int lastBegin = 0;
          for (int b = 0; b < serDeOutput.length; b++) {
            int writeLength = perFieldWriteLengthsArray[i][f];
            if (b + 1 == writeLength) {
                  displayBytes(serializeWriteExpected, lastBegin, writeLength - lastBegin));
              serDeFields.add(displayBytes(serDeOutput, lastBegin, writeLength - lastBegin));
              lastBegin = b + 1;
              "SerializeWrite and SerDe serialization does not match at positions "
                  + differentPositions.toString()
                  + "\n(SerializeWrite: "
                  + serializeWriteExpectedFields.toString()
                  + "\nSerDe: "
                  + serDeFields.toString()
                  + "\nperFieldWriteLengths "
                  + Arrays.toString(perFieldWriteLengthsArray[i])
                  + "\nprimitiveTypeInfos "
                  + Arrays.toString(primitiveTypeInfos)
                  + "\nrow "
                  + Arrays.toString(row));
      serdeBytes[i] = bytesWritable;

    // Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
    for (int i = 0; i < rowCount; i++) {
      Object[] row = rows[i];
      BinarySortableDeserializeRead binarySortableDeserializeRead =
          new BinarySortableDeserializeRead(
              primitiveTypeInfos, /* useExternalBuffer */ false, columnSortOrderIsDesc);

      BytesWritable bytesWritable = serdeBytes[i];
      binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());

      for (int index = 0; index < columnCount; index++) {
        if (useIncludeColumns && !columnsToInclude[index]) {
        } else if (index >= writeColumnCount) {
          // Should come back a null.
              binarySortableDeserializeRead, primitiveTypeInfos[index], null);
        } else {
          Writable writable = (Writable) row[index];
              binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
      if (writeColumnCount == columnCount) {
  /**
   * Processes all values that arrive for one reduce key.
   *
   * <p>When the key differs from the current {@code groupKey}, the key is deserialized into
   * {@code keyObject} and remembered as the new group key. The values are then handed to
   * {@code processVectors} or {@code processKeyValues} depending on {@code vectorized}.
   *
   * <p>Any {@code Throwable} sets {@code abort}, clears the reduce work and is rethrown: an
   * {@code OutOfMemoryError} as-is, everything else logged and wrapped in a
   * {@code RuntimeException}.
   *
   * <p>NOTE(review): closing braces and some statements (for example what happens when
   * {@code reducer.getDone()} is true, and the group start/end notifications) are missing from
   * this listing.
   *
   * @param key the reduce key; cast to {@code BytesWritable}
   * @param values the values grouped under {@code key}
   * @throws IOException declared by the signature
   */
  public <E> void processRow(Object key, Iterator<E> values) throws IOException {
    if (reducer.getDone()) {

    try {
      BytesWritable keyWritable = (BytesWritable) key;
      byte tag = 0;
      if (isTagged) {
        // remove the tag from key coming out of reducer
        // and store it in separate variable.
        int size = keyWritable.getSize() - 1;
        tag = keyWritable.get()[size];

      if (!keyWritable.equals(groupKey)) {
        // If an operator wants to do some work at the beginning of a group
        if (groupKey == null) { // the first group
          groupKey = new BytesWritable();
        } else {
          // If an operator wants to do some work at the end of a group
          LOG.trace("End Group");

        try {
          keyObject = inputKeyDeserializer.deserialize(keyWritable);
        } catch (Exception e) {
          throw new HiveException(
              "Hive Runtime Error: Unable to deserialize reduce input key from "
                  + Utilities.formatBinaryString(keyWritable.get(), 0, keyWritable.getSize())
                  + " with properties "
                  + keyTableDesc.getProperties(),

        // Copy the key bytes so groupKey does not alias the incoming writable.
        groupKey.set(keyWritable.get(), 0, keyWritable.getSize());
        LOG.trace("Start Group");
      /* this.keyObject passed via reference */
      if (vectorized) {
        processVectors(values, tag);
      } else {
        processKeyValues(values, tag);

    } catch (Throwable e) {
      abort = true;
      Utilities.setReduceWork(jc, null);
      if (e instanceof OutOfMemoryError) {
        // Don't create a new object if we are already out of memory
        throw (OutOfMemoryError) e;
      } else {
        String msg = "Fatal error: " + e;
        LOG.fatal(msg, e);
        throw new RuntimeException(e);
Пример #20
 /**
  * Emits one output record per input triple, keyed by the triple's predicate bytes.
  *
  * <p>{@code pred} is refilled from the triple's buffer using the predicate offset and length,
  * then written to the context together with the {@code one} field.
  * NOTE(review): presumably {@code one} holds a constant count of 1 so that a later stage can
  * tally predicates -- confirm against the field declaration, which is not visible here.
  *
  * @param key the input key; not read by this method
  * @param value the triple whose predicate slice is emitted
  * @param context the context that receives the {@code (pred, one)} pair
  * @throws IOException declared by the signature
  * @throws InterruptedException declared by the signature
  */
 public void map(NullWritable key, Triple value, Context context)
     throws IOException, InterruptedException {
   // Reuse the pred field rather than allocating a new key object for each record.
   pred.set(value.getBuffer(), value.getPredicateOffset(), value.getPredicateLength());
   context.write(pred, one);
  /**
   * Round-trip test: writes records through {@code SequenceFileAsBinaryOutputFormat} as raw
   * {@code BytesWritable} pairs, then reads them back through {@code SequenceFileInputFormat} as
   * {@code IntWritable}/{@code DoubleWritable} pairs and compares them with values drawn from the
   * same {@code Random} instance.
   *
   * <p>NOTE(review): this listing appears truncated. The right-hand side of {@code context}, the
   * serialization into {@code outbuf}, the assertion call heads and the {@code finally} bodies
   * are not visible, so the comments below describe only what the visible lines show.
   *
   * @throws IOException declared by the signature
   * @throws InterruptedException declared by the signature
   */
  public void testBinary() throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = new Job(conf);

    // Output goes to "outseq" under the test.build.data property, defaulting to /tmp.
    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
    Random r = new Random();
    // NOTE(review): seed is drawn here but never used in the visible lines; presumably
    // r.setSeed(seed) calls before the write and read phases were lost -- confirm.
    long seed = r.nextLong();

    FileOutputFormat.setOutputPath(job, outdir);

    // Register IntWritable/DoubleWritable as the sequence file's key/value classes, even though
    // the record writer below is handed BytesWritable pairs.
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);

    // Turn on output compression with the BLOCK compression type.
    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    // Reusable raw key/value holders passed to the writer.
    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();

    // NOTE(review): the initializer of this declaration is missing from the listing.
    TaskAttemptContext context =
    OutputFormat<BytesWritable, BytesWritable> outputFormat =
        new SequenceFileAsBinaryOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);

    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
    try {
      // Write RECORDS pairs; each iteration draws an int for the key, then a double for the value.
      // NOTE(review): no visible statement serializes iwritable/dwritable into outbuf (or resets
      // it) before its bytes are copied into bkey/bval -- those lines appear to be missing.
      for (int i = 0; i < RECORDS; ++i) {
        iwritable = new IntWritable(r.nextInt());
        bkey.set(outbuf.getData(), 0, outbuf.getLength());
        dwritable = new DoubleWritable(r.nextDouble());
        bval.set(outbuf.getData(), 0, outbuf.getLength());
        writer.write(bkey, bval);
    } finally {

    // Read the same directory back through the typed sequence file input format.
    InputFormat<IntWritable, DoubleWritable> iformat =
        new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    SequenceFileInputFormat.setInputPaths(job, outdir);
    LOG.info("Reading data by SequenceFileInputFormat");
    for (InputSplit split : iformat.getSplits(job)) {
      RecordReader<IntWritable, DoubleWritable> reader = iformat.createRecordReader(split, context);
      MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
          new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
      reader.initialize(split, mcontext);
      try {
        int sourceInt;
        double sourceDouble;
        while (reader.nextKeyValue()) {
          // Expected values are drawn in the same int-then-double order used while writing.
          sourceInt = r.nextInt();
          sourceDouble = r.nextDouble();
          iwritable = reader.getCurrentKey();
          dwritable = reader.getCurrentValue();
          // NOTE(review): the assertion calls these message/condition arguments belong to, and
          // any increment of count, are not visible in this listing.
              "Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*",
              "Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*",
              Double.compare(dwritable.get(), sourceDouble) == 0);
      } finally {
    // The number of records counted while reading must equal RECORDS.
    assertEquals("Some records not found", RECORDS, count);