Esempio n. 1
0
  public static int getNanos(byte[] bytes, int offset) {
    VInt vInt = LazyBinaryUtils.threadLocalVInt.get();
    LazyBinaryUtils.readVInt(bytes, offset, vInt);
    int val = vInt.value;
    if (val < 0) {
      // This means there is a second VInt present that specifies additional bits of the timestamp.
      // The reversed nanoseconds value is still encoded in this VInt.
      val = -val - 1;
    }
    int len = (int) Math.floor(Math.log10(val)) + 1;

    // Reverse the value
    int tmp = 0;
    while (val != 0) {
      tmp *= 10;
      tmp += val % 10;
      val /= 10;
    }
    val = tmp;

    if (len < 9) {
      val *= Math.pow(10, 9 - len);
    }
    return val;
  }
Esempio n. 2
0
  public void readFields(DataInput in) throws IOException {
    in.readFully(internalBytes, 0, 4);
    if (TimestampWritable.hasDecimalOrSecondVInt(internalBytes[0])) {
      in.readFully(internalBytes, 4, 1);
      int len = (byte) WritableUtils.decodeVIntSize(internalBytes[4]);
      if (len > 1) {
        in.readFully(internalBytes, 5, len - 1);
      }

      long vlong = LazyBinaryUtils.readVLongFromByteArray(internalBytes, 4);
      if (vlong < -1000000000 || vlong > 999999999) {
        throw new IOException(
            "Invalid first vint value (encoded nanoseconds) of a TimestampWritable: "
                + vlong
                + ", expected to be between -1000000000 and 999999999.");
        // Note that -1000000000 is a valid value corresponding to a nanosecond timestamp
        // of 999999999, because if the second VInt is present, we use the value
        // (-reversedNanoseconds - 1) as the second VInt.
      }
      if (vlong < 0) {
        // This indicates there is a second VInt containing the additional bits of the seconds
        // field.
        in.readFully(internalBytes, 4 + len, 1);
        int secondVIntLen = (byte) WritableUtils.decodeVIntSize(internalBytes[4 + len]);
        if (secondVIntLen > 1) {
          in.readFully(internalBytes, 5 + len, secondVIntLen - 1);
        }
      }
    }
    currentBytes = internalBytes;
    this.offset = 0;
  }
Esempio n. 3
0
  /** Parse the bytes and fill elementStart, elementLength, elementInited and elementIsNull. */
  private void parse() {

    byte[] bytes = this.bytes.getData();

    // get the vlong that represents the map size
    LazyBinaryUtils.readVInt(bytes, start, vInt);
    arraySize = vInt.value;
    if (0 == arraySize) {
      parsed = true;
      return;
    }

    // adjust arrays
    adjustArraySize(arraySize);
    // find out the null-bytes
    int arryByteStart = start + vInt.length;
    int nullByteCur = arryByteStart;
    int nullByteEnd = arryByteStart + (arraySize + 7) / 8;
    // the begin the real elements
    int lastElementByteEnd = nullByteEnd;
    // the list element object inspector
    ObjectInspector listEleObjectInspector =
        ((ListObjectInspector) oi).getListElementObjectInspector();
    // parsing elements one by one
    for (int i = 0; i < arraySize; i++) {
      elementIsNull[i] = true;
      if ((bytes[nullByteCur] & (1 << (i % 8))) != 0) {
        elementIsNull[i] = false;
        LazyBinaryUtils.checkObjectByteInfo(
            listEleObjectInspector, bytes, lastElementByteEnd, recordInfo);
        elementStart[i] = lastElementByteEnd + recordInfo.elementOffset;
        elementLength[i] = recordInfo.elementSize;
        lastElementByteEnd = elementStart[i] + elementLength[i];
      }
      // move onto the next null byte
      if (7 == (i % 8)) {
        nullByteCur++;
      }
    }

    Arrays.fill(elementInited, 0, arraySize, false);
    parsed = true;
  }
Esempio n. 4
0
 public static ObjectInspector createColumnarStructInspector(
     List<String> columnNames, List<TypeInfo> columnTypes) {
   ArrayList<ObjectInspector> columnObjectInspectors =
       new ArrayList<ObjectInspector>(columnTypes.size());
   for (int i = 0; i < columnTypes.size(); i++) {
     columnObjectInspectors.add(
         LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(columnTypes.get(i)));
   }
   return ObjectInspectorFactory.getColumnarStructObjectInspector(
       columnNames, columnObjectInspectors);
 }
Esempio n. 5
0
  /**
   * Gets seconds stored as integer at bytes[offset]
   *
   * @param bytes
   * @param offset
   * @return the number of seconds
   */
  public static long getSeconds(byte[] bytes, int offset) {
    int lowest31BitsOfSecondsAndFlag = bytesToInt(bytes, offset);
    if (lowest31BitsOfSecondsAndFlag >= 0
        || // the "has decimal or second VInt" flag is not set
        !hasSecondVInt(bytes[offset + 4])) {
      // The entire seconds field is stored in the first 4 bytes.
      return lowest31BitsOfSecondsAndFlag & LOWEST_31_BITS_OF_SEC_MASK;
    }

    // We compose the seconds field from two parts. The lowest 31 bits come from the first four
    // bytes. The higher-order bits come from the second VInt that follows the nanos field.
    return ((long) (lowest31BitsOfSecondsAndFlag & LOWEST_31_BITS_OF_SEC_MASK))
        | (LazyBinaryUtils.readVLongFromByteArray(
                bytes, offset + 4 + WritableUtils.decodeVIntSize(bytes[offset + 4]))
            << 31);
  }
Esempio n. 6
0
 public static void setTimestamp(Timestamp t, byte[] bytes, int offset) {
   boolean hasDecimalOrSecondVInt = hasDecimalOrSecondVInt(bytes[offset]);
   long seconds = (long) TimestampWritable.getSeconds(bytes, offset);
   int nanos = 0;
   if (hasDecimalOrSecondVInt) {
     nanos = TimestampWritable.getNanos(bytes, offset + 4);
     if (hasSecondVInt(bytes[offset + 4])) {
       seconds +=
           LazyBinaryUtils.readVLongFromByteArray(
               bytes, offset + 4 + WritableUtils.decodeVIntSize(bytes[offset + 4]));
     }
   }
   t.setTime(seconds * 1000);
   if (nanos != 0) {
     t.setNanos(nanos);
   }
 }
  private LazyBinaryStructObjectInspector createInternalOi(MapJoinObjectSerDeContext valCtx)
      throws SerDeException {
    // We are going to use LBSerDe to serialize values; create OI for retrieval.
    List<? extends StructField> fields =
        ((StructObjectInspector) valCtx.getSerDe().getObjectInspector()).getAllStructFieldRefs();
    List<String> colNames = new ArrayList<String>(fields.size());
    List<ObjectInspector> colOis = new ArrayList<ObjectInspector>(fields.size());
    for (int i = 0; i < fields.size(); ++i) {
      StructField field = fields.get(i);
      colNames.add(field.getFieldName());
      // It would be nice if OI could return typeInfo...
      TypeInfo typeInfo =
          TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName());
      colOis.add(LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(typeInfo));
    }

    return LazyBinaryObjectInspectorFactory.getLazyBinaryStructObjectInspector(colNames, colOis);
  }
Esempio n. 8
0
  /**
   * Given an integer representing nanoseconds, write its serialized value to the byte array b at
   * offset
   *
   * @param nanos
   * @param b
   * @param offset
   * @return
   */
  private static boolean setNanosBytes(int nanos, byte[] b, int offset, boolean hasSecondVInt) {
    int decimal = 0;
    if (nanos != 0) {
      int counter = 0;
      while (counter < 9) {
        decimal *= 10;
        decimal += nanos % 10;
        nanos /= 10;
        counter++;
      }
    }

    if (hasSecondVInt || decimal != 0) {
      // We use the sign of the reversed-nanoseconds field to indicate that there is a second VInt
      // present.
      LazyBinaryUtils.writeVLongToByteArray(b, offset, hasSecondVInt ? (-decimal - 1) : decimal);
    }
    return decimal != 0;
  }
Esempio n. 9
0
  /**
   * Writes a Timestamp's serialized value to byte array b at the given offset
   *
   * @param timestamp to convert to bytes
   * @param b destination byte array
   * @param offset destination offset in the byte array
   */
  public static void convertTimestampToBytes(Timestamp t, byte[] b, int offset) {
    long millis = t.getTime();
    int nanos = t.getNanos();

    long seconds = millisToSeconds(millis);
    boolean hasSecondVInt = seconds < 0 || seconds > Integer.MAX_VALUE;
    boolean hasDecimal = setNanosBytes(nanos, b, offset + 4, hasSecondVInt);

    int firstInt = (int) seconds;
    if (hasDecimal || hasSecondVInt) {
      firstInt |= DECIMAL_OR_SECOND_VINT_FLAG;
    } else {
      firstInt &= LOWEST_31_BITS_OF_SEC_MASK;
    }
    intToBytes(firstInt, b, offset);

    if (hasSecondVInt) {
      LazyBinaryUtils.writeVLongToByteArray(
          b, offset + 4 + WritableUtils.decodeVIntSize(b[offset + 4]), seconds >> 31);
    }
  }
Esempio n. 10
0
  /**
   * Given a byte[] that has binary sortable data, initialize the internal structures to hold that
   * data
   *
   * @param bytes the byte array that holds the binary sortable representation
   * @param binSortOffset offset of the binary-sortable representation within the buffer.
   */
  public void setBinarySortable(byte[] bytes, int binSortOffset) {
    // Flip the sign bit (and unused bits of the high-order byte) of the seven-byte long back.
    long seconds = readSevenByteLong(bytes, binSortOffset) ^ SEVEN_BYTE_LONG_SIGN_FLIP;
    int nanos = bytesToInt(bytes, binSortOffset + 7);
    int firstInt = (int) seconds;
    boolean hasSecondVInt = seconds < 0 || seconds > Integer.MAX_VALUE;
    if (nanos != 0 || hasSecondVInt) {
      firstInt |= DECIMAL_OR_SECOND_VINT_FLAG;
    } else {
      firstInt &= LOWEST_31_BITS_OF_SEC_MASK;
    }

    intToBytes(firstInt, internalBytes, 0);
    setNanosBytes(nanos, internalBytes, 4, hasSecondVInt);
    if (hasSecondVInt) {
      LazyBinaryUtils.writeVLongToByteArray(
          internalBytes, 4 + WritableUtils.decodeVIntSize(internalBytes[4]), seconds >> 31);
    }

    currentBytes = internalBytes;
    this.offset = 0;
  }
Esempio n. 11
0
 @Override
 public void init(ByteArrayRef bytes, int start, int length) {
   assert (4 == length);
   data.set(Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes.getData(), start)));
 }
 @Override
 public void init(ByteArrayRef bytes, int start, int length) {
   assert (8 == length);
   data.set(Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes.getData(), start)));
 }
 public static void writeVLong(RandomAccessOutput byteStream, long l) {
   byte[] vLongBytes = vLongBytesThreadLocal.get();
   int len = LazyBinaryUtils.writeVLongToByteArray(vLongBytes, l);
   byteStream.write(vLongBytes, 0, len);
 }
 /**
  * Write a zero-compressed encoded long to a byte array.
  *
  * @param bytes the byte array/stream
  * @param l the long
  */
 public static int writeVLongToByteArray(byte[] bytes, long l) {
   return LazyBinaryUtils.writeVLongToByteArray(bytes, 0, l);
 }
 /**
  * Check a particular field and set its size and offset in bytes based on the field type and the
  * bytes arrays.
  *
  * <p>For void, boolean, byte, short, int, long, float and double, there is no offset and the size
  * is fixed. For string, map, list, struct, the first four bytes are used to store the size. So
  * the offset is 4 and the size is computed by concating the first four bytes together. The first
  * four bytes are defined with respect to the offset in the bytes arrays. For timestamp, if the
  * first bit is 0, the record length is 4, otherwise a VInt begins at the 5th byte and its length
  * is added to 4.
  *
  * @param objectInspector object inspector of the field
  * @param bytes bytes arrays store the table row
  * @param offset offset of this field
  * @param recordInfo modify this byteinfo object and return it
  */
 public static void checkObjectByteInfo(
     ObjectInspector objectInspector, byte[] bytes, int offset, RecordInfo recordInfo, VInt vInt) {
   Category category = objectInspector.getCategory();
   switch (category) {
     case PRIMITIVE:
       PrimitiveCategory primitiveCategory =
           ((PrimitiveObjectInspector) objectInspector).getPrimitiveCategory();
       switch (primitiveCategory) {
         case VOID:
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = 0;
           break;
         case BOOLEAN:
         case BYTE:
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = 1;
           break;
         case SHORT:
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = 2;
           break;
         case FLOAT:
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = 4;
           break;
         case DOUBLE:
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = 8;
           break;
         case INT:
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
           break;
         case LONG:
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
           break;
         case STRING:
           // using vint instead of 4 bytes
           LazyBinaryUtils.readVInt(bytes, offset, vInt);
           recordInfo.elementOffset = vInt.length;
           recordInfo.elementSize = vInt.value;
           break;
         case CHAR:
         case VARCHAR:
           LazyBinaryUtils.readVInt(bytes, offset, vInt);
           recordInfo.elementOffset = vInt.length;
           recordInfo.elementSize = vInt.value;
           break;
         case BINARY:
           // using vint instead of 4 bytes
           LazyBinaryUtils.readVInt(bytes, offset, vInt);
           recordInfo.elementOffset = vInt.length;
           recordInfo.elementSize = vInt.value;
           break;
         case DATE:
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
           break;
         case TIMESTAMP:
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = TimestampWritable.getTotalLength(bytes, offset);
           break;
         case DECIMAL:
           // using vint instead of 4 bytes
           LazyBinaryUtils.readVInt(bytes, offset, vInt);
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = vInt.length;
           LazyBinaryUtils.readVInt(bytes, offset + vInt.length, vInt);
           recordInfo.elementSize += vInt.length + vInt.value;
           break;
         default:
           {
             throw new RuntimeException("Unrecognized primitive type: " + primitiveCategory);
           }
       }
       break;
     case LIST:
     case MAP:
     case STRUCT:
     case UNION:
       recordInfo.elementOffset = 4;
       recordInfo.elementSize = LazyBinaryUtils.byteArrayToInt(bytes, offset);
       break;
     default:
       {
         throw new RuntimeException("Unrecognized non-primitive type: " + category);
       }
   }
 }