public void readFields(DataInput in) throws IOException { in.readFully(internalBytes, 0, 4); if (TimestampWritable.hasDecimalOrSecondVInt(internalBytes[0])) { in.readFully(internalBytes, 4, 1); int len = (byte) WritableUtils.decodeVIntSize(internalBytes[4]); if (len > 1) { in.readFully(internalBytes, 5, len - 1); } long vlong = LazyBinaryUtils.readVLongFromByteArray(internalBytes, 4); if (vlong < -1000000000 || vlong > 999999999) { throw new IOException( "Invalid first vint value (encoded nanoseconds) of a TimestampWritable: " + vlong + ", expected to be between -1000000000 and 999999999."); // Note that -1000000000 is a valid value corresponding to a nanosecond timestamp // of 999999999, because if the second VInt is present, we use the value // (-reversedNanoseconds - 1) as the second VInt. } if (vlong < 0) { // This indicates there is a second VInt containing the additional bits of the seconds // field. in.readFully(internalBytes, 4 + len, 1); int secondVIntLen = (byte) WritableUtils.decodeVIntSize(internalBytes[4 + len]); if (secondVIntLen > 1) { in.readFully(internalBytes, 5 + len, secondVIntLen - 1); } } } currentBytes = internalBytes; this.offset = 0; }
/**
 * Raw comparison of two serialized records using only their leading field.
 *
 * <p>For each side the compared span is the size of the VInt at the start offset plus the
 * value that VInt encodes; the two spans are then handed to {@code TEXT_COMPARATOR}.
 *
 * @throws IllegalArgumentException wrapping any {@link IOException} raised while decoding
 */
@Override
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
  try {
    final int prefixSize1 = WritableUtils.decodeVIntSize(b1[s1]);
    final int leadingSpan1 = prefixSize1 + readVInt(b1, s1);

    final int prefixSize2 = WritableUtils.decodeVIntSize(b2[s2]);
    final int leadingSpan2 = prefixSize2 + readVInt(b2, s2);

    return TEXT_COMPARATOR.compare(b1, s1, leadingSpan1, b2, s2, leadingSpan2);
  } catch (IOException e) {
    throw new IllegalArgumentException(e);
  }
}
/**
 * Computes the serialized length of the timestamp that starts at {@code bytes[offset]}.
 *
 * <p>The length is 4 when the first byte does not carry the "has decimal or second VInt"
 * flag. Otherwise the size of the VInt at {@code offset + 4} is added, plus the size of the
 * VInt that follows it when {@code hasSecondVInt} reports one.
 *
 * @param bytes buffer holding the serialized timestamp
 * @param offset position of the timestamp's first byte
 * @return the total number of bytes occupied by the timestamp
 */
public static int getTotalLength(byte[] bytes, int offset) {
  if (!hasDecimalOrSecondVInt(bytes[offset])) {
    return 4;
  }

  final int firstVIntStart = offset + 4;
  final int firstVIntLen = WritableUtils.decodeVIntSize(bytes[firstVIntStart]);
  int total = 4 + firstVIntLen;

  if (hasSecondVInt(bytes[firstVIntStart])) {
    total += WritableUtils.decodeVIntSize(bytes[firstVIntStart + firstVIntLen]);
  }
  return total;
}
/**
 * Optimization hook: compares two serialized records without deserializing them.
 *
 * <p>The 8-byte long at the start of each record is compared first. On a tie, the VInt that
 * follows it is skipped on both sides and the remaining bytes are compared with
 * {@code compareBytes}.
 */
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
  final long left = readLong(b1, s1);
  final long right = readLong(b2, s2);
  if (left != right) {
    return left < right ? -1 : 1;
  }

  // Equal leading longs: skip the long (8 bytes) and the VInt prefix, compare the rest.
  final int skip1 = WritableUtils.decodeVIntSize(b1[s1 + 8]) + 8;
  final int skip2 = WritableUtils.decodeVIntSize(b2[s2 + 8]) + 8;
  return compareBytes(b1, s1 + skip1, l1 - skip1, b2, s2 + skip2, l2 - skip2);
}
/**
 * Compares the VLongs stored at {@code x[xs]} and {@code y[ys]}.
 *
 * <p>As a side effect, {@code size[0]} is set to one plus the encoded size of the VLong at
 * {@code x[xs]}.
 *
 * <p>The two decoded values are compared directly rather than by the sign of their
 * difference: the subtraction overflows when the operands have opposite signs and large
 * magnitudes, which would report the wrong order.
 *
 * @param x first buffer
 * @param xs start offset in {@code x}
 * @param xl length available in {@code x} (not used by this method)
 * @param y second buffer
 * @param ys start offset in {@code y}
 * @param yl length available in {@code y} (not used by this method)
 * @param size output array; element 0 receives the size described above
 * @return -1, 0 or 1 when the first value is less than, equal to or greater than the second
 * @throws Error wrapping any {@link IOException} raised while decoding
 */
public static final int compare(byte[] x, int xs, int xl, byte[] y, int ys, int yl, int[] size) {
  try {
    size[0] = 1 + WritableUtils.decodeVIntSize(x[xs]);
    final long left = WritableComparator.readVLong(x, xs);
    final long right = WritableComparator.readVLong(y, ys);
    if (left == right) {
      return 0;
    }
    return left > right ? 1 : -1;
  } catch (IOException e) {
    throw new Error(e);
  }
}
/**
 * Raw comparison of two serialized records made of a VInt followed by a VLong.
 *
 * <p>Records are ordered by the VInt first and, when those are equal, by the VLong that
 * immediately follows it.
 *
 * @return -1, 0 or 1
 * @throws IllegalArgumentException wrapping any exception raised while decoding
 */
@Override
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
  try {
    final int vintSize1 = WritableUtils.decodeVIntSize(b1[s1]);
    final int vintSize2 = WritableUtils.decodeVIntSize(b2[s2]);

    final int split1 = WritableComparator.readVInt(b1, s1);
    final int split2 = WritableComparator.readVInt(b2, s2);
    if (split1 != split2) {
      return split1 > split2 ? 1 : -1;
    }

    final long offset1 = WritableComparator.readVLong(b1, s1 + vintSize1);
    final long offset2 = WritableComparator.readVLong(b2, s2 + vintSize2);
    if (offset1 == offset2) {
      return 0;
    }
    return offset1 > offset2 ? 1 : -1;
  } catch (Exception ex) {
    throw new IllegalArgumentException(ex);
  }
}
/**
 * Raw comparison of two serialized records laid out as one leading byte, a VInt, then a
 * payload.
 *
 * <p>Ties are broken in this order: {@code SOURCE_COMP} on the payload, {@code TARGET_COMP}
 * on the payload, the leading byte, and finally the absolute value of the VInt.
 *
 * @throws IllegalArgumentException wrapping any {@link IOException} raised while decoding
 */
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
  try {
    // Header = the leading byte plus the VInt that starts right after it.
    final int header1 = 1 + WritableUtils.decodeVIntSize(b1[s1 + 1]);
    final int header2 = 1 + WritableUtils.decodeVIntSize(b2[s2 + 1]);
    final int payloadStart1 = s1 + header1;
    final int payloadLen1 = l1 - header1;
    final int payloadStart2 = s2 + header2;
    final int payloadLen2 = l2 - header2;

    int result =
        SOURCE_COMP.compare(b1, payloadStart1, payloadLen1, b2, payloadStart2, payloadLen2);
    if (result == 0) {
      // NOTE(review): TARGET_COMP is given the same byte range as SOURCE_COMP; presumably it
      // locates the target field inside the payload itself. Confirm against its definition.
      result =
          TARGET_COMP.compare(b1, payloadStart1, payloadLen1, b2, payloadStart2, payloadLen2);
    }
    if (result == 0) {
      result = PrimitiveUtils.compare(b1[s1], b2[s2]);
    }
    if (result == 0) {
      final int magnitude1 = Math.abs(WritableComparator.readVInt(b1, s1 + 1));
      final int magnitude2 = Math.abs(WritableComparator.readVInt(b2, s2 + 1));
      result = PrimitiveUtils.compare(magnitude1, magnitude2);
    }
    return result;
  } catch (IOException e) {
    throw new IllegalArgumentException(e);
  }
}
/**
 * Similar to {@link WritableUtils#readVLong(DataInput)} but reads from a {@link ByteBuff}.
 *
 * <p>The first byte gives the encoded size. A size of one means that byte is the value
 * itself. Otherwise the following {@code size - 1} bytes are accumulated most significant
 * byte first, and the result is bitwise-complemented when the first byte marks a negative
 * value.
 *
 * @param in buffer to consume bytes from
 * @return the decoded long
 */
public static long readVLong(ByteBuff in) {
  final byte head = in.get();
  final int size = WritableUtils.decodeVIntSize(head);
  if (size == 1) {
    return head;
  }

  long magnitude = 0;
  int remaining = size - 1;
  while (remaining-- > 0) {
    magnitude = (magnitude << 8) | (in.get() & 0xFF);
  }
  return WritableUtils.isNegativeVInt(head) ? ~magnitude : magnitude;
}
/**
 * Decodes a zero-compressed encoded long stored in a byte array.
 *
 * <p>The byte at {@code offset} gives the encoded size. A size of one means that byte is the
 * value itself. Otherwise the next {@code size - 1} bytes are accumulated most significant
 * byte first and complemented when the first byte marks a negative value.
 *
 * @param bytes the byte array
 * @param offset the offset in the byte array where the VLong is stored
 * @return the long
 */
public static long readVLongFromByteArray(final byte[] bytes, int offset) {
  final byte head = bytes[offset];
  final int size = WritableUtils.decodeVIntSize(head);
  if (size == 1) {
    return head;
  }

  long magnitude = 0;
  final int end = offset + size;
  for (int pos = offset + 1; pos < end; pos++) {
    magnitude = (magnitude << 8) | (bytes[pos] & 0xFF);
  }
  return WritableUtils.isNegativeVInt(head) ? (magnitude ^ -1L) : magnitude;
}
/**
 * Decodes a zero-compressed encoded int from a byte array into {@code vInt}.
 *
 * <p>{@code vInt.length} receives the encoded size in bytes and {@code vInt.value} the
 * decoded int.
 *
 * @param bytes the byte array
 * @param offset offset of the array to read from
 * @param vInt storing the deserialized int and its size in byte
 */
public static void readVInt(byte[] bytes, int offset, VInt vInt) {
  final byte head = bytes[offset];
  vInt.length = (byte) WritableUtils.decodeVIntSize(head);
  if (vInt.length == 1) {
    // Single-byte encoding: the byte is the value.
    vInt.value = head;
    return;
  }

  int magnitude = 0;
  final int end = offset + vInt.length;
  for (int pos = offset + 1; pos < end; pos++) {
    magnitude = (magnitude << 8) | (bytes[pos] & 0xFF);
  }

  if (WritableUtils.isNegativeVInt(head)) {
    vInt.value = ~magnitude;
  } else {
    vInt.value = magnitude;
  }
}
/** * Gets seconds stored as integer at bytes[offset] * * @param bytes * @param offset * @return the number of seconds */ public static long getSeconds(byte[] bytes, int offset) { int lowest31BitsOfSecondsAndFlag = bytesToInt(bytes, offset); if (lowest31BitsOfSecondsAndFlag >= 0 || // the "has decimal or second VInt" flag is not set !hasSecondVInt(bytes[offset + 4])) { // The entire seconds field is stored in the first 4 bytes. return lowest31BitsOfSecondsAndFlag & LOWEST_31_BITS_OF_SEC_MASK; } // We compose the seconds field from two parts. The lowest 31 bits come from the first four // bytes. The higher-order bits come from the second VInt that follows the nanos field. return ((long) (lowest31BitsOfSecondsAndFlag & LOWEST_31_BITS_OF_SEC_MASK)) | (LazyBinaryUtils.readVLongFromByteArray( bytes, offset + 4 + WritableUtils.decodeVIntSize(bytes[offset + 4])) << 31); }
/**
 * Populates {@code t} from the serialized timestamp starting at {@code bytes[offset]}.
 *
 * <p>The seconds come from {@code getSeconds}, which already combines the low 31 bits from
 * the first four bytes with the higher-order bits from the optional second VInt. The second
 * VInt is therefore not added again here; doing so would count those bits twice and shift
 * every timestamp that carries a second VInt.
 *
 * @param t the timestamp to populate
 * @param bytes buffer holding the serialized timestamp
 * @param offset position of the timestamp's first byte
 */
public static void setTimestamp(Timestamp t, byte[] bytes, int offset) {
  final boolean hasDecimalOrSecondVInt = hasDecimalOrSecondVInt(bytes[offset]);
  final long seconds = TimestampWritable.getSeconds(bytes, offset);

  int nanos = 0;
  if (hasDecimalOrSecondVInt) {
    nanos = TimestampWritable.getNanos(bytes, offset + 4);
  }

  // Set whole seconds first, then apply the nanoseconds when there are any.
  t.setTime(seconds * 1000);
  if (nanos != 0) {
    t.setNanos(nanos);
  }
}
/**
 * Serializes a timestamp into {@code b} starting at {@code offset}.
 *
 * <p>Layout written: a 4-byte int holding the low bits of the seconds (with the "has decimal
 * or second VInt" flag set when applicable), the nanos bytes written by
 * {@code setNanosBytes} at {@code offset + 4}, and, when the seconds are negative or exceed
 * {@code Integer.MAX_VALUE}, a VLong holding {@code seconds >> 31} right after the first
 * VInt.
 *
 * @param t timestamp to convert to bytes
 * @param b destination byte array
 * @param offset destination offset in the byte array
 */
public static void convertTimestampToBytes(Timestamp t, byte[] b, int offset) {
  final long millis = t.getTime();
  final int nanos = t.getNanos();
  final long seconds = millisToSeconds(millis);

  final boolean needsSecondVInt = !(seconds >= 0 && seconds <= Integer.MAX_VALUE);
  final int nanosStart = offset + 4;
  final boolean wroteDecimal = setNanosBytes(nanos, b, nanosStart, needsSecondVInt);

  int header = (int) seconds;
  if (!wroteDecimal && !needsSecondVInt) {
    header &= LOWEST_31_BITS_OF_SEC_MASK;
  } else {
    header |= DECIMAL_OR_SECOND_VINT_FLAG;
  }
  intToBytes(header, b, offset);

  if (!needsSecondVInt) {
    return;
  }
  // The second VInt goes immediately after the first one, whose size is read back from b.
  final int secondVIntStart = nanosStart + WritableUtils.decodeVIntSize(b[nanosStart]);
  LazyBinaryUtils.writeVLongToByteArray(b, secondVIntStart, seconds >> 31);
}
/** * Given a byte[] that has binary sortable data, initialize the internal structures to hold that * data * * @param bytes the byte array that holds the binary sortable representation * @param binSortOffset offset of the binary-sortable representation within the buffer. */ public void setBinarySortable(byte[] bytes, int binSortOffset) { // Flip the sign bit (and unused bits of the high-order byte) of the seven-byte long back. long seconds = readSevenByteLong(bytes, binSortOffset) ^ SEVEN_BYTE_LONG_SIGN_FLIP; int nanos = bytesToInt(bytes, binSortOffset + 7); int firstInt = (int) seconds; boolean hasSecondVInt = seconds < 0 || seconds > Integer.MAX_VALUE; if (nanos != 0 || hasSecondVInt) { firstInt |= DECIMAL_OR_SECOND_VINT_FLAG; } else { firstInt &= LOWEST_31_BITS_OF_SEC_MASK; } intToBytes(firstInt, internalBytes, 0); setNanosBytes(nanos, internalBytes, 4, hasSecondVInt); if (hasSecondVInt) { LazyBinaryUtils.writeVLongToByteArray( internalBytes, 4 + WritableUtils.decodeVIntSize(internalBytes[4]), seconds >> 31); } currentBytes = internalBytes; this.offset = 0; }
/**
 * Raw comparison that orders records in reverse byte order.
 *
 * <p>The VInt at the start of each record is skipped, the remaining bytes are compared with
 * {@link WritableComparator#compareBytes}, and the result is negated.
 */
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
  final int prefix1 = WritableUtils.decodeVIntSize(b1[s1]);
  final int prefix2 = WritableUtils.decodeVIntSize(b2[s2]);
  final int ascending =
      WritableComparator.compareBytes(
          b1, s1 + prefix1, l1 - prefix1, b2, s2 + prefix2, l2 - prefix2);
  return -ascending;
}
/** * Check a particular field and set its size and offset in bytes based on the field type and the * bytes arrays. * * <p>For void, boolean, byte, short, int, long, float and double, there is no offset and the size * is fixed. For string, map, list, struct, the first four bytes are used to store the size. So * the offset is 4 and the size is computed by concating the first four bytes together. The first * four bytes are defined with respect to the offset in the bytes arrays. For timestamp, if the * first bit is 0, the record length is 4, otherwise a VInt begins at the 5th byte and its length * is added to 4. * * @param objectInspector object inspector of the field * @param bytes bytes arrays store the table row * @param offset offset of this field * @param recordInfo modify this byteinfo object and return it */ public static void checkObjectByteInfo( ObjectInspector objectInspector, byte[] bytes, int offset, RecordInfo recordInfo, VInt vInt) { Category category = objectInspector.getCategory(); switch (category) { case PRIMITIVE: PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector) objectInspector).getPrimitiveCategory(); switch (primitiveCategory) { case VOID: recordInfo.elementOffset = 0; recordInfo.elementSize = 0; break; case BOOLEAN: case BYTE: recordInfo.elementOffset = 0; recordInfo.elementSize = 1; break; case SHORT: recordInfo.elementOffset = 0; recordInfo.elementSize = 2; break; case FLOAT: recordInfo.elementOffset = 0; recordInfo.elementSize = 4; break; case DOUBLE: recordInfo.elementOffset = 0; recordInfo.elementSize = 8; break; case INT: recordInfo.elementOffset = 0; recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]); break; case LONG: recordInfo.elementOffset = 0; recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]); break; case STRING: // using vint instead of 4 bytes LazyBinaryUtils.readVInt(bytes, offset, vInt); recordInfo.elementOffset = vInt.length; recordInfo.elementSize = vInt.value; break; case 
CHAR: case VARCHAR: LazyBinaryUtils.readVInt(bytes, offset, vInt); recordInfo.elementOffset = vInt.length; recordInfo.elementSize = vInt.value; break; case BINARY: // using vint instead of 4 bytes LazyBinaryUtils.readVInt(bytes, offset, vInt); recordInfo.elementOffset = vInt.length; recordInfo.elementSize = vInt.value; break; case DATE: recordInfo.elementOffset = 0; recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]); break; case TIMESTAMP: recordInfo.elementOffset = 0; recordInfo.elementSize = TimestampWritable.getTotalLength(bytes, offset); break; case DECIMAL: // using vint instead of 4 bytes LazyBinaryUtils.readVInt(bytes, offset, vInt); recordInfo.elementOffset = 0; recordInfo.elementSize = vInt.length; LazyBinaryUtils.readVInt(bytes, offset + vInt.length, vInt); recordInfo.elementSize += vInt.length + vInt.value; break; default: { throw new RuntimeException("Unrecognized primitive type: " + primitiveCategory); } } break; case LIST: case MAP: case STRUCT: case UNION: recordInfo.elementOffset = 4; recordInfo.elementSize = LazyBinaryUtils.byteArrayToInt(bytes, offset); break; default: { throw new RuntimeException("Unrecognized non-primitive type: " + category); } } }