public static int getNanos(byte[] bytes, int offset) { VInt vInt = LazyBinaryUtils.threadLocalVInt.get(); LazyBinaryUtils.readVInt(bytes, offset, vInt); int val = vInt.value; if (val < 0) { // This means there is a second VInt present that specifies additional bits of the timestamp. // The reversed nanoseconds value is still encoded in this VInt. val = -val - 1; } int len = (int) Math.floor(Math.log10(val)) + 1; // Reverse the value int tmp = 0; while (val != 0) { tmp *= 10; tmp += val % 10; val /= 10; } val = tmp; if (len < 9) { val *= Math.pow(10, 9 - len); } return val; }
public void readFields(DataInput in) throws IOException { in.readFully(internalBytes, 0, 4); if (TimestampWritable.hasDecimalOrSecondVInt(internalBytes[0])) { in.readFully(internalBytes, 4, 1); int len = (byte) WritableUtils.decodeVIntSize(internalBytes[4]); if (len > 1) { in.readFully(internalBytes, 5, len - 1); } long vlong = LazyBinaryUtils.readVLongFromByteArray(internalBytes, 4); if (vlong < -1000000000 || vlong > 999999999) { throw new IOException( "Invalid first vint value (encoded nanoseconds) of a TimestampWritable: " + vlong + ", expected to be between -1000000000 and 999999999."); // Note that -1000000000 is a valid value corresponding to a nanosecond timestamp // of 999999999, because if the second VInt is present, we use the value // (-reversedNanoseconds - 1) as the second VInt. } if (vlong < 0) { // This indicates there is a second VInt containing the additional bits of the seconds // field. in.readFully(internalBytes, 4 + len, 1); int secondVIntLen = (byte) WritableUtils.decodeVIntSize(internalBytes[4 + len]); if (secondVIntLen > 1) { in.readFully(internalBytes, 5 + len, secondVIntLen - 1); } } } currentBytes = internalBytes; this.offset = 0; }
/** Parse the bytes and fill elementStart, elementLength, elementInited and elementIsNull. */ private void parse() { byte[] bytes = this.bytes.getData(); // get the vlong that represents the map size LazyBinaryUtils.readVInt(bytes, start, vInt); arraySize = vInt.value; if (0 == arraySize) { parsed = true; return; } // adjust arrays adjustArraySize(arraySize); // find out the null-bytes int arryByteStart = start + vInt.length; int nullByteCur = arryByteStart; int nullByteEnd = arryByteStart + (arraySize + 7) / 8; // the begin the real elements int lastElementByteEnd = nullByteEnd; // the list element object inspector ObjectInspector listEleObjectInspector = ((ListObjectInspector) oi).getListElementObjectInspector(); // parsing elements one by one for (int i = 0; i < arraySize; i++) { elementIsNull[i] = true; if ((bytes[nullByteCur] & (1 << (i % 8))) != 0) { elementIsNull[i] = false; LazyBinaryUtils.checkObjectByteInfo( listEleObjectInspector, bytes, lastElementByteEnd, recordInfo); elementStart[i] = lastElementByteEnd + recordInfo.elementOffset; elementLength[i] = recordInfo.elementSize; lastElementByteEnd = elementStart[i] + elementLength[i]; } // move onto the next null byte if (7 == (i % 8)) { nullByteCur++; } } Arrays.fill(elementInited, 0, arraySize, false); parsed = true; }
/**
 * Creates a columnar struct object inspector whose field inspectors are the lazy-binary object
 * inspectors obtained for each entry of {@code columnTypes}, in order.
 *
 * @param columnNames names of the struct fields
 * @param columnTypes type of each struct field
 * @return the columnar struct object inspector returned by {@code ObjectInspectorFactory}
 */
public static ObjectInspector createColumnarStructInspector(
    List<String> columnNames, List<TypeInfo> columnTypes) {
  ArrayList<ObjectInspector> fieldInspectors =
      new ArrayList<ObjectInspector>(columnTypes.size());
  for (TypeInfo columnType : columnTypes) {
    ObjectInspector fieldInspector =
        LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(columnType);
    fieldInspectors.add(fieldInspector);
  }
  return ObjectInspectorFactory.getColumnarStructObjectInspector(columnNames, fieldInspectors);
}
/** * Gets seconds stored as integer at bytes[offset] * * @param bytes * @param offset * @return the number of seconds */ public static long getSeconds(byte[] bytes, int offset) { int lowest31BitsOfSecondsAndFlag = bytesToInt(bytes, offset); if (lowest31BitsOfSecondsAndFlag >= 0 || // the "has decimal or second VInt" flag is not set !hasSecondVInt(bytes[offset + 4])) { // The entire seconds field is stored in the first 4 bytes. return lowest31BitsOfSecondsAndFlag & LOWEST_31_BITS_OF_SEC_MASK; } // We compose the seconds field from two parts. The lowest 31 bits come from the first four // bytes. The higher-order bits come from the second VInt that follows the nanos field. return ((long) (lowest31BitsOfSecondsAndFlag & LOWEST_31_BITS_OF_SEC_MASK)) | (LazyBinaryUtils.readVLongFromByteArray( bytes, offset + 4 + WritableUtils.decodeVIntSize(bytes[offset + 4])) << 31); }
/**
 * Populates {@code t} from the serialized timestamp that starts at {@code bytes[offset]}.
 *
 * <p>{@link #getSeconds(byte[], int)} already returns the complete seconds value, including the
 * higher-order bits carried by the optional second VInt, so nothing further is added to it
 * here. (Adding the second VInt again would skew every timestamp that needs it, i.e. negative
 * seconds and seconds above {@code Integer.MAX_VALUE}.)
 *
 * @param t timestamp object to update
 * @param bytes buffer holding the serialized timestamp
 * @param offset position of the timestamp within {@code bytes}
 */
public static void setTimestamp(Timestamp t, byte[] bytes, int offset) {
  long seconds = TimestampWritable.getSeconds(bytes, offset);

  // The nanos VInt exists only when the flag in the first byte is set.
  int nanos = 0;
  if (hasDecimalOrSecondVInt(bytes[offset])) {
    nanos = TimestampWritable.getNanos(bytes, offset + 4);
  }

  t.setTime(seconds * 1000);
  if (nanos != 0) {
    t.setNanos(nanos);
  }
}
private LazyBinaryStructObjectInspector createInternalOi(MapJoinObjectSerDeContext valCtx) throws SerDeException { // We are going to use LBSerDe to serialize values; create OI for retrieval. List<? extends StructField> fields = ((StructObjectInspector) valCtx.getSerDe().getObjectInspector()).getAllStructFieldRefs(); List<String> colNames = new ArrayList<String>(fields.size()); List<ObjectInspector> colOis = new ArrayList<ObjectInspector>(fields.size()); for (int i = 0; i < fields.size(); ++i) { StructField field = fields.get(i); colNames.add(field.getFieldName()); // It would be nice if OI could return typeInfo... TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName()); colOis.add(LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(typeInfo)); } return LazyBinaryObjectInspectorFactory.getLazyBinaryStructObjectInspector(colNames, colOis); }
/** * Given an integer representing nanoseconds, write its serialized value to the byte array b at * offset * * @param nanos * @param b * @param offset * @return */ private static boolean setNanosBytes(int nanos, byte[] b, int offset, boolean hasSecondVInt) { int decimal = 0; if (nanos != 0) { int counter = 0; while (counter < 9) { decimal *= 10; decimal += nanos % 10; nanos /= 10; counter++; } } if (hasSecondVInt || decimal != 0) { // We use the sign of the reversed-nanoseconds field to indicate that there is a second VInt // present. LazyBinaryUtils.writeVLongToByteArray(b, offset, hasSecondVInt ? (-decimal - 1) : decimal); } return decimal != 0; }
/**
 * Writes the serialized form of a timestamp into {@code b} starting at {@code offset}.
 *
 * <p>Output order in the buffer: a four-byte int (low bits of the seconds plus a flag bit), then
 * the nanoseconds VInt when one is needed, then a second VInt with {@code seconds >> 31} when
 * the seconds are negative or exceed {@code Integer.MAX_VALUE}.
 *
 * @param t timestamp to convert to bytes
 * @param b destination byte array
 * @param offset destination offset in the byte array
 */
public static void convertTimestampToBytes(Timestamp t, byte[] b, int offset) {
  final long seconds = millisToSeconds(t.getTime());
  final int nanos = t.getNanos();

  // Seconds that do not fit in 31 unsigned bits need the extra VInt.
  final boolean hasSecondVInt = seconds < 0 || seconds > Integer.MAX_VALUE;

  final int nanosPos = offset + 4;
  final boolean hasDecimal = setNanosBytes(nanos, b, nanosPos, hasSecondVInt);

  int firstInt = (int) seconds;
  final boolean plainSecondsOnly = !hasDecimal && !hasSecondVInt;
  if (plainSecondsOnly) {
    firstInt &= LOWEST_31_BITS_OF_SEC_MASK;
  } else {
    firstInt |= DECIMAL_OR_SECOND_VINT_FLAG;
  }
  intToBytes(firstInt, b, offset);

  if (hasSecondVInt) {
    // The second VInt goes immediately after the nanoseconds VInt written above.
    final int secondVIntPos = nanosPos + WritableUtils.decodeVIntSize(b[nanosPos]);
    LazyBinaryUtils.writeVLongToByteArray(b, secondVIntPos, seconds >> 31);
  }
}
/** * Given a byte[] that has binary sortable data, initialize the internal structures to hold that * data * * @param bytes the byte array that holds the binary sortable representation * @param binSortOffset offset of the binary-sortable representation within the buffer. */ public void setBinarySortable(byte[] bytes, int binSortOffset) { // Flip the sign bit (and unused bits of the high-order byte) of the seven-byte long back. long seconds = readSevenByteLong(bytes, binSortOffset) ^ SEVEN_BYTE_LONG_SIGN_FLIP; int nanos = bytesToInt(bytes, binSortOffset + 7); int firstInt = (int) seconds; boolean hasSecondVInt = seconds < 0 || seconds > Integer.MAX_VALUE; if (nanos != 0 || hasSecondVInt) { firstInt |= DECIMAL_OR_SECOND_VINT_FLAG; } else { firstInt &= LOWEST_31_BITS_OF_SEC_MASK; } intToBytes(firstInt, internalBytes, 0); setNanosBytes(nanos, internalBytes, 4, hasSecondVInt); if (hasSecondVInt) { LazyBinaryUtils.writeVLongToByteArray( internalBytes, 4 + WritableUtils.decodeVIntSize(internalBytes[4]), seconds >> 31); } currentBytes = internalBytes; this.offset = 0; }
/**
 * Sets the held float from the four bytes at {@code start}: the bytes are read as an int and
 * reinterpreted as IEEE 754 float bits.
 *
 * @param bytes reference to the backing byte array
 * @param start position of the value within the array
 * @param length number of bytes of the value; asserted to be 4
 */
@Override
public void init(ByteArrayRef bytes, int start, int length) {
  assert (length == 4);
  final int rawBits = LazyBinaryUtils.byteArrayToInt(bytes.getData(), start);
  data.set(Float.intBitsToFloat(rawBits));
}
/**
 * Sets the held double from the eight bytes at {@code start}: the bytes are read as a long and
 * reinterpreted as IEEE 754 double bits.
 *
 * @param bytes reference to the backing byte array
 * @param start position of the value within the array
 * @param length number of bytes of the value; asserted to be 8
 */
@Override
public void init(ByteArrayRef bytes, int start, int length) {
  assert (length == 8);
  final long rawBits = LazyBinaryUtils.byteArrayToLong(bytes.getData(), start);
  data.set(Double.longBitsToDouble(rawBits));
}
/**
 * Writes {@code l} to {@code byteStream} in VLong encoding.
 *
 * <p>The value is first encoded into the buffer obtained from {@code vLongBytesThreadLocal},
 * then the encoded bytes are copied to the stream.
 *
 * @param byteStream destination stream
 * @param l value to write
 */
public static void writeVLong(RandomAccessOutput byteStream, long l) {
  final byte[] scratch = vLongBytesThreadLocal.get();
  final int encodedLength = LazyBinaryUtils.writeVLongToByteArray(scratch, l);
  byteStream.write(scratch, 0, encodedLength);
}
/**
 * Writes a zero-compressed encoded long at the beginning of a byte array.
 *
 * <p>Convenience overload that delegates to the three-argument variant with offset 0.
 *
 * @param bytes the destination byte array
 * @param l the long to encode
 * @return the value returned by the three-argument overload
 */
public static int writeVLongToByteArray(byte[] bytes, long l) {
  final int startOffset = 0;
  return LazyBinaryUtils.writeVLongToByteArray(bytes, startOffset, l);
}
/** * Check a particular field and set its size and offset in bytes based on the field type and the * bytes arrays. * * <p>For void, boolean, byte, short, int, long, float and double, there is no offset and the size * is fixed. For string, map, list, struct, the first four bytes are used to store the size. So * the offset is 4 and the size is computed by concating the first four bytes together. The first * four bytes are defined with respect to the offset in the bytes arrays. For timestamp, if the * first bit is 0, the record length is 4, otherwise a VInt begins at the 5th byte and its length * is added to 4. * * @param objectInspector object inspector of the field * @param bytes bytes arrays store the table row * @param offset offset of this field * @param recordInfo modify this byteinfo object and return it */ public static void checkObjectByteInfo( ObjectInspector objectInspector, byte[] bytes, int offset, RecordInfo recordInfo, VInt vInt) { Category category = objectInspector.getCategory(); switch (category) { case PRIMITIVE: PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector) objectInspector).getPrimitiveCategory(); switch (primitiveCategory) { case VOID: recordInfo.elementOffset = 0; recordInfo.elementSize = 0; break; case BOOLEAN: case BYTE: recordInfo.elementOffset = 0; recordInfo.elementSize = 1; break; case SHORT: recordInfo.elementOffset = 0; recordInfo.elementSize = 2; break; case FLOAT: recordInfo.elementOffset = 0; recordInfo.elementSize = 4; break; case DOUBLE: recordInfo.elementOffset = 0; recordInfo.elementSize = 8; break; case INT: recordInfo.elementOffset = 0; recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]); break; case LONG: recordInfo.elementOffset = 0; recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]); break; case STRING: // using vint instead of 4 bytes LazyBinaryUtils.readVInt(bytes, offset, vInt); recordInfo.elementOffset = vInt.length; recordInfo.elementSize = vInt.value; break; case 
CHAR: case VARCHAR: LazyBinaryUtils.readVInt(bytes, offset, vInt); recordInfo.elementOffset = vInt.length; recordInfo.elementSize = vInt.value; break; case BINARY: // using vint instead of 4 bytes LazyBinaryUtils.readVInt(bytes, offset, vInt); recordInfo.elementOffset = vInt.length; recordInfo.elementSize = vInt.value; break; case DATE: recordInfo.elementOffset = 0; recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]); break; case TIMESTAMP: recordInfo.elementOffset = 0; recordInfo.elementSize = TimestampWritable.getTotalLength(bytes, offset); break; case DECIMAL: // using vint instead of 4 bytes LazyBinaryUtils.readVInt(bytes, offset, vInt); recordInfo.elementOffset = 0; recordInfo.elementSize = vInt.length; LazyBinaryUtils.readVInt(bytes, offset + vInt.length, vInt); recordInfo.elementSize += vInt.length + vInt.value; break; default: { throw new RuntimeException("Unrecognized primitive type: " + primitiveCategory); } } break; case LIST: case MAP: case STRUCT: case UNION: recordInfo.elementOffset = 4; recordInfo.elementSize = LazyBinaryUtils.byteArrayToInt(bytes, offset); break; default: { throw new RuntimeException("Unrecognized non-primitive type: " + category); } } }