@Override
public void setKeyValue(Writable key, Writable val) throws SerDeException {
  Object keyObj = keySerDe.deserialize(key);
  Object valObj = valSerDe.deserialize(val);
  List<? extends StructField> keyFields = keySoi.getAllStructFieldRefs();
  List<? extends StructField> valFields = valSoi.getAllStructFieldRefs();
  for (int i = 0; i < keyFields.size(); ++i) {
    keyObjs[i] = keySoi.getStructFieldData(keyObj, keyFields.get(i));
  }
  for (int i = 0; i < valFields.size(); ++i) {
    valObjs[i] = valSoi.getStructFieldData(valObj, valFields.get(i));
  }
}
/**
 * Serializing means getting every field and setting the appropriate JSONObject field. Actual
 * serialization is done at the end, when the whole JSON object is built.
 *
 * @param obj the struct to serialize
 * @param soi the struct's object inspector
 * @param columnNames the Hive column names to use for the top-level fields
 */
private JSONObject serializeStruct(
    Object obj, StructObjectInspector soi, List<String> columnNames) {
  // do nothing for null struct
  if (null == obj) {
    return null;
  }
  JSONObject result = new JSONObject();
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  for (int i = 0; i < fields.size(); i++) {
    StructField sf = fields.get(i);
    Object data = soi.getStructFieldData(obj, sf);
    if (null != data) {
      try {
        // we want to serialize columns with their proper HIVE name,
        // not the _col2 kind of name usually generated upstream
        result.put(
            getSerializedFieldName(columnNames, i, sf),
            serializeField(data, sf.getFieldObjectInspector()));
      } catch (JSONException ex) {
        LOG.warn("Problem serializing", ex);
        throw new RuntimeException(ex);
      }
    }
  }
  return result;
}
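// --- Hedged usage sketch (not from the original source) ----------------------
// A minimal standalone illustration of the same traversal pattern: walk a
// StructObjectInspector's field refs and put each value into a JSONObject.
// The standard Java inspectors below are real Hive serde2 APIs; the demo class
// itself is hypothetical, and this sketch handles primitive fields only
// (complex fields would recurse, as serializeField does above).
import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.json.JSONException;
import org.json.JSONObject;

public class StructToJsonSketch {
  public static void main(String[] args) throws JSONException {
    StructObjectInspector soi =
        ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("name", "age"),
            Arrays.<ObjectInspector>asList(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                PrimitiveObjectInspectorFactory.javaIntObjectInspector));
    Object row = Arrays.<Object>asList("alice", 42);
    JSONObject json = new JSONObject();
    for (StructField sf : soi.getAllStructFieldRefs()) {
      json.put(sf.getFieldName(), soi.getStructFieldData(row, sf));
    }
    // e.g. {"name":"alice","age":42} (JSONObject key order is unspecified)
    System.out.println(json);
  }
}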
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
  if (objInspector.getCategory() != Category.STRUCT) {
    throw new SerDeException(
        getClass().toString()
            + " can only serialize struct types, but we got: "
            + objInspector.getTypeName());
  }
  StructObjectInspector soi = (StructObjectInspector) objInspector;
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  StringBuilder sb = new StringBuilder();
  for (int i = 0; i < fields.size(); i++) {
    if (i > 0) {
      sb.append(separator);
    }
    Object column = soi.getStructFieldData(obj, fields.get(i));
    if (fields.get(i).getFieldObjectInspector().getCategory() == Category.PRIMITIVE) {
      // For primitive object, serialize to plain string
      sb.append(column == null ? nullString : column.toString());
    } else {
      // For complex object, serialize to JSON format
      sb.append(SerDeUtils.getJSONString(column, fields.get(i).getFieldObjectInspector()));
    }
  }
  serializeCache.set(sb.toString());
  return serializeCache;
}
private void parseStringColumn(int column) {
  // don't include column number in message because it causes boxing which is expensive here
  checkArgument(!isPartitionColumn[column], "Column is a partition key");
  loaded[column] = true;
  Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);
  if (fieldData == null) {
    nulls[column] = true;
  } else {
    Object fieldValue =
        ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData);
    checkState(fieldValue != null, "fieldValue should not be null");
    Slice value;
    if (fieldValue instanceof String) {
      value = Slices.utf8Slice((String) fieldValue);
    } else if (fieldValue instanceof byte[]) {
      value = Slices.wrappedBuffer((byte[]) fieldValue);
    } else if (fieldValue instanceof HiveVarchar) {
      value = Slices.utf8Slice(((HiveVarchar) fieldValue).getValue());
    } else {
      throw new IllegalStateException(
          "unsupported string field type: " + fieldValue.getClass().getName());
    }
    Type type = types[column];
    if (isVarcharType(type)) {
      value = truncateToLength(value, type);
    }
    slices[column] = value;
    nulls[column] = false;
  }
}
private void parseStringColumn(int column) {
  // don't include column number in message because it causes boxing which is expensive here
  checkArgument(!isPartitionColumn[column], "Column is a partition key");
  loaded[column] = true;
  Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);
  if (fieldData == null) {
    nulls[column] = true;
  } else if (hiveTypes[column] == HiveType.MAP
      || hiveTypes[column] == HiveType.LIST
      || hiveTypes[column] == HiveType.STRUCT) {
    // temporarily special case MAP, LIST, and STRUCT types as strings
    slices[column] =
        Slices.wrappedBuffer(
            SerDeUtils.getJsonBytes(sessionTimeZone, fieldData, fieldInspectors[column]));
    nulls[column] = false;
  } else {
    Object fieldValue =
        ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData);
    checkState(fieldValue != null, "fieldValue should not be null");
    if (fieldValue instanceof String) {
      slices[column] = Slices.utf8Slice((String) fieldValue);
    } else if (fieldValue instanceof byte[]) {
      slices[column] = Slices.wrappedBuffer((byte[]) fieldValue);
    } else {
      throw new IllegalStateException(
          "unsupported string field type: " + fieldValue.getClass().getName());
    }
    nulls[column] = false;
  }
}
private void stringifyObject(StringBuilder buffer, Object obj, ObjectInspector inspector)
    throws IOException {
  if (inspector instanceof StructObjectInspector) {
    buffer.append("{ ");
    StructObjectInspector soi = (StructObjectInspector) inspector;
    boolean isFirst = true;
    for (StructField field : soi.getAllStructFieldRefs()) {
      if (isFirst) {
        isFirst = false;
      } else {
        buffer.append(", ");
      }
      buffer.append(field.getFieldName());
      buffer.append(": ");
      stringifyObject(buffer, soi.getStructFieldData(obj, field), field.getFieldObjectInspector());
    }
    buffer.append(" }");
  } else if (inspector instanceof PrimitiveObjectInspector) {
    PrimitiveObjectInspector poi = (PrimitiveObjectInspector) inspector;
    buffer.append(poi.getPrimitiveJavaObject(obj).toString());
  } else {
    buffer.append("*unknown*");
  }
}
private void parseDecimalColumn(int column) {
  // don't include column number in message because it causes boxing which is expensive here
  checkArgument(!isPartitionColumn[column], "Column is a partition key");
  loaded[column] = true;
  Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);
  if (fieldData == null) {
    nulls[column] = true;
  } else {
    Object fieldValue =
        ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData);
    checkState(fieldValue != null, "fieldValue should not be null");
    HiveDecimal decimal = (HiveDecimal) fieldValue;
    DecimalType columnType = (DecimalType) types[column];
    BigInteger unscaledDecimal =
        rescale(decimal.unscaledValue(), decimal.scale(), columnType.getScale());
    if (columnType.isShort()) {
      longs[column] = unscaledDecimal.longValue();
    } else {
      slices[column] = Decimals.encodeUnscaledValue(unscaledDecimal);
    }
    nulls[column] = false;
  }
}
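// --- Hedged sketch (not from the original source) ----------------------------
// What the rescale step above does, as a hypothetical standalone helper:
// widening an unscaled decimal value from its own scale to the column's scale
// multiplies by a power of ten. The narrowing branch here is illustrative
// only; a production implementation must round or reject instead of truncate.
import java.math.BigInteger;

public class RescaleSketch {
  static BigInteger rescale(BigInteger unscaled, int fromScale, int toScale) {
    if (toScale >= fromScale) {
      return unscaled.multiply(BigInteger.TEN.pow(toScale - fromScale));
    }
    return unscaled.divide(BigInteger.TEN.pow(fromScale - toScale)); // truncates!
  }

  public static void main(String[] args) {
    // 1.5 stored as unscaled 15 at scale 1 becomes 1500 at scale 3 (i.e. 1.500)
    System.out.println(rescale(BigInteger.valueOf(15), 1, 3)); // 1500
  }
}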
@Override
protected String extractField(Object target) {
  if (target instanceof HiveType) {
    HiveType type = (HiveType) target;
    ObjectInspector inspector = type.getObjectInspector();
    if (inspector instanceof StructObjectInspector) {
      StructObjectInspector soi = (StructObjectInspector) inspector;
      StructField field = soi.getStructFieldRef(fieldName);
      ObjectInspector foi = field.getFieldObjectInspector();
      Assert.isTrue(
          foi.getCategory() == ObjectInspector.Category.PRIMITIVE,
          String.format(
              "Field [%s] needs to be a primitive; found [%s]", fieldName, foi.getTypeName()));
      // expecting a Writable - simply do a toString
      Object data = soi.getStructFieldData(type.getObject(), field);
      if (data == null || data instanceof NullWritable) {
        return StringUtils.EMPTY;
      }
      return data.toString();
    }
  }
  return null;
}
private RowSet decodeFromString(List<Object> rows, RowSet rowSet)
    throws SQLException, SerDeException {
  getSerDe();
  StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();
  List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
  Object[] deserializedFields = new Object[fieldRefs.size()];
  Object rowObj;
  ObjectInspector fieldOI;
  int protocol = getProtocolVersion().getValue();
  for (Object rowString : rows) {
    try {
      rowObj = serde.deserialize(new BytesWritable(((String) rowString).getBytes("UTF-8")));
    } catch (UnsupportedEncodingException e) {
      throw new SerDeException(e);
    }
    for (int i = 0; i < fieldRefs.size(); i++) {
      StructField fieldRef = fieldRefs.get(i);
      fieldOI = fieldRef.getFieldObjectInspector();
      Object fieldData = soi.getStructFieldData(rowObj, fieldRef);
      deserializedFields[i] = SerDeUtils.toThriftPayload(fieldData, fieldOI, protocol);
    }
    rowSet.addRow(deserializedFields);
  }
  return rowSet;
}
@Override
public Writable serialize(final Object obj, final ObjectInspector inspector)
    throws SerDeException {
  final StructObjectInspector structInspector = (StructObjectInspector) inspector;
  final List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
  if (fields.size() != columnNames.size()) {
    throw new SerDeException(
        String.format("Required %d columns, received %d.", columnNames.size(), fields.size()));
  }
  cachedWritable.clear();
  for (int c = 0; c < fieldCount; c++) {
    StructField structField = fields.get(c);
    LOG.debug("fieldId=" + c + ",structField=" + structField.toString());
    if (structField != null) {
      final Object field = structInspector.getStructFieldData(obj, fields.get(c));
      final AbstractPrimitiveObjectInspector fieldOI =
          (AbstractPrimitiveObjectInspector) fields.get(c).getFieldObjectInspector();
      Writable value = (Writable) fieldOI.getPrimitiveWritableObject(field);
      if (value == null) {
        continue;
      }
      LOG.debug("fieldCount=" + fieldCount + ",value=" + value.toString());
      if (value instanceof IntWritable) {
        cachedWritable.put(new Text(columnNames.get(c)), value);
      } else if (value instanceof Text) {
        cachedWritable.put(new Text(columnNames.get(c)), ((Text) value));
      } else if (value instanceof LongWritable) {
        cachedWritable.put(new Text(columnNames.get(c)), ((LongWritable) value));
      } else if (value instanceof DoubleWritable) {
        cachedWritable.put(new Text(columnNames.get(c)), ((DoubleWritable) value));
      } else if (value instanceof FloatWritable) {
        cachedWritable.put(new Text(columnNames.get(c)), ((FloatWritable) value));
      } else if (value instanceof BooleanWritable) {
        cachedWritable.put(new Text(columnNames.get(c)), ((BooleanWritable) value));
      } else if (value instanceof ByteWritable) {
        cachedWritable.put(new Text(columnNames.get(c)), ((ByteWritable) value));
      } else if (value instanceof BytesWritable) {
        cachedWritable.put(new Text(columnNames.get(c)), ((BytesWritable) value));
      } else {
        LOG.warn("fieldCount=" + fieldCount + ",type=" + value.getClass().getName());
      }
    }
  }
  return cachedWritable;
}
private ArrayWritable createStruct(final Object obj, final StructObjectInspector inspector)
    throws SerDeException {
  final List<? extends StructField> fields = inspector.getAllStructFieldRefs();
  final Writable[] arr = new Writable[fields.size()];
  for (int i = 0; i < fields.size(); i++) {
    final StructField field = fields.get(i);
    final Object subObj = inspector.getStructFieldData(obj, field);
    final ObjectInspector subInspector = field.getFieldObjectInspector();
    arr[i] = createObject(subObj, subInspector);
  }
  return new ArrayWritable(Writable.class, arr);
}
/**
 * Copy fields in the input row to the output array of standard objects.
 *
 * @param result output list of standard objects.
 * @param row input row.
 * @param soi Object inspector for the to-be-copied columns.
 * @param objectInspectorOption how to copy primitives (Java vs. Writable objects).
 */
public static void copyToStandardObject(
    List<Object> result,
    Object row,
    StructObjectInspector soi,
    ObjectInspectorCopyOption objectInspectorOption) {
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  for (StructField f : fields) {
    result.add(
        copyToStandardObject(
            soi.getStructFieldData(row, f), f.getFieldObjectInspector(), objectInspectorOption));
  }
}
private int computeBucketNumber(Object row, int numBuckets) throws HiveException {
  if (conf.getWriteType() == AcidUtils.Operation.UPDATE
      || conf.getWriteType() == AcidUtils.Operation.DELETE) {
    // We don't need to evaluate the hash code. Instead read the bucket number directly from
    // the row. I don't need to evaluate any expressions as I know I am reading the ROW__ID
    // column directly.
    Object recIdValue = acidRowInspector.getStructFieldData(row, recIdField);
    int buckNum = bucketInspector.get(recIdInspector.getStructFieldData(recIdValue, bucketField));
    if (isLogTraceEnabled) {
      LOG.trace("Acid choosing bucket number " + buckNum);
    }
    return buckNum;
  } else {
    Object[] bucketFieldValues = new Object[bucketEval.length];
    for (int i = 0; i < bucketEval.length; i++) {
      bucketFieldValues[i] = bucketEval[i].evaluate(row);
    }
    return ObjectInspectorUtils.getBucketNumber(
        bucketFieldValues, bucketObjectInspectors, numBuckets);
  }
}
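// --- Hedged sketch (not from the original source) ----------------------------
// How a bucket number is conventionally derived from the bucketing columns:
// combine per-field hash codes with a 31-multiplier, then reduce modulo the
// bucket count with the sign bit masked off. This mirrors my understanding of
// ObjectInspectorUtils.getBucketNumber's contract; treat the combining formula
// as an assumption, not the authoritative implementation.
public class BucketNumberSketch {
  static int bucketNumber(int[] fieldHashCodes, int numBuckets) {
    int hashCode = 0;
    for (int h : fieldHashCodes) {
      hashCode = 31 * hashCode + h;
    }
    return (hashCode & Integer.MAX_VALUE) % numBuckets;
  }

  public static void main(String[] args) {
    // e.g. a (string, int) bucketing key; an int's field hash is its own value
    System.out.println(bucketNumber(new int[] {"alice".hashCode(), 42}, 16));
  }
}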
private void parseObjectColumn(int column) {
  // don't include column number in message because it causes boxing which is expensive here
  checkArgument(!isPartitionColumn[column], "Column is a partition key");
  loaded[column] = true;
  Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);
  if (fieldData == null) {
    nulls[column] = true;
  } else {
    objects[column] = getBlockObject(types[column], fieldData, fieldInspectors[column]);
    nulls[column] = false;
  }
}
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
  outputByteBuffer.reset();
  StructObjectInspector soi = (StructObjectInspector) objInspector;
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  for (int i = 0; i < columnNames.size(); i++) {
    serialize(
        outputByteBuffer,
        soi.getStructFieldData(obj, fields.get(i)),
        fields.get(i).getFieldObjectInspector(),
        columnSortOrderIsDesc[i]);
  }
  serializeBytesWritable.set(outputByteBuffer.getData(), 0, outputByteBuffer.getLength());
  return serializeBytesWritable;
}
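// --- Hedged sketch (not from the original source) ----------------------------
// Why a per-column "sort order is descending" flag can be applied at encode
// time: flipping every encoded byte reverses unsigned lexicographic order, so
// a plain byte-wise comparator sees that column descending. This is the usual
// binary-sortable trick; treat it as an assumption about what the inner
// serialize(...) call above does with columnSortOrderIsDesc, not a quote of it.
public class InvertBytesSketch {
  static byte[] encodeDescending(byte[] ascendingEncoding) {
    byte[] inverted = new byte[ascendingEncoding.length];
    for (int i = 0; i < ascendingEncoding.length; i++) {
      inverted[i] = (byte) ~ascendingEncoding[i];
    }
    return inverted;
  }

  public static void main(String[] args) {
    // 0x01 < 0x02 ascending; after inversion 0xFE > 0xFD, so the order reverses
    System.out.println(Integer.toHexString(encodeDescending(new byte[] {1, 2})[0] & 0xFF)); // fe
  }
}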
private void parseDoubleColumn(int column) {
  // don't include column number in message because it causes boxing which is expensive here
  checkArgument(!isPartitionColumn[column], "Column is a partition key");
  loaded[column] = true;
  Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);
  if (fieldData == null) {
    nulls[column] = true;
  } else {
    Object fieldValue =
        ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData);
    checkState(fieldValue != null, "fieldValue should not be null");
    doubles[column] = ((Number) fieldValue).doubleValue();
    nulls[column] = false;
  }
}
private void partialReadTest(FileSystem fs, int count, Path file)
    throws IOException, SerDeException {
  LOG.debug("reading " + count + " records");
  long start = System.currentTimeMillis();
  java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
  readCols.add(Integer.valueOf(2));
  readCols.add(Integer.valueOf(3));
  ColumnProjectionUtils.setReadColumnIDs(conf, readCols);
  RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
  LongWritable rowID = new LongWritable();
  BytesRefArrayWritable cols = new BytesRefArrayWritable();
  while (reader.next(rowID)) {
    reader.getCurrentRow(cols);
    cols.resetValid(8);
    Object row = serDe.deserialize(cols);
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    assertEquals("Field size should be 8", 8, fieldRefs.size());
    for (int i : readCols) {
      Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
      Object standardWritableData =
          ObjectInspectorUtils.copyToStandardObject(
              fieldData,
              fieldRefs.get(i).getFieldObjectInspector(),
              ObjectInspectorCopyOption.WRITABLE);
      assertEquals("Field " + i, standardWritableData, expectedPartitalFieldsData[i]);
    }
    assertEquals(
        "Class of the serialized object should be BytesRefArrayWritable",
        BytesRefArrayWritable.class,
        serDe.getSerializedClass());
    BytesRefArrayWritable serializedBytes = (BytesRefArrayWritable) serDe.serialize(row, oi);
    assertEquals("Serialized data", patialS, serializedBytes);
  }
  reader.close();
  long cost = System.currentTimeMillis() - start;
  LOG.debug("reading fully costs:" + cost + " milliseconds");
}
@Override
public synchronized void process(Object row, int tag) throws HiveException {
  StructObjectInspector soi = parentObjInspectors[tag];
  List<? extends StructField> fields = parentFields[tag];
  if (needsTransform[tag]) {
    for (int c = 0; c < fields.size(); c++) {
      outputRow.set(
          c,
          columnTypeResolvers[c].convertIfNecessary(
              soi.getStructFieldData(row, fields.get(c)),
              fields.get(c).getFieldObjectInspector()));
    }
    forward(outputRow, outputObjInspector);
  } else {
    forward(row, inputObjInspectors[tag]);
  }
}
public void fullyReadTest(FileSystem fs, int count, Path file)
    throws IOException, SerDeException {
  LOG.debug("reading " + count + " records");
  long start = System.currentTimeMillis();
  ColumnProjectionUtils.setFullyReadColumns(conf);
  RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
  LongWritable rowID = new LongWritable();
  int actualRead = 0;
  BytesRefArrayWritable cols = new BytesRefArrayWritable();
  while (reader.next(rowID)) {
    reader.getCurrentRow(cols);
    cols.resetValid(8);
    Object row = serDe.deserialize(cols);
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    assertEquals("Field size should be 8", 8, fieldRefs.size());
    for (int i = 0; i < fieldRefs.size(); i++) {
      Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
      Object standardWritableData =
          ObjectInspectorUtils.copyToStandardObject(
              fieldData,
              fieldRefs.get(i).getFieldObjectInspector(),
              ObjectInspectorCopyOption.WRITABLE);
      assertEquals("Field " + i, standardWritableData, expectedFieldsData[i]);
    }
    // Serialize
    assertEquals(
        "Class of the serialized object should be BytesRefArrayWritable",
        BytesRefArrayWritable.class,
        serDe.getSerializedClass());
    BytesRefArrayWritable serializedText = (BytesRefArrayWritable) serDe.serialize(row, oi);
    assertEquals("Serialized data", s, serializedText);
    actualRead++;
  }
  reader.close();
  assertEquals("Expect " + count + " rows, actual read " + actualRead, actualRead, count);
  long cost = System.currentTimeMillis() - start;
  LOG.debug("reading fully costs:" + cost + " milliseconds");
}
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
  final StructObjectInspector outputRowOI = (StructObjectInspector) objInspector;
  final List<? extends StructField> outputFieldRefs = outputRowOI.getAllStructFieldRefs();
  if (outputFieldRefs.size() != numCols) {
    throw new SerDeException(
        "Cannot serialize the object because there are "
            + outputFieldRefs.size()
            + " fields but the table has "
            + numCols
            + " columns.");
  }
  // Get all data out.
  for (int c = 0; c < numCols; c++) {
    final Object field = outputRowOI.getStructFieldData(obj, outputFieldRefs.get(c));
    final ObjectInspector fieldOI = outputFieldRefs.get(c).getFieldObjectInspector();
    // The data must be of type String
    final StringObjectInspector fieldStringOI = (StringObjectInspector) fieldOI;
    // Convert the field to the Java class String, because objects of String type
    // can be stored in String, Text, or some other classes.
    outputFields[c] = fieldStringOI.getPrimitiveJavaObject(field);
  }
  final StringWriter writer = new StringWriter();
  final CSVWriter csv = newWriter(writer, separatorChar, quoteChar, escapeChar);
  try {
    csv.writeNext(outputFields);
    csv.close();
    return new Text(writer.toString());
  } catch (final IOException ioe) {
    throw new SerDeException(ioe);
  }
}
/**
 * Copy specified fields in the input row to the output array of standard objects.
 *
 * @param result output list of standard objects.
 * @param row input row.
 * @param startCol starting column number from the input row.
 * @param numCols number of columns to copy.
 * @param soi Object inspector for the to-be-copied columns.
 * @param objectInspectorOption how to copy primitives (Java vs. Writable objects).
 */
public static void partialCopyToStandardObject(
    List<Object> result,
    Object row,
    int startCol,
    int numCols,
    StructObjectInspector soi,
    ObjectInspectorCopyOption objectInspectorOption) {
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  int i = 0, j = 0;
  for (StructField f : fields) {
    if (i++ >= startCol) {
      result.add(
          copyToStandardObject(
              soi.getStructFieldData(row, f), f.getFieldObjectInspector(), objectInspectorOption));
      if (++j == numCols) {
        break;
      }
    }
  }
}
/**
 * If we received data with tags from ReduceSinkOperators, no keys will match. This should not
 * happen, but is important enough that we want to find out and work around it if some optimized
 * change causes RSO to pass on tags.
 */
private void sanityCheckKeyForTag() throws SerDeException {
  if (hasTag != null) {
    return;
  }
  BinaryComparable b = (BinaryComparable) key;
  Object o = keySerDe.deserialize(key);
  StructObjectInspector soi = (StructObjectInspector) keySerDe.getObjectInspector();
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  Object[] data = new Object[fields.size()];
  List<ObjectInspector> fois = new ArrayList<ObjectInspector>(fields.size());
  for (int i = 0; i < fields.size(); i++) {
    data[i] = soi.getStructFieldData(o, fields.get(i));
    fois.add(fields.get(i).getFieldObjectInspector());
  }
  Output output = new Output();
  BinarySortableSerDe.serializeStruct(output, data, fois, new boolean[fields.size()]);
  hasTag = (output.getLength() != b.getLength());
  if (hasTag) {
    LOG.error("Tag found in keys and will be removed. This should not happen.");
    if (output.getLength() != (b.getLength() - 1)) {
      throw new SerDeException(
          "Unexpected tag: " + b.getLength() + " reserialized to " + output.getLength());
    }
  }
}
@Override
public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException {
  assertState(OperationState.FINISHED);
  ArrayList<String> rows = new ArrayList<String>();
  driver.setMaxRows((int) maxRows);
  try {
    driver.getResults(rows);
    getSerDe();
    StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();
    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
    RowSet rowSet = new RowSet();
    Object[] deserializedFields = new Object[fieldRefs.size()];
    Object rowObj;
    ObjectInspector fieldOI;
    for (String rowString : rows) {
      rowObj = serde.deserialize(new BytesWritable(rowString.getBytes()));
      for (int i = 0; i < fieldRefs.size(); i++) {
        StructField fieldRef = fieldRefs.get(i);
        fieldOI = fieldRef.getFieldObjectInspector();
        deserializedFields[i] =
            convertLazyToJava(soi.getStructFieldData(rowObj, fieldRef), fieldOI);
      }
      rowSet.addRow(resultSchema, deserializedFields);
    }
    return rowSet;
  } catch (IOException e) {
    throw new HiveSQLException(e);
  } catch (CommandNeedRetryException e) {
    throw new HiveSQLException(e);
  } catch (Exception e) {
    throw new HiveSQLException(e);
  }
}
public static void copyStructToArray(
    Object o,
    ObjectInspector oi,
    ObjectInspectorCopyOption objectInspectorOption,
    Object[] dest,
    int offset)
    throws SerDeException {
  if (o == null) {
    return;
  }
  if (oi.getCategory() != Category.STRUCT) {
    throw new SerDeException("Unexpected category " + oi.getCategory());
  }
  StructObjectInspector soi = (StructObjectInspector) oi;
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  for (int i = 0; i < fields.size(); ++i) {
    StructField f = fields.get(i);
    dest[offset + i] =
        copyToStandardObject(
            soi.getStructFieldData(o, f), f.getFieldObjectInspector(), objectInspectorOption);
  }
}
private static Block serializeStruct(
    Type type, BlockBuilder builder, Object object, StructObjectInspector inspector) {
  if (object == null) {
    requireNonNull(builder, "parent builder is null").appendNull();
    return null;
  }
  List<Type> typeParameters = type.getTypeParameters();
  List<? extends StructField> allStructFieldRefs = inspector.getAllStructFieldRefs();
  checkArgument(typeParameters.size() == allStructFieldRefs.size());
  BlockBuilder currentBuilder;
  if (builder != null) {
    currentBuilder = builder.beginBlockEntry();
  } else {
    currentBuilder =
        new InterleavedBlockBuilder(
            typeParameters, new BlockBuilderStatus(), typeParameters.size());
  }
  for (int i = 0; i < typeParameters.size(); i++) {
    StructField field = allStructFieldRefs.get(i);
    serializeObject(
        typeParameters.get(i),
        currentBuilder,
        inspector.getStructFieldData(object, field),
        field.getFieldObjectInspector());
  }
  if (builder != null) {
    builder.closeEntry();
    return null;
  } else {
    return currentBuilder.build();
  }
}
@Override
public void processOp(Object row, int tag) throws HiveException {
  try {
    reportProgress();
    // get alias
    alias = (byte) tag;
    if ((lastAlias == null) || (!lastAlias.equals(alias))) {
      nextSz = joinEmitInterval;
    }
    ArrayList<Object> nr =
        JoinUtil.computeValues(
            row,
            joinValues.get(alias),
            joinValuesObjectInspectors.get(alias),
            joinFilters.get(alias),
            joinFilterObjectInspectors.get(alias),
            noOuterJoin);
    if (handleSkewJoin) {
      skewJoinKeyContext.handleSkew(tag);
    }
    // number of rows for the key in the given table
    int sz = storage.get(alias).size();
    StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag];
    StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY.toString());
    Object keyObject = soi.getStructFieldData(row, sf);
    // Are we consuming too much memory?
    if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0)) {
      if (sz == joinEmitInterval) {
        // The input is sorted by alias, so if we are already in the last join
        // operand, we can emit some results now.
        // Note this has to be done before adding the current row to the
        // storage, to preserve the correctness for outer joins.
        checkAndGenObject();
        storage.get(alias).clear();
      }
    } else {
      if (sz == nextSz) {
        // Output a warning if we reached at least 1000 rows for a join operand.
        // We won't output a warning for the last join operand since its size
        // will never reach joinEmitInterval.
        LOG.warn("table " + alias + " has " + sz + " rows for join key " + keyObject);
        nextSz = getNextSize(nextSz);
      }
    }
    // Add the value to the vector
    storage.get(alias).add(nr);
    // if the join key is null, process each row in a different group.
    if (SerDeUtils.hasAnyNullObject(keyObject, sf.getFieldObjectInspector())) {
      endGroup();
      startGroup();
    }
  } catch (Exception e) {
    e.printStackTrace();
    throw new HiveException(e);
  }
}
public static Object copyToStandardObject(
    Object o, ObjectInspector oi, ObjectInspectorCopyOption objectInspectorOption) {
  if (o == null) {
    return null;
  }
  Object result = null;
  switch (oi.getCategory()) {
    case PRIMITIVE: {
      PrimitiveObjectInspector loi = (PrimitiveObjectInspector) oi;
      if (objectInspectorOption == ObjectInspectorCopyOption.DEFAULT) {
        objectInspectorOption =
            loi.preferWritable()
                ? ObjectInspectorCopyOption.WRITABLE
                : ObjectInspectorCopyOption.JAVA;
      }
      switch (objectInspectorOption) {
        case JAVA:
          result = loi.getPrimitiveJavaObject(o);
          if (loi.getPrimitiveCategory()
              == PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMP) {
            result =
                PrimitiveObjectInspectorFactory.javaTimestampObjectInspector.copyObject(result);
          }
          break;
        case WRITABLE:
          result = loi.getPrimitiveWritableObject(loi.copyObject(o));
          break;
      }
      break;
    }
    case LIST: {
      ListObjectInspector loi = (ListObjectInspector) oi;
      int length = loi.getListLength(o);
      ArrayList<Object> list = new ArrayList<Object>(length);
      for (int i = 0; i < length; i++) {
        list.add(
            copyToStandardObject(
                loi.getListElement(o, i),
                loi.getListElementObjectInspector(),
                objectInspectorOption));
      }
      result = list;
      break;
    }
    case MAP: {
      MapObjectInspector moi = (MapObjectInspector) oi;
      HashMap<Object, Object> map = new HashMap<Object, Object>();
      Map<? extends Object, ? extends Object> omap = moi.getMap(o);
      for (Map.Entry<? extends Object, ? extends Object> entry : omap.entrySet()) {
        map.put(
            copyToStandardObject(
                entry.getKey(), moi.getMapKeyObjectInspector(), objectInspectorOption),
            copyToStandardObject(
                entry.getValue(), moi.getMapValueObjectInspector(), objectInspectorOption));
      }
      result = map;
      break;
    }
    case STRUCT: {
      StructObjectInspector soi = (StructObjectInspector) oi;
      List<? extends StructField> fields = soi.getAllStructFieldRefs();
      ArrayList<Object> struct = new ArrayList<Object>(fields.size());
      for (StructField f : fields) {
        struct.add(
            copyToStandardObject(
                soi.getStructFieldData(o, f), f.getFieldObjectInspector(), objectInspectorOption));
      }
      result = struct;
      break;
    }
    case UNION: {
      UnionObjectInspector uoi = (UnionObjectInspector) oi;
      List<ObjectInspector> objectInspectors = uoi.getObjectInspectors();
      result =
          copyToStandardObject(
              uoi.getField(o), objectInspectors.get(uoi.getTag(o)), objectInspectorOption);
      break;
    }
    default: {
      throw new RuntimeException("Unknown ObjectInspector category!");
    }
  }
  return result;
}
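// --- Hedged usage sketch (not from the original source) ----------------------
// Driving the deep copy above with Hive's standard Java inspectors: build a
// struct inspector over {name: string, age: int}, then copy a row into fresh
// standard Java objects. The factory calls are real serde2 APIs; the demo
// class itself is hypothetical.
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class CopyToStandardObjectExample {
  public static void main(String[] args) {
    StructObjectInspector soi =
        ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("name", "age"),
            Arrays.<ObjectInspector>asList(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                PrimitiveObjectInspectorFactory.javaIntObjectInspector));
    List<Object> row = Arrays.<Object>asList("alice", 42);
    // STRUCT branch: returns a new ArrayList with each field copied recursively
    Object copy = ObjectInspectorUtils.copyToStandardObject(row, soi, ObjectInspectorCopyOption.JAVA);
    System.out.println(copy); // [alice, 42]
  }
}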
public static int hashCode(Object o, ObjectInspector objIns) {
  if (o == null) {
    return 0;
  }
  switch (objIns.getCategory()) {
    case PRIMITIVE: {
      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) objIns;
      switch (poi.getPrimitiveCategory()) {
        case VOID:
          return 0;
        case BOOLEAN:
          return ((BooleanObjectInspector) poi).get(o) ? 1 : 0;
        case BYTE:
          return ((ByteObjectInspector) poi).get(o);
        case SHORT:
          return ((ShortObjectInspector) poi).get(o);
        case INT:
          return ((IntObjectInspector) poi).get(o);
        case LONG: {
          long a = ((LongObjectInspector) poi).get(o);
          return (int) ((a >>> 32) ^ a);
        }
        case FLOAT:
          return Float.floatToIntBits(((FloatObjectInspector) poi).get(o));
        case DOUBLE: {
          // This hash function returns the same result as Double.hashCode()
          // while DoubleWritable.hashCode returns a different result.
          long a = Double.doubleToLongBits(((DoubleObjectInspector) poi).get(o));
          return (int) ((a >>> 32) ^ a);
        }
        case STRING: {
          // This hash function returns the same result as String.hashCode() when
          // all characters are ASCII, while Text.hashCode() always returns a
          // different result.
          Text t = ((StringObjectInspector) poi).getPrimitiveWritableObject(o);
          int r = 0;
          for (int i = 0; i < t.getLength(); i++) {
            r = r * 31 + t.getBytes()[i];
          }
          return r;
        }
        case CHAR:
          return ((HiveCharObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
        case VARCHAR:
          return ((HiveVarcharObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
        case BINARY:
          return ((BinaryObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
        case DATE:
          return ((DateObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
        case TIMESTAMP: {
          TimestampWritable t = ((TimestampObjectInspector) poi).getPrimitiveWritableObject(o);
          return t.hashCode();
        }
        case INTERVAL_YEAR_MONTH: {
          HiveIntervalYearMonthWritable intervalYearMonth =
              ((HiveIntervalYearMonthObjectInspector) poi).getPrimitiveWritableObject(o);
          return intervalYearMonth.hashCode();
        }
        case INTERVAL_DAY_TIME: {
          HiveIntervalDayTimeWritable intervalDayTime =
              ((HiveIntervalDayTimeObjectInspector) poi).getPrimitiveWritableObject(o);
          return intervalDayTime.hashCode();
        }
        case DECIMAL:
          return ((HiveDecimalObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
        default: {
          throw new RuntimeException("Unknown type: " + poi.getPrimitiveCategory());
        }
      }
    }
    case LIST: {
      int r = 0;
      ListObjectInspector listOI = (ListObjectInspector) objIns;
      ObjectInspector elemOI = listOI.getListElementObjectInspector();
      for (int ii = 0; ii < listOI.getListLength(o); ++ii) {
        r = 31 * r + hashCode(listOI.getListElement(o, ii), elemOI);
      }
      return r;
    }
    case MAP: {
      int r = 0;
      MapObjectInspector mapOI = (MapObjectInspector) objIns;
      ObjectInspector keyOI = mapOI.getMapKeyObjectInspector();
      ObjectInspector valueOI = mapOI.getMapValueObjectInspector();
      Map<?, ?> map = mapOI.getMap(o);
      for (Map.Entry<?, ?> entry : map.entrySet()) {
        r += hashCode(entry.getKey(), keyOI) ^ hashCode(entry.getValue(), valueOI);
      }
      return r;
    }
    case STRUCT: {
      int r = 0;
      StructObjectInspector structOI = (StructObjectInspector) objIns;
      List<? extends StructField> fields = structOI.getAllStructFieldRefs();
      for (StructField field : fields) {
        r = 31 * r
            + hashCode(structOI.getStructFieldData(o, field), field.getFieldObjectInspector());
      }
      return r;
    }
    case UNION: {
      UnionObjectInspector uOI = (UnionObjectInspector) objIns;
      byte tag = uOI.getTag(o);
      return hashCode(uOI.getField(o), uOI.getObjectInspectors().get(tag));
    }
    default:
      throw new RuntimeException("Unknown type: " + objIns.getTypeName());
  }
}
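// --- Hedged sanity check (not from the original source) ----------------------
// A standalone verification of the STRING-branch comment above: accumulating
// r = r * 31 + byte over UTF-8 bytes matches String.hashCode() for pure-ASCII
// input, because each ASCII byte equals its char value. Plain JDK code, no
// Hive dependencies.
import java.nio.charset.StandardCharsets;

public class AsciiHashCheck {
  public static void main(String[] args) {
    String s = "hive";
    byte[] utf8 = s.getBytes(StandardCharsets.UTF_8);
    int r = 0;
    for (byte b : utf8) {
      r = r * 31 + b;
    }
    System.out.println(r == s.hashCode()); // true for ASCII-only strings
  }
}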
/** Compare two objects with their respective ObjectInspectors. */
public static int compare(
    Object o1,
    ObjectInspector oi1,
    Object o2,
    ObjectInspector oi2,
    MapEqualComparer mapEqualComparer) {
  if (oi1.getCategory() != oi2.getCategory()) {
    return oi1.getCategory().compareTo(oi2.getCategory());
  }
  if (o1 == null) {
    return o2 == null ? 0 : -1;
  } else if (o2 == null) {
    return 1;
  }
  switch (oi1.getCategory()) {
    case PRIMITIVE: {
      PrimitiveObjectInspector poi1 = ((PrimitiveObjectInspector) oi1);
      PrimitiveObjectInspector poi2 = ((PrimitiveObjectInspector) oi2);
      if (poi1.getPrimitiveCategory() != poi2.getPrimitiveCategory()) {
        return poi1.getPrimitiveCategory().compareTo(poi2.getPrimitiveCategory());
      }
      switch (poi1.getPrimitiveCategory()) {
        case VOID:
          return 0;
        case BOOLEAN: {
          int v1 = ((BooleanObjectInspector) poi1).get(o1) ? 1 : 0;
          int v2 = ((BooleanObjectInspector) poi2).get(o2) ? 1 : 0;
          return v1 - v2;
        }
        case BYTE: {
          int v1 = ((ByteObjectInspector) poi1).get(o1);
          int v2 = ((ByteObjectInspector) poi2).get(o2);
          return v1 - v2;
        }
        case SHORT: {
          int v1 = ((ShortObjectInspector) poi1).get(o1);
          int v2 = ((ShortObjectInspector) poi2).get(o2);
          return v1 - v2;
        }
        case INT: {
          int v1 = ((IntObjectInspector) poi1).get(o1);
          int v2 = ((IntObjectInspector) poi2).get(o2);
          return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0);
        }
        case LONG: {
          long v1 = ((LongObjectInspector) poi1).get(o1);
          long v2 = ((LongObjectInspector) poi2).get(o2);
          return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0);
        }
        case FLOAT: {
          float v1 = ((FloatObjectInspector) poi1).get(o1);
          float v2 = ((FloatObjectInspector) poi2).get(o2);
          return Float.compare(v1, v2);
        }
        case DOUBLE: {
          double v1 = ((DoubleObjectInspector) poi1).get(o1);
          double v2 = ((DoubleObjectInspector) poi2).get(o2);
          return Double.compare(v1, v2);
        }
        case STRING: {
          if (poi1.preferWritable() || poi2.preferWritable()) {
            Text t1 = (Text) poi1.getPrimitiveWritableObject(o1);
            Text t2 = (Text) poi2.getPrimitiveWritableObject(o2);
            return t1 == null ? (t2 == null ? 0 : -1) : (t2 == null ? 1 : t1.compareTo(t2));
          } else {
            String s1 = (String) poi1.getPrimitiveJavaObject(o1);
            String s2 = (String) poi2.getPrimitiveJavaObject(o2);
            return s1 == null ? (s2 == null ? 0 : -1) : (s2 == null ? 1 : s1.compareTo(s2));
          }
        }
        case CHAR: {
          HiveCharWritable t1 = ((HiveCharObjectInspector) poi1).getPrimitiveWritableObject(o1);
          HiveCharWritable t2 = ((HiveCharObjectInspector) poi2).getPrimitiveWritableObject(o2);
          return t1.compareTo(t2);
        }
        case VARCHAR: {
          HiveVarcharWritable t1 =
              ((HiveVarcharObjectInspector) poi1).getPrimitiveWritableObject(o1);
          HiveVarcharWritable t2 =
              ((HiveVarcharObjectInspector) poi2).getPrimitiveWritableObject(o2);
          return t1.compareTo(t2);
        }
        case BINARY: {
          BytesWritable bw1 = ((BinaryObjectInspector) poi1).getPrimitiveWritableObject(o1);
          BytesWritable bw2 = ((BinaryObjectInspector) poi2).getPrimitiveWritableObject(o2);
          return bw1.compareTo(bw2);
        }
        case DATE: {
          DateWritable d1 = ((DateObjectInspector) poi1).getPrimitiveWritableObject(o1);
          DateWritable d2 = ((DateObjectInspector) poi2).getPrimitiveWritableObject(o2);
          return d1.compareTo(d2);
        }
        case TIMESTAMP: {
          TimestampWritable t1 = ((TimestampObjectInspector) poi1).getPrimitiveWritableObject(o1);
          TimestampWritable t2 = ((TimestampObjectInspector) poi2).getPrimitiveWritableObject(o2);
          return t1.compareTo(t2);
        }
        case INTERVAL_YEAR_MONTH: {
          HiveIntervalYearMonthWritable i1 =
              ((HiveIntervalYearMonthObjectInspector) poi1).getPrimitiveWritableObject(o1);
          HiveIntervalYearMonthWritable i2 =
              ((HiveIntervalYearMonthObjectInspector) poi2).getPrimitiveWritableObject(o2);
          return i1.compareTo(i2);
        }
        case INTERVAL_DAY_TIME: {
          HiveIntervalDayTimeWritable i1 =
              ((HiveIntervalDayTimeObjectInspector) poi1).getPrimitiveWritableObject(o1);
          HiveIntervalDayTimeWritable i2 =
              ((HiveIntervalDayTimeObjectInspector) poi2).getPrimitiveWritableObject(o2);
          return i1.compareTo(i2);
        }
        case DECIMAL: {
          HiveDecimalWritable t1 =
              ((HiveDecimalObjectInspector) poi1).getPrimitiveWritableObject(o1);
          HiveDecimalWritable t2 =
              ((HiveDecimalObjectInspector) poi2).getPrimitiveWritableObject(o2);
          return t1.compareTo(t2);
        }
        default: {
          throw new RuntimeException("Unknown type: " + poi1.getPrimitiveCategory());
        }
      }
    }
    case STRUCT: {
      StructObjectInspector soi1 = (StructObjectInspector) oi1;
      StructObjectInspector soi2 = (StructObjectInspector) oi2;
      List<? extends StructField> fields1 = soi1.getAllStructFieldRefs();
      List<? extends StructField> fields2 = soi2.getAllStructFieldRefs();
      int minimum = Math.min(fields1.size(), fields2.size());
      for (int i = 0; i < minimum; i++) {
        int r =
            compare(
                soi1.getStructFieldData(o1, fields1.get(i)),
                fields1.get(i).getFieldObjectInspector(),
                soi2.getStructFieldData(o2, fields2.get(i)),
                fields2.get(i).getFieldObjectInspector(),
                mapEqualComparer);
        if (r != 0) {
          return r;
        }
      }
      return fields1.size() - fields2.size();
    }
    case LIST: {
      ListObjectInspector loi1 = (ListObjectInspector) oi1;
      ListObjectInspector loi2 = (ListObjectInspector) oi2;
      int minimum = Math.min(loi1.getListLength(o1), loi2.getListLength(o2));
      for (int i = 0; i < minimum; i++) {
        int r =
            compare(
                loi1.getListElement(o1, i),
                loi1.getListElementObjectInspector(),
                loi2.getListElement(o2, i),
                loi2.getListElementObjectInspector(),
                mapEqualComparer);
        if (r != 0) {
          return r;
        }
      }
      return loi1.getListLength(o1) - loi2.getListLength(o2);
    }
    case MAP: {
      if (mapEqualComparer == null) {
        throw new RuntimeException("Compare on map type not supported!");
      } else {
        return mapEqualComparer.compare(
            o1, (MapObjectInspector) oi1, o2, (MapObjectInspector) oi2);
      }
    }
    case UNION: {
      UnionObjectInspector uoi1 = (UnionObjectInspector) oi1;
      UnionObjectInspector uoi2 = (UnionObjectInspector) oi2;
      byte tag1 = uoi1.getTag(o1);
      byte tag2 = uoi2.getTag(o2);
      if (tag1 != tag2) {
        return tag1 - tag2;
      }
      return compare(
          uoi1.getField(o1),
          uoi1.getObjectInspectors().get(tag1),
          uoi2.getField(o2),
          uoi2.getObjectInspectors().get(tag2),
          mapEqualComparer);
    }
    default:
      throw new RuntimeException("Compare on unknown type: " + oi1.getCategory());
  }
}
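// --- Hedged usage sketch (not from the original source) ----------------------
// Comparing two struct rows with the method above via Hive's standard Java
// inspectors. FullMapEqualComparer and the factory calls are real serde2
// classes as I understand them; the demo class itself is hypothetical.
import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.FullMapEqualComparer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class CompareSketch {
  public static void main(String[] args) {
    StructObjectInspector soi =
        ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("name", "age"),
            Arrays.<ObjectInspector>asList(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                PrimitiveObjectInspectorFactory.javaIntObjectInspector));
    Object row1 = Arrays.<Object>asList("alice", 1);
    Object row2 = Arrays.<Object>asList("alice", 2);
    // Struct fields compare in declaration order; the first unequal field decides.
    int cmp = ObjectInspectorUtils.compare(row1, soi, row2, soi, new FullMapEqualComparer());
    System.out.println(cmp < 0); // true: 1 < 2 in the second field
  }
}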
public void testSimpleReadAndWrite() throws IOException, SerDeException {
  fs.delete(file, true);
  byte[][] record_1 = {
    "123".getBytes("UTF-8"),
    "456".getBytes("UTF-8"),
    "789".getBytes("UTF-8"),
    "1000".getBytes("UTF-8"),
    "5.3".getBytes("UTF-8"),
    "hive and hadoop".getBytes("UTF-8"),
    new byte[0],
    "NULL".getBytes("UTF-8")
  };
  byte[][] record_2 = {
    "100".getBytes("UTF-8"),
    "200".getBytes("UTF-8"),
    "123".getBytes("UTF-8"),
    "1000".getBytes("UTF-8"),
    "5.3".getBytes("UTF-8"),
    "hive and hadoop".getBytes("UTF-8"),
    new byte[0],
    "NULL".getBytes("UTF-8")
  };
  RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
  RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());
  BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
  for (int i = 0; i < record_1.length; i++) {
    BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
    bytes.set(i, cu);
  }
  writer.append(bytes);
  bytes.clear();
  for (int i = 0; i < record_2.length; i++) {
    BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
    bytes.set(i, cu);
  }
  writer.append(bytes);
  writer.close();
  Object[] expectedRecord_1 = {
    new ByteWritable((byte) 123),
    new ShortWritable((short) 456),
    new IntWritable(789),
    new LongWritable(1000),
    new DoubleWritable(5.3),
    new Text("hive and hadoop"),
    null,
    null
  };
  Object[] expectedRecord_2 = {
    new ByteWritable((byte) 100),
    new ShortWritable((short) 200),
    new IntWritable(123),
    new LongWritable(1000),
    new DoubleWritable(5.3),
    new Text("hive and hadoop"),
    null,
    null
  };
  RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
  LongWritable rowID = new LongWritable();
  for (int i = 0; i < 2; i++) {
    reader.next(rowID);
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(8);
    Object row = serDe.deserialize(cols);
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    assertEquals("Field size should be 8", 8, fieldRefs.size());
    for (int j = 0; j < fieldRefs.size(); j++) {
      Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
      Object standardWritableData =
          ObjectInspectorUtils.copyToStandardObject(
              fieldData,
              fieldRefs.get(j).getFieldObjectInspector(),
              ObjectInspectorCopyOption.WRITABLE);
      if (i == 0) {
        assertEquals("Field " + j, standardWritableData, expectedRecord_1[j]);
      } else {
        assertEquals("Field " + j, standardWritableData, expectedRecord_2[j]);
      }
    }
  }
  reader.close();
}