/**
 * Uses the ColumnarSerDe to deserialize the buff:BytesRefArrayWritable into a
 * ColumnarStruct instance.
 *
 * @param buff BytesRefArrayWritable
 * @return ColumnarStruct
 */
private ColumnarStruct readColumnarStruct(BytesRefArrayWritable buff) {
    // use ColumnarSerDe to deserialize row
    ColumnarStruct struct = null;
    try {
        struct = (ColumnarStruct) serde.deserialize(buff);
    } catch (SerDeException e) {
        LOG.error(e.toString(), e);
        throw new RuntimeException(e.toString(), e);
    }
    return struct;
}
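/*
 * For context, a minimal sketch (not the original implementation) of how
 * readColumnarStruct is typically driven from the loader's getNext(): pull the next
 * row from the HiveRCRecordReader, deserialize it, and convert it to a Pig Tuple.
 * The readColumnarTuple helper and the reader's value type are assumptions here.
 */
@Override
public Tuple getNext() throws IOException {
    try {
        if (!reader.nextKeyValue()) {
            return null; // end of this split
        }
        BytesRefArrayWritable buff = reader.getCurrentValue();
        ColumnarStruct struct = readColumnarStruct(buff);
        // assumed helper that maps the struct's fields onto a Pig Tuple
        return readColumnarTuple(struct);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}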
// Load the hash table
@Override
public void cleanUpInputFileChangedOp() throws HiveException {
    try {
        if (firstRow) {
            // generate the map metadata
            generateMapMetaData();
            firstRow = false;
        }
        loadHashTable();
    } catch (SerDeException e) {
        e.printStackTrace();
        throw new HiveException(e);
    }
}
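/*
 * Why the hash-table load hangs off cleanUpInputFileChangedOp: for a bucketed map
 * join the small-table hash tables must be reloaded for every new big-table bucket
 * file, whereas a plain map join only needs them loaded once per task. A minimal
 * sketch of that guard follows; the field and parameter names are hypothetical, and
 * the real loadHashTable additionally resolves the local hash-table files and
 * populates mapJoinTables.
 */
private boolean hashTablesLoaded = false;

private void loadHashTableSketch(boolean inputFileChangeSensitive) throws HiveException {
    // plain map join: the small tables do not depend on which big-table file is being
    // read, so load them at most once per task
    if (!inputFileChangeSensitive && hashTablesLoaded) {
        return;
    }
    hashTablesLoaded = true;
    // bucketed map join (inputFileChangeSensitive == true) falls through every time:
    // re-read the serialized small-table hash maps matching the current bucket file
    // and repopulate mapJoinTables here
}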
@Override
public void prepareToRead(@SuppressWarnings("rawtypes") RecordReader reader, PigSplit split)
        throws IOException {

    this.reader = (HiveRCRecordReader) reader;

    // check that the required indexes actually exist, i.e. the columns that
    // should be read.
    // Assuming this is always defined simplifies the readColumnarTuple logic.
    int[] requiredIndexes = getRequiredColumns();
    if (requiredIndexes == null) {

        int fieldLen = pigSchema.getFields().length;

        // the partition keys, if any, should already exist
        String[] partitionKeys = getPartitionKeys(null, null);
        if (partitionKeys != null) {
            fieldLen = partitionKeys.length;
        }

        requiredIndexes = new int[fieldLen];

        for (int i = 0; i < fieldLen; i++) {
            requiredIndexes[i] = i;
        }

        this.requiredColumns = requiredIndexes;
    }

    try {
        serde = new ColumnarSerDe();
        serde.initialize(hiveConf, props);
    } catch (SerDeException e) {
        LOG.error(e.toString(), e);
        throw new IOException(e);
    }
}
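/*
 * A self-contained sketch of what ColumnarSerDe.initialize(conf, props) expects from
 * the Properties built for it above: the standard "columns" and "columns.types" keys
 * describing the RCFile schema. The class name, column names, and types below are
 * illustrative, not taken from the original loader.
 */
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;

public class ColumnarSerDeInitSketch {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty("columns", "id,name,age");          // comma-separated column names
        props.setProperty("columns.types", "int:string:int"); // colon-separated column types

        ColumnarSerDe serde = new ColumnarSerDe();
        serde.initialize(new Configuration(), props);
        System.out.println("ColumnarSerDe initialized with: " + props);
    }
}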
@Override
public void processOp(Object row, int tag) throws HiveException {
    try {
        if (firstRow) {
            // generate the map metadata
            generateMapMetaData();
            firstRow = false;
        }

        // get alias
        alias = order[tag];
        // alias = (byte)tag;

        if ((lastAlias == null) || (!lastAlias.equals(alias))) {
            nextSz = joinEmitInterval;
        }

        // compute keys and values as StandardObjects
        AbstractMapJoinKey key = JoinUtil.computeMapJoinKeys(row, joinKeys.get(alias),
                joinKeysObjectInspectors.get(alias));
        ArrayList<Object> value = JoinUtil.computeValues(row, joinValues.get(alias),
                joinValuesObjectInspectors.get(alias), joinFilters.get(alias),
                joinFilterObjectInspectors.get(alias), noOuterJoin);

        // Add the value to the ArrayList
        storage.get((byte) tag).add(value);

        for (Byte pos : order) {
            if (pos.intValue() != tag) {

                MapJoinObjectValue o = mapJoinTables.get(pos).get(key);
                MapJoinRowContainer<ArrayList<Object>> rowContainer = rowContainerMap.get(pos);

                // there is no join-value or join-key has all null elements
                if (o == null || key.hasAnyNulls()) {
                    if (noOuterJoin) {
                        storage.put(pos, emptyList);
                    } else {
                        storage.put(pos, dummyObjVectors[pos.intValue()]);
                    }
                } else {
                    rowContainer.reset(o.getObj());
                    storage.put(pos, rowContainer);
                }
            }
        }

        // generate the output records
        checkAndGenObject();

        // done with the row
        storage.get((byte) tag).clear();

        for (Byte pos : order) {
            if (pos.intValue() != tag) {
                storage.put(pos, null);
            }
        }

    } catch (SerDeException e) {
        e.printStackTrace();
        throw new HiveException(e);
    }
}
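/*
 * At its core processOp is a probe-and-emit loop: compute the join key for the
 * incoming row, look it up in every other alias's hash table, stage the matching rows
 * (or an empty / null-padded placeholder depending on inner vs. outer join), and let
 * checkAndGenObject emit the cross product. A simplified, self-contained illustration
 * of that pattern, independent of the Hive types above:
 */
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class MapJoinProbeSketch {

    // Probe one small-table hash map with the big-table row's join key and print the
    // joined output; with no match, an outer join emits a null-padded row instead.
    static void probeAndEmit(List<Object> bigRow, Object key,
                             Map<Object, List<List<String>>> smallTable,
                             boolean noOuterJoin) {
        List<List<String>> matches = (key == null) ? null : smallTable.get(key);
        if (matches == null) {
            if (!noOuterJoin) {
                System.out.println(bigRow + " + <null small-table row>");
            }
            return; // inner join: no match means no output for this row
        }
        for (List<String> smallRow : matches) {
            System.out.println(bigRow + " + " + smallRow);
        }
    }

    public static void main(String[] args) {
        Map<Object, List<List<String>>> table = new HashMap<>();
        table.put(1, Arrays.asList(Arrays.asList("a"), Arrays.asList("b")));

        probeAndEmit(Arrays.asList(1, "x"), 1, table, true);   // emits two joined rows
        probeAndEmit(Arrays.asList(2, "y"), 2, table, false);  // emits one null-padded row
    }
}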