示例#1
0
  /**
   * Deserializes one row buffer with the {@code serde} field and returns it as a
   * {@link ColumnarStruct}.
   *
   * @param buff the row buffer handed to {@code serde.deserialize}
   * @return the deserialized value, cast to ColumnarStruct
   * @throws RuntimeException wrapping the SerDeException when deserialization fails; the failure
   *     is logged at error level before being rethrown
   */
  private ColumnarStruct readColumnarStruct(BytesRefArrayWritable buff) {
    try {
      // hand the raw row to the serde and return its result directly
      return (ColumnarStruct) serde.deserialize(buff);
    } catch (SerDeException failure) {
      final String description = failure.toString();
      LOG.error(description, failure);
      throw new RuntimeException(description, failure);
    }
  }
  /**
   * Generates the map metadata if the {@code firstRow} flag is still set (clearing the flag once
   * that succeeds), then loads the hash table.
   *
   * @throws HiveException wrapping any SerDeException raised while generating the metadata or
   *     loading the hash table
   */
  @Override
  public void cleanUpInputFileChangedOp() throws HiveException {
    try {
      if (firstRow) {
        // generate the map metadata
        generateMapMetaData();
        firstRow = false;
      }

      loadHashTable();
    } catch (SerDeException e) {
      // The SerDeException travels as the cause of the HiveException, so it is not
      // additionally dumped to stderr here.
      throw new HiveException(e);
    }
  }
示例#3
0
  /**
   * Stores the supplied record reader, makes sure a set of required column indexes is available,
   * and creates and initializes the {@code serde} used to deserialize rows.
   *
   * <p>When {@code getRequiredColumns()} returns {@code null}, {@code requiredColumns} is set to
   * the indexes {@code 0..n-1}, where {@code n} is the number of fields in {@code pigSchema} plus
   * the number of partition keys (if any).
   *
   * @param reader the record reader; must be a {@code HiveRCRecordReader}
   * @param split the split being read; not used by this method
   * @throws IOException if the serde cannot be initialized (wrapping the SerDeException)
   */
  @Override
  public void prepareToRead(@SuppressWarnings("rawtypes") RecordReader reader, PigSplit split)
      throws IOException {

    this.reader = (HiveRCRecordReader) reader;

    // check that the required indexes actually exist i.e. the columns that
    // should be read.
    // assuming this is always defined simplifies the readColumnarTuple
    // logic.

    int[] requiredIndexes = getRequiredColumns();
    if (requiredIndexes == null) {

      int fieldLen = pigSchema.getFields().length;

      // Partition keys, when present, are counted in addition to the schema fields.
      // They must extend the count, not replace it, otherwise schema columns are dropped.
      String[] partitionKeys = getPartitionKeys(null, null);
      if (partitionKeys != null) {
        fieldLen += partitionKeys.length;
      }

      requiredIndexes = new int[fieldLen];

      // identity mapping: every column index is required
      for (int i = 0; i < fieldLen; i++) {
        requiredIndexes[i] = i;
      }

      this.requiredColumns = requiredIndexes;
    }

    try {
      serde = new ColumnarSerDe();
      serde.initialize(hiveConf, props);
    } catch (SerDeException e) {
      LOG.error(e.toString(), e);
      throw new IOException(e);
    }
  }
  /**
   * Processes one input row of the map join.
   *
   * <p>The row's join key and values are computed, the values are added to the {@code storage}
   * entry for {@code tag}, and the key is looked up in {@code mapJoinTables} for every other
   * position in {@code order}. {@code checkAndGenObject()} is then called to generate output, and
   * the per-row entries in {@code storage} are cleared.
   *
   * @param row the incoming row
   * @param tag index into {@code order} selecting the alias this row belongs to; also used (as a
   *     byte) as this row's key in {@code storage}
   * @throws HiveException wrapping any SerDeException raised while processing the row
   */
  @Override
  public void processOp(Object row, int tag) throws HiveException {

    try {
      if (firstRow) {
        // generate the map metadata
        generateMapMetaData();
        firstRow = false;
      }

      // get alias
      alias = order[tag];

      // reset nextSz whenever there is no previous alias or the alias has changed
      if ((lastAlias == null) || (!lastAlias.equals(alias))) {
        nextSz = joinEmitInterval;
      }

      // compute keys and values as StandardObjects
      AbstractMapJoinKey key =
          JoinUtil.computeMapJoinKeys(
              row, joinKeys.get(alias), joinKeysObjectInspectors.get(alias));
      ArrayList<Object> value =
          JoinUtil.computeValues(
              row,
              joinValues.get(alias),
              joinValuesObjectInspectors.get(alias),
              joinFilters.get(alias),
              joinFilterObjectInspectors.get(alias),
              noOuterJoin);

      // Add the value to the ArrayList
      storage.get((byte) tag).add(value);

      // for every other position, install the rows matching this key into storage
      for (Byte pos : order) {
        if (pos.intValue() != tag) {

          MapJoinObjectValue o = mapJoinTables.get(pos).get(key);
          MapJoinRowContainer<ArrayList<Object>> rowContainer = rowContainerMap.get(pos);

          // there is no join-value or join-key has all null elements
          if (o == null || key.hasAnyNulls()) {
            if (noOuterJoin) {
              storage.put(pos, emptyList);
            } else {
              storage.put(pos, dummyObjVectors[pos.intValue()]);
            }
          } else {
            // reuse the per-position container, pointing it at the matched rows
            rowContainer.reset(o.getObj());
            storage.put(pos, rowContainer);
          }
        }
      }

      // generate the output records
      checkAndGenObject();

      // done with the row
      storage.get((byte) tag).clear();

      // drop the references installed above for the other positions
      for (Byte pos : order) {
        if (pos.intValue() != tag) {
          storage.put(pos, null);
        }
      }

    } catch (SerDeException e) {
      // The SerDeException travels as the cause of the HiveException, so it is not
      // additionally dumped to stderr here.
      throw new HiveException(e);
    }
  }