Java TopNHash примеры использования

Язык программирования: Java

Пространство имен/Пакет: org.apache.hadoop.hive.ql.exec

Класс/Тип: TopNHash

Примеров на hotexamples.com: 3

Java TopNHash - 3 примера найдено. Это лучшие примеры Java кода для org.apache.hadoop.hive.ql.exec.TopNHash, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

flush(2)

initialize(1)

removed(1)

storeValue(1)

tryStoreKey(1)

Пример #1

Показать файл

Файл: ReduceSinkOperator.java Проект: hugh-han/hive

 @Override
 protected void closeOp(boolean abort) throws HiveException {
   if (!abort) {
     reducerHash.flush();
   }
   super.closeOp(abort);
   out = null;
   if (isLogInfoEnabled) {
     LOG.info(toString() + ": records written - " + numRows);
   }
   recordCounter.set(numRows);
 }

Пример #2

Показать файл

Файл: ReduceSinkOperator.java Проект: hugh-han/hive

  @Override
  @SuppressWarnings("unchecked")
  public void process(Object row, int tag) throws HiveException {
    try {
      ObjectInspector rowInspector = inputObjInspectors[tag];
      if (firstRow) {
        firstRow = false;
        // TODO: this is fishy - we init object inspectors based on first tag. We
        //       should either init for each tag, or if rowInspector doesn't really
        //       matter, then we can create this in ctor and get rid of firstRow.
        if (conf.getWriteType() == AcidUtils.Operation.UPDATE
            || conf.getWriteType() == AcidUtils.Operation.DELETE) {
          assert rowInspector instanceof StructObjectInspector
              : "Exptected rowInspector to be instance of StructObjectInspector but it is a "
                  + rowInspector.getClass().getName();
          acidRowInspector = (StructObjectInspector) rowInspector;
          // The record identifier is always in the first column
          recIdField = acidRowInspector.getAllStructFieldRefs().get(0);
          recIdInspector = (StructObjectInspector) recIdField.getFieldObjectInspector();
          // The bucket field is in the second position
          bucketField = recIdInspector.getAllStructFieldRefs().get(1);
          bucketInspector = (IntObjectInspector) bucketField.getFieldObjectInspector();
        }

        if (isLogInfoEnabled) {
          LOG.info(
              "keys are "
                  + conf.getOutputKeyColumnNames()
                  + " num distributions: "
                  + conf.getNumDistributionKeys());
        }
        keyObjectInspector =
            initEvaluatorsAndReturnStruct(
                keyEval,
                distinctColIndices,
                conf.getOutputKeyColumnNames(),
                numDistributionKeys,
                rowInspector);
        valueObjectInspector =
            initEvaluatorsAndReturnStruct(
                valueEval, conf.getOutputValueColumnNames(), rowInspector);
        partitionObjectInspectors = initEvaluators(partitionEval, rowInspector);
        if (bucketEval != null) {
          bucketObjectInspectors = initEvaluators(bucketEval, rowInspector);
        }
        int numKeys = numDistinctExprs > 0 ? numDistinctExprs : 1;
        int keyLen = numDistinctExprs > 0 ? numDistributionKeys + 1 : numDistributionKeys;
        cachedKeys = new Object[numKeys][keyLen];
        cachedValues = new Object[valueEval.length];
      }

      // Determine distKeyLength (w/o distincts), and then add the first if present.
      populateCachedDistributionKeys(row, 0);

      // replace bucketing columns with hashcode % numBuckets
      int bucketNumber = -1;
      if (bucketEval != null) {
        bucketNumber = computeBucketNumber(row, conf.getNumBuckets());
        cachedKeys[0][buckColIdxInKey] = new Text(String.valueOf(bucketNumber));
      } else if (conf.getWriteType() == AcidUtils.Operation.UPDATE
          || conf.getWriteType() == AcidUtils.Operation.DELETE) {
        // In the non-partitioned case we still want to compute the bucket number for updates and
        // deletes.
        bucketNumber = computeBucketNumber(row, conf.getNumBuckets());
      }

      HiveKey firstKey = toHiveKey(cachedKeys[0], tag, null);
      int distKeyLength = firstKey.getDistKeyLength();
      if (numDistinctExprs > 0) {
        populateCachedDistinctKeys(row, 0);
        firstKey = toHiveKey(cachedKeys[0], tag, distKeyLength);
      }

      final int hashCode;

      // distKeyLength doesn't include tag, but includes buckNum in cachedKeys[0]
      if (useUniformHash && partitionEval.length > 0) {
        hashCode = computeMurmurHash(firstKey);
      } else {
        hashCode = computeHashCode(row, bucketNumber);
      }

      firstKey.setHashCode(hashCode);

      /*
       * in case of TopN for windowing, we need to distinguish between rows with
       * null partition keys and rows with value 0 for partition keys.
       */
      boolean partKeyNull = conf.isPTFReduceSink() && partitionKeysAreNull(row);

      // Try to store the first key. If it's not excluded, we will proceed.
      int firstIndex = reducerHash.tryStoreKey(firstKey, partKeyNull);
      if (firstIndex == TopNHash.EXCLUDE) return; // Nothing to do.
      // Compute value and hashcode - we'd either store or forward them.
      BytesWritable value = makeValueWritable(row);

      if (firstIndex == TopNHash.FORWARD) {
        collect(firstKey, value);
      } else {
        assert firstIndex >= 0;
        reducerHash.storeValue(firstIndex, firstKey.hashCode(), value, false);
      }

      // All other distinct keys will just be forwarded. This could be optimized...
      for (int i = 1; i < numDistinctExprs; i++) {
        System.arraycopy(cachedKeys[0], 0, cachedKeys[i], 0, numDistributionKeys);
        populateCachedDistinctKeys(row, i);
        HiveKey hiveKey = toHiveKey(cachedKeys[i], tag, distKeyLength);
        hiveKey.setHashCode(hashCode);
        collect(hiveKey, value);
      }
    } catch (HiveException e) {
      throw e;
    } catch (Exception e) {
      throw new HiveException(e);
    }
  }

Пример #3

Показать файл

Файл: ReduceSinkOperator.java Проект: hugh-han/hive

  @Override
  protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    try {

      numRows = 0;
      cntr = 1;
      logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS);

      statsMap.put(getCounterName(Counter.RECORDS_OUT_INTERMEDIATE, hconf), recordCounter);

      List<ExprNodeDesc> keys = conf.getKeyCols();

      if (isLogDebugEnabled) {
        LOG.debug("keys size is " + keys.size());
        for (ExprNodeDesc k : keys) {
          LOG.debug("Key exprNodeDesc " + k.getExprString());
        }
      }

      keyEval = new ExprNodeEvaluator[keys.size()];
      int i = 0;
      for (ExprNodeDesc e : keys) {
        keyEval[i++] = ExprNodeEvaluatorFactory.get(e);
      }

      numDistributionKeys = conf.getNumDistributionKeys();
      distinctColIndices = conf.getDistinctColumnIndices();
      numDistinctExprs = distinctColIndices.size();

      valueEval = new ExprNodeEvaluator[conf.getValueCols().size()];
      i = 0;
      for (ExprNodeDesc e : conf.getValueCols()) {
        valueEval[i++] = ExprNodeEvaluatorFactory.get(e);
      }

      partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
      i = 0;
      for (ExprNodeDesc e : conf.getPartitionCols()) {
        int index = ExprNodeDescUtils.indexOf(e, keys);
        partitionEval[i++] = index < 0 ? ExprNodeEvaluatorFactory.get(e) : keyEval[index];
      }

      if (conf.getBucketCols() != null && !conf.getBucketCols().isEmpty()) {
        bucketEval = new ExprNodeEvaluator[conf.getBucketCols().size()];

        i = 0;
        for (ExprNodeDesc e : conf.getBucketCols()) {
          int index = ExprNodeDescUtils.indexOf(e, keys);
          bucketEval[i++] = index < 0 ? ExprNodeEvaluatorFactory.get(e) : keyEval[index];
        }

        buckColIdxInKey = conf.getPartitionCols().size();
      }

      tag = conf.getTag();
      tagByte[0] = (byte) tag;
      skipTag = conf.getSkipTag();
      if (isLogInfoEnabled) {
        LOG.info("Using tag = " + tag);
      }

      TableDesc keyTableDesc = conf.getKeySerializeInfo();
      keySerializer = (Serializer) keyTableDesc.getDeserializerClass().newInstance();
      keySerializer.initialize(null, keyTableDesc.getProperties());
      keyIsText = keySerializer.getSerializedClass().equals(Text.class);

      TableDesc valueTableDesc = conf.getValueSerializeInfo();
      valueSerializer = (Serializer) valueTableDesc.getDeserializerClass().newInstance();
      valueSerializer.initialize(null, valueTableDesc.getProperties());

      int limit = conf.getTopN();
      float memUsage = conf.getTopNMemoryUsage();

      if (limit >= 0 && memUsage > 0) {
        reducerHash = conf.isPTFReduceSink() ? new PTFTopNHash() : reducerHash;
        reducerHash.initialize(limit, memUsage, conf.isMapGroupBy(), this);
      }

      useUniformHash = conf.getReducerTraits().contains(UNIFORM);

      firstRow = true;
    } catch (Exception e) {
      String msg = "Error initializing ReduceSinkOperator: " + e.getMessage();
      LOG.error(msg, e);
      throw new RuntimeException(e);
    }
  }