Example #1
  /**
   * Determine the partition group whose partition exprs have the maximum intersection with
   * groupingExprs, in terms of estimated ndv. That partition group is moved to the front of
   * partitionGroups, its partition exprs are reduced to the intersection, and the intersecting
   * groupingExprs are returned in inputPartitionExprs.
   */
  private void computeInputPartitionExprs(
      List<PartitionGroup> partitionGroups,
      List<Expr> groupingExprs,
      int numNodes,
      List<Expr> inputPartitionExprs) {
    inputPartitionExprs.clear();
    // find partition group with maximum intersection
    long maxNdv = 0;
    PartitionGroup maxPg = null;
    List<Expr> maxGroupingExprs = null;
    for (PartitionGroup pg : partitionGroups) {
      List<Expr> l1 = Lists.newArrayList();
      List<Expr> l2 = Lists.newArrayList();
      Expr.intersect(
          analyzer_, pg.partitionByExprs, groupingExprs, analyzer_.getEquivClassSmap(), l1, l2);
      // TODO: also look at l2 and take the max?
      long ndv = Expr.getNumDistinctValues(l1);
      if (ndv < 0 || ndv < numNodes || ndv < maxNdv) continue;
      // found a better partition group
      maxPg = pg;
      maxPg.partitionByExprs = l1;
      maxGroupingExprs = l2;
      maxNdv = ndv;
    }

    if (maxNdv > numNodes) {
      Preconditions.checkNotNull(maxPg);
      // we found a partition group that gives us enough parallelism;
      // move it to the front
      partitionGroups.remove(maxPg);
      partitionGroups.add(0, maxPg);
      inputPartitionExprs.addAll(maxGroupingExprs);
    }
  }
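
The selection loop above is a generic pattern: scan the candidates, remember the one whose estimated ndv clears both the parallelism threshold and the running maximum, then move the winner to the front of the list. A minimal self-contained sketch of that pattern, with a hypothetical Candidate class carrying a precomputed ndv in place of PartitionGroup and Expr.getNumDistinctValues():

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class MaxNdvSelection {
  /** Hypothetical stand-in for a PartitionGroup with a precomputed ndv estimate. */
  static class Candidate {
    final String name;
    final long ndv; // negative means "unknown"
    Candidate(String name, long ndv) { this.name = name; this.ndv = ndv; }
  }

  /** Moves the candidate with the largest usable ndv (> numNodes) to the front. */
  static void promoteBest(List<Candidate> candidates, int numNodes) {
    long maxNdv = 0;
    Candidate best = null;
    for (Candidate c : candidates) {
      if (c.ndv < 0 || c.ndv < numNodes || c.ndv < maxNdv) continue;
      best = c;
      maxNdv = c.ndv;
    }
    if (best != null && maxNdv > numNodes) {
      candidates.remove(best);
      candidates.add(0, best);
    }
  }

  public static void main(String[] args) {
    List<Candidate> cs = new ArrayList<>(Arrays.asList(
        new Candidate("a", 5), new Candidate("b", -1), new Candidate("c", 1000)));
    promoteBest(cs, 10); // "c" is the only candidate with ndv > 10
    System.out.println(cs.get(0).name); // prints "c"
  }
}
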
Example #2
  // Append a flattened version of this plan node, including all children, to 'container'.
  private void treeToThriftHelper(TPlan container) {
    TPlanNode msg = new TPlanNode();
    msg.node_id = id_.asInt();
    msg.limit = limit_;

    TExecStats estimatedStats = new TExecStats();
    estimatedStats.setCardinality(cardinality_);
    estimatedStats.setMemory_used(perHostMemCost_);
    msg.setLabel(getDisplayLabel());
    msg.setLabel_detail(getDisplayLabelDetail());
    msg.setEstimated_stats(estimatedStats);

    msg.setRow_tuples(Lists.<Integer>newArrayListWithCapacity(tupleIds_.size()));
    msg.setNullable_tuples(Lists.<Boolean>newArrayListWithCapacity(tupleIds_.size()));
    for (TupleId tid : tupleIds_) {
      msg.addToRow_tuples(tid.asInt());
      msg.addToNullable_tuples(nullableTupleIds_.contains(tid));
    }
    for (Expr e : conjuncts_) {
      msg.addToConjuncts(e.treeToThrift());
    }
    toThrift(msg);
    container.addToNodes(msg);
    // For the purposes of the BE, consider ExchangeNodes to have no children.
    if (this instanceof ExchangeNode) {
      msg.num_children = 0;
      return;
    }
    msg.num_children = children_.size();
    for (PlanNode child : children_) {
      child.treeToThriftHelper(container);
    }
  }
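
The helper emits the plan in preorder: each node is appended to the container before its children, together with its child count, which is all the backend needs to rebuild the tree from the flat list. A small standalone sketch of the same encoding, using a hypothetical Node class and a list of strings in place of PlanNode and TPlan:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class PreorderFlatten {
  /** Hypothetical stand-in for PlanNode. */
  static class Node {
    final String label;
    final List<Node> children;
    Node(String label, Node... children) {
      this.label = label;
      this.children = Arrays.asList(children);
    }
  }

  /** Appends 'node' and then its children, mirroring treeToThriftHelper's order. */
  static void flatten(Node node, List<String> container) {
    container.add(node.label + "/" + node.children.size());
    for (Node child : node.children) flatten(child, container);
  }

  public static void main(String[] args) {
    Node tree = new Node("join", new Node("scan_a"), new Node("scan_b"));
    List<String> out = new ArrayList<>();
    flatten(tree, out);
    System.out.println(out); // [join/2, scan_a/0, scan_b/0]
  }
}
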
Example #3
 /** Create a predicate that checks if all exprs are equal or both sides are null. */
 private Expr createNullMatchingEquals(
     List<Expr> exprs, TupleId inputTid, ExprSubstitutionMap bufferedSmap) {
   Preconditions.checkState(!exprs.isEmpty());
   Expr result = createNullMatchingEqualsAux(exprs, 0, inputTid, bufferedSmap);
   result.analyzeNoThrow(analyzer_);
   return result;
 }
Example #4
  /** Create SortInfo, including sort tuple, to sort entire input row on sortExprs. */
  private SortInfo createSortInfo(
      PlanNode input, List<Expr> sortExprs, List<Boolean> isAsc, List<Boolean> nullsFirst) {
    // create tuple for sort output = the entire materialized input in a single tuple
    TupleDescriptor sortTupleDesc = analyzer_.getDescTbl().createTupleDescriptor("sort-tuple");
    ExprSubstitutionMap sortSmap = new ExprSubstitutionMap();
    List<Expr> sortSlotExprs = Lists.newArrayList();
    sortTupleDesc.setIsMaterialized(true);
    for (TupleId tid : input.getTupleIds()) {
      TupleDescriptor tupleDesc = analyzer_.getTupleDesc(tid);
      for (SlotDescriptor inputSlotDesc : tupleDesc.getSlots()) {
        if (!inputSlotDesc.isMaterialized()) continue;
        SlotDescriptor sortSlotDesc = analyzer_.copySlotDescriptor(inputSlotDesc, sortTupleDesc);
        // all output slots need to be materialized
        sortSlotDesc.setIsMaterialized(true);
        sortSmap.put(new SlotRef(inputSlotDesc), new SlotRef(sortSlotDesc));
        sortSlotExprs.add(new SlotRef(inputSlotDesc));
      }
    }

    SortInfo sortInfo =
        new SortInfo(Expr.substituteList(sortExprs, sortSmap, analyzer_, false), isAsc, nullsFirst);
    LOG.trace("sortinfo exprs: " + Expr.debugString(sortInfo.getOrderingExprs()));
    sortInfo.setMaterializedTupleInfo(sortTupleDesc, sortSlotExprs);
    return sortInfo;
  }
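
The slot-copying loop records the correspondence in two places: the smap rewrites later references from input slots to the new sort-tuple slots, while sortSlotExprs remembers which input expression feeds each sort slot. A toy rendition of just that bookkeeping over plain strings (all names hypothetical):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class SortTupleSketch {
  public static void main(String[] args) {
    List<String> inputSlots = Arrays.asList("t1.a", "t1.b", "t2.c");
    Map<String, String> sortSmap = new LinkedHashMap<>(); // input slot -> sort slot
    List<String> sortSlotExprs = new ArrayList<>(); // what to materialize per sort slot

    int next = 0;
    for (String inputSlot : inputSlots) {
      String sortSlot = "sort-tuple.slot" + next++;
      sortSmap.put(inputSlot, sortSlot);
      sortSlotExprs.add(inputSlot); // each sort slot is fed by its input slot
    }
    System.out.println(sortSmap); // {t1.a=sort-tuple.slot0, t1.b=sort-tuple.slot1, t2.c=sort-tuple.slot2}
    System.out.println(sortSlotExprs); // [t1.a, t1.b, t2.c]
  }
}
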
Example #5
 /** Marks all slots referenced in exprs as materialized. */
 protected void markSlotsMaterialized(Analyzer analyzer, List<Expr> exprs) {
   List<SlotId> refdIdList = Lists.newArrayList();
   for (Expr expr : exprs) {
     expr.getIds(null, refdIdList);
   }
   analyzer.getDescTbl().markSlotsMaterialized(refdIdList);
 }
Example #6
 /** Compute the product of the selectivities of all conjuncts, skipping unknown (< 0) values. */
 protected double computeSelectivity() {
   double prod = 1.0;
   for (Expr e : conjuncts_) {
     if (e.getSelectivity() < 0) continue;
     prod *= e.getSelectivity();
   }
   return prod;
 }
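
Because conjuncts are treated as independent, the combined selectivity is simply the product, and conjuncts with unknown selectivity (negative values) are skipped rather than poisoning the estimate. A standalone version of the same computation over plain doubles:

public class SelectivityProduct {
  /** Product of known selectivities; values < 0 mean "unknown" and are skipped. */
  static double computeSelectivity(double[] selectivities) {
    double prod = 1.0;
    for (double s : selectivities) {
      if (s < 0) continue;
      prod *= s;
    }
    return prod;
  }

  public static void main(String[] args) {
    // Two known conjuncts (0.1, 0.5) and one unknown (-1): 0.1 * 0.5 = 0.05
    System.out.println(computeSelectivity(new double[] {0.1, -1, 0.5}));
  }
}
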
Example #7
 public String getExplainString(TExplainLevel explainLevel) {
   StringBuilder str = new StringBuilder();
   str.append(type.toString());
   if (!partitionExprs.isEmpty()) {
     List<String> strings = Lists.newArrayList();
     for (Expr expr : partitionExprs) {
       strings.add(expr.toSql());
     }
     str.append(": " + Joiner.on(", ").join(strings));
   }
   str.append("\n");
   return str.toString();
 }
Example #8
 @Override
 protected void toThrift(TPlanNode msg) {
   msg.node_type = TPlanNodeType.AGGREGATION_NODE;
   msg.agg_node =
       new TAggregationNode(
           Expr.treesToThrift(aggInfo.getAggregateExprs()),
           aggInfo.getAggTupleId().asInt(),
           needsFinalize);
   List<Expr> groupingExprs = aggInfo.getGroupingExprs();
   if (groupingExprs != null) {
     msg.agg_node.setGrouping_exprs(Expr.treesToThrift(groupingExprs));
   }
 }
Example #9
 public TDataPartition toThrift() {
   TDataPartition result = new TDataPartition(type);
   if (partitionExprs != null) {
     result.setPartition_exprs(Expr.treesToThrift(partitionExprs));
   }
   return result;
 }
Example #10
 protected String debugString() {
    // not using Objects.toStringHelper because PlanNode.debugString() is embedded
    // in the debug strings of its subclasses
   StringBuilder output = new StringBuilder();
   output.append("preds=" + Expr.debugString(conjuncts_));
   output.append(" limit=" + Long.toString(limit_));
   return output.toString();
 }
Example #11
 /** Copy c'tor that also takes a new id_ and displayName. */
 protected PlanNode(PlanNodeId id, PlanNode node, String displayName) {
   id_ = id;
   limit_ = node.limit_;
   tupleIds_ = Lists.newArrayList(node.tupleIds_);
   tblRefIds_ = Lists.newArrayList(node.tblRefIds_);
   nullableTupleIds_ = Sets.newHashSet(node.nullableTupleIds_);
   conjuncts_ = Expr.cloneList(node.conjuncts_);
   cardinality_ = -1;
   numNodes_ = -1;
   displayName_ = displayName;
 }
Example #12
    /**
     * True if the partition exprs, ordering elements, and window of analyticExpr match ours.
     */
    public boolean isCompatible(AnalyticExpr analyticExpr) {
      if (requiresIndependentEval(analyticExprs.get(0)) || requiresIndependentEval(analyticExpr)) {
        return false;
      }

      if (!Expr.equalSets(analyticExpr.getPartitionExprs(), partitionByExprs)) {
        return false;
      }
      if (!analyticExpr.getOrderByElements().equals(orderByElements)) return false;
      if ((window == null) != (analyticExpr.getWindow() == null)) return false;
      if (window == null) return true;
      return analyticExpr.getWindow().equals(window);
    }
Example #13
  public THdfsPartition toThrift() {
    List<TExpr> thriftExprs = Expr.treesToThrift(getPartitionValues());

    return new THdfsPartition(
        (byte) fileFormatDescriptor.getLineDelim(),
        (byte) fileFormatDescriptor.getFieldDelim(),
        (byte) fileFormatDescriptor.getCollectionDelim(),
        (byte) fileFormatDescriptor.getMapKeyDelim(),
        (byte) fileFormatDescriptor.getEscapeChar(),
        fileFormatDescriptor.getFileFormat().toThrift(),
        thriftExprs,
        fileFormatDescriptor.getBlockSize(),
        fileFormatDescriptor.getCompression());
  }
Example #14
 /**
  * Return true if 'this' and other have compatible partition exprs and other's orderByElements
  * are a prefix of ours.
  */
 public boolean isPrefixOf(SortGroup other) {
   if (other.orderByElements.size() > orderByElements.size()) return false;
   if (!Expr.equalSets(partitionByExprs, other.partitionByExprs)) return false;
   for (int i = 0; i < other.orderByElements.size(); ++i) {
     OrderByElement ob = orderByElements.get(i);
     OrderByElement otherOb = other.orderByElements.get(i);
     // TODO: compare equiv classes by comparing each equiv class's placeholder
     // slotref
     if (!ob.getExpr().equals(otherOb.getExpr())) return false;
     if (ob.isAsc() != otherOb.isAsc()) return false;
     if (ob.nullsFirst() != otherOb.nullsFirst()) return false;
   }
   return true;
 }
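
Note the loop bounds: the check walks other's (shorter) element list against the front of ours. A tiny standalone equivalent over plain lists makes the direction explicit:

import java.util.Arrays;
import java.util.List;

public class PrefixCheck {
  /** True if 'other' is a prefix of 'mine', mirroring the loop above. */
  static <T> boolean otherIsPrefixOfMine(List<T> mine, List<T> other) {
    if (other.size() > mine.size()) return false;
    for (int i = 0; i < other.size(); ++i) {
      if (!mine.get(i).equals(other.get(i))) return false;
    }
    return true;
  }

  public static void main(String[] args) {
    System.out.println(otherIsPrefixOfMine(
        Arrays.asList("a", "b", "c"), Arrays.asList("a", "b"))); // true
    System.out.println(otherIsPrefixOfMine(
        Arrays.asList("a", "b"), Arrays.asList("b"))); // false
  }
}
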
Example #15
  /**
   * Create an unanalyzed predicate that checks if the elements at position >= i are equal or both
   * sides are null.
   *
   * <p>The predicate has the form ((lhs[i] is null && rhs[i] is null) || (lhs[i] is not null &&
   * rhs[i] is not null && lhs[i] = rhs[i])) && <createNullMatchingEqualsAux(i + 1)>
   */
  private Expr createNullMatchingEqualsAux(
      List<Expr> elements, int i, TupleId inputTid, ExprSubstitutionMap bufferedSmap) {
    if (i > elements.size() - 1) return new BoolLiteral(true);

    // compare elements[i]
    Expr lhs = elements.get(i);
    Preconditions.checkState(lhs.isBound(inputTid));
    Expr rhs = lhs.substitute(bufferedSmap, analyzer_, false);

    Expr bothNull =
        new CompoundPredicate(
            Operator.AND, new IsNullPredicate(lhs, false), new IsNullPredicate(rhs, false));
    Expr lhsEqRhsNotNull =
        new CompoundPredicate(
            Operator.AND,
            new CompoundPredicate(
                Operator.AND, new IsNullPredicate(lhs, true), new IsNullPredicate(rhs, true)),
            new BinaryPredicate(BinaryPredicate.Operator.EQ, lhs, rhs));
    Expr remainder = createNullMatchingEqualsAux(elements, i + 1, inputTid, bufferedSmap);
    return new CompoundPredicate(
        CompoundPredicate.Operator.AND,
        new CompoundPredicate(Operator.OR, bothNull, lhsEqRhsNotNull),
        remainder);
  }
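
For a two-element list the recursion expands to two null-matching comparisons joined by AND, with a trailing TRUE from the base case. A self-contained sketch that builds the same shape as SQL text rather than Expr trees (the column names are made up for illustration):

public class NullMatchingEquals {
  /** Builds the SQL form of the predicate for columns at position >= i. */
  static String nullMatchingEquals(String[] lhs, String[] rhs, int i) {
    if (i > lhs.length - 1) return "TRUE";
    String pair =
        "((" + lhs[i] + " IS NULL AND " + rhs[i] + " IS NULL) OR ("
            + lhs[i] + " IS NOT NULL AND " + rhs[i] + " IS NOT NULL AND "
            + lhs[i] + " = " + rhs[i] + "))";
    return pair + " AND " + nullMatchingEquals(lhs, rhs, i + 1);
  }

  public static void main(String[] args) {
    System.out.println(
        nullMatchingEquals(new String[] {"a", "b"}, new String[] {"a2", "b2"}, 0));
    // ((a IS NULL AND a2 IS NULL) OR (a IS NOT NULL AND a2 IS NOT NULL AND a = a2))
    //   AND ((b IS NULL AND b2 IS NULL) OR (...)) AND TRUE
  }
}
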
Example #16
 /**
  * Coalesce partition groups for which the intersection of their partition exprs has ndv estimate
  * > numNodes, so that the resulting plan still parallelizes across all nodes.
  */
 private void mergePartitionGroups(List<PartitionGroup> partitionGroups, int numNodes) {
   boolean hasMerged = false;
   do {
     hasMerged = false;
     for (PartitionGroup pg1 : partitionGroups) {
       for (PartitionGroup pg2 : partitionGroups) {
         if (pg1 != pg2) {
           long ndv =
               Expr.getNumDistinctValues(
                   Expr.intersect(pg1.partitionByExprs, pg2.partitionByExprs));
            if (ndv < 0 || ndv < numNodes) {
              // didn't get a usable value or the number of partitions is too small
              continue;
            }
           pg1.merge(pg2);
           partitionGroups.remove(pg2);
           hasMerged = true;
           break;
         }
       }
       if (hasMerged) break;
     }
   } while (hasMerged);
 }
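
The loop restarts from the top after every successful merge: both for-each loops are exited before their iterators are touched again, which is what makes the remove() safe, and the do/while runs until a full pass changes nothing. The same fixpoint pattern, sketched on integer sets that are merged whenever they intersect:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class FixpointMerge {
  /** Repeatedly merges any two sets that share an element, until none do. */
  static void mergeOverlapping(List<Set<Integer>> groups) {
    boolean hasMerged;
    do {
      hasMerged = false;
      for (Set<Integer> g1 : groups) {
        for (Set<Integer> g2 : groups) {
          if (g1 != g2 && !Collections.disjoint(g1, g2)) {
            g1.addAll(g2);
            groups.remove(g2); // safe: both loops are exited before the iterators advance
            hasMerged = true;
            break;
          }
        }
        if (hasMerged) break;
      }
    } while (hasMerged);
  }

  public static void main(String[] args) {
    List<Set<Integer>> groups = new ArrayList<>();
    groups.add(new HashSet<>(Arrays.asList(1, 2)));
    groups.add(new HashSet<>(Arrays.asList(2, 3)));
    groups.add(new HashSet<>(Arrays.asList(9)));
    mergeOverlapping(groups);
    System.out.println(groups); // e.g. [[1, 2, 3], [9]]
  }
}
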
Example #17
 public String debugString() {
   return Objects.toStringHelper(this)
       .add("type", type)
       .addValue(Expr.debugString(partitionExprs))
       .toString();
 }
Example #18
  /**
   * Create HdfsPartition objects corresponding to 'partitions'.
   *
   * <p>If there are no partitions in the Hive metadata, a single partition is added with no
   * partition keys.
   *
   * <p>For files that have not been changed, reuses file descriptors from oldFileDescMap.
   */
  private void loadPartitions(
      List<org.apache.hadoop.hive.metastore.api.Partition> msPartitions,
      org.apache.hadoop.hive.metastore.api.Table msTbl,
      Map<String, FileDescriptor> oldFileDescMap)
      throws IOException, CatalogException {
    partitions_.clear();
    hdfsBaseDir_ = msTbl.getSd().getLocation();
    List<FileDescriptor> newFileDescs = Lists.newArrayList();

    // INSERT statements need to refer to this if they try to write to new partitions.
    // Scans don't refer to this because by definition all partitions they refer to
    // exist.
    addDefaultPartition(msTbl.getSd());

    if (msTbl.getPartitionKeysSize() == 0) {
      Preconditions.checkArgument(msPartitions == null || msPartitions.isEmpty());
      // This table has no partition key, which means it has no declared partitions.
      // We model partitions slightly differently to Hive - every file must exist in a
      // partition, so add a single partition with no keys which will get all the
      // files in the table's root directory.
      addPartition(msTbl.getSd(), null, new ArrayList<LiteralExpr>(), oldFileDescMap, newFileDescs);
      Path location = new Path(hdfsBaseDir_);
      if (DFS.exists(location)) {
        accessLevel_ = getAvailableAccessLevel(location);
      }
    } else {
      // keep track of distinct partition key values and how many nulls there are
      Set<String>[] uniquePartitionKeys = new HashSet[numClusteringCols_];
      long[] numNullKeys = new long[numClusteringCols_];
      for (int i = 0; i < numClusteringCols_; ++i) {
        uniquePartitionKeys[i] = new HashSet<String>();
        numNullKeys[i] = 0;
      }

      for (org.apache.hadoop.hive.metastore.api.Partition msPartition : msPartitions) {
        // load key values
        List<LiteralExpr> keyValues = Lists.newArrayList();
        int i = 0;
        for (String partitionKey : msPartition.getValues()) {
          uniquePartitionKeys[i].add(partitionKey);
          // Deal with Hive's special NULL partition key.
          if (partitionKey.equals(nullPartitionKeyValue_)) {
            keyValues.add(new NullLiteral());
            ++numNullKeys[i];
          } else {
            ColumnType type = colsByPos_.get(keyValues.size()).getType();
            try {
              Expr expr = LiteralExpr.create(partitionKey, type);
              // Force the literal to be of type declared in the metadata.
              expr = expr.castTo(type);
              keyValues.add((LiteralExpr) expr);
            } catch (AnalysisException ex) {
              LOG.warn("Failed to create literal expression of type: " + type, ex);
              throw new InvalidStorageDescriptorException(ex);
            }
          }
          ++i;
        }
        HdfsPartition partition =
            addPartition(msPartition.getSd(), msPartition, keyValues, oldFileDescMap, newFileDescs);
        // If the partition is null, its HDFS path does not exist, and it was not added to
        // this table's partition list. Skip the partition.
        if (partition == null) continue;

        if (msPartition.getParameters() != null) {
          partition.setNumRows(getRowCount(msPartition.getParameters()));
        }
        if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
          // TODO: READ_ONLY isn't exactly correct because it's possible the
          // partition does not have READ permissions either. When we start checking
          // whether we can READ from a table, this should be updated to set the
          // table's access level to the "lowest" effective level across all
          // partitions. That is, if one partition has READ_ONLY and another has
          // WRITE_ONLY the table's access level should be NONE.
          accessLevel_ = TAccessLevel.READ_ONLY;
        }
      }

      // update col stats for partition key cols
      for (int i = 0; i < numClusteringCols_; ++i) {
        ColumnStats stats = colsByPos_.get(i).getStats();
        stats.setNumNulls(numNullKeys[i]);
        stats.setNumDistinctValues(uniquePartitionKeys[i].size());
        LOG.debug("#col=" + Integer.toString(i) + " stats=" + stats.toString());
      }
    }

    if (newFileDescs.size() > 0) {
      loadBlockMd(newFileDescs);
    }
    uniqueHostPortsCount_ = countUniqueDataNetworkLocations(partitions_);
  }
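
The column-stats bookkeeping at the end is simple: one set of distinct values and one null counter per clustering column, updated as each partition's key values are parsed. A standalone version of just that bookkeeping, with an assumed null-partition marker standing in for nullPartitionKeyValue_:

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class PartitionKeyStats {
  public static void main(String[] args) {
    // Assumed marker; the real value comes from the Hive configuration.
    String nullKey = "__HIVE_DEFAULT_PARTITION__";
    String[][] partitions = {{"2013", "us"}, {"2013", nullKey}, {"2014", "us"}};
    int numCols = 2;

    List<Set<String>> unique = new ArrayList<>();
    long[] numNulls = new long[numCols];
    for (int i = 0; i < numCols; ++i) unique.add(new HashSet<>());

    for (String[] keys : partitions) {
      for (int i = 0; i < numCols; ++i) {
        unique.get(i).add(keys[i]);
        if (keys[i].equals(nullKey)) ++numNulls[i];
      }
    }
    for (int i = 0; i < numCols; ++i) {
      // prints: #col=0 ndv=2 nulls=0, then #col=1 ndv=2 nulls=1
      System.out.println("#col=" + i + " ndv=" + unique.get(i).size() + " nulls=" + numNulls[i]);
    }
  }
}
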
Example #19
  /**
   * Create plan tree for the entire sort group, including all contained window groups. Marks the
   * SortNode as requiring its input to be partitioned if partitionExprs is not null (partitionExprs
   * represent the data partition of the entire partition group of which this sort group is a part).
   */
  private PlanNode createSortGroupPlan(
      PlanNode root, SortGroup sortGroup, List<Expr> partitionExprs) throws ImpalaException {
    List<Expr> partitionByExprs = sortGroup.partitionByExprs;
    List<OrderByElement> orderByElements = sortGroup.orderByElements;
    ExprSubstitutionMap sortSmap = null;
    TupleId sortTupleId = null;
    TupleDescriptor bufferedTupleDesc = null;
    // map from input to buffered tuple
    ExprSubstitutionMap bufferedSmap = new ExprSubstitutionMap();

    // sort on partition by (pb) + order by (ob) exprs and create pb/ob predicates
    if (!partitionByExprs.isEmpty() || !orderByElements.isEmpty()) {
      // first sort on partitionExprs (direction doesn't matter)
      List<Expr> sortExprs = Lists.newArrayList(partitionByExprs);
      List<Boolean> isAsc =
          Lists.newArrayList(Collections.nCopies(sortExprs.size(), Boolean.TRUE));
      // TODO: utilize a direction and nulls first/last that has benefit
      // for subsequent sort groups
      List<Boolean> nullsFirst =
          Lists.newArrayList(Collections.nCopies(sortExprs.size(), Boolean.TRUE));

      // then sort on orderByExprs
      for (OrderByElement orderByElement : sortGroup.orderByElements) {
        sortExprs.add(orderByElement.getExpr());
        isAsc.add(orderByElement.isAsc());
        nullsFirst.add(orderByElement.getNullsFirstParam());
      }

      SortInfo sortInfo = createSortInfo(root, sortExprs, isAsc, nullsFirst);
      SortNode sortNode = new SortNode(idGenerator_.getNextId(), root, sortInfo, false, 0);

      // if this sort group does not have partitioning exprs, we want the sort
      // to be executed like a regular distributed sort
      if (!partitionByExprs.isEmpty()) sortNode.setIsAnalyticSort(true);

      if (partitionExprs != null) {
        // create required input partition
        DataPartition inputPartition = DataPartition.UNPARTITIONED;
        if (!partitionExprs.isEmpty()) {
          inputPartition = new DataPartition(TPartitionType.HASH_PARTITIONED, partitionExprs);
        }
        sortNode.setInputPartition(inputPartition);
      }

      root = sortNode;
      root.init(analyzer_);
      sortSmap = sortNode.getOutputSmap();

      // create bufferedTupleDesc and bufferedSmap
      sortTupleId = sortNode.tupleIds_.get(0);
      bufferedTupleDesc = analyzer_.getDescTbl().copyTupleDescriptor(sortTupleId, "buffered-tuple");
      LOG.trace("desctbl: " + analyzer_.getDescTbl().debugString());

      List<SlotDescriptor> inputSlots = analyzer_.getTupleDesc(sortTupleId).getSlots();
      List<SlotDescriptor> bufferedSlots = bufferedTupleDesc.getSlots();
      for (int i = 0; i < inputSlots.size(); ++i) {
        bufferedSmap.put(new SlotRef(inputSlots.get(i)), new SlotRef(bufferedSlots.get(i)));
      }
    }

    // create one AnalyticEvalNode per window group
    for (WindowGroup windowGroup : sortGroup.windowGroups) {
      // Create partition-by (pb) and order-by (ob) less-than predicates between the
      // input tuple (the output of the preceding sort) and a buffered tuple that is
      // identical to the input tuple. We need a different tuple descriptor for the
      // buffered tuple because the generated predicates should compare two different
      // tuple instances from the same input stream (i.e., the predicates should be
      // evaluated over a row that is composed of the input and the buffered tuple).

      // we need to remap the pb/ob exprs to a) the sort output, b) our buffer of the
      // sort input
      Expr partitionByEq = null;
      if (!windowGroup.partitionByExprs.isEmpty()) {
        partitionByEq =
            createNullMatchingEquals(
                Expr.substituteList(windowGroup.partitionByExprs, sortSmap, analyzer_, false),
                sortTupleId,
                bufferedSmap);
        LOG.trace("partitionByEq: " + partitionByEq.debugString());
      }
      Expr orderByEq = null;
      if (!windowGroup.orderByElements.isEmpty()) {
        orderByEq =
            createNullMatchingEquals(
                OrderByElement.getOrderByExprs(
                    OrderByElement.substitute(windowGroup.orderByElements, sortSmap, analyzer_)),
                sortTupleId,
                bufferedSmap);
        LOG.trace("orderByEq: " + orderByEq.debugString());
      }

      root =
          new AnalyticEvalNode(
              idGenerator_.getNextId(),
              root,
              stmtTupleIds_,
              windowGroup.analyticFnCalls,
              windowGroup.partitionByExprs,
              windowGroup.orderByElements,
              windowGroup.window,
              analyticInfo_.getOutputTupleDesc(),
              windowGroup.physicalIntermediateTuple,
              windowGroup.physicalOutputTuple,
              windowGroup.logicalToPhysicalSmap,
              partitionByEq,
              orderByEq,
              bufferedTupleDesc);
      root.init(analyzer_);
    }
    return root;
  }
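
Structurally the method builds a straight chain: one SortNode at the bottom (when there are any pb/ob exprs), then one AnalyticEvalNode per window group, each taking the previous root as its child. A minimal sketch of that fold over hypothetical node types:

import java.util.Arrays;
import java.util.List;

public class SortGroupChain {
  /** Hypothetical stand-in for PlanNode/SortNode/AnalyticEvalNode. */
  static class Node {
    final String label;
    final Node child;
    Node(String label, Node child) { this.label = label; this.child = child; }
    @Override public String toString() {
      return label + (child == null ? "" : " <- " + child);
    }
  }

  static Node buildChain(Node input, boolean needsSort, List<String> windowGroups) {
    Node root = needsSort ? new Node("sort", input) : input;
    for (String wg : windowGroups) {
      root = new Node("analytic-eval(" + wg + ")", root); // each eval wraps the previous root
    }
    return root;
  }

  public static void main(String[] args) {
    Node plan = buildChain(new Node("scan", null), true, Arrays.asList("w1", "w2"));
    System.out.println(plan); // analytic-eval(w2) <- analytic-eval(w1) <- sort <- scan
  }
}
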
Example #20
 /** True if the partition and ordering exprs of windowGroup match ours. */
 public boolean isCompatible(WindowGroup windowGroup) {
   return Expr.equalSets(windowGroup.partitionByExprs, partitionByExprs)
       && windowGroup.orderByElements.equals(orderByElements);
 }
Example #21
 /**
  * True if the partition exprs of sortGroup are compatible with ours. For now that means
  * equality.
  */
 public boolean isCompatible(SortGroup sortGroup) {
   return Expr.equalSets(sortGroup.partitionByExprs, partitionByExprs);
 }
Example #22
 /**
  * Merge 'other' into 'this': partitionByExprs becomes the intersection of the two, and
  * sortGroups becomes the union.
  */
 public void merge(PartitionGroup other) {
   partitionByExprs = Expr.intersect(partitionByExprs, other.partitionByExprs);
   Preconditions.checkState(Expr.getNumDistinctValues(partitionByExprs) >= 0);
   sortGroups.addAll(other.sortGroups);
 }
Example #23
 /**
  * Sets outputSmap_ to compose(existing smap, combined child smap). Also substitutes conjuncts_
  * using the composed smap.
  */
 protected void createDefaultSmap(Analyzer analyzer) {
   ExprSubstitutionMap combinedChildSmap = getCombinedChildSmap();
   outputSmap_ = ExprSubstitutionMap.compose(outputSmap_, combinedChildSmap, analyzer);
   conjuncts_ = Expr.substituteList(conjuncts_, outputSmap_, analyzer, false);
 }
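
Composing the maps means a single lookup performs both substitutions: every lhs of the existing smap is mapped through it and then through the child smap. One plausible toy version over string-to-string maps (ExprSubstitutionMap.compose operates on Expr trees and its exact merge semantics may differ):

import java.util.LinkedHashMap;
import java.util.Map;

public class SmapCompose {
  /** compose(f, g): maps every key of f through f then g, and keeps g's other entries. */
  static Map<String, String> compose(Map<String, String> f, Map<String, String> g) {
    Map<String, String> result = new LinkedHashMap<>();
    for (Map.Entry<String, String> e : f.entrySet()) {
      result.put(e.getKey(), g.getOrDefault(e.getValue(), e.getValue()));
    }
    for (Map.Entry<String, String> e : g.entrySet()) {
      result.putIfAbsent(e.getKey(), e.getValue());
    }
    return result;
  }

  public static void main(String[] args) {
    Map<String, String> outputSmap = new LinkedHashMap<>();
    outputSmap.put("col", "slot1"); // existing smap: col -> slot1
    Map<String, String> childSmap = new LinkedHashMap<>();
    childSmap.put("slot1", "slot2"); // child smap: slot1 -> slot2
    // col now maps all the way to slot2 in one lookup
    System.out.println(compose(outputSmap, childSmap)); // {col=slot2, slot1=slot2}
  }
}
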