Exemplo n.º 1
0
  private static void setupBucketMapJoinInfo(
      MapredWork plan,
      AbstractMapJoinOperator<? extends MapJoinDesc> currMapJoinOp,
      boolean createLocalPlan) {
    if (currMapJoinOp != null) {
      LinkedHashMap<String, LinkedHashMap<String, ArrayList<String>>> aliasBucketFileNameMapping =
          currMapJoinOp.getConf().getAliasBucketFileNameMapping();
      if (aliasBucketFileNameMapping != null) {
        MapredLocalWork localPlan = plan.getMapLocalWork();
        if (localPlan == null) {
          if (currMapJoinOp instanceof SMBMapJoinOperator) {
            localPlan = ((SMBMapJoinOperator) currMapJoinOp).getConf().getLocalWork();
          }
          if (localPlan == null && createLocalPlan) {
            localPlan =
                new MapredLocalWork(
                    new LinkedHashMap<String, Operator<? extends Serializable>>(),
                    new LinkedHashMap<String, FetchWork>());
          }
        } else {
          // local plan is not null, we want to merge it into SMBMapJoinOperator's local work
          if (currMapJoinOp instanceof SMBMapJoinOperator) {
            MapredLocalWork smbLocalWork =
                ((SMBMapJoinOperator) currMapJoinOp).getConf().getLocalWork();
            if (smbLocalWork != null) {
              localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork());
              localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork());
            }
          }
        }

        if (localPlan == null) {
          return;
        }

        if (currMapJoinOp instanceof SMBMapJoinOperator) {
          plan.setMapLocalWork(null);
          ((SMBMapJoinOperator) currMapJoinOp).getConf().setLocalWork(localPlan);
        } else {
          plan.setMapLocalWork(localPlan);
        }
        BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext();
        localPlan.setBucketMapjoinContext(bucketMJCxt);
        bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping);
        bucketMJCxt.setBucketFileNameMapping(currMapJoinOp.getConf().getBucketFileNameMapping());
        localPlan.setInputFileChangeSensitive(true);
        bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias());
        bucketMJCxt.setBucketMatcherClass(
            org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class);
      }
    }
  }
Exemplo n.º 2
0
  /**
   * set the current task in the mapredWork.
   *
   * @param alias current alias
   * @param topOp the top operator of the stack
   * @param plan current plan
   * @param local whether you need to add to map-reduce or local work
   * @param tt_desc table descriptor
   */
  public static void setTaskPlan(
      String path,
      String alias,
      Operator<? extends Serializable> topOp,
      MapredWork plan,
      boolean local,
      TableDesc tt_desc)
      throws SemanticException {

    if (path == null || alias == null) {
      return;
    }

    if (!local) {
      if (plan.getPathToAliases().get(path) == null) {
        plan.getPathToAliases().put(path, new ArrayList<String>());
      }
      plan.getPathToAliases().get(path).add(alias);
      plan.getPathToPartitionInfo().put(path, new PartitionDesc(tt_desc, null));
      plan.getAliasToWork().put(alias, topOp);
    } else {
      // populate local work if needed
      MapredLocalWork localPlan = plan.getMapLocalWork();
      if (localPlan == null) {
        localPlan =
            new MapredLocalWork(
                new LinkedHashMap<String, Operator<? extends Serializable>>(),
                new LinkedHashMap<String, FetchWork>());
      }

      assert localPlan.getAliasToWork().get(alias) == null;
      assert localPlan.getAliasToFetchWork().get(alias) == null;
      localPlan.getAliasToWork().put(alias, topOp);
      localPlan.getAliasToFetchWork().put(alias, new FetchWork(alias, tt_desc));
      plan.setMapLocalWork(localPlan);
    }
  }
Exemplo n.º 3
0
  /**
   * set the current task in the mapredWork.
   *
   * @param alias_id current alias
   * @param topOp the top operator of the stack
   * @param plan current plan
   * @param local whether you need to add to map-reduce or local work
   * @param opProcCtx processing context
   * @param pList pruned partition list. If it is null it will be computed on-the-fly.
   */
  public static void setTaskPlan(
      String alias_id,
      Operator<? extends Serializable> topOp,
      MapredWork plan,
      boolean local,
      GenMRProcContext opProcCtx,
      PrunedPartitionList pList)
      throws SemanticException {
    ParseContext parseCtx = opProcCtx.getParseCtx();
    Set<ReadEntity> inputs = opProcCtx.getInputs();

    ArrayList<Path> partDir = new ArrayList<Path>();
    ArrayList<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();

    Path tblDir = null;
    TableDesc tblDesc = null;

    PrunedPartitionList partsList = pList;

    if (partsList == null) {
      try {
        partsList = parseCtx.getOpToPartList().get((TableScanOperator) topOp);
        if (partsList == null) {
          partsList =
              PartitionPruner.prune(
                  parseCtx.getTopToTable().get(topOp),
                  parseCtx.getOpToPartPruner().get(topOp),
                  opProcCtx.getConf(),
                  alias_id,
                  parseCtx.getPrunedPartitions());
          parseCtx.getOpToPartList().put((TableScanOperator) topOp, partsList);
        }
      } catch (SemanticException e) {
        throw e;
      } catch (HiveException e) {
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
      }
    }

    // Generate the map work for this alias_id
    Set<Partition> parts = null;
    // pass both confirmed and unknown partitions through the map-reduce
    // framework

    parts = partsList.getConfirmedPartns();
    parts.addAll(partsList.getUnknownPartns());
    PartitionDesc aliasPartnDesc = null;
    try {
      if (!parts.isEmpty()) {
        aliasPartnDesc = Utilities.getPartitionDesc(parts.iterator().next());
      }
    } catch (HiveException e) {
      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
      throw new SemanticException(e.getMessage(), e);
    }

    // The table does not have any partitions
    if (aliasPartnDesc == null) {
      aliasPartnDesc =
          new PartitionDesc(Utilities.getTableDesc(parseCtx.getTopToTable().get(topOp)), null);
    }

    plan.getAliasToPartnInfo().put(alias_id, aliasPartnDesc);

    for (Partition part : parts) {
      if (part.getTable().isPartitioned()) {
        inputs.add(new ReadEntity(part));
      } else {
        inputs.add(new ReadEntity(part.getTable()));
      }

      // Later the properties have to come from the partition as opposed
      // to from the table in order to support versioning.
      Path[] paths;
      sampleDesc sampleDescr = parseCtx.getOpToSamplePruner().get(topOp);

      if (sampleDescr != null) {
        paths = SamplePruner.prune(part, sampleDescr);
      } else {
        paths = part.getPath();
      }

      // is it a partitioned table ?
      if (!part.getTable().isPartitioned()) {
        assert ((tblDir == null) && (tblDesc == null));

        tblDir = paths[0];
        tblDesc = Utilities.getTableDesc(part.getTable());
      }

      for (Path p : paths) {
        if (p == null) {
          continue;
        }
        String path = p.toString();
        if (LOG.isDebugEnabled()) {
          LOG.debug("Adding " + path + " of table" + alias_id);
        }

        partDir.add(p);
        try {
          partDesc.add(Utilities.getPartitionDesc(part));
        } catch (HiveException e) {
          LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
          throw new SemanticException(e.getMessage(), e);
        }
      }
    }

    Iterator<Path> iterPath = partDir.iterator();
    Iterator<PartitionDesc> iterPartnDesc = partDesc.iterator();

    if (!local) {
      while (iterPath.hasNext()) {
        assert iterPartnDesc.hasNext();
        String path = iterPath.next().toString();

        PartitionDesc prtDesc = iterPartnDesc.next();

        // Add the path to alias mapping
        if (plan.getPathToAliases().get(path) == null) {
          plan.getPathToAliases().put(path, new ArrayList<String>());
        }
        plan.getPathToAliases().get(path).add(alias_id);
        plan.getPathToPartitionInfo().put(path, prtDesc);
        if (LOG.isDebugEnabled()) {
          LOG.debug("Information added for path " + path);
        }
      }

      assert plan.getAliasToWork().get(alias_id) == null;
      plan.getAliasToWork().put(alias_id, topOp);
    } else {
      // populate local work if needed
      MapredLocalWork localPlan = plan.getMapLocalWork();
      if (localPlan == null) {
        localPlan =
            new MapredLocalWork(
                new LinkedHashMap<String, Operator<? extends Serializable>>(),
                new LinkedHashMap<String, FetchWork>());
      }

      assert localPlan.getAliasToWork().get(alias_id) == null;
      assert localPlan.getAliasToFetchWork().get(alias_id) == null;
      localPlan.getAliasToWork().put(alias_id, topOp);
      if (tblDir == null) {
        localPlan
            .getAliasToFetchWork()
            .put(alias_id, new FetchWork(FetchWork.convertPathToStringArray(partDir), partDesc));
      } else {
        localPlan.getAliasToFetchWork().put(alias_id, new FetchWork(tblDir.toString(), tblDesc));
      }
      plan.setMapLocalWork(localPlan);
    }
  }