Example #1
0
 /**
  * This code block iterates over indexes on the table and populates the indexToKeys map for all
  * the indexes that satisfy the rewrite criteria.
  *
  * @param indexTables
  * @return
  * @throws SemanticException
  */
 Map<Index, Set<String>> getIndexToKeysMap(List<Index> indexTables) throws SemanticException {
   Index index = null;
   Hive hiveInstance = hiveDb;
   Map<Index, Set<String>> indexToKeysMap = new LinkedHashMap<Index, Set<String>>();
   for (int idxCtr = 0; idxCtr < indexTables.size(); idxCtr++) {
     final Set<String> indexKeyNames = new LinkedHashSet<String>();
     index = indexTables.get(idxCtr);
     // Getting index key columns
     StorageDescriptor sd = index.getSd();
     List<FieldSchema> idxColList = sd.getCols();
     for (FieldSchema fieldSchema : idxColList) {
       indexKeyNames.add(fieldSchema.getName());
     }
     assert indexKeyNames.size() == 1;
     // Check that the index schema is as expected. This code block should
     // catch problems of this rewrite breaking when the AggregateIndexHandler
     // index is changed.
     List<String> idxTblColNames = new ArrayList<String>();
     try {
       Table idxTbl = hiveInstance.getTable(index.getDbName(), index.getIndexTableName());
       for (FieldSchema idxTblCol : idxTbl.getCols()) {
         idxTblColNames.add(idxTblCol.getName());
       }
     } catch (HiveException e) {
       LOG.error(
           "Got exception while locating index table, "
               + "skipping "
               + getName()
               + " optimization");
       LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
       throw new SemanticException(e.getMessage(), e);
     }
     assert (idxTblColNames.contains(IDX_BUCKET_COL));
     assert (idxTblColNames.contains(IDX_OFFSETS_ARRAY_COL));
     // we add all index tables which can be used for rewrite
     // and defer the decision of using a particular index for later
     // this is to allow choosing a index if a better mechanism is
     // designed later to chose a better rewrite
     indexToKeysMap.put(index, indexKeyNames);
   }
   return indexToKeysMap;
 }
Example #2
0
  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {
    parseContext = pctx;
    hiveConf = parseContext.getConf();
    try {
      hiveDb = Hive.get(hiveConf);
    } catch (HiveException e) {
      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
      throw new SemanticException(e.getMessage(), e);
    }

    // Don't try to index optimize the query to build the index
    HiveConf.setBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTINDEXFILTER, false);

    /* Check if the input query passes all the tests to be eligible for a rewrite
     * If yes, rewrite original query; else, return the current parseContext
     */
    if (shouldApplyOptimization()) {
      LOG.info("Rewriting Original Query using " + getName() + " optimization.");
      rewriteOriginalQuery();
    }
    return parseContext;
  }
  /**
   * set the current task in the mapredWork.
   *
   * @param alias_id current alias
   * @param topOp the top operator of the stack
   * @param plan current plan
   * @param local whether you need to add to map-reduce or local work
   * @param opProcCtx processing context
   * @param pList pruned partition list. If it is null it will be computed on-the-fly.
   */
  public static void setTaskPlan(
      String alias_id,
      Operator<? extends Serializable> topOp,
      MapredWork plan,
      boolean local,
      GenMRProcContext opProcCtx,
      PrunedPartitionList pList)
      throws SemanticException {
    ParseContext parseCtx = opProcCtx.getParseCtx();
    Set<ReadEntity> inputs = opProcCtx.getInputs();

    ArrayList<Path> partDir = new ArrayList<Path>();
    ArrayList<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();

    Path tblDir = null;
    TableDesc tblDesc = null;

    PrunedPartitionList partsList = pList;

    if (partsList == null) {
      try {
        partsList = parseCtx.getOpToPartList().get((TableScanOperator) topOp);
        if (partsList == null) {
          partsList =
              PartitionPruner.prune(
                  parseCtx.getTopToTable().get(topOp),
                  parseCtx.getOpToPartPruner().get(topOp),
                  opProcCtx.getConf(),
                  alias_id,
                  parseCtx.getPrunedPartitions());
          parseCtx.getOpToPartList().put((TableScanOperator) topOp, partsList);
        }
      } catch (SemanticException e) {
        throw e;
      } catch (HiveException e) {
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
      }
    }

    // Generate the map work for this alias_id
    Set<Partition> parts = null;
    // pass both confirmed and unknown partitions through the map-reduce
    // framework

    parts = partsList.getConfirmedPartns();
    parts.addAll(partsList.getUnknownPartns());
    PartitionDesc aliasPartnDesc = null;
    try {
      if (!parts.isEmpty()) {
        aliasPartnDesc = Utilities.getPartitionDesc(parts.iterator().next());
      }
    } catch (HiveException e) {
      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
      throw new SemanticException(e.getMessage(), e);
    }

    // The table does not have any partitions
    if (aliasPartnDesc == null) {
      aliasPartnDesc =
          new PartitionDesc(Utilities.getTableDesc(parseCtx.getTopToTable().get(topOp)), null);
    }

    plan.getAliasToPartnInfo().put(alias_id, aliasPartnDesc);

    for (Partition part : parts) {
      if (part.getTable().isPartitioned()) {
        inputs.add(new ReadEntity(part));
      } else {
        inputs.add(new ReadEntity(part.getTable()));
      }

      // Later the properties have to come from the partition as opposed
      // to from the table in order to support versioning.
      Path[] paths;
      sampleDesc sampleDescr = parseCtx.getOpToSamplePruner().get(topOp);

      if (sampleDescr != null) {
        paths = SamplePruner.prune(part, sampleDescr);
      } else {
        paths = part.getPath();
      }

      // is it a partitioned table ?
      if (!part.getTable().isPartitioned()) {
        assert ((tblDir == null) && (tblDesc == null));

        tblDir = paths[0];
        tblDesc = Utilities.getTableDesc(part.getTable());
      }

      for (Path p : paths) {
        if (p == null) {
          continue;
        }
        String path = p.toString();
        if (LOG.isDebugEnabled()) {
          LOG.debug("Adding " + path + " of table" + alias_id);
        }

        partDir.add(p);
        try {
          partDesc.add(Utilities.getPartitionDesc(part));
        } catch (HiveException e) {
          LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
          throw new SemanticException(e.getMessage(), e);
        }
      }
    }

    Iterator<Path> iterPath = partDir.iterator();
    Iterator<PartitionDesc> iterPartnDesc = partDesc.iterator();

    if (!local) {
      while (iterPath.hasNext()) {
        assert iterPartnDesc.hasNext();
        String path = iterPath.next().toString();

        PartitionDesc prtDesc = iterPartnDesc.next();

        // Add the path to alias mapping
        if (plan.getPathToAliases().get(path) == null) {
          plan.getPathToAliases().put(path, new ArrayList<String>());
        }
        plan.getPathToAliases().get(path).add(alias_id);
        plan.getPathToPartitionInfo().put(path, prtDesc);
        if (LOG.isDebugEnabled()) {
          LOG.debug("Information added for path " + path);
        }
      }

      assert plan.getAliasToWork().get(alias_id) == null;
      plan.getAliasToWork().put(alias_id, topOp);
    } else {
      // populate local work if needed
      MapredLocalWork localPlan = plan.getMapLocalWork();
      if (localPlan == null) {
        localPlan =
            new MapredLocalWork(
                new LinkedHashMap<String, Operator<? extends Serializable>>(),
                new LinkedHashMap<String, FetchWork>());
      }

      assert localPlan.getAliasToWork().get(alias_id) == null;
      assert localPlan.getAliasToFetchWork().get(alias_id) == null;
      localPlan.getAliasToWork().put(alias_id, topOp);
      if (tblDir == null) {
        localPlan
            .getAliasToFetchWork()
            .put(alias_id, new FetchWork(FetchWork.convertPathToStringArray(partDir), partDesc));
      } else {
        localPlan.getAliasToFetchWork().put(alias_id, new FetchWork(tblDir.toString(), tblDesc));
      }
      plan.setMapLocalWork(localPlan);
    }
  }
Example #4
0
  private List<HiveLockObj> getLockObjects(
      QueryPlan plan, Database db, Table t, Partition p, HiveLockMode mode) throws LockException {
    List<HiveLockObj> locks = new LinkedList<HiveLockObj>();

    HiveLockObject.HiveLockObjectData lockData =
        new HiveLockObject.HiveLockObjectData(
            plan.getQueryId(),
            String.valueOf(System.currentTimeMillis()),
            "IMPLICIT",
            plan.getQueryStr());

    if (db != null) {
      locks.add(new HiveLockObj(new HiveLockObject(db.getName(), lockData), mode));
      return locks;
    }

    if (t != null) {
      locks.add(new HiveLockObj(new HiveLockObject(t, lockData), mode));
      mode = HiveLockMode.SHARED;
      locks.add(new HiveLockObj(new HiveLockObject(t.getDbName(), lockData), mode));
      return locks;
    }

    if (p != null) {
      if (!(p instanceof DummyPartition)) {
        locks.add(new HiveLockObj(new HiveLockObject(p, lockData), mode));
      }

      // All the parents are locked in shared mode
      mode = HiveLockMode.SHARED;

      // For dummy partitions, only partition name is needed
      String name = p.getName();

      if (p instanceof DummyPartition) {
        name = p.getName().split("@")[2];
      }

      String partialName = "";
      String[] partns = name.split("/");
      int len = p instanceof DummyPartition ? partns.length : partns.length - 1;
      Map<String, String> partialSpec = new LinkedHashMap<String, String>();
      for (int idx = 0; idx < len; idx++) {
        String partn = partns[idx];
        partialName += partn;
        String[] nameValue = partn.split("=");
        assert (nameValue.length == 2);
        partialSpec.put(nameValue[0], nameValue[1]);
        try {
          locks.add(
              new HiveLockObj(
                  new HiveLockObject(
                      new DummyPartition(
                          p.getTable(),
                          p.getTable().getDbName()
                              + "/"
                              + MetaStoreUtils.encodeTableName(p.getTable().getTableName())
                              + "/"
                              + partialName,
                          partialSpec),
                      lockData),
                  mode));
          partialName += "/";
        } catch (HiveException e) {
          throw new LockException(e.getMessage());
        }
      }

      locks.add(new HiveLockObj(new HiveLockObject(p.getTable(), lockData), mode));
      locks.add(new HiveLockObj(new HiveLockObject(p.getTable().getDbName(), lockData), mode));
    }
    return locks;
  }