/**
  * Reads the rows of a Hive table through a fetch task.
  *
  * <p>When the table has partition columns, all partitions returned by the metastore client are
  * read and the fetch limit is set to 100. Otherwise the table's data location is read and no
  * limit is set by this method.
  *
  * @param table table name
  * @param database database name
  * @return fetched rows, each with its tab characters replaced by commas
  * @throws Exception if any error occurs
  */
 private List<String> getTableData(String table, String database) throws Exception {
   HiveConf hiveConf = new HiveConf();
   hiveConf.addResource("hive-site.xml");
   Hive metastore = Hive.get(hiveConf);
   org.apache.hadoop.hive.ql.metadata.Table hiveTable = metastore.getTable(database, table);

   FetchWork fetchWork;
   if (hiveTable.getPartCols().isEmpty()) {
     // No partition columns: read straight from the table's data location
     fetchWork =
         new FetchWork(hiveTable.getDataLocation().toString(), Utilities.getTableDesc(hiveTable));
   } else {
     // Partitioned: collect the location and descriptor of every partition
     List<Partition> allPartitions = metastore.getPartitions(hiveTable);
     List<PartitionDesc> descriptors = new ArrayList<PartitionDesc>();
     List<String> locations = new ArrayList<String>();
     for (Partition partition : allPartitions) {
       locations.add(partition.getLocation());
       descriptors.add(Utilities.getPartitionDesc(partition));
     }
     fetchWork = new FetchWork(locations, descriptors, Utilities.getTableDesc(hiveTable));
     fetchWork.setLimit(100);
   }

   FetchTask fetchTask = new FetchTask();
   fetchTask.setWork(fetchWork);
   fetchTask.initialize(hiveConf, null, null);

   ArrayList<String> rawRows = new ArrayList<String>();
   fetchTask.fetch(rawRows);

   // Fetched rows are tab-delimited; hand them back comma-delimited
   ArrayList<String> rows = new ArrayList<String>();
   for (String rawRow : rawRows) {
     rows.add(rawRow.replace('\t', ','));
   }
   return rows;
 }
Example #2
0
 /**
  * Returns the archiving level of a partition, which is how many fields were set in the partial
  * specification ARCHIVE was run for.
  *
  * @param p partition to inspect
  * @return the level reported by {@code MetaStoreUtils.getArchivingLevel}
  * @throws HiveException wrapping any {@code MetaException} raised by the metastore utility
  */
 public static int getArchivingLevel(Partition p) throws HiveException {
   int level;
   try {
     level = MetaStoreUtils.getArchivingLevel(p.getTPartition());
   } catch (MetaException metaEx) {
     throw new HiveException(metaEx.getMessage(), metaEx);
   }
   return level;
 }
Example #3
0
 /**
  * Gets a prefix of the given partition's string representation. The second argument, level, is
  * used for the prefix length. For example, for partition (ds='2010-01-01', hr='00', min='00'),
  * level 1 will return 'ds=2010-01-01', and level 2 will return 'ds=2010-01-01/hr=00'.
  *
  * @param p partition object
  * @param level level for prefix depth, i.e. how many leading partition columns to include
  * @return prefix of partition's string representation
  * @throws HiveException if the name cannot be generated; the underlying {@code MetaException}
  *     is attached as the cause
  */
 public static String getPartialName(Partition p, int level) throws HiveException {
   List<FieldSchema> ffields = p.getTable().getPartCols();
   List<FieldSchema> fields = new ArrayList<FieldSchema>(level);
   List<String> fvalues = p.getValues();
   List<String> values = new ArrayList<String>(level);
   // Copy the first `level` partition columns together with their values
   for (int i = 0; i < level; i++) {
     fields.add(ffields.get(i));
     values.add(fvalues.get(i));
   }
   try {
     return Warehouse.makePartName(fields, values);
   } catch (MetaException e) {
     // Chain the metastore failure so the root cause is not lost to callers and logs
     throw new HiveException("Wasn't able to generate name for partial specification", e);
   }
 }
    // Tells whether the partition is bucketed/sorted by the specified positions.
    // Besides the bucketed/sorted columns themselves, the number of buckets and
    // the sort order have to match as well.
    private boolean checkPartition(
        Partition partition,
        List<Integer> bucketPositionsDest,
        List<Integer> sortPositionsDest,
        List<Integer> sortOrderDest,
        int numBucketsDest) {
      // A different bucket count rules the partition out right away
      if (partition.getBucketCount() != numBucketsDest) {
        return false;
      }

      // Resolve the partition's bucketing and sorting columns to positions in the table columns
      List<Integer> actualBucketPositions =
          getBucketPositions(partition.getBucketCols(), partition.getTable().getCols());
      ObjectPair<List<Integer>, List<Integer>> actualSort =
          getSortPositionsOrder(partition.getSortCols(), partition.getTable().getCols());

      // Bucketing positions, sorting positions and sort order must all match exactly
      if (!bucketPositionsDest.equals(actualBucketPositions)) {
        return false;
      }
      if (!sortPositionsDest.equals(actualSort.getFirst())) {
        return false;
      }
      return sortOrderDest.equals(actualSort.getSecond());
    }
  /**
   * Sets the current task in the mapredWork.
   *
   * <p>Resolves the partitions to read for {@code alias_id}, records them as query inputs, and
   * then registers the paths, partition descriptors and top operator either in the map-reduce
   * plan or in the plan's local work, depending on {@code local}.
   *
   * @param alias_id current alias
   * @param topOp the top operator of the stack
   * @param plan current plan
   * @param local whether you need to add to map-reduce or local work
   * @param opProcCtx processing context
   * @param pList pruned partition list. If it is null it will be computed on-the-fly.
   * @throws SemanticException if partition pruning fails or a partition descriptor cannot be
   *     built; a {@code HiveException} is logged and wrapped
   */
  public static void setTaskPlan(
      String alias_id,
      Operator<? extends Serializable> topOp,
      MapredWork plan,
      boolean local,
      GenMRProcContext opProcCtx,
      PrunedPartitionList pList)
      throws SemanticException {
    ParseContext parseCtx = opProcCtx.getParseCtx();
    Set<ReadEntity> inputs = opProcCtx.getInputs();

    // Parallel lists: partDesc.get(i) describes the path at partDir.get(i)
    ArrayList<Path> partDir = new ArrayList<Path>();
    ArrayList<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();

    // Only set when a non-partitioned table is encountered in the loop below
    Path tblDir = null;
    TableDesc tblDesc = null;

    PrunedPartitionList partsList = pList;

    if (partsList == null) {
      try {
        // Reuse the list stored for this table scan, pruning (and storing) only on a miss
        partsList = parseCtx.getOpToPartList().get((TableScanOperator) topOp);
        if (partsList == null) {
          partsList =
              PartitionPruner.prune(
                  parseCtx.getTopToTable().get(topOp),
                  parseCtx.getOpToPartPruner().get(topOp),
                  opProcCtx.getConf(),
                  alias_id,
                  parseCtx.getPrunedPartitions());
          parseCtx.getOpToPartList().put((TableScanOperator) topOp, partsList);
        }
      } catch (SemanticException e) {
        throw e;
      } catch (HiveException e) {
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
      }
    }

    // Generate the map work for this alias_id
    Set<Partition> parts = null;
    // pass both confirmed and unknown partitions through the map-reduce
    // framework

    // NOTE(review): addAll modifies the set returned by getConfirmedPartns(); if that is the
    // list's internal set, the PrunedPartitionList stored in the parse context is changed too
    // - confirm this is intended.
    parts = partsList.getConfirmedPartns();
    parts.addAll(partsList.getUnknownPartns());
    PartitionDesc aliasPartnDesc = null;
    try {
      // The descriptor of the first partition stands in for the whole alias
      if (!parts.isEmpty()) {
        aliasPartnDesc = Utilities.getPartitionDesc(parts.iterator().next());
      }
    } catch (HiveException e) {
      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
      throw new SemanticException(e.getMessage(), e);
    }

    // The table does not have any partitions
    if (aliasPartnDesc == null) {
      aliasPartnDesc =
          new PartitionDesc(Utilities.getTableDesc(parseCtx.getTopToTable().get(topOp)), null);
    }

    plan.getAliasToPartnInfo().put(alias_id, aliasPartnDesc);

    for (Partition part : parts) {
      // Record what is being read: the partition itself, or the table when it is not partitioned
      if (part.getTable().isPartitioned()) {
        inputs.add(new ReadEntity(part));
      } else {
        inputs.add(new ReadEntity(part.getTable()));
      }

      // Later the properties have to come from the partition as opposed
      // to from the table in order to support versioning.
      Path[] paths;
      sampleDesc sampleDescr = parseCtx.getOpToSamplePruner().get(topOp);

      // With a sample descriptor only the sampled paths are read, otherwise all partition paths
      if (sampleDescr != null) {
        paths = SamplePruner.prune(part, sampleDescr);
      } else {
        paths = part.getPath();
      }

      // is it a partitioned table ?
      if (!part.getTable().isPartitioned()) {
        assert ((tblDir == null) && (tblDesc == null));

        tblDir = paths[0];
        tblDesc = Utilities.getTableDesc(part.getTable());
      }

      for (Path p : paths) {
        if (p == null) {
          continue;
        }
        String path = p.toString();
        if (LOG.isDebugEnabled()) {
          LOG.debug("Adding " + path + " of table" + alias_id);
        }

        // One descriptor entry per path keeps partDir and partDesc aligned
        partDir.add(p);
        try {
          partDesc.add(Utilities.getPartitionDesc(part));
        } catch (HiveException e) {
          LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
          throw new SemanticException(e.getMessage(), e);
        }
      }
    }

    Iterator<Path> iterPath = partDir.iterator();
    Iterator<PartitionDesc> iterPartnDesc = partDesc.iterator();

    if (!local) {
      // Map-reduce work: register every path with its alias and partition descriptor
      while (iterPath.hasNext()) {
        assert iterPartnDesc.hasNext();
        String path = iterPath.next().toString();

        PartitionDesc prtDesc = iterPartnDesc.next();

        // Add the path to alias mapping
        if (plan.getPathToAliases().get(path) == null) {
          plan.getPathToAliases().put(path, new ArrayList<String>());
        }
        plan.getPathToAliases().get(path).add(alias_id);
        plan.getPathToPartitionInfo().put(path, prtDesc);
        if (LOG.isDebugEnabled()) {
          LOG.debug("Information added for path " + path);
        }
      }

      assert plan.getAliasToWork().get(alias_id) == null;
      plan.getAliasToWork().put(alias_id, topOp);
    } else {
      // populate local work if needed
      MapredLocalWork localPlan = plan.getMapLocalWork();
      if (localPlan == null) {
        localPlan =
            new MapredLocalWork(
                new LinkedHashMap<String, Operator<? extends Serializable>>(),
                new LinkedHashMap<String, FetchWork>());
      }

      assert localPlan.getAliasToWork().get(alias_id) == null;
      assert localPlan.getAliasToFetchWork().get(alias_id) == null;
      localPlan.getAliasToWork().put(alias_id, topOp);
      // tblDir is null unless a non-partitioned table was seen above: fetch from the collected
      // partition paths in that case, otherwise from the single table directory
      if (tblDir == null) {
        localPlan
            .getAliasToFetchWork()
            .put(alias_id, new FetchWork(FetchWork.convertPathToStringArray(partDir), partDesc));
      } else {
        localPlan.getAliasToFetchWork().put(alias_id, new FetchWork(tblDir.toString(), tblDesc));
      }
      plan.setMapLocalWork(localPlan);
    }
  }
Example #6
0
 /**
  * Tells whether a partition has been archived.
  *
  * @param p partition whose underlying metastore partition is checked
  * @return the result of {@code MetaStoreUtils.isArchived} for that partition
  */
 public static boolean isArchived(Partition p) {
   final boolean archived = MetaStoreUtils.isArchived(p.getTPartition());
   return archived;
 }
  /**
   * Analyzes a LOAD DATA statement and builds its task tree.
   *
   * <p>Child 0 of the AST is the source path and child 1 the destination table. With three
   * children the third is either the LOCAL keyword or (anything else) the overwrite flag; with
   * four children both LOCAL and overwrite are set. The resulting root task is an optional copy
   * (for LOCAL loads) followed by a move, with optional index-update and stats tasks depending on
   * the move.
   *
   * @param ast root node of the load statement
   * @throws SemanticException if the source path is invalid, the destination is offline, a view,
   *     non-native or stored as sub-directories, a partitioned table is loaded without a
   *     partition spec, or the destination partition lookup fails
   */
  @Override
  public void analyzeInternal(ASTNode ast) throws SemanticException {
    isLocal = false;
    isOverWrite = false;
    Tree fromTree = ast.getChild(0);
    Tree tableTree = ast.getChild(1);

    if (ast.getChildCount() == 4) {
      isLocal = true;
      isOverWrite = true;
    }

    if (ast.getChildCount() == 3) {
      // Case-insensitive keyword match that does not depend on the default locale
      if (ast.getChild(2).getText().equalsIgnoreCase("local")) {
        isLocal = true;
      } else {
        isOverWrite = true;
      }
    }

    // initialize load path
    URI fromURI;
    try {
      String fromPath = stripQuotes(fromTree.getText());
      fromURI = initializeFromURI(fromPath);
    } catch (IOException e) {
      throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
    } catch (URISyntaxException e) {
      throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
    }

    // initialize destination table/partition
    tableSpec ts = new tableSpec(db, conf, (ASTNode) tableTree);

    // reject destinations that cannot be loaded into
    if (ts.tableHandle.isOffline()) {
      throw new SemanticException(
          ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(":Table " + ts.tableName));
    }

    if (ts.tableHandle.isView()) {
      throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
    }
    if (ts.tableHandle.isNonNative()) {
      throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
    }

    if (ts.tableHandle.isStoredAsSubDirectories()) {
      throw new SemanticException(ErrorMsg.LOAD_INTO_STORED_AS_DIR.getMsg());
    }

    URI toURI =
        (ts.partHandle != null)
            ? ts.partHandle.getDataLocation()
            : ts.tableHandle.getDataLocation();

    // a partitioned table can only be loaded with an explicit partition spec
    List<FieldSchema> parts = ts.tableHandle.getPartitionKeys();
    if ((parts != null && parts.size() > 0) && (ts.partSpec == null || ts.partSpec.size() == 0)) {
      throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg());
    }

    // make sure the arguments make sense
    applyConstraints(fromURI, toURI, fromTree, isLocal);

    Task<? extends Serializable> rTask = null;

    // create copy work
    if (isLocal) {
      // if the local keyword is specified - we will always make a copy. this
      // might seem redundant in the case
      // that the hive warehouse is also located in the local file system - but
      // that's just a test case.
      String copyURIStr = ctx.getExternalTmpFileURI(toURI);
      URI copyURI = URI.create(copyURIStr);
      rTask = TaskFactory.get(new CopyWork(fromURI.toString(), copyURIStr), conf);
      // the move below then reads from the copied location
      fromURI = copyURI;
    }

    // create final load/move work

    String loadTmpPath = ctx.getExternalTmpFileURI(toURI);
    Map<String, String> partSpec = ts.getPartSpec();
    if (partSpec == null) {
      partSpec = new LinkedHashMap<String, String>();
      outputs.add(new WriteEntity(ts.tableHandle));
    } else {
      try {
        // the output is the partition when it already exists, otherwise the table
        Partition part = Hive.get().getPartition(ts.tableHandle, partSpec, false);
        if (part != null) {
          if (part.isOffline()) {
            throw new SemanticException(
                ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(ts.tableName + ":" + part.getName()));
          }
          outputs.add(new WriteEntity(part));
        } else {
          outputs.add(new WriteEntity(ts.tableHandle));
        }
      } catch (HiveException e) {
        throw new SemanticException(e);
      }
    }

    LoadTableDesc loadTableWork =
        new LoadTableDesc(
            fromURI.toString(),
            loadTmpPath,
            Utilities.getTableDesc(ts.tableHandle),
            partSpec,
            isOverWrite);

    Task<? extends Serializable> childTask =
        TaskFactory.get(new MoveWork(getInputs(), getOutputs(), loadTableWork, null, true), conf);
    // chain the move after the copy when there is one, otherwise the move is the root
    if (rTask != null) {
      rTask.addDependentTask(childTask);
    } else {
      rTask = childTask;
    }

    rootTasks.add(rTask);

    // The user asked for stats to be collected.
    // Some stats like number of rows require a scan of the data
    // However, some other stats, like number of files, do not require a complete scan
    // Update the stats which do not require a complete scan.
    Task<? extends Serializable> statTask = null;
    if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
      StatsWork statDesc = new StatsWork(loadTableWork);
      statDesc.setNoStatsAggregator(true);
      statDesc.setClearAggregatorStats(true);
      statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
      statTask = TaskFactory.get(statDesc, conf);
    }

    // HIVE-3334 has been filed for load file with index auto update
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
      IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, getInputs(), conf);
      try {
        List<Task<? extends Serializable>> indexUpdateTasks = indexUpdater.generateUpdateTasks();

        for (Task<? extends Serializable> updateTask : indexUpdateTasks) {
          // LOAD DATA will either have a copy & move or just a move,
          // we always want the update to be dependent on the move
          childTask.addDependentTask(updateTask);
          if (statTask != null) {
            updateTask.addDependentTask(statTask);
          }
        }
      } catch (HiveException e) {
        // Best effort: the load itself still proceeds, the user is only warned that the
        // indexes were left stale.
        console.printInfo("WARNING: could not auto-update stale indexes, indexes are not in sync");
      }
    } else if (statTask != null) {
      childTask.addDependentTask(statTask);
    }
  }
Example #8
0
  /**
   * Builds the locks to acquire for a database, a table or a partition. Only the first non-null
   * of {@code db}, {@code t} and {@code p} is considered, in that order.
   *
   * <ul>
   *   <li>Database: one lock on the database in the requested mode.
   *   <li>Table: the table in the requested mode, plus its database in SHARED mode.
   *   <li>Partition: the partition in the requested mode (skipped for a {@code DummyPartition}),
   *       plus each parent partition prefix, the table and the database in SHARED mode.
   * </ul>
   *
   * @param plan query plan supplying the query id and query string stored in the lock data
   * @param db database to lock, or null
   * @param t table to lock, or null
   * @param p partition to lock, or null
   * @param mode lock mode for the object itself; its parents are always locked in SHARED mode
   * @return the locks to acquire; empty when all of {@code db}, {@code t} and {@code p} are null
   * @throws LockException if a lock object for a parent partition cannot be created; the
   *     underlying {@code HiveException} is attached as the cause
   */
  private List<HiveLockObj> getLockObjects(
      QueryPlan plan, Database db, Table t, Partition p, HiveLockMode mode) throws LockException {
    List<HiveLockObj> locks = new LinkedList<HiveLockObj>();

    HiveLockObject.HiveLockObjectData lockData =
        new HiveLockObject.HiveLockObjectData(
            plan.getQueryId(),
            String.valueOf(System.currentTimeMillis()),
            "IMPLICIT",
            plan.getQueryStr());

    if (db != null) {
      locks.add(new HiveLockObj(new HiveLockObject(db.getName(), lockData), mode));
      return locks;
    }

    if (t != null) {
      locks.add(new HiveLockObj(new HiveLockObject(t, lockData), mode));
      mode = HiveLockMode.SHARED;
      locks.add(new HiveLockObj(new HiveLockObject(t.getDbName(), lockData), mode));
      return locks;
    }

    if (p != null) {
      boolean isDummy = p instanceof DummyPartition;
      if (!isDummy) {
        locks.add(new HiveLockObj(new HiveLockObject(p, lockData), mode));
      }

      // All the parents are locked in shared mode
      mode = HiveLockMode.SHARED;

      // For dummy partitions, only partition name is needed
      // (presumably the dummy name is "db@table@partition" - confirm against DummyPartition)
      String name = isDummy ? p.getName().split("@")[2] : p.getName();

      String[] partns = name.split("/");
      // A real partition was already locked above, so only its proper prefixes are parents;
      // for a dummy partition every component, including the last, is locked here.
      int len = isDummy ? partns.length : partns.length - 1;
      StringBuilder partialName = new StringBuilder();
      Map<String, String> partialSpec = new LinkedHashMap<String, String>();
      for (int idx = 0; idx < len; idx++) {
        String partn = partns[idx];
        partialName.append(partn);
        String[] nameValue = partn.split("=");
        assert (nameValue.length == 2);
        partialSpec.put(nameValue[0], nameValue[1]);
        try {
          locks.add(
              new HiveLockObj(
                  new HiveLockObject(
                      new DummyPartition(
                          p.getTable(),
                          p.getTable().getDbName()
                              + "/"
                              + MetaStoreUtils.encodeTableName(p.getTable().getTableName())
                              + "/"
                              + partialName,
                          partialSpec),
                      lockData),
                  mode));
          partialName.append("/");
        } catch (HiveException e) {
          // Chain the original failure so its stack trace is not lost
          throw new LockException(e.getMessage(), e);
        }
      }

      locks.add(new HiveLockObj(new HiveLockObject(p.getTable(), lockData), mode));
      locks.add(new HiveLockObj(new HiveLockObject(p.getTable().getDbName(), lockData), mode));
    }
    return locks;
  }
Example #9
0
  /**
   * Authorizes the DDL operation described by {@code work}.
   *
   * <p>Each descriptor that may be present on the work object is checked in turn; for every
   * non-null descriptor the matching database, table or partition is looked up and passed to
   * {@code authorize} / {@code authorizeTable} with the privilege used below.
   *
   * @param cntxt semantic analyzer hook context, used to obtain the Hive client for database and
   *     table lookups
   * @param hive Hive client used for the table and partition lookups of DROP PARTITION and ALTER
   *     TABLE
   * @param work DDL work whose descriptors are inspected
   * @throws HiveException if a metadata lookup fails; failures while listing partitions by filter
   *     are wrapped
   */
  @Override
  protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext cntxt, Hive hive, DDLWork work)
      throws HiveException {
    // DB operations, none of them are enforced by Hive right now.

    ShowDatabasesDesc showDatabases = work.getShowDatabasesDesc();
    if (showDatabases != null) {
      authorize(
          HiveOperation.SHOWDATABASES.getInputRequiredPrivileges(),
          HiveOperation.SHOWDATABASES.getOutputRequiredPrivileges());
    }

    DropDatabaseDesc dropDb = work.getDropDatabaseDesc();
    if (dropDb != null) {
      Database db = cntxt.getHive().getDatabase(dropDb.getDatabaseName());
      authorize(db, Privilege.DROP);
    }

    DescDatabaseDesc descDb = work.getDescDatabaseDesc();
    if (descDb != null) {
      Database db = cntxt.getHive().getDatabase(descDb.getDatabaseName());
      authorize(db, Privilege.SELECT);
    }

    SwitchDatabaseDesc switchDb = work.getSwitchDatabaseDesc();
    if (switchDb != null) {
      Database db = cntxt.getHive().getDatabase(switchDb.getDatabaseName());
      authorize(db, Privilege.SELECT);
    }

    ShowTablesDesc showTables = work.getShowTblsDesc();
    if (showTables != null) {
      // fall back to the session's current database when none is named
      String dbName =
          showTables.getDbName() == null
              ? SessionState.get().getCurrentDatabase()
              : showTables.getDbName();
      authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT);
    }

    ShowTableStatusDesc showTableStatus = work.getShowTblStatusDesc();
    if (showTableStatus != null) {
      // fall back to the session's current database when none is named
      String dbName =
          showTableStatus.getDbName() == null
              ? SessionState.get().getCurrentDatabase()
              : showTableStatus.getDbName();
      authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT);
    }

    // TODO: add alter database support in HCat

    // Table operations.

    DropTableDesc dropTable = work.getDropTblDesc();
    if (dropTable != null) {
      if (dropTable.getPartSpecs() == null) {
        // drop table is already enforced by Hive. We only check for table level location even
        // if the table is partitioned.
      } else {
        // this is actually an ALTER TABLE DROP PARTITION statement
        for (DropTableDesc.PartSpec partSpec : dropTable.getPartSpecs()) {
          // partitions are not added as write entries in drop partitions in Hive
          // NOTE(review): this lookup does not depend on partSpec and is repeated on every
          // iteration.
          Table table =
              hive.getTable(SessionState.get().getCurrentDatabase(), dropTable.getTableName());
          List<Partition> partitions = null;
          try {
            partitions = hive.getPartitionsByFilter(table, partSpec.getPartSpec().getExprString());
          } catch (Exception e) {
            throw new HiveException(e);
          }

          // every partition matched by the filter needs the DROP privilege
          for (Partition part : partitions) {
            authorize(part, Privilege.DROP);
          }
        }
      }
    }

    AlterTableDesc alterTable = work.getAlterTblDesc();
    if (alterTable != null) {
      // NOTE(review): the trailing `false` presumably disables throwing when the table is
      // missing, in which case `table` may be null and is dereferenced below - confirm.
      Table table =
          hive.getTable(SessionState.get().getCurrentDatabase(), alterTable.getOldName(), false);

      Partition part = null;
      if (alterTable.getPartSpec() != null) {
        part = hive.getPartition(table, alterTable.getPartSpec(), false);
      }

      String newLocation = alterTable.getNewLocation();

      /* Hcat requires ALTER_DATA privileges for ALTER TABLE LOCATION statements
       * for the old table/partition location and the new location.
       */
      if (alterTable.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) {
        // The same object is authorized twice: once as fetched (old location) and once after
        // its location has been overwritten with the new one.
        if (part != null) {
          authorize(part, Privilege.ALTER_DATA); // authorize for the old
          // location, and new location
          part.setLocation(newLocation);
          authorize(part, Privilege.ALTER_DATA);
        } else {
          authorize(table, Privilege.ALTER_DATA); // authorize for the old
          // location, and new location
          table.getTTable().getSd().setLocation(newLocation);
          authorize(table, Privilege.ALTER_DATA);
        }
      }
      // other alter operations are already supported by Hive
    }

    // we should be careful when authorizing table based on just the
    // table name. If columns have separate authorization domain, it
    // must be honored
    DescTableDesc descTable = work.getDescTblDesc();
    if (descTable != null) {
      String tableName = extractTableName(descTable.getTableName());
      authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT);
    }

    ShowPartitionsDesc showParts = work.getShowPartsDesc();
    if (showParts != null) {
      String tableName = extractTableName(showParts.getTabName());
      authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT);
    }
  }
  /**
   * Creates the database (and, where applicable, table) instances for a Hive entity and queues
   * them on the event as an entity update request.
   *
   * <p>Only DATABASE, TABLE and PARTITION entities are handled. For any other entity type, or
   * when the owning database cannot be resolved, nothing is queued and an empty map is returned.
   *
   * @param dgiBridge bridge used to look up Hive metadata and to create the instances
   * @param event hook event that receives the entity update request
   * @param entity Hive entity being processed
   * @param skipTempTables when true, temporary tables that are not external are not registered
   * @param existTable table object to use instead of re-reading the table through the Hive
   *     client; may be null
   * @return created instances keyed by type, in creation order (database first, then table)
   * @throws Exception if a metadata lookup or instance creation fails
   */
  private LinkedHashMap<Type, Referenceable> createOrUpdateEntities(
      HiveMetaStoreBridge dgiBridge,
      HiveEventContext event,
      Entity entity,
      boolean skipTempTables,
      Table existTable)
      throws Exception {
    Database db = null;
    Table table = null;
    LinkedHashMap<Type, Referenceable> result = new LinkedHashMap<>();
    List<Referenceable> entities = new ArrayList<>();

    switch (entity.getType()) {
      case DATABASE:
        db = entity.getDatabase();
        break;

      case TABLE:
        table = entity.getTable();
        db = dgiBridge.hiveClient.getDatabase(table.getDbName());
        break;

      case PARTITION:
        table = entity.getPartition().getTable();
        db = dgiBridge.hiveClient.getDatabase(table.getDbName());
        break;

      default:
        // Other entity types carry no database/table to register
        LOG.info("{}: entity-type not handled. Ignored", entity.getType());
        break;
    }

    // Re-read the database through the Hive client before creating its instance
    if (db != null) {
      db = dgiBridge.hiveClient.getDatabase(db.getName());
    }
    if (db == null) {
      // Unhandled entity type or unresolvable database: nothing to register
      return result;
    }

    Referenceable dbEntity = dgiBridge.createDBInstance(db);

    entities.add(dbEntity);
    result.put(Type.DATABASE, dbEntity);

    Referenceable tableEntity = null;

    if (table != null) {
      if (existTable != null) {
        table = existTable;
      } else {
        table = dgiBridge.hiveClient.getTable(table.getDbName(), table.getTableName());
      }
      // If its an external table, even though the temp table skip flag is on,
      // we create the table since we need the HDFS path to temp table lineage.
      if (skipTempTables
          && table.isTemporary()
          && !TableType.EXTERNAL_TABLE.equals(table.getTableType())) {
        LOG.debug(
            "Skipping temporary table registration {} since it is not an external table {} ",
            table.getTableName(),
            table.getTableType().name());

      } else {
        tableEntity = dgiBridge.createTableInstance(dbEntity, table);
        entities.add(tableEntity);
        result.put(Type.TABLE, tableEntity);
      }
    }

    event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities));
    return result;
  }
Example #11
0
 /**
  * Builds an {@code HCatSchema} from the columns of a partition.
  *
  * @param partition partition whose columns are converted
  * @return schema wrapping the HCat field schemas derived from {@code partition.getCols()}
  * @throws HCatException if the conversion fails
  */
 public static HCatSchema extractSchema(Partition partition) throws HCatException {
   final HCatSchema schema =
       new HCatSchema(HCatUtil.getHCatFieldSchemaList(partition.getCols()));
   return schema;
 }
 /**
  * Returns the previous high watermark of a partition, which is taken from the table that owns
  * the partition.
  *
  * @param partition partition being queried
  * @return previous high watermark of {@code partition.getTable()}
  */
 @Override
 public LongWatermark getPreviousHighWatermark(Partition partition) {
   final LongWatermark tableWatermark = getPreviousHighWatermark(partition.getTable());
   return tableWatermark;
 }
 /**
  * Returns the expected high watermark of a partition by delegating to the table-level overload
  * with the owning table and {@code tableProcessTime}.
  *
  * @param partition partition being queried
  * @param tableProcessTime process time passed on to the table-level overload
  * @param partitionProcessTime not used by this implementation
  * @return expected high watermark of {@code partition.getTable()}
  */
 @Override
 public LongWatermark getExpectedHighWatermark(
     Partition partition, long tableProcessTime, long partitionProcessTime) {
   final LongWatermark tableWatermark =
       getExpectedHighWatermark(partition.getTable(), tableProcessTime);
   return tableWatermark;
 }