/**
 * Method to fetch table data
 *
 * @param table table name
 * @param database database
 * @return list of columns in comma separated way
 * @throws Exception if any error occurs
 */
private List<String> getTableData(String table, String database) throws Exception {
  HiveConf conf = new HiveConf();
  conf.addResource("hive-site.xml");
  ArrayList<String> results = new ArrayList<String>();
  ArrayList<String> temp = new ArrayList<String>();
  Hive hive = Hive.get(conf);
  org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table);
  FetchWork work;
  if (!tbl.getPartCols().isEmpty()) {
    List<Partition> partitions = hive.getPartitions(tbl);
    List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
    List<String> partLocs = new ArrayList<String>();
    for (Partition part : partitions) {
      partLocs.add(part.getLocation());
      partDesc.add(Utilities.getPartitionDesc(part));
    }
    work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl));
    work.setLimit(100);
  } else {
    work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl));
  }
  FetchTask task = new FetchTask();
  task.setWork(work);
  task.initialize(conf, null, null);
  task.fetch(temp);
  for (String str : temp) {
    results.add(str.replace("\t", ","));
  }
  return results;
}
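// A hypothetical usage sketch for getTableData; the table and database names
// below are assumptions, not part of the surrounding code. It prints the
// fetched rows with tabs already rewritten as commas.
private void printTableDataSketch() throws Exception {
  List<String> rows = getTableData("employees", "default");
  for (String row : rows) {
    System.out.println(row); // e.g. "1,alice,2010-01-01"
  }
}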
/**
 * Returns the archiving level, which is how many fields were set in the partial
 * specification that ARCHIVE was run for.
 */
public static int getArchivingLevel(Partition p) throws HiveException {
  try {
    return MetaStoreUtils.getArchivingLevel(p.getTPartition());
  } catch (MetaException ex) {
    throw new HiveException(ex.getMessage(), ex);
  }
}
/**
 * Get a prefix of the given partition's string representation. The second argument, level, is
 * used for the prefix length. For example, for partition (ds='2010-01-01', hr='00', min='00'),
 * level 1 will return 'ds=2010-01-01', and level 2 will return 'ds=2010-01-01/hr=00'.
 *
 * @param p partition object
 * @param level level for prefix depth
 * @return prefix of partition's string representation
 * @throws HiveException
 */
public static String getPartialName(Partition p, int level) throws HiveException {
  List<FieldSchema> ffields = p.getTable().getPartCols();
  List<FieldSchema> fields = new ArrayList<FieldSchema>(level);
  List<String> fvalues = p.getValues();
  List<String> values = new ArrayList<String>(level);
  for (int i = 0; i < level; i++) {
    FieldSchema fs = ffields.get(i);
    String s = fvalues.get(i);
    fields.add(fs);
    values.add(s);
  }
  try {
    return Warehouse.makePartName(fields, values);
  } catch (MetaException e) {
    throw new HiveException("Wasn't able to generate name for partial specification", e);
  }
}
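// A minimal sketch of the level semantics described in the Javadoc above,
// assuming a hypothetical Partition `part` with spec
// (ds='2010-01-01', hr='00', min='00').
private static void partialNameSketch(Partition part) throws HiveException {
  System.out.println(getPartialName(part, 1)); // "ds=2010-01-01"
  System.out.println(getPartialName(part, 2)); // "ds=2010-01-01/hr=00"
}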
// Return true if the partition is bucketed/sorted by the specified positions.
// The number of buckets and the sort order should also match, along with the
// columns which are bucketed/sorted.
private boolean checkPartition(
    Partition partition,
    List<Integer> bucketPositionsDest,
    List<Integer> sortPositionsDest,
    List<Integer> sortOrderDest,
    int numBucketsDest) {
  // The bucketing and sorting positions should exactly match
  int numBuckets = partition.getBucketCount();
  if (numBucketsDest != numBuckets) {
    return false;
  }
  List<Integer> partnBucketPositions =
      getBucketPositions(partition.getBucketCols(), partition.getTable().getCols());
  ObjectPair<List<Integer>, List<Integer>> partnSortPositionsOrder =
      getSortPositionsOrder(partition.getSortCols(), partition.getTable().getCols());
  return bucketPositionsDest.equals(partnBucketPositions)
      && sortPositionsDest.equals(partnSortPositionsOrder.getFirst())
      && sortOrderDest.equals(partnSortPositionsOrder.getSecond());
}
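// A minimal sketch of the kind of position mapping checkPartition relies on.
// The real getBucketPositions helper is not shown in this excerpt, so the body
// below is an illustrative assumption, not the actual implementation: it maps
// each bucketed column name to its 0-based index in the table's column list.
private static List<Integer> bucketPositionsSketch(
    List<String> bucketCols, List<FieldSchema> tableCols) {
  List<Integer> positions = new ArrayList<Integer>();
  for (String col : bucketCols) {
    for (int i = 0; i < tableCols.size(); i++) {
      if (tableCols.get(i).getName().equals(col)) {
        positions.add(i); // record the position of each bucketed column
        break;
      }
    }
  }
  return positions;
}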
/**
 * set the current task in the mapredWork.
 *
 * @param alias_id current alias
 * @param topOp the top operator of the stack
 * @param plan current plan
 * @param local whether you need to add to map-reduce or local work
 * @param opProcCtx processing context
 * @param pList pruned partition list. If it is null it will be computed on-the-fly.
 */
public static void setTaskPlan(
    String alias_id,
    Operator<? extends Serializable> topOp,
    MapredWork plan,
    boolean local,
    GenMRProcContext opProcCtx,
    PrunedPartitionList pList)
    throws SemanticException {
  ParseContext parseCtx = opProcCtx.getParseCtx();
  Set<ReadEntity> inputs = opProcCtx.getInputs();

  ArrayList<Path> partDir = new ArrayList<Path>();
  ArrayList<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();

  Path tblDir = null;
  TableDesc tblDesc = null;

  PrunedPartitionList partsList = pList;
  if (partsList == null) {
    try {
      partsList = parseCtx.getOpToPartList().get((TableScanOperator) topOp);
      if (partsList == null) {
        partsList = PartitionPruner.prune(
            parseCtx.getTopToTable().get(topOp),
            parseCtx.getOpToPartPruner().get(topOp),
            opProcCtx.getConf(),
            alias_id,
            parseCtx.getPrunedPartitions());
        parseCtx.getOpToPartList().put((TableScanOperator) topOp, partsList);
      }
    } catch (SemanticException e) {
      throw e;
    } catch (HiveException e) {
      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
      throw new SemanticException(e.getMessage(), e);
    }
  }

  // Generate the map work for this alias_id.
  // Pass both confirmed and unknown partitions through the map-reduce framework.
  Set<Partition> parts = partsList.getConfirmedPartns();
  parts.addAll(partsList.getUnknownPartns());
  PartitionDesc aliasPartnDesc = null;
  try {
    if (!parts.isEmpty()) {
      aliasPartnDesc = Utilities.getPartitionDesc(parts.iterator().next());
    }
  } catch (HiveException e) {
    LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
    throw new SemanticException(e.getMessage(), e);
  }

  // The table does not have any partitions
  if (aliasPartnDesc == null) {
    aliasPartnDesc =
        new PartitionDesc(Utilities.getTableDesc(parseCtx.getTopToTable().get(topOp)), null);
  }

  plan.getAliasToPartnInfo().put(alias_id, aliasPartnDesc);

  for (Partition part : parts) {
    if (part.getTable().isPartitioned()) {
      inputs.add(new ReadEntity(part));
    } else {
      inputs.add(new ReadEntity(part.getTable()));
    }

    // Later the properties have to come from the partition as opposed
    // to from the table in order to support versioning.
    Path[] paths;
    sampleDesc sampleDescr = parseCtx.getOpToSamplePruner().get(topOp);

    if (sampleDescr != null) {
      paths = SamplePruner.prune(part, sampleDescr);
    } else {
      paths = part.getPath();
    }

    // is it a partitioned table ?
    if (!part.getTable().isPartitioned()) {
      assert ((tblDir == null) && (tblDesc == null));
      tblDir = paths[0];
      tblDesc = Utilities.getTableDesc(part.getTable());
    }

    for (Path p : paths) {
      if (p == null) {
        continue;
      }
      String path = p.toString();
      if (LOG.isDebugEnabled()) {
        LOG.debug("Adding " + path + " of table " + alias_id);
      }

      partDir.add(p);
      try {
        partDesc.add(Utilities.getPartitionDesc(part));
      } catch (HiveException e) {
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
      }
    }
  }

  Iterator<Path> iterPath = partDir.iterator();
  Iterator<PartitionDesc> iterPartnDesc = partDesc.iterator();

  if (!local) {
    while (iterPath.hasNext()) {
      assert iterPartnDesc.hasNext();
      String path = iterPath.next().toString();
      PartitionDesc prtDesc = iterPartnDesc.next();

      // Add the path to alias mapping
      if (plan.getPathToAliases().get(path) == null) {
        plan.getPathToAliases().put(path, new ArrayList<String>());
      }
      plan.getPathToAliases().get(path).add(alias_id);
      plan.getPathToPartitionInfo().put(path, prtDesc);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Information added for path " + path);
      }
    }

    assert plan.getAliasToWork().get(alias_id) == null;
    plan.getAliasToWork().put(alias_id, topOp);
  } else {
    // populate local work if needed
    MapredLocalWork localPlan = plan.getMapLocalWork();
    if (localPlan == null) {
      localPlan = new MapredLocalWork(
          new LinkedHashMap<String, Operator<? extends Serializable>>(),
          new LinkedHashMap<String, FetchWork>());
    }

    assert localPlan.getAliasToWork().get(alias_id) == null;
    assert localPlan.getAliasToFetchWork().get(alias_id) == null;
    localPlan.getAliasToWork().put(alias_id, topOp);
    if (tblDir == null) {
      localPlan.getAliasToFetchWork()
          .put(alias_id, new FetchWork(FetchWork.convertPathToStringArray(partDir), partDesc));
    } else {
      localPlan.getAliasToFetchWork().put(alias_id, new FetchWork(tblDir.toString(), tblDesc));
    }
    plan.setMapLocalWork(localPlan);
  }
}
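// Illustrative (hypothetical) shape of the plan mappings setTaskPlan populates
// in the non-local branch, for a two-partition table scanned under alias "t":
//   pathToAliases:       { "/wh/t/ds=1" -> ["t"], "/wh/t/ds=2" -> ["t"] }
//   pathToPartitionInfo: { "/wh/t/ds=1" -> PartitionDesc(ds=1), ... }
//   aliasToWork:         { "t" -> topOp }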
/**
 * Determines whether a partition has been archived
 *
 * @param p
 * @return is it archived?
 */
public static boolean isArchived(Partition p) {
  return MetaStoreUtils.isArchived(p.getTPartition());
}
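// Hypothetical usage combining the two archive helpers above; `part` is an
// assumed Partition handle, not defined in this excerpt.
private static void describeArchiveState(Partition part) throws HiveException {
  if (isArchived(part)) {
    // level N means ARCHIVE was run with the first N partition keys specified
    System.out.println("Archived at level " + getArchivingLevel(part));
  }
}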
@Override
public void analyzeInternal(ASTNode ast) throws SemanticException {
  isLocal = false;
  isOverWrite = false;
  Tree fromTree = ast.getChild(0);
  Tree tableTree = ast.getChild(1);

  if (ast.getChildCount() == 4) {
    isLocal = true;
    isOverWrite = true;
  }

  if (ast.getChildCount() == 3) {
    if (ast.getChild(2).getText().toLowerCase().equals("local")) {
      isLocal = true;
    } else {
      isOverWrite = true;
    }
  }

  // initialize load path
  URI fromURI;
  try {
    String fromPath = stripQuotes(fromTree.getText());
    fromURI = initializeFromURI(fromPath);
  } catch (IOException e) {
    throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
  } catch (URISyntaxException e) {
    throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
  }

  // initialize destination table/partition
  tableSpec ts = new tableSpec(db, conf, (ASTNode) tableTree);

  if (ts.tableHandle.isOffline()) {
    throw new SemanticException(
        ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(":Table " + ts.tableName));
  }
  if (ts.tableHandle.isView()) {
    throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
  }
  if (ts.tableHandle.isNonNative()) {
    throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
  }
  if (ts.tableHandle.isStoredAsSubDirectories()) {
    throw new SemanticException(ErrorMsg.LOAD_INTO_STORED_AS_DIR.getMsg());
  }

  URI toURI = (ts.partHandle != null)
      ? ts.partHandle.getDataLocation()
      : ts.tableHandle.getDataLocation();

  List<FieldSchema> parts = ts.tableHandle.getPartitionKeys();
  if ((parts != null && parts.size() > 0)
      && (ts.partSpec == null || ts.partSpec.size() == 0)) {
    throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg());
  }

  // make sure the arguments make sense
  applyConstraints(fromURI, toURI, fromTree, isLocal);

  Task<? extends Serializable> rTask = null;

  // create copy work
  if (isLocal) {
    // if the local keyword is specified - we will always make a copy. this
    // might seem redundant in the case that the hive warehouse is also
    // located in the local file system - but that's just a test case.
    String copyURIStr = ctx.getExternalTmpFileURI(toURI);
    URI copyURI = URI.create(copyURIStr);
    rTask = TaskFactory.get(new CopyWork(fromURI.toString(), copyURIStr), conf);
    fromURI = copyURI;
  }

  // create final load/move work
  String loadTmpPath = ctx.getExternalTmpFileURI(toURI);
  Map<String, String> partSpec = ts.getPartSpec();
  if (partSpec == null) {
    partSpec = new LinkedHashMap<String, String>();
    outputs.add(new WriteEntity(ts.tableHandle));
  } else {
    try {
      Partition part = Hive.get().getPartition(ts.tableHandle, partSpec, false);
      if (part != null) {
        if (part.isOffline()) {
          throw new SemanticException(
              ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(ts.tableName + ":" + part.getName()));
        }
        outputs.add(new WriteEntity(part));
      } else {
        outputs.add(new WriteEntity(ts.tableHandle));
      }
    } catch (HiveException e) {
      throw new SemanticException(e);
    }
  }

  LoadTableDesc loadTableWork = new LoadTableDesc(
      fromURI.toString(),
      loadTmpPath,
      Utilities.getTableDesc(ts.tableHandle),
      partSpec,
      isOverWrite);

  Task<? extends Serializable> childTask =
      TaskFactory.get(new MoveWork(getInputs(), getOutputs(), loadTableWork, null, true), conf);
  if (rTask != null) {
    rTask.addDependentTask(childTask);
  } else {
    rTask = childTask;
  }

  rootTasks.add(rTask);

  // The user asked for stats to be collected.
  // Some stats, like number of rows, require a scan of the data;
  // other stats, like number of files, do not require a complete scan.
  // Update the stats which do not require a complete scan.
  Task<? extends Serializable> statTask = null;
  if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
    StatsWork statDesc = new StatsWork(loadTableWork);
    statDesc.setNoStatsAggregator(true);
    statDesc.setClearAggregatorStats(true);
    statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
    statTask = TaskFactory.get(statDesc, conf);
  }

  // HIVE-3334 has been filed for load file with index auto update
  if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
    IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, getInputs(), conf);
    try {
      List<Task<? extends Serializable>> indexUpdateTasks = indexUpdater.generateUpdateTasks();
      for (Task<? extends Serializable> updateTask : indexUpdateTasks) {
        // LOAD DATA will either have a copy & move or just a move;
        // we always want the update to be dependent on the move
        childTask.addDependentTask(updateTask);
        if (statTask != null) {
          updateTask.addDependentTask(statTask);
        }
      }
    } catch (HiveException e) {
      console.printInfo("WARNING: could not auto-update stale indexes, indexes are not in sync");
    }
  } else if (statTask != null) {
    childTask.addDependentTask(statTask);
  }
}
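// Illustrative mapping from LOAD DATA variants to the AST child counts that
// analyzeInternal branches on above; the statements are hypothetical examples:
//   LOAD DATA INPATH '/tmp/f' INTO TABLE t                  -> 2 children
//   LOAD DATA LOCAL INPATH '/tmp/f' INTO TABLE t            -> 3 children (3rd is "local")
//   LOAD DATA INPATH '/tmp/f' OVERWRITE INTO TABLE t        -> 3 children (3rd marks overwrite)
//   LOAD DATA LOCAL INPATH '/tmp/f' OVERWRITE INTO TABLE t  -> 4 children (local + overwrite)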
private List<HiveLockObj> getLockObjects(
    QueryPlan plan, Database db, Table t, Partition p, HiveLockMode mode) throws LockException {
  List<HiveLockObj> locks = new LinkedList<HiveLockObj>();

  HiveLockObject.HiveLockObjectData lockData =
      new HiveLockObject.HiveLockObjectData(
          plan.getQueryId(),
          String.valueOf(System.currentTimeMillis()),
          "IMPLICIT",
          plan.getQueryStr());

  if (db != null) {
    locks.add(new HiveLockObj(new HiveLockObject(db.getName(), lockData), mode));
    return locks;
  }

  if (t != null) {
    locks.add(new HiveLockObj(new HiveLockObject(t, lockData), mode));
    mode = HiveLockMode.SHARED;
    locks.add(new HiveLockObj(new HiveLockObject(t.getDbName(), lockData), mode));
    return locks;
  }

  if (p != null) {
    if (!(p instanceof DummyPartition)) {
      locks.add(new HiveLockObj(new HiveLockObject(p, lockData), mode));
    }

    // All the parents are locked in shared mode
    mode = HiveLockMode.SHARED;

    // For dummy partitions, only partition name is needed
    String name = p.getName();
    if (p instanceof DummyPartition) {
      name = p.getName().split("@")[2];
    }

    String partialName = "";
    String[] partns = name.split("/");
    int len = p instanceof DummyPartition ? partns.length : partns.length - 1;
    Map<String, String> partialSpec = new LinkedHashMap<String, String>();
    for (int idx = 0; idx < len; idx++) {
      String partn = partns[idx];
      partialName += partn;
      String[] nameValue = partn.split("=");
      assert (nameValue.length == 2);
      partialSpec.put(nameValue[0], nameValue[1]);
      try {
        locks.add(
            new HiveLockObj(
                new HiveLockObject(
                    new DummyPartition(
                        p.getTable(),
                        p.getTable().getDbName()
                            + "/"
                            + MetaStoreUtils.encodeTableName(p.getTable().getTableName())
                            + "/"
                            + partialName,
                        partialSpec),
                    lockData),
                mode));
        partialName += "/";
      } catch (HiveException e) {
        throw new LockException(e.getMessage());
      }
    }

    locks.add(new HiveLockObj(new HiveLockObject(p.getTable(), lockData), mode));
    locks.add(new HiveLockObj(new HiveLockObject(p.getTable().getDbName(), lockData), mode));
  }
  return locks;
}
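// Illustrative (hypothetical) lock set this method produces for partition
// "ds=2010-01-01/hr=00" of table db1.t1 when called with EXCLUSIVE mode:
//   EXCLUSIVE  db1/t1/ds=2010-01-01/hr=00   (the partition itself)
//   SHARED     db1/t1/ds=2010-01-01         (dummy parent partition)
//   SHARED     db1/t1                       (the table)
//   SHARED     db1                          (the database)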
@Override
protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext cntxt, Hive hive, DDLWork work)
    throws HiveException {
  // DB operations, none of them are enforced by Hive right now.

  ShowDatabasesDesc showDatabases = work.getShowDatabasesDesc();
  if (showDatabases != null) {
    authorize(
        HiveOperation.SHOWDATABASES.getInputRequiredPrivileges(),
        HiveOperation.SHOWDATABASES.getOutputRequiredPrivileges());
  }

  DropDatabaseDesc dropDb = work.getDropDatabaseDesc();
  if (dropDb != null) {
    Database db = cntxt.getHive().getDatabase(dropDb.getDatabaseName());
    authorize(db, Privilege.DROP);
  }

  DescDatabaseDesc descDb = work.getDescDatabaseDesc();
  if (descDb != null) {
    Database db = cntxt.getHive().getDatabase(descDb.getDatabaseName());
    authorize(db, Privilege.SELECT);
  }

  SwitchDatabaseDesc switchDb = work.getSwitchDatabaseDesc();
  if (switchDb != null) {
    Database db = cntxt.getHive().getDatabase(switchDb.getDatabaseName());
    authorize(db, Privilege.SELECT);
  }

  ShowTablesDesc showTables = work.getShowTblsDesc();
  if (showTables != null) {
    String dbName =
        showTables.getDbName() == null
            ? SessionState.get().getCurrentDatabase()
            : showTables.getDbName();
    authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT);
  }

  ShowTableStatusDesc showTableStatus = work.getShowTblStatusDesc();
  if (showTableStatus != null) {
    String dbName =
        showTableStatus.getDbName() == null
            ? SessionState.get().getCurrentDatabase()
            : showTableStatus.getDbName();
    authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT);
  }

  // TODO: add alter database support in HCat

  // Table operations.

  DropTableDesc dropTable = work.getDropTblDesc();
  if (dropTable != null) {
    if (dropTable.getPartSpecs() == null) {
      // drop table is already enforced by Hive. We only check for table level
      // location even if the table is partitioned.
    } else {
      // this is actually an ALTER TABLE DROP PARTITION statement
      for (DropTableDesc.PartSpec partSpec : dropTable.getPartSpecs()) {
        // partitions are not added as write entries in drop partitions in Hive
        Table table =
            hive.getTable(SessionState.get().getCurrentDatabase(), dropTable.getTableName());
        List<Partition> partitions = null;
        try {
          partitions = hive.getPartitionsByFilter(table, partSpec.getPartSpec().getExprString());
        } catch (Exception e) {
          throw new HiveException(e);
        }
        for (Partition part : partitions) {
          authorize(part, Privilege.DROP);
        }
      }
    }
  }

  AlterTableDesc alterTable = work.getAlterTblDesc();
  if (alterTable != null) {
    Table table =
        hive.getTable(SessionState.get().getCurrentDatabase(), alterTable.getOldName(), false);

    Partition part = null;
    if (alterTable.getPartSpec() != null) {
      part = hive.getPartition(table, alterTable.getPartSpec(), false);
    }

    String newLocation = alterTable.getNewLocation();

    /* HCat requires ALTER_DATA privileges for ALTER TABLE LOCATION statements
     * for the old table/partition location and the new location.
     */
    if (alterTable.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) {
      if (part != null) {
        authorize(part, Privilege.ALTER_DATA); // authorize for the old location
        part.setLocation(newLocation);
        authorize(part, Privilege.ALTER_DATA); // and for the new location
      } else {
        authorize(table, Privilege.ALTER_DATA); // authorize for the old location
        table.getTTable().getSd().setLocation(newLocation);
        authorize(table, Privilege.ALTER_DATA); // and for the new location
      }
    }
    // other alter operations are already supported by Hive
  }

  // We should be careful when authorizing a table based on just the table name.
  // If columns have a separate authorization domain, it must be honored.
  DescTableDesc descTable = work.getDescTblDesc();
  if (descTable != null) {
    String tableName = extractTableName(descTable.getTableName());
    authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT);
  }

  ShowPartitionsDesc showParts = work.getShowPartsDesc();
  if (showParts != null) {
    String tableName = extractTableName(showParts.getTabName());
    authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT);
  }
}
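// A minimal sketch of the authorizeTable helper the hook above relies on. The
// real implementation is not part of this excerpt, so the body is an
// assumption: resolve the table in the current database, then delegate to the
// same authorize() used elsewhere in this class.
private void authorizeTableSketch(Hive hive, String tableName, Privilege priv)
    throws HiveException {
  Table table = hive.getTable(SessionState.get().getCurrentDatabase(), tableName);
  authorize(table, priv);
}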
private LinkedHashMap<Type, Referenceable> createOrUpdateEntities(
    HiveMetaStoreBridge dgiBridge,
    HiveEventContext event,
    Entity entity,
    boolean skipTempTables,
    Table existTable)
    throws Exception {
  Database db = null;
  Table table = null;
  Partition partition = null;
  LinkedHashMap<Type, Referenceable> result = new LinkedHashMap<>();
  List<Referenceable> entities = new ArrayList<>();

  switch (entity.getType()) {
    case DATABASE:
      db = entity.getDatabase();
      break;
    case TABLE:
      table = entity.getTable();
      db = dgiBridge.hiveClient.getDatabase(table.getDbName());
      break;
    case PARTITION:
      partition = entity.getPartition();
      table = partition.getTable();
      db = dgiBridge.hiveClient.getDatabase(table.getDbName());
      break;
  }

  db = dgiBridge.hiveClient.getDatabase(db.getName());
  Referenceable dbEntity = dgiBridge.createDBInstance(db);

  entities.add(dbEntity);
  result.put(Type.DATABASE, dbEntity);

  Referenceable tableEntity = null;

  if (table != null) {
    if (existTable != null) {
      table = existTable;
    } else {
      table = dgiBridge.hiveClient.getTable(table.getDbName(), table.getTableName());
    }
    // If it's an external table, even though the temp table skip flag is on,
    // we create the table since we need the HDFS path for temp table lineage.
    if (skipTempTables
        && table.isTemporary()
        && !TableType.EXTERNAL_TABLE.equals(table.getTableType())) {
      LOG.debug(
          "Skipping temporary table registration {} since it is not an external table {} ",
          table.getTableName(),
          table.getTableType().name());
    } else {
      tableEntity = dgiBridge.createTableInstance(dbEntity, table);
      entities.add(tableEntity);
      result.put(Type.TABLE, tableEntity);
    }
  }

  event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities));
  return result;
}
public static HCatSchema extractSchema(Partition partition) throws HCatException {
  return new HCatSchema(HCatUtil.getHCatFieldSchemaList(partition.getCols()));
}
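// A brief usage sketch for extractSchema; `partition` is an assumed Partition
// handle obtained elsewhere, and the printing is illustrative only.
public static void printSchemaSketch(Partition partition) throws HCatException {
  HCatSchema schema = extractSchema(partition);
  for (HCatFieldSchema field : schema.getFields()) {
    System.out.println(field.getName() + ": " + field.getTypeString());
  }
}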
@Override
public LongWatermark getPreviousHighWatermark(Partition partition) {
  return getPreviousHighWatermark(partition.getTable());
}
@Override
public LongWatermark getExpectedHighWatermark(
    Partition partition, long tableProcessTime, long partitionProcessTime) {
  return getExpectedHighWatermark(partition.getTable(), tableProcessTime);
}
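// Note: both partition-level overloads above delegate to their table-level
// counterparts, so every partition of a table shares the table's watermark and
// partitionProcessTime is left unused. This reads as a table-granularity
// watermark policy rather than an omission, though the surrounding class is
// not shown here.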