/** * Method to fetch table data * * @param table table name * @param database database * @return list of columns in comma seperated way * @throws Exception if any error occurs */ private List<String> getTableData(String table, String database) throws Exception { HiveConf conf = new HiveConf(); conf.addResource("hive-site.xml"); ArrayList<String> results = new ArrayList<String>(); ArrayList<String> temp = new ArrayList<String>(); Hive hive = Hive.get(conf); org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table); FetchWork work; if (!tbl.getPartCols().isEmpty()) { List<Partition> partitions = hive.getPartitions(tbl); List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>(); List<String> partLocs = new ArrayList<String>(); for (Partition part : partitions) { partLocs.add(part.getLocation()); partDesc.add(Utilities.getPartitionDesc(part)); } work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl)); work.setLimit(100); } else { work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl)); } FetchTask task = new FetchTask(); task.setWork(work); task.initialize(conf, null, null); task.fetch(temp); for (String str : temp) { results.add(str.replace("\t", ",")); } return results; }
/** * Creates path where partitions matching prefix should lie in filesystem * * @param tbl table in which partition is * @return expected location of partitions matching prefix in filesystem */ public Path createPath(Table tbl) throws HiveException { String prefixSubdir; try { prefixSubdir = Warehouse.makePartName(fields, values); } catch (MetaException e) { throw new HiveException("Unable to get partitions directories prefix", e); } Path tableDir = tbl.getDataLocation(); if (tableDir == null) { throw new HiveException("Table has no location set"); } return new Path(tableDir, prefixSubdir); }
private void handleExternalTables( final HiveMetaStoreBridge dgiBridge, final HiveEventContext event, final LinkedHashMap<Type, Referenceable> tables) throws HiveException, MalformedURLException { List<Referenceable> entities = new ArrayList<>(); final Entity hiveEntity = getEntityByType(event.getOutputs(), Type.TABLE); Table hiveTable = hiveEntity.getTable(); // Refresh to get the correct location hiveTable = dgiBridge.hiveClient.getTable(hiveTable.getDbName(), hiveTable.getTableName()); final String location = lower(hiveTable.getDataLocation().toString()); if (hiveTable != null && TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType())) { LOG.info("Registering external table process {} ", event.getQueryStr()); final ReadEntity dfsEntity = new ReadEntity(); dfsEntity.setTyp(Type.DFS_DIR); dfsEntity.setName(location); SortedMap<Entity, Referenceable> inputs = new TreeMap<Entity, Referenceable>(entityComparator) { { put(dfsEntity, dgiBridge.fillHDFSDataSet(location)); } }; SortedMap<Entity, Referenceable> outputs = new TreeMap<Entity, Referenceable>(entityComparator) { { put(hiveEntity, tables.get(Type.TABLE)); } }; Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event, inputs, outputs); String tableQualifiedName = dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), hiveTable); if (isCreateOp(event)) { processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName); } entities.addAll(tables.values()); entities.add(processReferenceable); event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities)); } }