/**
  * Fetches the contents of a Hive table through a {@code FetchTask}.
  *
  * <p>The Hive configuration is created fresh and has {@code hive-site.xml} added as a resource.
  * For a table with partition columns the fetch work is built from the locations and descriptors
  * of all of its partitions and its limit is set to 100; otherwise the fetch work is built from
  * the table's data location. A single {@code fetch} call is issued, and every tab character in
  * each fetched string is replaced with a comma.
  *
  * @param table table name
  * @param database database name
  * @return the fetched strings, with tabs replaced by commas
  * @throws Exception if any error occurs
  */
 private List<String> getTableData(String table, String database) throws Exception {
   HiveConf hiveConf = new HiveConf();
   hiveConf.addResource("hive-site.xml");
   Hive metastore = Hive.get(hiveConf);
   org.apache.hadoop.hive.ql.metadata.Table hiveTable = metastore.getTable(database, table);

   FetchWork fetchWork;
   if (hiveTable.getPartCols().isEmpty()) {
     // No partition columns: read straight from the table's own data location.
     fetchWork =
         new FetchWork(hiveTable.getDataLocation().toString(), Utilities.getTableDesc(hiveTable));
   } else {
     // Partitioned: collect the location and descriptor of every partition.
     List<Partition> allPartitions = metastore.getPartitions(hiveTable);
     List<String> locations = new ArrayList<String>();
     List<PartitionDesc> descriptors = new ArrayList<PartitionDesc>();
     for (Partition partition : allPartitions) {
       locations.add(partition.getLocation());
       descriptors.add(Utilities.getPartitionDesc(partition));
     }
     fetchWork = new FetchWork(locations, descriptors, Utilities.getTableDesc(hiveTable));
     fetchWork.setLimit(100);
   }

   FetchTask fetchTask = new FetchTask();
   fetchTask.setWork(fetchWork);
   fetchTask.initialize(hiveConf, null, null);

   // Kept as a concrete ArrayList because it is handed directly to FetchTask.fetch.
   ArrayList<String> fetched = new ArrayList<String>();
   fetchTask.fetch(fetched);

   List<String> commaSeparated = new ArrayList<String>(fetched.size());
   for (String line : fetched) {
     commaSeparated.add(line.replace('\t', ','));
   }
   return commaSeparated;
 }
Пример #2
0
 /**
  * Builds the filesystem path under which partitions matching this prefix are expected.
  *
  * <p>The sub-directory name is produced by {@code Warehouse.makePartName} from this object's
  * {@code fields} and {@code values}, and is resolved against the table's data location.
  *
  * @param tbl table in which the partition is
  * @return expected location of partitions matching the prefix in the filesystem
  * @throws HiveException if the partition name cannot be built or the table has no location set
  */
 public Path createPath(Table tbl) throws HiveException {
   final String partitionSubdir;
   try {
     partitionSubdir = Warehouse.makePartName(fields, values);
   } catch (MetaException cause) {
     throw new HiveException("Unable to get partitions directories prefix", cause);
   }

   final Path tableLocation = tbl.getDataLocation();
   if (tableLocation != null) {
     return new Path(tableLocation, partitionSubdir);
   }
   throw new HiveException("Table has no location set");
 }
Пример #3
0
  /**
   * Emits an entity-update notification describing the process that ties an external Hive table
   * to its storage location.
   *
   * <p>The TABLE entity is looked up among the event's outputs and the table is re-read through
   * {@code dgiBridge.hiveClient}. Nothing is emitted unless the re-read table is non-null and of
   * type {@code EXTERNAL_TABLE}. For an external table, a process referenceable is built with the
   * DFS directory as its single input and the table as its single output; on a create operation
   * its name attribute is set to the table's qualified name. All values of {@code tables} plus
   * the process referenceable are then attached to the event as one {@code EntityUpdateRequest}.
   *
   * @param dgiBridge bridge used to re-read the table and to build referenceables
   * @param event hook event being processed; receives the resulting notification message
   * @param tables referenceables already created for this event, keyed by entity type
   * @throws HiveException if a Hive metadata call fails
   * @throws MalformedURLException declared by this method; presumably raised while building the
   *     location or process referenceables (the throwing call is not visible here)
   */
  private void handleExternalTables(
      final HiveMetaStoreBridge dgiBridge,
      final HiveEventContext event,
      final LinkedHashMap<Type, Referenceable> tables)
      throws HiveException, MalformedURLException {
    final Entity hiveEntity = getEntityByType(event.getOutputs(), Type.TABLE);
    final Table staleTable = hiveEntity.getTable();
    // Refresh to get the correct location
    final Table hiveTable =
        dgiBridge.hiveClient.getTable(staleTable.getDbName(), staleTable.getTableName());

    if (hiveTable == null || !TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType())) {
      return;
    }

    LOG.info("Registering external table process {} ", event.getQueryStr());

    // Resolve the location only after the guard above: doing it earlier dereferenced hiveTable
    // before its null check and touched the data location of tables that are not external.
    final String location = lower(hiveTable.getDataLocation().toString());

    final ReadEntity dfsEntity = new ReadEntity();
    dfsEntity.setTyp(Type.DFS_DIR);
    dfsEntity.setName(location);

    // Plain maps rather than double-brace initialization, which creates an anonymous TreeMap
    // subclass holding a reference to the enclosing instance.
    SortedMap<Entity, Referenceable> inputs =
        new TreeMap<Entity, Referenceable>(entityComparator);
    inputs.put(dfsEntity, dgiBridge.fillHDFSDataSet(location));

    SortedMap<Entity, Referenceable> outputs =
        new TreeMap<Entity, Referenceable>(entityComparator);
    outputs.put(hiveEntity, tables.get(Type.TABLE));

    Referenceable processReferenceable =
        getProcessReferenceable(dgiBridge, event, inputs, outputs);
    String tableQualifiedName =
        dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), hiveTable);

    if (isCreateOp(event)) {
      processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName);
    }

    List<Referenceable> entities = new ArrayList<>(tables.values());
    entities.add(processReferenceable);
    event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities));
  }