Example #1
 @Override
 public void registerForNotification(URI uri, Configuration conf, String user, String actionID)
     throws URIHandlerException {
   HCatURI hcatURI;
   try {
     hcatURI = new HCatURI(uri);
   } catch (URISyntaxException e) {
     throw new URIHandlerException(ErrorCode.E0906, uri, e);
   }
   HCatAccessorService hcatService = Services.get().get(HCatAccessorService.class);
   // Register a JMS message handler for the table's notification topic only once per table.
   if (!hcatService.isRegisteredForNotification(hcatURI)) {
     HCatClient client = getHCatClient(uri, conf, user);
     try {
       String topic = client.getMessageBusTopicName(hcatURI.getDb(), hcatURI.getTable());
       if (topic == null) {
         // The table has no message bus topic configured, so there is nothing to listen on.
         return;
       }
       hcatService.registerForNotification(
           hcatURI, topic, new HCatMessageHandler(uri.getAuthority()));
     } catch (HCatException e) {
       throw new HCatAccessorException(ErrorCode.E1501, e);
     } finally {
       closeQuietly(client, true);
     }
   }
   // Record the partition as a missing dependency of the waiting action.
   PartitionDependencyManagerService pdmService =
       Services.get().get(PartitionDependencyManagerService.class);
   pdmService.addMissingDependency(hcatURI, actionID);
 }
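A minimal sketch of the partition URI shape this handler consumes, assuming Oozie's HCatURI class (the package path can differ by Oozie version; host, port, database, table, and partition values below are hypothetical):

import java.net.URISyntaxException;
import org.apache.oozie.util.HCatURI;

public class HCatUriParseSketch {
  public static void main(String[] args) throws URISyntaxException {
    // Assumed format: hcat://<metastore-host>:<port>/<db>/<table>/<key1>=<val1>;<key2>=<val2>
    HCatURI hcatURI =
        new HCatURI("hcat://hcat.example.com:11002/mydb/clicks/ds=20130913;region=in");
    System.out.println(hcatURI.getDb());           // mydb
    System.out.println(hcatURI.getTable());        // clicks
    System.out.println(hcatURI.getPartitionMap()); // e.g. {ds=20130913, region=in}
  }
}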
  private void dropMultiColDatedPartitions(
      String tableName, List<Map<String, String>> candidatePartitions) throws Exception {

    for (Map<String, String> partition : candidatePartitions) {
      client.dropPartitions(DATABASE_NAME, tableName, partition, true);
    }
  }
Example #3
  private HCatClient getHCatClient(URI uri, Configuration conf, String user)
      throws HCatAccessorException {
    final HiveConf hiveConf = new HiveConf(conf, this.getClass());
    String serverURI = getMetastoreConnectURI(uri);
    if (!serverURI.equals("")) {
      hiveConf.set("hive.metastore.local", "false");
    }
    hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, serverURI);
    try {
      XLog.getLog(HCatURIHandler.class)
          .info(
              "Creating HCatClient for user [{0}] login_user [{1}] and server [{2}] ",
              user, UserGroupInformation.getLoginUser(), serverURI);

      // HiveMetastoreClient (hive 0.9) currently does not work if UGI has doAs
      // We are good to connect as the oozie user since listPartitions does not require
      // authorization
      /*
      UserGroupInformation ugi = ugiService.getProxyUser(user);
      return ugi.doAs(new PrivilegedExceptionAction<HCatClient>() {
          public HCatClient run() throws Exception {
              return HCatClient.create(hiveConf);
          }
      });
      */

      return HCatClient.create(hiveConf);
    } catch (HCatException | IOException e) {
      throw new HCatAccessorException(ErrorCode.E1501, e);
    }
  }
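For context, a self-contained sketch of the same client construction outside Oozie, assuming a reachable metastore at a hypothetical thrift URI (the HCatClient package path varies between the org.apache.hcatalog and org.apache.hive.hcatalog generations):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hive.hcatalog.api.HCatClient;

public class HCatClientSketch {
  public static void main(String[] args) throws Exception {
    HiveConf hiveConf = new HiveConf();
    // Hypothetical metastore address; an empty value would fall back to a local metastore.
    hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, "thrift://metastore.example.com:9083");
    HCatClient client = HCatClient.create(hiveConf);
    try {
      System.out.println(client.getDatabase("default").getName());
    } finally {
      // Always release the metastore connection, as the handler does via closeQuietly.
      client.close();
    }
  }
}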
Example #4
 @Override
 public void destroy() {
   try {
     hcatClient.close();
   } catch (Exception ignore) {
     XLog.getLog(HCatContext.class).warn("Error closing hcat client", ignore);
   }
 }
Example #5
 private void closeQuietly(HCatClient client, boolean close) {
   if (close && client != null) {
     try {
       client.close();
     } catch (Exception ignore) {
       XLog.getLog(HCatURIHandler.class).warn("Error closing hcat client", ignore);
     }
   }
 }
  private List<HCatPartition> getFilteredPartitions(
      String tableName, String timeZone, String dateMask, Pair<Date, Date> range)
      throws HCatException {
    DateFormat dateFormat = new SimpleDateFormat(dateMask);
    dateFormat.setTimeZone(TimeZone.getTimeZone(timeZone));

    String filter = "ds < '" + dateFormat.format(range.first) + "'";
    return client.listPartitionsByFilter(DATABASE_NAME, tableName, filter);
  }
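The filter built above is a plain metastore filter expression pushed down by listPartitionsByFilter; a small sketch of what it evaluates to for a hypothetical mask, time zone, and cutoff date:

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;

public class RetentionFilterSketch {
  public static void main(String[] args) {
    // Hypothetical inputs: dateMask "yyyyMMdd", timeZone "UTC",
    // range.first == 2013-09-03T00:00:00Z.
    DateFormat dateFormat = new SimpleDateFormat("yyyyMMdd");
    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
    Date cutoff = new Date(1378166400000L);
    String filter = "ds < '" + dateFormat.format(cutoff) + "'";
    System.out.println(filter); // ds < '20130903'
  }
}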
  private void dropPartitions(String tableName, List<String> candidatePartitions)
      throws Exception {

    for (String candidatePartition : candidatePartitions) {
      Map<String, String> partition = new HashMap<String, String>();
      partition.put("ds", candidatePartition); // yyyyMMDD
      partition.put("region", "in");
      client.dropPartitions(DATABASE_NAME, tableName, partition, true);
    }
  }
  @Override
  public Map<String, Object> readRow(String tableName, Object keyObject) throws Exception {
    if (!(keyObject instanceof Map)
        && !(keyObject instanceof String)
        && !(keyObject instanceof byte[])) {
      throw new IllegalArgumentException(
          "Unsupported key type - " + keyObject.getClass().getName());
    }

    Map<String, Object> result;

    HCatTable table = hcatClient.getTable("default", tableName);
    String hbaseTableName = HiveUtils.getTableName(table);

    HTableInterface tableInterface = tableFactory.getTable(hbaseConfiguration, hbaseTableName);

    try {
      List<HCatFieldSchema> columns = table.getCols();

      HCatFieldSchema keyColumn = columns.get(0);

      // we use the serializer to build the row key
      HiveSerializer serializer = new HiveSerializer(table);
      final byte[] rowKey;

      if (keyObject instanceof Map) {
        // Map key: serialize it according to the Hive type of the key column.
        rowKey = serializer.serializeHiveType(keyColumn, null, keyObject, 0);
      } else if (keyObject instanceof String) {
        // String key: use its UTF-8 bytes directly.
        rowKey = Bytes.toBytes((String) keyObject);
      } else {
        // byte[] key: already the raw row key.
        rowKey = (byte[]) keyObject;
      }

      Get get = new Get(rowKey);
      get.setCacheBlocks(true);
      get.setMaxVersions(1);

      Result dbResult = tableInterface.get(get);

      HiveDeserializer deserializer = new HiveDeserializer(table, dbResult);

      result = deserializer.deserialize();

      result.put("__rowkey", rowKey);
    } finally {
      tableInterface.close();
    }

    return result;
  }
Example #9
 private boolean exists(URI uri, HCatClient client, boolean closeClient)
     throws HCatAccessorException {
   try {
     HCatURI hcatURI = new HCatURI(uri.toString());
     List<HCatPartition> partitions =
         client.getPartitions(hcatURI.getDb(), hcatURI.getTable(), hcatURI.getPartitionMap());
     return (partitions != null && !partitions.isEmpty());
   } catch (ConnectionFailureException e) {
     throw new HCatAccessorException(ErrorCode.E1501, e);
   } catch (HCatException e) {
     throw new HCatAccessorException(ErrorCode.E0902, e);
   } catch (URISyntaxException e) {
     throw new HCatAccessorException(ErrorCode.E0902, e);
   } finally {
     closeQuietly(client, closeClient);
   }
 }
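The existence check reduces to a partition lookup by full partition spec; a standalone sketch of the same call, with hypothetical database, table, and partition values (same package-path caveat as above):

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hive.hcatalog.api.HCatClient;
import org.apache.hive.hcatalog.api.HCatPartition;

public class PartitionExistsSketch {
  public static void main(String[] args) throws Exception {
    HiveConf hiveConf = new HiveConf();
    hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, "thrift://metastore.example.com:9083");
    HCatClient client = HCatClient.create(hiveConf);
    try {
      Map<String, String> partitionSpec = new HashMap<String, String>();
      partitionSpec.put("ds", "20130913");
      partitionSpec.put("region", "in");
      List<HCatPartition> partitions = client.getPartitions("mydb", "clicks", partitionSpec);
      System.out.println(partitions != null && !partitions.isEmpty());
    } finally {
      client.close();
    }
  }
}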
  private void addPartitions(
      String tableName, List<String> candidatePartitions, boolean isTableExternal)
      throws Exception {
    Path path = new Path(EXTERNAL_TABLE_LOCATION);
    FileSystem fs = path.getFileSystem(new Configuration());

    for (String candidatePartition : candidatePartitions) {
      if (isTableExternal) {
        touch(fs, EXTERNAL_TABLE_LOCATION + candidatePartition);
      }

      Map<String, String> partition = new HashMap<String, String>();
      partition.put("ds", candidatePartition); // yyyyMMDD
      partition.put("region", "in");
      HCatAddPartitionDesc addPtn =
          HCatAddPartitionDesc.create(DATABASE_NAME, tableName, null, partition).build();
      client.addPartition(addPtn);
    }
  }
  private void addMultiColDatedPartitions(
      String tableName, List<Map<String, String>> candidatePartitions, boolean isTableExternal)
      throws Exception {
    Path path = new Path(MULTI_COL_DATED_EXTERNAL_TABLE_LOCATION);
    FileSystem fs = path.getFileSystem(new Configuration());

    for (Map<String, String> candidatePartition : candidatePartitions) {
      if (isTableExternal) {
        StringBuilder pathStr = new StringBuilder(MULTI_COL_DATED_EXTERNAL_TABLE_LOCATION);
        for (Map.Entry<String, String> entry : candidatePartition.entrySet()) {
          pathStr.append(entry.getKey()).append("=").append(entry.getValue()).append("/");
        }
        pathStr.append("region=in");
        touch(fs, pathStr.toString());
      }

      candidatePartition.put("region", "in");
      HCatAddPartitionDesc addPtn =
          HCatAddPartitionDesc.create(DATABASE_NAME, tableName, null, candidatePartition).build();
      client.addPartition(addPtn);
    }
  }
  private List<HCatPartition> getMultiColDatedFilteredPartitions(
      String tableName, String timeZone, Pair<Date, Date> range) throws HCatException {
    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
    dateFormat.setTimeZone(TimeZone.getTimeZone(timeZone));

    Calendar calendar = Calendar.getInstance();
    calendar.setTime(range.first);
    String[] dateCols = dateFormat.format(calendar.getTime()).split("-");
    // filter eg: "(year < '2014') or (year = '2014' and month < '02') or
    // (year = '2014' and month = '02' and day < '24')"
    String filter1 = "(year < '" + dateCols[0] + "')";
    String filter2 = "(year = '" + dateCols[0] + "' and month < '" + dateCols[1] + "')";
    String filter3 =
        "(year = '"
            + dateCols[0]
            + "' and month = '"
            + dateCols[1]
            + "' and day < '"
            + dateCols[2]
            + "')";
    String filter = filter1 + " or " + filter2 + " or " + filter3;
    return client.listPartitionsByFilter(DATABASE_NAME, tableName, filter);
  }
  @Test(dataProvider = "multiColDatedEvictorTestDataProvider")
  public void testFeedEvictorForMultiColDatedTableStorage(String retentionLimit, boolean isExternal)
      throws Exception {
    final String tableName =
        isExternal ? MULTI_COL_DATED_EXTERNAL_TABLE_NAME : MULTI_COL_DATED_TABLE_NAME;
    final String timeZone = "UTC";

    List<Map<String, String>> candidatePartitions =
        getMultiColDatedCandidatePartitions("days(10)", timeZone, 3);
    addMultiColDatedPartitions(tableName, candidatePartitions, isExternal);

    List<HCatPartition> partitions = client.getPartitions(DATABASE_NAME, tableName);
    Assert.assertEquals(partitions.size(), candidatePartitions.size());
    Pair<Date, Date> range = getDateRange(retentionLimit);
    List<HCatPartition> filteredPartitions =
        getMultiColDatedFilteredPartitions(tableName, timeZone, range);

    try {
      stream.clear();

      final String tableUri =
          DATABASE_NAME + "/" + tableName + "/year=${YEAR};month=${MONTH};day=${DAY};region=us";
      String feedBasePath = METASTORE_URL + tableUri;
      String logFile = STORAGE_URL + "/falcon/staging/feed/instancePaths-2013-09-13-01-00.csv";

      FeedEvictor.main(
          new String[] {
            "-feedBasePath", feedBasePath,
            "-retentionType", "instance",
            "-retentionLimit", retentionLimit,
            "-timeZone", timeZone,
            "-frequency", "daily",
            "-logFile", logFile,
            "-falconFeedStorageType", Storage.TYPE.TABLE.name(),
          });

      StringBuilder expectedInstancePaths = new StringBuilder();
      List<Map<String, String>> expectedInstancesEvicted =
          getMultiColDatedExpectedEvictedInstances(
              candidatePartitions, range.first, timeZone, expectedInstancePaths);
      int expectedSurvivorSize = candidatePartitions.size() - expectedInstancesEvicted.size();

      List<HCatPartition> survivingPartitions = client.getPartitions(DATABASE_NAME, tableName);
      Assert.assertEquals(
          survivingPartitions.size(),
          expectedSurvivorSize,
          "Unexpected number of surviving partitions");

      Assert.assertEquals(
          expectedInstancesEvicted.size(),
          filteredPartitions.size(),
          "Unexpected number of evicted partitions");

      final String actualInstancesEvicted = readLogFile(new Path(logFile));
      validateInstancePaths(actualInstancesEvicted, expectedInstancePaths.toString());

      if (isExternal) {
        verifyMultiColDatedFSPartitionsAreDeleted(candidatePartitions, range.first, timeZone);
      }
    } finally {
      dropMultiColDatedPartitions(tableName, candidatePartitions);
      Assert.assertEquals(client.getPartitions(DATABASE_NAME, tableName).size(), 0);
    }
  }