@Override
public void registerForNotification(URI uri, Configuration conf, String user, String actionID)
        throws URIHandlerException {
    HCatURI hcatURI;
    try {
        hcatURI = new HCatURI(uri);
    } catch (URISyntaxException e) {
        throw new URIHandlerException(ErrorCode.E0906, uri, e);
    }
    HCatAccessorService hcatService = Services.get().get(HCatAccessorService.class);
    // Subscribe to the table's message-bus topic only once per table
    if (!hcatService.isRegisteredForNotification(hcatURI)) {
        HCatClient client = getHCatClient(uri, conf, user);
        try {
            String topic = client.getMessageBusTopicName(hcatURI.getDb(), hcatURI.getTable());
            if (topic == null) {
                // Table has no notification topic configured; nothing to subscribe to
                return;
            }
            hcatService.registerForNotification(
                    hcatURI, topic, new HCatMessageHandler(uri.getAuthority()));
        } catch (HCatException e) {
            throw new HCatAccessorException(ErrorCode.E1501, e);
        } finally {
            closeQuietly(client, true);
        }
    }
    // Track the partition as a missing dependency for this action
    PartitionDependencyManagerService pdmService =
            Services.get().get(PartitionDependencyManagerService.class);
    pdmService.addMissingDependency(hcatURI, actionID);
}
private void dropMultiColDatedPartitions(
        String tableName, List<Map<String, String>> candidatePartitions) throws Exception {
    for (Map<String, String> partition : candidatePartitions) {
        client.dropPartitions(DATABASE_NAME, tableName, partition, true);
    }
}
private HCatClient getHCatClient(URI uri, Configuration conf, String user)
        throws HCatAccessorException {
    final HiveConf hiveConf = new HiveConf(conf, this.getClass());
    String serverURI = getMetastoreConnectURI(uri);
    if (!serverURI.equals("")) {
        hiveConf.set("hive.metastore.local", "false");
    }
    hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, serverURI);
    try {
        XLog.getLog(HCatURIHandler.class)
                .info(
                        "Creating HCatClient for user [{0}] login_user [{1}] and server [{2}] ",
                        user, UserGroupInformation.getLoginUser(), serverURI);
        // HiveMetastoreClient (hive 0.9) currently does not work if UGI has doAs
        // We are good to connect as the oozie user since listPartitions does not require
        // authorization
        /*
        UserGroupInformation ugi = ugiService.getProxyUser(user);
        return ugi.doAs(new PrivilegedExceptionAction<HCatClient>() {
            public HCatClient run() throws Exception {
                return HCatClient.create(hiveConf);
            }
        });
        */
        return HCatClient.create(hiveConf);
    } catch (HCatException e) {
        throw new HCatAccessorException(ErrorCode.E1501, e);
    } catch (IOException e) {
        throw new HCatAccessorException(ErrorCode.E1501, e);
    }
}
@Override
public void destroy() {
    try {
        hcatClient.close();
    } catch (Exception ignore) {
        XLog.getLog(HCatContext.class).warn("Error closing hcat client", ignore);
    }
}
private void closeQuietly(HCatClient client, boolean close) {
    if (close && client != null) {
        try {
            client.close();
        } catch (Exception ignore) {
            XLog.getLog(HCatURIHandler.class).warn("Error closing hcat client", ignore);
        }
    }
}
private List<HCatPartition> getFilteredPartitions(
        String tableName, String timeZone, String dateMask, Pair<Date, Date> range)
        throws HCatException {
    DateFormat dateFormat = new SimpleDateFormat(dateMask);
    dateFormat.setTimeZone(TimeZone.getTimeZone(timeZone));
    String filter = "ds < '" + dateFormat.format(range.first) + "'";
    return client.listPartitionsByFilter(DATABASE_NAME, tableName, filter);
}
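// A minimal, hypothetical usage sketch for getFilteredPartitions(); the mask, time zone, and
// retention window below are illustrative. With a "yyyyMMdd" mask and a cutoff date of
// 2014-02-24 UTC, the helper asks the metastore for partitions matching ds < '20140224'.
private List<HCatPartition> sampleFilteredPartitionsUsage(String tableName) throws Exception {
    Pair<Date, Date> range = getDateRange("days(10)"); // retention window, as in the tests
    return getFilteredPartitions(tableName, "UTC", "yyyyMMdd", range);
}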
private void dropPartitions(String tableName, List<String> candidatePartitions) throws Exception {
    for (String candidatePartition : candidatePartitions) {
        Map<String, String> partition = new HashMap<String, String>();
        partition.put("ds", candidatePartition); // yyyyMMDD
        partition.put("region", "in");
        client.dropPartitions(DATABASE_NAME, tableName, partition, true);
    }
}
@Override
public Map<String, Object> readRow(String tableName, Object keyObject) throws Exception {
    if (!(keyObject instanceof Map)
            && !(keyObject instanceof String)
            && !(keyObject instanceof byte[])) {
        throw new IllegalArgumentException(
                "Unsupported key type - " + keyObject.getClass().getName());
    }
    Map<String, Object> result;
    HCatTable table = hcatClient.getTable("default", tableName);
    String hbaseTableName = HiveUtils.getTableName(table);
    HTableInterface tableInterface = tableFactory.getTable(hbaseConfiguration, hbaseTableName);
    try {
        List<HCatFieldSchema> columns = table.getCols();
        HCatFieldSchema keyColumn = columns.get(0);
        // we use the serializer to build the row key
        HiveSerializer serializer = new HiveSerializer(table);
        final byte[] rowKey;
        if (keyObject instanceof Map) {
            rowKey = serializer.serializeHiveType(keyColumn, null, keyObject, 0);
        } else if (keyObject instanceof String) {
            rowKey = Bytes.toBytes((String) keyObject);
        } else {
            rowKey = (byte[]) keyObject;
        }
        Get get = new Get(rowKey);
        get.setCacheBlocks(true);
        get.setMaxVersions(1);
        Result dbResult = tableInterface.get(get);
        HiveDeserializer deserializer = new HiveDeserializer(table, dbResult);
        result = deserializer.deserialize();
        result.put("__rowkey", rowKey);
    } finally {
        tableInterface.close();
    }
    return result;
}
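// A minimal, hypothetical caller for readRow(); the table name and key value are placeholders.
// A String key is written as raw bytes, a byte[] key is used as-is, and a Map key is serialized
// through the table's first (key) column; the result also carries the row key under "__rowkey".
private void sampleReadRowUsage() throws Exception {
    Map<String, Object> row = readRow("web_events", "user-123");
    byte[] rawKey = (byte[]) row.get("__rowkey"); // raw HBase row key used for the Get
    System.out.println("Read " + row.size() + " columns for a " + rawKey.length + "-byte key");
}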
private boolean exists(URI uri, HCatClient client, boolean closeClient)
        throws HCatAccessorException {
    try {
        HCatURI hcatURI = new HCatURI(uri.toString());
        List<HCatPartition> partitions =
                client.getPartitions(hcatURI.getDb(), hcatURI.getTable(), hcatURI.getPartitionMap());
        return (partitions != null && !partitions.isEmpty());
    } catch (ConnectionFailureException e) {
        throw new HCatAccessorException(ErrorCode.E1501, e);
    } catch (HCatException e) {
        throw new HCatAccessorException(ErrorCode.E0902, e);
    } catch (URISyntaxException e) {
        throw new HCatAccessorException(ErrorCode.E0902, e);
    } finally {
        closeQuietly(client, closeClient);
    }
}
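// A minimal, hypothetical usage sketch for exists(); the host, port, database, table, and
// partition values are made up. Oozie HCatalog dependency URIs take the shape
// hcat://HOST:PORT/DB/TABLE/key1=val1;key2=val2, which HCatURI splits into the database,
// table, and partition map queried above.
private boolean sampleExistsUsage(HCatClient client) throws Exception {
    URI uri = new URI("hcat://hcat.example.com:11002/mydb/clicks/datastamp=20140224;region=us");
    return exists(uri, client, true); // also closes the client quietly
}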
private void addPartitions(
        String tableName, List<String> candidatePartitions, boolean isTableExternal)
        throws Exception {
    Path path = new Path(EXTERNAL_TABLE_LOCATION);
    FileSystem fs = path.getFileSystem(new Configuration());
    for (String candidatePartition : candidatePartitions) {
        if (isTableExternal) {
            touch(fs, EXTERNAL_TABLE_LOCATION + candidatePartition);
        }
        Map<String, String> partition = new HashMap<String, String>();
        partition.put("ds", candidatePartition); // yyyyMMDD
        partition.put("region", "in");
        HCatAddPartitionDesc addPtn =
                HCatAddPartitionDesc.create(DATABASE_NAME, tableName, null, partition).build();
        client.addPartition(addPtn);
    }
}
private void addMultiColDatedPartitions(
        String tableName, List<Map<String, String>> candidatePartitions, boolean isTableExternal)
        throws Exception {
    Path path = new Path(MULTI_COL_DATED_EXTERNAL_TABLE_LOCATION);
    FileSystem fs = path.getFileSystem(new Configuration());
    for (Map<String, String> candidatePartition : candidatePartitions) {
        if (isTableExternal) {
            StringBuilder pathStr = new StringBuilder(MULTI_COL_DATED_EXTERNAL_TABLE_LOCATION);
            for (Map.Entry<String, String> entry : candidatePartition.entrySet()) {
                pathStr.append(entry.getKey()).append("=").append(entry.getValue()).append("/");
            }
            pathStr.append("region=in");
            touch(fs, pathStr.toString());
        }
        candidatePartition.put("region", "in");
        HCatAddPartitionDesc addPtn =
                HCatAddPartitionDesc.create(DATABASE_NAME, tableName, null, candidatePartition)
                        .build();
        client.addPartition(addPtn);
    }
}
private List<HCatPartition> getMultiColDatedFilteredPartitions(
        String tableName, String timeZone, Pair<Date, Date> range) throws HCatException {
    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
    dateFormat.setTimeZone(TimeZone.getTimeZone(timeZone));
    Calendar calendar = Calendar.getInstance();
    calendar.setTime(range.first);
    String[] dateCols = dateFormat.format(calendar.getTime()).split("-");
    // filter eg: "(year < '2014') or (year = '2014' and month < '02') or
    // (year = '2014' and month = '02' and day < '24')"
    String filter1 = "(year < '" + dateCols[0] + "')";
    String filter2 = "(year = '" + dateCols[0] + "' and month < '" + dateCols[1] + "')";
    String filter3 =
            "(year = '" + dateCols[0] + "' and month = '" + dateCols[1]
                    + "' and day < '" + dateCols[2] + "')";
    String filter = filter1 + " or " + filter2 + " or " + filter3;
    return client.listPartitionsByFilter(DATABASE_NAME, tableName, filter);
}
@Test(dataProvider = "multiColDatedEvictorTestDataProvider")
public void testFeedEvictorForMultiColDatedTableStorage(String retentionLimit, boolean isExternal)
        throws Exception {
    final String tableName =
            isExternal ? MULTI_COL_DATED_EXTERNAL_TABLE_NAME : MULTI_COL_DATED_TABLE_NAME;
    final String timeZone = "UTC";

    List<Map<String, String>> candidatePartitions =
            getMultiColDatedCandidatePartitions("days(10)", timeZone, 3);
    addMultiColDatedPartitions(tableName, candidatePartitions, isExternal);

    List<HCatPartition> partitions = client.getPartitions(DATABASE_NAME, tableName);
    Assert.assertEquals(partitions.size(), candidatePartitions.size());
    Pair<Date, Date> range = getDateRange(retentionLimit);
    List<HCatPartition> filteredPartitions =
            getMultiColDatedFilteredPartitions(tableName, timeZone, range);

    try {
        stream.clear();

        final String tableUri =
                DATABASE_NAME + "/" + tableName
                        + "/year=${YEAR};month=${MONTH};day=${DAY};region=us";
        String feedBasePath = METASTORE_URL + tableUri;
        String logFile = STORAGE_URL + "/falcon/staging/feed/instancePaths-2013-09-13-01-00.csv";

        FeedEvictor.main(new String[] {
            "-feedBasePath", feedBasePath,
            "-retentionType", "instance",
            "-retentionLimit", retentionLimit,
            "-timeZone", timeZone,
            "-frequency", "daily",
            "-logFile", logFile,
            "-falconFeedStorageType", Storage.TYPE.TABLE.name(),
        });

        StringBuilder expectedInstancePaths = new StringBuilder();
        List<Map<String, String>> expectedInstancesEvicted =
                getMultiColDatedExpectedEvictedInstances(
                        candidatePartitions, range.first, timeZone, expectedInstancePaths);
        int expectedSurvivorSize = candidatePartitions.size() - expectedInstancesEvicted.size();

        List<HCatPartition> survivingPartitions = client.getPartitions(DATABASE_NAME, tableName);
        Assert.assertEquals(
                survivingPartitions.size(),
                expectedSurvivorSize,
                "Unexpected number of surviving partitions");
        Assert.assertEquals(
                expectedInstancesEvicted.size(),
                filteredPartitions.size(),
                "Unexpected number of evicted partitions");

        final String actualInstancesEvicted = readLogFile(new Path(logFile));
        validateInstancePaths(actualInstancesEvicted, expectedInstancePaths.toString());

        if (isExternal) {
            verifyMultiColDatedFSPartitionsAreDeleted(candidatePartitions, range.first, timeZone);
        }
    } finally {
        dropMultiColDatedPartitions(tableName, candidatePartitions);
        Assert.assertEquals(client.getPartitions(DATABASE_NAME, tableName).size(), 0);
    }
}