    // Eviction should delete the instance directories and their now-empty parents,
    // but must leave the feed base path and non-eligible directories intact.
    @Test(enabled = false)
    public void testEvictionWithEmptyDirs() throws Exception {
        try {
            Configuration conf = cluster.getConf();
            FileSystem fs = FileSystem.get(conf);
            fs.delete(new Path("/"), true);
            stream.clear();

            Pair<List<String>, List<String>> pair = generateInstances(
                    fs, "feed1", "yyyy/MM/dd/'more'/yyyy", 10, TimeUnit.DAYS, "/data", false);
            final String storageUrl = cluster.getConf().get(HadoopClientFactory.FS_DEFAULT_NAME_KEY);
            String dataPath = LocationType.DATA.name() + "=" + storageUrl
                    + "/data/YYYY/feed1/mmHH/dd/MM/?{YEAR}/?{MONTH}/?{DAY}/more/?{YEAR}";
            String logFile = hdfsUrl + "/falcon/staging/feed/instancePaths-2012-01-01-01-00.csv";
            long beforeDelCount = fs.getContentSummary(
                    new Path("/data/YYYY/feed1/mmHH/dd/MM/")).getDirectoryCount();

            FeedEvictor.main(new String[] {
                "-feedBasePath", dataPath,
                "-retentionType", "instance",
                "-retentionLimit", "days(10)",
                "-timeZone", "UTC",
                "-frequency", "daily",
                "-logFile", logFile,
                "-falconFeedStorageType", Storage.TYPE.FILESYSTEM.name(),
            });

            compare(map.get("feed1"), stream.getBuffer());

            String expectedInstancePaths = getExpectedInstancePaths(dataPath.replaceAll(storageUrl, ""));
            Assert.assertEquals(readLogFile(new Path(logFile)), expectedInstancePaths);

            String deletedPath = expectedInstancePaths.split(",")[0].split("=")[1];
            Assert.assertFalse(fs.exists(new Path(deletedPath)));
            // empty parents
            Assert.assertFalse(fs.exists(new Path(deletedPath).getParent()));
            Assert.assertFalse(fs.exists(new Path(deletedPath).getParent().getParent()));
            // base path not deleted
            Assert.assertTrue(fs.exists(new Path("/data/YYYY/feed1/mmHH/dd/MM/")));

            // non-eligible empty dirs
            long afterDelCount = fs.getContentSummary(
                    new Path("/data/YYYY/feed1/mmHH/dd/MM/")).getDirectoryCount();
            Assert.assertEquals((beforeDelCount - afterDelCount), 19);
            for (String path : pair.second) {
                Assert.assertTrue(fs.exists(new Path(path)));
            }
        } catch (Exception e) {
            Assert.fail("Unknown exception", e);
        }
    }
    @Test
    public void testEviction4() throws Exception {
        try {
            Configuration conf = cluster.getConf();
            FileSystem fs = FileSystem.get(conf);
            fs.delete(new Path("/"), true);
            stream.clear();

            Pair<List<String>, List<String>> pair = createTestData("/data");
            FeedEvictor.main(new String[] {
                "-feedBasePath", LocationType.DATA.name() + "="
                    + cluster.getConf().get(HadoopClientFactory.FS_DEFAULT_NAME_KEY)
                    + "/data/YYYY/feed3/dd/MM/?{MONTH}/more/?{HOUR}",
                "-retentionType", "instance",
                "-retentionLimit", "months(5)",
                "-timeZone", "UTC",
                "-frequency", "hourly",
                "-logFile", conf.get(HadoopClientFactory.FS_DEFAULT_NAME_KEY)
                    + "/falcon/staging/feed/2012-01-01-04-00",
                "-falconFeedStorageType", Storage.TYPE.FILESYSTEM.name(),
            });
            Assert.assertEquals("instances=NULL", stream.getBuffer());

            stream.clear();
            String dataPath = "/data/YYYY/feed4/dd/MM/02/more/hello";
            String logFile = hdfsUrl + "/falcon/staging/feed/instancePaths-2012-01-01-02-00.csv";
            FeedEvictor.main(new String[] {
                "-feedBasePath", LocationType.DATA.name() + "="
                    + cluster.getConf().get(HadoopClientFactory.FS_DEFAULT_NAME_KEY) + dataPath,
                "-retentionType", "instance",
                "-retentionLimit", "hours(5)",
                "-timeZone", "UTC",
                "-frequency", "hourly",
                "-logFile", logFile,
                "-falconFeedStorageType", Storage.TYPE.FILESYSTEM.name(),
            });
            Assert.assertEquals("instances=NULL", stream.getBuffer());
            Assert.assertEquals(readLogFile(new Path(logFile)), getExpectedInstancePaths(dataPath));

            assertFailures(fs, pair);
        } catch (Exception e) {
            Assert.fail("Unknown exception", e);
        }
    }
    // Rebuilds the expected "instancePaths=" log line by substituting each evicted instance
    // timestamp (taken from the evictor's output stream) into the '#'-separated location
    // templates in dataPath, distributing the instances evenly across the locations.
    private String getExpectedInstancePaths(String dataPath) {
        StringBuilder newBuffer = new StringBuilder("instancePaths=");
        DateFormat format = new SimpleDateFormat("yyyyMMddHHmm");
        format.setTimeZone(TimeZone.getTimeZone("UTC"));
        String[] locs = dataPath.split("#");
        String[] instances = stream.getBuffer().split("instances=")[1].split(",");
        if (instances[0].equals("NULL")) {
            return "instancePaths=";
        }

        for (int i = 0; i < locs.length; i++) {
            for (int j = 0, k = i * instances.length / locs.length;
                 j < instances.length / locs.length; j++) {
                String[] paths = locs[i].split("=");
                String path = paths[1];
                String instancePath = path.replaceAll("\\?\\{YEAR\\}", instances[j + k].substring(0, 4));
                instancePath = instancePath.replaceAll("\\?\\{MONTH\\}", instances[j + k].substring(4, 6));
                instancePath = instancePath.replaceAll("\\?\\{DAY\\}", instances[j + k].substring(6, 8));
                instancePath = instancePath.replaceAll("\\?\\{HOUR\\}", instances[j + k].substring(8, 10));
                instancePath = instancePath.replaceAll("\\?\\{MINUTE\\}", instances[j + k].substring(10, 12));
                newBuffer.append(instancePath).append(',');
            }
        }
        return newBuffer.toString();
    }
    @Test
    public void testEviction2() throws Exception {
        try {
            Configuration conf = cluster.getConf();
            FileSystem fs = FileSystem.get(conf);
            fs.delete(new Path("/"), true);
            stream.clear();

            Pair<List<String>, List<String>> pair = createTestData("feed1",
                    "yyyy-MM-dd/'more'/yyyy", 10, TimeUnit.DAYS, "/data");
            final String storageUrl = cluster.getConf().get(HadoopClientFactory.FS_DEFAULT_NAME_KEY);
            String dataPath = LocationType.DATA.name() + "=" + storageUrl
                    + "/data/YYYY/feed1/mmHH/dd/MM/?{YEAR}-?{MONTH}-?{DAY}/more/?{YEAR}";
            String logFile = hdfsUrl + "/falcon/staging/feed/instancePaths-2012-01-01-01-00.csv";

            FeedEvictor.main(new String[] {
                "-feedBasePath", dataPath,
                "-retentionType", "instance",
                "-retentionLimit", "days(10)",
                "-timeZone", "UTC",
                "-frequency", "daily",
                "-logFile", logFile,
                "-falconFeedStorageType", Storage.TYPE.FILESYSTEM.name(),
            });

            assertFailures(fs, pair);
            compare(map.get("feed1"), stream.getBuffer());

            String expectedInstancePaths = getExpectedInstancePaths(dataPath);
            Assert.assertEquals(readLogFile(new Path(logFile)), expectedInstancePaths);

            String deletedPath = expectedInstancePaths.split(",")[0].split("=")[1];
            Assert.assertFalse(fs.exists(new Path(deletedPath)));
            // empty parents
            Assert.assertFalse(fs.exists(new Path(deletedPath).getParent()));
            Assert.assertFalse(fs.exists(new Path(deletedPath).getParent().getParent()));
            // base path not deleted
            Assert.assertTrue(fs.exists(new Path("/data/YYYY/feed1/mmHH/dd/MM/")));
        } catch (Exception e) {
            Assert.fail("Unknown exception", e);
        }
    }
    @Test
    public void testEviction3() throws Exception {
        try {
            Configuration conf = cluster.getConf();
            FileSystem fs = FileSystem.get(conf);
            fs.delete(new Path("/"), true);
            stream.clear();

            Pair<List<String>, List<String>> pair = createTestData("feed2",
                    "yyyyMMddHH/'more'/yyyy", 5, TimeUnit.HOURS, "/data");
            final String storageUrl = cluster.getConf().get(HadoopClientFactory.FS_DEFAULT_NAME_KEY);
            String dataPath = LocationType.DATA.name() + "=" + storageUrl
                    + "/data/YYYY/feed2/mmHH/dd/MM/?{YEAR}?{MONTH}?{DAY}?{HOUR}/more/?{YEAR}";
            String logFile = hdfsUrl + "/falcon/staging/feed/instancePaths-2012-01-01-02-00.csv";

            FeedEvictor.main(new String[] {
                "-feedBasePath", dataPath,
                "-retentionType", "instance",
                "-retentionLimit", "hours(5)",
                "-timeZone", "UTC",
                "-frequency", "hourly",
                "-logFile", logFile,
                "-falconFeedStorageType", Storage.TYPE.FILESYSTEM.name(),
            });

            assertFailures(fs, pair);
            compare(map.get("feed2"), stream.getBuffer());
            Assert.assertEquals(readLogFile(new Path(logFile)), getExpectedInstancePaths(dataPath));
        } catch (Exception e) {
            Assert.fail("Unknown exception", e);
        }
    }
    @Test(dataProvider = "evictorTestInvalidDataProvider", expectedExceptions = URISyntaxException.class)
    public void testFeedEvictorForInvalidTableStorage(String retentionLimit, String dateSeparator,
                                                      boolean isExternal) throws Exception {
        final String tableName = isExternal ? EXTERNAL_TABLE_NAME : TABLE_NAME;
        final String timeZone = "UTC";
        final String dateMask = "yyyy" + dateSeparator + "MM" + dateSeparator + "dd";

        List<String> candidatePartitions = getCandidatePartitions("days(10)", dateMask, timeZone, 3);
        addPartitions(tableName, candidatePartitions, isExternal);

        try {
            stream.clear();

            final String tableUri = DATABASE_NAME + "/" + tableName
                    + "/ds=${YEAR}" + dateSeparator + "${MONTH}" + dateSeparator + "${DAY};region=us";
            String feedBasePath = METASTORE_URL + tableUri;
            String logFile = STORAGE_URL + "/falcon/staging/feed/instancePaths-2013-09-13-01-00.csv";

            FeedEvictor.main(new String[] {
                "-feedBasePath", feedBasePath,
                "-retentionType", "instance",
                "-retentionLimit", retentionLimit,
                "-timeZone", timeZone,
                "-frequency", "daily",
                "-logFile", logFile,
                "-falconFeedStorageType", Storage.TYPE.TABLE.name(),
            });
            Assert.fail("Exception must have been thrown");
        } finally {
            dropPartitions(tableName, candidatePartitions);
        }
    }
@Test(dataProvider = "multiColDatedEvictorTestDataProvider") public void testFeedEvictorForMultiColDatedTableStorage(String retentionLimit, boolean isExternal) throws Exception { final String tableName = isExternal ? MULTI_COL_DATED_EXTERNAL_TABLE_NAME : MULTI_COL_DATED_TABLE_NAME; final String timeZone = "UTC"; List<Map<String, String>> candidatePartitions = getMultiColDatedCandidatePartitions("days(10)", timeZone, 3); addMultiColDatedPartitions(tableName, candidatePartitions, isExternal); List<HCatPartition> partitions = client.getPartitions(DATABASE_NAME, tableName); Assert.assertEquals(partitions.size(), candidatePartitions.size()); Pair<Date, Date> range = getDateRange(retentionLimit); List<HCatPartition> filteredPartitions = getMultiColDatedFilteredPartitions(tableName, timeZone, range); try { stream.clear(); final String tableUri = DATABASE_NAME + "/" + tableName + "/year=${YEAR};month=${MONTH};day=${DAY};region=us"; String feedBasePath = METASTORE_URL + tableUri; String logFile = STORAGE_URL + "/falcon/staging/feed/instancePaths-2013-09-13-01-00.csv"; FeedEvictor.main( new String[] { "-feedBasePath", feedBasePath, "-retentionType", "instance", "-retentionLimit", retentionLimit, "-timeZone", timeZone, "-frequency", "daily", "-logFile", logFile, "-falconFeedStorageType", Storage.TYPE.TABLE.name(), }); StringBuilder expectedInstancePaths = new StringBuilder(); List<Map<String, String>> expectedInstancesEvicted = getMultiColDatedExpectedEvictedInstances( candidatePartitions, range.first, timeZone, expectedInstancePaths); int expectedSurvivorSize = candidatePartitions.size() - expectedInstancesEvicted.size(); List<HCatPartition> survivingPartitions = client.getPartitions(DATABASE_NAME, tableName); Assert.assertEquals( survivingPartitions.size(), expectedSurvivorSize, "Unexpected number of surviving partitions"); Assert.assertEquals( expectedInstancesEvicted.size(), filteredPartitions.size(), "Unexpected number of evicted partitions"); final String actualInstancesEvicted = readLogFile(new Path(logFile)); validateInstancePaths(actualInstancesEvicted, expectedInstancePaths.toString()); if (isExternal) { verifyMultiColDatedFSPartitionsAreDeleted(candidatePartitions, range.first, timeZone); } } finally { dropMultiColDatedPartitions(tableName, candidatePartitions); Assert.assertEquals(client.getPartitions(DATABASE_NAME, tableName).size(), 0); } }