// Make sure the Oozie changes mentioned in FALCON-389 are done on the clusters,
// otherwise the test will fail.
// Note: with Hive 0.13, HIVE-6848 and HIVE-6868 must be resolved for this test to work. The
// Oozie share libs must contain Hive jars with these JIRAs fixed, and the Maven dependency
// used to run the tests must include an HCatalog version with these fixes.
// This test can fail randomly because of https://issues.apache.org/jira/browse/FALCON-401
@Test(dataProvider = "generateSeparators")
public void oneSourceTwoTarget(String separator) throws Exception {
    String tcName = "HCatReplication_oneSourceTwoTarget";
    if (separator.equals("-")) {
        tcName += "_hyphen";
    } else {
        tcName += "_slash";
    }
    String tblName = tcName;
    String testHdfsDir = baseTestHDFSDir + "/" + tcName;
    HadoopUtil.recreateDir(serverFS, testHdfsDir);
    final String startDate = "2010-01-01T20:00Z";
    final String endDate = "2099-01-01T00:00Z";
    final String dataEndDate = "2010-01-01T21:00Z";
    final String tableUriPartitionFragment = StringUtils.join(
        new String[]{"#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}"}, separator);
    String tableUri = "catalog:" + dbName + ":" + tblName + tableUriPartitionFragment;
    final String datePattern = StringUtils.join(
        new String[]{"yyyy", "MM", "dd", "HH"}, separator);
    // use the start date for both as this will only generate 2 partitions.
    List<String> dataDates = TimeUtil.getMinuteDatesOnEitherSide(startDate, dataEndDate, 60,
        DateTimeFormat.forPattern(datePattern));
    final List<String> dataset = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData,
        testHdfsDir, dataDates);
    final String col1Name = "id";
    final String col2Name = "value";
    final String partitionColumn = "dt";
    ArrayList<HCatFieldSchema> cols = new ArrayList<>();
    cols.add(HCatUtil.getStringSchema(col1Name, col1Name + " comment"));
    cols.add(HCatUtil.getStringSchema(col2Name, col2Name + " comment"));
    ArrayList<HCatFieldSchema> partitionCols = new ArrayList<>();

    // create table on cluster 1 and add data to it.
    partitionCols.add(HCatUtil.getStringSchema(partitionColumn, partitionColumn + " partition"));
    createTable(clusterHC, dbName, tblName, cols, partitionCols, testHdfsDir);
    addPartitionsToTable(dataDates, dataset, "dt", dbName, tblName, clusterHC);

    // create the table on the target clusters.
    createTable(cluster2HC, dbName, tblName, cols, partitionCols, testHdfsDir);
    createTable(cluster3HC, dbName, tblName, cols, partitionCols, testHdfsDir);

    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);

    bundles[0].setInputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setInputFeedValidity(startDate, endDate);
    bundles[0].setInputFeedTableUri(tableUri);

    String feed = bundles[0].getDataSets().get(0);
    // set cluster 2 as a target.
    feed = FeedMerlin.fromString(feed).addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[1].getClusters().get(0)))
            .withRetention("months(9000)", ActionType.DELETE)
            .withValidity(startDate, endDate)
            .withClusterType(ClusterType.TARGET)
            .withTableUri(tableUri)
            .build()).toString();
    // set cluster 3 as a target.
    feed = FeedMerlin.fromString(feed).addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
            .withRetention("months(9000)", ActionType.DELETE)
            .withValidity(startDate, endDate)
            .withClusterType(ClusterType.TARGET)
            .withTableUri(tableUri)
            .build()).toString();

    AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed));
    TimeUtil.sleepSeconds(TIMEOUT);
    // check that the replication coordinator exists on each target cluster
    Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(cluster2OC,
        Util.readEntityName(feed), "REPLICATION"), 1);
    Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(cluster3OC,
        Util.readEntityName(feed), "REPLICATION"), 1);
    // replication should start; wait till it ends.
    // we check for 2 instances so that both partitions are copied over.
    InstanceUtil.waitTillInstanceReachState(cluster2OC, Util.readEntityName(feed), 2,
        CoordinatorAction.Status.SUCCEEDED, EntityType.FEED);
    InstanceUtil.waitTillInstanceReachState(cluster3OC, Util.readEntityName(feed), 2,
        CoordinatorAction.Status.SUCCEEDED, EntityType.FEED);

    // check if data was replicated correctly
    List<Path> srcData = HadoopUtil.getAllFilesRecursivelyHDFS(clusterFS, new Path(testHdfsDir));
    LOGGER.info("Data on source cluster: " + srcData);
    List<Path> cluster2TargetData = HadoopUtil.getAllFilesRecursivelyHDFS(
        cluster2FS, new Path(testHdfsDir));
    LOGGER.info("Data on target cluster 2: " + cluster2TargetData);
    AssertUtil.checkForListSizes(srcData, cluster2TargetData);
    List<Path> cluster3TargetData = HadoopUtil.getAllFilesRecursivelyHDFS(
        cluster3FS, new Path(testHdfsDir));
    LOGGER.info("Data on target cluster 3: " + cluster3TargetData);
    AssertUtil.checkForListSizes(srcData, cluster3TargetData);
}
/**
 * Set feed cluster1 as target, clusters 2 and 3 as sources. Run the feed, then update it and
 * check that the update succeeds. Check that the appropriate number of replication and
 * retention coordinators exist on the matching clusters.
 *
 * @throws Exception
 */
@Test(enabled = true, timeOut = 1200000)
public void multipleSourceOneTarget() throws Exception {
    bundles[0].setInputFeedDataPath(inputPath);
    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);

    FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    feed.clearFeedClusters();

    // use the colo string here so that the test works in embedded and distributed mode.
    String postFix = "/US/" + cluster2Colo;
    String prefix = bundles[0].getFeedDataPathPrefix();
    HadoopUtil.deleteDirIfExists(prefix.substring(1), cluster2FS);
    HadoopUtil.lateDataReplenish(cluster2FS, 5, 80, prefix, postFix);

    // use the colo string here so that the test works in embedded and distributed mode.
    postFix = "/UK/" + cluster3Colo;
    prefix = bundles[0].getFeedDataPathPrefix();
    HadoopUtil.deleteDirIfExists(prefix.substring(1), cluster3FS);
    HadoopUtil.lateDataReplenish(cluster3FS, 5, 80, prefix, postFix);

    String startTime = TimeUtil.getTimeWrtSystemTime(-30);

    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[1].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(startTime, TimeUtil.addMinsToTime(startTime, 85))
            .withClusterType(ClusterType.SOURCE)
            .withPartition("US/${cluster.colo}")
            .build());

    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(TimeUtil.addMinsToTime(startTime, 20),
                TimeUtil.addMinsToTime(startTime, 105))
            .withClusterType(ClusterType.TARGET)
            .build());

    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(TimeUtil.addMinsToTime(startTime, 40),
                TimeUtil.addMinsToTime(startTime, 130))
            .withClusterType(ClusterType.SOURCE)
            .withPartition("UK/${cluster.colo}")
            .build());

    LOGGER.info("feed: " + Util.prettyPrintXml(feed.toString()));

    AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feed.toString()));
    AssertUtil.assertSucceeded(prism.getFeedHelper().schedule(feed.toString()));

    // change the feed location path
    feed.setFilePath(alternativeInputPath);
    LOGGER.info("updated feed: " + Util.prettyPrintXml(feed.toString()));

    // update the feed
    AssertUtil.assertSucceeded(prism.getFeedHelper().update(feed.toString(), feed.toString()));

    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "REPLICATION"), 0);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "RETENTION"), 2);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster3OC, feed.getName(), "REPLICATION"), 0);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster3OC, feed.getName(), "RETENTION"), 2);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster1OC, feed.getName(), "REPLICATION"), 4);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster1OC, feed.getName(), "RETENTION"), 2);
}