@BeforeClass(alwaysRun = true)
public void createTestData() throws Exception {
    LOGGER.info("in @BeforeClass");
    HadoopUtil.uploadDir(clusterFS, aggregateWorkflowDir, OSUtil.RESOURCES_OOZIE);
    Bundle b = BundleUtil.readELBundle();
    b = new Bundle(b, cluster);
    b.setInputFeedDataPath(feedInputPath);
}
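/** Remove the feed data directory from HDFS once the test class finishes. */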
@AfterClass(alwaysRun = true)
public void deleteData() throws Exception {
    LOGGER.info("in @AfterClass");
    Bundle b = BundleUtil.readELBundle();
    b = new Bundle(b, cluster);
    b.setInputFeedDataPath(feedInputPath);
    String prefix = b.getFeedDataPathPrefix();
    HadoopUtil.deleteDirIfExists(prefix.substring(1), clusterFS);
}
/**
 * Run recipe with different frequencies. Submission should go through. Check the frequency of
 * the launched Oozie job.
 */
@Test(dataProvider = "frequencyGenerator")
public void differentRecipeFrequenciesTest(String frequency) throws Exception {
    setUp(RecipeExecLocation.SourceCluster);
    LOGGER.info("Testing with frequency: " + frequency);
    String tblName = "myTable";
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(tblName)
        .withFrequency(new Frequency(frequency));
    runSql(connection, "create table " + tblName + "(comment string)");
    final List<String> command = recipeMerlin.getSubmissionCommand();
    Assert.assertEquals(Bundle.runFalconCLI(command), 0, "Recipe submission failed.");
    LOGGER.info("Submission went through.");

    InstanceUtil.waitTillInstanceReachState(clusterOC, recipeMerlin.getName(), 1,
        CoordinatorAction.Status.RUNNING, EntityType.PROCESS);
    String filter = "name=FALCON_PROCESS_" + recipeMerlin.getName();
    List<BundleJob> bundleJobs = OozieUtil.getBundles(clusterOC, filter, 0, 10);
    List<String> bundleIds = OozieUtil.getBundleIds(bundleJobs);
    String bundleId = OozieUtil.getMaxId(bundleIds);
    List<CoordinatorJob> coords = clusterOC.getBundleJobInfo(bundleId).getCoordinators();
    List<String> cIds = new ArrayList<String>();
    for (CoordinatorJob coord : coords) {
        cIds.add(coord.getId());
    }
    String coordId = OozieUtil.getMinId(cIds);
    CoordinatorJob job = clusterOC.getCoordJobInfo(coordId);
    CoordinatorJob.Timeunit timeUnit = job.getTimeUnit();
    String freq = job.getFrequency();
    LOGGER.info("Frequency of running job: " + timeUnit + " " + freq);
    Assert.assertTrue(
        frequency.contains(timeUnit.name().toLowerCase().replace("_", ""))
            && frequency.contains(freq),
        "Running job has different frequency.");
}
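/**
 * Common setup: read HCat bundles for source and target clusters, submit the cluster entities,
 * prepare the HiveDR recipe (secure or non-secure) with a 5-minute frequency, and recreate the
 * hdr_sdb1 database on both clusters via JDBC.
 */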
private void setUp(RecipeExecLocation recipeExecLocation) throws Exception {
    clusterHC = cluster.getClusterHelper().getHCatClient();
    clusterHC2 = cluster2.getClusterHelper().getHCatClient();
    bundles[0] = new Bundle(BundleUtil.readHCatBundle(), cluster);
    bundles[1] = new Bundle(BundleUtil.readHCatBundle(), cluster2);
    bundles[0].generateUniqueBundle(this);
    bundles[1].generateUniqueBundle(this);
    final ClusterMerlin srcCluster = bundles[0].getClusterElement();
    final ClusterMerlin tgtCluster = bundles[1].getClusterElement();
    String recipeDir = "HiveDrRecipe";
    if (MerlinConstants.IS_SECURE) {
        recipeDir = "HiveDrSecureRecipe";
    }
    Bundle.submitCluster(recipeExecLocation.getRecipeBundle(bundles[0], bundles[1]));
    recipeMerlin = RecipeMerlin
        .readFromDir(recipeDir, FalconCLI.RecipeOperation.HIVE_DISASTER_RECOVERY)
        .withRecipeCluster(recipeExecLocation.getRecipeCluster(srcCluster, tgtCluster));
    recipeMerlin.withSourceCluster(srcCluster)
        .withTargetCluster(tgtCluster)
        .withFrequency(new Frequency("5", Frequency.TimeUnit.minutes))
        .withValidity(TimeUtil.getTimeWrtSystemTime(-5), TimeUtil.getTimeWrtSystemTime(15));
    recipeMerlin.setUniqueName(this.getClass().getSimpleName());

    connection = cluster.getClusterHelper().getHiveJdbcConnection();
    runSql(connection, "drop database if exists hdr_sdb1 cascade");
    runSql(connection, "create database hdr_sdb1");
    runSql(connection, "use hdr_sdb1");

    connection2 = cluster2.getClusterHelper().getHiveJdbcConnection();
    runSql(connection2, "drop database if exists hdr_sdb1 cascade");
    runSql(connection2, "create database hdr_sdb1");
    runSql(connection2, "use hdr_sdb1");
}
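/**
 * 1 src tbl 1 dst tbl. Change the type of a column at the source after bootstrap. The schema
 * change should get reflected at destination.
 */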
@Test
public void drChangeColumn() throws Exception {
    final RecipeExecLocation recipeExecLocation = RecipeExecLocation.SourceCluster;
    setUp(recipeExecLocation);
    final String tblName = "tableForColumnChange";
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(tblName);
    final List<String> command1 = recipeMerlin.getSubmissionCommand();
    final String recipe1Name = recipeMerlin.getName();
    runSql(connection, "create table " + tblName + "(id int)");

    bootstrapCopy(connection, clusterFS, tblName, connection2, clusterFS2, tblName);

    Assert.assertEquals(Bundle.runFalconCLI(command1), 0, "Recipe submission failed.");
    runSql(connection, "ALTER TABLE " + tblName + " CHANGE id id STRING COMMENT 'some_comment'");

    InstanceUtil.waitTillInstanceReachState(recipeExecLocation.getRecipeOC(clusterOC, clusterOC2),
        recipe1Name, 1, CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);

    HiveAssert.assertTableEqual(cluster, clusterHC.getTable(DB_NAME, tblName),
        cluster2, clusterHC2.getTable(DB_NAME, tblName), new NotifyingAssert(true))
        .assertAll();
}
/**
 * 1 src tbl 1 dst tbl. Change table properties and comment at the source. Changes should get
 * reflected at destination.
 */
@Test
public void drChangeCommentAndPropertyTest() throws Exception {
    final RecipeExecLocation recipeExecLocation = RecipeExecLocation.SourceCluster;
    setUp(recipeExecLocation);
    final String tblName = "myTable";
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(tblName);
    final List<String> command = recipeMerlin.getSubmissionCommand();
    runSql(connection, "create table " + tblName + "(field string)");
    // add new table property
    runSql(connection,
        "ALTER TABLE " + tblName + " SET TBLPROPERTIES('someProperty' = 'initialValue')");
    // set comment
    runSql(connection,
        "ALTER TABLE " + tblName + " SET TBLPROPERTIES('comment' = 'this comment will be "
            + "changed, SHOULD NOT appear')");

    LOGGER.info(tblName + " before bootstrap copy: ");
    runSql(connection, "describe extended " + tblName);

    bootstrapCopy(connection, clusterFS, tblName, connection2, clusterFS2, tblName);

    // change table property and comment
    runSql(connection,
        "ALTER TABLE " + tblName + " SET TBLPROPERTIES('someProperty' = 'anotherValue')");
    runSql(connection,
        "ALTER TABLE " + tblName + " SET TBLPROPERTIES('comment' = 'this comment should "
            + "appear after replication done')");

    LOGGER.info(tblName + " after modifications, before replication: ");
    runSql(connection, "describe extended " + tblName);

    Assert.assertEquals(Bundle.runFalconCLI(command), 0, "Recipe submission failed.");

    InstanceUtil.waitTillInstanceReachState(recipeExecLocation.getRecipeOC(clusterOC, clusterOC2),
        recipeMerlin.getName(), 1, CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);

    HiveAssert.assertTableEqual(cluster, clusterHC.getTable(DB_NAME, tblName),
        cluster2, clusterHC2.getTable(DB_NAME, tblName), new NotifyingAssert(true))
        .assertAll();
}
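/** Upload the Oozie workflow and generate time-partitioned input feed data in HDFS before the tests run. */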
@BeforeClass(alwaysRun = true)
public void createTestData() throws Exception {
    LOGGER.info("in @BeforeClass");
    HadoopUtil.uploadDir(clusterFS, aggregateWorkflowDir, OSUtil.RESOURCES_OOZIE);
    Bundle b = BundleUtil.readELBundle();
    b.generateUniqueBundle();
    b = new Bundle(b, cluster);

    String startDate = "2010-01-01T23:20Z";
    String endDate = "2010-01-02T01:21Z";

    b.setInputFeedDataPath(feedInputPath);
    String prefix = b.getFeedDataPathPrefix();
    HadoopUtil.deleteDirIfExists(prefix.substring(1), clusterFS);

    List<String> dataDates = TimeUtil.getMinuteDatesOnEitherSide(startDate, endDate, 20);
    HadoopUtil.flattenAndPutDataInFolder(clusterFS, OSUtil.NORMAL_INPUT, prefix, dataDates);
}
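/**
 * Replicate two source tables to the target with a single recipe request. Data inserted after
 * bootstrap should appear on the target for both tables.
 */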
@Test
public void drTwoTablesOneRequest() throws Exception {
    final RecipeExecLocation recipeExecLocation = RecipeExecLocation.TargetCluster;
    setUp(recipeExecLocation);
    final String tblName = "firstTableDR";
    final String tbl2Name = "secondTableDR";
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(tblName + ',' + tbl2Name);
    final List<String> command = recipeMerlin.getSubmissionCommand();
    runSql(connection, "create table " + tblName + "(comment string)");
    runSql(connection, "create table " + tbl2Name + "(comment string)");

    bootstrapCopy(connection, clusterFS, tblName, connection2, clusterFS2, tblName);
    bootstrapCopy(connection, clusterFS, tbl2Name, connection2, clusterFS2, tbl2Name);

    runSql(connection,
        "insert into table " + tblName + " values"
            + "('this string has been added post bootstrap - should appear after dr')");
    runSql(connection,
        "insert into table " + tbl2Name + " values"
            + "('this string has been added post bootstrap - should appear after dr')");

    Assert.assertEquals(Bundle.runFalconCLI(command), 0, "Recipe submission failed.");

    InstanceUtil.waitTillInstanceReachState(recipeExecLocation.getRecipeOC(clusterOC, clusterOC2),
        recipeMerlin.getName(), 1, CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);

    final NotifyingAssert anAssert = new NotifyingAssert(true);
    HiveAssert.assertTableEqual(cluster, clusterHC.getTable(DB_NAME, tblName),
        cluster2, clusterHC2.getTable(DB_NAME, tblName), anAssert);
    HiveAssert.assertTableEqual(cluster, clusterHC.getTable(DB_NAME, tbl2Name),
        cluster2, clusterHC2.getTable(DB_NAME, tbl2Name), anAssert);
    anAssert.assertAll();
}
@Test
public void drSerDeWithProperties() throws Exception {
    final RecipeExecLocation recipeExecLocation = RecipeExecLocation.SourceCluster;
    setUp(recipeExecLocation);
    final String tblName = "serdeTable";
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(tblName);
    final List<String> command = recipeMerlin.getSubmissionCommand();
    runSql(connection,
        "create table " + tblName + "(comment string) "
            + "row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'");

    bootstrapCopy(connection, clusterFS, tblName, connection2, clusterFS2, tblName);

    runSql(connection,
        "insert into table " + tblName + " values"
            + "('this string has been added post bootstrap - should appear after dr')");
    runSql(connection,
        "ALTER TABLE " + tblName + " SET SERDEPROPERTIES ('someProperty' = 'value')");

    Assert.assertEquals(Bundle.runFalconCLI(command), 0, "Recipe submission failed.");

    InstanceUtil.waitTillInstanceReachState(recipeExecLocation.getRecipeOC(clusterOC, clusterOC2),
        recipeMerlin.getName(), 1, CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);

    HiveAssert.assertTableEqual(cluster, clusterHC.getTable(DB_NAME, tblName),
        cluster2, clusterHC2.getTable(DB_NAME, tblName), new NotifyingAssert(true))
        .assertAll();
}
@Test
public void drExternalToNonExternal() throws Exception {
    final RecipeExecLocation recipeExecLocation = RecipeExecLocation.SourceCluster;
    setUp(recipeExecLocation);
    final String tblName = "externalToNonExternal";
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(tblName);
    final List<String> command = recipeMerlin.getSubmissionCommand();
    createExternalTable(connection, clusterFS, baseTestHDFSDir + "click_data/", tblName);

    bootstrapCopy(connection, clusterFS, tblName, connection2, clusterFS2, tblName);

    // change column name
    runSql(connection, "alter table " + tblName + " change column data data_new string");

    Assert.assertEquals(Bundle.runFalconCLI(command), 0, "Recipe submission failed.");

    InstanceUtil.waitTillInstanceReachState(recipeExecLocation.getRecipeOC(clusterOC, clusterOC2),
        recipeMerlin.getName(), 1, CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);

    final NotifyingAssert anAssert = new NotifyingAssert(true);
    HiveAssert.assertTableEqual(cluster, clusterHC.getTable(DB_NAME, tblName),
        cluster2, clusterHC2.getTable(DB_NAME, tblName), anAssert, false);
    // compare table types on both sides (the message refers to Tabletype, not table name)
    anAssert.assertNotEquals(clusterHC2.getTable(DB_NAME, tblName).getTabletype(),
        clusterHC.getTable(DB_NAME, tblName).getTabletype(),
        "Source and destination tables should have different Tabletype");
    anAssert.assertNotEquals(clusterHC2.getTable(DB_NAME, tblName).getTblProps().get("EXTERNAL"),
        clusterHC.getTable(DB_NAME, tblName).getTblProps().get("EXTERNAL"),
        "Source and destination tables should have different value of property EXTERNAL");
    anAssert.assertAll();
}
/**
 * Set feed cluster1 as target, clusters 2 and 3 as source. Run the feed. Update the feed and
 * check that the update action succeeds. Check that the appropriate number of replication and
 * retention coordinators exist on the matching clusters.
 *
 * @throws Exception
 */
@Test(enabled = true, timeOut = 1200000)
public void multipleSourceOneTarget() throws Exception {
    bundles[0].setInputFeedDataPath(inputPath);
    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);

    FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    feed.clearFeedClusters();

    // use the colo string here so that the test works in embedded and distributed mode.
    String postFix = "/US/" + cluster2Colo;
    String prefix = bundles[0].getFeedDataPathPrefix();
    HadoopUtil.deleteDirIfExists(prefix.substring(1), cluster2FS);
    HadoopUtil.lateDataReplenish(cluster2FS, 5, 80, prefix, postFix);

    // use the colo string here so that the test works in embedded and distributed mode.
    postFix = "/UK/" + cluster3Colo;
    prefix = bundles[0].getFeedDataPathPrefix();
    HadoopUtil.deleteDirIfExists(prefix.substring(1), cluster3FS);
    HadoopUtil.lateDataReplenish(cluster3FS, 5, 80, prefix, postFix);

    String startTime = TimeUtil.getTimeWrtSystemTime(-30);

    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[1].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(startTime, TimeUtil.addMinsToTime(startTime, 85))
            .withClusterType(ClusterType.SOURCE)
            .withPartition("US/${cluster.colo}")
            .build());
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(TimeUtil.addMinsToTime(startTime, 20), TimeUtil.addMinsToTime(startTime, 105))
            .withClusterType(ClusterType.TARGET)
            .build());
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(TimeUtil.addMinsToTime(startTime, 40), TimeUtil.addMinsToTime(startTime, 130))
            .withClusterType(ClusterType.SOURCE)
            .withPartition("UK/${cluster.colo}")
            .build());

    LOGGER.info("feed: " + Util.prettyPrintXml(feed.toString()));

    AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feed.toString()));
    AssertUtil.assertSucceeded(prism.getFeedHelper().schedule(feed.toString()));

    // change feed location path
    feed.setFilePath(alternativeInputPath);
    LOGGER.info("updated feed: " + Util.prettyPrintXml(feed.toString()));

    // update feed
    AssertUtil.assertSucceeded(prism.getFeedHelper().update(feed.toString(), feed.toString()));

    Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "REPLICATION"), 0);
    Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "RETENTION"), 2);
    Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(cluster3OC, feed.getName(), "REPLICATION"), 0);
    Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(cluster3OC, feed.getName(), "RETENTION"), 2);
    Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(cluster1OC, feed.getName(), "REPLICATION"), 4);
    Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(cluster1OC, feed.getName(), "RETENTION"), 2);
}
/**
 * Set feed1 to have cluster1 as source and cluster3 as target. Set feed2 clusters vice versa.
 * Add both clusters to the process and feed2 as an input feed. Run the process. Update feed1.
 * TODO: test case is incomplete.
 *
 * @throws Exception
 */
@Test(enabled = true, timeOut = 1800000)
public void updateFeedDependentProcessTest() throws Exception {
    // set cluster colos
    bundles[0].setCLusterColo(cluster1Colo);
    bundles[1].setCLusterColo(cluster2Colo);
    bundles[2].setCLusterColo(cluster3Colo);

    // submit 3 clusters
    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);

    // get 2 unique feeds
    FeedMerlin feed01 = new FeedMerlin(bundles[0].getInputFeedFromBundle());
    FeedMerlin feed02 = new FeedMerlin(bundles[1].getInputFeedFromBundle());
    FeedMerlin outputFeed = new FeedMerlin(bundles[0].getOutputFeedFromBundle());

    // set clusters to null;
    feed01.clearFeedClusters();
    feed02.clearFeedClusters();
    outputFeed.clearFeedClusters();

    // set new feed input data
    feed01.setFeedPathValue(baseTestDir + "/feed01" + MINUTE_DATE_PATTERN);
    feed02.setFeedPathValue(baseTestDir + "/feed02" + MINUTE_DATE_PATTERN);

    // generate data in both the colos ua1 and ua3
    String prefix = feed01.getFeedPrefix();
    HadoopUtil.deleteDirIfExists(prefix.substring(1), cluster1FS);
    HadoopUtil.lateDataReplenish(cluster1FS, 25, 1, prefix, null);

    prefix = feed02.getFeedPrefix();
    HadoopUtil.deleteDirIfExists(prefix.substring(1), cluster3FS);
    HadoopUtil.lateDataReplenish(cluster3FS, 25, 1, prefix, null);

    String startTime = TimeUtil.getTimeWrtSystemTime(-50);

    // set clusters for feed01
    feed01.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(startTime, "2099-01-01T00:00Z")
            .withClusterType(ClusterType.SOURCE)
            .build());
    feed01.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(startTime, "2099-01-01T00:00Z")
            .withClusterType(ClusterType.TARGET)
            .build());

    // set clusters for feed02
    feed02.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(startTime, "2099-01-01T00:00Z")
            .withClusterType(ClusterType.TARGET)
            .build());
    feed02.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(startTime, "2099-01-01T00:00Z")
            .withClusterType(ClusterType.SOURCE)
            .build());

    // set clusters for output feed
    outputFeed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(startTime, "2099-01-01T00:00Z")
            .withClusterType(ClusterType.SOURCE)
            .build());
    outputFeed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(startTime, "2099-01-01T00:00Z")
            .withClusterType(ClusterType.TARGET)
            .build());

    // submit and schedule feeds
    prism.getFeedHelper().submitAndSchedule(feed01.toString());
    prism.getFeedHelper().submitAndSchedule(feed02.toString());
    prism.getFeedHelper().submitAndSchedule(outputFeed.toString());

    // create a process with 2 clusters
    ProcessMerlin process = new ProcessMerlin(bundles[0].getProcessData());

    // add clusters to process
    String processStartTime = TimeUtil.getTimeWrtSystemTime(-6);
    String processEndTime = TimeUtil.getTimeWrtSystemTime(70);
    process.clearProcessCluster();
    process.addProcessCluster(
        new ProcessMerlin.ProcessClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
            .withValidity(processStartTime, processEndTime)
            .build());
    process.addProcessCluster(
        new ProcessMerlin.ProcessClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
            .withValidity(processStartTime, processEndTime)
            .build());
    process.addInputFeed(feed02.getName(), feed02.getName());

    // submit and schedule process
    AssertUtil.assertSucceeded(prism.getProcessHelper().submitAndSchedule(process.toString()));

    LOGGER.info("Wait till process goes into running ");
    int timeout = OSUtil.IS_WINDOWS ? 50 : 25;
    InstanceUtil.waitTillInstanceReachState(serverOC.get(0), process.getName(), 1,
        Status.RUNNING, EntityType.PROCESS, timeout);
    InstanceUtil.waitTillInstanceReachState(serverOC.get(2), process.getName(), 1,
        Status.RUNNING, EntityType.PROCESS, timeout);

    feed01.setFilePath(alternativeInputPath);
    LOGGER.info("updated feed: " + Util.prettyPrintXml(feed01.toString()));
    AssertUtil.assertSucceeded(prism.getFeedHelper().update(feed01.toString(), feed01.toString()));
}
/**
 * 1 src tbl 1 dst tbl replication. Insert/delete/replace partitions using dynamic partition
 * queries. The changes should get reflected at destination.
 */
@Test
public void drInsertDropReplaceDynamicPartition() throws Exception {
    final RecipeExecLocation recipeExecLocation = RecipeExecLocation.SourceCluster;
    setUp(recipeExecLocation);
    final String tblName = "dynamicPartitionDR";
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(tblName);
    final List<String> command = recipeMerlin.getSubmissionCommand();

    // disable strict mode to use only dynamic partition
    runSql(connection, "set hive.exec.dynamic.partition.mode=nonstrict");

    runSql(connection,
        "create table " + tblName + "(comment string) partitioned by (pname string)");
    runSql(connection,
        "insert into table " + tblName + " partition (pname) values"
            + "('this partition is going to be deleted - should NOT appear after dr', 'DELETE')");
    runSql(connection,
        "insert into table " + tblName + " partition (pname) values"
            + "('this partition is going to be replaced - should NOT appear after dr', 'REPLACE')");
    runSql(connection,
        "insert into table " + tblName + " partition (pname) values"
            + "('this partition will have more data - should appear after dr', 'ADD_DATA')");

    LOGGER.info(tblName + " before bootstrap copying: ");
    runSql(connection, "select * from " + tblName);
    bootstrapCopy(connection, clusterFS, tblName, connection2, clusterFS2, tblName);

    runSql(connection,
        "insert into table " + tblName + " partition (pname) values"
            + "('this partition has been added post bootstrap - should appear after dr', 'NEW_PART')");
    runSql(connection,
        "insert into table " + tblName + " partition (pname) values"
            + "('more data has been added post bootstrap - should appear after dr', 'ADD_DATA')");
    runSql(connection, "alter table " + tblName + " drop partition(pname = 'DELETE')");
    runSql(connection, "alter table " + tblName + " drop partition(pname = 'REPLACE')");
    runSql(connection,
        "insert into table " + tblName + " partition (pname) values"
            + "('this partition has been replaced - should appear after dr', 'REPLACE')");

    LOGGER.info(tblName + " after modifications, before replication: ");
    runSql(connection, "select * from " + tblName);

    Assert.assertEquals(Bundle.runFalconCLI(command), 0, "Recipe submission failed.");

    InstanceUtil.waitTillInstanceReachState(recipeExecLocation.getRecipeOC(clusterOC, clusterOC2),
        recipeMerlin.getName(), 1, CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);

    HiveAssert.assertTableEqual(cluster, clusterHC.getTable(DB_NAME, tblName),
        cluster2, clusterHC2.getTable(DB_NAME, tblName), new NotifyingAssert(true))
        .assertAll();
}
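/**
 * Replicate one source table to two different destination clusters using two separate recipe
 * requests. Data inserted post bootstrap should appear on both targets.
 */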
@Test
public void drTwoDstTablesTwoRequests() throws Exception {
    final RecipeExecLocation recipeExecLocation = RecipeExecLocation.TargetCluster;
    setUp(recipeExecLocation);
    final HCatClient clusterHC3 = cluster3.getClusterHelper().getHCatClient();
    final Connection connection3 = cluster3.getClusterHelper().getHiveJdbcConnection();
    runSql(connection3, "drop database if exists hdr_sdb1 cascade");
    runSql(connection3, "create database hdr_sdb1");
    runSql(connection3, "use hdr_sdb1");

    final String tblName = "vanillaTable";
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(tblName);
    final String recipe1Name = recipeMerlin.getName();
    final List<String> command1 = recipeMerlin.getSubmissionCommand();

    final Bundle bundle3 = new Bundle(BundleUtil.readHCatBundle(), cluster3);
    bundle3.generateUniqueBundle(this);
    bundle3.submitClusters(prism);
    recipeMerlin.withTargetCluster(bundle3.getClusterElement())
        .withRecipeCluster(recipeExecLocation.getRecipeCluster(
            bundles[0].getClusterElement(), bundle3.getClusterElement()));
    recipeMerlin.setUniqueName(this.getClass().getSimpleName());

    final List<String> command2 = recipeMerlin.getSubmissionCommand();
    final String recipe2Name = recipeMerlin.getName();

    runSql(connection, "create table " + tblName + "(comment string)");

    bootstrapCopy(connection, clusterFS, tblName, connection2, clusterFS2, tblName);
    bootstrapCopy(connection, clusterFS, tblName, connection3, clusterFS3, tblName);

    runSql(connection,
        "insert into table " + tblName + " values"
            + "('this string has been added post bootstrap - should appear after dr')");

    Assert.assertEquals(Bundle.runFalconCLI(command1), 0, "Recipe submission failed.");
    Assert.assertEquals(Bundle.runFalconCLI(command2), 0, "Recipe submission failed.");

    InstanceUtil.waitTillInstanceReachState(recipeExecLocation.getRecipeOC(clusterOC, clusterOC2),
        recipe1Name, 1, CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);
    InstanceUtil.waitTillInstanceReachState(recipeExecLocation.getRecipeOC(clusterOC, clusterOC3),
        recipe2Name, 1, CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);

    final NotifyingAssert anAssert = new NotifyingAssert(true);
    HiveAssert.assertTableEqual(cluster, clusterHC.getTable(DB_NAME, tblName),
        cluster2, clusterHC2.getTable(DB_NAME, tblName), anAssert);
    HiveAssert.assertTableEqual(cluster, clusterHC.getTable(DB_NAME, tblName),
        cluster3, clusterHC3.getTable(DB_NAME, tblName), anAssert);
    anAssert.assertAll();
}
/**
 * 1 src tbl 1 dst tbl replication. Insert/overwrite partitions using dynamic partition queries.
 * The changes should get reflected at destination.
 *
 * @throws Exception
 */
@Test
public void drInsertOverwriteDynamicPartition() throws Exception {
    final RecipeExecLocation recipeExecLocation = RecipeExecLocation.SourceCluster;
    setUp(recipeExecLocation);
    final String tblName = "drInsertOverwritePartition";
    final String hlpTblName = "drInsertOverwritePartitionHelperTbl";
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(tblName);
    final List<String> command = recipeMerlin.getSubmissionCommand();

    // disable strict mode to use only dynamic partition
    runSql(connection, "set hive.exec.dynamic.partition.mode=nonstrict");

    runSql(connection,
        "create table " + hlpTblName + "(comment string) partitioned by (pname string)");
    runSql(connection,
        "insert into table " + hlpTblName + " partition (pname)"
            + " values('overwrite data - should appear after dr', 'OVERWRITE_PART')");
    runSql(connection,
        "insert into table " + hlpTblName + " partition (pname)"
            + " values('newdata row2 - should appear after dr', 'NEW_DATA')");
    runSql(connection,
        "insert into table " + hlpTblName + " partition (pname)"
            + " values('newdata row1 - should appear after dr', 'NEW_DATA')");

    runSql(connection,
        "create table " + tblName + "(comment string) partitioned by (pname string)");
    runSql(connection,
        "insert into table " + tblName + " partition (pname) values"
            + "('this data should be retained - should appear after dr', 'OLD_PART')");
    runSql(connection,
        "insert into table " + tblName + " partition (pname) values"
            + "('this data should get overwritten - should NOT appear after dr', 'OVERWRITE_PART')");

    LOGGER.info(tblName + " before bootstrap copying: ");
    runSql(connection, "select * from " + tblName);
    bootstrapCopy(connection, clusterFS, tblName, connection2, clusterFS2, tblName);

    runSql(connection,
        "insert overwrite table " + tblName + " partition (pname) "
            + "select comment, pname from " + hlpTblName + " where comment REGEXP '^overwrite'");
    runSql(connection,
        "insert overwrite table " + tblName + " partition (pname) "
            + "select comment, pname from " + hlpTblName + " where comment REGEXP '^newdata'");

    LOGGER.info(tblName + " after modifications, before replication: ");
    runSql(connection, "select * from " + tblName);

    Assert.assertEquals(Bundle.runFalconCLI(command), 0, "Recipe submission failed.");

    InstanceUtil.waitTillInstanceReachState(recipeExecLocation.getRecipeOC(clusterOC, clusterOC2),
        recipeMerlin.getName(), 1, CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);

    HiveAssert.assertTableEqual(cluster, clusterHC.getTable(DB_NAME, tblName),
        cluster2, clusterHC2.getTable(DB_NAME, tblName), new NotifyingAssert(true))
        .assertAll();
}
// Make sure the Oozie changes mentioned in FALCON-389 are done on the clusters; otherwise the
// test will fail.
// Note: with Hive 0.13, HIVE-6848 and HIVE-6868 need to be resolved for this to work. The Oozie
// share libs also need Hive jars with those JIRAs fixed, and the Maven dependency used to run
// the tests must have an HCat with those fixes as well.
// This test can fail randomly because of https://issues.apache.org/jira/browse/FALCON-401
@Test(dataProvider = "generateSeparators")
public void oneSourceTwoTarget(String separator) throws Exception {
    String tcName = "HCatReplication_oneSourceTwoTarget";
    if (separator.equals("-")) {
        tcName += "_hyphen";
    } else {
        tcName += "_slash";
    }
    String tblName = tcName;
    String testHdfsDir = baseTestHDFSDir + "/" + tcName;
    HadoopUtil.recreateDir(serverFS, testHdfsDir);
    final String startDate = "2010-01-01T20:00Z";
    final String endDate = "2099-01-01T00:00Z";
    final String dataEndDate = "2010-01-01T21:00Z";
    final String tableUriPartitionFragment =
        StringUtils.join(new String[] {"#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}"}, separator);
    String tableUri = "catalog:" + dbName + ":" + tblName + tableUriPartitionFragment;
    final String datePattern = StringUtils.join(new String[] {"yyyy", "MM", "dd", "HH"}, separator);

    // use the start date for both as this will only generate 2 partitions.
    List<String> dataDates = TimeUtil.getMinuteDatesOnEitherSide(
        startDate, dataEndDate, 60, DateTimeFormat.forPattern(datePattern));

    final List<String> dataset =
        HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, testHdfsDir, dataDates);
    final String col1Name = "id";
    final String col2Name = "value";
    final String partitionColumn = "dt";

    ArrayList<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
    cols.add(HCatUtil.getStringSchema(col1Name, col1Name + " comment"));
    cols.add(HCatUtil.getStringSchema(col2Name, col2Name + " comment"));
    ArrayList<HCatFieldSchema> partitionCols = new ArrayList<HCatFieldSchema>();

    // create table on cluster 1 and add data to it.
    partitionCols.add(HCatUtil.getStringSchema(partitionColumn, partitionColumn + " partition"));
    createTable(clusterHC, dbName, tblName, cols, partitionCols, testHdfsDir);
    addPartitionsToTable(dataDates, dataset, "dt", dbName, tblName, clusterHC);

    // create table on target clusters.
    createTable(cluster2HC, dbName, tblName, cols, partitionCols, testHdfsDir);
    createTable(cluster3HC, dbName, tblName, cols, partitionCols, testHdfsDir);

    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);

    bundles[0].setInputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setInputFeedValidity(startDate, endDate);
    bundles[0].setInputFeedTableUri(tableUri);

    String feed = bundles[0].getDataSets().get(0);
    // set cluster 2 as a target.
    feed = FeedMerlin.fromString(feed)
        .addFeedCluster(
            new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[1].getClusters().get(0)))
                .withRetention("months(9000)", ActionType.DELETE)
                .withValidity(startDate, endDate)
                .withClusterType(ClusterType.TARGET)
                .withTableUri(tableUri)
                .build())
        .toString();
    // set cluster 3 as a target.
    feed = FeedMerlin.fromString(feed)
        .addFeedCluster(
            new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
                .withRetention("months(9000)", ActionType.DELETE)
                .withValidity(startDate, endDate)
                .withClusterType(ClusterType.TARGET)
                .withTableUri(tableUri)
                .build())
        .toString();

    AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed));
    TimeUtil.sleepSeconds(TIMEOUT);

    // check if all coordinators exist
    Assert.assertEquals(InstanceUtil.checkIfFeedCoordExist(
        cluster2.getFeedHelper(), Util.readEntityName(feed), "REPLICATION"), 1);
    Assert.assertEquals(InstanceUtil.checkIfFeedCoordExist(
        cluster3.getFeedHelper(), Util.readEntityName(feed), "REPLICATION"), 1);

    // replication should start, wait while it ends
    // we will check for 2 instances so that both partitions are copied over.
    InstanceUtil.waitTillInstanceReachState(cluster2OC, Util.readEntityName(feed), 2,
        CoordinatorAction.Status.SUCCEEDED, EntityType.FEED);
    InstanceUtil.waitTillInstanceReachState(cluster3OC, Util.readEntityName(feed), 2,
        CoordinatorAction.Status.SUCCEEDED, EntityType.FEED);

    // check if data was replicated correctly
    List<Path> srcData = HadoopUtil.getAllFilesRecursivelyHDFS(clusterFS, new Path(testHdfsDir));
    LOGGER.info("Data on source cluster: " + srcData);
    List<Path> cluster2TargetData =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS, new Path(testHdfsDir));
    LOGGER.info("Data on target cluster: " + cluster2TargetData);
    AssertUtil.checkForListSizes(srcData, cluster2TargetData);
    List<Path> cluster3TargetData =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster3FS, new Path(testHdfsDir));
    LOGGER.info("Data on target cluster: " + cluster3TargetData);
    AssertUtil.checkForListSizes(srcData, cluster3TargetData);
}
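/**
 * 1 src tbl 1 dst tbl replication with static partitions. Add, drop and replace partitions at
 * the source after bootstrap; the changes should get reflected at destination.
 */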
@Test(dataProvider = "getRecipeLocation") public void drPartition(final RecipeExecLocation recipeExecLocation) throws Exception { setUp(recipeExecLocation); final String tblName = "partitionDR"; recipeMerlin.withSourceDb(DB_NAME).withSourceTable(tblName); final List<String> command = recipeMerlin.getSubmissionCommand(); runSql( connection, "create table " + tblName + "(comment string) partitioned by (pname string)"); runSql( connection, "insert into table " + tblName + " partition (pname = 'DELETE') values" + "('this partition is going to be deleted - should NOT appear after dr')"); runSql( connection, "insert into table " + tblName + " partition (pname = 'REPLACE') values" + "('this partition is going to be replaced - should NOT appear after dr')"); runSql( connection, "insert into table " + tblName + " partition (pname = 'ADD_DATA') values" + "('this partition will have more data - should appear after dr')"); bootstrapCopy(connection, clusterFS, tblName, connection2, clusterFS2, tblName); runSql( connection, "insert into table " + tblName + " partition (pname = 'NEW_PART') values" + "('this partition has been added post bootstrap - should appear after dr')"); runSql( connection, "insert into table " + tblName + " partition (pname = 'ADD_DATA') values" + "('more data has been added post bootstrap - should appear after dr')"); runSql(connection, "alter table " + tblName + " drop partition(pname = 'DELETE')"); runSql(connection, "alter table " + tblName + " drop partition(pname = 'REPLACE')"); runSql( connection, "insert into table " + tblName + " partition (pname = 'REPLACE') values" + "('this partition has been replaced - should appear after dr')"); Assert.assertEquals(Bundle.runFalconCLI(command), 0, "Recipe submission failed."); InstanceUtil.waitTillInstanceReachState( recipeExecLocation.getRecipeOC(clusterOC, clusterOC2), recipeMerlin.getName(), 1, CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS); HiveAssert.assertTableEqual( cluster, clusterHC.getTable(DB_NAME, tblName), cluster2, clusterHC2.getTable(DB_NAME, tblName), new NotifyingAssert(true)) .assertAll(); }