/** * Schedule process. Suspend it. -getStatus of its instances. Check that the response reflects their * status as suspended. * * @throws Exception */ @Test(groups = {"singleCluster"}) public void testProcessInstanceStatusSuspended() throws Exception { bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:22Z"); bundles[0].setProcessPeriodicity(5, TimeUnit.minutes); for (int i = 0; i < bundles[0].getClusters().size(); i++) { LOGGER.info( "cluster to be submitted: " + i + " " + Util.prettyPrintXml(bundles[0].getClusters().get(i))); } bundles[0].submitFeedsScheduleProcess(prism); AssertUtil.checkStatus( serverOC.get(0), EntityType.PROCESS, bundles[0].getProcessData(), Job.Status.RUNNING); AssertUtil.assertSucceeded( prism.getProcessHelper().suspend(URLS.SUSPEND_URL, bundles[0].getProcessData())); AssertUtil.checkStatus( serverOC.get(0), EntityType.PROCESS, bundles[0].getProcessData(), Job.Status.SUSPENDED); TimeUtil.sleepSeconds(15); InstancesResult r = prism .getProcessHelper() .getProcessInstanceStatus( Util.readEntityName(bundles[0].getProcessData()), "?start=2010-01-02T01:00Z&end=2010-01-02T01:20Z"); InstanceUtil.validateSuccess(r, bundles[0], WorkflowStatus.SUSPENDED); }
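/*
 * InstanceUtil.validateSuccess(r, bundle, status) hides the actual checks. The sketch below is a
 * rough, hypothetical illustration of the kind of assertion it is assumed to perform on the
 * returned InstancesResult; the nested Instance accessors are treated as assumptions about the
 * Falcon API, not as the helper's real implementation.
 */
private void assertAllInstancesHaveStatus(InstancesResult result, WorkflowStatus expected) {
    // every instance returned for the queried window should carry the expected workflow status
    for (InstancesResult.Instance instance : result.getInstances()) {
        Assert.assertEquals(instance.getStatus(), expected, "unexpected instance status");
    }
}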
/** Upload feeds, processes and clusters with different names. */ @BeforeClass(alwaysRun = true) public void prepareData() throws IOException, URISyntaxException, AuthenticationException, InterruptedException, JAXBException { uploadDirToClusters(aggregateWorkflowDir, OSUtil.RESOURCES_OOZIE); removeTestClassEntities(); bundles[0] = BundleUtil.readELBundle(); bundles[0] = new Bundle(bundles[0], servers.get(0)); bundles[0].generateUniqueBundle(this); bundles[0].setProcessWorkflow(aggregateWorkflowDir); bundles[0].submitBundle(prism); // submit different clusters, feeds and processes FeedMerlin feed = new FeedMerlin(bundles[0].getInputFeedFromBundle()); ProcessMerlin process = bundles[0].getProcessObject(); ClusterMerlin cluster = bundles[0].getClusterElement(); String clusterNamePrefix = bundles[0].getClusterElement().getName() + '-'; String processNamePrefix = bundles[0].getProcessName() + '-'; String feedNamePrefix = bundles[0].getInputFeedNameFromBundle() + '-'; List randomNames = getPatternName(); for (Object randomName : randomNames) { process.setName(processNamePrefix + randomName); AssertUtil.assertSucceeded(prism.getProcessHelper().submitAndSchedule(process.toString())); feed.setName(feedNamePrefix + randomName); AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed.toString())); cluster.setName(clusterNamePrefix + randomName); AssertUtil.assertSucceeded(prism.getClusterHelper().submitEntity(cluster.toString())); } }
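/*
 * getPatternName() above supplies the suffixes appended to the cluster, feed and process name
 * prefixes, but it is not shown in this section. The sketch below is a minimal hypothetical
 * stand-in, assuming the provider simply returns a handful of distinct suffixes that exercise
 * different name patterns for the later listing/filtering tests; the real values may differ.
 */
private List<String> getPatternName() {
    // illustrative suffixes only (mixed case, digits, separators)
    return Arrays.asList("new", "New", "NEW-01", "new_2", "newest");
}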
/** * Schedule process. Suspend and then resume it. -getStatus of its instances. Check that response * reflects that instances are running. * * @throws Exception */ @Test(groups = {"singleCluster"}) public void testProcessInstanceStatusResumed() throws Exception { bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:22Z"); bundles[0].setProcessPeriodicity(5, TimeUnit.minutes); bundles[0].setOutputFeedPeriodicity(5, TimeUnit.minutes); bundles[0].setOutputFeedLocationData(feedOutputPath); bundles[0].setProcessConcurrency(2); bundles[0].submitFeedsScheduleProcess(prism); AssertUtil.checkStatus( serverOC.get(0), EntityType.PROCESS, bundles[0].getProcessData(), Job.Status.RUNNING); prism.getProcessHelper().suspend(URLS.SUSPEND_URL, bundles[0].getProcessData()); AssertUtil.checkStatus( serverOC.get(0), EntityType.PROCESS, bundles[0].getProcessData(), Job.Status.SUSPENDED); prism.getProcessHelper().resume(URLS.RESUME_URL, bundles[0].getProcessData()); TimeUtil.sleepSeconds(15); AssertUtil.checkStatus( serverOC.get(0), EntityType.PROCESS, bundles[0].getProcessData(), Job.Status.RUNNING); InstancesResult r = prism .getProcessHelper() .getProcessInstanceStatus( Util.readEntityName(bundles[0].getProcessData()), "?start=2010-01-02T01:00Z&end=2010-01-02T01:22Z"); InstanceUtil.validateSuccess(r, bundles[0], WorkflowStatus.RUNNING); }
/** * Test feed acl modification. * * @throws Exception */ @Test(dataProvider = "generateAclOwnerAndGroup") public void feedAclUpdate(final String newOwner, final String newGroup) throws Exception { bundles[0].submitClusters(prism); final String oldFeed = bundles[0].getInputFeedFromBundle(); AssertUtil.assertSucceeded(feedHelper.submitAndSchedule(oldFeed)); final FeedMerlin feedMerlin = new FeedMerlin(oldFeed); feedMerlin.setACL(newOwner, newGroup, "*"); final String newFeed = feedMerlin.toString(); AssertUtil.assertFailed( feedHelper.update(oldFeed, newFeed), "AuthorizationException: Permission denied"); }
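/*
 * The "generateAclOwnerAndGroup" data provider referenced above is defined elsewhere in the test
 * class. The sketch below is a minimal hypothetical version, assuming it supplies owner/group
 * pairs that differ from the ACL of the submitted feed; the user and group names are illustrative
 * placeholders, not values from the original suite.
 */
@DataProvider(name = "generateAclOwnerAndGroup")
public Object[][] generateAclOwnerAndGroup() {
    return new Object[][] {
        {"someOtherUser", "someOtherGroup"}, // both owner and group changed
        {"someOtherUser", "users"}, // only the owner changed
    };
}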
private void validateOutputPatternList( EntityElement[] entityElements, EntityElement[] outputElements, String pattern) { List<String> actualOutputElements = new ArrayList<>(); List<String> expectedOutputElements = new ArrayList<>(); for (EntityElement e : entityElements) { if (getOutputEntity(e.name, pattern)) { expectedOutputElements.add(e.name); } } for (EntityElement e : outputElements) { actualOutputElements.add(e.name); } LOGGER.debug("actualElements : " + actualOutputElements); LOGGER.debug("expectedElements : " + expectedOutputElements); // Check the number of elements present in the output. AssertUtil.checkForListSizes(expectedOutputElements, actualOutputElements); // Check that the expected and actual output contain the same entities. Assert.assertTrue( expectedOutputElements.containsAll(actualOutputElements), "Output list elements are not as expected"); for (String element : expectedOutputElements) { Assert.assertTrue( actualOutputElements.contains(element), "Element " + element + " is not present in output"); } }
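/*
 * getOutputEntity(name, pattern) decides which entities should appear in the filtered listing
 * above, but its body is not part of this section. A minimal hypothetical sketch is given here,
 * assuming the pattern is matched against the entity name case-insensitively; the real helper
 * may apply a different matching rule.
 */
private boolean getOutputEntity(String entityName, String pattern) {
    // illustrative matching rule only: name contains the pattern, ignoring case
    return entityName != null && entityName.toLowerCase().contains(pattern.toLowerCase());
}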
private void validate(ServiceResponse response) throws JAXBException { AssertUtil.assertFailed(response); Assert.assertTrue( response .getMessage() .contains( "org.apache.falcon.FalconException: AUTHENTICATION : E1004 :" + " E1004: Expression language evaluation error, Unable to evaluate :${coord:someEL(1)"), "Correct response was not present in process / feed schedule"); }
/** * Schedule process. -getStatus of its first instance using only the -start parameter, which * points to the start time of the process validity. Check that the response reflects the expected * status of the instance. * * @throws Exception */ @Test(groups = {"singleCluster"}) public void testProcessInstanceStatusOnlyStart() throws Exception { bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:22Z"); bundles[0].setProcessPeriodicity(5, TimeUnit.minutes); bundles[0].submitFeedsScheduleProcess(prism); AssertUtil.checkStatus( serverOC.get(0), EntityType.PROCESS, bundles[0].getProcessData(), Job.Status.RUNNING); InstancesResult r = prism .getProcessHelper() .getProcessInstanceStatus( Util.readEntityName(bundles[0].getProcessData()), "?start=2010-01-02T01:00Z"); InstanceUtil.validateSuccessOnlyStart(r, WorkflowStatus.RUNNING); }
/** Tries to update a feed with an invalid EL expression. */ @Test(groups = {"singleCluster"}) public void testDryRunFailureUpdateFeed() throws Exception { bundles[0].submitClusters(prism); String feed = bundles[0].getInputFeedFromBundle(); ServiceResponse response = prism.getFeedHelper().submitAndSchedule(Util.URLS.SUBMIT_AND_SCHEDULE_URL, feed); AssertUtil.assertSucceeded(response); feed = Util.setFeedProperty(feed, "EntityDryRunTestProp", "${coord:someEL(1)"); response = prism.getFeedHelper().update(feed, feed); validate(response); Assert.assertEquals( OozieUtil.getNumberOfBundle(cluster, EntityType.FEED, Util.readEntityName(feed)), 1, "more than one bundle found after failed update request"); }
/** * Perform -getStatus using only -start parameter within time-range of non-materialized instances. * There should be no instances returned in response. * * @throws Exception */ @Test(groups = {"singleCluster"}) public void testProcessInstanceStatusOnlyStartAfterMat() throws Exception { bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-03T10:22Z"); bundles[0].setProcessTimeOut(3, TimeUnit.minutes); bundles[0].setProcessPeriodicity(1, TimeUnit.minutes); bundles[0].setProcessConcurrency(1); bundles[0].submitFeedsScheduleProcess(prism); InstancesResult r = prism .getProcessHelper() .getProcessInstanceStatus( Util.readEntityName(bundles[0].getProcessData()), "?start=2010-01-02T05:00Z"); AssertUtil.assertSucceeded(r); Assert.assertEquals(r.getInstances(), null); }
/** * Schedule process and then suspend it. -getStatus of the first instance using only the -start * parameter. Instance should be suspended. * * @throws Exception */ @Test(groups = {"singleCluster"}) public void testProcessInstanceStatusOnlyStartSuspended() throws Exception { bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:22Z"); bundles[0].setProcessPeriodicity(5, TimeUnit.minutes); bundles[0].submitFeedsScheduleProcess(prism); AssertUtil.assertSucceeded( prism.getProcessHelper().suspend(URLS.SUSPEND_URL, bundles[0].getProcessData())); TimeUtil.sleepSeconds(15); InstancesResult r = prism .getProcessHelper() .getProcessInstanceStatus( Util.readEntityName(bundles[0].getProcessData()), "?start=2010-01-02T01:00Z"); InstanceUtil.validateSuccessOnlyStart(r, WorkflowStatus.SUSPENDED); }
/** Tries to update a feed with an invalid EL expression. */ @Test(groups = {"singleCluster"}) public void testDryRunFailureUpdateFeed() throws Exception { bundles[0].submitClusters(prism); FeedMerlin feed = new FeedMerlin(bundles[0].getInputFeedFromBundle()); ServiceResponse response = prism.getFeedHelper().submitAndSchedule(feed.toString()); AssertUtil.assertSucceeded(response); feed.withProperty("EntityDryRunTestProp", "${coord:someEL(1)"); response = prism.getFeedHelper().update(feed.toString(), feed.toString()); validate( response, "The new entity (feed) " + bundles[0].getInputFeedNameFromBundle() + " can't be scheduled"); Assert.assertEquals( OozieUtil.getNumberOfBundle(clusterOC, EntityType.FEED, feed.getName()), 1, "more than one bundle found after failed update request"); }
/** * Schedule and then delete process. Try to get the status of its instances. Attempt should fail * with an appropriate code. * * @throws Exception */ @Test(groups = {"singleCluster"}) public void testProcessInstanceStatusKilled() throws Exception { bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:22Z"); bundles[0].setProcessPeriodicity(5, TimeUnit.minutes); bundles[0].submitFeedsScheduleProcess(prism); AssertUtil.assertSucceeded( prism.getProcessHelper().delete(URLS.DELETE_URL, bundles[0].getProcessData())); InstancesResult r = prism .getProcessHelper() .getProcessInstanceStatus( Util.readEntityName(bundles[0].getProcessData()), "?start=2010-01-02T01:00Z&end=2010-01-02T01:20Z"); Assert.assertEquals( r.getStatusCode(), ResponseKeys.PROCESS_NOT_FOUND, "Expected PROCESS_NOT_FOUND status code for a deleted process"); }
/** * Schedule process. Perform -getStatus using -start/-end parameters which are out of process * validity range. Attempt should fail. * * @throws Exception */ @Test(groups = {"singleCluster"}) public void testProcessInstanceStatusStartEndOutOfRange() throws Exception { bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:22Z"); bundles[0].setProcessPeriodicity(5, TimeUnit.minutes); bundles[0].setOutputFeedPeriodicity(5, TimeUnit.minutes); bundles[0].setOutputFeedLocationData(feedOutputPath); bundles[0].setProcessConcurrency(2); bundles[0].submitFeedsScheduleProcess(prism); AssertUtil.checkStatus( serverOC.get(0), EntityType.PROCESS, bundles[0].getProcessData(), Job.Status.RUNNING); InstancesResult r = prism .getProcessHelper() .getProcessInstanceStatus( Util.readEntityName(bundles[0].getProcessData()), "?start=2010-01-02T00:00Z&end=2010-01-02T01:30Z"); InstanceUtil.validateSuccessWithStatusCode(r, 400); }
/** * Data is created for the feed, so instance status is available. Then, change the data path and * update the feed. The instance status should change to partial. */ @Test public void testFeedListingAfterFeedDataPathUpdate() throws Exception { bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:21Z"); bundles[0].setProcessConcurrency(1); bundles[0].submitFeedsScheduleProcess(prism); InstanceUtil.waitTillInstancesAreCreated(clusterOC, bundles[0].getProcessData(), 0); List<List<String>> missingDependencies = OozieUtil.createMissingDependencies(cluster, EntityType.PROCESS, processName, 0); List<String> missingDependencyLastInstance = missingDependencies.get(missingDependencies.size() - 1); HadoopUtil.flattenAndPutDataInFolder( clusterFS, OSUtil.SINGLE_FILE, missingDependencyLastInstance); InstanceUtil.waitTillInstanceReachState( clusterOC, processName, 1, CoordinatorAction.Status.RUNNING, EntityType.PROCESS, 5); FeedInstanceResult r = prism .getFeedHelper() .getFeedInstanceListing( Util.readEntityName(bundles[0].getDataSets().get(0)), "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z"); validateResponse(r, 5, 0, 0, 0, 5); String inputFeed = bundles[0].getInputFeedFromBundle(); bundles[0].setInputFeedDataPath(baseTestDir + "/inputNew" + MINUTE_DATE_PATTERN); ServiceResponse serviceResponse = prism.getFeedHelper().update(inputFeed, bundles[0].getInputFeedFromBundle()); AssertUtil.assertSucceeded(serviceResponse); // Since we have not created directories for new path, the feed instance status should be // missing r = prism .getFeedHelper() .getFeedInstanceListing( Util.readEntityName(bundles[0].getDataSets().get(0)), "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z"); validateResponse(r, 5, 5, 0, 0, 0); }
/** * Set feed cluster1 as target, clusters 2 and 3 as source. Run feed. Update feed and check that * the action succeeds. Check that the appropriate number of replication and retention * coordinators exist on the matching clusters. * * @throws Exception */ @Test(enabled = true, timeOut = 1200000) public void multipleSourceOneTarget() throws Exception { bundles[0].setInputFeedDataPath(inputPath); Bundle.submitCluster(bundles[0], bundles[1], bundles[2]); FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0)); feed.clearFeedClusters(); // use the colo string here so that the test works in embedded and distributed mode. String postFix = "/US/" + cluster2Colo; String prefix = bundles[0].getFeedDataPathPrefix(); HadoopUtil.deleteDirIfExists(prefix.substring(1), cluster2FS); HadoopUtil.lateDataReplenish(cluster2FS, 5, 80, prefix, postFix); // use the colo string here so that the test works in embedded and distributed mode. postFix = "/UK/" + cluster3Colo; prefix = bundles[0].getFeedDataPathPrefix(); HadoopUtil.deleteDirIfExists(prefix.substring(1), cluster3FS); HadoopUtil.lateDataReplenish(cluster3FS, 5, 80, prefix, postFix); String startTime = TimeUtil.getTimeWrtSystemTime(-30); feed.addFeedCluster( new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[1].getClusters().get(0))) .withRetention("hours(10)", ActionType.DELETE) .withValidity(startTime, TimeUtil.addMinsToTime(startTime, 85)) .withClusterType(ClusterType.SOURCE) .withPartition("US/${cluster.colo}") .build()); feed.addFeedCluster( new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0))) .withRetention("hours(10)", ActionType.DELETE) .withValidity( TimeUtil.addMinsToTime(startTime, 20), TimeUtil.addMinsToTime(startTime, 105)) .withClusterType(ClusterType.TARGET) .build()); feed.addFeedCluster( new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0))) .withRetention("hours(10)", ActionType.DELETE) .withValidity( TimeUtil.addMinsToTime(startTime, 40), TimeUtil.addMinsToTime(startTime, 130)) .withClusterType(ClusterType.SOURCE) .withPartition("UK/${cluster.colo}") .build()); LOGGER.info("feed: " + Util.prettyPrintXml(feed.toString())); AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feed.toString())); AssertUtil.assertSucceeded(prism.getFeedHelper().schedule(feed.toString())); // change feed location path feed.setFilePath(alternativeInputPath); LOGGER.info("updated feed: " + Util.prettyPrintXml(feed.toString())); // update feed AssertUtil.assertSucceeded(prism.getFeedHelper().update(feed.toString(), feed.toString())); Assert.assertEquals( OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "REPLICATION"), 0); Assert.assertEquals( OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "RETENTION"), 2); Assert.assertEquals( OozieUtil.checkIfFeedCoordExist(cluster3OC, feed.getName(), "REPLICATION"), 0); Assert.assertEquals( OozieUtil.checkIfFeedCoordExist(cluster3OC, feed.getName(), "RETENTION"), 2); Assert.assertEquals( OozieUtil.checkIfFeedCoordExist(cluster1OC, feed.getName(), "REPLICATION"), 4); Assert.assertEquals( OozieUtil.checkIfFeedCoordExist(cluster1OC, feed.getName(), "RETENTION"), 2); }
/** * Set feed1 to have cluster1 as source, cluster3 as target. Set feed2 clusters vice versa. Add * both clusters to process and feed2 as input feed. Run process. Update feed1. TODO test case is * incomplete * * @throws Exception */ @Test(enabled = true, timeOut = 1800000) public void updateFeedDependentProcessTest() throws Exception { // set cluster colos bundles[0].setCLusterColo(cluster1Colo); bundles[1].setCLusterColo(cluster2Colo); bundles[2].setCLusterColo(cluster3Colo); // submit 3 clusters Bundle.submitCluster(bundles[0], bundles[1], bundles[2]); // get 2 unique feeds FeedMerlin feed01 = new FeedMerlin(bundles[0].getInputFeedFromBundle()); FeedMerlin feed02 = new FeedMerlin(bundles[1].getInputFeedFromBundle()); FeedMerlin outputFeed = new FeedMerlin(bundles[0].getOutputFeedFromBundle()); // clear existing feed clusters feed01.clearFeedClusters(); feed02.clearFeedClusters(); outputFeed.clearFeedClusters(); // set new feed input data feed01.setFeedPathValue(baseTestDir + "/feed01" + MINUTE_DATE_PATTERN); feed02.setFeedPathValue(baseTestDir + "/feed02" + MINUTE_DATE_PATTERN); // generate data in both the colos ua1 and ua3 String prefix = feed01.getFeedPrefix(); HadoopUtil.deleteDirIfExists(prefix.substring(1), cluster1FS); HadoopUtil.lateDataReplenish(cluster1FS, 25, 1, prefix, null); prefix = feed02.getFeedPrefix(); HadoopUtil.deleteDirIfExists(prefix.substring(1), cluster3FS); HadoopUtil.lateDataReplenish(cluster3FS, 25, 1, prefix, null); String startTime = TimeUtil.getTimeWrtSystemTime(-50); // set clusters for feed01 feed01.addFeedCluster( new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0))) .withRetention("hours(10)", ActionType.DELETE) .withValidity(startTime, "2099-01-01T00:00Z") .withClusterType(ClusterType.SOURCE) .build()); feed01.addFeedCluster( new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0))) .withRetention("hours(10)", ActionType.DELETE) .withValidity(startTime, "2099-01-01T00:00Z") .withClusterType(ClusterType.TARGET) .build()); // set clusters for feed02 feed02.addFeedCluster( new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0))) .withRetention("hours(10)", ActionType.DELETE) .withValidity(startTime, "2099-01-01T00:00Z") .withClusterType(ClusterType.TARGET) .build()); feed02.addFeedCluster( new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0))) .withRetention("hours(10)", ActionType.DELETE) .withValidity(startTime, "2099-01-01T00:00Z") .withClusterType(ClusterType.SOURCE) .build()); // set clusters for output feed outputFeed.addFeedCluster( new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0))) .withRetention("hours(10)", ActionType.DELETE) .withValidity(startTime, "2099-01-01T00:00Z") .withClusterType(ClusterType.SOURCE) .build()); outputFeed.addFeedCluster( new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0))) .withRetention("hours(10)", ActionType.DELETE) .withValidity(startTime, "2099-01-01T00:00Z") .withClusterType(ClusterType.TARGET) .build()); // submit and schedule feeds prism.getFeedHelper().submitAndSchedule(feed01.toString()); prism.getFeedHelper().submitAndSchedule(feed02.toString()); prism.getFeedHelper().submitAndSchedule(outputFeed.toString()); // create a process with 2 clusters ProcessMerlin process = new ProcessMerlin(bundles[0].getProcessData()); // add clusters to process String processStartTime = TimeUtil.getTimeWrtSystemTime(-6);
String processEndTime = TimeUtil.getTimeWrtSystemTime(70); process.clearProcessCluster(); process.addProcessCluster( new ProcessMerlin.ProcessClusterBuilder( Util.readEntityName(bundles[0].getClusters().get(0))) .withValidity(processStartTime, processEndTime) .build()); process.addProcessCluster( new ProcessMerlin.ProcessClusterBuilder( Util.readEntityName(bundles[2].getClusters().get(0))) .withValidity(processStartTime, processEndTime) .build()); process.addInputFeed(feed02.getName(), feed02.getName()); // submit and schedule process AssertUtil.assertSucceeded(prism.getProcessHelper().submitAndSchedule(process.toString())); LOGGER.info("Wait till process goes into running "); int timeout = OSUtil.IS_WINDOWS ? 50 : 25; InstanceUtil.waitTillInstanceReachState( serverOC.get(0), process.getName(), 1, Status.RUNNING, EntityType.PROCESS, timeout); InstanceUtil.waitTillInstanceReachState( serverOC.get(2), process.getName(), 1, Status.RUNNING, EntityType.PROCESS, timeout); feed01.setFilePath(alternativeInputPath); LOGGER.info("updated feed: " + Util.prettyPrintXml(feed01.toString())); AssertUtil.assertSucceeded(prism.getFeedHelper().update(feed01.toString(), feed01.toString())); }
private void validate(ServiceResponse response, String message) throws JAXBException { AssertUtil.assertFailed(response); Assert.assertTrue( response.getMessage().contains(message), "Correct response was not present in process / feed schedule"); }
// Make sure the Oozie changes mentioned in FALCON-389 are done on the clusters, otherwise the test // will fail. // With Hive 0.13 we need HIVE-6848 and HIVE-6868 resolved for this to work. The Oozie share libs // also need Hive jars with these JIRAs resolved, and the Maven dependency used to run the tests // has to have an HCat build with these fixes. // This test can fail randomly because of https://issues.apache.org/jira/browse/FALCON-401 @Test(dataProvider = "generateSeparators") public void oneSourceTwoTarget(String separator) throws Exception { String tcName = "HCatReplication_oneSourceTwoTarget"; if (separator.equals("-")) { tcName += "_hyphen"; } else { tcName += "_slash"; } String tblName = tcName; String testHdfsDir = baseTestHDFSDir + "/" + tcName; HadoopUtil.recreateDir(serverFS, testHdfsDir); final String startDate = "2010-01-01T20:00Z"; final String endDate = "2099-01-01T00:00Z"; final String dataEndDate = "2010-01-01T21:00Z"; final String tableUriPartitionFragment = StringUtils.join(new String[] {"#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}"}, separator); String tableUri = "catalog:" + dbName + ":" + tblName + tableUriPartitionFragment; final String datePattern = StringUtils.join(new String[] {"yyyy", "MM", "dd", "HH"}, separator); // use the start date for both as this will only generate 2 partitions. List<String> dataDates = TimeUtil.getMinuteDatesOnEitherSide( startDate, dataEndDate, 60, DateTimeFormat.forPattern(datePattern)); final List<String> dataset = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, testHdfsDir, dataDates); final String col1Name = "id"; final String col2Name = "value"; final String partitionColumn = "dt"; ArrayList<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>(); cols.add(HCatUtil.getStringSchema(col1Name, col1Name + " comment")); cols.add(HCatUtil.getStringSchema(col2Name, col2Name + " comment")); ArrayList<HCatFieldSchema> partitionCols = new ArrayList<HCatFieldSchema>(); // create table on cluster 1 and add data to it. partitionCols.add(HCatUtil.getStringSchema(partitionColumn, partitionColumn + " partition")); createTable(clusterHC, dbName, tblName, cols, partitionCols, testHdfsDir); addPartitionsToTable(dataDates, dataset, "dt", dbName, tblName, clusterHC); // create table on the target clusters. createTable(cluster2HC, dbName, tblName, cols, partitionCols, testHdfsDir); createTable(cluster3HC, dbName, tblName, cols, partitionCols, testHdfsDir); Bundle.submitCluster(bundles[0], bundles[1], bundles[2]); bundles[0].setInputFeedPeriodicity(1, Frequency.TimeUnit.hours); bundles[0].setInputFeedValidity(startDate, endDate); bundles[0].setInputFeedTableUri(tableUri); String feed = bundles[0].getDataSets().get(0); // set the cluster 2 as the target. feed = FeedMerlin.fromString(feed) .addFeedCluster( new FeedMerlin.FeedClusterBuilder( Util.readEntityName(bundles[1].getClusters().get(0))) .withRetention("months(9000)", ActionType.DELETE) .withValidity(startDate, endDate) .withClusterType(ClusterType.TARGET) .withTableUri(tableUri) .build()) .toString(); // set the cluster 3 as the target.
feed = FeedMerlin.fromString(feed) .addFeedCluster( new FeedMerlin.FeedClusterBuilder( Util.readEntityName(bundles[2].getClusters().get(0))) .withRetention("months(9000)", ActionType.DELETE) .withValidity(startDate, endDate) .withClusterType(ClusterType.TARGET) .withTableUri(tableUri) .build()) .toString(); AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed)); TimeUtil.sleepSeconds(TIMEOUT); // check if all coordinators exist Assert.assertEquals( InstanceUtil.checkIfFeedCoordExist( cluster2.getFeedHelper(), Util.readEntityName(feed), "REPLICATION"), 1); // check if all coordinators exist Assert.assertEquals( InstanceUtil.checkIfFeedCoordExist( cluster3.getFeedHelper(), Util.readEntityName(feed), "REPLICATION"), 1); // replication should start, wait while it ends // we will check for 2 instances so that both partitions are copied over. InstanceUtil.waitTillInstanceReachState( cluster2OC, Util.readEntityName(feed), 2, CoordinatorAction.Status.SUCCEEDED, EntityType.FEED); // replication should start, wait while it ends // we will check for 2 instances so that both partitions are copied over. InstanceUtil.waitTillInstanceReachState( cluster3OC, Util.readEntityName(feed), 2, CoordinatorAction.Status.SUCCEEDED, EntityType.FEED); // check if data was replicated correctly List<Path> srcData = HadoopUtil.getAllFilesRecursivelyHDFS(clusterFS, new Path(testHdfsDir)); LOGGER.info("Data on source cluster: " + srcData); List<Path> cluster2TargetData = HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS, new Path(testHdfsDir)); LOGGER.info("Data on target cluster: " + cluster2TargetData); AssertUtil.checkForListSizes(srcData, cluster2TargetData); List<Path> cluster3TargetData = HadoopUtil.getAllFilesRecursivelyHDFS(cluster3FS, new Path(testHdfsDir)); LOGGER.info("Data on target cluster: " + cluster3TargetData); AssertUtil.checkForListSizes(srcData, cluster3TargetData); }
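/*
 * Illustration only, not part of the original test: how the partition fragment built in
 * oneSourceTwoTarget() expands for the two separators supplied by "generateSeparators".
 * For "-" the URI becomes catalog:<db>:<table>#dt=${YEAR}-${MONTH}-${DAY}-${HOUR}, and for "/"
 * it becomes catalog:<db>:<table>#dt=${YEAR}/${MONTH}/${DAY}/${HOUR}, matching the datePattern
 * ("yyyy-MM-dd-HH" or "yyyy/MM/dd/HH") used to lay out the generated data folders.
 */
private static String exampleTableUri(String dbName, String tblName, String separator) {
    return "catalog:" + dbName + ":" + tblName
        + StringUtils.join(new String[] {"#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}"}, separator);
}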
/* * Prepares a running feed with instances ordered (desc): 1 waiting, 1 suspended, 1 running, * 3 waiting and 6 killed. Tests are based on the expected instance statuses. */ private void prepareScenario() throws AuthenticationException, IOException, URISyntaxException, JAXBException, OozieClientException, InterruptedException { bundles[0].setInputFeedPeriodicity(5, Frequency.TimeUnit.minutes); bundles[0].setInputFeedDataPath(feedDataLocation); String feed = bundles[0].getInputFeedFromBundle(); feedName = Util.readEntityName(feed); String cluster1Def = bundles[0].getClusters().get(0); String cluster2Def = bundles[1].getClusters().get(0); // erase all clusters from feed definition feed = FeedMerlin.fromString(feed).clearFeedClusters().toString(); // set cluster1 as source feed = FeedMerlin.fromString(feed) .addFeedCluster( new FeedMerlin.FeedClusterBuilder(Util.readEntityName(cluster1Def)) .withRetention("days(1000000)", ActionType.DELETE) .withValidity(startTime, endTime) .withClusterType(ClusterType.SOURCE) .build()) .toString(); // set cluster2 as target feed = FeedMerlin.fromString(feed) .addFeedCluster( new FeedMerlin.FeedClusterBuilder(Util.readEntityName(cluster2Def)) .withRetention("days(1000000)", ActionType.DELETE) .withValidity(startTime, endTime) .withClusterType(ClusterType.TARGET) .withDataLocation(targetDataLocation) .build()) .toString(); // submit clusters AssertUtil.assertSucceeded(prism.getClusterHelper().submitEntity(cluster1Def)); AssertUtil.assertSucceeded(prism.getClusterHelper().submitEntity(cluster2Def)); // submit and schedule feed AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed)); InstanceUtil.waitTillInstancesAreCreated(cluster2, feed, 0); InstanceUtil.waitTillInstanceReachState( cluster2OC, feedName, 12, CoordinatorAction.Status.WAITING, EntityType.FEED); // retrieve specific instances so they can be controlled directly List<CoordinatorAction> actions = getReplicationInstances(cluster2OC, feedName); LOGGER.info(actions); Assert.assertNotNull(actions, "Required coordinator not found."); Assert.assertEquals(actions.size(), 12, "Unexpected number of actions."); // kill the first 6 instances String range; InstancesResult r; for (int i = 0; i < 6; i++) { HadoopUtil.createFolders( serverFS.get(0), "", Arrays.asList(actions.get(i).getMissingDependencies().split("#"))); // only a running instance can be killed, so make it running first and then kill it InstanceUtil.waitTillInstanceReachState( cluster2OC, feedName, 1, CoordinatorAction.Status.RUNNING, EntityType.FEED, 3); range = "?start=" + TimeUtil.addMinsToTime(startTime, i * 5 - 1) + "&end=" + TimeUtil.addMinsToTime(startTime, i * 5 + 1); r = prism.getFeedHelper().getProcessInstanceKill(feedName, range); InstanceUtil.validateResponse(r, 1, 0, 0, 0, 1); } // wait for the 10th instance to run, then suspend it HadoopUtil.createFolders( serverFS.get(0), "", Arrays.asList(actions.get(9).getMissingDependencies().split("#"))); InstanceUtil.waitTillInstanceReachState( cluster2OC, feedName, 1, CoordinatorAction.Status.RUNNING, EntityType.FEED, 3); range = "?start=" + TimeUtil.addMinsToTime(endTime, -15) + "&end=" + TimeUtil.addMinsToTime(endTime, -10); r = prism.getFeedHelper().getProcessInstanceSuspend(feedName, range); InstanceUtil.validateResponse(r, 1, 0, 1, 0, 0); // wait for the 11th instance to run HadoopUtil.createFolders( serverFS.get(0), "", Arrays.asList(actions.get(10).getMissingDependencies().split("#"))); InstanceUtil.waitTillInstanceReachState( cluster2OC, feedName, 1, CoordinatorAction.Status.RUNNING, EntityType.FEED,
3); // check that the scenario produced the expected instance mix: out of 12 instances, // 1 running, 1 suspended, 4 waiting and 6 killed, matching the description above. r = prism .getFeedHelper() .getProcessInstanceStatus(feedName, "?start=" + startTime + "&numResults=12"); InstanceUtil.validateResponse(r, 12, 1, 1, 4, 6); }