/**
 * Run recipe with different frequencies. Submission should go through. Check frequency of the
 * launched oozie job.
 */
@Test(dataProvider = "frequencyGenerator")
public void differentRecipeFrequenciesTest(String frequency) throws Exception {
    setUp(RecipeExecLocation.SourceCluster);
    LOGGER.info("Testing with frequency: " + frequency);
    String tblName = "myTable";
    recipeMerlin.withSourceDb(DB_NAME)
        .withSourceTable(tblName)
        .withFrequency(new Frequency(frequency));
    runSql(connection, "create table " + tblName + "(comment string)");
    final List<String> command = recipeMerlin.getSubmissionCommand();
    Assert.assertEquals(Bundle.runFalconCLI(command), 0, "Recipe submission failed.");
    LOGGER.info("Submission went through.");

    InstanceUtil.waitTillInstanceReachState(clusterOC, recipeMerlin.getName(), 1,
        CoordinatorAction.Status.RUNNING, EntityType.PROCESS);
    String filter = "name=FALCON_PROCESS_" + recipeMerlin.getName();
    List<BundleJob> bundleJobs = OozieUtil.getBundles(clusterOC, filter, 0, 10);
    List<String> bundleIds = OozieUtil.getBundleIds(bundleJobs);
    String bundleId = OozieUtil.getMaxId(bundleIds);
    List<CoordinatorJob> coords = clusterOC.getBundleJobInfo(bundleId).getCoordinators();
    List<String> cIds = new ArrayList<>();
    for (CoordinatorJob coord : coords) {
        cIds.add(coord.getId());
    }
    String coordId = OozieUtil.getMinId(cIds);
    CoordinatorJob job = clusterOC.getCoordJobInfo(coordId);
    CoordinatorJob.Timeunit timeUnit = job.getTimeUnit();
    String freq = job.getFrequency();
    LOGGER.info("Frequency of running job: " + timeUnit + " " + freq);
    Assert.assertTrue(frequency.contains(timeUnit.name().toLowerCase().replace("_", ""))
            && frequency.contains(freq),
        "Running job has different frequency.");
}
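// NOTE: The "frequencyGenerator" data provider referenced above is not part of this
// excerpt. A minimal sketch of what it might look like, assuming Falcon-style frequency
// strings such as minutes(n)/hours(n)/days(n)/months(n); the exact values are an
// assumption, not the suite's actual list.
@DataProvider(name = "frequencyGenerator")
public Object[][] frequencyGenerator() {
    return new Object[][]{{"minutes(10)"}, {"hours(5)"}, {"days(2)"}, {"months(1)"}};
}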
/**
 * Retrieves replication coordinator instances.
 * @param client target oozie client
 * @param fName feed name
 */
private List<CoordinatorAction> getReplicationInstances(OozieClient client, String fName)
    throws OozieClientException {
    String filter = "name=FALCON_FEED_" + fName;
    List<BundleJob> bundleJobs = OozieUtil.getBundles(client, filter, 0, 10);
    Assert.assertNotEquals(bundleJobs.size(), 0, "Could not retrieve bundles");
    List<String> bundleIds = OozieUtil.getBundleIds(bundleJobs);
    String bundleId = OozieUtil.getMaxId(bundleIds);
    LOGGER.info(String.format("Using bundle %s", bundleId));
    List<CoordinatorJob> coords = client.getBundleJobInfo(bundleId).getCoordinators();
    String coordId = null;
    for (CoordinatorJob coord : coords) {
        if (coord.getAppName().contains("FEED_REPLICATION")) {
            coordId = coord.getId();
            break;
        }
    }
    LOGGER.info(String.format("Using coordinator id: %s", coordId));
    Assert.assertNotNull(coordId, "Replication coordinator not found.");
    CoordinatorJob coordinatorJob = client.getCoordJobInfo(coordId);
    return coordinatorJob.getActions();
}
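// Hypothetical usage of the helper above: count replication instances that have
// already succeeded on a target cluster's Oozie. The method itself and its parameter
// names (targetOC, fName) are illustrative assumptions, not part of this excerpt.
private long countSucceededReplicationInstances(OozieClient targetOC, String fName)
    throws OozieClientException {
    long succeeded = 0;
    for (CoordinatorAction action : getReplicationInstances(targetOC, fName)) {
        if (action.getStatus() == CoordinatorAction.Status.SUCCEEDED) {
            succeeded++;
        }
    }
    return succeeded;
}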
/** Tries to update a feed with an invalid EL expression. */
@Test(groups = {"singleCluster"})
public void testDryRunFailureUpdateFeed() throws Exception {
    bundles[0].submitClusters(prism);
    String feed = bundles[0].getInputFeedFromBundle();
    ServiceResponse response =
        prism.getFeedHelper().submitAndSchedule(Util.URLS.SUBMIT_AND_SCHEDULE_URL, feed);
    AssertUtil.assertSucceeded(response);
    feed = Util.setFeedProperty(feed, "EntityDryRunTestProp", "${coord:someEL(1)");
    response = prism.getFeedHelper().update(feed, feed);
    validate(response);
    Assert.assertEquals(
        OozieUtil.getNumberOfBundle(cluster, EntityType.FEED, Util.readEntityName(feed)), 1,
        "more than one bundle found after failed update request");
}
/** Tries to update a process with an invalid EL expression. */
@Test(groups = {"singleCluster"})
public void testDryRunFailureUpdateProcess() throws Exception {
    bundles[0].setProcessValidity(TimeUtil.getTimeWrtSystemTime(-10),
        TimeUtil.getTimeWrtSystemTime(100));
    bundles[0].submitAndScheduleProcess();
    bundles[0].setProcessProperty("EntityDryRunTestProp", "${coord:someEL(1)");
    ServiceResponse response = prism.getProcessHelper()
        .update(bundles[0].getProcessData(), bundles[0].getProcessData());
    validate(response,
        "The new entity (process) " + bundles[0].getProcessName() + " can't be scheduled");
    Assert.assertEquals(
        OozieUtil.getNumberOfBundle(clusterOC, EntityType.PROCESS, bundles[0].getProcessName()),
        1, "more than one bundle found after failed update request");
}
/** Tries to update a feed with an invalid EL expression. */
@Test(groups = {"singleCluster"})
public void testDryRunFailureUpdateFeed() throws Exception {
    bundles[0].submitClusters(prism);
    FeedMerlin feed = new FeedMerlin(bundles[0].getInputFeedFromBundle());
    ServiceResponse response = prism.getFeedHelper().submitAndSchedule(feed.toString());
    AssertUtil.assertSucceeded(response);
    feed.withProperty("EntityDryRunTestProp", "${coord:someEL(1)");
    response = prism.getFeedHelper().update(feed.toString(), feed.toString());
    validate(response,
        "The new entity (feed) " + bundles[0].getInputFeedNameFromBundle()
            + " can't be scheduled");
    Assert.assertEquals(
        OozieUtil.getNumberOfBundle(clusterOC, EntityType.FEED, feed.getName()), 1,
        "more than one bundle found after failed update request");
}
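// The validate(...) helper used by the dry-run tests above is not part of this excerpt.
// A plausible sketch, assuming it only asserts that the update failed and that the
// failure message carries the expected text; the real helper may check more. (The first
// testDryRunFailureUpdateFeed also calls a one-argument validate(response) overload,
// which presumably just asserts failure.)
private void validate(ServiceResponse response, String message) throws Exception {
    AssertUtil.assertFailed(response);
    Assert.assertTrue(response.getMessage().contains(message),
        "Update response did not contain the expected dry-run failure message.");
}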
/** Test when only empty directories exist for all instances. */
@Test
public void testFeedListingWhenAllEmpty() throws Exception {
    bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:21Z");
    bundles[0].setProcessConcurrency(1);
    bundles[0].submitFeedsScheduleProcess(prism);
    InstanceUtil.waitTillInstancesAreCreated(clusterOC, bundles[0].getProcessData(), 0);
    OozieUtil.createMissingDependencies(cluster, EntityType.PROCESS, processName, 0);
    InstanceUtil.waitTillInstanceReachState(clusterOC, processName, 1,
        CoordinatorAction.Status.RUNNING, EntityType.PROCESS, 5);
    FeedInstanceResult r = prism.getFeedHelper()
        .getFeedInstanceListing(Util.readEntityName(bundles[0].getDataSets().get(0)),
            "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z");
    validateResponse(r, 5, 0, 5, 0, 0);
}
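// The validateResponse(...) helper is not part of this excerpt. Its parameter order is
// reconstructed from the call sites in these listing tests: total instances, then counts
// of missing, empty, partial, and available instances (all-empty expects 5 empty,
// all-available expects 5 available, the post-update case expects 5 missing). The
// accessor names below (getInstances(), getStatus()) are assumptions; this is a sketch,
// not the suite's actual implementation.
private void validateResponse(FeedInstanceResult r, int totalCount, int missingCount,
        int emptyCount, int partialCount, int availableCount) {
    FeedInstanceResult.Instance[] instances = r.getInstances();
    Assert.assertEquals(instances.length, totalCount, "Unexpected total instance count.");
    int missing = 0, empty = 0, partial = 0, available = 0;
    for (FeedInstanceResult.Instance instance : instances) {
        switch (instance.getStatus()) {
            case "MISSING":   missing++;   break;
            case "EMPTY":     empty++;     break;
            case "PARTIAL":   partial++;   break;
            case "AVAILABLE": available++; break;
            default: break;
        }
    }
    Assert.assertEquals(missing, missingCount, "Unexpected missing count.");
    Assert.assertEquals(empty, emptyCount, "Unexpected empty count.");
    Assert.assertEquals(partial, partialCount, "Unexpected partial count.");
    Assert.assertEquals(available, availableCount, "Unexpected available count.");
}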
/** Test when all data is available for all instances. */
@Test
public void testFeedListingWhenAllAvailable() throws Exception {
    bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:21Z");
    bundles[0].setProcessConcurrency(1);
    bundles[0].submitFeedsScheduleProcess(prism);
    InstanceUtil.waitTillInstancesAreCreated(clusterOC, bundles[0].getProcessData(), 0);
    List<List<String>> missingDependencies =
        OozieUtil.createMissingDependencies(cluster, EntityType.PROCESS, processName, 0);
    List<String> missingDependencyLastInstance =
        missingDependencies.get(missingDependencies.size() - 1);
    HadoopUtil.flattenAndPutDataInFolder(clusterFS, OSUtil.SINGLE_FILE,
        missingDependencyLastInstance);
    InstanceUtil.waitTillInstanceReachState(clusterOC, processName, 1,
        CoordinatorAction.Status.RUNNING, EntityType.PROCESS, 5);
    FeedInstanceResult r = prism.getFeedHelper()
        .getFeedInstanceListing(Util.readEntityName(bundles[0].getDataSets().get(0)),
            "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z");
    validateResponse(r, 5, 0, 0, 0, 5);
}
/**
 * Data is created for the feed, so instance status is available. Then, change the data path and
 * update the feed. Since no data exists at the new path, the instance status should change to
 * missing.
 */
@Test
public void testFeedListingAfterFeedDataPathUpdate() throws Exception {
    bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:21Z");
    bundles[0].setProcessConcurrency(1);
    bundles[0].submitFeedsScheduleProcess(prism);
    InstanceUtil.waitTillInstancesAreCreated(clusterOC, bundles[0].getProcessData(), 0);
    List<List<String>> missingDependencies =
        OozieUtil.createMissingDependencies(cluster, EntityType.PROCESS, processName, 0);
    List<String> missingDependencyLastInstance =
        missingDependencies.get(missingDependencies.size() - 1);
    HadoopUtil.flattenAndPutDataInFolder(clusterFS, OSUtil.SINGLE_FILE,
        missingDependencyLastInstance);
    InstanceUtil.waitTillInstanceReachState(clusterOC, processName, 1,
        CoordinatorAction.Status.RUNNING, EntityType.PROCESS, 5);
    FeedInstanceResult r = prism.getFeedHelper()
        .getFeedInstanceListing(Util.readEntityName(bundles[0].getDataSets().get(0)),
            "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z");
    validateResponse(r, 5, 0, 0, 0, 5);

    String inputFeed = bundles[0].getInputFeedFromBundle();
    bundles[0].setInputFeedDataPath(baseTestDir + "/inputNew" + MINUTE_DATE_PATTERN);
    ServiceResponse serviceResponse =
        prism.getFeedHelper().update(inputFeed, bundles[0].getInputFeedFromBundle());
    AssertUtil.assertSucceeded(serviceResponse);
    // Since we have not created directories for the new path, the feed instance status
    // should be missing.
    r = prism.getFeedHelper()
        .getFeedInstanceListing(Util.readEntityName(bundles[0].getDataSets().get(0)),
            "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z");
    validateResponse(r, 5, 5, 0, 0, 0);
}
// Make sure the oozie changes mentioned in FALCON-389 are done on the clusters,
// otherwise the test will fail.
// Noticed with hive 0.13: HIVE-6848 and HIVE-6868 need to be resolved for this to work.
// The oozie share libs also need hive jars with those JIRAs resolved, and the maven
// dependency used to run the tests has to have an hcat with those fixes.
// This test can fail randomly because of https://issues.apache.org/jira/browse/FALCON-401
@Test(dataProvider = "generateSeparators")
public void oneSourceTwoTarget(String separator) throws Exception {
    String tcName = "HCatReplication_oneSourceTwoTarget";
    if (separator.equals("-")) {
        tcName += "_hyphen";
    } else {
        tcName += "_slash";
    }
    String tblName = tcName;
    String testHdfsDir = baseTestHDFSDir + "/" + tcName;
    HadoopUtil.recreateDir(serverFS, testHdfsDir);
    final String startDate = "2010-01-01T20:00Z";
    final String endDate = "2099-01-01T00:00Z";
    final String dataEndDate = "2010-01-01T21:00Z";
    final String tableUriPartitionFragment = StringUtils.join(
        new String[]{"#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}"}, separator);
    String tableUri = "catalog:" + dbName + ":" + tblName + tableUriPartitionFragment;
    final String datePattern =
        StringUtils.join(new String[]{"yyyy", "MM", "dd", "HH"}, separator);
    // use the start date for both as this will only generate 2 partitions.
    List<String> dataDates = TimeUtil.getMinuteDatesOnEitherSide(startDate, dataEndDate, 60,
        DateTimeFormat.forPattern(datePattern));
    final List<String> dataset =
        HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, testHdfsDir, dataDates);
    final String col1Name = "id";
    final String col2Name = "value";
    final String partitionColumn = "dt";
    ArrayList<HCatFieldSchema> cols = new ArrayList<>();
    cols.add(HCatUtil.getStringSchema(col1Name, col1Name + " comment"));
    cols.add(HCatUtil.getStringSchema(col2Name, col2Name + " comment"));
    ArrayList<HCatFieldSchema> partitionCols = new ArrayList<>();
    // create table on cluster 1 and add data to it.
    partitionCols.add(HCatUtil.getStringSchema(partitionColumn, partitionColumn + " partition"));
    createTable(clusterHC, dbName, tblName, cols, partitionCols, testHdfsDir);
    addPartitionsToTable(dataDates, dataset, "dt", dbName, tblName, clusterHC);
    // create the table on both target clusters.
    createTable(cluster2HC, dbName, tblName, cols, partitionCols, testHdfsDir);
    createTable(cluster3HC, dbName, tblName, cols, partitionCols, testHdfsDir);

    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);
    bundles[0].setInputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setInputFeedValidity(startDate, endDate);
    bundles[0].setInputFeedTableUri(tableUri);
    String feed = bundles[0].getDataSets().get(0);
    // set cluster 2 as a target.
    feed = FeedMerlin.fromString(feed)
        .addFeedCluster(new FeedMerlin.FeedClusterBuilder(
                Util.readEntityName(bundles[1].getClusters().get(0)))
            .withRetention("months(9000)", ActionType.DELETE)
            .withValidity(startDate, endDate)
            .withClusterType(ClusterType.TARGET)
            .withTableUri(tableUri)
            .build())
        .toString();
    // set cluster 3 as a target.
    feed = FeedMerlin.fromString(feed)
        .addFeedCluster(new FeedMerlin.FeedClusterBuilder(
                Util.readEntityName(bundles[2].getClusters().get(0)))
            .withRetention("months(9000)", ActionType.DELETE)
            .withValidity(startDate, endDate)
            .withClusterType(ClusterType.TARGET)
            .withTableUri(tableUri)
            .build())
        .toString();

    AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed));
    TimeUtil.sleepSeconds(TIMEOUT);
    // check that the replication coordinator exists on each target cluster.
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster2OC, Util.readEntityName(feed), "REPLICATION"), 1);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster3OC, Util.readEntityName(feed), "REPLICATION"), 1);
    // replication should start; wait while it ends. We check for 2 instances so that both
    // partitions are copied over.
    InstanceUtil.waitTillInstanceReachState(cluster2OC, Util.readEntityName(feed), 2,
        CoordinatorAction.Status.SUCCEEDED, EntityType.FEED);
    InstanceUtil.waitTillInstanceReachState(cluster3OC, Util.readEntityName(feed), 2,
        CoordinatorAction.Status.SUCCEEDED, EntityType.FEED);
    // check if data was replicated correctly
    List<Path> srcData = HadoopUtil.getAllFilesRecursivelyHDFS(clusterFS, new Path(testHdfsDir));
    LOGGER.info("Data on source cluster: " + srcData);
    List<Path> cluster2TargetData =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS, new Path(testHdfsDir));
    LOGGER.info("Data on target cluster 2: " + cluster2TargetData);
    AssertUtil.checkForListSizes(srcData, cluster2TargetData);
    List<Path> cluster3TargetData =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster3FS, new Path(testHdfsDir));
    LOGGER.info("Data on target cluster 3: " + cluster3TargetData);
    AssertUtil.checkForListSizes(srcData, cluster3TargetData);
}
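// The "generateSeparators" data provider is not part of this excerpt. Given the
// hyphen/slash branching at the top of the test, it presumably supplies exactly those
// two partition separators; a sketch under that assumption:
@DataProvider(name = "generateSeparators")
public Object[][] generateSeparators() {
    return new Object[][]{{"-"}, {"/"}};
}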
/**
 * Set feed cluster1 as target, clusters 2 and 3 as source. Run feed. Update feed and check that
 * the update succeeds. Check that the appropriate number of replication and retention
 * coordinators exist on the matching clusters.
 *
 * @throws Exception
 */
@Test(enabled = true, timeOut = 1200000)
public void multipleSourceOneTarget() throws Exception {
    bundles[0].setInputFeedDataPath(inputPath);
    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);
    FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    feed.clearFeedClusters();

    // use the colo string here so that the test works in embedded and distributed mode.
    String postFix = "/US/" + cluster2Colo;
    String prefix = bundles[0].getFeedDataPathPrefix();
    HadoopUtil.deleteDirIfExists(prefix.substring(1), cluster2FS);
    HadoopUtil.lateDataReplenish(cluster2FS, 5, 80, prefix, postFix);

    // use the colo string here so that the test works in embedded and distributed mode.
    postFix = "/UK/" + cluster3Colo;
    prefix = bundles[0].getFeedDataPathPrefix();
    HadoopUtil.deleteDirIfExists(prefix.substring(1), cluster3FS);
    HadoopUtil.lateDataReplenish(cluster3FS, 5, 80, prefix, postFix);

    String startTime = TimeUtil.getTimeWrtSystemTime(-30);
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[1].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(startTime, TimeUtil.addMinsToTime(startTime, 85))
            .withClusterType(ClusterType.SOURCE)
            .withPartition("US/${cluster.colo}")
            .build());
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(TimeUtil.addMinsToTime(startTime, 20),
                TimeUtil.addMinsToTime(startTime, 105))
            .withClusterType(ClusterType.TARGET)
            .build());
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(TimeUtil.addMinsToTime(startTime, 40),
                TimeUtil.addMinsToTime(startTime, 130))
            .withClusterType(ClusterType.SOURCE)
            .withPartition("UK/${cluster.colo}")
            .build());
    LOGGER.info("feed: " + Util.prettyPrintXml(feed.toString()));

    AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feed.toString()));
    AssertUtil.assertSucceeded(prism.getFeedHelper().schedule(feed.toString()));

    // change feed location path
    feed.setFilePath(alternativeInputPath);
    LOGGER.info("updated feed: " + Util.prettyPrintXml(feed.toString()));

    // update feed; the doubled counts below presumably reflect coordinators from both
    // the pre-update and post-update bundles.
    AssertUtil.assertSucceeded(prism.getFeedHelper().update(feed.toString(), feed.toString()));
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "REPLICATION"), 0);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "RETENTION"), 2);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster3OC, feed.getName(), "REPLICATION"), 0);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster3OC, feed.getName(), "RETENTION"), 2);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster1OC, feed.getName(), "REPLICATION"), 4);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster1OC, feed.getName(), "RETENTION"), 2);
}
public OozieClient getOozieClient() {
    if (null == this.oozieClient) {
        this.oozieClient = OozieUtil.getClient(this.oozieURL);
    }
    return this.oozieClient;
}
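// The getter above lazily creates the client but is not thread-safe: two parallel
// callers could each construct a client. If this helper is shared across parallel
// TestNG threads (an assumption about how the suite is run), a synchronized variant
// avoids that; a minimal sketch:
public synchronized OozieClient getOozieClientThreadSafe() {
    if (this.oozieClient == null) {
        this.oozieClient = OozieUtil.getClient(this.oozieURL);
    }
    return this.oozieClient;
}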