Exemple #1
0
  /**
   * Run recipe with different frequencies. Submission should go through. Check frequency of the
   * launched oozie job
   */
  @Test(dataProvider = "frequencyGenerator")
  public void differentRecipeFrequenciesTest(String frequency) throws Exception {
    setUp(RecipeExecLocation.SourceCluster);
    LOGGER.info("Testing with frequency: " + frequency);
    String tblName = "myTable";
    recipeMerlin
        .withSourceDb(DB_NAME)
        .withSourceTable(tblName)
        .withFrequency(new Frequency(frequency));
    runSql(connection, "create table " + tblName + "(comment string)");
    final List<String> command = recipeMerlin.getSubmissionCommand();
    Assert.assertEquals(Bundle.runFalconCLI(command), 0, "Recipe submission failed.");
    LOGGER.info("Submission went through.");

    InstanceUtil.waitTillInstanceReachState(
        clusterOC, recipeMerlin.getName(), 1, CoordinatorAction.Status.RUNNING, EntityType.PROCESS);
    String filter = "name=FALCON_PROCESS_" + recipeMerlin.getName();
    List<BundleJob> bundleJobs = OozieUtil.getBundles(clusterOC, filter, 0, 10);
    List<String> bundleIds = OozieUtil.getBundleIds(bundleJobs);
    String bundleId = OozieUtil.getMaxId(bundleIds);
    List<CoordinatorJob> coords = clusterOC.getBundleJobInfo(bundleId).getCoordinators();
    List<String> cIds = new ArrayList<String>();
    for (CoordinatorJob coord : coords) {
      cIds.add(coord.getId());
    }
    String coordId = OozieUtil.getMinId(cIds);
    CoordinatorJob job = clusterOC.getCoordJobInfo(coordId);
    CoordinatorJob.Timeunit timeUnit = job.getTimeUnit();
    String freq = job.getFrequency();
    LOGGER.info("Frequency of running job: " + timeUnit + " " + freq);
    Assert.assertTrue(
        frequency.contains(timeUnit.name().toLowerCase().replace("_", ""))
            && frequency.contains(freq),
        "Running job has different frequency.");
  }
 /*
  * Retrieves replication coordinator instances.
  * @param client target oozie client
  * @param fName feed name
  */
 private List<CoordinatorAction> getReplicationInstances(OozieClient client, String fName)
     throws OozieClientException {
   String filter = "name=FALCON_FEED_" + fName;
   List<BundleJob> bundleJobs = OozieUtil.getBundles(client, filter, 0, 10);
   Assert.assertNotEquals(bundleJobs.size(), 0, "Could not retrieve bundles");
   List<String> bundleIds = OozieUtil.getBundleIds(bundleJobs);
   String bundleId = OozieUtil.getMaxId(bundleIds);
   LOGGER.info(String.format("Using bundle %s", bundleId));
   List<CoordinatorJob> coords = client.getBundleJobInfo(bundleId).getCoordinators();
   String coordId = null;
   for (CoordinatorJob coord : coords) {
     if (coord.getAppName().contains("FEED_REPLICATION")) {
       coordId = coord.getId();
       break;
     }
   }
   LOGGER.info(String.format("Using coordinator id: %s", coordId));
   Assert.assertNotNull(coordId, "Replication coordinator not found.");
   CoordinatorJob coordinatorJob = client.getCoordJobInfo(coordId);
   return coordinatorJob.getActions();
 }
 /** tries to update feed with invalid el exp */
 @Test(groups = {"singleCluster"})
 public void testDryRunFailureUpdateFeed() throws Exception {
   bundles[0].submitClusters(prism);
   String feed = bundles[0].getInputFeedFromBundle();
   ServiceResponse response =
       prism.getFeedHelper().submitAndSchedule(Util.URLS.SUBMIT_AND_SCHEDULE_URL, feed);
   AssertUtil.assertSucceeded(response);
   feed = Util.setFeedProperty(feed, "EntityDryRunTestProp", "${coord:someEL(1)");
   response = prism.getFeedHelper().update(feed, feed);
   validate(response);
   Assert.assertEquals(
       OozieUtil.getNumberOfBundle(cluster, EntityType.FEED, Util.readEntityName(feed)),
       1,
       "more than one bundle found after failed update request");
 }
 /** tries to update process with invalid EL exp. */
 @Test(groups = {"singleCluster"})
 public void testDryRunFailureUpdateProcess() throws Exception {
   bundles[0].setProcessValidity(
       TimeUtil.getTimeWrtSystemTime(-10), TimeUtil.getTimeWrtSystemTime(100));
   bundles[0].submitAndScheduleProcess();
   bundles[0].setProcessProperty("EntityDryRunTestProp", "${coord:someEL(1)");
   ServiceResponse response =
       prism.getProcessHelper().update(bundles[0].getProcessData(), bundles[0].getProcessData());
   validate(
       response,
       "The new entity (process) " + bundles[0].getProcessName() + " can't be scheduled");
   Assert.assertEquals(
       OozieUtil.getNumberOfBundle(clusterOC, EntityType.PROCESS, bundles[0].getProcessName()),
       1,
       "more than one bundle found after failed update request");
 }
 /** tries to update feed with invalid el exp. */
 @Test(groups = {"singleCluster"})
 public void testDryRunFailureUpdateFeed() throws Exception {
   bundles[0].submitClusters(prism);
   FeedMerlin feed = new FeedMerlin(bundles[0].getInputFeedFromBundle());
   ServiceResponse response = prism.getFeedHelper().submitAndSchedule(feed.toString());
   AssertUtil.assertSucceeded(response);
   feed.withProperty("EntityDryRunTestProp", "${coord:someEL(1)");
   response = prism.getFeedHelper().update(feed.toString(), feed.toString());
   validate(
       response,
       "The new entity (feed) " + bundles[0].getInputFeedNameFromBundle() + " can't be scheduled");
   Assert.assertEquals(
       OozieUtil.getNumberOfBundle(clusterOC, EntityType.FEED, feed.getName()),
       1,
       "more than one bundle found after failed update request");
 }
 /** Test when only empty directories exist for all instances. */
 @Test
 public void testFeedListingWhenAllEmpty() throws Exception {
   bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:21Z");
   bundles[0].setProcessConcurrency(1);
   bundles[0].submitFeedsScheduleProcess(prism);
   InstanceUtil.waitTillInstancesAreCreated(clusterOC, bundles[0].getProcessData(), 0);
   OozieUtil.createMissingDependencies(cluster, EntityType.PROCESS, processName, 0);
   InstanceUtil.waitTillInstanceReachState(
       clusterOC, processName, 1, CoordinatorAction.Status.RUNNING, EntityType.PROCESS, 5);
   FeedInstanceResult r =
       prism
           .getFeedHelper()
           .getFeedInstanceListing(
               Util.readEntityName(bundles[0].getDataSets().get(0)),
               "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z");
   validateResponse(r, 5, 0, 5, 0, 0);
 }
 /** Test when all data is available for all instances. */
 @Test
 public void testFeedListingWhenAllAvailable() throws Exception {
   bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:21Z");
   bundles[0].setProcessConcurrency(1);
   bundles[0].submitFeedsScheduleProcess(prism);
   InstanceUtil.waitTillInstancesAreCreated(clusterOC, bundles[0].getProcessData(), 0);
   List<List<String>> missingDependencies =
       OozieUtil.createMissingDependencies(cluster, EntityType.PROCESS, processName, 0);
   List<String> missingDependencyLastInstance =
       missingDependencies.get(missingDependencies.size() - 1);
   HadoopUtil.flattenAndPutDataInFolder(
       clusterFS, OSUtil.SINGLE_FILE, missingDependencyLastInstance);
   InstanceUtil.waitTillInstanceReachState(
       clusterOC, processName, 1, CoordinatorAction.Status.RUNNING, EntityType.PROCESS, 5);
   FeedInstanceResult r =
       prism
           .getFeedHelper()
           .getFeedInstanceListing(
               Util.readEntityName(bundles[0].getDataSets().get(0)),
               "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z");
   validateResponse(r, 5, 0, 0, 0, 5);
 }
 /**
  * Data is created for the feed, so instance status is available. Then, change the data path and
  * update the feed. The instance status should change to partial.
  */
 @Test
 public void testFeedListingAfterFeedDataPathUpdate() throws Exception {
   bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:21Z");
   bundles[0].setProcessConcurrency(1);
   bundles[0].submitFeedsScheduleProcess(prism);
   InstanceUtil.waitTillInstancesAreCreated(clusterOC, bundles[0].getProcessData(), 0);
   List<List<String>> missingDependencies =
       OozieUtil.createMissingDependencies(cluster, EntityType.PROCESS, processName, 0);
   List<String> missingDependencyLastInstance =
       missingDependencies.get(missingDependencies.size() - 1);
   HadoopUtil.flattenAndPutDataInFolder(
       clusterFS, OSUtil.SINGLE_FILE, missingDependencyLastInstance);
   InstanceUtil.waitTillInstanceReachState(
       clusterOC, processName, 1, CoordinatorAction.Status.RUNNING, EntityType.PROCESS, 5);
   FeedInstanceResult r =
       prism
           .getFeedHelper()
           .getFeedInstanceListing(
               Util.readEntityName(bundles[0].getDataSets().get(0)),
               "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z");
   validateResponse(r, 5, 0, 0, 0, 5);
   String inputFeed = bundles[0].getInputFeedFromBundle();
   bundles[0].setInputFeedDataPath(baseTestDir + "/inputNew" + MINUTE_DATE_PATTERN);
   ServiceResponse serviceResponse =
       prism.getFeedHelper().update(inputFeed, bundles[0].getInputFeedFromBundle());
   AssertUtil.assertSucceeded(serviceResponse);
   // Since we have not created directories for new path, the feed instance status should be
   // missing
   r =
       prism
           .getFeedHelper()
           .getFeedInstanceListing(
               Util.readEntityName(bundles[0].getDataSets().get(0)),
               "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z");
   validateResponse(r, 5, 5, 0, 0, 0);
 }
  // make sure oozie changes mentioned FALCON-389 are done on the clusters. Otherwise the test
  // will fail.
  // Noticed with hive 0.13 we need the following issues resolved to work HIVE-6848 and
  // HIVE-6868. Also oozie share libs need to have hive jars that have these jira's resolved and
  // the maven depenendcy you are using to run the tests has to have hcat that has these fixed.
  // This test can fail randomly because of https://issues.apache.org/jira/browse/FALCON-401
  @Test(dataProvider = "generateSeparators")
  public void oneSourceTwoTarget(String separator) throws Exception {
    String tcName = "HCatReplication_oneSourceTwoTarget";
    if (separator.equals("-")) {
      tcName += "_hyphen";
    } else {
      tcName += "_slash";
    }
    String tblName = tcName;
    String testHdfsDir = baseTestHDFSDir + "/" + tcName;
    HadoopUtil.recreateDir(serverFS, testHdfsDir);
    final String startDate = "2010-01-01T20:00Z";
    final String endDate = "2099-01-01T00:00Z";
    final String dataEndDate = "2010-01-01T21:00Z";
    final String tableUriPartitionFragment =
        StringUtils.join(new String[] {"#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}"}, separator);
    String tableUri = "catalog:" + dbName + ":" + tblName + tableUriPartitionFragment;
    final String datePattern = StringUtils.join(new String[] {"yyyy", "MM", "dd", "HH"}, separator);
    // use the start date for both as this will only generate 2 partitions.
    List<String> dataDates =
        TimeUtil.getMinuteDatesOnEitherSide(
            startDate, dataEndDate, 60, DateTimeFormat.forPattern(datePattern));

    final List<String> dataset =
        HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, testHdfsDir, dataDates);
    final String col1Name = "id";
    final String col2Name = "value";
    final String partitionColumn = "dt";

    ArrayList<HCatFieldSchema> cols = new ArrayList<>();
    cols.add(HCatUtil.getStringSchema(col1Name, col1Name + " comment"));
    cols.add(HCatUtil.getStringSchema(col2Name, col2Name + " comment"));
    ArrayList<HCatFieldSchema> partitionCols = new ArrayList<>();

    // create table on cluster 1 and add data to it.
    partitionCols.add(HCatUtil.getStringSchema(partitionColumn, partitionColumn + " partition"));
    createTable(clusterHC, dbName, tblName, cols, partitionCols, testHdfsDir);
    addPartitionsToTable(dataDates, dataset, "dt", dbName, tblName, clusterHC);

    // create table on target cluster.
    createTable(cluster2HC, dbName, tblName, cols, partitionCols, testHdfsDir);
    createTable(cluster3HC, dbName, tblName, cols, partitionCols, testHdfsDir);

    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);

    bundles[0].setInputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setInputFeedValidity(startDate, endDate);
    bundles[0].setInputFeedTableUri(tableUri);

    String feed = bundles[0].getDataSets().get(0);
    // set the cluster 2 as the target.
    feed =
        FeedMerlin.fromString(feed)
            .addFeedCluster(
                new FeedMerlin.FeedClusterBuilder(
                        Util.readEntityName(bundles[1].getClusters().get(0)))
                    .withRetention("months(9000)", ActionType.DELETE)
                    .withValidity(startDate, endDate)
                    .withClusterType(ClusterType.TARGET)
                    .withTableUri(tableUri)
                    .build())
            .toString();
    // set the cluster 3 as the target.
    feed =
        FeedMerlin.fromString(feed)
            .addFeedCluster(
                new FeedMerlin.FeedClusterBuilder(
                        Util.readEntityName(bundles[2].getClusters().get(0)))
                    .withRetention("months(9000)", ActionType.DELETE)
                    .withValidity(startDate, endDate)
                    .withClusterType(ClusterType.TARGET)
                    .withTableUri(tableUri)
                    .build())
            .toString();

    AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed));
    TimeUtil.sleepSeconds(TIMEOUT);
    // check if all coordinators exist
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster2OC, Util.readEntityName(feed), "REPLICATION"), 1);

    // check if all coordinators exist
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster3OC, Util.readEntityName(feed), "REPLICATION"), 1);

    // replication should start, wait while it ends
    // we will check for 2 instances so that both partitions are copied over.
    InstanceUtil.waitTillInstanceReachState(
        cluster2OC,
        Util.readEntityName(feed),
        2,
        CoordinatorAction.Status.SUCCEEDED,
        EntityType.FEED);

    // replication should start, wait while it ends
    // we will check for 2 instances so that both partitions are copied over.
    InstanceUtil.waitTillInstanceReachState(
        cluster3OC,
        Util.readEntityName(feed),
        2,
        CoordinatorAction.Status.SUCCEEDED,
        EntityType.FEED);

    // check if data was replicated correctly
    List<Path> srcData = HadoopUtil.getAllFilesRecursivelyHDFS(clusterFS, new Path(testHdfsDir));
    LOGGER.info("Data on source cluster: " + srcData);
    List<Path> cluster2TargetData =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS, new Path(testHdfsDir));
    LOGGER.info("Data on target cluster: " + cluster2TargetData);
    AssertUtil.checkForListSizes(srcData, cluster2TargetData);
    List<Path> cluster3TargetData =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster3FS, new Path(testHdfsDir));
    LOGGER.info("Data on target cluster: " + cluster3TargetData);
    AssertUtil.checkForListSizes(srcData, cluster3TargetData);
  }
  /**
   * Set feed cluster1 as target, clusters 2 and 3 as source. Run feed. Update feed and check if
   * action succeed. Check that appropriate number of replication and retention coordinators exist
   * on matching clusters.
   *
   * @throws Exception
   */
  @Test(enabled = true, timeOut = 1200000)
  public void multipleSourceOneTarget() throws Exception {

    bundles[0].setInputFeedDataPath(inputPath);
    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);

    FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    feed.clearFeedClusters();

    // use the colo string here so that the test works in embedded and distributed mode.
    String postFix = "/US/" + cluster2Colo;
    String prefix = bundles[0].getFeedDataPathPrefix();
    HadoopUtil.deleteDirIfExists(prefix.substring(1), cluster2FS);
    HadoopUtil.lateDataReplenish(cluster2FS, 5, 80, prefix, postFix);

    // use the colo string here so that the test works in embedded and distributed mode.
    postFix = "/UK/" + cluster3Colo;
    prefix = bundles[0].getFeedDataPathPrefix();
    HadoopUtil.deleteDirIfExists(prefix.substring(1), cluster3FS);
    HadoopUtil.lateDataReplenish(cluster3FS, 5, 80, prefix, postFix);

    String startTime = TimeUtil.getTimeWrtSystemTime(-30);

    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[1].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(startTime, TimeUtil.addMinsToTime(startTime, 85))
            .withClusterType(ClusterType.SOURCE)
            .withPartition("US/${cluster.colo}")
            .build());

    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(
                TimeUtil.addMinsToTime(startTime, 20), TimeUtil.addMinsToTime(startTime, 105))
            .withClusterType(ClusterType.TARGET)
            .build());

    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(
                TimeUtil.addMinsToTime(startTime, 40), TimeUtil.addMinsToTime(startTime, 130))
            .withClusterType(ClusterType.SOURCE)
            .withPartition("UK/${cluster.colo}")
            .build());

    LOGGER.info("feed: " + Util.prettyPrintXml(feed.toString()));

    AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feed.toString()));
    AssertUtil.assertSucceeded(prism.getFeedHelper().schedule(feed.toString()));

    // change feed location path
    feed.setFilePath(alternativeInputPath);

    LOGGER.info("updated feed: " + Util.prettyPrintXml(feed.toString()));

    // update feed
    AssertUtil.assertSucceeded(prism.getFeedHelper().update(feed.toString(), feed.toString()));
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "REPLICATION"), 0);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "RETENTION"), 2);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster3OC, feed.getName(), "REPLICATION"), 0);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster3OC, feed.getName(), "RETENTION"), 2);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster1OC, feed.getName(), "REPLICATION"), 4);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster1OC, feed.getName(), "RETENTION"), 2);
  }
 public OozieClient getOozieClient() {
   if (null == this.oozieClient) {
     this.oozieClient = OozieUtil.getClient(this.oozieURL);
   }
   return this.oozieClient;
 }