/**
 * Class-level fixture: calls {@code HadoopUtil.uploadDir} with {@code aggregateWorkflowDir} and
 * {@code OSUtil.RESOURCES_OOZIE} on {@code clusterFS}, then builds an EL bundle for
 * {@code cluster} and sets its input feed data path to {@code feedInputPath}. The bundle is a
 * local and is not used after this method returns.
 *
 * @throws Exception if any of the helper calls fails
 */
@BeforeClass(alwaysRun = true)
 public void createTestData() throws Exception {
   LOGGER.info("in @BeforeClass");
   HadoopUtil.uploadDir(clusterFS, aggregateWorkflowDir, OSUtil.RESOURCES_OOZIE);
   final Bundle elBundle = new Bundle(BundleUtil.readELBundle(), cluster);
   elBundle.setInputFeedDataPath(feedInputPath);
 }
 /**
  * Class-level cleanup: rebuilds the EL bundle for {@code cluster}, points it at
  * {@code feedInputPath}, and deletes the resulting feed data path prefix from {@code clusterFS}
  * if it exists.
  *
  * @throws Exception if any of the helper calls fails
  */
 @AfterClass(alwaysRun = true)
 public void deleteData() throws Exception {
   LOGGER.info("in @AfterClass");
   final Bundle elBundle = new Bundle(BundleUtil.readELBundle(), cluster);
   elBundle.setInputFeedDataPath(feedInputPath);
   // The first character of the prefix is dropped before the path is handed to the delete helper.
   final String feedPrefix = elBundle.getFeedDataPathPrefix();
   HadoopUtil.deleteDirIfExists(feedPrefix.substring(1), clusterFS);
 }
Example #3
0
  /**
   * Run recipe with different frequencies. Submission should go through. Check frequency of the
   * launched oozie job.
   *
   * <p>After the first instance reaches RUNNING, the test looks up the bundles matching the
   * recipe's process name, takes the bundle with the max id, then the coordinator with the min id
   * within it, and checks that the coordinator's time unit and frequency both occur in the
   * requested frequency string.
   *
   * @param frequency frequency expression supplied by the {@code frequencyGenerator} data provider
   * @throws Exception if setup, submission or any Oozie lookup fails
   */
  @Test(dataProvider = "frequencyGenerator")
  public void differentRecipeFrequenciesTest(String frequency) throws Exception {
    setUp(RecipeExecLocation.SourceCluster);
    LOGGER.info("Testing with frequency: " + frequency);
    String tblName = "myTable";
    recipeMerlin
        .withSourceDb(DB_NAME)
        .withSourceTable(tblName)
        .withFrequency(new Frequency(frequency));
    runSql(connection, "create table " + tblName + "(comment string)");
    final List<String> command = recipeMerlin.getSubmissionCommand();
    Assert.assertEquals(Bundle.runFalconCLI(command), 0, "Recipe submission failed.");
    LOGGER.info("Submission went through.");

    InstanceUtil.waitTillInstanceReachState(
        clusterOC, recipeMerlin.getName(), 1, CoordinatorAction.Status.RUNNING, EntityType.PROCESS);
    String filter = "name=FALCON_PROCESS_" + recipeMerlin.getName();
    List<BundleJob> bundleJobs = OozieUtil.getBundles(clusterOC, filter, 0, 10);
    List<String> bundleIds = OozieUtil.getBundleIds(bundleJobs);
    String bundleId = OozieUtil.getMaxId(bundleIds);
    List<CoordinatorJob> coords = clusterOC.getBundleJobInfo(bundleId).getCoordinators();
    List<String> cIds = new ArrayList<String>();
    for (CoordinatorJob coord : coords) {
      cIds.add(coord.getId());
    }
    String coordId = OozieUtil.getMinId(cIds);
    CoordinatorJob job = clusterOC.getCoordJobInfo(coordId);
    CoordinatorJob.Timeunit timeUnit = job.getTimeUnit();
    String freq = job.getFrequency();
    LOGGER.info("Frequency of running job: " + timeUnit + " " + freq);
    // Lower-case with a fixed locale: with the default locale an enum name containing 'I'
    // (e.g. MINUTE) is lower-cased to a dotless i under a Turkish locale, which would make the
    // contains() check fail even though the frequency matches.
    final String unitName = timeUnit.name().toLowerCase(java.util.Locale.ROOT).replace("_", "");
    Assert.assertTrue(
        frequency.contains(unitName) && frequency.contains(freq),
        "Running job has different frequency.");
  }
Example #4
0
  /**
   * Common per-test preparation: obtains HCat clients for both clusters, builds unique HCat
   * bundles for them, submits the cluster(s) selected by {@code recipeExecLocation}, loads the
   * Hive DR recipe (the secure variant when {@code MerlinConstants.IS_SECURE} is set) and
   * recreates database {@code hdr_sdb1} over fresh Hive JDBC connections on both clusters.
   *
   * @param recipeExecLocation decides which bundle is submitted and which cluster runs the recipe
   * @throws Exception if any helper call or SQL statement fails
   */
  private void setUp(RecipeExecLocation recipeExecLocation) throws Exception {
    clusterHC = cluster.getClusterHelper().getHCatClient();
    clusterHC2 = cluster2.getClusterHelper().getHCatClient();
    bundles[0] = new Bundle(BundleUtil.readHCatBundle(), cluster);
    bundles[1] = new Bundle(BundleUtil.readHCatBundle(), cluster2);
    bundles[0].generateUniqueBundle(this);
    bundles[1].generateUniqueBundle(this);
    final ClusterMerlin sourceElement = bundles[0].getClusterElement();
    final ClusterMerlin targetElement = bundles[1].getClusterElement();
    final String recipeDir = MerlinConstants.IS_SECURE ? "HiveDrSecureRecipe" : "HiveDrRecipe";
    Bundle.submitCluster(recipeExecLocation.getRecipeBundle(bundles[0], bundles[1]));

    // Load the recipe template first, then apply the defaults shared by every test.
    recipeMerlin =
        RecipeMerlin.readFromDir(recipeDir, FalconCLI.RecipeOperation.HIVE_DISASTER_RECOVERY)
            .withRecipeCluster(recipeExecLocation.getRecipeCluster(sourceElement, targetElement));
    recipeMerlin
        .withSourceCluster(sourceElement)
        .withTargetCluster(targetElement)
        .withFrequency(new Frequency("5", Frequency.TimeUnit.minutes))
        .withValidity(TimeUtil.getTimeWrtSystemTime(-5), TimeUtil.getTimeWrtSystemTime(15));
    recipeMerlin.setUniqueName(this.getClass().getSimpleName());

    // Start from an empty database on the first cluster ...
    connection = cluster.getClusterHelper().getHiveJdbcConnection();
    runSql(connection, "drop database if exists hdr_sdb1 cascade");
    runSql(connection, "create database hdr_sdb1");
    runSql(connection, "use hdr_sdb1");

    // ... and on the second cluster.
    connection2 = cluster2.getClusterHelper().getHiveJdbcConnection();
    runSql(connection2, "drop database if exists hdr_sdb1 cascade");
    runSql(connection2, "create database hdr_sdb1");
    runSql(connection2, "use hdr_sdb1");
  }
Example #5
0
  /**
   * Creates a single-column table, bootstrap-copies it to the second cluster, submits the recipe
   * and then changes the column's type and comment on the source. Once the first recipe instance
   * has succeeded, the table on both clusters is expected to be equal.
   *
   * @throws Exception if setup, SQL execution, submission or waiting fails
   */
  @Test
  public void drChangeColumn() throws Exception {
    final RecipeExecLocation execLocation = RecipeExecLocation.SourceCluster;
    setUp(execLocation);
    final String table = "tableForColumnChange";
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(table);
    final List<String> submitCommand = recipeMerlin.getSubmissionCommand();
    final String recipeName = recipeMerlin.getName();
    runSql(connection, "create table " + table + "(id int)");

    bootstrapCopy(connection, clusterFS, table, connection2, clusterFS2, table);

    Assert.assertEquals(Bundle.runFalconCLI(submitCommand), 0, "Recipe submission failed.");
    // The column change is made after the recipe has been submitted.
    runSql(connection, "ALTER TABLE " + table + " CHANGE id id STRING COMMENT 'some_comment'");

    InstanceUtil.waitTillInstanceReachState(
        execLocation.getRecipeOC(clusterOC, clusterOC2),
        recipeName,
        1,
        CoordinatorAction.Status.SUCCEEDED,
        EntityType.PROCESS);

    HiveAssert.assertTableEqual(
            cluster,
            clusterHC.getTable(DB_NAME, table),
            cluster2,
            clusterHC2.getTable(DB_NAME, table),
            new NotifyingAssert(true))
        .assertAll();
  }
Example #6
0
  /**
   * One source table, one destination table. A table property and the table comment are set
   * before the bootstrap copy and changed afterwards on the source; after the first recipe
   * instance succeeds both tables are expected to be equal.
   *
   * @throws Exception if setup, SQL execution, submission or waiting fails
   */
  @Test
  public void drChangeCommentAndPropertyTest() throws Exception {
    final RecipeExecLocation execLocation = RecipeExecLocation.SourceCluster;
    setUp(execLocation);
    final String table = "myTable";
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(table);
    final List<String> submitCommand = recipeMerlin.getSubmissionCommand();

    // Statement fragments shared by the ALTER and DESCRIBE calls below.
    final String setTblProperties = "ALTER TABLE " + table + " SET TBLPROPERTIES";
    final String describeTable = "describe extended " + table;

    runSql(connection, "create table " + table + "(field string)");
    // initial property value and comment, both replaced after the bootstrap copy
    runSql(connection, setTblProperties + "('someProperty' = 'initialValue')");
    runSql(
        connection,
        setTblProperties + "('comment' = 'this comment will be changed, SHOULD NOT appear')");

    LOGGER.info(table + " before bootstrap copy: ");
    runSql(connection, describeTable);

    bootstrapCopy(connection, clusterFS, table, connection2, clusterFS2, table);

    // post-bootstrap modifications that replication has to carry over
    runSql(connection, setTblProperties + "('someProperty' = 'anotherValue')");
    runSql(
        connection,
        setTblProperties + "('comment' = 'this comment should appear after replication done')");

    LOGGER.info(table + " after modifications, before replication: ");
    runSql(connection, describeTable);

    Assert.assertEquals(Bundle.runFalconCLI(submitCommand), 0, "Recipe submission failed.");

    InstanceUtil.waitTillInstanceReachState(
        execLocation.getRecipeOC(clusterOC, clusterOC2),
        recipeMerlin.getName(),
        1,
        CoordinatorAction.Status.SUCCEEDED,
        EntityType.PROCESS);

    HiveAssert.assertTableEqual(
            cluster,
            clusterHC.getTable(DB_NAME, table),
            cluster2,
            clusterHC2.getTable(DB_NAME, table),
            new NotifyingAssert(true))
        .assertAll();
  }
  /**
   * Class-level fixture: calls {@code HadoopUtil.uploadDir} for the workflow directory, builds a
   * unique EL bundle for {@code cluster}, clears the feed data path prefix on {@code clusterFS}
   * and fills it with data for dates generated between 2010-01-01T23:20Z and 2010-01-02T01:21Z
   * (third argument 20).
   *
   * @throws Exception if any helper call fails
   */
  @BeforeClass(alwaysRun = true)
  public void createTestData() throws Exception {
    LOGGER.info("in @BeforeClass");
    HadoopUtil.uploadDir(clusterFS, aggregateWorkflowDir, OSUtil.RESOURCES_OOZIE);

    final Bundle template = BundleUtil.readELBundle();
    template.generateUniqueBundle();
    final Bundle clusterBundle = new Bundle(template, cluster);
    clusterBundle.setInputFeedDataPath(feedInputPath);

    // Remove leftovers from earlier runs; the first character of the prefix is dropped for the
    // delete call only.
    final String feedPrefix = clusterBundle.getFeedDataPathPrefix();
    HadoopUtil.deleteDirIfExists(feedPrefix.substring(1), clusterFS);

    final List<String> dataDates =
        TimeUtil.getMinuteDatesOnEitherSide("2010-01-01T23:20Z", "2010-01-02T01:21Z", 20);
    HadoopUtil.flattenAndPutDataInFolder(clusterFS, OSUtil.NORMAL_INPUT, feedPrefix, dataDates);
  }
Example #8
0
  /**
   * Replicates two source tables with a single recipe (comma-separated source table list) that
   * runs on the target cluster. Each table is bootstrap-copied, receives one extra row and is
   * expected to be equal on both clusters after the first recipe instance succeeds.
   *
   * @throws Exception if setup, SQL execution, submission or waiting fails
   */
  @Test
  public void drTwoTablesOneRequest() throws Exception {
    final RecipeExecLocation execLocation = RecipeExecLocation.TargetCluster;
    setUp(execLocation);
    final String firstTable = "firstTableDR";
    final String secondTable = "secondTableDR";
    final String[] tables = {firstTable, secondTable};
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(firstTable + ',' + secondTable);
    final List<String> submitCommand = recipeMerlin.getSubmissionCommand();

    // Each phase handles the first table, then the second one.
    for (String table : tables) {
      runSql(connection, "create table " + table + "(comment string)");
    }
    for (String table : tables) {
      bootstrapCopy(connection, clusterFS, table, connection2, clusterFS2, table);
    }
    for (String table : tables) {
      runSql(
          connection,
          "insert into table "
              + table
              + " values('this string has been added post bootstrap - should appear after dr')");
    }

    Assert.assertEquals(Bundle.runFalconCLI(submitCommand), 0, "Recipe submission failed.");

    InstanceUtil.waitTillInstanceReachState(
        execLocation.getRecipeOC(clusterOC, clusterOC2),
        recipeMerlin.getName(),
        1,
        CoordinatorAction.Status.SUCCEEDED,
        EntityType.PROCESS);

    // Collect the comparison results for both tables before reporting them.
    final NotifyingAssert softAssert = new NotifyingAssert(true);
    for (String table : tables) {
      HiveAssert.assertTableEqual(
          cluster,
          clusterHC.getTable(DB_NAME, table),
          cluster2,
          clusterHC2.getTable(DB_NAME, table),
          softAssert);
    }
    softAssert.assertAll();
  }
Example #9
0
  /**
   * Replicates a table declared with the HCatalog JSON SerDe. After the bootstrap copy a row is
   * inserted and a SerDe property is set on the source; the tables on both clusters are expected
   * to be equal once the first recipe instance has succeeded.
   *
   * @throws Exception if setup, SQL execution, submission or waiting fails
   */
  @Test
  public void drSerDeWithProperties() throws Exception {
    final RecipeExecLocation execLocation = RecipeExecLocation.SourceCluster;
    setUp(execLocation);
    final String table = "serdeTable";
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(table);
    final List<String> submitCommand = recipeMerlin.getSubmissionCommand();

    final String createStatement =
        "create table "
            + table
            + "(comment string) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'";
    runSql(connection, createStatement);

    bootstrapCopy(connection, clusterFS, table, connection2, clusterFS2, table);

    // post-bootstrap changes: one new row and one new SerDe property
    final String insertStatement =
        "insert into table "
            + table
            + " values('this string has been added post bootstrap - should appear after dr')";
    runSql(connection, insertStatement);
    runSql(connection, "ALTER TABLE " + table + " SET SERDEPROPERTIES ('someProperty' = 'value')");

    Assert.assertEquals(Bundle.runFalconCLI(submitCommand), 0, "Recipe submission failed.");

    InstanceUtil.waitTillInstanceReachState(
        execLocation.getRecipeOC(clusterOC, clusterOC2),
        recipeMerlin.getName(),
        1,
        CoordinatorAction.Status.SUCCEEDED,
        EntityType.PROCESS);

    HiveAssert.assertTableEqual(
            cluster,
            clusterHC.getTable(DB_NAME, table),
            cluster2,
            clusterHC2.getTable(DB_NAME, table),
            new NotifyingAssert(true))
        .assertAll();
  }
Example #10
0
  /**
   * Creates a table through {@code createExternalTable} on the source, bootstrap-copies it,
   * renames a column and runs the recipe. After the first instance succeeds the tables are
   * compared (with the trailing {@code false} flag of {@code HiveAssert.assertTableEqual};
   * NOTE(review): presumably this relaxes the comparison for table type/properties - confirm),
   * and the source and destination are additionally expected to differ in table type and in the
   * {@code EXTERNAL} table property.
   *
   * @throws Exception if setup, SQL execution, submission or waiting fails
   */
  @Test
  public void drExternalToNonExternal() throws Exception {
    final RecipeExecLocation recipeExecLocation = RecipeExecLocation.SourceCluster;
    setUp(recipeExecLocation);
    final String tblName = "externalToNonExternal";
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(tblName);
    final List<String> command = recipeMerlin.getSubmissionCommand();

    createExternalTable(connection, clusterFS, baseTestHDFSDir + "click_data/", tblName);
    bootstrapCopy(connection, clusterFS, tblName, connection2, clusterFS2, tblName);

    // change column name
    runSql(connection, "alter table " + tblName + " change column data data_new string");

    Assert.assertEquals(Bundle.runFalconCLI(command), 0, "Recipe submission failed.");

    InstanceUtil.waitTillInstanceReachState(
        recipeExecLocation.getRecipeOC(clusterOC, clusterOC2),
        recipeMerlin.getName(),
        1,
        CoordinatorAction.Status.SUCCEEDED,
        EntityType.PROCESS);

    final NotifyingAssert anAssert = new NotifyingAssert(true);
    HiveAssert.assertTableEqual(
        cluster,
        clusterHC.getTable(DB_NAME, tblName),
        cluster2,
        clusterHC2.getTable(DB_NAME, tblName),
        anAssert,
        false);
    // Compare table type with table type. Comparing the destination's table type with the
    // source's table *name* can never be equal, so that check would pass unconditionally.
    anAssert.assertNotEquals(
        clusterHC2.getTable(DB_NAME, tblName).getTabletype(),
        clusterHC.getTable(DB_NAME, tblName).getTabletype(),
        "Source and destination tables should have different Tabletype");
    anAssert.assertNotEquals(
        clusterHC2.getTable(DB_NAME, tblName).getTblProps().get("EXTERNAL"),
        clusterHC.getTable(DB_NAME, tblName).getTblProps().get("EXTERNAL"),
        "Source and destination tables should have different value of property EXTERNAL");
    anAssert.assertAll();
  }
  /**
   * Set feed cluster1 as target, clusters 2 and 3 as source. Run feed. Update feed and check that
   * the update succeeds. Check that the expected number of replication and retention coordinators
   * exists on each cluster.
   *
   * @throws Exception if cluster submission, data generation or any feed operation fails
   */
  @Test(enabled = true, timeOut = 1200000)
  public void multipleSourceOneTarget() throws Exception {

    bundles[0].setInputFeedDataPath(inputPath);
    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);

    final FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    feed.clearFeedClusters();

    // Source data for cluster 2. The colo string is used so that the test works in embedded and
    // distributed mode.
    final String usPostFix = "/US/" + cluster2Colo;
    final String cluster2Prefix = bundles[0].getFeedDataPathPrefix();
    HadoopUtil.deleteDirIfExists(cluster2Prefix.substring(1), cluster2FS);
    HadoopUtil.lateDataReplenish(cluster2FS, 5, 80, cluster2Prefix, usPostFix);

    // Source data for cluster 3, again keyed by colo.
    final String ukPostFix = "/UK/" + cluster3Colo;
    final String cluster3Prefix = bundles[0].getFeedDataPathPrefix();
    HadoopUtil.deleteDirIfExists(cluster3Prefix.substring(1), cluster3FS);
    HadoopUtil.lateDataReplenish(cluster3FS, 5, 80, cluster3Prefix, ukPostFix);

    final String validityBase = TimeUtil.getTimeWrtSystemTime(-30);

    // bundles[1] cluster: source, US partition
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[1].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(validityBase, TimeUtil.addMinsToTime(validityBase, 85))
            .withClusterType(ClusterType.SOURCE)
            .withPartition("US/${cluster.colo}")
            .build());

    // bundles[0] cluster: target
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(
                TimeUtil.addMinsToTime(validityBase, 20), TimeUtil.addMinsToTime(validityBase, 105))
            .withClusterType(ClusterType.TARGET)
            .build());

    // bundles[2] cluster: source, UK partition
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(
                TimeUtil.addMinsToTime(validityBase, 40), TimeUtil.addMinsToTime(validityBase, 130))
            .withClusterType(ClusterType.SOURCE)
            .withPartition("UK/${cluster.colo}")
            .build());

    LOGGER.info("feed: " + Util.prettyPrintXml(feed.toString()));

    AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feed.toString()));
    AssertUtil.assertSucceeded(prism.getFeedHelper().schedule(feed.toString()));

    // Point the feed at a different location and push the update.
    feed.setFilePath(alternativeInputPath);

    LOGGER.info("updated feed: " + Util.prettyPrintXml(feed.toString()));

    AssertUtil.assertSucceeded(prism.getFeedHelper().update(feed.toString(), feed.toString()));

    // Expected coordinator counts per cluster after the update.
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "REPLICATION"), 0);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "RETENTION"), 2);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster3OC, feed.getName(), "REPLICATION"), 0);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster3OC, feed.getName(), "RETENTION"), 2);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster1OC, feed.getName(), "REPLICATION"), 4);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster1OC, feed.getName(), "RETENTION"), 2);
  }
  /**
   * Set feed1 to have cluster1 as source, cluster3 as target. Set feed2 clusters vice versa. Add
   * both clusters to process and feed2 as input feed. Run process. Update feed1. TODO test case is
   * incomplete
   *
   * <p>Every submit/schedule/update response is checked with {@code AssertUtil.assertSucceeded},
   * so a rejected feed fails the test at the point of submission rather than later on.
   *
   * @throws Exception if cluster submission, data generation or any entity operation fails
   */
  @Test(enabled = true, timeOut = 1800000)
  public void updateFeedDependentProcessTest() throws Exception {
    // set cluster colos
    bundles[0].setCLusterColo(cluster1Colo);
    bundles[1].setCLusterColo(cluster2Colo);
    bundles[2].setCLusterColo(cluster3Colo);

    // submit 3 clusters
    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);

    // get 2 unique feeds
    FeedMerlin feed01 = new FeedMerlin(bundles[0].getInputFeedFromBundle());
    FeedMerlin feed02 = new FeedMerlin(bundles[1].getInputFeedFromBundle());
    FeedMerlin outputFeed = new FeedMerlin(bundles[0].getOutputFeedFromBundle());

    // set clusters to null;
    feed01.clearFeedClusters();
    feed02.clearFeedClusters();
    outputFeed.clearFeedClusters();

    // set new feed input data
    feed01.setFeedPathValue(baseTestDir + "/feed01" + MINUTE_DATE_PATTERN);
    feed02.setFeedPathValue(baseTestDir + "/feed02" + MINUTE_DATE_PATTERN);

    // generate data in both the colos ua1 and ua3
    String prefix = feed01.getFeedPrefix();
    HadoopUtil.deleteDirIfExists(prefix.substring(1), cluster1FS);
    HadoopUtil.lateDataReplenish(cluster1FS, 25, 1, prefix, null);

    prefix = feed02.getFeedPrefix();
    HadoopUtil.deleteDirIfExists(prefix.substring(1), cluster3FS);
    HadoopUtil.lateDataReplenish(cluster3FS, 25, 1, prefix, null);

    String startTime = TimeUtil.getTimeWrtSystemTime(-50);

    // set clusters for feed01
    feed01.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(startTime, "2099-01-01T00:00Z")
            .withClusterType(ClusterType.SOURCE)
            .build());

    feed01.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(startTime, "2099-01-01T00:00Z")
            .withClusterType(ClusterType.TARGET)
            .build());

    // set clusters for feed02
    feed02.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(startTime, "2099-01-01T00:00Z")
            .withClusterType(ClusterType.TARGET)
            .build());

    feed02.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(startTime, "2099-01-01T00:00Z")
            .withClusterType(ClusterType.SOURCE)
            .build());

    // set clusters for output feed
    outputFeed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(startTime, "2099-01-01T00:00Z")
            .withClusterType(ClusterType.SOURCE)
            .build());

    outputFeed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
            .withRetention("hours(10)", ActionType.DELETE)
            .withValidity(startTime, "2099-01-01T00:00Z")
            .withClusterType(ClusterType.TARGET)
            .build());

    // submit and schedule feeds; fail right here if any of them is rejected instead of letting
    // the process submission or the final update fail for a non-obvious reason
    AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed01.toString()));
    AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed02.toString()));
    AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(outputFeed.toString()));

    // create a process with 2 clusters

    // get a process
    ProcessMerlin process = new ProcessMerlin(bundles[0].getProcessData());

    // add clusters to process
    String processStartTime = TimeUtil.getTimeWrtSystemTime(-6);
    String processEndTime = TimeUtil.getTimeWrtSystemTime(70);

    process.clearProcessCluster();

    process.addProcessCluster(
        new ProcessMerlin.ProcessClusterBuilder(
                Util.readEntityName(bundles[0].getClusters().get(0)))
            .withValidity(processStartTime, processEndTime)
            .build());

    process.addProcessCluster(
        new ProcessMerlin.ProcessClusterBuilder(
                Util.readEntityName(bundles[2].getClusters().get(0)))
            .withValidity(processStartTime, processEndTime)
            .build());
    process.addInputFeed(feed02.getName(), feed02.getName());

    // submit and schedule process
    AssertUtil.assertSucceeded(prism.getProcessHelper().submitAndSchedule(process.toString()));

    LOGGER.info("Wait till process goes into running ");

    // the last argument of the wait call is doubled on Windows
    int timeout = OSUtil.IS_WINDOWS ? 50 : 25;
    InstanceUtil.waitTillInstanceReachState(
        serverOC.get(0), process.getName(), 1, Status.RUNNING, EntityType.PROCESS, timeout);
    InstanceUtil.waitTillInstanceReachState(
        serverOC.get(2), process.getName(), 1, Status.RUNNING, EntityType.PROCESS, timeout);

    // update feed01 while the dependent process is running
    feed01.setFilePath(alternativeInputPath);
    LOGGER.info("updated feed: " + Util.prettyPrintXml(feed01.toString()));
    AssertUtil.assertSucceeded(prism.getFeedHelper().update(feed01.toString(), feed01.toString()));
  }
Example #13
0
  /**
   * One source table, one destination table. Partitions are inserted, dropped and replaced with
   * dynamic partition queries on the source after the bootstrap copy; the destination table is
   * expected to be equal to the source after the first recipe instance succeeds.
   *
   * @throws Exception if setup, SQL execution, submission or waiting fails
   */
  @Test
  public void drInsertDropReplaceDynamicPartition() throws Exception {
    final RecipeExecLocation execLocation = RecipeExecLocation.SourceCluster;
    setUp(execLocation);
    final String table = "dynamicPartitionDR";
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(table);
    final List<String> submitCommand = recipeMerlin.getSubmissionCommand();

    // Statement fragments shared by the calls below.
    final String insertInto = "insert into table " + table + " partition (pname) values";
    final String dropPartition = "alter table " + table + " drop partition";
    final String selectAll = "select * from " + table;

    // disable strict mode to use only dynamic partition
    runSql(connection, "set hive.exec.dynamic.partition.mode=nonstrict");

    runSql(connection, "create table " + table + "(comment string) partitioned by (pname string)");
    runSql(
        connection,
        insertInto
            + "('this partition is going to be deleted - should NOT appear after dr', 'DELETE')");
    runSql(
        connection,
        insertInto
            + "('this partition is going to be replaced - should NOT appear after dr', 'REPLACE')");
    runSql(
        connection,
        insertInto + "('this partition will have more data - should appear after dr', 'ADD_DATA')");

    LOGGER.info(table + " before bootstrap copying: ");
    runSql(connection, selectAll);
    bootstrapCopy(connection, clusterFS, table, connection2, clusterFS2, table);

    // post-bootstrap changes: new partition, more data, two drops and one re-creation
    runSql(
        connection,
        insertInto
            + "('this partition has been added post bootstrap - should appear after dr', 'NEW_PART')");
    runSql(
        connection,
        insertInto
            + "('more data has been added post bootstrap - should appear after dr', 'ADD_DATA')");
    runSql(connection, dropPartition + "(pname = 'DELETE')");
    runSql(connection, dropPartition + "(pname = 'REPLACE')");
    runSql(
        connection,
        insertInto + "('this partition has been replaced - should appear after dr', 'REPLACE')");

    LOGGER.info(table + " after modifications, before replication: ");
    runSql(connection, selectAll);

    Assert.assertEquals(Bundle.runFalconCLI(submitCommand), 0, "Recipe submission failed.");

    InstanceUtil.waitTillInstanceReachState(
        execLocation.getRecipeOC(clusterOC, clusterOC2),
        recipeMerlin.getName(),
        1,
        CoordinatorAction.Status.SUCCEEDED,
        EntityType.PROCESS);

    HiveAssert.assertTableEqual(
            cluster,
            clusterHC.getTable(DB_NAME, table),
            cluster2,
            clusterHC2.getTable(DB_NAME, table),
            new NotifyingAssert(true))
        .assertAll();
  }
Example #14
0
  /**
   * Replicates one source table to two destinations with two recipe submissions: the recipe
   * prepared by {@code setUp} (second cluster as target) and a re-targeted, re-named copy of it
   * pointing at the third cluster. Both destination tables are expected to be equal to the source
   * after the first instance of each recipe has succeeded.
   *
   * @throws Exception if setup, SQL execution, submission or waiting fails
   */
  @Test
  public void drTwoDstTablesTwoRequests() throws Exception {
    final RecipeExecLocation execLocation = RecipeExecLocation.TargetCluster;
    setUp(execLocation);

    // The third cluster is not covered by setUp, so its client, connection and database are
    // prepared here.
    final HCatClient thirdHCat = cluster3.getClusterHelper().getHCatClient();
    final Connection thirdConnection = cluster3.getClusterHelper().getHiveJdbcConnection();
    runSql(thirdConnection, "drop database if exists hdr_sdb1 cascade");
    runSql(thirdConnection, "create database hdr_sdb1");
    runSql(thirdConnection, "use hdr_sdb1");

    final String table = "vanillaTable";
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(table);
    // Name and command of the first recipe are captured before the recipe object is re-targeted.
    final String firstRecipeName = recipeMerlin.getName();
    final List<String> firstCommand = recipeMerlin.getSubmissionCommand();

    final Bundle thirdBundle = new Bundle(BundleUtil.readHCatBundle(), cluster3);
    thirdBundle.generateUniqueBundle(this);
    thirdBundle.submitClusters(prism);
    recipeMerlin
        .withTargetCluster(thirdBundle.getClusterElement())
        .withRecipeCluster(
            execLocation.getRecipeCluster(
                bundles[0].getClusterElement(), thirdBundle.getClusterElement()));
    recipeMerlin.setUniqueName(this.getClass().getSimpleName());

    final List<String> secondCommand = recipeMerlin.getSubmissionCommand();
    final String secondRecipeName = recipeMerlin.getName();

    runSql(connection, "create table " + table + "(comment string)");

    bootstrapCopy(connection, clusterFS, table, connection2, clusterFS2, table);
    bootstrapCopy(connection, clusterFS, table, thirdConnection, clusterFS3, table);

    final String insertStatement =
        "insert into table "
            + table
            + " values('this string has been added post bootstrap - should appear after dr')";
    runSql(connection, insertStatement);

    Assert.assertEquals(Bundle.runFalconCLI(firstCommand), 0, "Recipe submission failed.");
    Assert.assertEquals(Bundle.runFalconCLI(secondCommand), 0, "Recipe submission failed.");

    InstanceUtil.waitTillInstanceReachState(
        execLocation.getRecipeOC(clusterOC, clusterOC2),
        firstRecipeName,
        1,
        CoordinatorAction.Status.SUCCEEDED,
        EntityType.PROCESS);
    InstanceUtil.waitTillInstanceReachState(
        execLocation.getRecipeOC(clusterOC, clusterOC3),
        secondRecipeName,
        1,
        CoordinatorAction.Status.SUCCEEDED,
        EntityType.PROCESS);

    // Compare the source against each destination and report all mismatches together.
    final NotifyingAssert softAssert = new NotifyingAssert(true);
    HiveAssert.assertTableEqual(
        cluster,
        clusterHC.getTable(DB_NAME, table),
        cluster2,
        clusterHC2.getTable(DB_NAME, table),
        softAssert);
    HiveAssert.assertTableEqual(
        cluster,
        clusterHC.getTable(DB_NAME, table),
        cluster3,
        thirdHCat.getTable(DB_NAME, table),
        softAssert);
    softAssert.assertAll();
  }
Example #15
0
  /**
   * One source table, one destination table. After the bootstrap copy, partitions of the source
   * table are written with dynamic-partition {@code insert overwrite} queries that select from a
   * helper table; the destination table is expected to be equal to the source after the first
   * recipe instance succeeds.
   *
   * @throws Exception if setup, SQL execution, submission or waiting fails
   */
  @Test
  public void drInsertOverwriteDynamicPartition() throws Exception {
    final RecipeExecLocation execLocation = RecipeExecLocation.SourceCluster;
    setUp(execLocation);
    final String table = "drInsertOverwritePartition";
    final String helperTable = "drInsertOverwritePartitionHelperTbl";
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(table);
    final List<String> submitCommand = recipeMerlin.getSubmissionCommand();

    // Statement fragments shared by the calls below.
    final String partitionedColumns = "(comment string) partitioned by (pname string)";
    final String insertIntoHelper = "insert into table " + helperTable + " partition (pname)";
    final String insertIntoTable = "insert into table " + table + " partition (pname) values";
    final String overwriteFromHelper =
        "insert overwrite table "
            + table
            + " partition (pname) select comment, pname from "
            + helperTable
            + " where comment REGEXP ";
    final String selectAll = "select * from " + table;

    // disable strict mode to use only dynamic partition
    runSql(connection, "set hive.exec.dynamic.partition.mode=nonstrict");

    // helper table: the rows selected by the overwrite queries further down
    runSql(connection, "create table " + helperTable + partitionedColumns);
    runSql(
        connection,
        insertIntoHelper + " values('overwrite data - should appear after dr', 'OVERWRITE_PART')");
    runSql(
        connection,
        insertIntoHelper + " values('newdata row2 - should appear after dr', 'NEW_DATA')");
    runSql(
        connection,
        insertIntoHelper + " values('newdata row1 - should appear after dr', 'NEW_DATA')");

    // replicated table: initial content before the bootstrap copy
    runSql(connection, "create table " + table + partitionedColumns);
    runSql(
        connection,
        insertIntoTable + "('this data should be retained - should appear after dr', 'OLD_PART')");
    runSql(
        connection,
        insertIntoTable
            + "('this data should get overwritten - should NOT appear after dr', 'OVERWRITE_PART')");

    LOGGER.info(table + " before bootstrap copying: ");
    runSql(connection, selectAll);
    bootstrapCopy(connection, clusterFS, table, connection2, clusterFS2, table);

    // post-bootstrap overwrites driven by the helper table's rows
    runSql(connection, overwriteFromHelper + "'^overwrite'");
    runSql(connection, overwriteFromHelper + "'^newdata'");

    LOGGER.info(table + " after modifications, before replication: ");
    runSql(connection, selectAll);

    Assert.assertEquals(Bundle.runFalconCLI(submitCommand), 0, "Recipe submission failed.");

    InstanceUtil.waitTillInstanceReachState(
        execLocation.getRecipeOC(clusterOC, clusterOC2),
        recipeMerlin.getName(),
        1,
        CoordinatorAction.Status.SUCCEEDED,
        EntityType.PROCESS);

    HiveAssert.assertTableEqual(
            cluster,
            clusterHC.getTable(DB_NAME, table),
            cluster2,
            clusterHC2.getTable(DB_NAME, table),
            new NotifyingAssert(true))
        .assertAll();
  }
  // Make sure the Oozie changes mentioned in FALCON-389 are applied on the clusters; otherwise
  // the test will fail.
  // With Hive 0.13, HIVE-6848 and HIVE-6868 must also be resolved for this to work. The Oozie
  // share libs need Hive jars that include those fixes, and the Maven dependency used to run the
  // tests must provide an HCat version that includes them as well.
  // This test can fail randomly because of https://issues.apache.org/jira/browse/FALCON-401
  @Test(dataProvider = "generateSeparators")
  public void oneSourceTwoTarget(String separator) throws Exception {
    String tcName = "HCatReplication_oneSourceTwoTarget";
    if (separator.equals("-")) {
      tcName += "_hyphen";
    } else {
      tcName += "_slash";
    }
    String tblName = tcName;
    String testHdfsDir = baseTestHDFSDir + "/" + tcName;
    HadoopUtil.recreateDir(serverFS, testHdfsDir);
    final String startDate = "2010-01-01T20:00Z";
    final String endDate = "2099-01-01T00:00Z";
    final String dataEndDate = "2010-01-01T21:00Z";
    final String tableUriPartitionFragment =
        StringUtils.join(new String[] {"#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}"}, separator);
    String tableUri = "catalog:" + dbName + ":" + tblName + tableUriPartitionFragment;
    final String datePattern = StringUtils.join(new String[] {"yyyy", "MM", "dd", "HH"}, separator);
    // use the start date for both as this will only generate 2 partitions.
    List<String> dataDates =
        TimeUtil.getMinuteDatesOnEitherSide(
            startDate, dataEndDate, 60, DateTimeFormat.forPattern(datePattern));

    final List<String> dataset =
        HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, testHdfsDir, dataDates);
    final String col1Name = "id";
    final String col2Name = "value";
    final String partitionColumn = "dt";

    ArrayList<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
    cols.add(HCatUtil.getStringSchema(col1Name, col1Name + " comment"));
    cols.add(HCatUtil.getStringSchema(col2Name, col2Name + " comment"));
    ArrayList<HCatFieldSchema> partitionCols = new ArrayList<HCatFieldSchema>();

    // create table on cluster 1 and add data to it.
    partitionCols.add(HCatUtil.getStringSchema(partitionColumn, partitionColumn + " partition"));
    createTable(clusterHC, dbName, tblName, cols, partitionCols, testHdfsDir);
    addPartitionsToTable(dataDates, dataset, "dt", dbName, tblName, clusterHC);

    // create table on target cluster.
    createTable(cluster2HC, dbName, tblName, cols, partitionCols, testHdfsDir);
    createTable(cluster3HC, dbName, tblName, cols, partitionCols, testHdfsDir);

    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);

    bundles[0].setInputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setInputFeedValidity(startDate, endDate);
    bundles[0].setInputFeedTableUri(tableUri);

    String feed = bundles[0].getDataSets().get(0);
    // set the cluster 2 as the target.
    feed =
        FeedMerlin.fromString(feed)
            .addFeedCluster(
                new FeedMerlin.FeedClusterBuilder(
                        Util.readEntityName(bundles[1].getClusters().get(0)))
                    .withRetention("months(9000)", ActionType.DELETE)
                    .withValidity(startDate, endDate)
                    .withClusterType(ClusterType.TARGET)
                    .withTableUri(tableUri)
                    .build())
            .toString();
    // set the cluster 3 as the target.
    feed =
        FeedMerlin.fromString(feed)
            .addFeedCluster(
                new FeedMerlin.FeedClusterBuilder(
                        Util.readEntityName(bundles[2].getClusters().get(0)))
                    .withRetention("months(9000)", ActionType.DELETE)
                    .withValidity(startDate, endDate)
                    .withClusterType(ClusterType.TARGET)
                    .withTableUri(tableUri)
                    .build())
            .toString();

    AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed));
    TimeUtil.sleepSeconds(TIMEOUT);
    // check if all coordinators exist
    Assert.assertEquals(
        InstanceUtil.checkIfFeedCoordExist(
            cluster2.getFeedHelper(), Util.readEntityName(feed), "REPLICATION"),
        1);

    // check if all coordinators exist
    Assert.assertEquals(
        InstanceUtil.checkIfFeedCoordExist(
            cluster3.getFeedHelper(), Util.readEntityName(feed), "REPLICATION"),
        1);

    // replication should start, wait while it ends
    // we will check for 2 instances so that both partitions are copied over.
    InstanceUtil.waitTillInstanceReachState(
        cluster2OC,
        Util.readEntityName(feed),
        2,
        CoordinatorAction.Status.SUCCEEDED,
        EntityType.FEED);

    // replication should start, wait while it ends
    // we will check for 2 instances so that both partitions are copied over.
    InstanceUtil.waitTillInstanceReachState(
        cluster3OC,
        Util.readEntityName(feed),
        2,
        CoordinatorAction.Status.SUCCEEDED,
        EntityType.FEED);

    // check if data was replicated correctly
    List<Path> srcData = HadoopUtil.getAllFilesRecursivelyHDFS(clusterFS, new Path(testHdfsDir));
    LOGGER.info("Data on source cluster: " + srcData);
    List<Path> cluster2TargetData =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS, new Path(testHdfsDir));
    LOGGER.info("Data on target cluster: " + cluster2TargetData);
    AssertUtil.checkForListSizes(srcData, cluster2TargetData);
    List<Path> cluster3TargetData =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster3FS, new Path(testHdfsDir));
    LOGGER.info("Data on target cluster: " + cluster3TargetData);
    AssertUtil.checkForListSizes(srcData, cluster3TargetData);
  }
Example #17
0
  @Test(dataProvider = "getRecipeLocation")
  public void drPartition(final RecipeExecLocation recipeExecLocation) throws Exception {
    setUp(recipeExecLocation);
    final String tblName = "partitionDR";
    recipeMerlin.withSourceDb(DB_NAME).withSourceTable(tblName);
    final List<String> command = recipeMerlin.getSubmissionCommand();

    runSql(
        connection, "create table " + tblName + "(comment string) partitioned by (pname string)");
    runSql(
        connection,
        "insert into table "
            + tblName
            + " partition (pname = 'DELETE') values"
            + "('this partition is going to be deleted - should NOT appear after dr')");
    runSql(
        connection,
        "insert into table "
            + tblName
            + " partition (pname = 'REPLACE') values"
            + "('this partition is going to be replaced - should NOT appear after dr')");
    runSql(
        connection,
        "insert into table "
            + tblName
            + " partition (pname = 'ADD_DATA') values"
            + "('this partition will have more data - should appear after dr')");

    bootstrapCopy(connection, clusterFS, tblName, connection2, clusterFS2, tblName);

    runSql(
        connection,
        "insert into table "
            + tblName
            + " partition (pname = 'NEW_PART') values"
            + "('this partition has been added post bootstrap - should appear after dr')");
    runSql(
        connection,
        "insert into table "
            + tblName
            + " partition (pname = 'ADD_DATA') values"
            + "('more data has been added post bootstrap - should appear after dr')");
    runSql(connection, "alter table " + tblName + " drop partition(pname = 'DELETE')");
    runSql(connection, "alter table " + tblName + " drop partition(pname = 'REPLACE')");
    runSql(
        connection,
        "insert into table "
            + tblName
            + " partition (pname = 'REPLACE') values"
            + "('this partition has been replaced - should appear after dr')");

    Assert.assertEquals(Bundle.runFalconCLI(command), 0, "Recipe submission failed.");

    InstanceUtil.waitTillInstanceReachState(
        recipeExecLocation.getRecipeOC(clusterOC, clusterOC2),
        recipeMerlin.getName(),
        1,
        CoordinatorAction.Status.SUCCEEDED,
        EntityType.PROCESS);

    HiveAssert.assertTableEqual(
            cluster,
            clusterHC.getTable(DB_NAME, tblName),
            cluster2,
            clusterHC2.getTable(DB_NAME, tblName),
            new NotifyingAssert(true))
        .assertAll();
  }