Exemplo n.º 1
0
 public HCatClient getHCatClient() {
   if (null == this.hCatClient) {
     try {
       this.hCatClient = HCatUtil.getHCatClient(hcatEndpoint, hiveMetaStorePrincipal);
     } catch (HCatException e) {
       Assert.fail(
           "Unable to create hCatClient because of exception:\n"
               + ExceptionUtils.getStackTrace(e));
     }
   }
   return this.hCatClient;
 }
  // make sure oozie changes mentioned FALCON-389 are done on the clusters. Otherwise the test
  // will fail.
  // Noticed with hive 0.13 we need the following issues resolved to work HIVE-6848 and
  // HIVE-6868. Also oozie share libs need to have hive jars that have these jira's resolved and
  // the maven depenendcy you are using to run the tests has to have hcat that has these fixed.
  // This test can fail randomly because of https://issues.apache.org/jira/browse/FALCON-401
  @Test(dataProvider = "generateSeparators")
  public void oneSourceTwoTarget(String separator) throws Exception {
    String tcName = "HCatReplication_oneSourceTwoTarget";
    if (separator.equals("-")) {
      tcName += "_hyphen";
    } else {
      tcName += "_slash";
    }
    String tblName = tcName;
    String testHdfsDir = baseTestHDFSDir + "/" + tcName;
    HadoopUtil.recreateDir(serverFS, testHdfsDir);
    final String startDate = "2010-01-01T20:00Z";
    final String endDate = "2099-01-01T00:00Z";
    final String dataEndDate = "2010-01-01T21:00Z";
    final String tableUriPartitionFragment =
        StringUtils.join(new String[] {"#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}"}, separator);
    String tableUri = "catalog:" + dbName + ":" + tblName + tableUriPartitionFragment;
    final String datePattern = StringUtils.join(new String[] {"yyyy", "MM", "dd", "HH"}, separator);
    // use the start date for both as this will only generate 2 partitions.
    List<String> dataDates =
        TimeUtil.getMinuteDatesOnEitherSide(
            startDate, dataEndDate, 60, DateTimeFormat.forPattern(datePattern));

    final List<String> dataset =
        HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, testHdfsDir, dataDates);
    final String col1Name = "id";
    final String col2Name = "value";
    final String partitionColumn = "dt";

    ArrayList<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
    cols.add(HCatUtil.getStringSchema(col1Name, col1Name + " comment"));
    cols.add(HCatUtil.getStringSchema(col2Name, col2Name + " comment"));
    ArrayList<HCatFieldSchema> partitionCols = new ArrayList<HCatFieldSchema>();

    // create table on cluster 1 and add data to it.
    partitionCols.add(HCatUtil.getStringSchema(partitionColumn, partitionColumn + " partition"));
    createTable(clusterHC, dbName, tblName, cols, partitionCols, testHdfsDir);
    addPartitionsToTable(dataDates, dataset, "dt", dbName, tblName, clusterHC);

    // create table on target cluster.
    createTable(cluster2HC, dbName, tblName, cols, partitionCols, testHdfsDir);
    createTable(cluster3HC, dbName, tblName, cols, partitionCols, testHdfsDir);

    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);

    bundles[0].setInputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setInputFeedValidity(startDate, endDate);
    bundles[0].setInputFeedTableUri(tableUri);

    String feed = bundles[0].getDataSets().get(0);
    // set the cluster 2 as the target.
    feed =
        FeedMerlin.fromString(feed)
            .addFeedCluster(
                new FeedMerlin.FeedClusterBuilder(
                        Util.readEntityName(bundles[1].getClusters().get(0)))
                    .withRetention("months(9000)", ActionType.DELETE)
                    .withValidity(startDate, endDate)
                    .withClusterType(ClusterType.TARGET)
                    .withTableUri(tableUri)
                    .build())
            .toString();
    // set the cluster 3 as the target.
    feed =
        FeedMerlin.fromString(feed)
            .addFeedCluster(
                new FeedMerlin.FeedClusterBuilder(
                        Util.readEntityName(bundles[2].getClusters().get(0)))
                    .withRetention("months(9000)", ActionType.DELETE)
                    .withValidity(startDate, endDate)
                    .withClusterType(ClusterType.TARGET)
                    .withTableUri(tableUri)
                    .build())
            .toString();

    AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed));
    TimeUtil.sleepSeconds(TIMEOUT);
    // check if all coordinators exist
    Assert.assertEquals(
        InstanceUtil.checkIfFeedCoordExist(
            cluster2.getFeedHelper(), Util.readEntityName(feed), "REPLICATION"),
        1);

    // check if all coordinators exist
    Assert.assertEquals(
        InstanceUtil.checkIfFeedCoordExist(
            cluster3.getFeedHelper(), Util.readEntityName(feed), "REPLICATION"),
        1);

    // replication should start, wait while it ends
    // we will check for 2 instances so that both partitions are copied over.
    InstanceUtil.waitTillInstanceReachState(
        cluster2OC,
        Util.readEntityName(feed),
        2,
        CoordinatorAction.Status.SUCCEEDED,
        EntityType.FEED);

    // replication should start, wait while it ends
    // we will check for 2 instances so that both partitions are copied over.
    InstanceUtil.waitTillInstanceReachState(
        cluster3OC,
        Util.readEntityName(feed),
        2,
        CoordinatorAction.Status.SUCCEEDED,
        EntityType.FEED);

    // check if data was replicated correctly
    List<Path> srcData = HadoopUtil.getAllFilesRecursivelyHDFS(clusterFS, new Path(testHdfsDir));
    LOGGER.info("Data on source cluster: " + srcData);
    List<Path> cluster2TargetData =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS, new Path(testHdfsDir));
    LOGGER.info("Data on target cluster: " + cluster2TargetData);
    AssertUtil.checkForListSizes(srcData, cluster2TargetData);
    List<Path> cluster3TargetData =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster3FS, new Path(testHdfsDir));
    LOGGER.info("Data on target cluster: " + cluster3TargetData);
    AssertUtil.checkForListSizes(srcData, cluster3TargetData);
  }