/**
 * Test creating/using/deleting snapshots from the client
 *
 * <p>This is an end-to-end test for the snapshot utility
 *
 * <p>TODO This is essentially a clone of TestSnapshotFromClient. This is worth refactoring this
 * because there will be a few more flavors of snapshots that need to run these tests.
 */
@Category({RegionServerTests.class, LargeTests.class})
public class TestFlushSnapshotFromClient {
  private static final Log LOG = LogFactory.getLog(TestFlushSnapshotFromClient.class);

  @ClassRule
  public static final TestRule timeout =
      CategoryBasedTimeout.forClass(TestFlushSnapshotFromClient.class);

  protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
  protected static final int NUM_RS = 2;
  protected static final byte[] TEST_FAM = Bytes.toBytes("fam");
  protected static final TableName TABLE_NAME = TableName.valueOf("test");
  protected final int DEFAULT_NUM_ROWS = 100;
  protected Admin admin = null;

  @BeforeClass
  public static void setupCluster() throws Exception {
    setupConf(UTIL.getConfiguration());
    UTIL.startMiniCluster(NUM_RS);
  }

  protected static void setupConf(Configuration conf) {
    // disable the ui
    conf.setInt("hbase.regionsever.info.port", -1);
    // change the flush size to a small amount, regulating number of store files
    conf.setInt("hbase.hregion.memstore.flush.size", 25000);
    // so make sure we get a compaction when doing a load, but keep around some
    // files in the store
    conf.setInt("hbase.hstore.compaction.min", 10);
    conf.setInt("hbase.hstore.compactionThreshold", 10);
    // block writes if we get to 12 store files
    conf.setInt("hbase.hstore.blockingStoreFiles", 12);
    // Enable snapshot
    conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
    conf.set(
        HConstants.HBASE_REGION_SPLIT_POLICY_KEY, ConstantSizeRegionSplitPolicy.class.getName());
    conf.set(
        CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY,
        String.valueOf(HColumnDescriptor.MemoryCompaction.NONE));
  }

  @Before
  public void setup() throws Exception {
    createTable();
    this.admin = UTIL.getConnection().getAdmin();
  }

  protected void createTable() throws Exception {
    SnapshotTestingUtils.createTable(UTIL, TABLE_NAME, TEST_FAM);
  }

  @After
  public void tearDown() throws Exception {
    UTIL.deleteTable(TABLE_NAME);
    SnapshotTestingUtils.deleteAllSnapshots(this.admin);
    this.admin.close();
    SnapshotTestingUtils.deleteArchiveDirectory(UTIL);
  }

  @AfterClass
  public static void cleanupTest() throws Exception {
    try {
      UTIL.shutdownMiniCluster();
    } catch (Exception e) {
      LOG.warn("failure shutting down cluster", e);
    }
  }

  /**
   * Test simple flush snapshotting a table that is online
   *
   * @throws Exception
   */
  @Test
  public void testFlushTableSnapshot() throws Exception {
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);

    // put some stuff in the table
    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);

    LOG.debug("FS state before snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    // take a snapshot of the enabled table
    String snapshotString = "offlineTableSnapshot";
    byte[] snapshot = Bytes.toBytes(snapshotString);
    admin.snapshot(snapshotString, TABLE_NAME, SnapshotType.FLUSH);
    LOG.debug("Snapshot completed.");

    // make sure we have the snapshot
    List<SnapshotDescription> snapshots =
        SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME);

    // make sure its a valid snapshot
    LOG.debug("FS state after snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    SnapshotTestingUtils.confirmSnapshotValid(
        UTIL, ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);
  }

  /**
   * Test snapshotting a table that is online without flushing
   *
   * @throws Exception
   */
  @Test
  public void testSkipFlushTableSnapshot() throws Exception {
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);

    // put some stuff in the table
    Table table = UTIL.getConnection().getTable(TABLE_NAME);
    UTIL.loadTable(table, TEST_FAM);
    UTIL.flush(TABLE_NAME);

    LOG.debug("FS state before snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    // take a snapshot of the enabled table
    String snapshotString = "skipFlushTableSnapshot";
    byte[] snapshot = Bytes.toBytes(snapshotString);
    admin.snapshot(snapshotString, TABLE_NAME, SnapshotType.SKIPFLUSH);
    LOG.debug("Snapshot completed.");

    // make sure we have the snapshot
    List<SnapshotDescription> snapshots =
        SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME);

    // make sure its a valid snapshot
    LOG.debug("FS state after snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    SnapshotTestingUtils.confirmSnapshotValid(
        UTIL, ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);

    admin.deleteSnapshot(snapshot);
    snapshots = admin.listSnapshots();
    SnapshotTestingUtils.assertNoSnapshots(admin);
  }

  /**
   * Test simple flush snapshotting a table that is online
   *
   * @throws Exception
   */
  @Test
  public void testFlushTableSnapshotWithProcedure() throws Exception {
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);

    // put some stuff in the table
    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);

    LOG.debug("FS state before snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    // take a snapshot of the enabled table
    String snapshotString = "offlineTableSnapshot";
    byte[] snapshot = Bytes.toBytes(snapshotString);
    Map<String, String> props = new HashMap<String, String>();
    props.put("table", TABLE_NAME.getNameAsString());
    admin.execProcedure(
        SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, snapshotString, props);

    LOG.debug("Snapshot completed.");

    // make sure we have the snapshot
    List<SnapshotDescription> snapshots =
        SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME);

    // make sure its a valid snapshot
    LOG.debug("FS state after snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    SnapshotTestingUtils.confirmSnapshotValid(
        UTIL, ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);
  }

  @Test
  public void testSnapshotFailsOnNonExistantTable() throws Exception {
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    TableName tableName = TableName.valueOf("_not_a_table");

    // make sure the table doesn't exist
    boolean fail = false;
    do {
      try {
        admin.getTableDescriptor(tableName);
        fail = true;
        LOG.error("Table:" + tableName + " already exists, checking a new name");
        tableName = TableName.valueOf(tableName + "!");
      } catch (TableNotFoundException e) {
        fail = false;
      }
    } while (fail);

    // snapshot the non-existant table
    try {
      admin.snapshot("fail", tableName, SnapshotType.FLUSH);
      fail("Snapshot succeeded even though there is not table.");
    } catch (SnapshotCreationException e) {
      LOG.info("Correctly failed to snapshot a non-existant table:" + e.getMessage());
    }
  }

  @Test
  public void testAsyncFlushSnapshot() throws Exception {
    HBaseProtos.SnapshotDescription snapshot =
        HBaseProtos.SnapshotDescription.newBuilder()
            .setName("asyncSnapshot")
            .setTable(TABLE_NAME.getNameAsString())
            .setType(HBaseProtos.SnapshotDescription.Type.FLUSH)
            .build();

    // take the snapshot async
    admin.takeSnapshotAsync(
        new SnapshotDescription("asyncSnapshot", TABLE_NAME, SnapshotType.FLUSH));

    // constantly loop, looking for the snapshot to complete
    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
    SnapshotTestingUtils.waitForSnapshotToComplete(master, snapshot, 200);
    LOG.info(" === Async Snapshot Completed ===");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    // make sure we get the snapshot
    SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot);
  }

  @Test
  public void testSnapshotStateAfterMerge() throws Exception {
    int numRows = DEFAULT_NUM_ROWS;
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    // load the table so we have some data
    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM);

    // Take a snapshot
    String snapshotBeforeMergeName = "snapshotBeforeMerge";
    admin.snapshot(snapshotBeforeMergeName, TABLE_NAME, SnapshotType.FLUSH);

    // Clone the table
    TableName cloneBeforeMergeName = TableName.valueOf("cloneBeforeMerge");
    admin.cloneSnapshot(snapshotBeforeMergeName, cloneBeforeMergeName);
    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneBeforeMergeName);

    // Merge two regions
    List<HRegionInfo> regions = admin.getTableRegions(TABLE_NAME);
    Collections.sort(
        regions,
        new Comparator<HRegionInfo>() {
          public int compare(HRegionInfo r1, HRegionInfo r2) {
            return Bytes.compareTo(r1.getStartKey(), r2.getStartKey());
          }
        });

    int numRegions = admin.getTableRegions(TABLE_NAME).size();
    int numRegionsAfterMerge = numRegions - 2;
    admin.mergeRegionsAsync(
        regions.get(1).getEncodedNameAsBytes(), regions.get(2).getEncodedNameAsBytes(), true);
    admin.mergeRegionsAsync(
        regions.get(4).getEncodedNameAsBytes(), regions.get(5).getEncodedNameAsBytes(), true);

    // Verify that there's one region less
    waitRegionsAfterMerge(numRegionsAfterMerge);
    assertEquals(numRegionsAfterMerge, admin.getTableRegions(TABLE_NAME).size());

    // Clone the table
    TableName cloneAfterMergeName = TableName.valueOf("cloneAfterMerge");
    admin.cloneSnapshot(snapshotBeforeMergeName, cloneAfterMergeName);
    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneAfterMergeName);

    verifyRowCount(UTIL, TABLE_NAME, numRows);
    verifyRowCount(UTIL, cloneBeforeMergeName, numRows);
    verifyRowCount(UTIL, cloneAfterMergeName, numRows);

    // test that we can delete the snapshot
    UTIL.deleteTable(cloneAfterMergeName);
    UTIL.deleteTable(cloneBeforeMergeName);
  }

  @Test
  public void testTakeSnapshotAfterMerge() throws Exception {
    int numRows = DEFAULT_NUM_ROWS;
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    // load the table so we have some data
    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM);

    // Merge two regions
    List<HRegionInfo> regions = admin.getTableRegions(TABLE_NAME);
    Collections.sort(
        regions,
        new Comparator<HRegionInfo>() {
          public int compare(HRegionInfo r1, HRegionInfo r2) {
            return Bytes.compareTo(r1.getStartKey(), r2.getStartKey());
          }
        });

    int numRegions = admin.getTableRegions(TABLE_NAME).size();
    int numRegionsAfterMerge = numRegions - 2;
    admin.mergeRegionsAsync(
        regions.get(1).getEncodedNameAsBytes(), regions.get(2).getEncodedNameAsBytes(), true);
    admin.mergeRegionsAsync(
        regions.get(4).getEncodedNameAsBytes(), regions.get(5).getEncodedNameAsBytes(), true);

    waitRegionsAfterMerge(numRegionsAfterMerge);
    assertEquals(numRegionsAfterMerge, admin.getTableRegions(TABLE_NAME).size());

    // Take a snapshot
    String snapshotName = "snapshotAfterMerge";
    SnapshotTestingUtils.snapshot(admin, snapshotName, TABLE_NAME, SnapshotType.FLUSH, 3);

    // Clone the table
    TableName cloneName = TableName.valueOf("cloneMerge");
    admin.cloneSnapshot(snapshotName, cloneName);
    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneName);

    verifyRowCount(UTIL, TABLE_NAME, numRows);
    verifyRowCount(UTIL, cloneName, numRows);

    // test that we can delete the snapshot
    UTIL.deleteTable(cloneName);
  }

  /** Basic end-to-end test of simple-flush-based snapshots */
  @Test
  public void testFlushCreateListDestroy() throws Exception {
    LOG.debug("------- Starting Snapshot test -------------");
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    // load the table so we have some data
    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);

    String snapshotName = "flushSnapshotCreateListDestroy";
    FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
    Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
    SnapshotTestingUtils.createSnapshotAndValidate(
        admin, TABLE_NAME, Bytes.toString(TEST_FAM), snapshotName, rootDir, fs, true);
  }

  /**
   * Demonstrate that we reject snapshot requests if there is a snapshot already running on the same
   * table currently running and that concurrent snapshots on different tables can both succeed
   * concurretly.
   */
  @Test
  public void testConcurrentSnapshottingAttempts() throws IOException, InterruptedException {
    final TableName TABLE2_NAME = TableName.valueOf(TABLE_NAME + "2");

    int ssNum = 20;
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    // create second testing table
    SnapshotTestingUtils.createTable(UTIL, TABLE2_NAME, TEST_FAM);
    // load the table so we have some data
    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);
    SnapshotTestingUtils.loadData(UTIL, TABLE2_NAME, DEFAULT_NUM_ROWS, TEST_FAM);

    final CountDownLatch toBeSubmitted = new CountDownLatch(ssNum);
    // We'll have one of these per thread
    class SSRunnable implements Runnable {
      SnapshotDescription ss;

      SSRunnable(SnapshotDescription ss) {
        this.ss = ss;
      }

      @Override
      public void run() {
        try {
          LOG.info(
              "Submitting snapshot request: "
                  + ClientSnapshotDescriptionUtils.toString(
                      ProtobufUtil.createHBaseProtosSnapshotDesc(ss)));
          admin.takeSnapshotAsync(ss);
        } catch (Exception e) {
          LOG.info(
              "Exception during snapshot request: "
                  + ClientSnapshotDescriptionUtils.toString(
                      ProtobufUtil.createHBaseProtosSnapshotDesc(ss))
                  + ".  This is ok, we expect some",
              e);
        }
        LOG.info(
            "Submitted snapshot request: "
                + ClientSnapshotDescriptionUtils.toString(
                    ProtobufUtil.createHBaseProtosSnapshotDesc(ss)));
        toBeSubmitted.countDown();
      }
    };

    // build descriptions
    SnapshotDescription[] descs = new SnapshotDescription[ssNum];
    for (int i = 0; i < ssNum; i++) {
      if (i % 2 == 0) {
        descs[i] = new SnapshotDescription("ss" + i, TABLE_NAME, SnapshotType.FLUSH);
      } else {
        descs[i] = new SnapshotDescription("ss" + i, TABLE2_NAME, SnapshotType.FLUSH);
      }
    }

    // kick each off its own thread
    for (int i = 0; i < ssNum; i++) {
      new Thread(new SSRunnable(descs[i])).start();
    }

    // wait until all have been submitted
    toBeSubmitted.await();

    // loop until all are done.
    while (true) {
      int doneCount = 0;
      for (SnapshotDescription ss : descs) {
        try {
          if (admin.isSnapshotFinished(ss)) {
            doneCount++;
          }
        } catch (Exception e) {
          LOG.warn("Got an exception when checking for snapshot " + ss.getName(), e);
          doneCount++;
        }
      }
      if (doneCount == descs.length) {
        break;
      }
      Thread.sleep(100);
    }

    // dump for debugging
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    List<SnapshotDescription> taken = admin.listSnapshots();
    int takenSize = taken.size();
    LOG.info("Taken " + takenSize + " snapshots:  " + taken);
    assertTrue(
        "We expect at least 1 request to be rejected because of we concurrently"
            + " issued many requests",
        takenSize < ssNum && takenSize > 0);

    // Verify that there's at least one snapshot per table
    int t1SnapshotsCount = 0;
    int t2SnapshotsCount = 0;
    for (SnapshotDescription ss : taken) {
      if (ss.getTableName().equals(TABLE_NAME)) {
        t1SnapshotsCount++;
      } else if (ss.getTableName().equals(TABLE2_NAME)) {
        t2SnapshotsCount++;
      }
    }
    assertTrue("We expect at least 1 snapshot of table1 ", t1SnapshotsCount > 0);
    assertTrue("We expect at least 1 snapshot of table2 ", t2SnapshotsCount > 0);

    UTIL.deleteTable(TABLE2_NAME);
  }

  private void waitRegionsAfterMerge(final long numRegionsAfterMerge)
      throws IOException, InterruptedException {
    // Verify that there's one region less
    long startTime = System.currentTimeMillis();
    while (admin.getTableRegions(TABLE_NAME).size() != numRegionsAfterMerge) {
      // This may be flaky... if after 15sec the merge is not complete give up
      // it will fail in the assertEquals(numRegionsAfterMerge).
      if ((System.currentTimeMillis() - startTime) > 15000) break;
      Thread.sleep(100);
    }
    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, TABLE_NAME);
  }

  protected void verifyRowCount(
      final HBaseTestingUtility util, final TableName tableName, long expectedRows)
      throws IOException {
    SnapshotTestingUtils.verifyRowCount(util, tableName, expectedRows);
  }

  protected int countRows(final Table table, final byte[]... families) throws IOException {
    return UTIL.countRows(table, families);
  }
}
/**
 * Test all of the data block encoding algorithms for correctness. Most of the class generate data
 * which will test different branches in code.
 */
@Category({IOTests.class, LargeTests.class})
@RunWith(Parameterized.class)
public class TestDataBlockEncoders {

  private static final Log LOG = LogFactory.getLog(TestDataBlockEncoders.class);

  @Rule
  public final TestRule timeout =
      CategoryBasedTimeout.builder()
          .withTimeout(this.getClass())
          .withLookingForStuckThread(true)
          .build();

  private static int NUMBER_OF_KV = 10000;
  private static int NUM_RANDOM_SEEKS = 1000;

  private static int ENCODED_DATA_OFFSET =
      HConstants.HFILEBLOCK_HEADER_SIZE + DataBlockEncoding.ID_SIZE;

  private RedundantKVGenerator generator = new RedundantKVGenerator();
  private Random randomizer = new Random(42l);

  private final boolean includesMemstoreTS;
  private final boolean includesTags;
  private final boolean useOffheapData;

  @Parameters
  public static Collection<Object[]> parameters() {
    return HBaseTestingUtility.memStoreTSTagsAndOffheapCombination();
  }

  public TestDataBlockEncoders(
      boolean includesMemstoreTS, boolean includesTag, boolean useOffheapData) {
    this.includesMemstoreTS = includesMemstoreTS;
    this.includesTags = includesTag;
    this.useOffheapData = useOffheapData;
  }

  private HFileBlockEncodingContext getEncodingContext(
      Compression.Algorithm algo, DataBlockEncoding encoding) {
    DataBlockEncoder encoder = encoding.getEncoder();
    HFileContext meta =
        new HFileContextBuilder()
            .withHBaseCheckSum(false)
            .withIncludesMvcc(includesMemstoreTS)
            .withIncludesTags(includesTags)
            .withCompression(algo)
            .build();
    if (encoder != null) {
      return encoder.newDataBlockEncodingContext(
          encoding, HConstants.HFILEBLOCK_DUMMY_HEADER, meta);
    } else {
      return new HFileBlockDefaultEncodingContext(
          encoding, HConstants.HFILEBLOCK_DUMMY_HEADER, meta);
    }
  }

  /**
   * Test data block encoding of empty KeyValue.
   *
   * @throws IOException On test failure.
   */
  @Test
  public void testEmptyKeyValues() throws IOException {
    List<KeyValue> kvList = new ArrayList<KeyValue>();
    byte[] row = new byte[0];
    byte[] family = new byte[0];
    byte[] qualifier = new byte[0];
    byte[] value = new byte[0];
    if (!includesTags) {
      kvList.add(new KeyValue(row, family, qualifier, 0l, value));
      kvList.add(new KeyValue(row, family, qualifier, 0l, value));
    } else {
      byte[] metaValue1 = Bytes.toBytes("metaValue1");
      byte[] metaValue2 = Bytes.toBytes("metaValue2");
      kvList.add(
          new KeyValue(
              row,
              family,
              qualifier,
              0l,
              value,
              new Tag[] {new ArrayBackedTag((byte) 1, metaValue1)}));
      kvList.add(
          new KeyValue(
              row,
              family,
              qualifier,
              0l,
              value,
              new Tag[] {new ArrayBackedTag((byte) 1, metaValue2)}));
    }
    testEncodersOnDataset(kvList, includesMemstoreTS, includesTags);
  }

  /**
   * Test KeyValues with negative timestamp.
   *
   * @throws IOException On test failure.
   */
  @Test
  public void testNegativeTimestamps() throws IOException {
    List<KeyValue> kvList = new ArrayList<KeyValue>();
    byte[] row = new byte[0];
    byte[] family = new byte[0];
    byte[] qualifier = new byte[0];
    byte[] value = new byte[0];
    if (includesTags) {
      byte[] metaValue1 = Bytes.toBytes("metaValue1");
      byte[] metaValue2 = Bytes.toBytes("metaValue2");
      kvList.add(
          new KeyValue(
              row,
              family,
              qualifier,
              0l,
              value,
              new Tag[] {new ArrayBackedTag((byte) 1, metaValue1)}));
      kvList.add(
          new KeyValue(
              row,
              family,
              qualifier,
              0l,
              value,
              new Tag[] {new ArrayBackedTag((byte) 1, metaValue2)}));
    } else {
      kvList.add(new KeyValue(row, family, qualifier, -1l, Type.Put, value));
      kvList.add(new KeyValue(row, family, qualifier, -2l, Type.Put, value));
    }
    testEncodersOnDataset(kvList, includesMemstoreTS, includesTags);
  }

  /**
   * Test whether compression -> decompression gives the consistent results on pseudorandom sample.
   *
   * @throws IOException On test failure.
   */
  @Test
  public void testExecutionOnSample() throws IOException {
    List<KeyValue> kvList = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
    testEncodersOnDataset(kvList, includesMemstoreTS, includesTags);
  }

  /** Test seeking while file is encoded. */
  @Test
  public void testSeekingOnSample() throws IOException {
    List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);

    // create all seekers
    List<DataBlockEncoder.EncodedSeeker> encodedSeekers =
        new ArrayList<DataBlockEncoder.EncodedSeeker>();
    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
      LOG.info("Encoding: " + encoding);
      // Off heap block data support not added for PREFIX_TREE DBE yet.
      // TODO remove this once support is added. HBASE-12298
      if (this.useOffheapData && encoding == DataBlockEncoding.PREFIX_TREE) continue;
      DataBlockEncoder encoder = encoding.getEncoder();
      if (encoder == null) {
        continue;
      }
      LOG.info("Encoder: " + encoder);
      ByteBuffer encodedBuffer =
          encodeKeyValues(
              encoding,
              sampleKv,
              getEncodingContext(Compression.Algorithm.NONE, encoding),
              this.useOffheapData);
      HFileContext meta =
          new HFileContextBuilder()
              .withHBaseCheckSum(false)
              .withIncludesMvcc(includesMemstoreTS)
              .withIncludesTags(includesTags)
              .withCompression(Compression.Algorithm.NONE)
              .build();
      DataBlockEncoder.EncodedSeeker seeker =
          encoder.createSeeker(
              CellComparator.COMPARATOR, encoder.newDataBlockDecodingContext(meta));
      seeker.setCurrentBuffer(new SingleByteBuff(encodedBuffer));
      encodedSeekers.add(seeker);
    }
    LOG.info("Testing it!");
    // test it!
    // try a few random seeks
    for (boolean seekBefore : new boolean[] {false, true}) {
      for (int i = 0; i < NUM_RANDOM_SEEKS; ++i) {
        int keyValueId;
        if (!seekBefore) {
          keyValueId = randomizer.nextInt(sampleKv.size());
        } else {
          keyValueId = randomizer.nextInt(sampleKv.size() - 1) + 1;
        }

        KeyValue keyValue = sampleKv.get(keyValueId);
        checkSeekingConsistency(encodedSeekers, seekBefore, keyValue);
      }
    }

    // check edge cases
    LOG.info("Checking edge cases");
    checkSeekingConsistency(encodedSeekers, false, sampleKv.get(0));
    for (boolean seekBefore : new boolean[] {false, true}) {
      checkSeekingConsistency(encodedSeekers, seekBefore, sampleKv.get(sampleKv.size() - 1));
      KeyValue midKv = sampleKv.get(sampleKv.size() / 2);
      Cell lastMidKv = CellUtil.createLastOnRowCol(midKv);
      checkSeekingConsistency(encodedSeekers, seekBefore, lastMidKv);
    }
    LOG.info("Done");
  }

  static ByteBuffer encodeKeyValues(
      DataBlockEncoding encoding,
      List<KeyValue> kvs,
      HFileBlockEncodingContext encodingContext,
      boolean useOffheapData)
      throws IOException {
    DataBlockEncoder encoder = encoding.getEncoder();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    baos.write(HConstants.HFILEBLOCK_DUMMY_HEADER);
    DataOutputStream dos = new DataOutputStream(baos);
    encoder.startBlockEncoding(encodingContext, dos);
    for (KeyValue kv : kvs) {
      encoder.encode(kv, encodingContext, dos);
    }
    encoder.endBlockEncoding(encodingContext, dos, baos.getBuffer());
    byte[] encodedData = new byte[baos.size() - ENCODED_DATA_OFFSET];
    System.arraycopy(baos.toByteArray(), ENCODED_DATA_OFFSET, encodedData, 0, encodedData.length);
    if (useOffheapData) {
      ByteBuffer bb = ByteBuffer.allocateDirect(encodedData.length);
      bb.put(encodedData);
      bb.rewind();
      return bb;
    }
    return ByteBuffer.wrap(encodedData);
  }

  @Test
  public void testNextOnSample() throws IOException {
    List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);

    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
      // Off heap block data support not added for PREFIX_TREE DBE yet.
      // TODO remove this once support is added. HBASE-12298
      if (this.useOffheapData && encoding == DataBlockEncoding.PREFIX_TREE) continue;
      if (encoding.getEncoder() == null) {
        continue;
      }
      DataBlockEncoder encoder = encoding.getEncoder();
      ByteBuffer encodedBuffer =
          encodeKeyValues(
              encoding,
              sampleKv,
              getEncodingContext(Compression.Algorithm.NONE, encoding),
              this.useOffheapData);
      HFileContext meta =
          new HFileContextBuilder()
              .withHBaseCheckSum(false)
              .withIncludesMvcc(includesMemstoreTS)
              .withIncludesTags(includesTags)
              .withCompression(Compression.Algorithm.NONE)
              .build();
      DataBlockEncoder.EncodedSeeker seeker =
          encoder.createSeeker(
              CellComparator.COMPARATOR, encoder.newDataBlockDecodingContext(meta));
      seeker.setCurrentBuffer(new SingleByteBuff(encodedBuffer));
      int i = 0;
      do {
        KeyValue expectedKeyValue = sampleKv.get(i);
        Cell cell = seeker.getCell();
        if (CellComparator.COMPARATOR.compareKeyIgnoresMvcc(expectedKeyValue, cell) != 0) {
          int commonPrefix =
              CellUtil.findCommonPrefixInFlatKey(expectedKeyValue, cell, false, true);
          fail(
              String.format(
                  "next() produces wrong results "
                      + "encoder: %s i: %d commonPrefix: %d"
                      + "\n expected %s\n actual      %s",
                  encoder.toString(),
                  i,
                  commonPrefix,
                  Bytes.toStringBinary(
                      expectedKeyValue.getBuffer(),
                      expectedKeyValue.getKeyOffset(),
                      expectedKeyValue.getKeyLength()),
                  CellUtil.toString(cell, false)));
        }
        i++;
      } while (seeker.next());
    }
  }

  /**
   * Test whether the decompression of first key is implemented correctly.
   *
   * @throws IOException
   */
  @Test
  public void testFirstKeyInBlockOnSample() throws IOException {
    List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);

    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
      // Off heap block data support not added for PREFIX_TREE DBE yet.
      // TODO remove this once support is added. HBASE-12298
      if (this.useOffheapData && encoding == DataBlockEncoding.PREFIX_TREE) continue;
      if (encoding.getEncoder() == null) {
        continue;
      }
      DataBlockEncoder encoder = encoding.getEncoder();
      ByteBuffer encodedBuffer =
          encodeKeyValues(
              encoding,
              sampleKv,
              getEncodingContext(Compression.Algorithm.NONE, encoding),
              this.useOffheapData);
      Cell key = encoder.getFirstKeyCellInBlock(new SingleByteBuff(encodedBuffer));
      KeyValue firstKv = sampleKv.get(0);
      if (0 != CellComparator.COMPARATOR.compareKeyIgnoresMvcc(key, firstKv)) {
        int commonPrefix = CellUtil.findCommonPrefixInFlatKey(key, firstKv, false, true);
        fail(String.format("Bug in '%s' commonPrefix %d", encoder.toString(), commonPrefix));
      }
    }
  }

  private void checkSeekingConsistency(
      List<DataBlockEncoder.EncodedSeeker> encodedSeekers, boolean seekBefore, Cell keyValue) {
    Cell expectedKeyValue = null;
    ByteBuffer expectedKey = null;
    ByteBuffer expectedValue = null;
    for (DataBlockEncoder.EncodedSeeker seeker : encodedSeekers) {
      seeker.seekToKeyInBlock(keyValue, seekBefore);
      seeker.rewind();

      Cell actualKeyValue = seeker.getCell();
      ByteBuffer actualKey = null;
      if (seeker instanceof PrefixTreeSeeker) {
        byte[] serializedKey = CellUtil.getCellKeySerializedAsKeyValueKey(seeker.getKey());
        actualKey = ByteBuffer.wrap(KeyValueUtil.createKeyValueFromKey(serializedKey).getKey());
      } else {
        actualKey = ByteBuffer.wrap(((KeyValue) seeker.getKey()).getKey());
      }
      ByteBuffer actualValue = seeker.getValueShallowCopy();

      if (expectedKeyValue != null) {
        assertTrue(CellUtil.equals(expectedKeyValue, actualKeyValue));
      } else {
        expectedKeyValue = actualKeyValue;
      }

      if (expectedKey != null) {
        assertEquals(expectedKey, actualKey);
      } else {
        expectedKey = actualKey;
      }

      if (expectedValue != null) {
        assertEquals(expectedValue, actualValue);
      } else {
        expectedValue = actualValue;
      }
    }
  }

  private void testEncodersOnDataset(
      List<KeyValue> kvList, boolean includesMemstoreTS, boolean includesTags) throws IOException {
    ByteBuffer unencodedDataBuf =
        RedundantKVGenerator.convertKvToByteBuffer(kvList, includesMemstoreTS);
    HFileContext fileContext =
        new HFileContextBuilder()
            .withIncludesMvcc(includesMemstoreTS)
            .withIncludesTags(includesTags)
            .build();
    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
      DataBlockEncoder encoder = encoding.getEncoder();
      if (encoder == null) {
        continue;
      }
      HFileBlockEncodingContext encodingContext =
          new HFileBlockDefaultEncodingContext(
              encoding, HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext);

      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      baos.write(HConstants.HFILEBLOCK_DUMMY_HEADER);
      DataOutputStream dos = new DataOutputStream(baos);
      encoder.startBlockEncoding(encodingContext, dos);
      for (KeyValue kv : kvList) {
        encoder.encode(kv, encodingContext, dos);
      }
      encoder.endBlockEncoding(encodingContext, dos, baos.getBuffer());
      byte[] encodedData = baos.toByteArray();

      testAlgorithm(encodedData, unencodedDataBuf, encoder);
    }
  }

  @Test
  public void testZeroByte() throws IOException {
    List<KeyValue> kvList = new ArrayList<KeyValue>();
    byte[] row = Bytes.toBytes("abcd");
    byte[] family = new byte[] {'f'};
    byte[] qualifier0 = new byte[] {'b'};
    byte[] qualifier1 = new byte[] {'c'};
    byte[] value0 = new byte[] {'d'};
    byte[] value1 = new byte[] {0x00};
    if (includesTags) {
      kvList.add(
          new KeyValue(
              row,
              family,
              qualifier0,
              0,
              value0,
              new Tag[] {new ArrayBackedTag((byte) 1, "value1")}));
      kvList.add(
          new KeyValue(
              row,
              family,
              qualifier1,
              0,
              value1,
              new Tag[] {new ArrayBackedTag((byte) 1, "value1")}));
    } else {
      kvList.add(new KeyValue(row, family, qualifier0, 0, Type.Put, value0));
      kvList.add(new KeyValue(row, family, qualifier1, 0, Type.Put, value1));
    }
    testEncodersOnDataset(kvList, includesMemstoreTS, includesTags);
  }

  private void testAlgorithm(
      byte[] encodedData, ByteBuffer unencodedDataBuf, DataBlockEncoder encoder)
      throws IOException {
    // decode
    ByteArrayInputStream bais =
        new ByteArrayInputStream(
            encodedData, ENCODED_DATA_OFFSET, encodedData.length - ENCODED_DATA_OFFSET);
    DataInputStream dis = new DataInputStream(bais);
    ByteBuffer actualDataset;
    HFileContext meta =
        new HFileContextBuilder()
            .withHBaseCheckSum(false)
            .withIncludesMvcc(includesMemstoreTS)
            .withIncludesTags(includesTags)
            .withCompression(Compression.Algorithm.NONE)
            .build();
    actualDataset = encoder.decodeKeyValues(dis, encoder.newDataBlockDecodingContext(meta));
    actualDataset.rewind();

    // this is because in case of prefix tree the decoded stream will not have
    // the
    // mvcc in it.
    assertEquals(
        "Encoding -> decoding gives different results for " + encoder,
        Bytes.toStringBinary(unencodedDataBuf),
        Bytes.toStringBinary(actualDataset));
  }
}
Esempio n. 3
0
/**
 * Simple test for {@link CellSortReducer} and {@link HFileOutputFormat2}. Sets up and runs a
 * mapreduce job that writes hfile output. Creates a few inner classes to implement splits and an
 * inputformat that emits keys and values like those of {@link PerformanceEvaluation}.
 */
@Category({VerySlowMapReduceTests.class, LargeTests.class})
public class TestHFileOutputFormat2 {
  @Rule
  public final TestRule timeout =
      CategoryBasedTimeout.builder()
          .withTimeout(this.getClass())
          .withLookingForStuckThread(true)
          .build();

  private static final int ROWSPERSPLIT = 1024;

  private static final byte[][] FAMILIES = {
    Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A")),
    Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B"))
  };
  private static final TableName TABLE_NAME = TableName.valueOf("TestTable");

  private HBaseTestingUtility util = new HBaseTestingUtility();

  private static final Log LOG = LogFactory.getLog(TestHFileOutputFormat2.class);

  /** Simple mapper that makes KeyValue output. */
  static class RandomKVGeneratingMapper
      extends Mapper<NullWritable, NullWritable, ImmutableBytesWritable, Cell> {

    private int keyLength;
    private static final int KEYLEN_DEFAULT = 10;
    private static final String KEYLEN_CONF = "randomkv.key.length";

    private int valLength;
    private static final int VALLEN_DEFAULT = 10;
    private static final String VALLEN_CONF = "randomkv.val.length";
    private static final byte[] QUALIFIER = Bytes.toBytes("data");

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);

      Configuration conf = context.getConfiguration();
      keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
      valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
    }

    @Override
    protected void map(
        NullWritable n1,
        NullWritable n2,
        Mapper<NullWritable, NullWritable, ImmutableBytesWritable, Cell>.Context context)
        throws java.io.IOException, InterruptedException {

      byte keyBytes[] = new byte[keyLength];
      byte valBytes[] = new byte[valLength];

      int taskId = context.getTaskAttemptID().getTaskID().getId();
      assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";

      Random random = new Random();
      for (int i = 0; i < ROWSPERSPLIT; i++) {

        random.nextBytes(keyBytes);
        // Ensure that unique tasks generate unique keys
        keyBytes[keyLength - 1] = (byte) (taskId & 0xFF);
        random.nextBytes(valBytes);
        ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

        for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
          Cell kv = new KeyValue(keyBytes, family, QUALIFIER, valBytes);
          context.write(key, kv);
        }
      }
    }
  }

  private void setupRandomGeneratorMapper(Job job) {
    job.setInputFormatClass(NMapInputFormat.class);
    job.setMapperClass(RandomKVGeneratingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
  }

  /**
   * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if passed a keyvalue whose
   * timestamp is {@link HConstants#LATEST_TIMESTAMP}.
   *
   * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void test_LATEST_TIMESTAMP_isReplaced() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
    try {
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);
      final byte[] b = Bytes.toBytes("b");

      // Test 1.  Pass a KV that has a ts of LATEST_TIMESTAMP.  It should be
      // changed by call to write.  Check all in kv is same but ts.
      KeyValue kv = new KeyValue(b, b, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertFalse(original.equals(kv));
      assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
      assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
      assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
      assertNotSame(original.getTimestamp(), kv.getTimestamp());
      assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());

      // Test 2. Now test passing a kv that has explicit ts.  It should not be
      // changed by call to record write.
      kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertTrue(original.equals(kv));
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  private TaskAttemptContext createTestTaskAttemptContext(final Job job) throws Exception {
    HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
    TaskAttemptContext context =
        hadoop.createTestTaskAttemptContext(job, "attempt_201402131733_0001_m_000000_0");
    return context;
  }

  /*
   * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
   * metadata used by time-restricted scans.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void test_TIMERANGE() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_TIMERANGE_present");
    LOG.info("Timerange dir writing to dir: " + dir);
    try {
      // build a record writer using HFileOutputFormat2
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      // Pass two key values with explicit times stamps
      final byte[] b = Bytes.toBytes("b");

      // value 1 with timestamp 2000
      KeyValue kv = new KeyValue(b, b, b, 2000, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // value 2 with timestamp 1000
      kv = new KeyValue(b, b, b, 1000, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // verify that the file has the proper FileInfo.
      writer.close(context);

      // the generated file lives 1 directory down from the attempt directory
      // and is the only file, e.g.
      // _attempt__0000_r_000000_0/b/1979617994050536795
      FileSystem fs = FileSystem.get(conf);
      Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
      FileStatus[] sub1 = fs.listStatus(attemptDirectory);
      FileStatus[] file = fs.listStatus(sub1[0].getPath());

      // open as HFile Reader and pull out TIMERANGE FileInfo.
      HFile.Reader rd = HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), conf);
      Map<byte[], byte[]> finfo = rd.loadFileInfo();
      byte[] range = finfo.get("TIMERANGE".getBytes());
      assertNotNull(range);

      // unmarshall and check values.
      TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
      Writables.copyWritable(range, timeRangeTracker);
      LOG.info(
          timeRangeTracker.getMinimumTimestamp() + "...." + timeRangeTracker.getMaximumTimestamp());
      assertEquals(1000, timeRangeTracker.getMinimumTimestamp());
      assertEquals(2000, timeRangeTracker.getMaximumTimestamp());
      rd.close();
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  /** Run small MR job. */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testWritingPEData() throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
    FileSystem fs = testDir.getFileSystem(conf);

    // Set down this value or we OOME in eclipse.
    conf.setInt("mapreduce.task.io.sort.mb", 20);
    // Write a few files.
    conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

    Job job = new Job(conf, "testWritingPEData");
    setupRandomGeneratorMapper(job);
    // This partitioner doesn't work well for number keys but using it anyways
    // just to demonstrate how to configure it.
    byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
    byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

    Arrays.fill(startKey, (byte) 0);
    Arrays.fill(endKey, (byte) 0xff);

    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
    // Set start and end rows for partitioner.
    SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
    SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setOutputFormatClass(HFileOutputFormat2.class);
    job.setNumReduceTasks(4);
    job.getConfiguration()
        .setStrings(
            "io.serializations",
            conf.get("io.serializations"),
            MutationSerialization.class.getName(),
            ResultSerialization.class.getName(),
            KeyValueSerialization.class.getName());

    FileOutputFormat.setOutputPath(job, testDir);
    assertTrue(job.waitForCompletion(false));
    FileStatus[] files = fs.listStatus(testDir);
    assertTrue(files.length > 0);
  }

  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testJobConfiguration() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    conf.set("hbase.fs.tmp.dir", util.getDataTestDir("testJobConfiguration").toString());
    Job job = new Job(conf);
    job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
    Table table = Mockito.mock(Table.class);
    RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
    setupMockStartKeys(regionLocator);
    setupMockTableName(regionLocator);
    HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
    assertEquals(job.getNumReduceTasks(), 4);
  }

  private byte[][] generateRandomStartKeys(int numKeys) {
    Random random = new Random();
    byte[][] ret = new byte[numKeys][];
    // first region start key is always empty
    ret[0] = HConstants.EMPTY_BYTE_ARRAY;
    for (int i = 1; i < numKeys; i++) {
      ret[i] =
          PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
    }
    return ret;
  }

  private byte[][] generateRandomSplitKeys(int numKeys) {
    Random random = new Random();
    byte[][] ret = new byte[numKeys][];
    for (int i = 0; i < numKeys; i++) {
      ret[i] =
          PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
    }
    return ret;
  }

  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testMRIncrementalLoad() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoad\n");
    doIncrementalLoadTest(false, false);
  }

  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testMRIncrementalLoadWithSplit() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
    doIncrementalLoadTest(true, false);
  }

  /**
   * Test for HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY = true This test could only check the
   * correctness of original logic if LOCALITY_SENSITIVE_CONF_KEY is set to true. Because
   * MiniHBaseCluster always run with single hostname (and different ports), it's not possible to
   * check the region locality by comparing region locations and DN hostnames. When MiniHBaseCluster
   * supports explicit hostnames parameter (just like MiniDFSCluster does), we could test region
   * locality features more easily.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testMRIncrementalLoadWithLocality() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithLocality\n");
    doIncrementalLoadTest(false, true);
    doIncrementalLoadTest(true, true);
  }

  private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality)
      throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
    int hostCount = 1;
    int regionNum = 5;
    if (shouldKeepLocality) {
      // We should change host count higher than hdfs replica count when MiniHBaseCluster supports
      // explicit hostnames parameter just like MiniDFSCluster does.
      hostCount = 3;
      regionNum = 20;
    }

    byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
    String[] hostnames = new String[hostCount];
    for (int i = 0; i < hostCount; ++i) {
      hostnames[i] = "datanode_" + i;
    }
    util.startMiniCluster(1, hostCount, hostnames);

    Table table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
    try (RegionLocator r = util.getConnection().getRegionLocator(TABLE_NAME);
        Admin admin = util.getConnection().getAdmin(); ) {
      assertEquals("Should start with empty table", 0, util.countRows(table));
      int numRegions = r.getStartKeys().length;
      assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);

      // Generate the bulk load files
      runIncrementalPELoad(conf, table.getTableDescriptor(), r, testDir);
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

      // Make sure that a directory was created for every CF
      int dir = 0;
      for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);

      // handle the split case
      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getName());
        while (util.getMiniHBaseCluster()
            .getMaster()
            .getAssignmentManager()
            .getRegionStates()
            .isRegionsInTransition()) {
          Threads.sleep(200);
          LOG.info("Waiting on table to finish disabling");
        }
        util.deleteTable(table.getName());
        byte[][] newSplitKeys = generateRandomSplitKeys(14);
        table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);

        while (util.getConnection().getRegionLocator(TABLE_NAME).getAllRegionLocations().size()
                != 15
            || !admin.isTableAvailable(table.getName())) {
          Thread.sleep(200);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, r);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals(
          "LoadIncrementalHFiles should put expected data in table",
          expectedRows,
          util.countRows(table));
      Scan scan = new Scan();
      ResultScanner results = table.getScanner(scan);
      for (Result res : results) {
        assertEquals(FAMILIES.length, res.rawCells().length);
        Cell first = res.rawCells()[0];
        for (Cell kv : res.rawCells()) {
          assertTrue(CellUtil.matchingRow(first, kv));
          assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        }
      }
      results.close();
      String tableDigestBefore = util.checksumRows(table);

      // Check region locality
      HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
      for (HRegion region : util.getHBaseCluster().getRegions(TABLE_NAME)) {
        hbd.add(region.getHDFSBlocksDistribution());
      }
      for (String hostname : hostnames) {
        float locality = hbd.getBlockLocalityIndex(hostname);
        LOG.info("locality of [" + hostname + "]: " + locality);
        assertEquals(100, (int) (locality * 100));
      }

      // Cause regions to reopen
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME);
      assertEquals(
          "Data should remain after reopening of regions",
          tableDigestBefore,
          util.checksumRows(table));
    } finally {
      testDir.getFileSystem(conf).delete(testDir, true);
      util.deleteTable(TABLE_NAME);
      util.shutdownMiniCluster();
    }
  }

  private void runIncrementalPELoad(
      Configuration conf,
      HTableDescriptor tableDescriptor,
      RegionLocator regionLocator,
      Path outDir)
      throws IOException, UnsupportedEncodingException, InterruptedException,
          ClassNotFoundException {
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
    job.getConfiguration()
        .setStrings(
            "io.serializations",
            conf.get("io.serializations"),
            MutationSerialization.class.getName(),
            ResultSerialization.class.getName(),
            KeyValueSerialization.class.getName());
    setupRandomGeneratorMapper(job);
    HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
    FileOutputFormat.setOutputPath(job, outDir);

    assertFalse(util.getTestFileSystem().exists(outDir));

    assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());

    assertTrue(job.waitForCompletion(true));
  }

  /**
   * Test for {@link HFileOutputFormat2#configureCompression(org.apache.hadoop.hbase.client.Table,
   * Configuration)} and {@link HFileOutputFormat2#createFamilyCompressionMap (Configuration)}.
   * Tests that the compression map is correctly serialized into and deserialized from configuration
   *
   * @throws IOException
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Compression.Algorithm> familyToCompression =
          getMockColumnFamiliesForCompression(numCfs);
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForCompression(table, familyToCompression);
      HFileOutputFormat2.configureCompression(conf, table.getTableDescriptor());

      // read back family specific compression setting from the configuration
      Map<byte[], Algorithm> retrievedFamilyToCompressionMap =
          HFileOutputFormat2.createFamilyCompressionMap(conf);

      // test that we have a value for all column families that matches with the
      // used mock values
      for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
        assertEquals(
            "Compression configuration incorrect for column family:" + entry.getKey(),
            entry.getValue(),
            retrievedFamilyToCompressionMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForCompression(
      Table table, Map<String, Compression.Algorithm> familyToCompression) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
      mockTableDescriptor.addFamily(
          new HColumnDescriptor(entry.getKey())
              .setMaxVersions(1)
              .setCompressionType(entry.getValue())
              .setBlockCacheEnabled(false)
              .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to compression algorithms for testing column family
   *     compression. Column family names have special characters
   */
  private Map<String, Compression.Algorithm> getMockColumnFamiliesForCompression(int numCfs) {
    Map<String, Compression.Algorithm> familyToCompression =
        new HashMap<String, Compression.Algorithm>();
    // use column family names having special characters
    if (numCfs-- > 0) {
      familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family3", Compression.Algorithm.NONE);
    }
    return familyToCompression;
  }

  /**
   * Test for {@link HFileOutputFormat2#configureBloomType(org.apache.hadoop.hbase.client.Table,
   * Configuration)} and {@link HFileOutputFormat2#createFamilyBloomTypeMap (Configuration)}. Tests
   * that the compression map is correctly serialized into and deserialized from configuration
   *
   * @throws IOException
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 2; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, BloomType> familyToBloomType = getMockColumnFamiliesForBloomType(numCfs);
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBloomType(table, familyToBloomType);
      HFileOutputFormat2.configureBloomType(table.getTableDescriptor(), conf);

      // read back family specific data block encoding settings from the
      // configuration
      Map<byte[], BloomType> retrievedFamilyToBloomTypeMap =
          HFileOutputFormat2.createFamilyBloomTypeMap(conf);

      // test that we have a value for all column families that matches with the
      // used mock values
      for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
        assertEquals(
            "BloomType configuration incorrect for column family:" + entry.getKey(),
            entry.getValue(),
            retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForBloomType(
      Table table, Map<String, BloomType> familyToDataBlockEncoding) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, BloomType> entry : familyToDataBlockEncoding.entrySet()) {
      mockTableDescriptor.addFamily(
          new HColumnDescriptor(entry.getKey())
              .setMaxVersions(1)
              .setBloomFilterType(entry.getValue())
              .setBlockCacheEnabled(false)
              .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to compression algorithms for testing column family
   *     compression. Column family names have special characters
   */
  private Map<String, BloomType> getMockColumnFamiliesForBloomType(int numCfs) {
    Map<String, BloomType> familyToBloomType = new HashMap<String, BloomType>();
    // use column family names having special characters
    if (numCfs-- > 0) {
      familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
    }
    if (numCfs-- > 0) {
      familyToBloomType.put("Family2=asdads&!AASD", BloomType.ROWCOL);
    }
    if (numCfs-- > 0) {
      familyToBloomType.put("Family3", BloomType.NONE);
    }
    return familyToBloomType;
  }

  /**
   * Test for {@link HFileOutputFormat2#configureBlockSize(org.apache.hadoop.hbase.client.Table,
   * Configuration)} and {@link HFileOutputFormat2#createFamilyBlockSizeMap (Configuration)}. Tests
   * that the compression map is correctly serialized into and deserialized from configuration
   *
   * @throws IOException
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Integer> familyToBlockSize = getMockColumnFamiliesForBlockSize(numCfs);
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBlockSize(table, familyToBlockSize);
      HFileOutputFormat2.configureBlockSize(table.getTableDescriptor(), conf);

      // read back family specific data block encoding settings from the
      // configuration
      Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
          HFileOutputFormat2.createFamilyBlockSizeMap(conf);

      // test that we have a value for all column families that matches with the
      // used mock values
      for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
        assertEquals(
            "BlockSize configuration incorrect for column family:" + entry.getKey(),
            entry.getValue(),
            retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForBlockSize(
      Table table, Map<String, Integer> familyToDataBlockEncoding) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, Integer> entry : familyToDataBlockEncoding.entrySet()) {
      mockTableDescriptor.addFamily(
          new HColumnDescriptor(entry.getKey())
              .setMaxVersions(1)
              .setBlocksize(entry.getValue())
              .setBlockCacheEnabled(false)
              .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to compression algorithms for testing column family
   *     compression. Column family names have special characters
   */
  private Map<String, Integer> getMockColumnFamiliesForBlockSize(int numCfs) {
    Map<String, Integer> familyToBlockSize = new HashMap<String, Integer>();
    // use column family names having special characters
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family1!@#!@#&", 1234);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family2=asdads&!AASD", Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family2=asdads&!AASD", Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family3", 0);
    }
    return familyToBlockSize;
  }

  /**
   * Test for {@link HFileOutputFormat2#configureDataBlockEncoding(HTableDescriptor, Configuration)}
   * and {@link HFileOutputFormat2#createFamilyDataBlockEncodingMap(Configuration)}. Tests that the
   * compression map is correctly serialized into and deserialized from configuration
   *
   * @throws IOException
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, DataBlockEncoding> familyToDataBlockEncoding =
          getMockColumnFamiliesForDataBlockEncoding(numCfs);
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForDataBlockEncoding(table, familyToDataBlockEncoding);
      HTableDescriptor tableDescriptor = table.getTableDescriptor();
      HFileOutputFormat2.configureDataBlockEncoding(tableDescriptor, conf);

      // read back family specific data block encoding settings from the
      // configuration
      Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
          HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);

      // test that we have a value for all column families that matches with the
      // used mock values
      for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
        assertEquals(
            "DataBlockEncoding configuration incorrect for column family:" + entry.getKey(),
            entry.getValue(),
            retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForDataBlockEncoding(
      Table table, Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
      mockTableDescriptor.addFamily(
          new HColumnDescriptor(entry.getKey())
              .setMaxVersions(1)
              .setDataBlockEncoding(entry.getValue())
              .setBlockCacheEnabled(false)
              .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to compression algorithms for testing column family
   *     compression. Column family names have special characters
   */
  private Map<String, DataBlockEncoding> getMockColumnFamiliesForDataBlockEncoding(int numCfs) {
    Map<String, DataBlockEncoding> familyToDataBlockEncoding =
        new HashMap<String, DataBlockEncoding>();
    // use column family names having special characters
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family2=asdads&!AASD", DataBlockEncoding.FAST_DIFF);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family2=asdads&!AASD", DataBlockEncoding.PREFIX);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
    }
    return familyToDataBlockEncoding;
  }

  private void setupMockStartKeys(RegionLocator table) throws IOException {
    byte[][] mockKeys =
        new byte[][] {
          HConstants.EMPTY_BYTE_ARRAY,
          Bytes.toBytes("aaa"),
          Bytes.toBytes("ggg"),
          Bytes.toBytes("zzz")
        };
    Mockito.doReturn(mockKeys).when(table).getStartKeys();
  }

  private void setupMockTableName(RegionLocator table) throws IOException {
    TableName mockTableName = TableName.valueOf("mock_table");
    Mockito.doReturn(mockTableName).when(table).getName();
  }

  /**
   * Test that {@link HFileOutputFormat2} RecordWriter uses compression and bloom filter settings
   * from the column family descriptor
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testColumnFamilySettings() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("testColumnFamilySettings");

    // Setup table descriptor
    Table table = Mockito.mock(Table.class);
    RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    Mockito.doReturn(htd).when(table).getTableDescriptor();
    for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
      htd.addFamily(hcd);
    }

    // set up the table to return some mock keys
    setupMockStartKeys(regionLocator);

    try {
      // partial map red setup to get an operational writer for testing
      // We turn off the sequence file compression, because DefaultCodec
      // pollutes the GZip codec pool with an incompatible compressor.
      conf.set("io.seqfile.compression.type", "NONE");
      conf.set("hbase.fs.tmp.dir", dir.toString());
      // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
      conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);

      Job job = new Job(conf, "testLocalMRIncrementalLoad");
      job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
      setupRandomGeneratorMapper(job);
      HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      // write out random rows
      writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
      writer.close(context);

      // Make sure that a directory was created for every CF
      FileSystem fs = dir.getFileSystem(conf);

      // commit so that the filesystem has one directory per column family
      hof.getOutputCommitter(context).commitTask(context);
      hof.getOutputCommitter(context).commitJob(context);
      FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
      assertEquals(htd.getFamilies().size(), families.length);
      for (FileStatus f : families) {
        String familyStr = f.getPath().getName();
        HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
        // verify that the compression on this file matches the configured
        // compression
        Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
        Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
        Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

        byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
        if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
        assertEquals(
            "Incorrect bloom filter used for column family "
                + familyStr
                + "(reader: "
                + reader
                + ")",
            hcd.getBloomFilterType(),
            BloomType.valueOf(Bytes.toString(bloomFilter)));
        assertEquals(
            "Incorrect compression used for column family "
                + familyStr
                + "(reader: "
                + reader
                + ")",
            hcd.getCompressionType(),
            reader.getFileContext().getCompression());
      }
    } finally {
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  /**
   * Write random values to the writer assuming a table created using {@link #FAMILIES} as column
   * family descriptors
   */
  private void writeRandomKeyValues(
      RecordWriter<ImmutableBytesWritable, Cell> writer,
      TaskAttemptContext context,
      Set<byte[]> families,
      int numRows)
      throws IOException, InterruptedException {
    byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
    int valLength = 10;
    byte valBytes[] = new byte[valLength];

    int taskId = context.getTaskAttemptID().getTaskID().getId();
    assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";
    final byte[] qualifier = Bytes.toBytes("data");
    Random random = new Random();
    for (int i = 0; i < numRows; i++) {

      Bytes.putInt(keyBytes, 0, i);
      random.nextBytes(valBytes);
      ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

      for (byte[] family : families) {
        Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes);
        writer.write(key, kv);
      }
    }
  }

  /**
   * This test is to test the scenario happened in HBASE-6901. All files are bulk loaded and
   * excluded from minor compaction. Without the fix of HBASE-6901, an
   * ArrayIndexOutOfBoundsException will be thrown.
   */
  @Ignore("Flakey: See HBASE-9051")
  @Test
  public void testExcludeAllFromMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    util.startMiniCluster();
    try (Connection conn = ConnectionFactory.createConnection();
        Admin admin = conn.getAdmin();
        Table table = util.createTable(TABLE_NAME, FAMILIES);
        RegionLocator locator = conn.getRegionLocator(TABLE_NAME)) {
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // deep inspection: get the StoreFile dir
      final Path storePath =
          new Path(
              FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
              new Path(
                  admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
                  Bytes.toString(FAMILIES[0])));
      assertEquals(0, fs.listStatus(storePath).length);

      // Generate two bulk load files
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);

      for (int i = 0; i < 2; i++) {
        Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
        runIncrementalPELoad(
            conf, table.getTableDescriptor(), conn.getRegionLocator(TABLE_NAME), testDir);
        // Perform the actual load
        new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, locator);
      }

      // Ensure data shows up
      int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals(
          "LoadIncrementalHFiles should put expected data in table",
          expectedRows,
          util.countRows(table));

      // should have a second StoreFile now
      assertEquals(2, fs.listStatus(storePath).length);

      // minor compactions shouldn't get rid of the file
      admin.compact(TABLE_NAME);
      try {
        quickPoll(
            new Callable<Boolean>() {
              @Override
              public Boolean call() throws Exception {
                return fs.listStatus(storePath).length == 1;
              }
            },
            5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // this is expected behavior
      }

      // a major compaction should work though
      admin.majorCompact(TABLE_NAME);
      quickPoll(
          new Callable<Boolean>() {
            @Override
            public Boolean call() throws Exception {
              return fs.listStatus(storePath).length == 1;
            }
          },
          5000);

    } finally {
      util.shutdownMiniCluster();
    }
  }

  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testExcludeMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    util.startMiniCluster();
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Admin admin = conn.getAdmin()) {
      Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      Table table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // deep inspection: get the StoreFile dir
      final Path storePath =
          new Path(
              FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
              new Path(
                  admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
                  Bytes.toString(FAMILIES[0])));
      assertEquals(0, fs.listStatus(storePath).length);

      // put some data in it and flush to create a storefile
      Put p = new Put(Bytes.toBytes("test"));
      p.addColumn(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
      table.put(p);
      admin.flush(TABLE_NAME);
      assertEquals(1, util.countRows(table));
      quickPoll(
          new Callable<Boolean>() {
            @Override
            public Boolean call() throws Exception {
              return fs.listStatus(storePath).length == 1;
            }
          },
          5000);

      // Generate a bulk load file with more rows
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);

      RegionLocator regionLocator = conn.getRegionLocator(TABLE_NAME);
      runIncrementalPELoad(conf, table.getTableDescriptor(), regionLocator, testDir);

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, regionLocator);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals(
          "LoadIncrementalHFiles should put expected data in table",
          expectedRows + 1,
          util.countRows(table));

      // should have a second StoreFile now
      assertEquals(2, fs.listStatus(storePath).length);

      // minor compactions shouldn't get rid of the file
      admin.compact(TABLE_NAME);
      try {
        quickPoll(
            new Callable<Boolean>() {
              @Override
              public Boolean call() throws Exception {
                return fs.listStatus(storePath).length == 1;
              }
            },
            5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // this is expected behavior
      }

      // a major compaction should work though
      admin.majorCompact(TABLE_NAME);
      quickPoll(
          new Callable<Boolean>() {
            @Override
            public Boolean call() throws Exception {
              return fs.listStatus(storePath).length == 1;
            }
          },
          5000);

    } finally {
      util.shutdownMiniCluster();
    }
  }

  private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
    int sleepMs = 10;
    int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
    while (retries-- > 0) {
      if (c.call().booleanValue()) {
        return;
      }
      Thread.sleep(sleepMs);
    }
    fail();
  }

  public static void main(String args[]) throws Exception {
    new TestHFileOutputFormat2().manualTest(args);
  }

  public void manualTest(String args[]) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    util = new HBaseTestingUtility(conf);
    if ("newtable".equals(args[0])) {
      TableName tname = TableName.valueOf(args[1]);
      byte[][] splitKeys = generateRandomSplitKeys(4);
      try (Table table = util.createTable(tname, FAMILIES, splitKeys)) {}
    } else if ("incremental".equals(args[0])) {
      TableName tname = TableName.valueOf(args[1]);
      try (Connection c = ConnectionFactory.createConnection(conf);
          Admin admin = c.getAdmin();
          RegionLocator regionLocator = c.getRegionLocator(tname)) {
        Path outDir = new Path("incremental-out");
        runIncrementalPELoad(conf, admin.getTableDescriptor(tname), regionLocator, outDir);
      }
    } else {
      throw new RuntimeException("usage: TestHFileOutputFormat2 newtable | incremental");
    }
  }
}