/**
 * Maps every unique {@code <family, name>} column pair of all logical tables to a sequential
 * index. The table name is part of TableRowkey, so it is intentionally not part of the key.
 *
 * <p>Columns are visited in the order of {@code logicalNames} and, within a table, in the order
 * returned by {@code PTable#getColumns()}. After the columns of each table, an entry for that
 * table's empty column (empty column family + {@code QueryConstants.EMPTY_COLUMN_BYTES}) is
 * written unconditionally, i.e. it always consumes a fresh index and replaces any entry already
 * stored under the same key.
 *
 * @throws SQLException if a table cannot be resolved through {@code PhoenixRuntime.getTable}
 */
 private void initColumnIndexes() throws SQLException {
   columnIndexes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
   int columnIndex = 0;
   for (int index = 0; index < logicalNames.size(); index++) {
     PTable table = PhoenixRuntime.getTable(conn, logicalNames.get(index));
     for (PColumn c : table.getColumns()) {
       // Columns without a family name (PK columns) are registered under an empty family.
       byte[] family = new byte[0];
       if (c.getFamilyName() != null) {
         family = c.getFamilyName().getBytes();
       }
       byte[] name = c.getName().getBytes();
       byte[] cfn = Bytes.add(family, QueryConstants.NAMESPACE_SEPARATOR_BYTES, name);
       if (!columnIndexes.containsKey(cfn)) {
         columnIndexes.put(cfn, Integer.valueOf(columnIndex));
         columnIndex++;
       }
     }
     byte[] emptyColumnFamily = SchemaUtil.getEmptyColumnFamily(table);
     byte[] emptyCfn =
         Bytes.add(
             emptyColumnFamily,
             QueryConstants.NAMESPACE_SEPARATOR_BYTES,
             QueryConstants.EMPTY_COLUMN_BYTES);
     // Deliberately unconditional: keeps the index numbering identical to the previous
     // implementation, which other components may rely on.
     columnIndexes.put(emptyCfn, Integer.valueOf(columnIndex));
     columnIndex++;
   }
 }
Beispiel #2
0
  /**
   * Builds the row key for a message of the given subscription.
   *
   * <p>The key is the concatenation of: {@code PROBLEMATIC_MARKER} (only when {@code problematic}
   * is true), the subscription name bytes, the message timestamp, the message sequence number and
   * the message's own row key.
   *
   * @param message the message whose timestamp, sequence number and row key are encoded
   * @param subscription the subscription name
   * @param problematic whether the key must carry the problematic marker prefix
   * @return the assembled row key
   */
  private byte[] createRowKey(RowLogMessage message, String subscription, boolean problematic) {
    final byte[] marker = problematic ? PROBLEMATIC_MARKER : new byte[0];
    final byte[] prefix = Bytes.add(marker, Bytes.toBytes(subscription));
    final byte[] ordering =
        Bytes.add(Bytes.toBytes(message.getTimestamp()), Bytes.toBytes(message.getSeqNr()));
    return Bytes.add(Bytes.add(prefix, ordering), message.getRowKey());
  }
Beispiel #3
0
  /**
   * Fetches the next messages stored for a subscription.
   *
   * <p>Rows are scanned starting at the subscription prefix (preceded by {@code
   * PROBLEMATIC_MARKER} when {@code problematic} is true), optionally extended with {@code
   * minimalTimestamp}. For a non-problematic request a single batch of {@code batchSize} rows is
   * read; for a problematic request batches are read until one comes back empty or a row outside
   * the prefix is encountered.
   *
   * @param subscription the subscription name
   * @param minimalTimestamp if non-null, used both in the start row and as the lower bound of the
   *     scan's time range
   * @param problematic whether to read the problematic messages of the subscription
   * @return the decoded messages, in scan order
   * @throws RowLogException if HBase reports an {@link IOException}
   */
  public List<RowLogMessage> next(String subscription, Long minimalTimestamp, boolean problematic)
      throws RowLogException {
    final byte[] subscriptionBytes = Bytes.toBytes(subscription);
    final byte[] rowPrefix =
        problematic ? Bytes.add(PROBLEMATIC_MARKER, subscriptionBytes) : subscriptionBytes;
    final boolean bounded = minimalTimestamp != null;
    final byte[] startRow =
        bounded ? Bytes.add(rowPrefix, Bytes.toBytes(minimalTimestamp)) : rowPrefix;
    try {
      final List<RowLogMessage> collected = new ArrayList<RowLogMessage>();
      final Scan scan = new Scan(startRow);
      if (bounded) {
        scan.setTimeRange(minimalTimestamp, Long.MAX_VALUE);
      }
      scan.addColumn(MESSAGES_CF, MESSAGE_COLUMN);
      final ResultScanner scanner = table.getScanner(scan);

      boolean moreBatches;
      do {
        final Result[] batch = scanner.next(batchSize);
        // Only problematic reads continue past the first batch, and only while rows keep coming.
        moreBatches = problematic && batch.length != 0;
        for (int i = 0; i < batch.length; i++) {
          final byte[] key = batch[i].getRow();
          if (!Bytes.startsWith(key, rowPrefix)) {
            // There were no (more) messages for this subscription
            moreBatches = false;
            break;
          }
          // The message id is whatever follows the (marker +) subscription prefix.
          final byte[] messageId = Bytes.tail(key, key.length - rowPrefix.length);
          final byte[] payload = batch[i].getValue(MESSAGES_CF, MESSAGE_COLUMN);
          collected.add(decodeMessage(messageId, payload));
        }
      } while (moreBatches);

      // The scanner is not closed in a finally block, since when we get an IOException from
      // HBase, it is likely that closing the scanner will give problems too. Not closing
      // the scanner is not fatal since HBase will expire it after a while.
      Closer.close(scanner);

      return collected;
    } catch (IOException e) {
      throw new RowLogException("Failed to fetch next message from RowLogShard", e);
    }
  }
 /**
  * Builds a {@code ScannerModel} that mirrors the given scan: start/stop row, selected columns,
  * time range, and, when set, caching, max versions and filter.
  *
  * @param scan the scan specification
  * @return the populated model
  * @throws Exception
  */
 public static ScannerModel fromScan(Scan scan) throws Exception {
   final ScannerModel result = new ScannerModel();
   result.setStartRow(scan.getStartRow());
   result.setEndRow(scan.getStopRow());

   final Map<byte[], NavigableSet<byte[]>> familyMap = scan.getFamilyMap();
   if (familyMap != null) {
     for (Map.Entry<byte[], NavigableSet<byte[]>> familyEntry : familyMap.entrySet()) {
       final byte[] family = familyEntry.getKey();
       final NavigableSet<byte[]> qualifiers = familyEntry.getValue();
       if (qualifiers == null) {
         // No qualifier set: the family itself is added as the column.
         result.addColumn(family);
         continue;
       }
       for (byte[] qualifier : qualifiers) {
         result.addColumn(Bytes.add(family, COLUMN_DIVIDER, qualifier));
       }
     }
   }

   result.setStartTime(scan.getTimeRange().getMin());
   result.setEndTime(scan.getTimeRange().getMax());

   // Non-positive values are left at the model's own defaults.
   final int caching = scan.getCaching();
   if (caching > 0) {
     result.setBatch(caching);
   }
   final int versions = scan.getMaxVersions();
   if (versions > 0) {
     result.setMaxVersions(versions);
   }

   final Filter scanFilter = scan.getFilter();
   if (scanFilter != null) {
     result.setFilter(stringifyFilter(scanFilter));
   }
   return result;
 }
 /**
  * Creates {@code n} byte arrays, each consisting of {@code base} followed by the element's
  * position formatted with {@code "%04d"}.
  *
  * @param base the common prefix
  * @param n number of arrays to create
  * @return the generated arrays, in ascending position order
  */
 private static byte[][] makeN(byte[] base, int n) {
   final byte[][] keys = new byte[n][];
   int position = 0;
   while (position < n) {
     final String suffix = String.format("%04d", position);
     keys[position] = Bytes.add(base, Bytes.toBytes(suffix));
     position++;
   }
   return keys;
 }
  /**
   * Builds a salted key: a 4-byte salt followed by the bytes of the udid.
   *
   * <p>The salt places the first udid byte in the most significant byte and ORs in {@code
   * token_i}. The udid is encoded with {@code Bytes.toBytes(String)} (UTF-8) rather than the
   * platform default charset, so the same udid yields the same key on every JVM.
   *
   * @param token_i value OR-ed into the salt
   * @param udid_s device identifier; must be non-empty
   * @return the salt bytes followed by the udid bytes
   * @throws IllegalArgumentException if {@code udid_s} is null or empty
   */
  protected static byte[] constructKey(int token_i, String udid_s) {
    if (udid_s == null || udid_s.isEmpty()) {
      throw new IllegalArgumentException("udid must be a non-empty string");
    }
    byte[] udid = Bytes.toBytes(udid_s);
    int salt = (udid[0] << 24) | token_i;

    return Bytes.add(Bytes.toBytes(salt), udid);
  }
 /**
  * Opens an output stream for a new blob. The blob key is derived from a random UUID: its most
  * significant 64 bits followed by its least significant 64 bits.
  */
 @Override
 public OutputStream getOutputStream(Blob blob) throws BlobException {
   final UUID id = UUID.randomUUID();
   final byte[] high = Bytes.toBytes(id.getMostSignificantBits());
   final byte[] low = Bytes.toBytes(id.getLeastSignificantBits());
   return new HBaseBlobOutputStream(table, Bytes.add(high, low), blob);
 }
  /**
   * A set up method to start the test cluster. The AggregateImplementation coprocessor is
   * registered through the region coprocessor configuration key before the mini cluster starts.
   * The test table is then created, split into multiple regions and populated.
   *
   * @throws Exception
   */
  @BeforeClass
  public static void setupBeforeClass() throws Exception {

    conf.set(
        CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
        "org.apache.hadoop.hbase.coprocessor.AggregateImplementation");

    util.startMiniCluster(2);
    HTable table = util.createTable(TEST_TABLE, TEST_FAMILY);
    util.createMultiRegions(
        util.getConfiguration(),
        table,
        TEST_FAMILY,
        new byte[][] {HConstants.EMPTY_BYTE_ARRAY, ROWS[rowSeperator1], ROWS[rowSeperator2]});
    // The test table has one CQ which is always populated and one variable CQ for each row:
    //   rowKey1: CF:CQ CF:CQ1
    //   rowKey2: CF:CQ CF:CQ2
    for (int i = 0; i < ROWSIZE; i++) {
      long value = i;

      Put put = new Put(ROWS[i]);
      put.setWriteToWAL(false);
      put.add(TEST_FAMILY, TEST_QUALIFIER, Bytes.toBytes(value));
      table.put(put);

      Put p2 = new Put(ROWS[i]);
      // Previously this call was made on `put` again, so p2 was still written to the WAL.
      p2.setWriteToWAL(false);
      p2.add(
          TEST_FAMILY, Bytes.add(TEST_MULTI_CQ, Bytes.toBytes(value)), Bytes.toBytes(value * 10));
      table.put(p2);
    }
    table.close();
  }
Beispiel #9
0
 /**
  * Records the column of {@code kv} as deleted when the KeyValue carries an empty value.
  *
  * <p>The column is keyed by family bytes immediately followed by qualifier bytes and is stored
  * together with the KeyValue's timestamp.
  *
  * @param kv the KeyValue to inspect
  * @return true if the value was empty and the column was recorded, false otherwise
  */
 public boolean addDeleted(KeyValue kv) {
   if (kv.getValue().length != 0) {
     return false;
   }
   final byte[] column = Bytes.add(kv.getFamily(), kv.getQualifier());
   deletedColumns.put(new ByteArray(column), kv.getTimestamp());
   return true;
 }
Beispiel #10
0
 /**
  * Converts an HBase {@code HColumnDescriptor} into a new Thrift {@code ColumnDescriptor}.
  *
  * <p>The Thrift name is the HBase family name with the column family delimiter appended. Max
  * versions, compression, in-memory flag, block cache flag and bloom filter type are copied.
  *
  * @param in Hbase HColumnDescriptor object
  * @return Thrift ColumnDescriptor
  */
 public static ColumnDescriptor colDescFromHbase(HColumnDescriptor in) {
   final ColumnDescriptor descriptor = new ColumnDescriptor();
   final byte[] delimitedName = Bytes.add(in.getName(), KeyValue.COLUMN_FAMILY_DELIM_ARRAY);
   descriptor.name = ByteBuffer.wrap(delimitedName);
   descriptor.maxVersions = in.getMaxVersions();
   descriptor.compression = in.getCompression().toString();
   descriptor.inMemory = in.isInMemory();
   descriptor.blockCacheEnabled = in.isBlockCacheEnabled();
   descriptor.bloomFilterType = in.getBloomFilterType().toString();
   return descriptor;
 }
 /**
  * Initializes the consumer state for the given configuration and queue.
  *
  * @param consumerConfig consumer configuration; its group id is used in the state column name
  * @param queueName the queue this consumer reads from; used to derive the row prefix
  */
 protected AbstractQueueConsumer(ConsumerConfig consumerConfig, QueueName queueName) {
   this.consumerConfig = consumerConfig;
   this.queueName = queueName;
   // Both maps are sorted with the byte[] comparator.
   this.entryCache = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
   this.consumingEntries = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
   this.queueRowPrefix = QueueEntryRow.getQueueRowPrefix(queueName);
   // NOTE(review): getRowKey is an instance method called during construction; it presumably
   // reads queueRowPrefix assigned on the line above, so keep this ordering — confirm.
   this.startRow = getRowKey(0L, 0);
   // State column name = STATE_COLUMN_PREFIX followed by the bytes of the consumer group id.
   this.stateColumnName =
       Bytes.add(QueueEntryRow.STATE_COLUMN_PREFIX, Bytes.toBytes(consumerConfig.getGroupId()));
 }
    /**
     * Hashes this object's ID with the configured {@code hash} function. If the ID has not been
     * set yet, it is lazily built from the row bytes followed by the index id.
     *
     * @throws NullPointerException if no hash function is available
     */
    @Override
    public int hashCode() {
      if (this.ID == null) {
        this.ID = Bytes.add(row, indexSpec.getIndexId());
      }
      if (hash == null) {
        // Same exception type as before, but with a diagnosable message instead of stdout noise.
        throw new NullPointerException("hash function is not initialized");
      }
      return hash.hash(this.ID);
    }
Beispiel #13
0
 /**
  * Tells whether {@code kv} is covered by a recorded delete.
  *
  * <p>Checks, in order, the delete markers for the KeyValue's row, its family, and its column
  * (family bytes followed by qualifier bytes). A marker covers the KeyValue when the KeyValue's
  * timestamp is strictly lower than the marker's timestamp.
  *
  * @param kv the KeyValue to check
  * @return true if any applicable marker is newer than the KeyValue
  */
 public boolean isDeleted(KeyValue kv) {
   final Long rowDeletedAt = deletedRows.get(new ByteArray(kv.getRow()));
   if (rowDeletedAt != null && kv.getTimestamp() < rowDeletedAt) {
     return true;
   }

   final Long familyDeletedAt = deletedFamilies.get(new ByteArray(kv.getFamily()));
   if (familyDeletedAt != null && kv.getTimestamp() < familyDeletedAt) {
     return true;
   }

   final byte[] column = Bytes.add(kv.getFamily(), kv.getQualifier());
   final Long columnDeletedAt = deletedColumns.get(new ByteArray(column));
   return columnDeletedAt != null && kv.getTimestamp() < columnDeletedAt;
 }
 /**
  * Find the column index which will replace the column name in the aggregated array and will be
  * restored in Reducer.
  *
  * <p>The lookup key is the cell's family bytes, the namespace separator and the cell's qualifier
  * bytes, matching the key layout used when {@code columnIndexes} is populated.
  *
  * @param cell KeyValue for the column
  * @return column index for the specified cell or -1 if was not found
  * @throws IOException declared for callers; not thrown by the lookup itself
  */
 private int findIndex(Cell cell) throws IOException {
   byte[] familyName =
       Bytes.copy(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());
   byte[] name =
       Bytes.copy(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
   byte[] cfn = Bytes.add(familyName, QueryConstants.NAMESPACE_SEPARATOR_BYTES, name);
   // Single lookup instead of containsKey + get: this runs once per cell.
   Integer index = columnIndexes.get(cfn);
   if (index == null) {
     return -1;
   }
   return index;
 }
 /**
  * Recreates the test table and loads two rows into it, recording the size of each Put in
  * {@code rowSize} and {@code row2Size}.
  *
  * @throws Exception if the table cannot be created or written
  */
 public void prepareTestData() throws Exception {
   try {
     util.getHBaseAdmin().disableTable(TABLE);
     util.getHBaseAdmin().deleteTable(TABLE);
   } catch (Exception ignored) {
     // ignore table not found
   }
   table = util.createTable(TABLE, FAM);

   // First row: the value of each column lists the "friends" of the column's qualifier.
   final Put firstRow = new Put(ROW);
   firstRow.addColumn(FAM, A, Bytes.add(B, C)); // B, C are friends of A
   firstRow.addColumn(FAM, B, Bytes.add(D, E, F)); // D, E, F are friends of B
   firstRow.addColumn(FAM, C, G); // G is a friend of C
   table.put(firstRow);
   rowSize = firstRow.size();

   // Second row.
   final Put secondRow = new Put(ROW2);
   secondRow.addColumn(FAM, D, E);
   secondRow.addColumn(FAM, F, G);
   table.put(secondRow);
   row2Size = secondRow.size();
 }
 /**
  * Creates a work set for one row of an indexed region.
  *
  * <p>All arguments are stored as given. The work set's ID is the row bytes followed by the
  * bytes returned by {@code indexSpec.getCCITName()}.
  */
 public WorkSet(
     IndexedRegion r,
     IndexSpecification indexSpec,
     byte[] row,
     SortedMap<byte[], byte[]> columnValues,
     Integer lockId,
     Result d,
     int seq) {
   // What is being worked on.
   this.row = row;
   this.indexSpec = indexSpec;
   this.ID = Bytes.add(row, indexSpec.getCCITName());
   this.sequence = seq;

   // Where, and under which lock.
   this.r = r;
   this.lockId = lockId;

   // The data that accompanies the row.
   this.columnValues = columnValues;
   this.d = d;
 }
  /**
   * Configures and runs the "Matrix Creation I From HBase" job for the current {@code level}.
   *
   * <p>The job scans the {@code Cluster:GMM} column of table {@code ClusterDatabase} for rows
   * whose key lies between {@code level + "|"} and that prefix followed by 32 'f' characters,
   * and writes a sequence file to {@code /cluster/matrix_intermediate_<level>.seq}. An existing
   * output path is deleted first.
   *
   * @param strings command line arguments (unused)
   * @return 0 if the job completed successfully, 1 otherwise
   * @throws Exception if job setup or execution fails
   */
  @Override
  public int run(String[] strings) throws Exception {
    Configuration conf = new Configuration();
    String outputFileName = "/cluster/matrix_intermediate_" + level + ".seq";

    System.out.println("level:" + level);
    conf.set("level", level + "");
    String table = "ClusterDatabase";

    // Bytes.toBytes encodes as UTF-8; String.getBytes() would depend on the platform charset
    // and could produce row keys that do not match what is stored.
    byte[] levelPrefix = Bytes.toBytes(level + "|");
    Scan scan = new Scan();
    scan.setStartRow(levelPrefix);
    scan.setStopRow(Bytes.add(levelPrefix, Bytes.toBytes("ffffffffffffffffffffffffffffffff")));
    scan.addColumn(Bytes.toBytes("Cluster"), Bytes.toBytes("GMM"));

    // NOTE(review): the FileSystem is intentionally not closed here; FileSystem.get may hand
    // out a shared instance — confirm before wrapping it in try-with-resources.
    FileSystem fileSystem = FileSystem.get(conf);
    Path outputpath = new Path(outputFileName);
    if (fileSystem.exists(outputpath)) {
      fileSystem.delete(outputpath, true);
    }

    Job job = new Job(conf, "Matrix Creation I From HBase");
    job.setJarByClass(MatrixCreationI.class);
    TableMapReduceUtil.initTableMapperJob(
        table, scan, MatrixMapper.class, IntWritable.class, Text.class, job);
    job.setReducerClass(MatrixReducer.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setNumReduceTasks(1);
    FileOutputFormat.setOutputPath(job, outputpath);
    return job.waitForCompletion(true) ? 0 : 1;
  }
  /**
   * Set the start row for the replica callable based on the state of the last result received.
   * Does nothing when there is no last result or no callable.
   *
   * @param callable The callable to set the start row on
   */
  private void setStartRowForReplicaCallable(ScannerCallable callable) {
    if (this.lastResult == null || callable == null) {
      return;
    }

    final byte[] resumeRow;
    if (this.lastResult.isPartial()) {
      // A partial result means not all cells of the row have been received yet, so the scan
      // must resume from that same row. If a replica switch occurs, the scanner will ensure
      // that any accumulated partial results are cleared.
      resumeRow = this.lastResult.getRow();
    } else if (callable.getScan().isReversed()) {
      // The last row was delivered completely; for a reversed scan continue with the closest
      // row before it.
      resumeRow = createClosestRowBefore(this.lastResult.getRow());
    } else {
      // The last row was delivered completely; continue with the next closest row that could
      // be seen, i.e. the last row with a single zero byte appended.
      resumeRow = Bytes.add(this.lastResult.getRow(), new byte[1]);
    }
    callable.getScan().setStartRow(resumeRow);
  }
Beispiel #19
0
  /**
   * Creates tables pre-split in several ways and verifies region count, region boundaries and
   * round-robin distribution: explicit split keys, start/end key with a region count (twice),
   * and finally an invalid set of split keys containing a duplicate.
   */
  @Test
  public void testCreateTableWithRegions() throws IOException, InterruptedException {

    byte[] tableName = Bytes.toBytes("testCreateTableWithRegions");

    // Nine explicit 3-byte split keys: {1,1,1} .. {9,9,9}
    byte[][] splitKeys = new byte[9][];
    for (int k = 0; k < splitKeys.length; k++) {
      byte v = (byte) (k + 1);
      splitKeys[k] = new byte[] {v, v, v};
    }
    int expectedRegions = splitKeys.length + 1;

    HTableDescriptor desc = new HTableDescriptor(tableName);
    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
    admin.createTable(desc, splitKeys);

    HTable ht = new HTable(TEST_UTIL.getConfiguration(), tableName);
    Map<HRegionInfo, HServerAddress> regions = ht.getRegionsInfo();
    assertEquals(
        "Tried to create " + expectedRegions + " regions " + "but only found " + regions.size(),
        expectedRegions,
        regions.size());
    System.err.println("Found " + regions.size() + " regions");

    Iterator<HRegionInfo> hris = regions.keySet().iterator();
    assertRegionBoundaries(hris, splitKeys);

    verifyRoundRobinDistribution(ht, expectedRegions);

    // Now test using start/end with a number of regions

    // Use 80 bit numbers to make sure we aren't limited
    byte[] startKey = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
    byte[] endKey = {9, 9, 9, 9, 9, 9, 9, 9, 9, 9};

    // Splitting into 10 regions, we expect (null,1) ... (9, null)
    // with (1,2) (2,3) (3,4) (4,5) (5,6) (6,7) (7,8) (8,9) in the middle

    expectedRegions = 10;

    byte[] TABLE_2 = Bytes.add(tableName, Bytes.toBytes("_2"));

    desc = new HTableDescriptor(TABLE_2);
    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
    admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
    admin.createTable(desc, startKey, endKey, expectedRegions);

    ht = new HTable(TEST_UTIL.getConfiguration(), TABLE_2);
    regions = ht.getRegionsInfo();
    assertEquals(
        "Tried to create " + expectedRegions + " regions " + "but only found " + regions.size(),
        expectedRegions,
        regions.size());
    System.err.println("Found " + regions.size() + " regions");

    // Expected boundaries: nine 10-byte keys, each filled with 1, 2, ..., 9 respectively
    byte[][] evenBoundaries = new byte[9][];
    for (int k = 0; k < evenBoundaries.length; k++) {
      byte[] boundary = new byte[10];
      for (int j = 0; j < boundary.length; j++) {
        boundary[j] = (byte) (k + 1);
      }
      evenBoundaries[k] = boundary;
    }
    hris = regions.keySet().iterator();
    assertRegionBoundaries(hris, evenBoundaries);

    verifyRoundRobinDistribution(ht, expectedRegions);

    // Try once more with something that divides into something infinite

    startKey = new byte[] {0, 0, 0, 0, 0, 0};
    endKey = new byte[] {1, 0, 0, 0, 0, 0};

    expectedRegions = 5;

    byte[] TABLE_3 = Bytes.add(tableName, Bytes.toBytes("_3"));

    desc = new HTableDescriptor(TABLE_3);
    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
    admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
    admin.createTable(desc, startKey, endKey, expectedRegions);

    ht = new HTable(TEST_UTIL.getConfiguration(), TABLE_3);
    regions = ht.getRegionsInfo();
    assertEquals(
        "Tried to create " + expectedRegions + " regions " + "but only found " + regions.size(),
        expectedRegions,
        regions.size());
    System.err.println("Found " + regions.size() + " regions");

    verifyRoundRobinDistribution(ht, expectedRegions);

    // Try an invalid case where there are duplicate split keys
    splitKeys =
        new byte[][] {
          new byte[] {1, 1, 1},
          new byte[] {2, 2, 2},
          new byte[] {3, 3, 3},
          new byte[] {2, 2, 2}
        };

    byte[] TABLE_4 = Bytes.add(tableName, Bytes.toBytes("_4"));
    desc = new HTableDescriptor(TABLE_4);
    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
    admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
    try {
      admin.createTable(desc, splitKeys);
      assertTrue(
          "Should not be able to create this table because of " + "duplicate split keys", false);
    } catch (IllegalArgumentException iae) {
      // Expected
    }
  }

  /**
   * Walks {@code boundaries.length + 1} regions from the iterator and asserts that they are
   * delimited exactly by the given boundaries: the first region has a null/empty start key, each
   * following region starts where the previous one ended, and the last region has a null/empty
   * end key.
   */
  private void assertRegionBoundaries(Iterator<HRegionInfo> hris, byte[][] boundaries) {
    HRegionInfo hri = hris.next();
    assertTrue(hri.getStartKey() == null || hri.getStartKey().length == 0);
    assertTrue(Bytes.equals(hri.getEndKey(), boundaries[0]));
    for (int i = 1; i < boundaries.length; i++) {
      hri = hris.next();
      assertTrue(Bytes.equals(hri.getStartKey(), boundaries[i - 1]));
      assertTrue(Bytes.equals(hri.getEndKey(), boundaries[i]));
    }
    hri = hris.next();
    assertTrue(Bytes.equals(hri.getStartKey(), boundaries[boundaries.length - 1]));
    assertTrue(hri.getEndKey() == null || hri.getEndKey().length == 0);
  }
 /**
  * Load data to a table, flush it to disk, trigger compaction, confirm the compaction state is
  * right and wait till it is done.
  *
  * <p>The table is created with three families ("family", "family2", "family3") and is deleted
  * again when the method returns; the admin client opened for the test is closed as well.
  *
  * @param tableName name of the table to create
  * @param flushes number of flushes passed on to {@code loadData}
  * @param expectedState MINOR triggers a minor compaction; any other value triggers a major one
  * @param singleFamily compact only the first family; otherwise, run compaction on all cfs
  * @throws IOException
  * @throws InterruptedException
  */
 private void compaction(
     final String tableName,
     final int flushes,
     final CompactionState expectedState,
     boolean singleFamily)
     throws IOException, InterruptedException {
   // Create a table with regions
   TableName table = TableName.valueOf(tableName);
   byte[] family = Bytes.toBytes("family");
   byte[][] families = {
     family, Bytes.add(family, Bytes.toBytes("2")), Bytes.add(family, Bytes.toBytes("3"))
   };
   Table ht = null;
   HBaseAdmin admin = null;
   try {
     ht = TEST_UTIL.createTable(table, families);
     loadData(ht, families, 3000, flushes);
     HRegionServer rs = TEST_UTIL.getMiniHBaseCluster().getRegionServer(0);
     List<HRegion> regions = rs.getOnlineRegions(table);
     int countBefore = countStoreFilesInFamilies(regions, families);
     int countBeforeSingleFamily = countStoreFilesInFamily(regions, family);
     assertTrue(countBefore > 0); // there should be some data files
     admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
     if (expectedState == CompactionState.MINOR) {
       if (singleFamily) {
         admin.compact(table.getName(), family);
       } else {
         admin.compact(table.getName());
       }
     } else {
       if (singleFamily) {
         admin.majorCompact(table.getName(), family);
       } else {
         admin.majorCompact(table.getName());
       }
     }
     // Phase 1: wait (bounded) for the compaction to become visible.
     long waitTime = 5000;
     long startDeadline = System.currentTimeMillis() + waitTime;
     CompactionState state = admin.getCompactionState(table.getName());
     while (state == CompactionState.NONE && System.currentTimeMillis() < startDeadline) {
       Thread.sleep(10);
       state = admin.getCompactionState(table.getName());
     }
     // Now, should have the right compaction state,
     // otherwise, the compaction should have already been done
     if (expectedState != state) {
       for (HRegion region : regions) {
         state = region.getCompactionState();
         assertEquals(CompactionState.NONE, state);
       }
     } else {
       // Phase 2: wait until the compaction is done. The clock is re-read on every iteration
       // (previously a stale timestamp was compared, so this loop had no effective timeout)
       // and the phase gets its own full wait window.
       long doneDeadline = System.currentTimeMillis() + waitTime;
       state = admin.getCompactionState(table.getName());
       while (state != CompactionState.NONE && System.currentTimeMillis() < doneDeadline) {
         Thread.sleep(10);
         state = admin.getCompactionState(table.getName());
       }
       // Now, compaction should be done.
       assertEquals(CompactionState.NONE, state);
     }
     int countAfter = countStoreFilesInFamilies(regions, families);
     int countAfterSingleFamily = countStoreFilesInFamily(regions, family);
     assertTrue(countAfter < countBefore);
     if (!singleFamily) {
       if (expectedState == CompactionState.MAJOR) {
         assertTrue(families.length == countAfter);
       } else {
         assertTrue(families.length < countAfter);
       }
     } else {
       int singleFamDiff = countBeforeSingleFamily - countAfterSingleFamily;
       // assert only change was to single column family
       assertTrue(singleFamDiff == (countBefore - countAfter));
       if (expectedState == CompactionState.MAJOR) {
         assertTrue(1 == countAfterSingleFamily);
       } else {
         assertTrue(1 < countAfterSingleFamily);
       }
     }
   } finally {
     try {
       if (admin != null) {
         admin.close();
       }
     } finally {
       if (ht != null) {
         TEST_UTIL.deleteTable(table);
       }
     }
   }
 }
Beispiel #21
0
 /**
  * Serializes a job id as the bytes of its epoch followed by the bytes of its sequence.
  *
  * @param jobId the job id to serialize
  * @return epoch bytes followed by sequence bytes
  */
 @Override
 public byte[] toBytes(JobId jobId) {
   final byte[] epochBytes = Bytes.toBytes(jobId.getJobEpoch());
   final byte[] sequenceBytes = Bytes.toBytes(jobId.getJobSequence());
   return Bytes.add(epochBytes, sequenceBytes);
 }
  /**
   * Index updates can potentially be queued up if there aren't enough writer threads. If a running
   * index write fails, then we should exit the pending index update early when it comes up (if the
   * pool isn't already shut down).
   *
   * <p>Setup: three index updates are submitted to a single-threaded pool. The first targets a
   * table whose {@code batch} call throws an IOException. The other two target a second table
   * whose first {@code batch} call blocks on a latch and whose second {@code batch} call records
   * a failure, so the test can detect whether the third update was ever attempted.
   *
   * <p>This test is a little bit racy - we could actually have the failure of the first task
   * before the third task is even submitted. However, we should never see the third task attempt to
   * make the batch write, so we should never see a failure here.
   *
   * @throws Exception on failure
   */
  @SuppressWarnings("unchecked")
  @Test
  public void testFailureOnRunningUpdateAbortsPending() throws Exception {
    Abortable abort = new StubAbortable();
    Stoppable stop = Mockito.mock(Stoppable.class);
    // single thread factory so the older request gets queued
    ExecutorService exec = Executors.newFixedThreadPool(1);
    Map<ImmutableBytesPtr, HTableInterface> tables =
        new HashMap<ImmutableBytesPtr, HTableInterface>();
    FakeTableFactory factory = new FakeTableFactory(tables);

    // updates to two different tables
    byte[] tableName = Bytes.add(this.testName.getTableName(), new byte[] {1, 2, 3, 4});
    Put m = new Put(row);
    m.add(Bytes.toBytes("family"), Bytes.toBytes("qual"), null);
    // NOTE(review): the original remark says tableName2 sorts after the first table name, but
    // it is a strict prefix of tableName — confirm which ordering is meant.
    byte[] tableName2 = this.testName.getTableName(); // this will sort after the first tablename
    // One update for the first table, two for the second (the same mutation is reused).
    List<Pair<Mutation, byte[]>> indexUpdates = new ArrayList<Pair<Mutation, byte[]>>();
    indexUpdates.add(new Pair<Mutation, byte[]>(m, tableName));
    indexUpdates.add(new Pair<Mutation, byte[]>(m, tableName2));
    indexUpdates.add(new Pair<Mutation, byte[]>(m, tableName2));

    // first table will fail
    HTableInterface table = Mockito.mock(HTableInterface.class);
    Mockito.when(table.batch(Mockito.anyList()))
        .thenThrow(new IOException("Intentional IOException for failed first write."));
    Mockito.when(table.getTableName()).thenReturn(tableName);

    // second table just blocks to make sure that the abort propagates to the third task
    final CountDownLatch waitOnAbortedLatch = new CountDownLatch(1);
    // Single-element array so the anonymous Answer below can flip the flag.
    final boolean[] failed = new boolean[] {false};
    HTableInterface table2 = Mockito.mock(HTableInterface.class);
    Mockito.when(table2.getTableName()).thenReturn(tableName2);
    Mockito.when(table2.batch(Mockito.anyList()))
        // first batch call on table2: block until the expected failure has been observed
        .thenAnswer(
            new Answer<Void>() {
              @Override
              public Void answer(InvocationOnMock invocation) throws Throwable {
                waitOnAbortedLatch.await();
                return null;
              }
            })
        // second batch call on table2: must never happen; record it if it does
        .thenAnswer(
            new Answer<Void>() {
              @Override
              public Void answer(InvocationOnMock invocation) throws Throwable {
                failed[0] = true;
                throw new RuntimeException(
                    "Unexpected exception - second index table shouldn't have been written to");
              }
            });

    // add the tables to the set of tables, so its returned to the writer
    tables.put(new ImmutableBytesPtr(tableName), table);
    tables.put(new ImmutableBytesPtr(tableName2), table2);

    ParallelWriterIndexCommitter committer =
        new ParallelWriterIndexCommitter(VersionInfo.getVersion());
    committer.setup(factory, exec, abort, stop, 2);
    KillServerOnFailurePolicy policy = new KillServerOnFailurePolicy();
    policy.setup(stop, abort);
    IndexWriter writer = new IndexWriter(committer, policy);
    try {
      writer.write(indexUpdates);
      fail("Should not have successfully completed all index writes");
    } catch (SingleIndexWriteFailureException e) {
      LOG.info("Correctly got a failure to reach the index", e);
      // should have correctly gotten the correct abort, so let the next task execute
      waitOnAbortedLatch.countDown();
    }
    assertFalse(
        "Third set of index writes never have been attempted - should have seen the abort before done!",
        failed[0]);
    // Stopping the writer is expected to shut down both the table factory and the executor.
    writer.stop(this.testName.getTableNameString() + " finished");
    assertTrue("Factory didn't get shutdown after writer#stop!", factory.shutdown);
    assertTrue("ExectorService isn't terminated after writer#stop!", exec.isShutdown());
  }
/**
 * Simple test for {@link CellSortReducer} and {@link HFileOutputFormat2}. Sets up and runs a
 * mapreduce job that writes hfile output. Creates a few inner classes to implement splits and an
 * inputformat that emits keys and values like those of {@link PerformanceEvaluation}.
 */
@Category({VerySlowMapReduceTests.class, LargeTests.class})
public class TestHFileOutputFormat2 {
  /**
   * JUnit timeout rule built by {@link CategoryBasedTimeout}, configured from this class and with
   * stuck-thread lookup enabled.
   */
  @Rule
  public final TestRule timeout =
      CategoryBasedTimeout.builder()
          .withTimeout(this.getClass())
          .withLookingForStuckThread(true)
          .build();

  /** Number of rows emitted by each {@code RandomKVGeneratingMapper.map} invocation. */
  private static final int ROWSPERSPLIT = 1024;

  /** The two column families under test: the PerformanceEvaluation family name plus "-A"/"-B". */
  private static final byte[][] FAMILIES = {
    Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A")),
    Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B"))
  };
  /** Name of the table created and bulk-loaded by the tests. */
  private static final TableName TABLE_NAME = TableName.valueOf("TestTable");

  /** Testing utility; not final because doIncrementalLoadTest replaces it with a new instance. */
  private HBaseTestingUtility util = new HBaseTestingUtility();

  /** Logger for this test class. */
  private static final Log LOG = LogFactory.getLog(TestHFileOutputFormat2.class);

  /**
   * Mapper that ignores its (null) input and emits {@code ROWSPERSPLIT} random rows, writing one
   * {@link KeyValue} per entry of {@code FAMILIES} for each row. Key and value lengths come from
   * the job configuration, falling back to the {@code *_DEFAULT} constants.
   */
  static class RandomKVGeneratingMapper
      extends Mapper<NullWritable, NullWritable, ImmutableBytesWritable, Cell> {

    private static final String KEYLEN_CONF = "randomkv.key.length";
    private static final int KEYLEN_DEFAULT = 10;

    private static final String VALLEN_CONF = "randomkv.val.length";
    private static final int VALLEN_DEFAULT = 10;

    private static final byte[] QUALIFIER = Bytes.toBytes("data");

    private int keyLength;
    private int valLength;

    /** Reads the configured key and value lengths once per task. */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);

      Configuration jobConf = context.getConfiguration();
      this.keyLength = jobConf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
      this.valLength = jobConf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
    }

    /** Emits the random rows; the two input arguments are not used. */
    @Override
    protected void map(
        NullWritable n1,
        NullWritable n2,
        Mapper<NullWritable, NullWritable, ImmutableBytesWritable, Cell>.Context context)
        throws IOException, InterruptedException {

      final byte[] rowBuffer = new byte[keyLength];
      final byte[] valueBuffer = new byte[valLength];

      final int taskId = context.getTaskAttemptID().getTaskID().getId();
      assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";

      final Random rng = new Random();
      int emittedRows = 0;
      while (emittedRows < ROWSPERSPLIT) {
        rng.nextBytes(rowBuffer);
        // The last key byte carries the task id so that different tasks never collide on a key.
        rowBuffer[keyLength - 1] = (byte) (taskId & 0xFF);
        rng.nextBytes(valueBuffer);

        ImmutableBytesWritable outKey = new ImmutableBytesWritable(rowBuffer);
        for (byte[] columnFamily : TestHFileOutputFormat2.FAMILIES) {
          context.write(outKey, new KeyValue(rowBuffer, columnFamily, QUALIFIER, valueBuffer));
        }
        emittedRows++;
      }
    }
  }

  /**
   * Wires {@code job} to generate random data: {@link NMapInputFormat} as input, {@link
   * RandomKVGeneratingMapper} as mapper, and the matching map output key/value classes.
   */
  private void setupRandomGeneratorMapper(Job job) {
    // Map output types.
    job.setMapOutputValueClass(KeyValue.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    // Data source.
    job.setMapperClass(RandomKVGeneratingMapper.class);
    job.setInputFormatClass(NMapInputFormat.class);
  }

  /**
   * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if passed a keyvalue whose
   * timestamp is {@link HConstants#LATEST_TIMESTAMP}, and leaves an explicit timestamp untouched.
   *
   * @throws Exception if the record writer cannot be created, written to or closed
   * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void test_LATEST_TIMESTAMP_isReplaced() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
    try {
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);
      final byte[] b = Bytes.toBytes("b");

      // Test 1.  Pass a KV that has a ts of LATEST_TIMESTAMP.  It should be
      // changed by call to write.  Check all in kv is same but ts.
      KeyValue kv = new KeyValue(b, b, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertFalse(original.equals(kv));
      assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
      assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
      assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
      // Compare the primitive values directly: assertNotSame would box both longs and compare
      // object identity, which succeeds no matter what the timestamps are.
      assertTrue(
          "Timestamp should have been changed by the write",
          original.getTimestamp() != kv.getTimestamp());
      assertTrue(
          "Timestamp should no longer be LATEST_TIMESTAMP",
          HConstants.LATEST_TIMESTAMP != kv.getTimestamp());

      // Test 2. Now test passing a kv that has explicit ts.  It should not be
      // changed by call to record write.
      kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertTrue(original.equals(kv));
    } finally {
      if (writer != null && context != null) {
        writer.close(context);
      }
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  /**
   * Creates a {@link TaskAttemptContext} for {@code job} through the {@link HadoopShims}
   * compatibility layer, using a fixed task attempt id.
   */
  private TaskAttemptContext createTestTaskAttemptContext(final Job job) throws Exception {
    final String attemptId = "attempt_201402131733_0001_m_000000_0";
    HadoopShims shims = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
    return shims.createTestTaskAttemptContext(job, attemptId);
  }

  /**
   * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE metadata used by
   * time-restricted scans. Two cells with timestamps 2000 and 1000 are written and the resulting
   * file's TIMERANGE file-info entry must span exactly that range.
   *
   * @throws Exception if the writer, the file system or the HFile reader fails
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void test_TIMERANGE() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_TIMERANGE_present");
    LOG.info("Timerange dir writing to dir: " + dir);
    try {
      // build a record writer using HFileOutputFormat2
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      // Pass two key values with explicit times stamps
      final byte[] b = Bytes.toBytes("b");

      // value 1 with timestamp 2000
      KeyValue kv = new KeyValue(b, b, b, 2000, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // value 2 with timestamp 1000
      kv = new KeyValue(b, b, b, 1000, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // verify that the file has the proper FileInfo.
      writer.close(context);

      // the generated file lives 1 directory down from the attempt directory
      // and is the only file, e.g.
      // _attempt__0000_r_000000_0/b/1979617994050536795
      FileSystem fs = FileSystem.get(conf);
      Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
      FileStatus[] sub1 = fs.listStatus(attemptDirectory);
      FileStatus[] file = fs.listStatus(sub1[0].getPath());

      // open as HFile Reader and pull out TIMERANGE FileInfo.
      HFile.Reader rd = HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), conf);
      try {
        Map<byte[], byte[]> finfo = rd.loadFileInfo();
        // Bytes.toBytes keeps the key encoding independent of the platform default charset.
        byte[] range = finfo.get(Bytes.toBytes("TIMERANGE"));
        assertNotNull(range);

        // unmarshall and check values.
        TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
        Writables.copyWritable(range, timeRangeTracker);
        LOG.info(
            timeRangeTracker.getMinimumTimestamp()
                + "...."
                + timeRangeTracker.getMaximumTimestamp());
        assertEquals(1000, timeRangeTracker.getMinimumTimestamp());
        assertEquals(2000, timeRangeTracker.getMaximumTimestamp());
      } finally {
        // Close the reader even when one of the assertions above fails.
        rd.close();
      }
    } finally {
      if (writer != null && context != null) {
        writer.close(context);
      }
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  /**
   * Runs a small MapReduce job that writes HFiles through {@link HFileOutputFormat2} and checks
   * that the output directory is not empty afterwards.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testWritingPEData() throws Exception {
    Configuration conf = util.getConfiguration();
    Path outputDir = util.getDataTestDirOnTestFS("testWritingPEData");
    FileSystem outputFs = outputDir.getFileSystem(conf);

    // Keep the sort buffer small, otherwise we OOME in eclipse.
    conf.setInt("mapreduce.task.io.sort.mb", 20);
    // A small max file size makes the job write a few files.
    conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

    Job job = new Job(conf, "testWritingPEData");
    setupRandomGeneratorMapper(job);
    Configuration jobConf = job.getConfiguration();

    // This partitioner doesn't work well for number keys; it is used anyway
    // just to demonstrate how to configure it.
    final int keyLen = RandomKVGeneratingMapper.KEYLEN_DEFAULT;
    // A freshly allocated array is already all zeros, which is the desired start key.
    byte[] lowestKey = new byte[keyLen];
    byte[] highestKey = new byte[keyLen];
    Arrays.fill(highestKey, (byte) 0xff);

    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
    // Start and end rows for the partitioner.
    SimpleTotalOrderPartitioner.setStartKey(jobConf, lowestKey);
    SimpleTotalOrderPartitioner.setEndKey(jobConf, highestKey);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setOutputFormatClass(HFileOutputFormat2.class);
    job.setNumReduceTasks(4);

    String[] serializations = {
      conf.get("io.serializations"),
      MutationSerialization.class.getName(),
      ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName()
    };
    jobConf.setStrings("io.serializations", serializations);

    FileOutputFormat.setOutputPath(job, outputDir);
    assertTrue(job.waitForCompletion(false));

    FileStatus[] written = outputFs.listStatus(outputDir);
    assertTrue(written.length > 0);
  }

  /**
   * Checks that {@code HFileOutputFormat2.configureIncrementalLoad} leaves the job with four
   * reduce tasks when the region locator is stubbed by {@code setupMockStartKeys}, which returns
   * four start keys.
   *
   * <p>The {@link Table} mock is not stubbed, so the table descriptor passed in is whatever
   * Mockito returns by default for {@code getTableDescriptor()}.
   *
   * @throws Exception if the job cannot be configured
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testJobConfiguration() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    conf.set("hbase.fs.tmp.dir", util.getDataTestDir("testJobConfiguration").toString());
    Job job = new Job(conf);
    job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
    Table table = Mockito.mock(Table.class);
    RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
    setupMockStartKeys(regionLocator);
    setupMockTableName(regionLocator);
    HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
    // Expected value goes first so that a failure message reads the right way round.
    assertEquals(4, job.getNumReduceTasks());
  }

  /**
   * Produces {@code numKeys} region start keys: an empty first key followed by random keys of
   * {@code PerformanceEvaluation.DEFAULT_VALUE_LENGTH} bytes.
   *
   * @param numKeys number of keys to generate; the first slot is always written
   */
  private byte[][] generateRandomStartKeys(int numKeys) {
    final Random rng = new Random();
    final byte[][] startKeys = new byte[numKeys][];
    // The first region always starts at the empty key.
    startKeys[0] = HConstants.EMPTY_BYTE_ARRAY;
    int slot = 1;
    while (slot < numKeys) {
      startKeys[slot] =
          PerformanceEvaluation.generateData(rng, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
      slot++;
    }
    return startKeys;
  }

  /**
   * Produces {@code numKeys} random split keys, each of {@code
   * PerformanceEvaluation.DEFAULT_VALUE_LENGTH} bytes.
   */
  private byte[][] generateRandomSplitKeys(int numKeys) {
    final Random rng = new Random();
    final byte[][] splitKeys = new byte[numKeys][];
    int slot = 0;
    while (slot < numKeys) {
      splitKeys[slot] =
          PerformanceEvaluation.generateData(rng, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
      slot++;
    }
    return splitKeys;
  }

  /** Incremental load with the region layout left unchanged and locality sensitivity off. */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testMRIncrementalLoad() throws Exception {
    final boolean changeRegions = false;
    final boolean keepLocality = false;
    LOG.info("\nStarting test testMRIncrementalLoad\n");
    doIncrementalLoadTest(changeRegions, keepLocality);
  }

  /** Incremental load where the table is re-created with new regions before the bulk load. */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testMRIncrementalLoadWithSplit() throws Exception {
    final boolean changeRegions = true;
    final boolean keepLocality = false;
    LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
    doIncrementalLoadTest(changeRegions, keepLocality);
  }

  /**
   * Incremental load with HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY set to true, run once
   * without and once with a region change. This can only check that the original logic still
   * works with the flag on: MiniHBaseCluster always runs with a single hostname (and different
   * ports), so region locality cannot be verified by comparing region locations against DN
   * hostnames. Once MiniHBaseCluster supports an explicit hostnames parameter (as MiniDFSCluster
   * does), region locality features could be tested more easily.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testMRIncrementalLoadWithLocality() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithLocality\n");
    for (boolean changeRegions : new boolean[] {false, true}) {
      doIncrementalLoadTest(changeRegions, true);
    }
  }

  /**
   * Runs a full incremental-load round trip against a freshly started mini cluster: creates the
   * pre-split test table, generates HFiles with {@code runIncrementalPELoad}, optionally
   * re-creates the table with a different set of regions, bulk loads the files, and verifies row
   * counts, cell contents, block locality and that the data survives a disable/enable cycle.
   *
   * @param shouldChangeRegions when true, the table is deleted and re-created with new random
   *     split keys between generating the HFiles and loading them
   * @param shouldKeepLocality value stored under {@code
   *     HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY}; when true, more hosts and regions are used
   * @throws Exception if the cluster, the MapReduce job or any table operation fails
   */
  private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality)
      throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
    int hostCount = 1;
    int regionNum = 5;
    if (shouldKeepLocality) {
      // We should change host count higher than hdfs replica count when MiniHBaseCluster supports
      // explicit hostnames parameter just like MiniDFSCluster does.
      hostCount = 3;
      regionNum = 20;
    }

    byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
    String[] hostnames = new String[hostCount];
    for (int i = 0; i < hostCount; ++i) {
      hostnames[i] = "datanode_" + i;
    }
    util.startMiniCluster(1, hostCount, hostnames);

    Table table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
    try (RegionLocator r = util.getConnection().getRegionLocator(TABLE_NAME);
        Admin admin = util.getConnection().getAdmin(); ) {
      assertEquals("Should start with empty table", 0, util.countRows(table));
      int numRegions = r.getStartKeys().length;
      // Expected value first, so a failure reports the numbers the right way round.
      assertEquals("Should make " + regionNum + " regions", regionNum, numRegions);

      // Generate the bulk load files
      runIncrementalPELoad(conf, table.getTableDescriptor(), r, testDir);
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

      // Make sure that a directory was created for every CF
      int dir = 0;
      for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);

      // handle the split case
      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getName());
        while (util.getMiniHBaseCluster()
            .getMaster()
            .getAssignmentManager()
            .getRegionStates()
            .isRegionsInTransition()) {
          Threads.sleep(200);
          LOG.info("Waiting on table to finish disabling");
        }
        util.deleteTable(table.getName());
        byte[][] newSplitKeys = generateRandomSplitKeys(14);
        table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);

        // N split keys give N + 1 regions.
        int expectedRegionCount = newSplitKeys.length + 1;
        while (countRegionLocations(TABLE_NAME) != expectedRegionCount
            || !admin.isTableAvailable(table.getName())) {
          Thread.sleep(200);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, r);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals(
          "LoadIncrementalHFiles should put expected data in table",
          expectedRows,
          util.countRows(table));
      Scan scan = new Scan();
      // try-with-resources closes the scanner even when an assertion inside the loop fails.
      try (ResultScanner results = table.getScanner(scan)) {
        for (Result res : results) {
          assertEquals(FAMILIES.length, res.rawCells().length);
          Cell first = res.rawCells()[0];
          for (Cell kv : res.rawCells()) {
            assertTrue(CellUtil.matchingRow(first, kv));
            assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
          }
        }
      }
      String tableDigestBefore = util.checksumRows(table);

      // Check region locality
      HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
      for (HRegion region : util.getHBaseCluster().getRegions(TABLE_NAME)) {
        hbd.add(region.getHDFSBlocksDistribution());
      }
      for (String hostname : hostnames) {
        float locality = hbd.getBlockLocalityIndex(hostname);
        LOG.info("locality of [" + hostname + "]: " + locality);
        assertEquals(100, (int) (locality * 100));
      }

      // Cause regions to reopen
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME);
      assertEquals(
          "Data should remain after reopening of regions",
          tableDigestBefore,
          util.checksumRows(table));
    } finally {
      testDir.getFileSystem(conf).delete(testDir, true);
      util.deleteTable(TABLE_NAME);
      util.shutdownMiniCluster();
    }
  }

  /**
   * Counts the region locations currently reported for {@code tableName}, using a short-lived
   * {@link RegionLocator} that is closed before returning so that polling callers do not leak one
   * locator per iteration.
   */
  private int countRegionLocations(TableName tableName) throws IOException {
    try (RegionLocator locator = util.getConnection().getRegionLocator(tableName)) {
      return locator.getAllRegionLocations().size();
    }
  }

  /**
   * Runs the random-data MapReduce job configured for incremental load of the given table and
   * writes its HFiles to {@code outDir}. Asserts that {@code outDir} does not exist beforehand,
   * that the job has one reduce task per region location, and that the job completes
   * successfully.
   */
  private void runIncrementalPELoad(
      Configuration conf,
      HTableDescriptor tableDescriptor,
      RegionLocator regionLocator,
      Path outDir)
      throws IOException, UnsupportedEncodingException, InterruptedException,
          ClassNotFoundException {
    Job loadJob = new Job(conf, "testLocalMRIncrementalLoad");
    loadJob.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));

    String[] serializations = {
      conf.get("io.serializations"),
      MutationSerialization.class.getName(),
      ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName()
    };
    loadJob.getConfiguration().setStrings("io.serializations", serializations);

    setupRandomGeneratorMapper(loadJob);
    HFileOutputFormat2.configureIncrementalLoad(loadJob, tableDescriptor, regionLocator);
    FileOutputFormat.setOutputPath(loadJob, outDir);

    boolean outputAlreadyThere = util.getTestFileSystem().exists(outDir);
    assertFalse(outputAlreadyThere);

    int regionCount = regionLocator.getAllRegionLocations().size();
    assertEquals(regionCount, loadJob.getNumReduceTasks());

    boolean succeeded = loadJob.waitForCompletion(true);
    assertTrue(succeeded);
  }

  /**
   * Round-trip test for {@code HFileOutputFormat2.configureCompression} and {@code
   * HFileOutputFormat2.createFamilyCompressionMap}: the per-family compression settings written
   * into the configuration must be read back unchanged, for zero up to three requested families.
   *
   * @throws IOException
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
    final int maxFamilies = 3;
    for (int familyCount = 0; familyCount <= maxFamilies; familyCount++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Compression.Algorithm> expected =
          getMockColumnFamiliesForCompression(familyCount);

      // Serialize the mock settings into the configuration.
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForCompression(table, expected);
      HFileOutputFormat2.configureCompression(conf, table.getTableDescriptor());

      // Deserialize them again.
      Map<byte[], Algorithm> actual = HFileOutputFormat2.createFamilyCompressionMap(conf);

      // Every mocked family must come back with the value it was given.
      for (Entry<String, Algorithm> expectedEntry : expected.entrySet()) {
        String family = expectedEntry.getKey();
        assertEquals(
            "Compression configuration incorrect for column family:" + family,
            expectedEntry.getValue(),
            actual.get(family.getBytes()));
      }
    }
  }

  /**
   * Stubs {@code table.getTableDescriptor()} to return a descriptor for {@code TABLE_NAME} holding
   * one column family per map entry, each using the mapped compression algorithm.
   */
  private void setupMockColumnFamiliesForCompression(
      Table table, Map<String, Compression.Algorithm> familyToCompression) throws IOException {
    final HTableDescriptor descriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, Compression.Algorithm> familySetting : familyToCompression.entrySet()) {
      HColumnDescriptor family =
          new HColumnDescriptor(familySetting.getKey())
              .setMaxVersions(1)
              .setCompressionType(familySetting.getValue())
              .setBlockCacheEnabled(false)
              .setTimeToLive(0);
      descriptor.addFamily(family);
    }
    Mockito.doReturn(descriptor).when(table).getTableDescriptor();
  }

  /**
   * Builds the mock family-to-compression settings used by the compression serialization test.
   * Families are added in a fixed order until {@code numCfs} of them have been added (at most
   * four), each under its own name so that no entry replaces an earlier one.
   *
   * @param numCfs number of column families wanted; zero or less yields an empty map
   * @return a map from column family names to compression algorithms for testing column family
   *     compression. Column family names have special characters
   */
  private Map<String, Compression.Algorithm> getMockColumnFamiliesForCompression(int numCfs) {
    Map<String, Compression.Algorithm> familyToCompression =
        new HashMap<String, Compression.Algorithm>();
    // use column family names having special characters
    if (numCfs-- > 0) {
      familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
    }
    if (numCfs-- > 0) {
      // Must differ from the previous name: reusing it would overwrite the SNAPPY entry and
      // leave the map one family short.
      familyToCompression.put("Family2b=asdads&!AASD", Compression.Algorithm.GZ);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family3", Compression.Algorithm.NONE);
    }
    return familyToCompression;
  }

  /**
   * Round-trip test for {@code HFileOutputFormat2.configureBloomType} and {@code
   * HFileOutputFormat2.createFamilyBloomTypeMap}: the per-family bloom types written into the
   * configuration must be read back unchanged, for zero up to two requested families.
   *
   * @throws IOException
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
    final int maxFamilies = 2;
    for (int familyCount = 0; familyCount <= maxFamilies; familyCount++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, BloomType> expected = getMockColumnFamiliesForBloomType(familyCount);

      // Serialize the mock settings into the configuration.
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBloomType(table, expected);
      HFileOutputFormat2.configureBloomType(table.getTableDescriptor(), conf);

      // Deserialize them again.
      Map<byte[], BloomType> actual = HFileOutputFormat2.createFamilyBloomTypeMap(conf);

      // Every mocked family must come back with the value it was given.
      for (Entry<String, BloomType> expectedEntry : expected.entrySet()) {
        String family = expectedEntry.getKey();
        assertEquals(
            "BloomType configuration incorrect for column family:" + family,
            expectedEntry.getValue(),
            actual.get(family.getBytes()));
      }
    }
  }

  /**
   * Stubs {@code table.getTableDescriptor()} to return a descriptor for {@code TABLE_NAME} holding
   * one column family per map entry, each using the mapped bloom filter type. Despite its name,
   * the {@code familyToDataBlockEncoding} parameter carries bloom types here.
   */
  private void setupMockColumnFamiliesForBloomType(
      Table table, Map<String, BloomType> familyToDataBlockEncoding) throws IOException {
    final HTableDescriptor descriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, BloomType> familySetting : familyToDataBlockEncoding.entrySet()) {
      HColumnDescriptor family =
          new HColumnDescriptor(familySetting.getKey())
              .setMaxVersions(1)
              .setBloomFilterType(familySetting.getValue())
              .setBlockCacheEnabled(false)
              .setTimeToLive(0);
      descriptor.addFamily(family);
    }
    Mockito.doReturn(descriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to bloom filter types for testing column family bloom
   *     type settings, holding the first {@code numCfs} of three fixed families. Column family
   *     names have special characters
   */
  private Map<String, BloomType> getMockColumnFamiliesForBloomType(int numCfs) {
    // use column family names having special characters
    final String[] familyNames = {"Family1!@#!@#&", "Family2=asdads&!AASD", "Family3"};
    final BloomType[] bloomTypes = {BloomType.ROW, BloomType.ROWCOL, BloomType.NONE};

    Map<String, BloomType> familyToBloomType = new HashMap<String, BloomType>();
    for (int i = 0; i < familyNames.length && i < numCfs; i++) {
      familyToBloomType.put(familyNames[i], bloomTypes[i]);
    }
    return familyToBloomType;
  }

  /**
   * Round-trip test for {@code HFileOutputFormat2.configureBlockSize} and {@code
   * HFileOutputFormat2.createFamilyBlockSizeMap}: the per-family block sizes written into the
   * configuration must be read back unchanged, for zero up to three requested families.
   *
   * @throws IOException
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
    final int maxFamilies = 3;
    for (int familyCount = 0; familyCount <= maxFamilies; familyCount++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Integer> expected = getMockColumnFamiliesForBlockSize(familyCount);

      // Serialize the mock settings into the configuration.
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBlockSize(table, expected);
      HFileOutputFormat2.configureBlockSize(table.getTableDescriptor(), conf);

      // Deserialize them again.
      Map<byte[], Integer> actual = HFileOutputFormat2.createFamilyBlockSizeMap(conf);

      // Every mocked family must come back with the value it was given.
      for (Entry<String, Integer> expectedEntry : expected.entrySet()) {
        String family = expectedEntry.getKey();
        assertEquals(
            "BlockSize configuration incorrect for column family:" + family,
            expectedEntry.getValue(),
            actual.get(family.getBytes()));
      }
    }
  }

  /**
   * Stubs {@code table.getTableDescriptor()} to return a descriptor for {@code TABLE_NAME} holding
   * one column family per map entry, each using the mapped block size. Despite its name, the
   * {@code familyToDataBlockEncoding} parameter carries block sizes here.
   */
  private void setupMockColumnFamiliesForBlockSize(
      Table table, Map<String, Integer> familyToDataBlockEncoding) throws IOException {
    final HTableDescriptor descriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, Integer> familySetting : familyToDataBlockEncoding.entrySet()) {
      HColumnDescriptor family =
          new HColumnDescriptor(familySetting.getKey())
              .setMaxVersions(1)
              .setBlocksize(familySetting.getValue())
              .setBlockCacheEnabled(false)
              .setTimeToLive(0);
      descriptor.addFamily(family);
    }
    Mockito.doReturn(descriptor).when(table).getTableDescriptor();
  }

  /**
   * Builds the mock family-to-block-size settings used by the block size serialization test.
   * Families are added in a fixed order until {@code numCfs} of them have been added (at most
   * four), each under its own name so that no entry replaces an earlier one.
   *
   * @param numCfs number of column families wanted; zero or less yields an empty map
   * @return a map from column family names to block sizes for testing column family block size
   *     settings. Column family names have special characters
   */
  private Map<String, Integer> getMockColumnFamiliesForBlockSize(int numCfs) {
    Map<String, Integer> familyToBlockSize = new HashMap<String, Integer>();
    // use column family names having special characters
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family1!@#!@#&", 1234);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family2=asdads&!AASD", Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      // Must differ from the previous name: reusing it adds nothing and leaves the map one
      // family short.
      familyToBlockSize.put("Family2b=asdads&!AASD", Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family3", 0);
    }
    return familyToBlockSize;
  }

  /**
   * Round-trip test for {@link HFileOutputFormat2#configureDataBlockEncoding(HTableDescriptor,
   * Configuration)} and {@link HFileOutputFormat2#createFamilyDataBlockEncodingMap(Configuration)}:
   * the per-family data block encodings written into the configuration must be read back
   * unchanged, for zero up to three requested families.
   *
   * @throws IOException
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
    final int maxFamilies = 3;
    for (int familyCount = 0; familyCount <= maxFamilies; familyCount++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, DataBlockEncoding> expected =
          getMockColumnFamiliesForDataBlockEncoding(familyCount);

      // Serialize the mock settings into the configuration.
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForDataBlockEncoding(table, expected);
      HTableDescriptor tableDescriptor = table.getTableDescriptor();
      HFileOutputFormat2.configureDataBlockEncoding(tableDescriptor, conf);

      // Deserialize them again.
      Map<byte[], DataBlockEncoding> actual =
          HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);

      // Every mocked family must come back with the value it was given.
      for (Entry<String, DataBlockEncoding> expectedEntry : expected.entrySet()) {
        String family = expectedEntry.getKey();
        assertEquals(
            "DataBlockEncoding configuration incorrect for column family:" + family,
            expectedEntry.getValue(),
            actual.get(family.getBytes()));
      }
    }
  }

  /**
   * Stubs {@code table.getTableDescriptor()} to return a descriptor for {@code TABLE_NAME} holding
   * one column family per map entry, each using the mapped data block encoding.
   */
  private void setupMockColumnFamiliesForDataBlockEncoding(
      Table table, Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
    final HTableDescriptor descriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, DataBlockEncoding> familySetting : familyToDataBlockEncoding.entrySet()) {
      HColumnDescriptor family =
          new HColumnDescriptor(familySetting.getKey())
              .setMaxVersions(1)
              .setDataBlockEncoding(familySetting.getValue())
              .setBlockCacheEnabled(false)
              .setTimeToLive(0);
      descriptor.addFamily(family);
    }
    Mockito.doReturn(descriptor).when(table).getTableDescriptor();
  }

  /**
   * Builds the mock family-to-encoding settings used by the data block encoding serialization
   * test. Families are added in a fixed order until {@code numCfs} of them have been added (at
   * most four), each under its own name so that no entry replaces an earlier one.
   *
   * @param numCfs number of column families wanted; zero or less yields an empty map
   * @return a map from column family names to data block encodings for testing column family
   *     data block encoding settings. Column family names have special characters
   */
  private Map<String, DataBlockEncoding> getMockColumnFamiliesForDataBlockEncoding(int numCfs) {
    Map<String, DataBlockEncoding> familyToDataBlockEncoding =
        new HashMap<String, DataBlockEncoding>();
    // use column family names having special characters
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family2=asdads&!AASD", DataBlockEncoding.FAST_DIFF);
    }
    if (numCfs-- > 0) {
      // Must differ from the previous name: reusing it would overwrite the FAST_DIFF entry and
      // leave the map one family short.
      familyToDataBlockEncoding.put("Family2b=asdads&!AASD", DataBlockEncoding.PREFIX);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
    }
    return familyToDataBlockEncoding;
  }

  /**
   * Stubs {@code table.getStartKeys()} to return four fixed start keys: the empty key, "aaa",
   * "ggg" and "zzz".
   */
  private void setupMockStartKeys(RegionLocator table) throws IOException {
    final byte[][] startKeys = new byte[4][];
    startKeys[0] = HConstants.EMPTY_BYTE_ARRAY;
    startKeys[1] = Bytes.toBytes("aaa");
    startKeys[2] = Bytes.toBytes("ggg");
    startKeys[3] = Bytes.toBytes("zzz");
    Mockito.doReturn(startKeys).when(table).getStartKeys();
  }

  /** Stubs {@code table.getName()} to return the table name "mock_table". */
  private void setupMockTableName(RegionLocator table) throws IOException {
    Mockito.doReturn(TableName.valueOf("mock_table")).when(table).getName();
  }

  /**
   * Test that {@link HFileOutputFormat2} RecordWriter uses compression and bloom filter settings
   * from the column family descriptor.
   *
   * <p>The test builds a table descriptor from the generated column descriptors, writes random
   * rows for every family through the record writer, commits the task and the job, and then opens
   * the first file found in each family directory to compare its bloom filter type and
   * compression against the descriptor of that family. The output directory is deleted at the end
   * regardless of the outcome.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testColumnFamilySettings() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    Path dir = util.getDataTestDir("testColumnFamilySettings");

    // Setup table descriptor
    Table table = Mockito.mock(Table.class);
    RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    Mockito.doReturn(htd).when(table).getTableDescriptor();
    for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
      htd.addFamily(hcd);
    }

    // set up the table to return some mock keys
    setupMockStartKeys(regionLocator);

    try {
      // partial map red setup to get an operational writer for testing
      // We turn off the sequence file compression, because DefaultCodec
      // pollutes the GZip codec pool with an incompatible compressor.
      conf.set("io.seqfile.compression.type", "NONE");
      conf.set("hbase.fs.tmp.dir", dir.toString());
      // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
      conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);

      Job job = new Job(conf, "testLocalMRIncrementalLoad");
      job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
      setupRandomGeneratorMapper(job);
      HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
      FileOutputFormat.setOutputPath(job, dir);
      TaskAttemptContext context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      RecordWriter<ImmutableBytesWritable, Cell> writer = hof.getRecordWriter(context);

      // write out random rows
      writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
      writer.close(context);

      // Make sure that a directory was created for every CF
      FileSystem fs = dir.getFileSystem(conf);

      // commit so that the filesystem has one directory per column family
      hof.getOutputCommitter(context).commitTask(context);
      hof.getOutputCommitter(context).commitJob(context);
      FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
      assertEquals(htd.getFamilies().size(), families.length);
      for (FileStatus f : families) {
        String familyStr = f.getPath().getName();
        HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
        // verify that the compression on this file matches the configured
        // compression
        Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
        Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
        try {
          Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

          // a file without a bloom filter entry is treated as bloom type NONE
          byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
          if (bloomFilter == null) {
            bloomFilter = Bytes.toBytes("NONE");
          }
          assertEquals(
              "Incorrect bloom filter used for column family "
                  + familyStr
                  + "(reader: "
                  + reader
                  + ")",
              hcd.getBloomFilterType(),
              BloomType.valueOf(Bytes.toString(bloomFilter)));
          assertEquals(
              "Incorrect compression used for column family "
                  + familyStr
                  + "(reader: "
                  + reader
                  + ")",
              hcd.getCompressionType(),
              reader.getFileContext().getCompression());
        } finally {
          // release the reader even when an assertion above fails
          reader.close();
        }
      }
    } finally {
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  /**
   * Write random values to the writer assuming a table created using {@link #FAMILIES} as column
   * family descriptors.
   *
   * <p>For every row index in {@code [0, numRows)} the index is stored into a reusable key buffer
   * with {@code Bytes.putInt}, a fresh 10-byte random value is generated, and one cell with
   * qualifier "data" carrying that value is written for each family.
   *
   * @param writer destination record writer
   * @param context task attempt context; its task id is only checked by an {@code assert}
   * @param families column families to write a cell for in every row
   * @param numRows number of rows to generate
   * @throws IOException if the writer fails
   * @throws InterruptedException if the writer is interrupted
   */
  private void writeRandomKeyValues(
      RecordWriter<ImmutableBytesWritable, Cell> writer,
      TaskAttemptContext context,
      Set<byte[]> families,
      int numRows)
      throws IOException, InterruptedException {
    final int taskId = context.getTaskAttemptID().getTaskID().getId();
    assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";

    final byte[] rowKey = new byte[Bytes.SIZEOF_INT];
    final byte[] value = new byte[10];
    final byte[] qualifier = Bytes.toBytes("data");
    final Random rng = new Random();

    int row = 0;
    while (row < numRows) {
      Bytes.putInt(rowKey, 0, row);
      rng.nextBytes(value);
      ImmutableBytesWritable key = new ImmutableBytesWritable(rowKey);

      for (byte[] family : families) {
        writer.write(key, new KeyValue(rowKey, family, qualifier, value));
      }
      row++;
    }
  }

  /**
   * This test is to test the scenario happened in HBASE-6901. All files are bulk loaded and
   * excluded from minor compaction. Without the fix of HBASE-6901, an
   * ArrayIndexOutOfBoundsException will be thrown.
   *
   * <p>Two bulk loads are performed with the compaction-exclude flag enabled; the test then
   * expects a minor compaction to leave both store files in place and a major compaction to
   * reduce them to one.
   */
  @Ignore("Flakey: See HBASE-9051")
  @Test
  public void testExcludeAllFromMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    util.startMiniCluster();
    // connect with the test utility's configuration, as testExcludeMinorCompaction does
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Admin admin = conn.getAdmin();
        Table table = util.createTable(TABLE_NAME, FAMILIES);
        RegionLocator locator = conn.getRegionLocator(TABLE_NAME)) {
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // deep inspection: get the StoreFile dir
      final Path storePath =
          new Path(
              FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
              new Path(
                  admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
                  Bytes.toString(FAMILIES[0])));
      assertEquals(0, fs.listStatus(storePath).length);

      // condition polled after each compaction request: exactly one store file remains
      final Callable<Boolean> singleStoreFile =
          new Callable<Boolean>() {
            @Override
            public Boolean call() throws Exception {
              return fs.listStatus(storePath).length == 1;
            }
          };

      // Generate two bulk load files
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);

      for (int i = 0; i < 2; i++) {
        Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
        // reuse the managed locator rather than opening a new, never-closed one per iteration
        runIncrementalPELoad(conf, table.getTableDescriptor(), locator, testDir);
        // Perform the actual load
        new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, locator);
      }

      // Ensure data shows up
      int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals(
          "LoadIncrementalHFiles should put expected data in table",
          expectedRows,
          util.countRows(table));

      // should have a second StoreFile now
      assertEquals(2, fs.listStatus(storePath).length);

      // minor compactions shouldn't get rid of the file
      admin.compact(TABLE_NAME);
      try {
        quickPoll(singleStoreFile, 5000);
        // reaching this line means the poll succeeded, i.e. the files were merged
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // this is expected behavior: the poll timed out because nothing was compacted
      }

      // a major compaction should work though
      admin.majorCompact(TABLE_NAME);
      quickPoll(singleStoreFile, 5000);

    } finally {
      util.shutdownMiniCluster();
    }
  }

  /**
   * Checks that a bulk-loaded file written with the compaction-exclude flag is left alone by a
   * minor compaction but merged by a major compaction.
   *
   * <p>One store file is produced by a regular put and flush, a second one by an incremental bulk
   * load. The test expects two store files to survive a minor compaction request and a single
   * store file to remain after a major compaction.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testExcludeMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    util.startMiniCluster();
    // table and locator are managed here so they are closed before the cluster shuts down
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Admin admin = conn.getAdmin();
        Table table = util.createTable(TABLE_NAME, FAMILIES);
        RegionLocator regionLocator = conn.getRegionLocator(TABLE_NAME)) {
      Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // deep inspection: get the StoreFile dir
      final Path storePath =
          new Path(
              FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
              new Path(
                  admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
                  Bytes.toString(FAMILIES[0])));
      assertEquals(0, fs.listStatus(storePath).length);

      // condition polled after the flush and after each compaction request
      final Callable<Boolean> singleStoreFile =
          new Callable<Boolean>() {
            @Override
            public Boolean call() throws Exception {
              return fs.listStatus(storePath).length == 1;
            }
          };

      // put some data in it and flush to create a storefile
      Put p = new Put(Bytes.toBytes("test"));
      p.addColumn(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
      table.put(p);
      admin.flush(TABLE_NAME);
      assertEquals(1, util.countRows(table));
      quickPoll(singleStoreFile, 5000);

      // Generate a bulk load file with more rows
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);

      runIncrementalPELoad(conf, table.getTableDescriptor(), regionLocator, testDir);

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, regionLocator);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals(
          "LoadIncrementalHFiles should put expected data in table",
          expectedRows + 1,
          util.countRows(table));

      // should have a second StoreFile now
      assertEquals(2, fs.listStatus(storePath).length);

      // minor compactions shouldn't get rid of the file
      admin.compact(TABLE_NAME);
      try {
        quickPoll(singleStoreFile, 5000);
        // reaching this line means the poll succeeded, i.e. the files were merged
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // this is expected behavior: the poll timed out because nothing was compacted
      }

      // a major compaction should work though
      admin.majorCompact(TABLE_NAME);
      quickPoll(singleStoreFile, 5000);

    } finally {
      util.shutdownMiniCluster();
    }
  }

  /**
   * Repeatedly evaluates {@code c} every 10 ms until it yields {@code true}; calls {@code fail()}
   * once roughly {@code waitMs} milliseconds of sleeping have been used up.
   *
   * @param c condition to poll
   * @param waitMs approximate total time to keep polling, in milliseconds
   * @throws Exception whatever the condition throws, or an interruption of the sleep
   */
  private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
    final int sleepMs = 10;
    final int maxAttempts = (int) Math.ceil(((double) waitMs) / sleepMs);
    for (int attempt = 0; attempt < maxAttempts; attempt++) {
      boolean satisfied = c.call().booleanValue();
      if (satisfied) {
        return;
      }
      Thread.sleep(sleepMs);
    }
    fail();
  }

  /**
   * Command-line entry point: creates a test instance and hands the arguments to
   * {@link #manualTest(String[])}.
   */
  public static void main(String args[]) throws Exception {
    TestHFileOutputFormat2 instance = new TestHFileOutputFormat2();
    instance.manualTest(args);
  }

  /**
   * Manually drives one of two modes against the cluster described by the default HBase
   * configuration.
   *
   * <ul>
   *   <li>{@code newtable <name>}: creates table {@code <name>} with {@link #FAMILIES} and randomly
   *       generated split keys.
   *   <li>{@code incremental <name>}: runs the incremental load job for table {@code <name>},
   *       writing to the relative path {@code incremental-out}.
   * </ul>
   *
   * @param args mode followed by the table name
   * @throws RuntimeException if the mode or the table name is missing, or the mode is unknown
   * @throws Exception if table creation or the load job fails
   */
  public void manualTest(String args[]) throws Exception {
    final String usage = "usage: TestHFileOutputFormat2 newtable | incremental";
    Configuration conf = HBaseConfiguration.create();
    util = new HBaseTestingUtility(conf);
    // both modes need the mode itself plus a table name; report misuse as a usage error
    // instead of letting the array access below throw ArrayIndexOutOfBoundsException
    if (args == null || args.length < 2) {
      throw new RuntimeException(usage);
    }
    if ("newtable".equals(args[0])) {
      TableName tname = TableName.valueOf(args[1]);
      byte[][] splitKeys = generateRandomSplitKeys(4);
      // the table handle is only needed for creation, so close it right away
      try (Table table = util.createTable(tname, FAMILIES, splitKeys)) {}
    } else if ("incremental".equals(args[0])) {
      TableName tname = TableName.valueOf(args[1]);
      try (Connection c = ConnectionFactory.createConnection(conf);
          Admin admin = c.getAdmin();
          RegionLocator regionLocator = c.getRegionLocator(tname)) {
        Path outDir = new Path("incremental-out");
        runIncrementalPELoad(conf, admin.getTableDescriptor(tname), regionLocator, outDir);
      }
    } else {
      throw new RuntimeException(usage);
    }
  }
}
  /**
   * Runs {@link SweepReducer} against a mocked reducer context and checks its effect on the mob
   * family directory and on the visited-files working directory.
   *
   * <p>Two rows are written and flushed so that one mob file exists. The reducer is then run
   * while holding the table write lock, with a context that serves that mob file name as the only
   * key and a single cell as its values. Afterwards the test expects exactly one mob file with a
   * different name, and expects the sequence files under the visited directory to contain exactly
   * the original mob file name.
   */
  @Test
  public void testRun() throws Exception {

    TableName tn = TableName.valueOf(tableName);
    byte[] mobValueBytes = new byte[100];

    // get the path where mob files lie in
    Path mobFamilyPath = MobUtils.getMobFamilyPath(TEST_UTIL.getConfiguration(), tn, family);

    Put put = new Put(Bytes.toBytes(row));
    put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qf), 1, mobValueBytes);
    Put put2 = new Put(Bytes.toBytes(row + "ignore"));
    put2.addColumn(Bytes.toBytes(family), Bytes.toBytes(qf), 1, mobValueBytes);
    table.mutate(put);
    table.mutate(put2);
    table.flush();
    admin.flush(tn);

    FileStatus[] fileStatuses = TEST_UTIL.getTestFileSystem().listStatus(mobFamilyPath);
    // check the generation of a mob file
    assertEquals(1, fileStatuses.length);

    String mobFile1 = fileStatuses[0].getPath().getName();

    Configuration configuration = new Configuration(TEST_UTIL.getConfiguration());
    configuration.setFloat(MobConstants.MOB_SWEEP_TOOL_COMPACTION_RATIO, 0.6f);
    configuration.setStrings(TableInputFormat.INPUT_TABLE, tableName);
    configuration.setStrings(TableInputFormat.SCAN_COLUMN_FAMILY, family);
    configuration.setStrings(SweepJob.WORKING_VISITED_DIR_KEY, "jobWorkingNamesDir");
    configuration.setStrings(SweepJob.WORKING_FILES_DIR_KEY, "compactionFileDir");
    configuration.setStrings(
        CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, JavaSerialization.class.getName());
    // this overrides the WORKING_VISITED_DIR_KEY value set a few lines above
    configuration.set(SweepJob.WORKING_VISITED_DIR_KEY, "compactionVisitedDir");
    configuration.setLong(
        MobConstants.MOB_SWEEP_TOOL_COMPACTION_START_DATE,
        System.currentTimeMillis() + 24 * 3600 * 1000);

    ZooKeeperWatcher zkw = new ZooKeeperWatcher(configuration, "1", new DummyMobAbortable());
    TableName lockName = MobUtils.getTableLockName(tn);
    String znode = ZKUtil.joinZNode(zkw.tableLockZNode, lockName.getNameAsString());
    configuration.set(SweepJob.SWEEP_JOB_ID, "1");
    configuration.set(SweepJob.SWEEP_JOB_TABLE_NODE, znode);
    ServerName serverName = SweepJob.getCurrentServerName(configuration);
    configuration.set(SweepJob.SWEEP_JOB_SERVERNAME, serverName.toString());

    TableLockManager tableLockManager =
        TableLockManager.createTableLockManager(configuration, zkw, serverName);
    TableLock lock = tableLockManager.writeLock(lockName, "Run sweep tool");
    lock.acquire();
    try {
      // use the same counter when mocking
      Counter counter = new GenericCounter();
      Reducer<Text, KeyValue, Writable, Writable>.Context ctx = mock(Reducer.Context.class);
      when(ctx.getConfiguration()).thenReturn(configuration);
      when(ctx.getCounter(Matchers.any(SweepCounter.class))).thenReturn(counter);
      // serve exactly one key: the name of the mob file created above
      when(ctx.nextKey()).thenReturn(true).thenReturn(false);
      when(ctx.getCurrentKey()).thenReturn(new Text(mobFile1));

      // cell value = length of the mob file name (as a long) followed by the name bytes
      byte[] refBytes = Bytes.toBytes(mobFile1);
      long valueLength = refBytes.length;
      byte[] newValue = Bytes.add(Bytes.toBytes(valueLength), refBytes);
      KeyValue kv2 =
          new KeyValue(
              Bytes.toBytes(row),
              Bytes.toBytes(family),
              Bytes.toBytes(qf),
              1,
              KeyValue.Type.Put,
              newValue);
      List<KeyValue> list = new ArrayList<KeyValue>();
      list.add(kv2);

      when(ctx.getValues()).thenReturn(list);

      SweepReducer reducer = new SweepReducer();
      reducer.run(ctx);
    } finally {
      lock.release();
    }
    FileStatus[] fileStatuses2 = TEST_UTIL.getTestFileSystem().listStatus(mobFamilyPath);
    // new mob file is generated, old one has been archived
    // check the count before indexing so an empty listing fails as an assertion
    assertEquals(1, fileStatuses2.length);
    String mobFile2 = fileStatuses2[0].getPath().getName();
    assertEquals(false, mobFile2.equalsIgnoreCase(mobFile1));

    // test sequence file
    String workingPath = configuration.get(SweepJob.WORKING_VISITED_DIR_KEY);
    FileStatus[] statuses = TEST_UTIL.getTestFileSystem().listStatus(new Path(workingPath));
    Set<String> files = new TreeSet<String>();
    for (FileStatus st : statuses) {
      files.addAll(
          getKeyFromSequenceFile(TEST_UTIL.getTestFileSystem(), st.getPath(), configuration));
    }
    assertEquals(1, files.size());
    assertEquals(true, files.contains(mobFile1));
  }