Beispiel #1
0
 public Map<String, Long> getRegionSizes(String tableName) {
   Map<String, Long> regions = new HashMap<>();
   try {
     final Table table = connection.getTable(TableName.valueOf(tableName));
     RegionLocator regionLocator = connection.getRegionLocator(table.getName());
     List<HRegionLocation> tableRegionInfos = regionLocator.getAllRegionLocations();
     Set<byte[]> tableRegions = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
     for (HRegionLocation regionInfo : tableRegionInfos) {
       tableRegions.add(regionInfo.getRegionInfo().getRegionName());
     }
     ClusterStatus clusterStatus = connection.getAdmin().getClusterStatus();
     Collection<ServerName> servers = clusterStatus.getServers();
     final long megaByte = 1024L * 1024L;
     for (ServerName serverName : servers) {
       ServerLoad serverLoad = clusterStatus.getLoad(serverName);
       for (RegionLoad regionLoad : serverLoad.getRegionsLoad().values()) {
         byte[] regionId = regionLoad.getName();
         if (tableRegions.contains(regionId)) {
           long regionSizeBytes = regionLoad.getStorefileSizeMB() * megaByte;
           regions.put(regionLoad.getNameAsString(), regionSizeBytes);
         }
       }
     }
   } catch (IOException e) {
     e.printStackTrace();
   }
   return regions;
 }
Beispiel #2
0
  static void configureIncrementalLoad(
      Job job,
      HTableDescriptor tableDescriptor,
      RegionLocator regionLocator,
      Class<? extends OutputFormat<?, ?>> cls)
      throws IOException, UnsupportedEncodingException {
    Configuration conf = job.getConfiguration();
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(cls);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(TextSortReducer.class);
    } else {
      LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings(
        "io.serializations",
        conf.get("io.serializations"),
        MutationSerialization.class.getName(),
        ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());

    if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
      // record this table name for creating writer by favored nodes
      LOG.info("bulkload locality sensitive enabled");
      conf.set(OUTPUT_TABLE_NAME_CONF_KEY, regionLocator.getName().getNameAsString());
    }

    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + regionLocator.getName());
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator);
    LOG.info(
        "Configuring "
            + startKeys.size()
            + " reduce partitions "
            + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(conf, tableDescriptor);
    configureBloomType(tableDescriptor, conf);
    configureBlockSize(tableDescriptor, conf);
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + regionLocator.getName() + " output configured.");
  }
Beispiel #3
0
 /**
  * Get the split row keys of table
  *
  * @param tableName table name
  * @return array of split row keys
  * @throws IOException
  */
 private byte[][] getTableSplitRowKeys(TableName tableName) throws IOException {
   try (RegionLocator locator = connection.getRegionLocator(tableName); ) {
     byte[][] startKeys = locator.getStartKeys();
     if (startKeys.length == 1) {
       return null;
     }
     byte[][] splits = new byte[startKeys.length - 1][];
     for (int i = 1; i < startKeys.length; i++) {
       splits[i - 1] = startKeys[i];
     }
     return splits;
   }
 }
  private void runIncrementalPELoad(
      Configuration conf,
      HTableDescriptor tableDescriptor,
      RegionLocator regionLocator,
      Path outDir)
      throws IOException, UnsupportedEncodingException, InterruptedException,
          ClassNotFoundException {
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
    job.getConfiguration()
        .setStrings(
            "io.serializations",
            conf.get("io.serializations"),
            MutationSerialization.class.getName(),
            ResultSerialization.class.getName(),
            KeyValueSerialization.class.getName());
    setupRandomGeneratorMapper(job);
    HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
    FileOutputFormat.setOutputPath(job, outDir);

    assertFalse(util.getTestFileSystem().exists(outDir));

    assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());

    assertTrue(job.waitForCompletion(true));
  }
 public int getPartition(ImmutableBytesWritable key, V2 value, int numPartitions) {
   byte[] region = null;
   // Only one region return 0
   if (this.startKeys.length == 1) {
     return 0;
   }
   try {
     // Not sure if this is cached after a split so we could have problems
     // here if a region splits while mapping
     region = table.getRegionLocation(key.get()).getRegionInfo().getStartKey();
   } catch (IOException e) {
     LOG.error(e);
   }
   for (int i = 0; i < this.startKeys.length; i++) {
     if (Bytes.compareTo(region, this.startKeys[i]) == 0) {
       if (i >= numPartitions - 1) {
         // cover if we have less reduces then regions.
         return (Integer.toString(i).hashCode() & Integer.MAX_VALUE) % numPartitions;
       }
       return i;
     }
   }
   // if above fails to find start key that match we need to return something
   return 0;
 }
Beispiel #6
0
 /**
  * Return the start keys of all of the regions in this table, as a list of ImmutableBytesWritable.
  */
 private static List<ImmutableBytesWritable> getRegionStartKeys(RegionLocator table)
     throws IOException {
   byte[][] byteKeys = table.getStartKeys();
   ArrayList<ImmutableBytesWritable> ret = new ArrayList<ImmutableBytesWritable>(byteKeys.length);
   for (byte[] byteKey : byteKeys) {
     ret.add(new ImmutableBytesWritable(byteKey));
   }
   return ret;
 }
  protected void deleteRegion(Configuration conf, final Table tbl, byte[] startKey, byte[] endKey)
      throws IOException {

    LOG.info("Before delete:");
    HTableDescriptor htd = tbl.getTableDescriptor();
    dumpMeta(htd);

    List<HRegionLocation> regions;
    try (RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
      regions = rl.getAllRegionLocations();
    }

    for (HRegionLocation e : regions) {
      HRegionInfo hri = e.getRegionInfo();
      ServerName hsa = e.getServerName();
      if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
          && Bytes.compareTo(hri.getEndKey(), endKey) == 0) {

        LOG.info("RegionName: " + hri.getRegionNameAsString());
        byte[] deleteRow = hri.getRegionName();
        TEST_UTIL.getHBaseAdmin().unassign(deleteRow, true);

        LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
        Path rootDir = FSUtils.getRootDir(conf);
        FileSystem fs = rootDir.getFileSystem(conf);
        Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()), hri.getEncodedName());
        fs.delete(p, true);

        try (Table meta = this.connection.getTable(TableName.META_TABLE_NAME)) {
          Delete delete = new Delete(deleteRow);
          meta.delete(delete);
        }
      }
      LOG.info(hri.toString() + hsa.toString());
    }

    TEST_UTIL.getMetaTableRows(htd.getTableName());
    LOG.info("After delete:");
    dumpMeta(htd);
  }
  @Test(timeout = 180000)
  public void testCloneSnapshot() throws Exception {
    String nsp = prefix + "_testCloneSnapshot";
    NamespaceDescriptor nspDesc =
        NamespaceDescriptor.create(nsp)
            .addConfiguration(TableNamespaceManager.KEY_MAX_TABLES, "2")
            .addConfiguration(TableNamespaceManager.KEY_MAX_REGIONS, "20")
            .build();
    ADMIN.createNamespace(nspDesc);
    assertNotNull("Namespace descriptor found null.", ADMIN.getNamespaceDescriptor(nsp));
    TableName tableName = TableName.valueOf(nsp + TableName.NAMESPACE_DELIM + "table1");
    TableName cloneTableName = TableName.valueOf(nsp + TableName.NAMESPACE_DELIM + "table2");
    HTableDescriptor tableDescOne = new HTableDescriptor(tableName);

    ADMIN.createTable(tableDescOne, Bytes.toBytes("AAA"), Bytes.toBytes("ZZZ"), 4);
    String snapshot = "snapshot_testCloneSnapshot";
    ADMIN.snapshot(snapshot, tableName);
    ADMIN.cloneSnapshot(snapshot, cloneTableName);

    int tableLength;
    try (RegionLocator locator = ADMIN.getConnection().getRegionLocator(tableName)) {
      tableLength = locator.getStartKeys().length;
    }
    assertEquals(tableName.getNameAsString() + " should have four regions.", 4, tableLength);

    try (RegionLocator locator = ADMIN.getConnection().getRegionLocator(cloneTableName)) {
      tableLength = locator.getStartKeys().length;
    }
    assertEquals(cloneTableName.getNameAsString() + " should have four regions.", 4, tableLength);

    NamespaceTableAndRegionInfo nstate = getNamespaceState(nsp);
    assertEquals("Total tables count should be 2.", 2, nstate.getTables().size());
    assertEquals("Total regions count should be.", 8, nstate.getRegionCount());

    ADMIN.deleteSnapshot(snapshot);
  }
Beispiel #9
0
  private static List<HBaseMRRowRange> binRanges(
      final List<HBaseMRRowRange> inputRanges,
      final Map<HRegionLocation, Map<HRegionInfo, List<HBaseMRRowRange>>> binnedRanges,
      final RegionLocator regionLocator)
      throws IOException {

    // Loop through ranges, getting RegionLocation and RegionInfo for
    // startKey, clipping range by that regionInfo's extent, and leaving
    // remainder in the List to be region'd
    final ListIterator<HBaseMRRowRange> i = inputRanges.listIterator();
    while (i.hasNext()) {
      final HBaseMRRowRange range = i.next();
      final HRegionLocation location = regionLocator.getRegionLocation(range.getStart().getBytes());

      Map<HRegionInfo, List<HBaseMRRowRange>> regionInfoMap = binnedRanges.get(location);
      if (regionInfoMap == null) {
        regionInfoMap = new HashMap<HRegionInfo, List<HBaseMRRowRange>>();
        binnedRanges.put(location, regionInfoMap);
      }

      final HRegionInfo regionInfo = location.getRegionInfo();
      List<HBaseMRRowRange> rangeList = regionInfoMap.get(regionInfo);
      if (rangeList == null) {
        rangeList = new ArrayList<HBaseMRRowRange>();
        regionInfoMap.put(regionInfo, rangeList);
      }

      if (regionInfo.containsRange(range.getStart().getBytes(), range.getEnd().getBytes())) {
        rangeList.add(range);
        i.remove();
      } else {
        final ByteArrayRange overlappingRange =
            range.intersection(
                new ByteArrayRange(
                    new ByteArrayId(regionInfo.getStartKey()),
                    new ByteArrayId(regionInfo.getEndKey())));
        rangeList.add(new HBaseMRRowRange(overlappingRange));

        final HBaseMRRowRange uncoveredRange =
            new HBaseMRRowRange(
                new ByteArrayId(HBaseUtils.getNextPrefix(regionInfo.getEndKey())), range.getEnd());
        i.add(uncoveredRange);
      }
    }

    return inputRanges;
  }
  private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality)
      throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
    int hostCount = 1;
    int regionNum = 5;
    if (shouldKeepLocality) {
      // We should change host count higher than hdfs replica count when MiniHBaseCluster supports
      // explicit hostnames parameter just like MiniDFSCluster does.
      hostCount = 3;
      regionNum = 20;
    }

    byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
    String[] hostnames = new String[hostCount];
    for (int i = 0; i < hostCount; ++i) {
      hostnames[i] = "datanode_" + i;
    }
    util.startMiniCluster(1, hostCount, hostnames);

    Table table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
    try (RegionLocator r = util.getConnection().getRegionLocator(TABLE_NAME);
        Admin admin = util.getConnection().getAdmin(); ) {
      assertEquals("Should start with empty table", 0, util.countRows(table));
      int numRegions = r.getStartKeys().length;
      assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);

      // Generate the bulk load files
      runIncrementalPELoad(conf, table.getTableDescriptor(), r, testDir);
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

      // Make sure that a directory was created for every CF
      int dir = 0;
      for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);

      // handle the split case
      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getName());
        while (util.getMiniHBaseCluster()
            .getMaster()
            .getAssignmentManager()
            .getRegionStates()
            .isRegionsInTransition()) {
          Threads.sleep(200);
          LOG.info("Waiting on table to finish disabling");
        }
        util.deleteTable(table.getName());
        byte[][] newSplitKeys = generateRandomSplitKeys(14);
        table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);

        while (util.getConnection().getRegionLocator(TABLE_NAME).getAllRegionLocations().size()
                != 15
            || !admin.isTableAvailable(table.getName())) {
          Thread.sleep(200);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, r);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals(
          "LoadIncrementalHFiles should put expected data in table",
          expectedRows,
          util.countRows(table));
      Scan scan = new Scan();
      ResultScanner results = table.getScanner(scan);
      for (Result res : results) {
        assertEquals(FAMILIES.length, res.rawCells().length);
        Cell first = res.rawCells()[0];
        for (Cell kv : res.rawCells()) {
          assertTrue(CellUtil.matchingRow(first, kv));
          assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        }
      }
      results.close();
      String tableDigestBefore = util.checksumRows(table);

      // Check region locality
      HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
      for (HRegion region : util.getHBaseCluster().getRegions(TABLE_NAME)) {
        hbd.add(region.getHDFSBlocksDistribution());
      }
      for (String hostname : hostnames) {
        float locality = hbd.getBlockLocalityIndex(hostname);
        LOG.info("locality of [" + hostname + "]: " + locality);
        assertEquals(100, (int) (locality * 100));
      }

      // Cause regions to reopen
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME);
      assertEquals(
          "Data should remain after reopening of regions",
          tableDigestBefore,
          util.checksumRows(table));
    } finally {
      testDir.getFileSystem(conf).delete(testDir, true);
      util.deleteTable(TABLE_NAME);
      util.shutdownMiniCluster();
    }
  }
  private void doIncrementalLoadTest(boolean shouldChangeRegions) throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    byte[][] splitKeys = generateRandomSplitKeys(4);
    util.startMiniCluster();
    try {
      HTable table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
      Admin admin = table.getConnection().getAdmin();
      Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
      assertEquals("Should start with empty table", 0, util.countRows(table));
      int numRegions = -1;
      try (RegionLocator r = table.getRegionLocator()) {
        numRegions = r.getStartKeys().length;
      }
      assertEquals("Should make 5 regions", numRegions, 5);

      // Generate the bulk load files
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table.getTableDescriptor(), table.getRegionLocator(), testDir);
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

      // Make sure that a directory was created for every CF
      int dir = 0;
      for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);

      // handle the split case
      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getName());
        while (util.getMiniHBaseCluster()
            .getMaster()
            .getAssignmentManager()
            .getRegionStates()
            .isRegionsInTransition()) {
          Threads.sleep(200);
          LOG.info("Waiting on table to finish disabling");
        }
        util.deleteTable(table.getName());
        byte[][] newSplitKeys = generateRandomSplitKeys(14);
        table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);

        while (table.getRegionLocator().getAllRegionLocations().size() != 15
            || !admin.isTableAvailable(table.getName())) {
          Thread.sleep(200);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals(
          "LoadIncrementalHFiles should put expected data in table",
          expectedRows,
          util.countRows(table));
      Scan scan = new Scan();
      ResultScanner results = table.getScanner(scan);
      for (Result res : results) {
        assertEquals(FAMILIES.length, res.rawCells().length);
        Cell first = res.rawCells()[0];
        for (Cell kv : res.rawCells()) {
          assertTrue(CellUtil.matchingRow(first, kv));
          assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        }
      }
      results.close();
      String tableDigestBefore = util.checksumRows(table);

      // Cause regions to reopen
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME);
      assertEquals(
          "Data should remain after reopening of regions",
          tableDigestBefore,
          util.checksumRows(table));
    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }
  /**
   * Test the global mem store size in the region server is equal to sum of each region's mem store
   * size
   *
   * @throws Exception
   */
  @Test
  public void testGlobalMemStore() throws Exception {
    // Start the cluster
    LOG.info("Starting cluster");
    Configuration conf = HBaseConfiguration.create();
    TEST_UTIL = new HBaseTestingUtility(conf);
    TEST_UTIL.startMiniCluster(1, regionServerNum);
    cluster = TEST_UTIL.getHBaseCluster();
    LOG.info("Waiting for active/ready master");
    cluster.waitForActiveAndReadyMaster();

    // Create a table with regions
    TableName table = TableName.valueOf("TestGlobalMemStoreSize");
    byte[] family = Bytes.toBytes("family");
    LOG.info("Creating table with " + regionNum + " regions");
    Table ht = TEST_UTIL.createMultiRegionTable(table, family, regionNum);
    int numRegions = -1;
    try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(table)) {
      numRegions = r.getStartKeys().length;
    }
    assertEquals(regionNum, numRegions);
    waitForAllRegionsAssigned();

    for (HRegionServer server : getOnlineRegionServers()) {
      long globalMemStoreSize = 0;
      for (HRegionInfo regionInfo : ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
        globalMemStoreSize +=
            server.getFromOnlineRegions(regionInfo.getEncodedName()).getMemstoreSize();
      }
      assertEquals(server.getRegionServerAccounting().getGlobalMemstoreSize(), globalMemStoreSize);
    }

    // check the global memstore size after flush
    int i = 0;
    for (HRegionServer server : getOnlineRegionServers()) {
      LOG.info(
          "Starting flushes on "
              + server.getServerName()
              + ", size="
              + server.getRegionServerAccounting().getGlobalMemstoreSize());

      for (HRegionInfo regionInfo : ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
        Region r = server.getFromOnlineRegions(regionInfo.getEncodedName());
        flush(r, server);
      }
      LOG.info("Post flush on " + server.getServerName());
      long now = System.currentTimeMillis();
      long timeout = now + 1000;
      while (server.getRegionServerAccounting().getGlobalMemstoreSize() != 0
          && timeout < System.currentTimeMillis()) {
        Threads.sleep(10);
      }
      long size = server.getRegionServerAccounting().getGlobalMemstoreSize();
      if (size > 0) {
        // If size > 0, see if its because the meta region got edits while
        // our test was running....
        for (HRegionInfo regionInfo : ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
          Region r = server.getFromOnlineRegions(regionInfo.getEncodedName());
          long l = r.getMemstoreSize();
          if (l > 0) {
            // Only meta could have edits at this stage.  Give it another flush
            // clear them.
            assertTrue(regionInfo.isMetaRegion());
            LOG.info(r.toString() + " " + l + ", reflushing");
            r.flush(true);
          }
        }
      }
      size = server.getRegionServerAccounting().getGlobalMemstoreSize();
      assertEquals("Server=" + server.getServerName() + ", i=" + i++, 0, size);
    }

    ht.close();
    TEST_UTIL.shutdownMiniCluster();
  }