/**
 * Add data to a table (suited to a fixed table whose column families are known in advance).
 *
 * @param rowKey    row key
 * @param tableName table name
 * @param column1   columns of the first column family
 * @param value1    values for the first family's columns
 * @param column2   columns of the second column family
 * @param value2    values for the second family's columns
 */
public static void addData(
    String rowKey,
    String tableName,
    String[] column1,
    String[] value1,
    String[] column2,
    String[] value2)
    throws IOException {
  Put put = new Put(Bytes.toBytes(rowKey)); // set the row key
  HTable table = new HTable(conf, Bytes.toBytes(tableName)); // HTable handles record-level operations (put/get/delete/scan)
  HColumnDescriptor[] columnFamilies = table
      .getTableDescriptor() // get all column families
      .getColumnFamilies();
  for (int i = 0; i < columnFamilies.length; i++) {
    String familyName = columnFamilies[i].getNameAsString(); // column family name
    if (familyName.equals("article")) { // put data into the "article" family
      for (int j = 0; j < column1.length; j++) {
        put.add(Bytes.toBytes(familyName), Bytes.toBytes(column1[j]), Bytes.toBytes(value1[j]));
      }
    }
    if (familyName.equals("author")) { // put data into the "author" family
      for (int j = 0; j < column2.length; j++) {
        put.add(Bytes.toBytes(familyName), Bytes.toBytes(column2[j]), Bytes.toBytes(value2[j]));
      }
    }
  }
  table.put(put);
  System.out.println("add data Success!");
}
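// A minimal usage sketch for the addData() overload above (not from the original source): the
// table name "blog", the row key and the column/value arrays are hypothetical, and the static
// `conf` Configuration is assumed to be initialized elsewhere, as in the snippet itself. The
// table must already contain the "article" and "author" column families the method expects.
public static void addDataExample() throws IOException {
  String[] articleColumns = {"title", "content"};
  String[] articleValues = {"Hello HBase", "Bulk put example"};
  String[] authorColumns = {"name", "nickname"};
  String[] authorValues = {"Alice", "ali"};
  addData("row-0001", "blog", articleColumns, articleValues, authorColumns, authorValues);
}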
/**
 * Add a single column value.
 *
 * @param tableName table name
 * @param rowkey    row key
 * @param col       column to write
 */
public void addColumn(String tableName, String rowkey, HbaseColumn col) {
  try {
    HTable table = new HTable(conf, Bytes.toBytes(tableName)); // HTable handles record-level operations (put/get/delete/scan)
    HColumnDescriptor[] cfstmp = table.getTableDescriptor().getColumnFamilies(); // get all column families
    ArrayList<String> cfs = new ArrayList<>();
    for (HColumnDescriptor cf : cfstmp) {
      cfs.add(cf.getNameAsString()); // column family name
    }
    Put put = new Put(Bytes.toBytes(rowkey)); // set the row key
    if (cfs.contains(col.cf)) {
      put.add(Bytes.toBytes(col.cf), Bytes.toBytes(col.col), Bytes.toBytes(col.value));
    }
    table.put(put);
  } catch (Exception e) {
    logger.error("addColumn failed", e);
  }
}
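// A minimal usage sketch for addColumn() above (not from the original source): it assumes
// HbaseColumn has a no-arg constructor and the public fields cf, col and value that addColumn()
// reads, and that the hypothetical "users" table already has an "info" column family.
public void addColumnExample() {
  HbaseColumn col = new HbaseColumn();
  col.cf = "info";                 // existing column family
  col.col = "email";               // column qualifier
  col.value = "alice@example.com"; // cell value
  addColumn("users", "user-0001", col);
}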
/**
 * Add a single cell of data.
 *
 * @param tableName    table name
 * @param rowKey       row key
 * @param columnFamily column family name
 * @param column       column qualifier
 * @param value        cell value
 */
public void addData(
    String tableName, String rowKey, String columnFamily, String column, String value) {
  try {
    // Get the table; HTable handles record-level operations (put/get/delete/scan)
    HTable table = new HTable(conf, Bytes.toBytes(tableName));
    HColumnDescriptor[] cfs = table.getTableDescriptor().getColumnFamilies(); // get all column families
    Put put = new Put(Bytes.toBytes(rowKey)); // set the row key
    for (HColumnDescriptor cf : cfs) {
      String familyName = cf.getNameAsString(); // column family name
      if (!familyName.equals(columnFamily)) {
        continue;
      }
      // Matching family: put the data
      put.add(Bytes.toBytes(familyName), Bytes.toBytes(column), Bytes.toBytes(value));
    }
    table.put(put);
  } catch (Exception e) {
    logger.error("addData failed", e);
  }
}
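// A minimal usage sketch for the single-cell addData() above (not from the original source):
// the table, row key, family, qualifier and value are hypothetical, and the "info" family must
// already exist on the table or nothing is written.
public void addSingleCellExample() {
  addData("users", "user-0001", "info", "age", "30");
}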
private void runIncrementalPELoad(Configuration conf, HTable table, Path outDir)
    throws Exception {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  job.getConfiguration()
      .setStrings(
          "io.serializations",
          conf.get("io.serializations"),
          MutationSerialization.class.getName(),
          ResultSerialization.class.getName(),
          KeyValueSerialization.class.getName());
  setupRandomGeneratorMapper(job);
  HFileOutputFormat2.configureIncrementalLoad(
      job, table.getTableDescriptor(), table.getRegionLocator());
  FileOutputFormat.setOutputPath(job, outDir);

  Assert.assertFalse(util.getTestFileSystem().exists(outDir));
  assertEquals(table.getRegionLocator().getAllRegionLocations().size(), job.getNumReduceTasks());

  assertTrue(job.waitForCompletion(true));
}
private void init() {
  logger.debug("Getting region locations");
  try {
    HTable table = new HTable(storagePluginConfig.getHBaseConf(), hbaseScanSpec.getTableName());
    this.hTableDesc = table.getTableDescriptor();
    NavigableMap<HRegionInfo, ServerName> regionsMap = table.getRegionLocations();
    statsCalculator =
        new TableStatsCalculator(table, hbaseScanSpec, storagePlugin.getContext().getConfig());

    boolean foundStartRegion = false;
    regionsToScan = new TreeMap<HRegionInfo, ServerName>();
    for (Entry<HRegionInfo, ServerName> mapEntry : regionsMap.entrySet()) {
      HRegionInfo regionInfo = mapEntry.getKey();
      if (!foundStartRegion
          && hbaseScanSpec.getStartRow() != null
          && hbaseScanSpec.getStartRow().length != 0
          && !regionInfo.containsRow(hbaseScanSpec.getStartRow())) {
        continue;
      }
      foundStartRegion = true;
      regionsToScan.put(regionInfo, mapEntry.getValue());
      scanSizeInBytes += statsCalculator.getRegionSizeInBytes(regionInfo.getRegionName());
      if (hbaseScanSpec.getStopRow() != null
          && hbaseScanSpec.getStopRow().length != 0
          && regionInfo.containsRow(hbaseScanSpec.getStopRow())) {
        break;
      }
    }
    table.close();
  } catch (IOException e) {
    throw new DrillRuntimeException(
        "Error getting region info for table: " + hbaseScanSpec.getTableName(), e);
  }
  verifyColumns();
}
/**
 * This test covers the scenario described in HBASE-6901. All files are bulk loaded and
 * excluded from minor compaction. Without the fix for HBASE-6901, an
 * ArrayIndexOutOfBoundsException is thrown.
 */
@Ignore("Flakey: See HBASE-9051")
@Test
public void testExcludeAllFromMinorCompaction() throws Exception {
  Configuration conf = util.getConfiguration();
  conf.setInt("hbase.hstore.compaction.min", 2);
  generateRandomStartKeys(5);

  util.startMiniCluster();
  try (Connection conn = ConnectionFactory.createConnection();
      Admin admin = conn.getAdmin()) {
    final FileSystem fs = util.getDFSCluster().getFileSystem();
    HTable table = util.createTable(TABLE_NAME, FAMILIES);
    assertEquals("Should start with empty table", 0, util.countRows(table));

    // deep inspection: get the StoreFile dir
    final Path storePath =
        new Path(
            FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
            new Path(
                admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
                Bytes.toString(FAMILIES[0])));
    assertEquals(0, fs.listStatus(storePath).length);

    // Generate two bulk load files
    conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);
    util.startMiniMapReduceCluster();

    for (int i = 0; i < 2; i++) {
      Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
      runIncrementalPELoad(
          conf, table.getTableDescriptor(), conn.getRegionLocator(TABLE_NAME), testDir);
      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
    }

    // Ensure data shows up
    int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
    assertEquals(
        "LoadIncrementalHFiles should put expected data in table",
        expectedRows,
        util.countRows(table));

    // should have a second StoreFile now
    assertEquals(2, fs.listStatus(storePath).length);

    // minor compactions shouldn't get rid of the file
    admin.compact(TABLE_NAME);
    try {
      quickPoll(
          new Callable<Boolean>() {
            @Override
            public Boolean call() throws Exception {
              return fs.listStatus(storePath).length == 1;
            }
          },
          5000);
      throw new IOException("SF# = " + fs.listStatus(storePath).length);
    } catch (AssertionError ae) {
      // this is expected behavior
    }

    // a major compaction should work though
    admin.majorCompact(TABLE_NAME);
    quickPoll(
        new Callable<Boolean>() {
          @Override
          public Boolean call() throws Exception {
            return fs.listStatus(storePath).length == 1;
          }
        },
        5000);
  } finally {
    util.shutdownMiniMapReduceCluster();
    util.shutdownMiniCluster();
  }
}
private void doIncrementalLoadTest(boolean shouldChangeRegions) throws Exception {
  util = new HBaseTestingUtility();
  Configuration conf = util.getConfiguration();
  byte[][] splitKeys = generateRandomSplitKeys(4);
  util.startMiniCluster();
  try {
    HTable table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
    Admin admin = table.getConnection().getAdmin();
    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
    assertEquals("Should start with empty table", 0, util.countRows(table));
    int numRegions = -1;
    try (RegionLocator r = table.getRegionLocator()) {
      numRegions = r.getStartKeys().length;
    }
    assertEquals("Should make 5 regions", numRegions, 5);

    // Generate the bulk load files
    util.startMiniMapReduceCluster();
    runIncrementalPELoad(conf, table.getTableDescriptor(), table.getRegionLocator(), testDir);
    // This doesn't write into the table, just makes files
    assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

    // Make sure that a directory was created for every CF
    int dir = 0;
    for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
      for (byte[] family : FAMILIES) {
        if (Bytes.toString(family).equals(f.getPath().getName())) {
          ++dir;
        }
      }
    }
    assertEquals("Column family not found in FS.", FAMILIES.length, dir);

    // handle the split case
    if (shouldChangeRegions) {
      LOG.info("Changing regions in table");
      admin.disableTable(table.getName());
      while (util.getMiniHBaseCluster()
          .getMaster()
          .getAssignmentManager()
          .getRegionStates()
          .isRegionsInTransition()) {
        Threads.sleep(200);
        LOG.info("Waiting on table to finish disabling");
      }
      util.deleteTable(table.getName());
      byte[][] newSplitKeys = generateRandomSplitKeys(14);
      table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);
      while (table.getRegionLocator().getAllRegionLocations().size() != 15
          || !admin.isTableAvailable(table.getName())) {
        Thread.sleep(200);
        LOG.info("Waiting for new region assignment to happen");
      }
    }

    // Perform the actual load
    new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

    // Ensure data shows up
    int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
    assertEquals(
        "LoadIncrementalHFiles should put expected data in table",
        expectedRows,
        util.countRows(table));
    Scan scan = new Scan();
    ResultScanner results = table.getScanner(scan);
    for (Result res : results) {
      assertEquals(FAMILIES.length, res.rawCells().length);
      Cell first = res.rawCells()[0];
      for (Cell kv : res.rawCells()) {
        assertTrue(CellUtil.matchingRow(first, kv));
        assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
      }
    }
    results.close();

    String tableDigestBefore = util.checksumRows(table);

    // Cause regions to reopen
    admin.disableTable(TABLE_NAME);
    while (!admin.isTableDisabled(TABLE_NAME)) {
      Thread.sleep(200);
      LOG.info("Waiting for table to disable");
    }
    admin.enableTable(TABLE_NAME);
    util.waitTableAvailable(TABLE_NAME);
    assertEquals(
        "Data should remain after reopening of regions",
        tableDigestBefore,
        util.checksumRows(table));
  } finally {
    util.shutdownMiniMapReduceCluster();
    util.shutdownMiniCluster();
  }
}
/**
 * Get an estimate of the number of rows and bytes per row in regions between startRowKey and
 * endRowKey. The more store files there are, the more this will be off. Also, this does not
 * take into account any rows that are in the memstore.
 *
 * <p>The values computed here should be cached so that in high-qps workloads the nn is not
 * overwhelmed. Could be done in load(); synchronized to make sure that only one thread at a
 * time is using the htable.
 *
 * @param startRowKey First row key in the range
 * @param endRowKey Last row key in the range
 * @return The estimated number of rows in the regions between the row keys (first) and the
 *     estimated row size in bytes (second).
 */
public synchronized Pair<Long, Long> getEstimatedRowStats(byte[] startRowKey, byte[] endRowKey) {
  Preconditions.checkNotNull(startRowKey);
  Preconditions.checkNotNull(endRowKey);

  long rowSize = 0;
  long rowCount = 0;
  long hdfsSize = 0;
  boolean isCompressed = false;

  try {
    // Check to see if things are compressed.
    // If they are we'll estimate a compression factor.
    if (columnFamilies_ == null) {
      columnFamilies_ = hTable_.getTableDescriptor().getColumnFamilies();
    }
    Preconditions.checkNotNull(columnFamilies_);
    for (HColumnDescriptor desc : columnFamilies_) {
      isCompressed |= desc.getCompression() != Compression.Algorithm.NONE;
    }

    // For every region in the range.
    List<HRegionLocation> locations = getRegionsInRange(hTable_, startRowKey, endRowKey);
    for (HRegionLocation location : locations) {
      long currentHdfsSize = 0;
      long currentRowSize = 0;
      long currentRowCount = 0;

      HRegionInfo info = location.getRegionInfo();
      // Get the size on hdfs
      currentHdfsSize += getHdfsSize(info);

      Scan s = new Scan(info.getStartKey());
      // Get a small sample of rows
      s.setBatch(ROW_COUNT_ESTIMATE_BATCH_SIZE);
      // Try and get every version so the row's size can be used to estimate.
      s.setMaxVersions(Short.MAX_VALUE);
      // Don't cache the blocks as we don't think these are
      // necessarily important blocks.
      s.setCacheBlocks(false);
      // Try and get deletes too so their size can be counted.
      s.setRaw(true);
      ResultScanner rs = hTable_.getScanner(s);
      try {
        // And get the ROW_COUNT_ESTIMATE_BATCH_SIZE fetched rows
        // for a representative sample
        for (int i = 0; i < ROW_COUNT_ESTIMATE_BATCH_SIZE; i++) {
          Result r = rs.next();
          if (r == null) break;
          currentRowCount += 1;
          for (KeyValue kv : r.list()) {
            // some extra row size added to make up for shared overhead
            currentRowSize += kv.getRowLength() // row key
                + 4 // row key length field
                + kv.getFamilyLength() // Column family bytes
                + 4 // family length field
                + kv.getQualifierLength() // qualifier bytes
                + 4 // qualifier length field
                + kv.getValueLength() // length of the value
                + 4 // value length field
                + 10; // extra overhead for hfile index, checksums, metadata, etc
          }
        }
        // add these values to the cumulative totals in one shot just
        // in case there was an error in between getting the hdfs
        // size and the row/column sizes.
        hdfsSize += currentHdfsSize;
        rowCount += currentRowCount;
        rowSize += currentRowSize;
      } finally {
        rs.close();
      }
    }
  } catch (IOException ioe) {
    // Print the stack trace, but we'll ignore it
    // as this is just an estimate.
    // TODO: Put this into the per query log.
    LOG.error("Error computing HBase row count estimate", ioe);
  }

  // If there are no rows then no need to estimate.
  if (rowCount == 0) return new Pair<Long, Long>(0L, 0L);
  // if something went wrong then set a signal value.
  if (rowSize <= 0 || hdfsSize <= 0) return new Pair<Long, Long>(-1L, -1L);
  // estimate the number of rows.
  double bytesPerRow = rowSize / (double) rowCount;
  long estimatedRowCount = (long) ((isCompressed ? 2 : 1) * (hdfsSize / bytesPerRow));
  return new Pair<Long, Long>(estimatedRowCount, (long) bytesPerRow);
}
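// A worked example of the estimate above (illustrative numbers only, not from the original
// source): if the sampled scan returns rowCount = 100 rows totalling rowSize = 20,000 bytes,
// then bytesPerRow = 200. With hdfsSize = 1,000,000 bytes of store files and compression
// enabled on at least one family, the method returns
// estimatedRowCount = 2 * 1,000,000 / 200 = 10,000 rows and 200 bytes per row.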