/**
 * This utility method creates a new Thrift ColumnDescriptor "struct" based on an HBase
 * HColumnDescriptor object.
 *
 * @param in HBase HColumnDescriptor object
 * @return Thrift ColumnDescriptor
 */
public static ColumnDescriptor colDescFromHbase(HColumnDescriptor in) {
  ColumnDescriptor col = new ColumnDescriptor();
  col.name = ByteBuffer.wrap(Bytes.add(in.getName(), KeyValue.COLUMN_FAMILY_DELIM_ARRAY));
  col.maxVersions = in.getMaxVersions();
  col.compression = in.getCompression().toString();
  col.inMemory = in.isInMemory();
  col.blockCacheEnabled = in.isBlockCacheEnabled();
  col.bloomFilterType = in.getBloomFilterType().toString();
  return col;
}
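// For illustration only: a minimal sketch (not part of the original file) showing how the
// conversion above might be applied to every column family of a table. The helper name
// colDescsFromTable and its placement in the same class as colDescFromHbase are assumptions;
// it additionally assumes imports for java.util.ArrayList, java.util.List and
// org.apache.hadoop.hbase.HTableDescriptor.
public static List<ColumnDescriptor> colDescsFromTable(HTableDescriptor htd) {
  List<ColumnDescriptor> cols = new ArrayList<>();
  // getColumnFamilies() returns one HColumnDescriptor per family; convert each in turn.
  for (HColumnDescriptor hcd : htd.getColumnFamilies()) {
    cols.add(colDescFromHbase(hcd));
  }
  return cols;
}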
/**
 * Test that {@link HFileOutputFormat2} RecordWriter uses compression and bloom filter settings
 * from the column family descriptor.
 */
@Test
public void testColumnFamilySettings() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir = util.getDataTestDir("testColumnFamilySettings");

  // Setup table descriptor
  Table table = Mockito.mock(Table.class);
  RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
  HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
  Mockito.doReturn(htd).when(table).getTableDescriptor();
  for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
    htd.addFamily(hcd);
  }

  // set up the table to return some mock keys
  setupMockStartKeys(regionLocator);

  try {
    // partial map red setup to get an operational writer for testing
    // We turn off the sequence file compression, because DefaultCodec
    // pollutes the GZip codec pool with an incompatible compressor.
    conf.set("io.seqfile.compression.type", "NONE");
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
    setupRandomGeneratorMapper(job);
    HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);

    // write out random rows
    writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
    writer.close(context);

    // Make sure that a directory was created for every CF
    FileSystem fs = dir.getFileSystem(conf);

    // commit so that the filesystem has one directory per column family
    hof.getOutputCommitter(context).commitTask(context);
    hof.getOutputCommitter(context).commitJob(context);
    FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
    assertEquals(htd.getFamilies().size(), families.length);
    for (FileStatus f : families) {
      String familyStr = f.getPath().getName();
      HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
      // verify that the compression on this file matches the configured compression
      Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
      Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
      Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

      byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
      if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
      assertEquals(
          "Incorrect bloom filter used for column family " + familyStr + " (reader: " + reader
              + ")",
          hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
      assertEquals(
          "Incorrect compression used for column family " + familyStr + " (reader: " + reader
              + ")",
          hcd.getCompression(), reader.getFileContext().getCompression());
    }
  } finally {
    dir.getFileSystem(conf).delete(dir, true);
  }
}
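// Summary of what the assertions above check (restating the test, no new behavior assumed):
// for each per-family output directory the test opens the HFile inside it and verifies two
// properties against that family's HColumnDescriptor:
//   1. the bloom filter type recorded under StoreFile.BLOOM_FILTER_TYPE_KEY in the HFile's
//      file-info map (a missing entry is treated as NONE), and
//   2. the compression algorithm reported by reader.getFileContext().getCompression().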
/**
 * Get an estimate of the number of rows and bytes per row in regions between startRowKey and
 * endRowKey. The more store files there are, the more this will be off. Also, this does not
 * take into account any rows that are in the memstore.
 *
 * <p>The values computed here should be cached so that in high-qps workloads the namenode is
 * not overwhelmed. Could be done in load(). Synchronized to make sure that only one thread at
 * a time is using the htable.
 *
 * @param startRowKey First row key in the range
 * @param endRowKey Last row key in the range
 * @return The estimated number of rows in the regions between the row keys (first) and the
 *     estimated row size in bytes (second).
 */
public synchronized Pair<Long, Long> getEstimatedRowStats(byte[] startRowKey, byte[] endRowKey) {
  Preconditions.checkNotNull(startRowKey);
  Preconditions.checkNotNull(endRowKey);

  long rowSize = 0;
  long rowCount = 0;
  long hdfsSize = 0;
  boolean isCompressed = false;

  try {
    // Check to see if things are compressed.
    // If they are we'll estimate a compression factor.
    if (columnFamilies_ == null) {
      columnFamilies_ = hTable_.getTableDescriptor().getColumnFamilies();
    }
    Preconditions.checkNotNull(columnFamilies_);
    for (HColumnDescriptor desc : columnFamilies_) {
      isCompressed |= desc.getCompression() != Compression.Algorithm.NONE;
    }

    // For every region in the range.
    List<HRegionLocation> locations = getRegionsInRange(hTable_, startRowKey, endRowKey);
    for (HRegionLocation location : locations) {
      long currentHdfsSize = 0;
      long currentRowSize = 0;
      long currentRowCount = 0;

      HRegionInfo info = location.getRegionInfo();
      // Get the size on hdfs
      currentHdfsSize += getHdfsSize(info);

      Scan s = new Scan(info.getStartKey());
      // Get a small sample of rows
      s.setBatch(ROW_COUNT_ESTIMATE_BATCH_SIZE);
      // Try and get every version so the row's size can be used to estimate.
      s.setMaxVersions(Short.MAX_VALUE);
      // Don't cache the blocks as we don't think these are
      // necessarily important blocks.
      s.setCacheBlocks(false);
      // Try and get deletes too so their size can be counted.
      s.setRaw(true);
      ResultScanner rs = hTable_.getScanner(s);
      try {
        // And get the ROW_COUNT_ESTIMATE_BATCH_SIZE fetched rows
        // for a representative sample
        for (int i = 0; i < ROW_COUNT_ESTIMATE_BATCH_SIZE; i++) {
          Result r = rs.next();
          if (r == null) break;
          currentRowCount += 1;
          for (KeyValue kv : r.list()) {
            // some extra row size added to make up for shared overhead
            currentRowSize += kv.getRowLength() // row key
                + 4 // row key length field
                + kv.getFamilyLength() // column family bytes
                + 4 // family length field
                + kv.getQualifierLength() // qualifier bytes
                + 4 // qualifier length field
                + kv.getValueLength() // length of the value
                + 4 // value length field
                + 10; // extra overhead for hfile index, checksums, metadata, etc
          }
        }
        // Add these values to the cumulative totals in one shot just
        // in case there was an error in between getting the hdfs
        // size and the row/column sizes.
        hdfsSize += currentHdfsSize;
        rowCount += currentRowCount;
        rowSize += currentRowSize;
      } finally {
        rs.close();
      }
    }
  } catch (IOException ioe) {
    // Print the stack trace, but we'll ignore it
    // as this is just an estimate.
    // TODO: Put this into the per query log.
    LOG.error("Error computing HBase row count estimate", ioe);
  }

  // If there are no rows then no need to estimate.
  if (rowCount == 0) return new Pair<Long, Long>(0L, 0L);
  // If something went wrong then set a signal value.
  if (rowSize <= 0 || hdfsSize <= 0) return new Pair<Long, Long>(-1L, -1L);

  // Estimate the number of rows.
  double bytesPerRow = rowSize / (double) rowCount;
  long estimatedRowCount = (long) ((isCompressed ? 2 : 1) * (hdfsSize / bytesPerRow));
  return new Pair<Long, Long>(estimatedRowCount, (long) bytesPerRow);
}
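// Worked example of the final estimate above (illustrative numbers only, not from the source):
//   sampled rowCount = 10 rows totalling rowSize = 65,536 bytes  =>  bytesPerRow = 6,553.6
//   region data on HDFS: hdfsSize = 1 GiB = 1,073,741,824 bytes, with compressed families
//   estimatedRowCount = 2 * (1,073,741,824 / 6,553.6) = 327,680 rows
//   returned Pair: (327,680L, 6,553L)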