Example #1
 /**
  * This utility method creates a new Thrift ColumnDescriptor "struct" based on an HBase
  * HColumnDescriptor object.
  *
  * @param in HBase HColumnDescriptor object
  * @return Thrift ColumnDescriptor
  */
 public static ColumnDescriptor colDescFromHbase(HColumnDescriptor in) {
   ColumnDescriptor col = new ColumnDescriptor();
   col.name = ByteBuffer.wrap(Bytes.add(in.getName(), KeyValue.COLUMN_FAMILY_DELIM_ARRAY));
   col.maxVersions = in.getMaxVersions();
   col.compression = in.getCompression().toString();
   col.inMemory = in.isInMemory();
   col.blockCacheEnabled = in.isBlockCacheEnabled();
   col.bloomFilterType = in.getBloomFilterType().toString();
   return col;
 }
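For context, here is a minimal usage sketch that is not part of the HBase source: a hypothetical colDescsFromTable helper that converts every column family of a table by delegating to colDescFromHbase above. It assumes java.util.List/ArrayList and the HBase and Thrift types from the snippet are imported.

 // Hypothetical helper, for illustration only: convert all column
 // families of an HTableDescriptor into Thrift ColumnDescriptors.
 public static List<ColumnDescriptor> colDescsFromTable(HTableDescriptor table) {
   List<ColumnDescriptor> cols = new ArrayList<>();
   for (HColumnDescriptor hcd : table.getFamilies()) {
     cols.add(colDescFromHbase(hcd)); // reuse the converter above
   }
   return cols;
 }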
Example #2
 /**
  * Serialize the column family to bloom type map into the configuration. Invoked while configuring the MR
  * job for incremental load.
  *
  * @param tableDescriptor to read the properties from
  * @param conf to persist serialized values into
  * @throws UnsupportedEncodingException if the UTF-8 encoding is not supported (never expected on a standard JVM)
  */
 @VisibleForTesting
 static void configureBloomType(HTableDescriptor tableDescriptor, Configuration conf)
     throws UnsupportedEncodingException {
   if (tableDescriptor == null) {
     // could happen with mock table instance
     return;
   }
   StringBuilder bloomTypeConfigValue = new StringBuilder();
   Collection<HColumnDescriptor> families = tableDescriptor.getFamilies();
   int i = 0;
   for (HColumnDescriptor familyDescriptor : families) {
     if (i++ > 0) {
       bloomTypeConfigValue.append('&');
     }
     bloomTypeConfigValue.append(URLEncoder.encode(familyDescriptor.getNameAsString(), "UTF-8"));
     bloomTypeConfigValue.append('=');
     BloomType bloomType = familyDescriptor.getBloomFilterType();
     // fall back to the default bloom filter type if none is set;
     // the null check must happen before calling toString()
     String bloomTypeStr =
         (bloomType == null) ? HColumnDescriptor.DEFAULT_BLOOMFILTER : bloomType.toString();
     bloomTypeConfigValue.append(URLEncoder.encode(bloomTypeStr, "UTF-8"));
   }
   conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY, bloomTypeConfigValue.toString());
 }
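The serialized value has the form family1=BLOOMTYPE&family2=BLOOMTYPE, with names and values URL-encoded so that '&' or '=' characters inside a family name cannot break the format. Below is a minimal sketch of the reverse direction, assuming the same BLOOM_TYPE_FAMILIES_CONF_KEY plus imports of java.net.URLDecoder and java.util.Map/HashMap; the readBloomTypeMap name is illustrative only, as HFileOutputFormat2 keeps its own internal deserializer.

 // Illustrative counterpart to configureBloomType: rebuild the
 // family name -> bloom type mapping from the serialized string.
 static Map<String, String> readBloomTypeMap(Configuration conf)
     throws UnsupportedEncodingException {
   Map<String, String> bloomTypes = new HashMap<>();
   String serialized = conf.get(BLOOM_TYPE_FAMILIES_CONF_KEY, "");
   for (String pair : serialized.split("&")) {
     int idx = pair.indexOf('=');
     if (idx < 0) {
       continue; // skip empty or malformed tokens
     }
     String family = URLDecoder.decode(pair.substring(0, idx), "UTF-8");
     String bloomType = URLDecoder.decode(pair.substring(idx + 1), "UTF-8");
     bloomTypes.put(family, bloomType);
   }
   return bloomTypes;
 }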
Example #3
  /**
   * Test that {@link HFileOutputFormat2} RecordWriter uses compression and bloom filter settings
   * from the column family descriptor
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testColumnFamilySettings() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("testColumnFamilySettings");

    // Setup table descriptor
    Table table = Mockito.mock(Table.class);
    RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    Mockito.doReturn(htd).when(table).getTableDescriptor();
    for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
      htd.addFamily(hcd);
    }

    // set up the table to return some mock keys
    setupMockStartKeys(regionLocator);

    try {
      // partial map red setup to get an operational writer for testing
      // We turn off the sequence file compression, because DefaultCodec
      // pollutes the GZip codec pool with an incompatible compressor.
      conf.set("io.seqfile.compression.type", "NONE");
      conf.set("hbase.fs.tmp.dir", dir.toString());
      // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
      conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);

      Job job = Job.getInstance(conf, "testColumnFamilySettings");
      job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
      setupRandomGeneratorMapper(job);
      HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      // write out random rows
      writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
      writer.close(context);

      // Make sure that a directory was created for every CF
      FileSystem fs = dir.getFileSystem(conf);

      // commit so that the filesystem has one directory per column family
      hof.getOutputCommitter(context).commitTask(context);
      hof.getOutputCommitter(context).commitJob(context);
      FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
      assertEquals(htd.getFamilies().size(), families.length);
      for (FileStatus f : families) {
        String familyStr = f.getPath().getName();
        HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
        // verify that the compression on this file matches the configured
        // compression
        Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
        Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
        Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

        byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
        if (bloomFilter == null) {
          bloomFilter = Bytes.toBytes("NONE");
        }
        assertEquals(
            "Incorrect bloom filter used for column family "
                + familyStr
                + "(reader: "
                + reader
                + ")",
            hcd.getBloomFilterType(),
            BloomType.valueOf(Bytes.toString(bloomFilter)));
        assertEquals(
            "Incorrect compression used for column family "
                + familyStr
                + "(reader: "
                + reader
                + ")",
            hcd.getCompressionType(),
            reader.getFileContext().getCompression());
        reader.close();
      }
    } finally {
      dir.getFileSystem(conf).delete(dir, true);
    }
  }