Example #1
 /**
  * Runs inside the task to deserialize column family to bloom filter type map from the
  * configuration.
  *
  * @param conf to read the serialized values from
  * @return a map from column family to the configured bloom filter type
  */
 @VisibleForTesting
 static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
   Map<byte[], String> stringMap = createFamilyConfValueMap(conf, BLOOM_TYPE_FAMILIES_CONF_KEY);
   Map<byte[], BloomType> bloomTypeMap = new TreeMap<byte[], BloomType>(Bytes.BYTES_COMPARATOR);
   for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
     BloomType bloomType = BloomType.valueOf(e.getValue());
     bloomTypeMap.put(e.getKey(), bloomType);
   }
   return bloomTypeMap;
 }
Example #2
  /**
   * This utility method creates a new HBase HColumnDescriptor object based on a Thrift
   * ColumnDescriptor "struct".
   *
   * @param in Thrift ColumnDescriptor object
   * @return HColumnDescriptor
   * @throws IllegalArgument
   */
  public static HColumnDescriptor colDescFromThrift(ColumnDescriptor in) throws IllegalArgument {
    Compression.Algorithm comp =
        Compression.getCompressionAlgorithmByName(in.compression.toLowerCase());
    BloomType bt = BloomType.valueOf(in.bloomFilterType);

    if (in.name == null || !in.name.hasRemaining()) {
      throw new IllegalArgument("column name is empty");
    }
    byte[] parsedName = KeyValue.parseColumn(Bytes.getBytes(in.name))[0];
    HColumnDescriptor col =
        new HColumnDescriptor(parsedName)
            .setMaxVersions(in.maxVersions)
            .setCompressionType(comp)
            .setInMemory(in.inMemory)
            .setBlockCacheEnabled(in.blockCacheEnabled)
            .setTimeToLive(in.timeToLive > 0 ? in.timeToLive : Integer.MAX_VALUE)
            .setBloomFilterType(bt);
    return col;
  }
Example #3
  /**
   * Test that {@link HFileOutputFormat2} RecordWriter uses the compression and bloom filter
   * settings from the column family descriptor.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testColumnFamilySettings() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("testColumnFamilySettings");

    // Setup table descriptor
    Table table = Mockito.mock(Table.class);
    RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    Mockito.doReturn(htd).when(table).getTableDescriptor();
    for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
      htd.addFamily(hcd);
    }

    // set up the table to return some mock keys
    setupMockStartKeys(regionLocator);

    try {
      // partial map-reduce setup to get an operational writer for testing
      // We turn off the sequence file compression, because DefaultCodec
      // pollutes the GZip codec pool with an incompatible compressor.
      conf.set("io.seqfile.compression.type", "NONE");
      conf.set("hbase.fs.tmp.dir", dir.toString());
      // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
      conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);

      Job job = new Job(conf, "testLocalMRIncrementalLoad");
      job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
      setupRandomGeneratorMapper(job);
      HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      // write out random rows
      writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
      writer.close(context);

      // Make sure that a directory was created for every CF
      FileSystem fs = dir.getFileSystem(conf);

      // commit so that the filesystem has one directory per column family
      hof.getOutputCommitter(context).commitTask(context);
      hof.getOutputCommitter(context).commitJob(context);
      FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
      assertEquals(htd.getFamilies().size(), families.length);
      for (FileStatus f : families) {
        String familyStr = f.getPath().getName();
        HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
        // verify that the compression on this file matches the configured
        // compression
        Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
        Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
        Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

        byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
        if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
        assertEquals(
            "Incorrect bloom filter used for column family "
                + familyStr
                + "(reader: "
                + reader
                + ")",
            hcd.getBloomFilterType(),
            BloomType.valueOf(Bytes.toString(bloomFilter)));
        assertEquals(
            "Incorrect compression used for column family "
                + familyStr
                + "(reader: "
                + reader
                + ")",
            hcd.getCompressionType(),
            reader.getFileContext().getCompression());
      }
    } finally {
      dir.getFileSystem(conf).delete(dir, true);
    }
  }