/** * Runs inside the task to deserialize column family to bloom filter type map from the * configuration. * * @param conf to read the serialized values from * @return a map from column family to the the configured bloom filter type */ @VisibleForTesting static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) { Map<byte[], String> stringMap = createFamilyConfValueMap(conf, BLOOM_TYPE_FAMILIES_CONF_KEY); Map<byte[], BloomType> bloomTypeMap = new TreeMap<byte[], BloomType>(Bytes.BYTES_COMPARATOR); for (Map.Entry<byte[], String> e : stringMap.entrySet()) { BloomType bloomType = BloomType.valueOf(e.getValue()); bloomTypeMap.put(e.getKey(), bloomType); } return bloomTypeMap; }
/** * This utility method creates a new Hbase HColumnDescriptor object based on a Thrift * ColumnDescriptor "struct". * * @param in Thrift ColumnDescriptor object * @return HColumnDescriptor * @throws IllegalArgument */ public static HColumnDescriptor colDescFromThrift(ColumnDescriptor in) throws IllegalArgument { Compression.Algorithm comp = Compression.getCompressionAlgorithmByName(in.compression.toLowerCase()); BloomType bt = BloomType.valueOf(in.bloomFilterType); if (in.name == null || !in.name.hasRemaining()) { throw new IllegalArgument("column name is empty"); } byte[] parsedName = KeyValue.parseColumn(Bytes.getBytes(in.name))[0]; HColumnDescriptor col = new HColumnDescriptor(parsedName) .setMaxVersions(in.maxVersions) .setCompressionType(comp) .setInMemory(in.inMemory) .setBlockCacheEnabled(in.blockCacheEnabled) .setTimeToLive(in.timeToLive > 0 ? in.timeToLive : Integer.MAX_VALUE) .setBloomFilterType(bt); return col; }
/** * Test that {@link HFileOutputFormat2} RecordWriter uses compression and bloom filter settings * from the column family descriptor */ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test public void testColumnFamilySettings() throws Exception { Configuration conf = new Configuration(this.util.getConfiguration()); RecordWriter<ImmutableBytesWritable, Cell> writer = null; TaskAttemptContext context = null; Path dir = util.getDataTestDir("testColumnFamilySettings"); // Setup table descriptor Table table = Mockito.mock(Table.class); RegionLocator regionLocator = Mockito.mock(RegionLocator.class); HTableDescriptor htd = new HTableDescriptor(TABLE_NAME); Mockito.doReturn(htd).when(table).getTableDescriptor(); for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) { htd.addFamily(hcd); } // set up the table to return some mock keys setupMockStartKeys(regionLocator); try { // partial map red setup to get an operational writer for testing // We turn off the sequence file compression, because DefaultCodec // pollutes the GZip codec pool with an incompatible compressor. conf.set("io.seqfile.compression.type", "NONE"); conf.set("hbase.fs.tmp.dir", dir.toString()); // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false); Job job = new Job(conf, "testLocalMRIncrementalLoad"); job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings")); setupRandomGeneratorMapper(job); HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator); FileOutputFormat.setOutputPath(job, dir); context = createTestTaskAttemptContext(job); HFileOutputFormat2 hof = new HFileOutputFormat2(); writer = hof.getRecordWriter(context); // write out random rows writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT); writer.close(context); // Make sure that a directory was created for every CF FileSystem fs = dir.getFileSystem(conf); // commit so that the filesystem has one directory per column family hof.getOutputCommitter(context).commitTask(context); hof.getOutputCommitter(context).commitJob(context); FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs)); assertEquals(htd.getFamilies().size(), families.length); for (FileStatus f : families) { String familyStr = f.getPath().getName(); HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr)); // verify that the compression on this file matches the configured // compression Path dataFilePath = fs.listStatus(f.getPath())[0].getPath(); Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf); Map<byte[], byte[]> fileInfo = reader.loadFileInfo(); byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY); if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE"); assertEquals( "Incorrect bloom filter used for column family " + familyStr + "(reader: " + reader + ")", hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter))); assertEquals( "Incorrect compression used for column family " + familyStr + "(reader: " + reader + ")", hcd.getCompressionType(), reader.getFileContext().getCompression()); } } finally { dir.getFileSystem(conf).delete(dir, true); } }