/**
 * Test for {@link HFileOutputFormat2#configureDataBlockEncoding(HTableDescriptor, Configuration)}
 * and {@link HFileOutputFormat2#createFamilyDataBlockEncodingMap(Configuration)}. Tests that the
 * data block encoding map is correctly serialized into and deserialized from configuration.
 *
 * @throws IOException
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
  for (int numCfs = 0; numCfs <= 3; numCfs++) {
    Configuration conf = new Configuration(this.util.getConfiguration());
    Map<String, DataBlockEncoding> familyToDataBlockEncoding =
        getMockColumnFamiliesForDataBlockEncoding(numCfs);
    Table table = Mockito.mock(HTable.class);
    setupMockColumnFamiliesForDataBlockEncoding(table, familyToDataBlockEncoding);
    HTableDescriptor tableDescriptor = table.getTableDescriptor();
    HFileOutputFormat2.configureDataBlockEncoding(tableDescriptor, conf);

    // read back family specific data block encoding settings from the
    // configuration
    Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
        HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);

    // test that we have a value for all column families that matches with the
    // used mock values
    for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
      assertEquals(
          "DataBlockEncoding configuration incorrect for column family:" + entry.getKey(),
          entry.getValue(),
          retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes()));
    }
  }
}
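// getMockColumnFamiliesForDataBlockEncoding and setupMockColumnFamiliesForDataBlockEncoding
// are defined elsewhere in this test class. A minimal sketch of what they might look like
// follows; the specific family names and encodings are assumptions (assumes java.util.HashMap
// is imported), and only the shape — name-to-encoding map, stubbed via Mockito — matters.
private Map<String, DataBlockEncoding> getMockColumnFamiliesForDataBlockEncoding(int numCfs) {
  Map<String, DataBlockEncoding> familyToDataBlockEncoding = new HashMap<>();
  // give each mock family a different encoding so serialization is actually exercised
  if (numCfs > 0) familyToDataBlockEncoding.put("family1", DataBlockEncoding.DIFF);
  if (numCfs > 1) familyToDataBlockEncoding.put("family2", DataBlockEncoding.FAST_DIFF);
  if (numCfs > 2) familyToDataBlockEncoding.put("family3", DataBlockEncoding.PREFIX);
  return familyToDataBlockEncoding;
}

private void setupMockColumnFamiliesForDataBlockEncoding(Table table,
    Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
  // build a descriptor holding the requested encodings and stub it into the mock table
  HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
  for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
    mockTableDescriptor.addFamily(
        new HColumnDescriptor(entry.getKey()).setDataBlockEncoding(entry.getValue()));
  }
  Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
}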
/**
 * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
 * metadata used by time-restricted scans.
 */
@Test
public void test_TIMERANGE() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir = util.getDataTestDir("test_TIMERANGE_present");
  LOG.info("Timerange dir writing to dir: " + dir);
  try {
    // build a record writer using HFileOutputFormat2
    Job job = new Job(conf);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);

    // Pass two key values with explicit timestamps
    final byte[] b = Bytes.toBytes("b");

    // value 1 with timestamp 2000
    KeyValue kv = new KeyValue(b, b, b, 2000, b);
    KeyValue original = kv.clone();
    writer.write(new ImmutableBytesWritable(), kv);
    assertEquals(original, kv);

    // value 2 with timestamp 1000
    kv = new KeyValue(b, b, b, 1000, b);
    original = kv.clone();
    writer.write(new ImmutableBytesWritable(), kv);
    assertEquals(original, kv);

    // verify that the file has the proper FileInfo.
    writer.close(context);

    // the generated file lives 1 directory down from the attempt directory
    // and is the only file, e.g.
    // _attempt__0000_r_000000_0/b/1979617994050536795
    FileSystem fs = FileSystem.get(conf);
    Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
    FileStatus[] sub1 = fs.listStatus(attemptDirectory);
    FileStatus[] file = fs.listStatus(sub1[0].getPath());

    // open as HFile Reader and pull out TIMERANGE FileInfo.
    HFile.Reader rd = HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), conf);
    Map<byte[], byte[]> finfo = rd.loadFileInfo();
    byte[] range = finfo.get("TIMERANGE".getBytes());
    assertNotNull(range);

    // unmarshall and check values.
    TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
    Writables.copyWritable(range, timeRangeTracker);
    LOG.info(timeRangeTracker.getMinimumTimestamp() + "...."
        + timeRangeTracker.getMaximumTimestamp());
    assertEquals(1000, timeRangeTracker.getMinimumTimestamp());
    assertEquals(2000, timeRangeTracker.getMaximumTimestamp());
    rd.close();
  } finally {
    if (writer != null && context != null) writer.close(context);
    dir.getFileSystem(conf).delete(dir, true);
  }
}
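// createTestTaskAttemptContext(Job), used by several tests here, is defined elsewhere in
// this test class. A minimal sketch, assuming the plain Hadoop MapReduce API is acceptable
// (the real helper may go through a version shim); TaskAttemptContextImpl, TaskAttemptID,
// and TaskType come from org.apache.hadoop.mapreduce:
private TaskAttemptContext createTestTaskAttemptContext(Job job) throws Exception {
  // wrap the job's configuration in a context carrying a synthetic reduce attempt id
  return new TaskAttemptContextImpl(job.getConfiguration(),
      new TaskAttemptID("test", 1, TaskType.REDUCE, 0, 0));
}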
private void runIncrementalPELoad(Configuration conf, HTableDescriptor tableDescriptor,
    RegionLocator regionLocator, Path outDir) throws IOException, UnsupportedEncodingException,
    InterruptedException, ClassNotFoundException {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName());
  setupRandomGeneratorMapper(job);
  HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
  FileOutputFormat.setOutputPath(job, outDir);

  assertFalse(util.getTestFileSystem().exists(outDir));
  assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());
  assertTrue(job.waitForCompletion(true));
}
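// setupRandomGeneratorMapper(Job), used above and in testColumnFamilySettings below, wires a
// mapper that emits random KeyValues so the reducers have data to write. A minimal sketch;
// RandomKVGeneratingMapper is assumed to be a mapper defined alongside this test, and
// NMapInputFormat is HBase's test input format that runs a fixed number of map tasks
// without reading any input files:
private void setupRandomGeneratorMapper(Job job) {
  job.setInputFormatClass(NMapInputFormat.class);
  job.setMapperClass(RandomKVGeneratingMapper.class);
  job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  job.setMapOutputValueClass(KeyValue.class);
}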
/**
 * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if passed a keyvalue whose
 * timestamp is {@link HConstants#LATEST_TIMESTAMP}.
 *
 * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void test_LATEST_TIMESTAMP_isReplaced() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir = util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
  try {
    Job job = new Job(conf);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);
    final byte[] b = Bytes.toBytes("b");

    // Test 1. Pass a KV that has a ts of LATEST_TIMESTAMP. It should be
    // changed by call to write. Check all in kv is same but ts.
    KeyValue kv = new KeyValue(b, b, b);
    KeyValue original = kv.clone();
    writer.write(new ImmutableBytesWritable(), kv);
    assertFalse(original.equals(kv));
    assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
    assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
    assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
    // compare the timestamp values themselves; assertNotSame would only compare
    // autoboxed references and could never fail meaningfully
    assertNotEquals(original.getTimestamp(), kv.getTimestamp());
    assertNotEquals(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());

    // Test 2. Now test passing a kv that has explicit ts. It should not be
    // changed by call to record write.
    kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
    original = kv.clone();
    writer.write(new ImmutableBytesWritable(), kv);
    assertTrue(original.equals(kv));
  } finally {
    if (writer != null && context != null) writer.close(context);
    dir.getFileSystem(conf).delete(dir, true);
  }
}
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test public void testJobConfiguration() throws Exception { Configuration conf = new Configuration(this.util.getConfiguration()); conf.set("hbase.fs.tmp.dir", util.getDataTestDir("testJobConfiguration").toString()); Job job = new Job(conf); job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration")); Table table = Mockito.mock(Table.class); RegionLocator regionLocator = Mockito.mock(RegionLocator.class); setupMockStartKeys(regionLocator); setupMockTableName(regionLocator); HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator); assertEquals(job.getNumReduceTasks(), 4); }
/**
 * Test for {@link HFileOutputFormat2#configureBlockSize(org.apache.hadoop.hbase.client.Table,
 * Configuration)} and {@link HFileOutputFormat2#createFamilyBlockSizeMap(Configuration)}. Tests
 * that the block size map is correctly serialized into and deserialized from configuration.
 *
 * @throws IOException
 */
@Test
public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
  for (int numCfs = 0; numCfs <= 3; numCfs++) {
    Configuration conf = new Configuration(this.util.getConfiguration());
    Map<String, Integer> familyToBlockSize = getMockColumnFamiliesForBlockSize(numCfs);
    Table table = Mockito.mock(HTable.class);
    setupMockColumnFamiliesForBlockSize(table, familyToBlockSize);
    HFileOutputFormat2.configureBlockSize(table.getTableDescriptor(), conf);

    // read back family specific block size settings from the configuration
    Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
        HFileOutputFormat2.createFamilyBlockSizeMap(conf);

    // test that we have a value for all column families that matches with the
    // used mock values
    for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
      assertEquals(
          "BlockSize configuration incorrect for column family:" + entry.getKey(),
          entry.getValue(),
          retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes()));
    }
  }
}
/**
 * Test for {@link HFileOutputFormat2#configureCompression(org.apache.hadoop.hbase.client.Table,
 * Configuration)} and {@link HFileOutputFormat2#createFamilyCompressionMap(Configuration)}.
 * Tests that the compression map is correctly serialized into and deserialized from
 * configuration.
 *
 * @throws IOException
 */
@Test
public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
  for (int numCfs = 0; numCfs <= 3; numCfs++) {
    Configuration conf = new Configuration(this.util.getConfiguration());
    Map<String, Compression.Algorithm> familyToCompression =
        getMockColumnFamiliesForCompression(numCfs);
    Table table = Mockito.mock(HTable.class);
    setupMockColumnFamiliesForCompression(table, familyToCompression);
    HFileOutputFormat2.configureCompression(conf, table.getTableDescriptor());

    // read back family specific compression setting from the configuration
    Map<byte[], Algorithm> retrievedFamilyToCompressionMap =
        HFileOutputFormat2.createFamilyCompressionMap(conf);

    // test that we have a value for all column families that matches with the
    // used mock values
    for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
      assertEquals(
          "Compression configuration incorrect for column family:" + entry.getKey(),
          entry.getValue(),
          retrievedFamilyToCompressionMap.get(entry.getKey().getBytes()));
    }
  }
}
/**
 * Test that {@link HFileOutputFormat2} RecordWriter uses compression and bloom filter settings
 * from the column family descriptor.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testColumnFamilySettings() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir = util.getDataTestDir("testColumnFamilySettings");

  // Setup table descriptor
  Table table = Mockito.mock(Table.class);
  RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
  HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
  Mockito.doReturn(htd).when(table).getTableDescriptor();
  for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
    htd.addFamily(hcd);
  }

  // set up the table to return some mock keys
  setupMockStartKeys(regionLocator);

  try {
    // partial map red setup to get an operational writer for testing
    // We turn off the sequence file compression, because DefaultCodec
    // pollutes the GZip codec pool with an incompatible compressor.
    conf.set("io.seqfile.compression.type", "NONE");
    conf.set("hbase.fs.tmp.dir", dir.toString());
    // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);

    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
    setupRandomGeneratorMapper(job);
    HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);

    // write out random rows
    writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
    writer.close(context);

    // Make sure that a directory was created for every CF
    FileSystem fs = dir.getFileSystem(conf);

    // commit so that the filesystem has one directory per column family
    hof.getOutputCommitter(context).commitTask(context);
    hof.getOutputCommitter(context).commitJob(context);
    FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
    assertEquals(htd.getFamilies().size(), families.length);
    for (FileStatus f : families) {
      String familyStr = f.getPath().getName();
      HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
      // verify that the compression on this file matches the configured
      // compression
      Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
      Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
      Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

      byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
      if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
      assertEquals("Incorrect bloom filter used for column family " + familyStr
          + "(reader: " + reader + ")",
          hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
      assertEquals("Incorrect compression used for column family " + familyStr
          + "(reader: " + reader + ")",
          hcd.getCompressionType(), reader.getFileContext().getCompression());
    }
  } finally {
    dir.getFileSystem(conf).delete(dir, true);
  }
}
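// writeRandomKeyValues is defined elsewhere in this test class. A minimal sketch, assuming
// java.util.Random and java.util.Set are imported; the 10-byte values and the single "q"
// qualifier are arbitrary choices made for illustration:
private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
    TaskAttemptContext context, Set<byte[]> families, int numRows)
    throws IOException, InterruptedException {
  byte[] keyBytes = new byte[Bytes.SIZEOF_INT];
  byte[] valBytes = new byte[10];
  byte[] qualifier = Bytes.toBytes("q");
  Random random = new Random();
  for (int i = 0; i < numRows; i++) {
    Bytes.putInt(keyBytes, 0, i); // use the row index as the row key
    random.nextBytes(valBytes);
    ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
    // one cell per family so every family directory receives data
    for (byte[] family : families) {
      writer.write(key, new KeyValue(keyBytes, family, qualifier, valBytes));
    }
  }
}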