/**
 * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
 * metadata used by time-restricted scans.
 *
 * <p>Two cells with explicit timestamps (2000, then 1000) are written; the TIMERANGE
 * entry read back from the file info must report 1000 as the minimum and 2000 as the
 * maximum timestamp.
 *
 * @throws Exception if writing, reading back, or cleaning up the test directory fails
 */
@Test
public void test_TIMERANGE() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir = util.getDataTestDir("test_TIMERANGE_present");
  LOG.info("Timerange dir writing to dir: " + dir);
  try {
    // build a record writer using HFileOutputFormat2
    Job job = new Job(conf);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);

    // Pass two key values with explicit time stamps
    final byte[] b = Bytes.toBytes("b");

    // value 1 with timestamp 2000
    KeyValue kv = new KeyValue(b, b, b, 2000, b);
    KeyValue original = kv.clone();
    writer.write(new ImmutableBytesWritable(), kv);
    // an explicit timestamp must be left untouched by the writer
    assertEquals(original, kv);

    // value 2 with timestamp 1000
    kv = new KeyValue(b, b, b, 1000, b);
    original = kv.clone();
    writer.write(new ImmutableBytesWritable(), kv);
    assertEquals(original, kv);

    // Close the writer before inspecting the file, and drop the reference so the
    // finally block below does not close the same writer a second time.
    writer.close(context);
    writer = null;

    // the generated file lives 1 directory down from the attempt directory
    // and is the only file, e.g.
    // _attempt__0000_r_000000_0/b/1979617994050536795
    FileSystem fs = FileSystem.get(conf);
    Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
    FileStatus[] sub1 = fs.listStatus(attemptDirectory);
    FileStatus[] file = fs.listStatus(sub1[0].getPath());

    // open as HFile Reader and pull out TIMERANGE FileInfo.
    HFile.Reader rd = HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), conf);
    try {
      Map<byte[], byte[]> finfo = rd.loadFileInfo();
      // Bytes.toBytes avoids depending on the platform default charset.
      byte[] range = finfo.get(Bytes.toBytes("TIMERANGE"));
      assertNotNull(range);

      // unmarshall and check values.
      TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
      Writables.copyWritable(range, timeRangeTracker);
      LOG.info(
          timeRangeTracker.getMinimumTimestamp()
              + "...."
              + timeRangeTracker.getMaximumTimestamp());
      assertEquals(1000, timeRangeTracker.getMinimumTimestamp());
      assertEquals(2000, timeRangeTracker.getMaximumTimestamp());
    } finally {
      // release the reader even when an assertion above fails
      rd.close();
    }
  } finally {
    if (writer != null && context != null) {
      writer.close(context);
    }
    dir.getFileSystem(conf).delete(dir, true);
  }
}
/**
 * Writes a single insert through {@link KuduTableOutputFormat}'s record writer and
 * verifies that a scan of the table then returns exactly one row.
 *
 * @throws Exception if table creation, the write, or the scan fails
 */
@Test
public void test() throws Exception {
  createTable(TABLE_NAME, getBasicSchema(), getBasicCreateTableOptions());

  // Configure the output format against the test cluster and table.
  KuduTableOutputFormat output = new KuduTableOutputFormat();
  Configuration conf = new Configuration();
  conf.set(KuduTableOutputFormat.MASTER_ADDRESSES_KEY, getMasterAddresses());
  conf.set(KuduTableOutputFormat.OUTPUT_TABLE_KEY, TABLE_NAME);
  output.setConf(conf);

  // The table is looked up through the multiton key read back from the configuration.
  KuduTable table =
      KuduTableOutputFormat.getKuduTable(conf.get(KuduTableOutputFormat.MULTITON_KEY));
  assertNotNull(table);

  // Build one row: three ints, a string and a boolean.
  Insert insert = table.newInsert();
  PartialRow columns = insert.getRow();
  columns.addInt(0, 1);
  columns.addInt(1, 2);
  columns.addInt(2, 3);
  columns.addString(3, "a string");
  columns.addBoolean(4, true);

  // Write and close; this test passes a null task context to both calls.
  RecordWriter<NullWritable, Operation> recordWriter = output.getRecordWriter(null);
  recordWriter.write(NullWritable.get(), insert);
  recordWriter.close(null);

  AsyncKuduScanner.AsyncKuduScannerBuilder scannerBuilder = client.newScannerBuilder(table);
  assertEquals(1, countRowsInScan(scannerBuilder.build()));
}
/**
 * Closes the record writer if one is open, then passes the output committer and
 * context to {@code HadoopShims.commitOrCleanup}.
 *
 * @throws IOException if closing the writer or the commit/cleanup step fails; an
 *     interruption while closing is reported as an {@code IOException} whose cause is
 *     the original {@code InterruptedException}
 */
@Override
public void cleanUp() throws IOException {
  if (writer != null) {
    try {
      writer.close(context);
    } catch (InterruptedException e) {
      // Wrapping the exception would otherwise lose the interruption; restore the
      // flag so code further up the stack can still observe it.
      Thread.currentThread().interrupt();
      throw new IOException(e);
    }
    writer = null;
  }
  HadoopShims.commitOrCleanup(outputCommitter, context);
}
/**
 * Closes the record writer if one is open, commits the task when the output committer
 * reports that a task commit is needed, and finally passes the committer and context
 * to {@code HadoopShims.commitOrCleanup}.
 *
 * @throws IOException if closing the writer or any commit step fails; an interruption
 *     while closing is reported as an {@code IOException} whose cause is the original
 *     {@code InterruptedException}
 */
@Override
public void tearDown() throws IOException {
  if (writer != null) {
    try {
      writer.close(context);
    } catch (InterruptedException e) {
      // Wrapping the exception would otherwise lose the interruption; restore the
      // flag so code further up the stack can still observe it.
      Thread.currentThread().interrupt();
      throw new IOException(e);
    }
    writer = null;
  }
  if (outputCommitter.needsTaskCommit(context)) {
    outputCommitter.commitTask(context);
  }
  HadoopShims.commitOrCleanup(outputCommitter, context);
}
private void runNewMapper( final JobConf job, MRTaskReporter reporter, final MRInputLegacy in, KeyValueWriter out) throws IOException, InterruptedException { // Initialize input in-line since it sets parameters which may be used by the processor. // Done only for MRInput. // TODO use new method in MRInput to get required info // in.initialize(job, master); // make a task context so we can get the classes org.apache.hadoop.mapreduce.TaskAttemptContext taskContext = getTaskAttemptContext(); // make a mapper org.apache.hadoop.mapreduce.Mapper mapper; try { mapper = (org.apache.hadoop.mapreduce.Mapper) ReflectionUtils.newInstance(taskContext.getMapperClass(), job); } catch (ClassNotFoundException cnfe) { throw new IOException(cnfe); } org.apache.hadoop.mapreduce.RecordReader input = new NewRecordReader(in); org.apache.hadoop.mapreduce.RecordWriter output = new NewOutputCollector(out); org.apache.hadoop.mapreduce.InputSplit split = in.getNewInputSplit(); org.apache.hadoop.mapreduce.MapContext mapContext = new MapContextImpl( job, taskAttemptId, input, output, getCommitter(), processorContext, split); org.apache.hadoop.mapreduce.Mapper.Context mapperContext = new WrappedMapper().getMapContext(mapContext); input.initialize(split, mapperContext); mapper.run(mapperContext); this.statusUpdate(); input.close(); output.close(mapperContext); }
/** * Creates an lzo file with random data. * * @param outputDir Output directory. * @param fs File system we're using. * @param attemptContext Task attempt context, contains task id etc. * @throws IOException * @throws InterruptedException */ private byte[] createTestInput( Path outputDir, FileSystem fs, TaskAttemptContext attemptContext, int charsToOutput) throws IOException, InterruptedException { TextOutputFormat<Text, Text> output = new TextOutputFormat<Text, Text>(); RecordWriter<Text, Text> rw = null; md5_.reset(); try { rw = output.getRecordWriter(attemptContext); char[] chars = "abcdefghijklmnopqrstuvwxyz\u00E5\u00E4\u00F6".toCharArray(); Random r = new Random(System.currentTimeMillis()); Text key = new Text(); Text value = new Text(); int charsMax = chars.length - 1; for (int i = 0; i < charsToOutput; ) { i += fillText(chars, r, charsMax, key); i += fillText(chars, r, charsMax, value); rw.write(key, value); md5_.update(key.getBytes(), 0, key.getLength()); // text output format writes tab between the key and value md5_.update("\t".getBytes("UTF-8")); md5_.update(value.getBytes(), 0, value.getLength()); } } finally { if (rw != null) { rw.close(attemptContext); OutputCommitter committer = output.getOutputCommitter(attemptContext); committer.commitTask(attemptContext); committer.cleanupJob(attemptContext); } } byte[] result = md5_.digest(); md5_.reset(); return result; }
// this is a tool because when you run a mapreduce, you will need to use the // ToolRunner // if you want libjars to be passed properly to the map and reduce tasks // even though this class isn't a mapreduce @Override public int run(String[] args) throws Exception { if (args.length != 5) { System.out.println( "Usage: bin/tool.sh " + this.getClass().getName() + " <instance name> <zoo keepers> <username> <password> <tablename>"); return 1; } Text tableName = new Text(args[4]); Job job = new Job(getConf()); Configuration conf = job.getConfiguration(); AccumuloOutputFormat.setZooKeeperInstance(conf, args[0], args[1]); AccumuloOutputFormat.setOutputInfo(conf, args[2], args[3].getBytes(), true, null); job.setOutputFormatClass(AccumuloOutputFormat.class); // when running a mapreduce, you won't need to instantiate the output // format and record writer // mapreduce will do that for you, and you will just use // output.collect(tableName, mutation) TaskAttemptContext context = new TaskAttemptContext(conf, new TaskAttemptID()); RecordWriter<Text, Mutation> rw = new AccumuloOutputFormat().getRecordWriter(context); Text colf = new Text("colfam"); System.out.println("writing ..."); for (int i = 0; i < 10000; i++) { Mutation m = new Mutation(new Text(String.format("row_%d", i))); for (int j = 0; j < 5; j++) { m.put( colf, new Text(String.format("colqual_%d", j)), new Value((String.format("value_%d_%d", i, j)).getBytes())); } rw.write(tableName, m); // repeat until done if (i % 100 == 0) System.out.println(i); } rw.close(context); // close when done return 0; }
/** * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if passed a keyvalue whose * timestamp is {@link HConstants#LATEST_TIMESTAMP}. * * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a> */ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test public void test_LATEST_TIMESTAMP_isReplaced() throws Exception { Configuration conf = new Configuration(this.util.getConfiguration()); RecordWriter<ImmutableBytesWritable, Cell> writer = null; TaskAttemptContext context = null; Path dir = util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced"); try { Job job = new Job(conf); FileOutputFormat.setOutputPath(job, dir); context = createTestTaskAttemptContext(job); HFileOutputFormat2 hof = new HFileOutputFormat2(); writer = hof.getRecordWriter(context); final byte[] b = Bytes.toBytes("b"); // Test 1. Pass a KV that has a ts of LATEST_TIMESTAMP. It should be // changed by call to write. Check all in kv is same but ts. KeyValue kv = new KeyValue(b, b, b); KeyValue original = kv.clone(); writer.write(new ImmutableBytesWritable(), kv); assertFalse(original.equals(kv)); assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv))); assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv))); assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv))); assertNotSame(original.getTimestamp(), kv.getTimestamp()); assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp()); // Test 2. Now test passing a kv that has explicit ts. It should not be // changed by call to record write. kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b); original = kv.clone(); writer.write(new ImmutableBytesWritable(), kv); assertTrue(original.equals(kv)); } finally { if (writer != null && context != null) writer.close(context); dir.getFileSystem(conf).delete(dir, true); } }
/** This is needed to close out the internal RecordWriter */ @Override public void close(TaskAttemptContext arg0) throws IOException, InterruptedException { // TODO Auto-generated method stub _inRecordWriter.close(arg0); } // end close
public void testBinary() throws IOException, InterruptedException { Configuration conf = new Configuration(); Job job = new Job(conf); Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq"); Random r = new Random(); long seed = r.nextLong(); r.setSeed(seed); FileOutputFormat.setOutputPath(job, outdir); SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class); SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class); SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true); SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); BytesWritable bkey = new BytesWritable(); BytesWritable bval = new BytesWritable(); TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration()); OutputFormat<BytesWritable, BytesWritable> outputFormat = new SequenceFileAsBinaryOutputFormat(); OutputCommitter committer = outputFormat.getOutputCommitter(context); committer.setupJob(job); RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context); IntWritable iwritable = new IntWritable(); DoubleWritable dwritable = new DoubleWritable(); DataOutputBuffer outbuf = new DataOutputBuffer(); LOG.info("Creating data by SequenceFileAsBinaryOutputFormat"); try { for (int i = 0; i < RECORDS; ++i) { iwritable = new IntWritable(r.nextInt()); iwritable.write(outbuf); bkey.set(outbuf.getData(), 0, outbuf.getLength()); outbuf.reset(); dwritable = new DoubleWritable(r.nextDouble()); dwritable.write(outbuf); bval.set(outbuf.getData(), 0, outbuf.getLength()); outbuf.reset(); writer.write(bkey, bval); } } finally { writer.close(context); } committer.commitTask(context); committer.commitJob(job); InputFormat<IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat<IntWritable, DoubleWritable>(); int count = 0; r.setSeed(seed); SequenceFileInputFormat.setInputPaths(job, outdir); LOG.info("Reading data by 
SequenceFileInputFormat"); for (InputSplit split : iformat.getSplits(job)) { RecordReader<IntWritable, DoubleWritable> reader = iformat.createRecordReader(split, context); MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext = new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>( job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split); reader.initialize(split, mcontext); try { int sourceInt; double sourceDouble; while (reader.nextKeyValue()) { sourceInt = r.nextInt(); sourceDouble = r.nextDouble(); iwritable = reader.getCurrentKey(); dwritable = reader.getCurrentValue(); assertEquals( "Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*", sourceInt, iwritable.get()); assertTrue( "Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*", Double.compare(dwritable.get(), sourceDouble) == 0); ++count; } } finally { reader.close(); } } assertEquals("Some records not found", RECORDS, count); }
/** * Test that {@link HFileOutputFormat2} RecordWriter uses compression and bloom filter settings * from the column family descriptor */ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test public void testColumnFamilySettings() throws Exception { Configuration conf = new Configuration(this.util.getConfiguration()); RecordWriter<ImmutableBytesWritable, Cell> writer = null; TaskAttemptContext context = null; Path dir = util.getDataTestDir("testColumnFamilySettings"); // Setup table descriptor Table table = Mockito.mock(Table.class); RegionLocator regionLocator = Mockito.mock(RegionLocator.class); HTableDescriptor htd = new HTableDescriptor(TABLE_NAME); Mockito.doReturn(htd).when(table).getTableDescriptor(); for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) { htd.addFamily(hcd); } // set up the table to return some mock keys setupMockStartKeys(regionLocator); try { // partial map red setup to get an operational writer for testing // We turn off the sequence file compression, because DefaultCodec // pollutes the GZip codec pool with an incompatible compressor. 
conf.set("io.seqfile.compression.type", "NONE"); conf.set("hbase.fs.tmp.dir", dir.toString()); // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false); Job job = new Job(conf, "testLocalMRIncrementalLoad"); job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings")); setupRandomGeneratorMapper(job); HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator); FileOutputFormat.setOutputPath(job, dir); context = createTestTaskAttemptContext(job); HFileOutputFormat2 hof = new HFileOutputFormat2(); writer = hof.getRecordWriter(context); // write out random rows writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT); writer.close(context); // Make sure that a directory was created for every CF FileSystem fs = dir.getFileSystem(conf); // commit so that the filesystem has one directory per column family hof.getOutputCommitter(context).commitTask(context); hof.getOutputCommitter(context).commitJob(context); FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs)); assertEquals(htd.getFamilies().size(), families.length); for (FileStatus f : families) { String familyStr = f.getPath().getName(); HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr)); // verify that the compression on this file matches the configured // compression Path dataFilePath = fs.listStatus(f.getPath())[0].getPath(); Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf); Map<byte[], byte[]> fileInfo = reader.loadFileInfo(); byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY); if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE"); assertEquals( "Incorrect bloom filter used for column family " + familyStr + "(reader: " + reader + ")", hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter))); assertEquals( "Incorrect compression 
used for column family " + familyStr + "(reader: " + reader + ")", hcd.getCompressionType(), reader.getFileContext().getCompression()); } } finally { dir.getFileSystem(conf).delete(dir, true); } }
/**
 * Closes the underlying record writer, handing it the given task context.
 *
 * @param context the context of the task, forwarded unchanged to the underlying writer
 * @throws IOException if the underlying writer fails to close
 * @throws InterruptedException if closing the underlying writer is interrupted
 */
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
  // Nothing besides the delegate is released here.
  recordWriter.close(context);
}
/**
 * Writes the classifier as a final record (with a {@code NullWritable} key) and then
 * closes the underlying writer.
 *
 * @param context task attempt context passed to the underlying writer's close
 * @throws IOException if writing the classifier or closing the writer fails
 * @throws InterruptedException if the write or close is interrupted
 */
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
  try {
    writer.write(NullWritable.get(), classifier);
  } finally {
    // Close even when the final write fails, so the underlying writer is not leaked.
    writer.close(context);
  }
}
@Test public void test() throws Exception { context.checking( new Expectations() { { allowing(taskContext).getConfiguration(); will(returnValue(conf)); allowing(taskContext).getTaskAttemptID(); will(returnValue(taskAttemptID)); } }); OutputFormat outputFormat = new IndexRecordWriter.OutputFormat(); conf.setStrings("RdfFieldNames", "index0", "index1"); conf.setEnum("IndexType", RDFDocumentFactory.IndexType.VERTICAL); RecordWriter<IntWritable, IndexRecordWriterValue> recordWriter = outputFormat.getRecordWriter(taskContext); IntWritable key = new IntWritable(); IndexRecordWriterTermValue termValue = new IndexRecordWriterTermValue(); IndexRecordWriterDocValue docValue = new IndexRecordWriterDocValue(); IndexRecordWriterSizeValue sizeValue = new IndexRecordWriterSizeValue(); // ALIGNEMENT_INDEX key.set(DocumentMapper.ALIGNMENT_INDEX); termValue.setTerm("term1"); termValue.setTermFrequency(1); // The alignment index doesn't have positions/counts. termValue.setOccurrenceCount(0); termValue.setSumOfMaxTermPositions(0); recordWriter.write(key, termValue); docValue.setDocument(0); // term1 occurs in index 0 recordWriter.write(key, docValue); // Index 0 key.set(0); termValue.setTermFrequency(3); termValue.setOccurrenceCount(6); termValue.setSumOfMaxTermPositions(15 + 12 + 18); recordWriter.write(key, termValue); docValue.setDocument(3); docValue.clearOccerrences(); docValue.addOccurrence(11); docValue.addOccurrence(15); recordWriter.write(key, docValue); docValue.setDocument(4); docValue.clearOccerrences(); docValue.addOccurrence(12); recordWriter.write(key, docValue); docValue.setDocument(7); docValue.clearOccerrences(); docValue.addOccurrence(14); docValue.addOccurrence(17); docValue.addOccurrence(18); recordWriter.write(key, docValue); // ALIGNEMENT_INDEX key.set(DocumentMapper.ALIGNMENT_INDEX); termValue.setTerm("term2"); termValue.setTermFrequency(2); // The alignment index doesn't have positions/counts. 
termValue.setOccurrenceCount(0); termValue.setSumOfMaxTermPositions(0); recordWriter.write(key, termValue); docValue.clearOccerrences(); docValue.setDocument(0); // term2 occurs in index 0 & 1 recordWriter.write(key, docValue); docValue.setDocument(1); // term2 occurs in index 0 & 1 recordWriter.write(key, docValue); // Index 0 key.set(0); termValue.setTermFrequency(2); termValue.setOccurrenceCount(4); termValue.setSumOfMaxTermPositions(19 + 16); recordWriter.write(key, termValue); docValue.setDocument(1); docValue.clearOccerrences(); docValue.addOccurrence(10); docValue.addOccurrence(19); recordWriter.write(key, docValue); docValue.setDocument(7); docValue.clearOccerrences(); docValue.addOccurrence(13); docValue.addOccurrence(16); recordWriter.write(key, docValue); // Index 1 key.set(1); termValue.setTermFrequency(1); termValue.setOccurrenceCount(1); termValue.setSumOfMaxTermPositions(14); recordWriter.write(key, termValue); docValue.setDocument(1); docValue.clearOccerrences(); docValue.addOccurrence(14); recordWriter.write(key, docValue); // ALIGNMENT_INDEX key.set(DocumentMapper.ALIGNMENT_INDEX); termValue.setTerm("term3"); termValue.setTermFrequency(1); // The alignment index doesn't have positions/counts. termValue.setOccurrenceCount(0); termValue.setSumOfMaxTermPositions(0); recordWriter.write(key, termValue); docValue.setDocument(1); // term3 occurs in index 1 recordWriter.write(key, docValue); docValue.clearOccerrences(); // Index 1 key.set(1); termValue.setTermFrequency(1); termValue.setOccurrenceCount(2); termValue.setSumOfMaxTermPositions(11); recordWriter.write(key, termValue); docValue.setDocument(3); docValue.clearOccerrences(); docValue.addOccurrence(10); docValue.addOccurrence(11); recordWriter.write(key, docValue); // Doc Sizes. 
key.set(0); sizeValue.setDocument(0); sizeValue.setSize(3); recordWriter.write(key, sizeValue); sizeValue.setDocument(3); sizeValue.setSize(1); recordWriter.write(key, sizeValue); sizeValue.setDocument(4); sizeValue.setSize(10); recordWriter.write(key, sizeValue); sizeValue.setDocument(6); sizeValue.setSize(2); recordWriter.write(key, sizeValue); key.set(1); sizeValue.setDocument(3); sizeValue.setSize(3); recordWriter.write(key, sizeValue); sizeValue.setDocument(6); sizeValue.setSize(5); recordWriter.write(key, sizeValue); recordWriter.close(taskContext); // Check the written indexes.. Path workPath = outputFormat.getDefaultWorkFile(taskContext, ""); System.out.println("Default work file is " + workPath.toString()); String dir = workPath.toUri().getPath(); BitStreamIndex index0 = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index0", true, true); assertEquals(8, index0.numberOfDocuments); assertEquals(2, index0.numberOfTerms); assertTrue(index0.hasPositions); // term1 checkOccurrences(index0.documents(0), 3, "(3:11,15) (4:12) (7:14,17,18)"); // term2 checkOccurrences(index0.documents(1), 2, "(1:10,19) (7:13,16)"); assertEquals("[3, 0, 0, 1, 10, 0, 2, 0]", index0.sizes.toString()); BitStreamIndex index1 = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index1", true, true); assertEquals(8, index1.numberOfDocuments); assertEquals(2, index1.numberOfTerms); assertTrue(index0.hasPositions); checkOccurrences(index1.documents(0), 1, "(1:14)"); // term3 checkOccurrences(index1.documents(1), 1, "(3:10,11)"); BitStreamIndex indexAlignment = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/alignment", true); assertEquals(8, indexAlignment.numberOfDocuments); assertEquals(3, indexAlignment.numberOfTerms); assertFalse(indexAlignment.hasPositions); // term1 assertEquals(1, indexAlignment.documents(0).frequency()); // term2 assertEquals(2, indexAlignment.documents(1).frequency()); // term3 assertEquals(1, indexAlignment.documents(2).frequency()); assertEquals("[0, 0, 
0, 3, 0, 0, 5, 0]", index1.sizes.toString()); }
/**
 * Closes the underlying record writer using the context held by this object.
 *
 * @throws IOException if the underlying writer fails to close
 * @throws InterruptedException if closing the underlying writer is interrupted
 */
public void close() throws IOException, InterruptedException {
  // The stored context is supplied here so callers do not have to pass one.
  recordWriter.close(context);
}