@Test public void test() throws Exception { context.checking( new Expectations() { { allowing(taskContext).getConfiguration(); will(returnValue(conf)); allowing(taskContext).getTaskAttemptID(); will(returnValue(taskAttemptID)); } }); OutputFormat outputFormat = new IndexRecordWriter.OutputFormat(); conf.setStrings("RdfFieldNames", "index0", "index1"); conf.setEnum("IndexType", RDFDocumentFactory.IndexType.VERTICAL); RecordWriter<IntWritable, IndexRecordWriterValue> recordWriter = outputFormat.getRecordWriter(taskContext); IntWritable key = new IntWritable(); IndexRecordWriterTermValue termValue = new IndexRecordWriterTermValue(); IndexRecordWriterDocValue docValue = new IndexRecordWriterDocValue(); IndexRecordWriterSizeValue sizeValue = new IndexRecordWriterSizeValue(); // ALIGNEMENT_INDEX key.set(DocumentMapper.ALIGNMENT_INDEX); termValue.setTerm("term1"); termValue.setTermFrequency(1); // The alignment index doesn't have positions/counts. termValue.setOccurrenceCount(0); termValue.setSumOfMaxTermPositions(0); recordWriter.write(key, termValue); docValue.setDocument(0); // term1 occurs in index 0 recordWriter.write(key, docValue); // Index 0 key.set(0); termValue.setTermFrequency(3); termValue.setOccurrenceCount(6); termValue.setSumOfMaxTermPositions(15 + 12 + 18); recordWriter.write(key, termValue); docValue.setDocument(3); docValue.clearOccerrences(); docValue.addOccurrence(11); docValue.addOccurrence(15); recordWriter.write(key, docValue); docValue.setDocument(4); docValue.clearOccerrences(); docValue.addOccurrence(12); recordWriter.write(key, docValue); docValue.setDocument(7); docValue.clearOccerrences(); docValue.addOccurrence(14); docValue.addOccurrence(17); docValue.addOccurrence(18); recordWriter.write(key, docValue); // ALIGNEMENT_INDEX key.set(DocumentMapper.ALIGNMENT_INDEX); termValue.setTerm("term2"); termValue.setTermFrequency(2); // The alignment index doesn't have positions/counts. termValue.setOccurrenceCount(0); termValue.setSumOfMaxTermPositions(0); recordWriter.write(key, termValue); docValue.clearOccerrences(); docValue.setDocument(0); // term2 occurs in index 0 & 1 recordWriter.write(key, docValue); docValue.setDocument(1); // term2 occurs in index 0 & 1 recordWriter.write(key, docValue); // Index 0 key.set(0); termValue.setTermFrequency(2); termValue.setOccurrenceCount(4); termValue.setSumOfMaxTermPositions(19 + 16); recordWriter.write(key, termValue); docValue.setDocument(1); docValue.clearOccerrences(); docValue.addOccurrence(10); docValue.addOccurrence(19); recordWriter.write(key, docValue); docValue.setDocument(7); docValue.clearOccerrences(); docValue.addOccurrence(13); docValue.addOccurrence(16); recordWriter.write(key, docValue); // Index 1 key.set(1); termValue.setTermFrequency(1); termValue.setOccurrenceCount(1); termValue.setSumOfMaxTermPositions(14); recordWriter.write(key, termValue); docValue.setDocument(1); docValue.clearOccerrences(); docValue.addOccurrence(14); recordWriter.write(key, docValue); // ALIGNMENT_INDEX key.set(DocumentMapper.ALIGNMENT_INDEX); termValue.setTerm("term3"); termValue.setTermFrequency(1); // The alignment index doesn't have positions/counts. termValue.setOccurrenceCount(0); termValue.setSumOfMaxTermPositions(0); recordWriter.write(key, termValue); docValue.setDocument(1); // term3 occurs in index 1 recordWriter.write(key, docValue); docValue.clearOccerrences(); // Index 1 key.set(1); termValue.setTermFrequency(1); termValue.setOccurrenceCount(2); termValue.setSumOfMaxTermPositions(11); recordWriter.write(key, termValue); docValue.setDocument(3); docValue.clearOccerrences(); docValue.addOccurrence(10); docValue.addOccurrence(11); recordWriter.write(key, docValue); // Doc Sizes. key.set(0); sizeValue.setDocument(0); sizeValue.setSize(3); recordWriter.write(key, sizeValue); sizeValue.setDocument(3); sizeValue.setSize(1); recordWriter.write(key, sizeValue); sizeValue.setDocument(4); sizeValue.setSize(10); recordWriter.write(key, sizeValue); sizeValue.setDocument(6); sizeValue.setSize(2); recordWriter.write(key, sizeValue); key.set(1); sizeValue.setDocument(3); sizeValue.setSize(3); recordWriter.write(key, sizeValue); sizeValue.setDocument(6); sizeValue.setSize(5); recordWriter.write(key, sizeValue); recordWriter.close(taskContext); // Check the written indexes.. Path workPath = outputFormat.getDefaultWorkFile(taskContext, ""); System.out.println("Default work file is " + workPath.toString()); String dir = workPath.toUri().getPath(); BitStreamIndex index0 = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index0", true, true); assertEquals(8, index0.numberOfDocuments); assertEquals(2, index0.numberOfTerms); assertTrue(index0.hasPositions); // term1 checkOccurrences(index0.documents(0), 3, "(3:11,15) (4:12) (7:14,17,18)"); // term2 checkOccurrences(index0.documents(1), 2, "(1:10,19) (7:13,16)"); assertEquals("[3, 0, 0, 1, 10, 0, 2, 0]", index0.sizes.toString()); BitStreamIndex index1 = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index1", true, true); assertEquals(8, index1.numberOfDocuments); assertEquals(2, index1.numberOfTerms); assertTrue(index0.hasPositions); checkOccurrences(index1.documents(0), 1, "(1:14)"); // term3 checkOccurrences(index1.documents(1), 1, "(3:10,11)"); BitStreamIndex indexAlignment = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/alignment", true); assertEquals(8, indexAlignment.numberOfDocuments); assertEquals(3, indexAlignment.numberOfTerms); assertFalse(indexAlignment.hasPositions); // term1 assertEquals(1, indexAlignment.documents(0).frequency()); // term2 assertEquals(2, indexAlignment.documents(1).frequency()); // term3 assertEquals(1, indexAlignment.documents(2).frequency()); assertEquals("[0, 0, 0, 3, 0, 0, 5, 0]", index1.sizes.toString()); }