예제 #1
0
  /**
   * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
   * metadata used by time-restricted scans.
   *
   * <p>Writes two cells with timestamps 2000 and 1000, closes the writer, then reads the
   * TIMERANGE file-info entry back from the generated HFile and checks that it covers
   * 1000..2000.
   */
  @Test
  public void test_TIMERANGE() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_TIMERANGE_present");
    LOG.info("Timerange dir writing to dir: " + dir);
    try {
      // build a record writer using HFileOutputFormat2
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      // Pass two key values with explicit time stamps
      final byte[] b = Bytes.toBytes("b");

      // value 1 with timestamp 2000
      KeyValue kv = new KeyValue(b, b, b, 2000, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      // an explicit timestamp must not be rewritten by the writer
      assertEquals(original, kv);

      // value 2 with timestamp 1000
      kv = new KeyValue(b, b, b, 1000, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // Close the writer so the file is complete before it is inspected. Clearing the
      // reference keeps the finally block from closing the same writer a second time.
      writer.close(context);
      writer = null;

      // the generated file lives 1 directory down from the attempt directory
      // and is the only file, e.g.
      // _attempt__0000_r_000000_0/b/1979617994050536795
      FileSystem fs = FileSystem.get(conf);
      Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
      FileStatus[] sub1 = fs.listStatus(attemptDirectory);
      FileStatus[] file = fs.listStatus(sub1[0].getPath());

      // open as HFile Reader and pull out TIMERANGE FileInfo.
      HFile.Reader rd = HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), conf);
      try {
        Map<byte[], byte[]> finfo = rd.loadFileInfo();
        // Bytes.toBytes avoids depending on the platform default charset.
        byte[] range = finfo.get(Bytes.toBytes("TIMERANGE"));
        assertNotNull(range);

        // unmarshall and check values.
        TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
        Writables.copyWritable(range, timeRangeTracker);
        LOG.info(
            timeRangeTracker.getMinimumTimestamp()
                + "...."
                + timeRangeTracker.getMaximumTimestamp());
        assertEquals(1000, timeRangeTracker.getMinimumTimestamp());
        assertEquals(2000, timeRangeTracker.getMaximumTimestamp());
      } finally {
        // Release the reader even when one of the assertions above fails.
        rd.close();
      }
    } finally {
      // Only reached with a live writer when the test failed before the explicit close.
      if (writer != null && context != null) {
        writer.close(context);
      }
      dir.getFileSystem(conf).delete(dir, true);
    }
  }
예제 #2
0
  /**
   * Writes one insert through the {@link KuduTableOutputFormat} record writer and verifies that a
   * scan of the table returns exactly one row.
   */
  @Test
  public void test() throws Exception {
    createTable(TABLE_NAME, getBasicSchema(), getBasicCreateTableOptions());

    // Point the output format at the test cluster and the freshly created table.
    KuduTableOutputFormat outputFormat = new KuduTableOutputFormat();
    Configuration configuration = new Configuration();
    configuration.set(KuduTableOutputFormat.MASTER_ADDRESSES_KEY, getMasterAddresses());
    configuration.set(KuduTableOutputFormat.OUTPUT_TABLE_KEY, TABLE_NAME);
    outputFormat.setConf(configuration);

    // The table handle is looked up by the multiton key read back from the configuration
    // (presumably stored there by setConf -- not visible from this test).
    KuduTable kuduTable =
        KuduTableOutputFormat.getKuduTable(
            configuration.get(KuduTableOutputFormat.MULTITON_KEY));
    assertNotNull(kuduTable);

    // Build a single insert that fills columns 0 through 4.
    Insert singleInsert = kuduTable.newInsert();
    PartialRow values = singleInsert.getRow();
    values.addInt(0, 1);
    values.addInt(1, 2);
    values.addInt(2, 3);
    values.addString(3, "a string");
    values.addBoolean(4, true);

    // This test passes null for the task context to both getRecordWriter and close.
    RecordWriter<NullWritable, Operation> recordWriter = outputFormat.getRecordWriter(null);
    recordWriter.write(NullWritable.get(), singleInsert);
    recordWriter.close(null);

    AsyncKuduScanner.AsyncKuduScannerBuilder scannerBuilder = client.newScannerBuilder(kuduTable);
    assertEquals(1, countRowsInScan(scannerBuilder.build()));
  }
 /**
  * Closes the record writer, if one is still open, and then passes the output committer and
  * task context to {@code HadoopShims.commitOrCleanup}.
  *
  * @throws IOException if closing the writer fails or is interrupted, or if the
  *     commit-or-cleanup step fails
  */
 @Override
 public void cleanUp() throws IOException {
   if (writer != null) {
     try {
       writer.close(context);
     } catch (InterruptedException e) {
       // Restore the interrupt flag: wrapping the exception alone would hide the
       // interruption from code further up the stack.
       Thread.currentThread().interrupt();
       throw new IOException(e);
     }
     // Drop the reference so a repeated call does not close the writer twice.
     writer = null;
   }
   HadoopShims.commitOrCleanup(outputCommitter, context);
 }
 /**
  * Closes the record writer, if one is still open, commits the task output when the committer
  * reports that a commit is needed, and finally passes the committer and task context to
  * {@code HadoopShims.commitOrCleanup}.
  *
  * @throws IOException if closing the writer fails or is interrupted, or if committing fails
  */
 @Override
 public void tearDown() throws IOException {
   if (writer != null) {
     try {
       writer.close(context);
     } catch (InterruptedException e) {
       // Restore the interrupt flag: wrapping the exception alone would hide the
       // interruption from code further up the stack.
       Thread.currentThread().interrupt();
       throw new IOException(e);
     }
     // Drop the reference so a repeated call does not close the writer twice.
     writer = null;
   }
   if (outputCommitter.needsTaskCommit(context)) {
     outputCommitter.commitTask(context);
   }
   // NOTE(review): commitOrCleanup runs after the explicit commitTask above; confirm that it
   // does not commit the same task a second time.
   HadoopShims.commitOrCleanup(outputCommitter, context);
 }
  /**
   * Runs a new-API ({@code org.apache.hadoop.mapreduce}) mapper: instantiates the configured
   * mapper class, wraps the input and output in new-API reader/writer adapters, builds the
   * mapper context, runs the mapper, and then closes the reader and the writer.
   *
   * @param job job configuration used to instantiate the mapper and build the map context
   * @param reporter task reporter (not referenced in this method)
   * @param in input the records are read from; also supplies the new-API input split
   * @param out writer the mapper output is collected into
   * @throws IOException if the mapper class cannot be found, or on I/O failure while running
   * @throws InterruptedException if the mapper run or the final close is interrupted
   */
  private void runNewMapper(
      final JobConf job, MRTaskReporter reporter, final MRInputLegacy in, KeyValueWriter out)
      throws IOException, InterruptedException {

    // Input initialization was meant to happen in-line here (MRInput only) because it sets
    // parameters the processor may use; it is currently not performed in this method.
    // TODO use new method in MRInput to get required info

    // The task attempt context knows which mapper class was configured.
    org.apache.hadoop.mapreduce.TaskAttemptContext attemptContext = getTaskAttemptContext();

    // Instantiate the mapper reflectively.
    org.apache.hadoop.mapreduce.Mapper userMapper;
    try {
      userMapper =
          (org.apache.hadoop.mapreduce.Mapper)
              ReflectionUtils.newInstance(attemptContext.getMapperClass(), job);
    } catch (ClassNotFoundException missingMapperClass) {
      throw new IOException(missingMapperClass);
    }

    // Adapt the input and output to the new-API reader/writer types.
    org.apache.hadoop.mapreduce.RecordReader reader = new NewRecordReader(in);
    org.apache.hadoop.mapreduce.RecordWriter collector = new NewOutputCollector(out);
    org.apache.hadoop.mapreduce.InputSplit inputSplit = in.getNewInputSplit();

    org.apache.hadoop.mapreduce.MapContext baseContext =
        new MapContextImpl(
            job, taskAttemptId, reader, collector, getCommitter(), processorContext, inputSplit);
    org.apache.hadoop.mapreduce.Mapper.Context wrappedContext =
        new WrappedMapper().getMapContext(baseContext);

    reader.initialize(inputSplit, wrappedContext);
    userMapper.run(wrappedContext);
    this.statusUpdate();
    reader.close();
    collector.close(wrappedContext);
  }
  /**
   * Writes randomly generated key/value text through a {@link TextOutputFormat} record writer,
   * commits the task output, and returns the MD5 digest of the key, tab and value bytes that
   * were written.
   *
   * <p>NOTE(review): the original description called the result "an lzo file"; any compression
   * would come from the configuration carried by {@code attemptContext}, which is not visible
   * here.
   *
   * @param outputDir Output directory (not referenced in this method).
   * @param fs File system we're using (not referenced in this method).
   * @param attemptContext Task attempt context, contains task id etc.
   * @param charsToOutput writing continues until the counts returned by {@code fillText} add up
   *     to at least this value
   * @return MD5 digest over key bytes, a tab, and value bytes of every record written
   * @throws IOException
   * @throws InterruptedException
   */
  private byte[] createTestInput(
      Path outputDir, FileSystem fs, TaskAttemptContext attemptContext, int charsToOutput)
      throws IOException, InterruptedException {

    TextOutputFormat<Text, Text> textFormat = new TextOutputFormat<Text, Text>();
    RecordWriter<Text, Text> recordWriter = null;

    md5_.reset();

    try {
      recordWriter = textFormat.getRecordWriter(attemptContext);

      final char[] alphabet = "abcdefghijklmnopqrstuvwxyz\u00E5\u00E4\u00F6".toCharArray();
      final int maxIndex = alphabet.length - 1;
      // text output format writes tab between the key and value, so the digest includes it
      final byte[] separator = "\t".getBytes("UTF-8");

      Random random = new Random(System.currentTimeMillis());
      Text key = new Text();
      Text value = new Text();

      int produced = 0;
      while (produced < charsToOutput) {
        produced += fillText(alphabet, random, maxIndex, key);
        produced += fillText(alphabet, random, maxIndex, value);
        recordWriter.write(key, value);
        // Text's backing array may be longer than its content, hence the explicit length.
        md5_.update(key.getBytes(), 0, key.getLength());
        md5_.update(separator);
        md5_.update(value.getBytes(), 0, value.getLength());
      }
    } finally {
      if (recordWriter != null) {
        recordWriter.close(attemptContext);
        OutputCommitter outputCommitter = textFormat.getOutputCommitter(attemptContext);
        outputCommitter.commitTask(attemptContext);
        outputCommitter.cleanupJob(attemptContext);
      }
    }

    final byte[] digest = md5_.digest();
    md5_.reset();
    return digest;
  }
예제 #7
0
  /**
   * Writes 10000 rows of five columns each to an Accumulo table by driving an
   * {@link AccumuloOutputFormat} record writer directly, without running a MapReduce job.
   *
   * <p>This is a tool because when you run a mapreduce, you will need to use the ToolRunner if
   * you want libjars to be passed properly to the map and reduce tasks, even though this class
   * isn't a mapreduce.
   *
   * @param args exactly five values: instance name, zoo keepers, username, password, tablename
   * @return 0 on success, 1 when the wrong number of arguments is supplied
   * @throws Exception if configuring the output format, writing, or closing fails
   */
  @Override
  public int run(String[] args) throws Exception {
    if (args.length != 5) {
      System.out.println(
          "Usage: bin/tool.sh "
              + this.getClass().getName()
              + " <instance name> <zoo keepers> <username> <password> <tablename>");
      return 1;
    }
    Text tableName = new Text(args[4]);
    Job job = new Job(getConf());
    Configuration conf = job.getConfiguration();
    AccumuloOutputFormat.setZooKeeperInstance(conf, args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(conf, args[2], args[3].getBytes(), true, null);
    job.setOutputFormatClass(AccumuloOutputFormat.class);

    // when running a mapreduce, you won't need to instantiate the output
    // format and record writer
    // mapreduce will do that for you, and you will just use
    // output.collect(tableName, mutation)
    TaskAttemptContext context = new TaskAttemptContext(conf, new TaskAttemptID());
    RecordWriter<Text, Mutation> rw = new AccumuloOutputFormat().getRecordWriter(context);
    try {
      Text colf = new Text("colfam");
      System.out.println("writing ...");
      for (int i = 0; i < 10000; i++) {
        Mutation m = new Mutation(new Text(String.format("row_%d", i)));
        for (int j = 0; j < 5; j++) {
          m.put(
              colf,
              new Text(String.format("colqual_%d", j)),
              new Value((String.format("value_%d_%d", i, j)).getBytes()));
        }
        rw.write(tableName, m); // repeat until done
        // progress marker every 100 rows
        if (i % 100 == 0) {
          System.out.println(i);
        }
      }
    } finally {
      // close when done -- also when a write fails, so the writer is not leaked
      rw.close(context);
    }
    return 0;
  }
예제 #8
0
  /**
   * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if passed a keyvalue whose
   * timestamp is {@link HConstants#LATEST_TIMESTAMP}.
   *
   * <p>Also checks the opposite case: a keyvalue with an explicit timestamp is left unchanged by
   * the write.
   *
   * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void test_LATEST_TIMESTAMP_isReplaced() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
    try {
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);
      final byte[] b = Bytes.toBytes("b");

      // Test 1.  Pass a KV that has a ts of LATEST_TIMESTAMP.  It should be
      // changed by call to write.  Check all in kv is same but ts.
      KeyValue kv = new KeyValue(b, b, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertFalse(original.equals(kv));
      assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
      assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
      assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
      // Compare the primitive values. assertNotSame would box both longs and compare object
      // identity, which can pass even when the timestamps are equal.
      assertTrue(original.getTimestamp() != kv.getTimestamp());
      assertTrue(HConstants.LATEST_TIMESTAMP != kv.getTimestamp());

      // Test 2. Now test passing a kv that has explicit ts.  It should not be
      // changed by call to record write.
      kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertTrue(original.equals(kv));
    } finally {
      if (writer != null && context != null) {
        writer.close(context);
      }
      dir.getFileSystem(conf).delete(dir, true);
    }
  }
 /**
  * Closes the internal RecordWriter; this is needed so the wrapped writer is not left open.
  *
  * @param arg0 task attempt context, forwarded unchanged to the internal writer's close
  * @throws IOException propagated from the internal writer's close
  * @throws InterruptedException propagated from the internal writer's close
  */
 @Override
 public void close(TaskAttemptContext arg0) throws IOException, InterruptedException {
   // Pure delegation: this method does nothing besides closing the wrapped writer.
   _inRecordWriter.close(arg0);
 } // end close
  /**
   * Round-trip test for {@code SequenceFileAsBinaryOutputFormat}.
   *
   * <p>Writes {@code RECORDS} pseudo-random (int, double) pairs as raw serialized bytes through
   * the binary output format with block compression enabled, commits the output, then reads the
   * records back with {@code SequenceFileInputFormat} and compares each one against the same
   * pseudo-random sequence replayed from the same seed.
   *
   * @throws IOException on I/O failure while writing, committing or reading
   * @throws InterruptedException if writing or reading is interrupted
   */
  public void testBinary() throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = new Job(conf);

    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
    // Remember the seed so the exact same sequence can be regenerated for verification.
    Random r = new Random();
    long seed = r.nextLong();
    r.setSeed(seed);

    FileOutputFormat.setOutputPath(job, outdir);

    // Declare the logical key/value classes to record for the sequence file.
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);

    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    // Reused holders for the serialized key and value bytes handed to the writer.
    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();

    TaskAttemptContext context =
        MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, BytesWritable> outputFormat =
        new SequenceFileAsBinaryOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);

    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    // Scratch buffer used to serialize each writable before copying it into bkey/bval.
    DataOutputBuffer outbuf = new DataOutputBuffer();
    LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
    try {
      for (int i = 0; i < RECORDS; ++i) {
        // Draw order matters: one int, then one double per record; the read loop replays it.
        iwritable = new IntWritable(r.nextInt());
        iwritable.write(outbuf);
        bkey.set(outbuf.getData(), 0, outbuf.getLength());
        outbuf.reset();
        dwritable = new DoubleWritable(r.nextDouble());
        dwritable.write(outbuf);
        bval.set(outbuf.getData(), 0, outbuf.getLength());
        outbuf.reset();
        writer.write(bkey, bval);
      }
    } finally {
      writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);

    InputFormat<IntWritable, DoubleWritable> iformat =
        new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    // Rewind the generator so it reproduces the values that were written.
    r.setSeed(seed);
    SequenceFileInputFormat.setInputPaths(job, outdir);
    LOG.info("Reading data by SequenceFileInputFormat");
    for (InputSplit split : iformat.getSplits(job)) {
      RecordReader<IntWritable, DoubleWritable> reader = iformat.createRecordReader(split, context);
      // Context for the reader; the writer and committer arguments are passed as null here.
      MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
          new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
              job.getConfiguration(),
              context.getTaskAttemptID(),
              reader,
              null,
              null,
              MapReduceTestUtil.createDummyReporter(),
              split);
      reader.initialize(split, mcontext);
      try {
        int sourceInt;
        double sourceDouble;
        while (reader.nextKeyValue()) {
          // Regenerate the expected pair in the same int-then-double order used when writing.
          sourceInt = r.nextInt();
          sourceDouble = r.nextDouble();
          iwritable = reader.getCurrentKey();
          dwritable = reader.getCurrentValue();
          assertEquals(
              "Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*",
              sourceInt,
              iwritable.get());
          assertTrue(
              "Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*",
              Double.compare(dwritable.get(), sourceDouble) == 0);
          ++count;
        }
      } finally {
        reader.close();
      }
    }
    // Every written record must have been read back across all splits.
    assertEquals("Some records not found", RECORDS, count);
  }
예제 #11
0
  /**
   * Test that {@link HFileOutputFormat2} RecordWriter uses compression and bloom filter settings
   * from the column family descriptor
   *
   * <p>Writes random rows for every generated column family, commits the output, and then opens
   * the first data file of each family directory to compare its bloom filter type and
   * compression with the family's descriptor.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testColumnFamilySettings() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("testColumnFamilySettings");

    // Setup table descriptor
    Table table = Mockito.mock(Table.class);
    RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    Mockito.doReturn(htd).when(table).getTableDescriptor();
    for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
      htd.addFamily(hcd);
    }

    // set up the table to return some mock keys
    setupMockStartKeys(regionLocator);

    try {
      // partial map red setup to get an operational writer for testing
      // We turn off the sequence file compression, because DefaultCodec
      // pollutes the GZip codec pool with an incompatible compressor.
      conf.set("io.seqfile.compression.type", "NONE");
      conf.set("hbase.fs.tmp.dir", dir.toString());
      // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
      conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);

      Job job = new Job(conf, "testLocalMRIncrementalLoad");
      job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
      setupRandomGeneratorMapper(job);
      HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      // write out random rows
      writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
      writer.close(context);

      // Make sure that a directory was created for every CF
      FileSystem fs = dir.getFileSystem(conf);

      // commit so that the filesystem has one directory per column family
      hof.getOutputCommitter(context).commitTask(context);
      hof.getOutputCommitter(context).commitJob(context);
      FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
      assertEquals(htd.getFamilies().size(), families.length);
      for (FileStatus f : families) {
        String familyStr = f.getPath().getName();
        HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
        // verify that the compression on this file matches the configured
        // compression
        Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
        Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
        try {
          Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

          // a file without the bloom filter key is treated as having bloom type NONE
          byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
          if (bloomFilter == null) {
            bloomFilter = Bytes.toBytes("NONE");
          }
          assertEquals(
              "Incorrect bloom filter used for column family "
                  + familyStr
                  + "(reader: "
                  + reader
                  + ")",
              hcd.getBloomFilterType(),
              BloomType.valueOf(Bytes.toString(bloomFilter)));
          assertEquals(
              "Incorrect compression used for column family "
                  + familyStr
                  + "(reader: "
                  + reader
                  + ")",
              hcd.getCompressionType(),
              reader.getFileContext().getCompression());
        } finally {
          // One reader is opened per family; close each so none is leaked, even when an
          // assertion above fails.
          reader.close();
        }
      }
    } finally {
      dir.getFileSystem(conf).delete(dir, true);
    }
  }
 /**
  * Closes the underlying {@code recordWriter}, forwarding the given task context to it.
  *
  * @param context the context of the task
  * @throws IOException propagated from the underlying writer's close
  * @throws InterruptedException propagated from the underlying writer's close
  */
 public void close(TaskAttemptContext context) throws IOException, InterruptedException {
   recordWriter.close(context);
 }
 /**
  * Emits the {@code classifier} as one final record (with a {@link NullWritable} key) and then
  * closes the underlying writer.
  *
  * @param context task attempt context, forwarded to the underlying writer's close
  * @throws IOException if the final write or the close fails
  * @throws InterruptedException if the final write or the close is interrupted
  */
 @Override
 public void close(TaskAttemptContext context) throws IOException, InterruptedException {
   try {
     writer.write(NullWritable.get(), classifier);
   } finally {
     // Close even when the final write fails so the underlying writer is not leaked.
     writer.close(context);
   }
 }
예제 #14
0
  /**
   * Feeds a hand-built sequence of term, document and size records for two field indexes
   * ("index0", "index1") plus the alignment index through the {@code IndexRecordWriter}, then
   * opens the written indexes from disk and checks document counts, term counts, positions,
   * occurrences and document sizes.
   */
  @Test
  public void test() throws Exception {
    context.checking(
        new Expectations() {
          {
            allowing(taskContext).getConfiguration();
            will(returnValue(conf));
            allowing(taskContext).getTaskAttemptID();
            will(returnValue(taskAttemptID));
          }
        });
    OutputFormat outputFormat = new IndexRecordWriter.OutputFormat();

    conf.setStrings("RdfFieldNames", "index0", "index1");
    conf.setEnum("IndexType", RDFDocumentFactory.IndexType.VERTICAL);

    RecordWriter<IntWritable, IndexRecordWriterValue> recordWriter =
        outputFormat.getRecordWriter(taskContext);

    // The key and value objects are reused and mutated between writes.
    IntWritable key = new IntWritable();
    IndexRecordWriterTermValue termValue = new IndexRecordWriterTermValue();
    IndexRecordWriterDocValue docValue = new IndexRecordWriterDocValue();
    IndexRecordWriterSizeValue sizeValue = new IndexRecordWriterSizeValue();

    // ALIGNMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term1");
    termValue.setTermFrequency(1);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.setDocument(0); // term1 occurs in index 0
    recordWriter.write(key, docValue);

    // Index 0
    key.set(0);
    termValue.setTermFrequency(3);
    termValue.setOccurrenceCount(6);
    termValue.setSumOfMaxTermPositions(15 + 12 + 18);
    recordWriter.write(key, termValue);
    docValue.setDocument(3);
    docValue.clearOccerrences();
    docValue.addOccurrence(11);
    docValue.addOccurrence(15);
    recordWriter.write(key, docValue);
    docValue.setDocument(4);
    docValue.clearOccerrences();
    docValue.addOccurrence(12);
    recordWriter.write(key, docValue);
    docValue.setDocument(7);
    docValue.clearOccerrences();
    docValue.addOccurrence(14);
    docValue.addOccurrence(17);
    docValue.addOccurrence(18);
    recordWriter.write(key, docValue);

    // ALIGNMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term2");
    termValue.setTermFrequency(2);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.clearOccerrences();
    docValue.setDocument(0); // term2 occurs in index 0 & 1
    recordWriter.write(key, docValue);
    docValue.setDocument(1); // term2 occurs in index 0 & 1
    recordWriter.write(key, docValue);

    // Index 0
    key.set(0);
    termValue.setTermFrequency(2);
    termValue.setOccurrenceCount(4);
    termValue.setSumOfMaxTermPositions(19 + 16);
    recordWriter.write(key, termValue);

    docValue.setDocument(1);
    docValue.clearOccerrences();
    docValue.addOccurrence(10);
    docValue.addOccurrence(19);
    recordWriter.write(key, docValue);
    docValue.setDocument(7);
    docValue.clearOccerrences();
    docValue.addOccurrence(13);
    docValue.addOccurrence(16);
    recordWriter.write(key, docValue);

    // Index 1
    key.set(1);
    termValue.setTermFrequency(1);
    termValue.setOccurrenceCount(1);
    termValue.setSumOfMaxTermPositions(14);
    recordWriter.write(key, termValue);
    docValue.setDocument(1);
    docValue.clearOccerrences();
    docValue.addOccurrence(14);
    recordWriter.write(key, docValue);

    // ALIGNMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term3");
    termValue.setTermFrequency(1);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.setDocument(1); // term3 occurs in index 1
    recordWriter.write(key, docValue);
    docValue.clearOccerrences();

    // Index 1
    key.set(1);
    termValue.setTermFrequency(1);
    termValue.setOccurrenceCount(2);
    termValue.setSumOfMaxTermPositions(11);
    recordWriter.write(key, termValue);
    docValue.setDocument(3);
    docValue.clearOccerrences();
    docValue.addOccurrence(10);
    docValue.addOccurrence(11);
    recordWriter.write(key, docValue);

    // Doc Sizes.
    key.set(0);
    sizeValue.setDocument(0);
    sizeValue.setSize(3);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(3);
    sizeValue.setSize(1);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(4);
    sizeValue.setSize(10);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(6);
    sizeValue.setSize(2);
    recordWriter.write(key, sizeValue);

    key.set(1);
    sizeValue.setDocument(3);
    sizeValue.setSize(3);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(6);
    sizeValue.setSize(5);
    recordWriter.write(key, sizeValue);

    recordWriter.close(taskContext);

    // Check the written indexes..

    Path workPath = outputFormat.getDefaultWorkFile(taskContext, "");
    System.out.println("Default work file is " + workPath.toString());
    String dir = workPath.toUri().getPath();
    BitStreamIndex index0 =
        (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index0", true, true);
    assertEquals(8, index0.numberOfDocuments);
    assertEquals(2, index0.numberOfTerms);
    assertTrue(index0.hasPositions);
    // term1
    checkOccurrences(index0.documents(0), 3, "(3:11,15) (4:12) (7:14,17,18)");
    // term2
    checkOccurrences(index0.documents(1), 2, "(1:10,19) (7:13,16)");
    assertEquals("[3, 0, 0, 1, 10, 0, 2, 0]", index0.sizes.toString());

    BitStreamIndex index1 =
        (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index1", true, true);
    assertEquals(8, index1.numberOfDocuments);
    assertEquals(2, index1.numberOfTerms);
    // Check index1 itself; the original asserted index0 here, leaving index1 unverified.
    assertTrue(index1.hasPositions);
    // term2
    checkOccurrences(index1.documents(0), 1, "(1:14)");
    // term3
    checkOccurrences(index1.documents(1), 1, "(3:10,11)");

    BitStreamIndex indexAlignment =
        (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/alignment", true);
    assertEquals(8, indexAlignment.numberOfDocuments);
    assertEquals(3, indexAlignment.numberOfTerms);
    assertFalse(indexAlignment.hasPositions);
    // term1
    assertEquals(1, indexAlignment.documents(0).frequency());
    // term2
    assertEquals(2, indexAlignment.documents(1).frequency());
    // term3
    assertEquals(1, indexAlignment.documents(2).frequency());
    // document sizes recorded for index 1
    assertEquals("[0, 0, 0, 3, 0, 0, 5, 0]", index1.sizes.toString());
  }
예제 #15
0
 /**
  * Closes the underlying {@code recordWriter}, passing it this object's {@code context}.
  *
  * @throws IOException propagated from the underlying writer's close
  * @throws InterruptedException propagated from the underlying writer's close
  */
 public void close() throws IOException, InterruptedException {
   recordWriter.close(context);
 }