Пример #1
0
  /*
   * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
   * metadata used by time-restricted scans.
   */
  @Test
  public void test_TIMERANGE() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_TIMERANGE_present");
    LOG.info("Timerange dir writing to dir: " + dir);
    try {
      // build a record writer using HFileOutputFormat2
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      // Pass two key values with explicit times stamps
      final byte[] b = Bytes.toBytes("b");

      // value 1 with timestamp 2000
      KeyValue kv = new KeyValue(b, b, b, 2000, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // value 2 with timestamp 1000
      kv = new KeyValue(b, b, b, 1000, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // verify that the file has the proper FileInfo.
      writer.close(context);

      // the generated file lives 1 directory down from the attempt directory
      // and is the only file, e.g.
      // _attempt__0000_r_000000_0/b/1979617994050536795
      FileSystem fs = FileSystem.get(conf);
      Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
      FileStatus[] sub1 = fs.listStatus(attemptDirectory);
      FileStatus[] file = fs.listStatus(sub1[0].getPath());

      // open as HFile Reader and pull out TIMERANGE FileInfo.
      HFile.Reader rd = HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), conf);
      Map<byte[], byte[]> finfo = rd.loadFileInfo();
      byte[] range = finfo.get("TIMERANGE".getBytes());
      assertNotNull(range);

      // unmarshall and check values.
      TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
      Writables.copyWritable(range, timeRangeTracker);
      LOG.info(
          timeRangeTracker.getMinimumTimestamp() + "...." + timeRangeTracker.getMaximumTimestamp());
      assertEquals(1000, timeRangeTracker.getMinimumTimestamp());
      assertEquals(2000, timeRangeTracker.getMaximumTimestamp());
      rd.close();
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }
Пример #2
0
  /**
   * Write random values to the writer assuming a table created using {@link #FAMILIES} as column
   * family descriptors
   */
  private void writeRandomKeyValues(
      RecordWriter<ImmutableBytesWritable, Cell> writer,
      TaskAttemptContext context,
      Set<byte[]> families,
      int numRows)
      throws IOException, InterruptedException {
    byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
    int valLength = 10;
    byte valBytes[] = new byte[valLength];

    int taskId = context.getTaskAttemptID().getTaskID().getId();
    assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";
    final byte[] qualifier = Bytes.toBytes("data");
    Random random = new Random();
    for (int i = 0; i < numRows; i++) {

      Bytes.putInt(keyBytes, 0, i);
      random.nextBytes(valBytes);
      ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

      for (byte[] family : families) {
        Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes);
        writer.write(key, kv);
      }
    }
  }
Пример #3
0
  @Test
  public void test() throws Exception {
    createTable(TABLE_NAME, getBasicSchema(), getBasicCreateTableOptions());

    KuduTableOutputFormat output = new KuduTableOutputFormat();
    Configuration conf = new Configuration();
    conf.set(KuduTableOutputFormat.MASTER_ADDRESSES_KEY, getMasterAddresses());
    conf.set(KuduTableOutputFormat.OUTPUT_TABLE_KEY, TABLE_NAME);
    output.setConf(conf);

    String multitonKey = conf.get(KuduTableOutputFormat.MULTITON_KEY);
    KuduTable table = KuduTableOutputFormat.getKuduTable(multitonKey);
    assertNotNull(table);

    Insert insert = table.newInsert();
    PartialRow row = insert.getRow();
    row.addInt(0, 1);
    row.addInt(1, 2);
    row.addInt(2, 3);
    row.addString(3, "a string");
    row.addBoolean(4, true);

    RecordWriter<NullWritable, Operation> rw = output.getRecordWriter(null);
    rw.write(NullWritable.get(), insert);
    rw.close(null);
    AsyncKuduScanner.AsyncKuduScannerBuilder builder = client.newScannerBuilder(table);
    assertEquals(1, countRowsInScan(builder.build()));
  }
 @Override
 public void write(K key, V value) throws IOException, InterruptedException {
   // 得到输出文件名
   String baseName = generateFileNameForKeyValue(key, value, job.getConfiguration()); // 生成输出文件名
   RecordWriter<K, V> rw = this.recordWriters.get(baseName); // ??
   if (rw == null) {
     rw = getBaseRecordWriter(job, baseName); //
     this.recordWriters.put(baseName, rw);
   }
   rw.write(key, value);
 }
Пример #5
0
  /**
   * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if passed a keyvalue whose
   * timestamp is {@link HConstants#LATEST_TIMESTAMP}.
   *
   * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void test_LATEST_TIMESTAMP_isReplaced() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
    try {
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);
      final byte[] b = Bytes.toBytes("b");

      // Test 1.  Pass a KV that has a ts of LATEST_TIMESTAMP.  It should be
      // changed by call to write.  Check all in kv is same but ts.
      KeyValue kv = new KeyValue(b, b, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertFalse(original.equals(kv));
      assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
      assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
      assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
      assertNotSame(original.getTimestamp(), kv.getTimestamp());
      assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());

      // Test 2. Now test passing a kv that has explicit ts.  It should not be
      // changed by call to record write.
      kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertTrue(original.equals(kv));
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }
Пример #6
0
 @Override
 public void putNext(Tuple tuple) throws IOException {
   log.debug("putNext({})", tuple);
   try {
     Node g = NodeEncoder.asNode((String) tuple.get(0));
     Node s = NodeEncoder.asNode((String) tuple.get(1));
     Node p = NodeEncoder.asNode((String) tuple.get(2));
     Node o = NodeEncoder.asNode((String) tuple.get(3));
     Quad quad = new Quad(g, s, p, o);
     QuadWritable quadWritable = new QuadWritable(quad);
     writer.write(NullWritable.get(), quadWritable);
   } catch (InterruptedException e) {
     throw new IOException(e);
   }
 }
    /**
     * The work is done here for preparing the output Mutation. The TileIdWritable and
     * RasterWritable are transformed here.
     */
    @Override
    public void write(TileIdWritable key, RasterWritable value)
        throws IOException, InterruptedException {
      int zoom = zoomLevel;
      if (key instanceof TileIdZoomWritable) {
        zoom = ((TileIdZoomWritable) key).getZoom();
      }

      // ColumnVisibility cv = new ColumnVisibility();
      // transform the keys
      ByteBuffer buf = ByteBuffer.allocate(8);
      buf.putLong(key.get());
      Mutation m = new Mutation(new Text(buf.array()));
      // We only want the actual bytes for the value, not the full array of bytes
      Value v = new Value(value.copyBytes());
      m.put(Integer.toString(zoom), Long.toString(key.get()), cv, v);
      _inRecordWriter.write(outTable, m);
    } // end write
  /**
   * Creates an lzo file with random data.
   *
   * @param outputDir Output directory.
   * @param fs File system we're using.
   * @param attemptContext Task attempt context, contains task id etc.
   * @throws IOException
   * @throws InterruptedException
   */
  private byte[] createTestInput(
      Path outputDir, FileSystem fs, TaskAttemptContext attemptContext, int charsToOutput)
      throws IOException, InterruptedException {

    TextOutputFormat<Text, Text> output = new TextOutputFormat<Text, Text>();
    RecordWriter<Text, Text> rw = null;

    md5_.reset();

    try {
      rw = output.getRecordWriter(attemptContext);

      char[] chars = "abcdefghijklmnopqrstuvwxyz\u00E5\u00E4\u00F6".toCharArray();

      Random r = new Random(System.currentTimeMillis());
      Text key = new Text();
      Text value = new Text();
      int charsMax = chars.length - 1;
      for (int i = 0; i < charsToOutput; ) {
        i += fillText(chars, r, charsMax, key);
        i += fillText(chars, r, charsMax, value);
        rw.write(key, value);
        md5_.update(key.getBytes(), 0, key.getLength());
        // text output format writes tab between the key and value
        md5_.update("\t".getBytes("UTF-8"));
        md5_.update(value.getBytes(), 0, value.getLength());
      }
    } finally {
      if (rw != null) {
        rw.close(attemptContext);
        OutputCommitter committer = output.getOutputCommitter(attemptContext);
        committer.commitTask(attemptContext);
        committer.cleanupJob(attemptContext);
      }
    }

    byte[] result = md5_.digest();
    md5_.reset();
    return result;
  }
Пример #9
0
  // this is a tool because when you run a mapreduce, you will need to use the
  // ToolRunner
  // if you want libjars to be passed properly to the map and reduce tasks
  // even though this class isn't a mapreduce
  @Override
  public int run(String[] args) throws Exception {
    if (args.length != 5) {
      System.out.println(
          "Usage: bin/tool.sh "
              + this.getClass().getName()
              + " <instance name> <zoo keepers> <username> <password> <tablename>");
      return 1;
    }
    Text tableName = new Text(args[4]);
    Job job = new Job(getConf());
    Configuration conf = job.getConfiguration();
    AccumuloOutputFormat.setZooKeeperInstance(conf, args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(conf, args[2], args[3].getBytes(), true, null);
    job.setOutputFormatClass(AccumuloOutputFormat.class);

    // when running a mapreduce, you won't need to instantiate the output
    // format and record writer
    // mapreduce will do that for you, and you will just use
    // output.collect(tableName, mutation)
    TaskAttemptContext context = new TaskAttemptContext(conf, new TaskAttemptID());
    RecordWriter<Text, Mutation> rw = new AccumuloOutputFormat().getRecordWriter(context);

    Text colf = new Text("colfam");
    System.out.println("writing ...");
    for (int i = 0; i < 10000; i++) {
      Mutation m = new Mutation(new Text(String.format("row_%d", i)));
      for (int j = 0; j < 5; j++) {
        m.put(
            colf,
            new Text(String.format("colqual_%d", j)),
            new Value((String.format("value_%d_%d", i, j)).getBytes()));
      }
      rw.write(tableName, m); // repeat until done
      if (i % 100 == 0) System.out.println(i);
    }

    rw.close(context); // close when done
    return 0;
  }
  @Override
  public void putNext(Tuple f) throws IOException {

    Object field;
    try {
      field = f.get(0);
    } catch (ExecException ee) {
      throw ee;
    }
    String key = (String) field;

    try {
      field = f.get(1);
    } catch (ExecException ee) {
      throw ee;
    }
    Map<String, Float> val = (Map<String, Float>) field;

    try {
      writer.write(key, val);
    } catch (InterruptedException e) {
      throw new IOException(e);
    }
  }
  public void testBinary() throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = new Job(conf);

    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
    Random r = new Random();
    long seed = r.nextLong();
    r.setSeed(seed);

    FileOutputFormat.setOutputPath(job, outdir);

    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);

    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();

    TaskAttemptContext context =
        MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, BytesWritable> outputFormat =
        new SequenceFileAsBinaryOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);

    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
    try {
      for (int i = 0; i < RECORDS; ++i) {
        iwritable = new IntWritable(r.nextInt());
        iwritable.write(outbuf);
        bkey.set(outbuf.getData(), 0, outbuf.getLength());
        outbuf.reset();
        dwritable = new DoubleWritable(r.nextDouble());
        dwritable.write(outbuf);
        bval.set(outbuf.getData(), 0, outbuf.getLength());
        outbuf.reset();
        writer.write(bkey, bval);
      }
    } finally {
      writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);

    InputFormat<IntWritable, DoubleWritable> iformat =
        new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    r.setSeed(seed);
    SequenceFileInputFormat.setInputPaths(job, outdir);
    LOG.info("Reading data by SequenceFileInputFormat");
    for (InputSplit split : iformat.getSplits(job)) {
      RecordReader<IntWritable, DoubleWritable> reader = iformat.createRecordReader(split, context);
      MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
          new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
              job.getConfiguration(),
              context.getTaskAttemptID(),
              reader,
              null,
              null,
              MapReduceTestUtil.createDummyReporter(),
              split);
      reader.initialize(split, mcontext);
      try {
        int sourceInt;
        double sourceDouble;
        while (reader.nextKeyValue()) {
          sourceInt = r.nextInt();
          sourceDouble = r.nextDouble();
          iwritable = reader.getCurrentKey();
          dwritable = reader.getCurrentValue();
          assertEquals(
              "Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*",
              sourceInt,
              iwritable.get());
          assertTrue(
              "Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*",
              Double.compare(dwritable.get(), sourceDouble) == 0);
          ++count;
        }
      } finally {
        reader.close();
      }
    }
    assertEquals("Some records not found", RECORDS, count);
  }
 @Override
 public void close(TaskAttemptContext context) throws IOException, InterruptedException {
   writer.write(NullWritable.get(), classifier);
   writer.close(context);
 }
Пример #13
0
  @Test
  public void test() throws Exception {
    context.checking(
        new Expectations() {
          {
            allowing(taskContext).getConfiguration();
            will(returnValue(conf));
            allowing(taskContext).getTaskAttemptID();
            will(returnValue(taskAttemptID));
          }
        });
    OutputFormat outputFormat = new IndexRecordWriter.OutputFormat();

    conf.setStrings("RdfFieldNames", "index0", "index1");
    conf.setEnum("IndexType", RDFDocumentFactory.IndexType.VERTICAL);

    RecordWriter<IntWritable, IndexRecordWriterValue> recordWriter =
        outputFormat.getRecordWriter(taskContext);

    IntWritable key = new IntWritable();
    IndexRecordWriterTermValue termValue = new IndexRecordWriterTermValue();
    IndexRecordWriterDocValue docValue = new IndexRecordWriterDocValue();
    IndexRecordWriterSizeValue sizeValue = new IndexRecordWriterSizeValue();

    // ALIGNEMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term1");
    termValue.setTermFrequency(1);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.setDocument(0); // term1 occurs in index 0
    recordWriter.write(key, docValue);

    // Index 0
    key.set(0);
    termValue.setTermFrequency(3);
    termValue.setOccurrenceCount(6);
    termValue.setSumOfMaxTermPositions(15 + 12 + 18);
    recordWriter.write(key, termValue);
    docValue.setDocument(3);
    docValue.clearOccerrences();
    docValue.addOccurrence(11);
    docValue.addOccurrence(15);
    recordWriter.write(key, docValue);
    docValue.setDocument(4);
    docValue.clearOccerrences();
    docValue.addOccurrence(12);
    recordWriter.write(key, docValue);
    docValue.setDocument(7);
    docValue.clearOccerrences();
    docValue.addOccurrence(14);
    docValue.addOccurrence(17);
    docValue.addOccurrence(18);
    recordWriter.write(key, docValue);

    // ALIGNEMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term2");
    termValue.setTermFrequency(2);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.clearOccerrences();
    docValue.setDocument(0); // term2 occurs in index 0 & 1
    recordWriter.write(key, docValue);
    docValue.setDocument(1); // term2 occurs in index 0 & 1
    recordWriter.write(key, docValue);

    // Index 0
    key.set(0);
    termValue.setTermFrequency(2);
    termValue.setOccurrenceCount(4);
    termValue.setSumOfMaxTermPositions(19 + 16);
    recordWriter.write(key, termValue);

    docValue.setDocument(1);
    docValue.clearOccerrences();
    docValue.addOccurrence(10);
    docValue.addOccurrence(19);
    recordWriter.write(key, docValue);
    docValue.setDocument(7);
    docValue.clearOccerrences();
    docValue.addOccurrence(13);
    docValue.addOccurrence(16);
    recordWriter.write(key, docValue);

    // Index 1
    key.set(1);
    termValue.setTermFrequency(1);
    termValue.setOccurrenceCount(1);
    termValue.setSumOfMaxTermPositions(14);
    recordWriter.write(key, termValue);
    docValue.setDocument(1);
    docValue.clearOccerrences();
    docValue.addOccurrence(14);
    recordWriter.write(key, docValue);

    // ALIGNMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term3");
    termValue.setTermFrequency(1);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.setDocument(1); // term3 occurs in index 1
    recordWriter.write(key, docValue);
    docValue.clearOccerrences();

    // Index 1
    key.set(1);
    termValue.setTermFrequency(1);
    termValue.setOccurrenceCount(2);
    termValue.setSumOfMaxTermPositions(11);
    recordWriter.write(key, termValue);
    docValue.setDocument(3);
    docValue.clearOccerrences();
    docValue.addOccurrence(10);
    docValue.addOccurrence(11);
    recordWriter.write(key, docValue);

    // Doc Sizes.
    key.set(0);
    sizeValue.setDocument(0);
    sizeValue.setSize(3);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(3);
    sizeValue.setSize(1);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(4);
    sizeValue.setSize(10);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(6);
    sizeValue.setSize(2);
    recordWriter.write(key, sizeValue);

    key.set(1);
    sizeValue.setDocument(3);
    sizeValue.setSize(3);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(6);
    sizeValue.setSize(5);
    recordWriter.write(key, sizeValue);

    recordWriter.close(taskContext);

    // Check the written indexes..

    Path workPath = outputFormat.getDefaultWorkFile(taskContext, "");
    System.out.println("Default work file is " + workPath.toString());
    String dir = workPath.toUri().getPath();
    BitStreamIndex index0 =
        (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index0", true, true);
    assertEquals(8, index0.numberOfDocuments);
    assertEquals(2, index0.numberOfTerms);
    assertTrue(index0.hasPositions);
    // term1
    checkOccurrences(index0.documents(0), 3, "(3:11,15) (4:12) (7:14,17,18)");
    // term2
    checkOccurrences(index0.documents(1), 2, "(1:10,19) (7:13,16)");
    assertEquals("[3, 0, 0, 1, 10, 0, 2, 0]", index0.sizes.toString());

    BitStreamIndex index1 =
        (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index1", true, true);
    assertEquals(8, index1.numberOfDocuments);
    assertEquals(2, index1.numberOfTerms);
    assertTrue(index0.hasPositions);
    checkOccurrences(index1.documents(0), 1, "(1:14)");
    // term3
    checkOccurrences(index1.documents(1), 1, "(3:10,11)");

    BitStreamIndex indexAlignment =
        (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/alignment", true);
    assertEquals(8, indexAlignment.numberOfDocuments);
    assertEquals(3, indexAlignment.numberOfTerms);
    assertFalse(indexAlignment.hasPositions);
    // term1
    assertEquals(1, indexAlignment.documents(0).frequency());
    // term2
    assertEquals(2, indexAlignment.documents(1).frequency());
    // term3
    assertEquals(1, indexAlignment.documents(2).frequency());
    assertEquals("[0, 0, 0, 3, 0, 0, 5, 0]", index1.sizes.toString());
  }
 /** Generate an output key/value pair. */
 public void write(KEYOUT key, VALUEOUT value) throws IOException, InterruptedException {
   output.write(key, value);
 }
Пример #15
0
 public void write(K key, V value) throws IOException, InterruptedException {
   recordWriter.write(key, value);
 }