コード例 #1
0
  public void testInputFormat() {

    try {
      JobConf conf = new JobConf();
      String TMP_DIR = System.getProperty("test.build.data", "/tmp");
      Path filename = new Path("file:///" + TMP_DIR + "/tmpSeqFile");
      SequenceFile.Writer sfw =
          SequenceFile.createWriter(
              FileSystem.getLocal(conf),
              conf,
              filename,
              ChukwaArchiveKey.class,
              ChunkImpl.class,
              SequenceFile.CompressionType.NONE,
              Reporter.NULL);

      StringBuilder buf = new StringBuilder();
      int offsets[] = new int[lines.length];
      for (int i = 0; i < lines.length; ++i) {
        buf.append(lines[i]);
        buf.append("\n");
        offsets[i] = buf.length() - 1;
      }
      ChukwaArchiveKey key = new ChukwaArchiveKey(0, "datatype", "sname", 0);
      ChunkImpl val = new ChunkImpl("datatype", "sname", 0, buf.toString().getBytes(), null);
      val.setRecordOffsets(offsets);
      sfw.append(key, val);
      sfw.append(key, val); // write it twice
      sfw.close();

      long len = FileSystem.getLocal(conf).getFileStatus(filename).getLen();
      InputSplit split = new FileSplit(filename, 0, len, (String[]) null);
      ChukwaInputFormat in = new ChukwaInputFormat();
      RecordReader<LongWritable, Text> r = in.getRecordReader(split, conf, Reporter.NULL);

      LongWritable l = r.createKey();
      Text line = r.createValue();
      for (int i = 0; i < lines.length * 2; ++i) {
        boolean succeeded = r.next(l, line);
        assertTrue(succeeded);
        assertEquals(i, l.get());
        assertEquals(lines[i % lines.length], line.toString());
        System.out.println("read line: " + l.get() + " " + line);
      }
      boolean succeeded = r.next(l, line);
      assertFalse(succeeded);

    } catch (IOException e) {
      e.printStackTrace();
      fail("IO exception " + e);
    }
  }
コード例 #2
0
    public void setup(Context context) {

      try {
        // System.setProperty("java.library.path", "/home/gathors/proj/libs");
        // System.loadLibrary(Core.NATIVE_xxx);
        // System.loacLibrary("/home/gathors/proj/libs/opencv-300.jar");
      } catch (UnsatisfiedLinkError e) {
        System.err.println("\nNATIVE LIBRARY failed to load...");
        System.err.println("ERROR:" + e);
        System.err.println("NATIVE_LIBRARY_NAME:" + Core.NATIVE_LIBRARY_NAME);
        System.err.println("#" + System.getProperty("java.library.path"));
        System.exit(1);
      }
    }
コード例 #3
0
  public void testcheckOutputSpecsForbidRecordCompression() throws IOException {
    Job job = Job.getInstance(new Configuration(), "testcheckOutputSpecsForbidRecordCompression");
    FileSystem fs = FileSystem.getLocal(job.getConfiguration());
    Path outputdir = new Path(System.getProperty("test.build.data", "/tmp") + "/output");
    fs.delete(outputdir, true);

    // Without outputpath, FileOutputFormat.checkoutputspecs will throw
    // InvalidJobConfException
    FileOutputFormat.setOutputPath(job, outputdir);

    // SequenceFileAsBinaryOutputFormat doesn't support record compression
    // It should throw an exception when checked by checkOutputSpecs
    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);

    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    try {
      new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job);
    } catch (Exception e) {
      fail(
          "Block compression should be allowed for "
              + "SequenceFileAsBinaryOutputFormat:Caught "
              + e.getClass().getName());
    }

    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.RECORD);
    try {
      new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job);
      fail("Record compression should not be allowed for " + "SequenceFileAsBinaryOutputFormat");
    } catch (InvalidJobConfException ie) {
      // expected
    } catch (Exception e) {
      fail(
          "Expected "
              + InvalidJobConfException.class.getName()
              + "but caught "
              + e.getClass().getName());
    }
  }
コード例 #4
0
  public void testFormat() throws Exception {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");

    Reporter reporter = Reporter.NULL;

    int seed = new Random().nextInt();
    // LOG.info("seed = "+seed);
    Random random = new Random(seed);

    fs.delete(dir, true);

    FileInputFormat.setInputPaths(job, dir);

    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {

      // LOG.info("creating; entries = " + length);

      // create a file with length entries
      SequenceFile.Writer writer =
          SequenceFile.createWriter(fs, conf, file, IntWritable.class, BytesWritable.class);
      try {
        for (int i = 0; i < length; i++) {
          IntWritable key = new IntWritable(i);
          byte[] data = new byte[random.nextInt(10)];
          random.nextBytes(data);
          BytesWritable value = new BytesWritable(data);
          writer.append(key, value);
        }
      } finally {
        writer.close();
      }

      // try splitting the file in a variety of sizes
      InputFormat<IntWritable, BytesWritable> format =
          new SequenceFileInputFormat<IntWritable, BytesWritable>();
      IntWritable key = new IntWritable();
      BytesWritable value = new BytesWritable();
      for (int i = 0; i < 3; i++) {
        int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
        // LOG.info("splitting: requesting = " + numSplits);
        InputSplit[] splits = format.getSplits(job, numSplits);
        // LOG.info("splitting: got =        " + splits.length);

        // check each split
        BitSet bits = new BitSet(length);
        for (int j = 0; j < splits.length; j++) {
          RecordReader<IntWritable, BytesWritable> reader =
              format.getRecordReader(splits[j], job, reporter);
          try {
            int count = 0;
            while (reader.next(key, value)) {
              // if (bits.get(key.get())) {
              // LOG.info("splits["+j+"]="+splits[j]+" : " +
              // key.get());
              // LOG.info("@"+reader.getPos());
              // }
              assertFalse("Key in multiple partitions.", bits.get(key.get()));
              bits.set(key.get());
              count++;
            }
            // LOG.info("splits["+j+"]="+splits[j]+" count=" +
            // count);
          } finally {
            reader.close();
          }
        }
        assertEquals("Some keys in no partition.", length, bits.cardinality());
      }
    }
  }
コード例 #5
0
public class TestFileOutputCommitter extends TestCase {
  private static Path outDir = new Path(System.getProperty("test.build.data", "/tmp"), "output");

  // A random task attempt id for testing.
  private static String attempt = "attempt_200707121733_0001_m_000000_0";
  private static TaskAttemptID taskID = TaskAttemptID.forName(attempt);
  private Text key1 = new Text("key1");
  private Text key2 = new Text("key2");
  private Text val1 = new Text("val1");
  private Text val2 = new Text("val2");

  @SuppressWarnings("unchecked")
  private void writeOutput(RecordWriter theRecordWriter, Reporter reporter) throws IOException {
    NullWritable nullWritable = NullWritable.get();

    try {
      theRecordWriter.write(key1, val1);
      theRecordWriter.write(null, nullWritable);
      theRecordWriter.write(null, val1);
      theRecordWriter.write(nullWritable, val2);
      theRecordWriter.write(key2, nullWritable);
      theRecordWriter.write(key1, null);
      theRecordWriter.write(null, null);
      theRecordWriter.write(key2, val2);
    } finally {
      theRecordWriter.close(reporter);
    }
  }

  private void setConfForFileOutputCommitter(JobConf job) {
    job.set(JobContext.TASK_ATTEMPT_ID, attempt);
    job.setOutputCommitter(FileOutputCommitter.class);
    FileOutputFormat.setOutputPath(job, outDir);
  }

  @SuppressWarnings("unchecked")
  public void testCommitter() throws Exception {
    JobConf job = new JobConf();
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext));

    committer.setupJob(jContext);
    committer.setupTask(tContext);
    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    // write output
    FileSystem localFs = FileSystem.getLocal(job);
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);

    // validate output
    File expectedFile = new File(new Path(outDir, file).toString());
    StringBuffer expectedOutput = new StringBuffer();
    expectedOutput.append(key1).append('\t').append(val1).append("\n");
    expectedOutput.append(val1).append("\n");
    expectedOutput.append(val2).append("\n");
    expectedOutput.append(key2).append("\n");
    expectedOutput.append(key1).append("\n");
    expectedOutput.append(key2).append('\t').append(val2).append("\n");
    String output = UtilsForTests.slurp(expectedFile);
    assertEquals(output, expectedOutput.toString());

    FileUtil.fullyDelete(new File(outDir.toString()));
  }

  public void testAbort() throws IOException {
    JobConf job = new JobConf();
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext));

    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    // write output
    FileSystem localFs = FileSystem.getLocal(job);
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do abort
    committer.abortTask(tContext);
    File expectedFile =
        new File(new Path(committer.getTempTaskOutputPath(tContext), file).toString());
    assertFalse("task temp dir still exists", expectedFile.exists());

    committer.abortJob(jContext, JobStatus.State.FAILED);
    expectedFile = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString());
    assertFalse("job temp dir still exists", expectedFile.exists());
    assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);
    FileUtil.fullyDelete(new File(outDir.toString()));
  }

  public static class FakeFileSystem extends RawLocalFileSystem {
    public FakeFileSystem() {
      super();
    }

    public URI getUri() {
      return URI.create("faildel:///");
    }

    @Override
    public boolean delete(Path p, boolean recursive) throws IOException {
      throw new IOException("fake delete failed");
    }
  }

  public void testFailAbort() throws IOException {
    JobConf job = new JobConf();
    job.set(FileSystem.FS_DEFAULT_NAME_KEY, "faildel:///");
    job.setClass("fs.faildel.impl", FakeFileSystem.class, FileSystem.class);
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext));

    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    // write output
    FileSystem localFs = new FakeFileSystem();
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do abort
    Throwable th = null;
    try {
      committer.abortTask(tContext);
    } catch (IOException ie) {
      th = ie;
    }
    assertNotNull(th);
    assertTrue(th instanceof IOException);
    assertTrue(th.getMessage().contains("fake delete failed"));
    File jobTmpDir = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString());
    File taskTmpDir = new File(jobTmpDir, "_" + taskID);
    File expectedFile = new File(taskTmpDir, file);
    assertTrue(expectedFile + " does not exists", expectedFile.exists());

    th = null;
    try {
      committer.abortJob(jContext, JobStatus.State.FAILED);
    } catch (IOException ie) {
      th = ie;
    }
    assertNotNull(th);
    assertTrue(th instanceof IOException);
    assertTrue(th.getMessage().contains("fake delete failed"));
    assertTrue("job temp dir does not exists", jobTmpDir.exists());
  }
}
コード例 #6
0
  public void testBinary() throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = new Job(conf);

    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
    Random r = new Random();
    long seed = r.nextLong();
    r.setSeed(seed);

    FileOutputFormat.setOutputPath(job, outdir);

    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);

    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();

    TaskAttemptContext context =
        MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, BytesWritable> outputFormat =
        new SequenceFileAsBinaryOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);

    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
    try {
      for (int i = 0; i < RECORDS; ++i) {
        iwritable = new IntWritable(r.nextInt());
        iwritable.write(outbuf);
        bkey.set(outbuf.getData(), 0, outbuf.getLength());
        outbuf.reset();
        dwritable = new DoubleWritable(r.nextDouble());
        dwritable.write(outbuf);
        bval.set(outbuf.getData(), 0, outbuf.getLength());
        outbuf.reset();
        writer.write(bkey, bval);
      }
    } finally {
      writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);

    InputFormat<IntWritable, DoubleWritable> iformat =
        new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    r.setSeed(seed);
    SequenceFileInputFormat.setInputPaths(job, outdir);
    LOG.info("Reading data by SequenceFileInputFormat");
    for (InputSplit split : iformat.getSplits(job)) {
      RecordReader<IntWritable, DoubleWritable> reader = iformat.createRecordReader(split, context);
      MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
          new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
              job.getConfiguration(),
              context.getTaskAttemptID(),
              reader,
              null,
              null,
              MapReduceTestUtil.createDummyReporter(),
              split);
      reader.initialize(split, mcontext);
      try {
        int sourceInt;
        double sourceDouble;
        while (reader.nextKeyValue()) {
          sourceInt = r.nextInt();
          sourceDouble = r.nextDouble();
          iwritable = reader.getCurrentKey();
          dwritable = reader.getCurrentValue();
          assertEquals(
              "Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*",
              sourceInt,
              iwritable.get());
          assertTrue(
              "Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*",
              Double.compare(dwritable.get(), sourceDouble) == 0);
          ++count;
        }
      } finally {
        reader.close();
      }
    }
    assertEquals("Some records not found", RECORDS, count);
  }
コード例 #7
0
/**
 * Test cases from MessagePackBase64LineInputFormat.
 *
 * @see TestLineInputFormat.java
 */
public class TestMessagePackBase64LineInputFormat extends TestCase {
  public static class MyClass {
    public String s;
    public int v;
  }

  public static class MyClassWritable extends MessagePackWritable<MyClass> {
    protected MyClass getObjectInstance() {
      return new MyClass();
    }
  }

  public static class MyClassMessagePackBase64LineInputFormat
      extends MessagePackBase64LineInputFormat<MyClass, MyClassWritable> {
    protected MyClassWritable getWritableInstance() {
      return new MyClassWritable();
    }
  }

  // --------------

  private static int MAX_LENGTH = 200;
  private final Base64 base64_ = new Base64();
  private static Configuration defaultConf = new Configuration();
  private static FileSystem localFs = null;

  private static Path workDir =
      new Path(new Path(System.getProperty("test.build.data", "."), "data"), "TestLineInputFormat");

  public void testFormat() throws Exception {
    localFs = FileSystem.getLocal(defaultConf);
    localFs.delete(workDir, true);

    Job job = new Job(new Configuration(defaultConf));
    Path file = new Path(workDir, "test.txt");

    int seed = new Random().nextInt();
    Random random = new Random(seed);

    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
      // create a file with length entries
      Writer writer = new OutputStreamWriter(localFs.create(file));
      try {
        MyClass mc = new MyClass();
        for (int i = 0; i < length; i++) {
          mc.s = Integer.toString(i);
          mc.v = i;
          byte[] raw = MessagePack.pack(mc);
          byte[] b64e = base64_.encodeBase64(raw);
          byte[] b64d = base64_.decode(b64e);
          MyClass mc2 = MessagePack.unpack(b64d, mc.getClass());
          assertEquals(mc.s, mc2.s);
          assertEquals(mc.v, mc2.v);

          writer.write(base64_.encodeToString(raw));
        }
      } finally {
        writer.close();
      }
      checkFormat(job);
    }
  }

  void checkFormat(Job job) throws Exception {
    TaskAttemptContext attemptContext =
        new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID("123", 0, false, 1, 2));

    MyClassMessagePackBase64LineInputFormat format = new MyClassMessagePackBase64LineInputFormat();
    FileInputFormat.setInputPaths(job, workDir);

    List<InputSplit> splits = format.getSplits(job);
    for (int j = 0; j < splits.size(); j++) {
      RecordReader<LongWritable, MyClassWritable> reader =
          format.createRecordReader(splits.get(j), attemptContext);
      reader.initialize(splits.get(j), attemptContext);

      int count = 0;
      try {
        while (reader.nextKeyValue()) {
          LongWritable key = reader.getCurrentKey();
          MyClassWritable val = reader.getCurrentValue();
          MyClass mc = val.get();
          assertEquals(mc.v, count);
          assertEquals(mc.s, Integer.toString(count));
          count++;
        }
      } finally {
        reader.close();
      }
    }
  }
}