Example #1
0
  /**
   * Opens the block-compressed file behind {@code genericSplit} and positions the line reader at
   * the first line this reader should return.
   *
   * <p>{@code start}, {@code end} and {@code pos} hold the split's byte offsets shifted left by 16
   * bits, which is the form handed to {@code bin.seek} and returned by {@code
   * bin.getFilePointer()}.
   *
   * @param genericSplit the split to read; must be a {@link FileSplit}
   * @param context the task context the configuration is taken from
   * @throws IOException propagated from opening, seeking or reading the file
   */
  public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    Configuration conf = ContextUtil.getConfiguration(context);
    this.maxLineLength = conf.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);

    FileSplit split = (FileSplit) genericSplit;

    // Derive both bounds from the raw split offset. Reusing the already-shifted start to compute
    // the end would shift the start twice and push the end far beyond the split.
    final long splitStart = split.getStart();
    start = splitStart << 16;
    end = (splitStart + split.getLength()) << 16;

    final Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);

    bin =
        new BlockCompressedInputStream(
            new WrapSeekable<FSDataInputStream>(
                fs.open(file), fs.getFileStatus(file).getLen(), file));

    in = new LineReader(bin, conf);

    if (start != 0) {
      bin.seek(start);

      // Skip the first line: a split that does not begin at offset 0 may start mid-line
      // (presumably that line is left to the reader of the preceding split -- confirm).
      in.readLine(new Text());
      start = bin.getFilePointer();
    }
    this.pos = start;
  }
 /**
  * Builds a {@code QseqRecordReader} for the given split, after publishing the split's string
  * form as the task status.
  *
  * @param genericSplit the split to read; cast to {@link FileSplit}
  * @param context the task context whose status is set and whose configuration is used
  * @return a new reader over {@code genericSplit}
  * @throws IOException declared by the signature; propagated from the reader's constructor
  * @throws InterruptedException declared by the signature
  */
 public RecordReader<Text, SequencedFragment> createRecordReader(
     InputSplit genericSplit, TaskAttemptContext context)
     throws IOException, InterruptedException {
   final String splitDescription = genericSplit.toString();
   context.setStatus(splitDescription);

   // cast as per example in TextInputFormat
   final QseqRecordReader reader =
       new QseqRecordReader(ContextUtil.getConfiguration(context), (FileSplit) genericSplit);
   return reader;
 }
Example #3
0
 /**
  * Passes the incoming record through {@code Utils.correctSAMRecordForMerging} and emits it keyed
  * by its read name.
  *
  * @param ignored the input key; not used
  * @param wrec the wrapped record; written out as the value
  * @param ctx the mapper context supplying the configuration and receiving the output
  * @throws InterruptedException propagated from {@code ctx.write}
  * @throws IOException propagated from {@code ctx.write}
  */
 @Override
 protected void map(
     LongWritable ignored,
     SAMRecordWritable wrec,
     Mapper<LongWritable, SAMRecordWritable, Text, SAMRecordWritable>.Context ctx)
     throws InterruptedException, IOException {
   Utils.correctSAMRecordForMerging(wrec.get(), ContextUtil.getConfiguration(ctx));

   // The read name is the output key, so records with the same name reach the same reduce call.
   final Text readName = new Text(wrec.get().getReadName());
   ctx.write(readName, wrec);
 }
Example #4
0
  /**
   * Writes out every record grouped under one key, filling in mate information for each pair of
   * primary alignments encountered.
   *
   * <p>Records whose not-primary-alignment flag is set are written unchanged. Primary records are
   * taken two at a time in iteration order; each pair is given the merged header and passed to
   * {@code SamPairUtil.setMateInfo} before being written. A primary left without a partner is
   * written as-is.
   *
   * @param key the key shared by {@code records}; reused as the output key
   * @param records the records grouped under {@code key}
   * @param ctx the reducer context supplying the configuration and receiving the output
   * @throws IOException propagated from {@code ctx.write} or the header lookup
   * @throws InterruptedException propagated from {@code ctx.write}
   */
  @Override
  protected void reduce(
      Text key,
      Iterable<SAMRecordWritable> records,
      Reducer<Text, SAMRecordWritable, Text, SAMRecordWritable>.Context ctx)
      throws IOException, InterruptedException {
    // Non-primary records are simply written out, but as long as we can find
    // two primaries, pair them up.

    final SAMFileHeader header =
        Utils.getSAMHeaderMerger(ContextUtil.getConfiguration(ctx)).getMergedHeader();

    final Iterator<SAMRecordWritable> it = records.iterator();

    while (it.hasNext()) {
      SAMRecordWritable a = it.next();

      if (a.get().getNotPrimaryAlignmentFlag()) {
        ctx.write(key, a);
        continue;
      }

      // Cache the record since the iterator does its own caching, meaning
      // that after another it.next() we would have a == b.
      wrec.set(a.get());
      a = wrec;

      // Search for the next primary. b is assigned only when one is found, so that a trailing
      // non-primary record (already written below) is never mistaken for a mate and written
      // a second time.
      SAMRecordWritable b = null;
      while (it.hasNext()) {
        final SAMRecordWritable candidate = it.next();
        if (!candidate.get().getNotPrimaryAlignmentFlag()) {
          b = candidate;
          break;
        }
        ctx.write(key, candidate);
      }

      if (b == null) {
        // No more primaries, so just write the unpaired one as-is.
        ctx.write(key, a);
        break;
      }

      a.get().setHeader(header);
      b.get().setHeader(header);
      SamPairUtil.setMateInfo(a.get(), b.get(), header);

      ctx.write(key, a);
      ctx.write(key, b);
    }
  }
Example #5
0
  /**
   * Creates a line-oriented record writer whose output is block-compressed into the task's default
   * work file.
   *
   * <p>The {@code BlockCompressedOutputStream} is deliberately only flushed, never closed; the
   * underlying file stream is closed directly instead (see the comment in {@code close()}).
   *
   * @param ctx the task context supplying the configuration and work file location
   * @return a writer that emits each {@link Text} value as one line
   * @throws IOException propagated from creating the output file
   */
  @Override
  public RecordWriter<NullWritable, Text> getRecordWriter(TaskAttemptContext ctx)
      throws IOException {
    Path path = getDefaultWorkFile(ctx, "");
    FileSystem fs = path.getFileSystem(ContextUtil.getConfiguration(ctx));

    final OutputStream file = fs.create(path);

    return new TextOutputFormat.LineRecordWriter<NullWritable, Text>(
        new DataOutputStream(
            new FilterOutputStream(new BlockCompressedOutputStream(file, null)) {
              @Override
              public void write(byte[] b, int off, int len) throws IOException {
                // FilterOutputStream's inherited bulk write forwards one byte at a time;
                // hand the whole range to the wrapped stream instead.
                this.out.write(b, off, len);
              }

              @Override
              public void close() throws IOException {
                try {
                  // Don't close the BlockCompressedOutputStream, so we don't
                  // get an end-of-file sentinel.
                  this.out.flush();
                } finally {
                  // Instead, close the file stream directly. Doing so in a finally block
                  // releases the file even when the flush fails.
                  file.close();
                }
              }
            }));
  }
 /**
  * Reports a file as splittable only when no compression codec is found for its path.
  *
  * @param context the job context the configuration is taken from
  * @param path the file being considered
  * @return {@code true} if the codec lookup for {@code path} yields {@code null}
  */
 @Override
 public boolean isSplitable(JobContext context, Path path) {
   final CompressionCodecFactory codecs =
       new CompressionCodecFactory(ContextUtil.getConfiguration(context));
   return codecs.getCodec(path) == null;
 }