예제 #1
0
  /**
   * Feeds every record of {@code TESTFILE} through a {@link RecordCountMultiLevelCollector} built
   * for the supplied accumulation levels, then compares each per-unit tally with the expected
   * counts held in {@code accumulationLevelToPerUnitReads}.
   *
   * <p>Also checks that every metric was marked finished, that the collector holds exactly the
   * expected number of units, and that the tallies add up to the collector's processed count.
   *
   * @param accumulationLevels the accumulation levels the collector is asked to track
   */
  @Test(dataProvider = "variedAccumulationLevels")
  public void multilevelCollectorTest(final Set<MetricAccumulationLevel> accumulationLevels) {
    final SAMFileReader in = new SAMFileReader(TESTFILE);
    final RecordCountMultiLevelCollector collector;
    try {
      collector =
          new RecordCountMultiLevelCollector(
              accumulationLevels, in.getFileHeader().getReadGroups());

      for (final SAMRecord rec : in) {
        collector.acceptRecord(rec, null);
      }
    } finally {
      // Release the file handle even when reading or collecting fails.
      in.close();
    }

    collector.finish();

    int totalProcessed = 0;
    int totalMetrics = 0;
    for (final MetricAccumulationLevel level : accumulationLevels) {
      final Map<String, Integer> keyToMetrics = accumulationLevelToPerUnitReads.get(level);
      for (final Map.Entry<String, Integer> entry : keyToMetrics.entrySet()) {
        final TotalNumberMetric metric = collector.getUnitsToMetrics().get(entry.getKey());
        Assert.assertEquals(entry.getValue(), metric.TALLY);
        Assert.assertTrue(metric.FINISHED);
        totalProcessed += metric.TALLY;
        totalMetrics += 1;
      }
    }

    Assert.assertEquals(collector.getUnitsToMetrics().size(), totalMetrics);
    Assert.assertEquals(totalProcessed, collector.getNumProcessed());
  }
  /**
   * Round-trips read-name lengths through a Huffman integer codec.
   *
   * <p>Up to 100000 read names are taken from the input BAM; their lengths are fed to a
   * {@code HuffmanParamsCalculator}, the resulting parameters are used to build a codec, every
   * length is written to an in-memory bit stream and then read back. The test fails on the first
   * decoded value that differs from the original length.
   *
   * @throws IOException if the bit streams fail
   */
  @Test
  public void test2() throws IOException {
    SAMFileReader r =
        new SAMFileReader(
            new File("c:/temp/HG00096.mapped.illumina.mosaik.GBR.exome.20110411.chr20.bam"));

    CompressionHeaderFactory.HuffmanParamsCalculator c = new HuffmanParamsCalculator();

    String[] names = new String[100000];
    // Number of names actually read: the file may hold fewer records than the array can take,
    // and the unused slots stay null.
    int count = 0;
    try {
      SAMRecordIterator iterator = r.iterator();
      try {
        while (count < names.length && iterator.hasNext()) {
          names[count] = iterator.next().getReadName();
          c.add(names[count].length());
          count++;
        }
      } finally {
        iterator.close();
      }
    } finally {
      r.close();
    }
    c.calculate();

    int[] values = c.values();
    int[] lens = c.bitLens();
    System.out.println(Arrays.toString(values));
    System.out.println(Arrays.toString(lens));

    EncodingParams params = HuffmanIntegerEncoding.toParam(values, lens);
    HuffmanIntegerEncoding e = new HuffmanIntegerEncoding();
    e.fromByteArray(params.params);

    BitCodec<Integer> codec = e.buildCodec(null, null);

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DefaultBitOutputStream bos = new DefaultBitOutputStream(baos);
    // Only the filled part of the array is encoded.
    for (int i = 0; i < count; i++) {
      codec.write(bos, names[i].length());
    }

    bos.close();

    codec = e.buildCodec(null, null);
    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    DefaultBitInputStream bis = new DefaultBitInputStream(bais);

    for (int i = 0; i < count; i++) {
      int v = codec.read(bis);
      if (v != names[i].length()) {
        fail("Mismatch: " + v + " vs " + names[i].length());
      }
    }
  }
예제 #3
0
  /**
   * Builds a {@code BamStats01Report} from the reader's header and adds the first alignment to it.
   *
   * @return 0 on success, -1 when no input is available or an error occurs
   */
  @Override
  protected int doWork() {
    // NOTE(review): nothing in this method opens an input, so the reader is always null here.
    // TODO: open the real SAM/BAM input before using it.
    SAMFileReader samFileReader = null;
    SAMRecordIterator iter = null;
    try {
      if (samFileReader == null) {
        // Report the missing input through the status code instead of a NullPointerException.
        System.err.println("No SAM/BAM input has been opened.");
        return -1;
      }
      BamStats01Report report = new BamStats01Report(samFileReader.getFileHeader());
      iter = samFileReader.iterator();
      // An input without records is not an error: there is simply nothing to add.
      if (iter.hasNext()) {
        report.addAlignment(iter.next());
      }
    } catch (Exception err) {
      System.err.println("Failed to collect BAM statistics: " + err);
      return -1;
    } finally {
      if (iter != null) {
        iter.close();
      }
      if (samFileReader != null) {
        samFileReader.close();
      }
    }

    return 0;
  }
예제 #4
0
  /**
   * Copies INPUT to OUTPUT while replacing the header's read groups with a single new read group
   * and tagging every record with that read group's ID.
   *
   * <p>The output header is a clone of the input header; its sort order is overridden when
   * SORT_ORDER is set. The writer is told the records are presorted only when the output sort
   * order equals the input sort order.
   *
   * @return always 0
   */
  protected int doWork() {
    IoUtil.assertFileIsReadable(INPUT);
    IoUtil.assertFileIsWritable(OUTPUT);

    final SAMFileReader reader = new SAMFileReader(INPUT);

    // Describe the one read group that every record will end up in.
    final SAMReadGroupRecord readGroup = new SAMReadGroupRecord(RGID);
    readGroup.setLibrary(RGLB);
    readGroup.setPlatform(RGPL);
    readGroup.setSample(RGSM);
    readGroup.setPlatformUnit(RGPU);
    // The remaining fields are optional and only set when supplied.
    if (RGCN != null) {
      readGroup.setSequencingCenter(RGCN);
    }
    if (RGDS != null) {
      readGroup.setDescription(RGDS);
    }
    if (RGDT != null) {
      readGroup.setRunDate(RGDT);
    }

    final String summary =
        String.format(
            "Created read group ID=%s PL=%s LB=%s SM=%s%n",
            readGroup.getId(),
            readGroup.getPlatform(),
            readGroup.getLibrary(),
            readGroup.getSample());
    log.info(summary);

    // Derive the output header from the input header.
    final SAMFileHeader inHeader = reader.getFileHeader();
    final SAMFileHeader outHeader = inHeader.clone();
    outHeader.setReadGroups(Arrays.asList(readGroup));
    if (SORT_ORDER != null) {
      outHeader.setSortOrder(SORT_ORDER);
    }

    // Records can be streamed straight through only when the sort order is unchanged.
    final boolean presorted = outHeader.getSortOrder() == inHeader.getSortOrder();
    final SAMFileWriter writer =
        new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader, presorted, OUTPUT);

    final ProgressLogger progress = new ProgressLogger(log);
    for (final SAMRecord record : reader) {
      record.setAttribute(SAMTag.RG.name(), RGID);
      writer.addAlignment(record);
      progress.record(record);
    }

    // Release the input first, then flush and close the output.
    reader.close();
    writer.close();
    return 0;
  }
  /**
   * Benchmark body: for each repetition, opens {@code inputFile}, builds the read properties with
   * the configured downsampling method, and walks every locus produced by a
   * {@code LocusIteratorByState} over the reads that pass the {@code UnmappedReadFilter}.
   *
   * @param reps number of times the whole traversal is repeated
   */
  public void timeDownsampling(int reps) {
    int repetition = 0;
    while (repetition < reps) {
      SAMFileReader samReader = new SAMFileReader(inputFile);

      // Validation is switched off entirely for the benchmark.
      ValidationExclusion excludeAll =
          new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL));
      ReadProperties properties =
          new ReadProperties(
              Collections.<SAMReaderID>singletonList(new SAMReaderID(inputFile, new Tags())),
              samReader.getFileHeader(),
              false,
              SAMFileReader.ValidationStringency.SILENT,
              downsampling.create(),
              excludeAll,
              Collections.<ReadFilter>emptyList(),
              false,
              false,
              BAQ.CalculationMode.OFF,
              BAQ.QualityMode.DONT_MODIFY,
              null, // no BAQ
              null, // no BQSR
              (byte) 0);

      GenomeLocParser locParser =
          new GenomeLocParser(samReader.getFileHeader().getSequenceDictionary());

      // Filter unmapped reads.  TODO: is this always strictly necessary?  Who in the GATK normally
      // filters these out?
      UnmappedReadFilter unmappedFilter = new UnmappedReadFilter();
      Iterator<SAMRecord> filteredReads = new FilteringIterator(samReader.iterator(), unmappedFilter);

      LocusIteratorByState loci =
          new LocusIteratorByState(
              filteredReads,
              properties,
              locParser,
              LocusIteratorByState.sampleListForSAMWithoutReadGroups());

      // Drain the iterator; only the traversal itself is being timed.
      while (loci.hasNext()) {
        loci.next().getLocation();
      }
      samReader.close();
      repetition++;
    }
  }
예제 #6
0
  /**
   * Asserts that the inputs are readable and the outputs writable, checks that the interval lists
   * (and the reference, when given) share the sequence dictionary of INPUT, then passes every
   * record to a {@code TargetMetricsCollector} and writes the collected metrics to OUTPUT.
   *
   * @return always 0
   */
  protected int doWork() {
    final File probeIntervals = getProbeIntervals();

    IoUtil.assertFileIsReadable(probeIntervals);
    IoUtil.assertFileIsReadable(TARGET_INTERVALS);
    IoUtil.assertFileIsReadable(INPUT);
    IoUtil.assertFileIsWritable(OUTPUT);
    if (PER_TARGET_COVERAGE != null) {
      IoUtil.assertFileIsWritable(PER_TARGET_COVERAGE);
    }

    final SAMFileReader samReader = new SAMFileReader(INPUT);
    try {
      // Validate that the targets and baits have the same references as the reads file
      SequenceUtil.assertSequenceDictionariesEqual(
          samReader.getFileHeader().getSequenceDictionary(),
          IntervalList.fromFile(TARGET_INTERVALS).getHeader().getSequenceDictionary(),
          INPUT,
          TARGET_INTERVALS);
      SequenceUtil.assertSequenceDictionariesEqual(
          samReader.getFileHeader().getSequenceDictionary(),
          IntervalList.fromFile(probeIntervals).getHeader().getSequenceDictionary(),
          INPUT,
          probeIntervals);

      // The reference is optional; when present it must match the reads file too.
      ReferenceSequenceFile ref = null;
      if (REFERENCE_SEQUENCE != null) {
        IoUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
        ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
        SequenceUtil.assertSequenceDictionariesEqual(
            samReader.getFileHeader().getSequenceDictionary(),
            ref.getSequenceDictionary(),
            INPUT,
            REFERENCE_SEQUENCE);
      }

      final TargetMetricsCollector collector =
          makeCollector(
              METRIC_ACCUMULATION_LEVEL,
              samReader.getFileHeader().getReadGroups(),
              ref,
              PER_TARGET_COVERAGE,
              TARGET_INTERVALS,
              probeIntervals,
              getProbeSetName());

      // Add each record to the requested collectors
      final Iterator<SAMRecord> records = samReader.iterator();
      final ProgressLogger progress = new ProgressLogger(log);

      while (records.hasNext()) {
        final SAMRecord sam = records.next();
        collector.acceptRecord(sam, null);
        progress.record(sam);
      }

      // Write the output file
      final MetricsFile<HsMetrics, Integer> metrics = getMetricsFile();
      collector.finish();

      collector.addAllLevelsToFile(metrics);

      metrics.write(OUTPUT);
    } finally {
      // The reader was previously never closed; release it on success and on failure.
      samReader.close();
    }

    return 0;
  }
예제 #7
0
  /**
   * Dumps a SAM/BAM input as XML on standard output, one {@code <read>} element per record.
   *
   * <p>Options: {@code -h} usage, {@code -v} version, {@code -L} log level, {@code -r} indexed
   * reference FASTA (required). At most one positional argument (the BAM file) is accepted; with
   * none, records are read from stdin. For mapped reads an {@code <align>} element lists one empty
   * element per cigar position, annotated with read/reference indexes and bases.
   *
   * @param args command-line arguments
   * @return 0 on success (or after printing usage/version), -1 on any error
   */
  @Override
  public int doWork(String[] args) {
    File refFile = null;
    com.github.lindenb.jvarkit.util.cli.GetOpt getopt =
        new com.github.lindenb.jvarkit.util.cli.GetOpt();
    int c;
    while ((c = getopt.getopt(args, "hvL:r:")) != -1) {
      switch (c) {
        case 'h':
          printUsage();
          return 0;
        case 'v':
          System.out.println(getVersion());
          return 0;
        case 'L':
          getLogger().setLevel(java.util.logging.Level.parse(getopt.getOptArg()));
          break;
        case 'r':
          refFile = new File(getopt.getOptArg());
          break;
        case ':':
          System.err.println("Missing argument for option -" + getopt.getOptOpt());
          return -1;
        default:
          System.err.println("Unknown option -" + getopt.getOptOpt());
          return -1;
      }
    }

    if (refFile == null) {
      error("Undefined REF file");
      return -1;
    }

    // Zero positional arguments means stdin, one is the BAM file, anything else is a usage error
    // (extra arguments used to fall back to stdin silently).
    File bamFile = null;
    final int optInd = getopt.getOptInd();
    if (optInd == args.length) {
      info("reading from stdin.");
    } else if (optInd + 1 == args.length) {
      bamFile = new File(args[optInd]);
    } else {
      error("Illegal number of arguments.");
      return -1;
    }

    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    SAMFileReader samFileReader = null;
    SAMRecordIterator iter = null;

    try {
      GenomicSequence genomicSequence = null;
      indexedFastaSequenceFile = new IndexedFastaSequenceFile(refFile);
      SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
      if (bamFile == null) {
        samFileReader = new SAMFileReader(System.in);
      } else {
        samFileReader = new SAMFileReader(bamFile);
      }
      XMLOutputFactory xmlfactory = XMLOutputFactory.newInstance();
      XMLStreamWriter w = xmlfactory.createXMLStreamWriter(System.out, "UTF-8");
      w.writeStartDocument("UTF-8", "1.0");
      w.writeStartElement("sam");
      // Attributes must be written before any content of the element, so they precede the
      // comment. "ref" is the reference FASTA and "bam" the alignment input.
      w.writeAttribute("ref", refFile.getPath());
      w.writeAttribute("bam", (bamFile == null ? "stdin" : bamFile.getPath()));
      w.writeComment(getProgramCommandLine());

      iter = samFileReader.iterator();
      while (iter.hasNext()) {
        SAMRecord rec = iter.next();

        final byte[] readbases = rec.getReadBases();
        w.writeStartElement("read");

        w.writeStartElement("name");
        w.writeCharacters(rec.getReadName());
        w.writeEndElement();
        w.writeStartElement("sequence");
        w.writeCharacters(new String(readbases));
        w.writeEndElement();
        w.writeStartElement("flags");
        w.writeAttribute("paired", String.valueOf(rec.getReadPairedFlag()));
        w.writeAttribute(
            "failsVendorQual", String.valueOf(rec.getReadFailsVendorQualityCheckFlag()));
        w.writeAttribute("mapped", String.valueOf(!rec.getReadUnmappedFlag()));
        w.writeAttribute("strand", (rec.getReadNegativeStrandFlag() ? "-" : "+"));

        if (rec.getReadPairedFlag()) {
          w.writeAttribute("mate-mapped", String.valueOf(!rec.getMateUnmappedFlag()));
          w.writeAttribute("mate-strand", (rec.getMateNegativeStrandFlag() ? "-" : "+"));
          w.writeAttribute("proper-pair", String.valueOf(rec.getProperPairFlag()));
        }

        w.writeCharacters(String.valueOf(rec.getFlags()));
        w.writeEndElement();
        if (!rec.getReadUnmappedFlag()) {
          w.writeStartElement("qual");
          w.writeCharacters(String.valueOf(rec.getMappingQuality()));
          w.writeEndElement();

          w.writeStartElement("chrom");
          w.writeAttribute("index", String.valueOf(rec.getReferenceIndex()));
          w.writeCharacters(rec.getReferenceName());
          w.writeEndElement();
          w.writeStartElement("pos");
          w.writeCharacters(String.valueOf(rec.getAlignmentStart()));
          w.writeEndElement();
          w.writeStartElement("cigar");
          w.writeCharacters(rec.getCigarString());
          w.writeEndElement();
        }

        // Mate information only exists for paired reads; asking an unpaired read for it is
        // rejected by the SAM record API.
        if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
          w.writeStartElement("mate-chrom");
          w.writeAttribute("index", String.valueOf(rec.getMateReferenceIndex()));
          w.writeCharacters(rec.getMateReferenceName());
          w.writeEndElement();
          w.writeStartElement("mate-pos");
          w.writeCharacters(String.valueOf(rec.getMateAlignmentStart()));
          w.writeEndElement();
        }

        if (!rec.getReadUnmappedFlag()) {
          // (Re)load the reference only when the record is on a different chromosome than the
          // one currently cached.
          if (genomicSequence == null
              || !genomicSequence.getChrom().equals(rec.getReferenceName())) {
            genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
          }

          w.writeStartElement("align");

          int readIndex = 0; // 0-based offset in the read bases
          int refIndex = rec.getAlignmentStart(); // 1-based reference position

          for (final CigarElement e : rec.getCigar().getCigarElements()) {
            switch (e.getOperator()) {
              case H:
                break; // ignore hard clips
              case P:
                break; // ignore pads
              case I: // cont.
              case S:
                {
                  // Consumes read bases only.
                  final int length = e.getLength();
                  for (int i = 0; i < length; ++i) {
                    w.writeEmptyElement(e.getOperator().name());
                    w.writeAttribute("read-index", String.valueOf(readIndex + 1));
                    if (readIndex >= 0 && readIndex < readbases.length) {
                      w.writeAttribute("read-base", String.valueOf((char) (readbases[readIndex])));
                    }
                    readIndex++;
                  }
                  break;
                }
              case N: // cont. -- reference skip
              case D:
                {
                  // Consumes reference bases only.
                  final int length = e.getLength();
                  for (int i = 0; i < length; ++i) {
                    w.writeEmptyElement(e.getOperator().name());
                    w.writeAttribute("ref-index", String.valueOf(refIndex));
                    if (refIndex >= 1 && refIndex <= genomicSequence.length()) {
                      w.writeAttribute(
                          "ref-base", String.valueOf(genomicSequence.charAt(refIndex - 1)));
                    }
                    refIndex++;
                  }
                  break;
                }
              case M:
              case EQ:
              case X:
                {
                  // Consumes both read and reference bases.
                  final int length = e.getLength();
                  for (int i = 0; i < length; ++i) {
                    w.writeEmptyElement(e.getOperator().name());
                    char baseRead = '\0';
                    if (readIndex >= 0 && readIndex < readbases.length) {
                      baseRead = (char) (readbases[readIndex]);
                      w.writeAttribute("read-index", String.valueOf(readIndex + 1));
                      w.writeAttribute("read-base", String.valueOf(baseRead));
                    }
                    w.writeAttribute("ref-index", String.valueOf(refIndex));
                    if (refIndex >= 1 && refIndex <= genomicSequence.length()) {
                      char baseRef = genomicSequence.charAt(refIndex - 1);
                      w.writeAttribute("ref-base", String.valueOf(baseRef));
                      if (Character.toUpperCase(baseRef) != Character.toUpperCase(baseRead)) {
                        w.writeAttribute("mismatch", "true");
                      }
                    }

                    refIndex++;
                    readIndex++;
                  }
                  break;
                }

              default:
                throw new IllegalStateException(
                    "Case statement didn't deal with cigar op: " + e.getOperator());
            }
          }

          w.writeEndElement(); // align
        }

        w.writeEndElement(); // read
      }
      // The iterator is closed once, after the last record, not inside the loop.
      iter.close();
      iter = null;

      w.writeEndElement(); // sam
      w.writeEndDocument();
      w.flush();
      w.close();
    } catch (Exception err) {
      error(err);
      return -1;
    } finally {
      CloserUtil.close(iter);
      CloserUtil.close(samFileReader);
      CloserUtil.close(indexedFastaSequenceFile);
    }
    return 0;
  }
예제 #8
0
  /**
   * Debug driver that prints, for up to 10000 mapped reads, the read and reference bases followed
   * by a gapped read/reference alignment rebuilt from the cigar.
   *
   * <p>The incoming arguments are overwritten with a fixed input path, and the reference is read
   * from a fixed hg19 FASTA. When an aligned position falls outside the fetched reference bases a
   * diagnostic block starting with "SHORT!" is printed and the alignment of that read is cut short.
   *
   * @param args ignored; replaced inside the method
   */
  public static void main(String[] args) {
    args =
        new String[] {
          "/commun/data/users/cfaucheron/aln_20120329/S0529/data_S0529/S0529_sort.nodup.bam"
        };
    ReferenceSequenceFile rsf =
        ReferenceSequenceFileFactory.getReferenceSequenceFile(
            new File("/commun/data/pubdb/ucsc/hg19/chromosomes/hg19.fa"));
    int count = 0;
    for (String filename : args) {
      SAMFileReader samIn = new SAMFileReader(new File(filename));
      SAMRecordIterator records = samIn.iterator();
      while (records.hasNext()) {
        SAMRecord rec = records.next();
        if (rec.getReadUnmappedFlag()) {
          continue;
        }

        // Only mapped reads count towards the limit.
        if (++count > 10000) {
          break;
        }

        if (rec.getAlignmentStart() > rec.getAlignmentEnd()) {
          throw new IllegalStateException();
        }

        // Fetch the reference up to whichever is further: the alignment end or the padded length.
        int fetchEnd =
            Math.max(
                rec.getAlignmentEnd(),
                rec.getAlignmentStart() + rec.getCigar().getPaddedReferenceLength());
        byte bases[] =
            rsf.getSubsequenceAt(rec.getReferenceName(), rec.getAlignmentStart(), fetchEnd)
                .getBases();

        Iterator<CigarAlignment> positions = CigarAlignment.iterator(rec);
        StringBuilder readLine = new StringBuilder();
        StringBuilder refLine = new StringBuilder();

        while (positions.hasNext()) {
          CigarAlignment caln = positions.next();
          // Offset of this aligned position inside the fetched reference bases.
          int offset = caln.getReferencePosition1() - rec.getAlignmentStart();

          if (offset >= bases.length) {
            System.out.println("SHORT!");
            System.out.println("op:" + caln.getCigarOperator());
            System.out.println("read start:" + rec.getAlignmentStart());
            System.out.println("clan.pos1:" + caln.getReferencePosition1());
            System.out.println("read end:" + rec.getAlignmentEnd());
            System.out.println("bases.length:" + bases.length);
            System.out.println(
                "getPaddedReferenceLength:" + rec.getCigar().getPaddedReferenceLength());
            System.out.println("getReferenceLength:" + rec.getCigar().getReferenceLength());
            System.out.println("getReadLength:" + rec.getCigar().getReadLength());
            System.out.println(
                "cigar.read.length:" + Cigar.getReadLength(rec.getCigar().getCigarElements()));
            count = 2000;
            break;
          }

          if (caln.isInsertRef()) {
            // Base present in the read only: gap on the reference line.
            refLine.append("-");
            readLine.append(caln.getReadBase());
          } else if (caln.isDeletionRef()) {
            // Base present in the reference only: gap on the read line.
            refLine.append((char) bases[offset]);
            readLine.append("-");
          } else {
            refLine.append((char) bases[offset]);
            readLine.append(caln.getReadBase());
          }
        }

        System.out.println(
            rec.getCigarString() + " " + rec.getReferenceName() + ":" + rec.getAlignmentStart());
        System.out.println("ref :" + new String(bases));
        System.out.println("read:" + new String(rec.getReadBases()));
        System.out.println();
        System.out.println(readLine);
        System.out.println(refLine);
        System.out.println();
      }
      samIn.close();
    }
  }
예제 #9
0
  /**
   * Command-line entry point: computes coverage for a BAM file, either over one region
   * ({@code -tname}/{@code -tstart}/{@code -tend}) or over every row of a tab-separated targets
   * file ({@code -targets}) whose header line is {@code Name, Chromosome, Start, End}.
   *
   * <p>Other switches: {@code -bam} input file (required), {@code -of} output file,
   * {@code -min-quality}, {@code -verbose}. Usage errors, unreadable or malformed target files
   * are reported on stderr and end the process with status 1.
   *
   * @param argv command-line switches
   */
  public static void main(String[] argv) {
    SAMFileReader.setDefaultValidationStringency(SAMFileReader.ValidationStringency.SILENT);
    // STFU

    SAMResource sr = null;
    SAMRegion region = new SAMRegion();
    region.range = new Range();
    region.range.start = -1;
    region.range.end = -1;

    String outfile = null;
    String target_file = null;
    SAMCoverage sc = new SAMCoverage();

    for (int i = 0; i < argv.length; i++) {
      if (argv[i].equals("-bam")) {
        sr = new SAMResource();
        //	sr.import_data(SAMResourceTags.SAM_URL, argv[++i]);
        sr.set_file(argv[++i]);
        sr.detect_sample_id();
      } else if (argv[i].equals("-targets")) {
        target_file = argv[++i];
      } else if (argv[i].equals("-tname")) {
        region.tname = argv[++i];
      } else if (argv[i].equals("-verbose")) {
        sc.set_verbose(true);
      } else if (argv[i].equals("-tstart")) {
        region.range.start = Integer.parseInt(argv[++i]);
      } else if (argv[i].equals("-tend")) {
        region.range.end = Integer.parseInt(argv[++i]);
      } else if (argv[i].equals("-of")) {
        outfile = argv[++i];
      } else if (argv[i].equals("-min-quality")) {
        sc.set_min_quality(Integer.parseInt(argv[++i]));
      } else {
        System.err.println("error: unknown switch " + argv[i]); // debug
        System.exit(1);
      }
    }

    // Without a targets file a complete single region is required.
    String error = null;
    if (sr == null) {
      error = "specify -bam [file]";
    } else if (target_file == null) {
      if (region.tname == null) {
        error = "specify -tname";
      } else if (region.range.start == -1) {
        error = "specify -tstart";
      } else if (region.range.end == -1) {
        error = "specify -tend";
      }
    }

    if (error != null) {
      System.err.println("ERROR: " + error); // debug
      // Same exit status as the other failure paths of this method.
      System.exit(1);
      return;
    }

    // Only reached with a non-null resource: a missing -bam was reported above. This call used
    // to run before the check and failed with a NullPointerException instead of the message.
    sr.set_region(region);

    if (target_file != null) {
      BufferedReader br = null;
      try {
        br = new BufferedReader(new FileReader(new File(target_file)));
        String line = br.readLine();
        // An empty file has no header line at all.
        if (line == null) {
          throw new IOException("file format error");
        }
        String[] headers = line.split("\t");

        boolean headerOk =
            headers.length >= 4
                && headers[0].equals("Name")
                && headers[1].equals("Chromosome")
                && headers[2].equals("Start")
                && headers[3].equals("End");
        if (!headerOk) {
          throw new IOException("file format error");
        }

        WorkingFile wf = null;
        if (outfile != null) {
          wf = new WorkingFile(outfile);
          sc.setPrintStream(wf.getPrintStream());
        }

        // One coverage run per target row; the shared region object is updated in place.
        while ((line = br.readLine()) != null) {
          String[] row = line.split("\t");

          region.tname = row[1];
          region.range.start = Integer.parseInt(row[2]);
          region.range.end = Integer.parseInt(row[3]);
          sc.set_name(row[0] + "," + row[1]);
          sc.find_coverage(sr);
        }

        if (wf != null) {
          wf.finish();
        }
      } catch (Exception e) {
        System.err.println("ERROR: " + e); // debug
        e.printStackTrace();
        System.exit(1);
      } finally {
        if (br != null) {
          try {
            br.close();
          } catch (IOException ignored) {
            // Nothing useful can be done about a failed close of a file that was only read.
          }
        }
      }
    } else {
      sc.set_outfile(outfile);
      sc.find_coverage(sr);
    }
  }
 public SplitBamByChromosomesTest() {
   TimeZone.setDefault(TimeZone.getTimeZone("GMT"));
   SAMFileReader.setDefaultValidationStringency(SAMFileReader.ValidationStringency.SILENT);
 }
예제 #11
0
파일: FTPTest.java 프로젝트: tcarver/crawl2
  /**
   * Opens the BAM behind an FTP URL, logs its header attributes and sequences, then queries a
   * window of every sequence and asserts that each returned record starts at or after the window
   * start.
   *
   * <p>The window is 1000-100000, with the start reset to 0 for sequences no longer than 1000 and
   * the end clamped to the sequence length.
   *
   * @param url location of the BAM file
   * @throws SocketException declared by the original signature
   * @throws IOException declared by the original signature
   */
  public void run(URL url) throws SocketException, IOException {

    FTPSeekableStream fss = new FTPSeekableStream(url);
    File index = fss.getIndexFile();

    SAMFileReader reader = new SAMFileReader(fss, index, false);
    try {
      reader.setValidationStringency(ValidationStringency.SILENT);

      logger.info("attributes");
      for (Map.Entry<String, String> entry : reader.getFileHeader().getAttributes()) {
        logger.info(String.format("%s : %s", entry.getKey(), entry.getValue()));
      }

      logger.info("sequences");
      for (SAMSequenceRecord ssr : reader.getFileHeader().getSequenceDictionary().getSequences()) {
        logger.info(String.format("%s : %s ", ssr.getSequenceName(), ssr.getSequenceLength()));
      }

      logger.info("sequences");
      for (SAMSequenceRecord ssr : reader.getFileHeader().getSequenceDictionary().getSequences()) {

        int length = ssr.getSequenceLength();

        // Short sequences are queried from their very beginning.
        int min = (length > 1000) ? 1000 : 0;
        // Never query past the end of the sequence.
        int max = Math.min(100000, length);

        logger.info(
            String.format("Sequence: %s (%s) %s-%s", ssr.getSequenceName(), length, min, max));

        SAMRecordIterator i = reader.query(ssr.getSequenceName(), min, max, false);
        try {
          while (i.hasNext()) {
            SAMRecord record = i.next();
            logger.info(
                String.format(
                    "Read: %s (%s (%s-%s) %s) / %s",
                    record.getReadName(),
                    min,
                    record.getAlignmentStart(),
                    record.getAlignmentEnd(),
                    max,
                    record.getFlags()));

            // NOTE(review): the query is made with contained=false, which presumably also returns
            // reads that merely overlap the window and so may start before min. Confirm this
            // assertion is the intended check.
            assertTrue(record.getAlignmentStart() >= min);
          }
        } finally {
          // Close the iterator even when an assertion fails, so the reader is left usable.
          i.close();
        }

        logger.info("_________________________________________________");
      }

      logger.info("Done");
    } finally {
      // The reader was previously never closed.
      reader.close();
    }
  }
예제 #12
0
  /**
   * Converts a SAM/BAM input (file argument or stdin) to FASTQ.
   *
   * <p>Primary alignments are turned into {@code MappedFastq} entries (reads on the negative
   * strand are reverse-complemented and their qualities reversed), collected in a
   * {@code SortingCollection} and written back grouped by read name. Paired input goes to the
   * forward ({@code -F}) and reverse ({@code -R}) files, or interleaved to stdout when a file is
   * not given; single-end input goes to the forward file or stdout. A mix of paired and single
   * reads is an error.
   *
   * <p>Options besides the inherited defaults: {@code -F file}, {@code -R file}, {@code -t dir}
   * (extra tmp directory), {@code -N int} (max records in RAM, at least 100), {@code -r} (emit a
   * one-base "N" placeholder when one mate of a pair is missing instead of failing).
   *
   * @param args command-line arguments
   * @return 0 on success, -1 on error
   */
  @Override
  public int doWork(String[] args) {
    boolean repair_missing_read = false;
    SortingCollectionFactory<MappedFastq> sortingFactory =
        new SortingCollectionFactory<MappedFastq>();
    File forwardFile = null;
    File reverseFile = null;
    com.github.lindenb.jvarkit.util.cli.GetOpt opt =
        new com.github.lindenb.jvarkit.util.cli.GetOpt();
    int c;

    sortingFactory.setComponentType(MappedFastq.class);
    sortingFactory.setCodec(new MappedFastqCodec());
    sortingFactory.setComparator(new MappedFastqComparator());

    // "t:" is declared here so that the tmp-directory case below can actually be reached.
    while ((c = opt.getopt(args, super.getGetOptDefault() + "F:R:N:rt:")) != -1) {
      switch (c) {
        case 'F':
          forwardFile = new File(opt.getOptArg());
          break;
        case 'R':
          reverseFile = new File(opt.getOptArg());
          break;
        case 't':
          addTmpDirectory(new File(opt.getOptArg()));
          break;
        case 'N':
          sortingFactory.setMaxRecordsInRAM(Math.max(Integer.parseInt(opt.getOptArg()), 100));
          break;
        case 'r':
          repair_missing_read = true;
          break;
        case ':':
          System.err.println("Missing argument for option -" + opt.getOptOpt());
          return -1;
        default:
          {
            switch (handleOtherOptions(c, opt, args)) {
              case EXIT_FAILURE:
                return -1;
              case EXIT_SUCCESS:
                return 0;
              default:
                break;
            }
          }
      }
    }
    SAMFileReader sfr = null;
    SAMRecordIterator iter = null;
    SortingCollection<MappedFastq> fastqCollection = null;
    try {
      sortingFactory.setTmpDirs(this.getTmpDirectories());
      fastqCollection = sortingFactory.make();
      fastqCollection.setDestructiveIteration(true);
      boolean found_single = false;
      boolean found_paired = false;
      long non_primary_alignmaned_flag = 0L;

      if (opt.getOptInd() == args.length) {
        info("Reading from stdin");
        sfr = new SAMFileReader(System.in);
      } else if (opt.getOptInd() + 1 == args.length) {
        String filename = args[opt.getOptInd()];
        sfr = new SAMFileReader(new File(filename));
      } else {
        error(getMessageBundle("illegal.number.of.arguments"));
        return -1;
      }
      sfr.setValidationStringency(ValidationStringency.LENIENT);
      iter = sfr.iterator();
      SAMSequenceDictionaryProgress progress =
          new SAMSequenceDictionaryProgress(sfr.getFileHeader().getSequenceDictionary());
      while (iter.hasNext()) {
        SAMRecord rec = iter.next();
        progress.watch(rec);

        if (rec.isSecondaryOrSupplementary()) {
          // Warn once, on the first skipped record.
          if (non_primary_alignmaned_flag == 0) {
            warning("SKIPPING NON-PRIMARY " + (non_primary_alignmaned_flag + 1) + " ALIGNMENTS");
          }
          non_primary_alignmaned_flag++;
          continue;
        }

        MappedFastq m = new MappedFastq();
        m.name = rec.getReadName();
        if (m.name == null) {
          m.name = "";
        }
        m.hash = m.name.hashCode();
        m.seq = rec.getReadString();

        if (m.seq.equals(SAMRecord.NULL_SEQUENCE_STRING)) {
          m.seq = "";
        }
        m.qual = rec.getBaseQualityString();
        if (m.qual.equals(SAMRecord.NULL_QUALS_STRING)) {
          m.qual = "";
        }
        // Restore the original read orientation for reads mapped on the negative strand.
        if (!rec.getReadUnmappedFlag() && rec.getReadNegativeStrandFlag()) {
          m.seq = AcidNucleics.reverseComplement(m.seq);
          m.qual = new StringBuilder(m.qual).reverse().toString();
        }
        if (m.seq.length() != m.qual.length()) {
          error("length(seq)!=length(qual) in " + m.name);
          continue;
        }
        // FASTQ entries cannot be empty: substitute a single low-quality N.
        if (m.seq.isEmpty() && m.qual.isEmpty()) {
          m.seq = "N";
          m.qual = "#";
        }

        if (rec.getReadPairedFlag()) {
          found_paired = true;
          if (found_single) {
            // The reader is released by the finally block.
            throw new PicardException("input is a mix of paired/singled reads");
          }
          m.side = (byte) (rec.getSecondOfPairFlag() ? 2 : 1);
        } else {
          found_single = true;
          if (found_paired) {
            throw new PicardException("input is a mix of paired/singled reads");
          }
          m.side = (byte) 0;
        }
        fastqCollection.add(m);
      }
      // Release the input as soon as reading is done; null the references so the finally block
      // does not close them a second time.
      CloserUtil.close(iter);
      iter = null;
      CloserUtil.close(sfr);
      sfr = null;
      progress.finish();

      fastqCollection.doneAdding();
      info("Done reading.");

      if (found_paired) {
        FastqWriter fqw1 = null;
        FastqWriter fqw2 = null;
        CloseableIterator<MappedFastq> r = null;
        try {
          if (forwardFile != null) {
            info("Writing to " + forwardFile);
            fqw1 = new BasicFastqWriter(forwardFile);
          } else {
            info("Writing to stdout");
            fqw1 = new BasicFastqWriter(new PrintStream(System.out));
          }
          if (reverseFile != null) {
            info("Writing to " + reverseFile);
            fqw2 = new BasicFastqWriter(reverseFile);
          } else {
            info("Writing to interlaced stdout");
            fqw2 = fqw1;
          }
          // Reads sharing a name are buffered in 'row' and flushed when the name changes or the
          // input is exhausted.
          List<MappedFastq> row = new ArrayList<MappedFastq>();
          r = fastqCollection.iterator();
          for (; ; ) {
            MappedFastq curr = null;
            if (r.hasNext()) {
              curr = r.next();
            }
            if (curr == null || (!row.isEmpty() && !row.get(0).name.equals(curr.name))) {
              if (!row.isEmpty()) {
                if (row.size() > 2) {
                  warning("WTF :" + row);
                }
                boolean found_F = false;
                boolean found_R = false;
                for (MappedFastq m : row) {
                  switch ((int) m.side) {
                    case 1:
                      if (found_F) {
                        throw new PicardException(
                            "two forward reads found for " + row.get(0).name);
                      }
                      found_F = true;
                      echo(fqw1, m);
                      break;
                    case 2:
                      if (found_R) {
                        throw new PicardException(
                            "two reverse reads found for " + row.get(0).name);
                      }
                      found_R = true;
                      echo(fqw2, m);
                      break;
                    default:
                      throw new IllegalStateException("uh???");
                  }
                }
                if (!found_F) {
                  if (repair_missing_read) {
                    warning("forward not found for " + row.get(0));
                    MappedFastq pad = new MappedFastq();
                    pad.side = (byte) 1;
                    pad.name = row.get(0).name;
                    pad.seq = "N";
                    pad.qual = "#";
                    echo(fqw1, pad);
                  } else {
                    throw new PicardException("forward not found for " + row);
                  }
                }
                if (!found_R) {
                  if (repair_missing_read) {
                    warning("reverse not found for " + row.get(0));
                    MappedFastq pad = new MappedFastq();
                    pad.side = (byte) 2;
                    pad.name = row.get(0).name;
                    pad.seq = "N";
                    pad.qual = "#";
                    echo(fqw2, pad);
                  } else {
                    throw new PicardException("reverse not found for " + row);
                  }
                }
              }
              if (curr == null) {
                break;
              }
              row.clear();
            }
            row.add(curr);
          }
        } finally {
          if (r != null) {
            r.close();
          }
          if (fqw1 != null) {
            fqw1.close();
          }
          // In interleaved mode both variables point at the same writer: close it only once.
          if (fqw2 != null && fqw2 != fqw1) {
            fqw2.close();
          }
        }
      } else if (found_single) {
        FastqWriter fqw1 = null;
        CloseableIterator<MappedFastq> r = null;
        try {
          if (forwardFile != null) {
            info("Writing to " + forwardFile);
            fqw1 = new BasicFastqWriter(forwardFile);
          } else {
            info("Writing to stdout");
            fqw1 = new BasicFastqWriter(new PrintStream(System.out));
          }

          r = fastqCollection.iterator();
          while (r.hasNext()) {
            echo(fqw1, r.next());
          }
        } finally {
          if (r != null) {
            r.close();
          }
          if (fqw1 != null) {
            fqw1.close();
          }
        }
      }
      return 0;
    } catch (Exception err) {
      error(err);
      return -1;
    } finally {
      // Covers every early exit and failure path of the reading phase.
      CloserUtil.close(iter);
      CloserUtil.close(sfr);
      if (fastqCollection != null) {
        fastqCollection.cleanup();
      }
    }
  }