コード例 #1
0
  /**
   * Merges the given input files with a {@link SamFileHeaderMerger} and a {@link
   * MergingSamRecordIterator}, writes the merged header and records as SAM text into memory, and
   * compares the result line by line with the contents of the expected output file.
   *
   * <p>Lines starting with {@code @} are compared with {@code headersEquivalent}; every other line
   * is compared as an unordered collection of split parts.
   *
   * @param inputFiles the SAM/BAM files to merge
   * @param expectedOutputFile text file holding the expected merged SAM output
   * @throws IOException if the expected output file cannot be read
   */
  @Test(dataProvider = "data")
  public void testProgramGroupAndReadGroupMerge(File inputFiles[], File expectedOutputFile)
      throws IOException {

    // Read the expected output; the reader is closed even when reading fails.
    final StringBuilder expectedText = new StringBuilder();
    final BufferedReader reader = new BufferedReader(new FileReader(expectedOutputFile));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        expectedText.append(line).append("\n");
      }
    } finally {
      reader.close();
    }
    final String expected_output = expectedText.toString();

    final List<SAMFileReader> readers = new ArrayList<SAMFileReader>();
    final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
    final String actual_output;
    try {
      for (final File inFile : inputFiles) {
        IOUtil.assertFileIsReadable(inFile);
        final SAMFileReader in = new SAMFileReader(inFile);
        // We are now checking for zero-length reads, so suppress complaint about that.
        in.setValidationStringency(ValidationStringency.SILENT);
        readers.add(in);
        headers.add(in.getFileHeader());
      }

      final SamFileHeaderMerger headerMerger =
          new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, headers, true);
      final MergingSamRecordIterator iterator =
          new MergingSamRecordIterator(headerMerger, readers, false);

      // Write the merged stream as SAM text into memory so it can be compared as a string.
      final ByteArrayOutputStream baos = new ByteArrayOutputStream();
      final SAMFileWriter writer =
          new SAMFileWriterFactory().makeSAMWriter(headerMerger.getMergedHeader(), true, baos);
      while (iterator.hasNext()) {
        writer.addAlignment(iterator.next());
      }
      writer.close();

      actual_output = StringUtil.bytesToString(baos.toByteArray());
    } finally {
      // Release every input reader that was opened, whether or not the merge succeeded.
      for (final SAMFileReader opened : readers) {
        opened.close();
      }
    }

    final List<String> actual = Arrays.asList(actual_output.split("\\n"));
    final List<String> expected = Arrays.asList(expected_output.split("\\n"));

    // Fail with a readable message instead of an IndexOutOfBoundsException when output is short.
    Assert.assertTrue(
        actual.size() >= expected.size(),
        "merged output has " + actual.size() + " lines but " + expected.size() + " were expected");

    for (int i = 0; i < expected.size(); i++) {
      if (expected.get(i).startsWith("@")) {
        Assert.assertTrue(headersEquivalent(actual.get(i), expected.get(i)));
      } else {
        // NOTE(review): "\\s*" also matches the empty string, so on current JDKs this splits the
        // line into individual non-whitespace characters rather than whitespace-separated
        // fields; "\\s+" may have been intended. Left unchanged so the test keeps its strictness.
        final List<String> expectedSamParts = Arrays.asList(expected.get(i).split("\\s*"));
        final List<String> actualSamParts = Arrays.asList(actual.get(i).split("\\s*"));
        for (String exp : expectedSamParts) {
          Assert.assertTrue(actualSamParts.contains(exp));
        }
        for (String act : actualSamParts) {
          Assert.assertTrue(expectedSamParts.contains(act));
        }
      }
    }
  }
コード例 #2
0
 /**
  * Builds two in-memory SAM headers from the {@code sq*} fixture strings and expects constructing
  * a {@link SamFileHeaderMerger} over them (with dictionary merging enabled) to fail with a
  * {@link SAMException}.
  */
 @Test(expectedExceptions = {SAMException.class})
 public void testUnmergeableSequenceDictionary() {
   final String firstDictionary = sq1 + sq2 + sq5;
   final String secondDictionary = sq2 + sq3 + sq4 + sq1;

   final byte[] firstBytes = StringUtil.stringToBytes(firstDictionary);
   final SAMFileReader firstReader = new SAMFileReader(new ByteArrayInputStream(firstBytes));

   final byte[] secondBytes = StringUtil.stringToBytes(secondDictionary);
   final SAMFileReader secondReader = new SAMFileReader(new ByteArrayInputStream(secondBytes));

   final SAMFileHeader firstHeader = firstReader.getFileHeader();
   final SAMFileHeader secondHeader = secondReader.getFileHeader();
   final List<SAMFileHeader> inputHeaders = Arrays.asList(firstHeader, secondHeader);

   // The constructor itself is expected to throw; the instance is never used.
   new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, inputHeaders, true);
 }
コード例 #3
0
 /**
  * Checks that, with sequence-dictionary merging switched off, building a {@link
  * SamFileHeaderMerger} over two BAM files is rejected with a {@link
  * SequenceUtil.SequenceListsDifferException}.
  */
 @Test(expectedExceptions = SequenceUtil.SequenceListsDifferException.class)
 public void testMergedException() {
   final File[] bamFiles = {
     new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/Chromosome1to10.bam"),
     new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/Chromosome5to9.bam")
   };

   final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
   for (int n = 0; n < bamFiles.length; n++) {
     final File bam = bamFiles[n];
     IOUtil.assertFileIsReadable(bam);
     headers.add(new SAMFileReader(bam).getFileHeader());
   }

   // The last argument disables dictionary merging, so the constructor is expected to throw.
   new SamFileHeaderMerger(SAMFileHeader.SortOrder.unsorted, headers, false);
 }
コード例 #4
0
ファイル: BAMIndexer.java プロジェクト: jsilter/htsjdk
  /**
   * Builds a BAM index by feeding every record of the reader to a {@link BAMIndexer} and then
   * finishing the indexer.
   *
   * @param reader SAMFileReader for the input BAM file
   * @param output File the index is written to
   * @param log optional logger; when non-null a progress message is emitted every 1,000,000
   *     records
   */
  public static void createIndex(SAMFileReader reader, File output, Log log) {
    final BAMIndexer bamIndexer = new BAMIndexer(output, reader.getFileHeader());

    reader.enableFileSource(true);

    // Feed every alignment to the indexer, reporting progress at each million-record boundary.
    int processed = 0;
    for (SAMRecord alignment : reader) {
      processed++;
      if (log != null && processed % 1000000 == 0) {
        log.info(processed + " reads processed ...");
      }
      bamIndexer.processAlignment(alignment);
    }

    bamIndexer.finish();
  }
コード例 #5
0
  /**
   * Merges two BAM files through a {@link SamFileHeaderMerger} (dictionary merging enabled) and a
   * {@link MergingSamRecordIterator}, then checks the total number of reads and the number of
   * reads seen for each reference index.
   */
  @Test
  public void testMerging() {
    final File[] bamFiles = {
      new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/Chromosome1to10.bam"),
      new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/Chromosome5to9.bam")
    };

    final List<SAMFileReader> readers = new ArrayList<SAMFileReader>();
    final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
    for (int n = 0; n < bamFiles.length; n++) {
      final File bam = bamFiles[n];
      IOUtil.assertFileIsReadable(bam);
      final SAMFileReader samReader = new SAMFileReader(bam);
      // Zero-length reads are now validated; silence that complaint for this test.
      samReader.setValidationStringency(ValidationStringency.SILENT);
      readers.add(samReader);
      headers.add(samReader.getFileHeader());
    }

    final SamFileHeaderMerger headerMerger =
        new SamFileHeaderMerger(SAMFileHeader.SortOrder.unsorted, headers, true);
    final MergingSamRecordIterator mergedRecords =
        new MergingSamRecordIterator(headerMerger, readers, false);
    headerMerger.getMergedHeader();

    // Tally the reads overall and per reference index.
    final Map<Integer, Integer> readsPerSequence = new HashMap<Integer, Integer>();
    int totalReads = 0;
    while (mergedRecords.hasNext()) {
      final Integer referenceIndex = mergedRecords.next().getReferenceIndex();
      final Integer seenSoFar = readsPerSequence.get(referenceIndex);
      readsPerSequence.put(referenceIndex, seenSoFar == null ? 1 : seenSoFar + 1);
      totalReads++;
    }

    assertEquals(totalReads, 1500);
    for (Map.Entry<Integer, Integer> tally : readsPerSequence.entrySet()) {
      final int referenceIndex = tally.getKey();
      final int reads = tally.getValue().intValue();
      if (referenceIndex >= 4 && referenceIndex <= 8) {
        // indices 4 - 8 (sequences 5 - 9) are expected to carry 200 reads
        assertEquals(reads, 200);
      } else {
        // every other index is expected to carry 100 reads
        assertEquals(reads, 100);
      }
    }
  }
コード例 #6
0
  /**
   * Copies every record from {@code INPUT} to {@code OUTPUT}, replacing the header's read groups
   * with a single read group built from the {@code RG*} options and tagging each record with that
   * read group's id.
   *
   * <p>The reader and the writer are closed even if header handling or record copying throws.
   *
   * @return 0 once all records have been written
   */
  protected int doWork() {
    IoUtil.assertFileIsReadable(INPUT);
    IoUtil.assertFileIsWritable(OUTPUT);

    final SAMFileReader in = new SAMFileReader(INPUT);
    try {
      // create the read group we'll be using
      final SAMReadGroupRecord rg = new SAMReadGroupRecord(RGID);
      rg.setLibrary(RGLB);
      rg.setPlatform(RGPL);
      rg.setSample(RGSM);
      rg.setPlatformUnit(RGPU);
      // The remaining read-group attributes are optional and only set when supplied.
      if (RGCN != null) rg.setSequencingCenter(RGCN);
      if (RGDS != null) rg.setDescription(RGDS);
      if (RGDT != null) rg.setRunDate(RGDT);

      log.info(
          String.format(
              "Created read group ID=%s PL=%s LB=%s SM=%s%n",
              rg.getId(), rg.getPlatform(), rg.getLibrary(), rg.getSample()));

      // create the new header and output file
      final SAMFileHeader inHeader = in.getFileHeader();
      final SAMFileHeader outHeader = inHeader.clone();
      outHeader.setReadGroups(Arrays.asList(rg));
      if (SORT_ORDER != null) outHeader.setSortOrder(SORT_ORDER);

      // The writer is told the input is presorted only when the sort order is left unchanged.
      final SAMFileWriter outWriter =
          new SAMFileWriterFactory()
              .makeSAMOrBAMWriter(
                  outHeader, outHeader.getSortOrder() == inHeader.getSortOrder(), OUTPUT);
      try {
        final ProgressLogger progress = new ProgressLogger(log);
        for (final SAMRecord read : in) {
          read.setAttribute(SAMTag.RG.name(), RGID);
          outWriter.addAlignment(read);
          progress.record(read);
        }
      } finally {
        outWriter.close();
      }
    } finally {
      in.close();
    }
    return 0;
  }
コード例 #7
0
ファイル: BAMIndexWriterTest.java プロジェクト: gkno/picard
  /**
   * Counts the records returned by an overlap query on the given reference sequence.
   *
   * <p>The iterator is closed even if iteration throws, so the reader is left usable for further
   * queries.
   *
   * @param reference index of the reference sequence in the reader's header
   * @param window window number used to derive the query start and stop
   * @param reader reader to query
   * @param expectedCount when -1, the name of every record found is printed to stderr; not
   *     otherwise used here
   * @return number of records returned by the query
   */
  private int countAlignmentsInWindow(
      int reference, int window, SAMFileReader reader, int expectedCount) {
    // NOTE(review): the trailing comments describe multiplying by 16K (1 << 14, presumably the
    // linear-index shift), but the code shifts RIGHT by 14. For any window below 16384 that gives
    // start == 0 and stop == -1. Left unchanged because callers' expected counts may be
    // calibrated against this behavior -- confirm before switching to "<<".
    final int start = window >> 14; // window * SIXTEEN_K;
    final int stop = ((window + 1) >> 14) - 1; // (window + 1 * SIXTEEN_K) - 1;

    final String chr = reader.getFileHeader().getSequence(reference).getSequenceName();

    // get records for the entire linear index window
    final SAMRecordIterator iter = reader.queryOverlapping(chr, start, stop);
    int count = 0;
    try {
      while (iter.hasNext()) {
        final SAMRecord rec = iter.next();
        count++;
        if (expectedCount == -1) System.err.println(rec.getReadName());
      }
    } finally {
      iter.close();
    }
    return count;
  }
コード例 #8
0
 /**
  * Merges two in-memory SAM headers built from the {@code sq*} fixture strings and verifies that
  * every input sequence is present in the merged header and that each input's sequences keep
  * their relative order there.
  */
 @Test
 public void testSequenceDictionaryMerge() {
   final String sd1 = sq1 + sq2 + sq5;
   final String sd2 = sq2 + sq3 + sq4;
   SAMFileReader reader1 =
       new SAMFileReader(new ByteArrayInputStream(StringUtil.stringToBytes(sd1)));
   SAMFileReader reader2 =
       new SAMFileReader(new ByteArrayInputStream(StringUtil.stringToBytes(sd2)));
   final List<SAMFileHeader> inputHeaders =
       Arrays.asList(reader1.getFileHeader(), reader2.getFileHeader());
   SamFileHeaderMerger merger =
       new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, inputHeaders, true);
   final SAMFileHeader mergedHeader = merger.getMergedHeader();
   for (final SAMFileHeader inputHeader : inputHeaders) {
     int prevTargetIndex = -1;
     for (final SAMSequenceRecord sequenceRecord :
         inputHeader.getSequenceDictionary().getSequences()) {
       final int targetIndex = mergedHeader.getSequenceIndex(sequenceRecord.getSequenceName());
       // Compare by value: assertNotSame would box both ints and compare object identity, which
       // only behaves as intended because small Integers happen to be cached.
       Assert.assertTrue(targetIndex != -1);
       // Indices in the merged dictionary must strictly increase along each input dictionary.
       Assert.assertTrue(prevTargetIndex < targetIndex);
       prevTargetIndex = targetIndex;
     }
   }
 }
コード例 #9
0
ファイル: SAMCoverage.java プロジェクト: NCIP/cgr-bambino
  /**
   * Command-line entry point. Computes coverage for a BAM file, either over one region given by
   * {@code -tname}/{@code -tstart}/{@code -tend} or over every row of a tab-delimited targets
   * file given by {@code -targets}.
   *
   * <p>Recognized switches: {@code -bam file}, {@code -targets file}, {@code -tname name}, {@code
   * -tstart n}, {@code -tend n}, {@code -of file}, {@code -min-quality n}, {@code -verbose}. An
   * unknown switch terminates the JVM with status 1.
   *
   * <p>The targets file must begin with a header row whose first four columns are {@code Name},
   * {@code Chromosome}, {@code Start} and {@code End}.
   *
   * @param argv command-line arguments
   */
  public static void main(String[] argv) {
    // Suppress SAM validation complaints for every reader created from here on.
    SAMFileReader.setDefaultValidationStringency(SAMFileReader.ValidationStringency.SILENT);

    SAMResource sr = null;
    SAMRegion region = new SAMRegion();
    region.range = new Range();
    // -1 marks "not supplied on the command line".
    region.range.start = -1;
    region.range.end = -1;

    String outfile = null;
    String target_file = null;
    SAMCoverage sc = new SAMCoverage();

    for (int i = 0; i < argv.length; i++) {
      if (argv[i].equals("-bam")) {
        sr = new SAMResource();
        //	sr.import_data(SAMResourceTags.SAM_URL, argv[++i]);
        sr.set_file(argv[++i]);
        sr.detect_sample_id();
      } else if (argv[i].equals("-targets")) {
        target_file = argv[++i];
      } else if (argv[i].equals("-tname")) {
        region.tname = argv[++i];
      } else if (argv[i].equals("-verbose")) {
        sc.set_verbose(true);
      } else if (argv[i].equals("-tstart")) {
        region.range.start = Integer.parseInt(argv[++i]);
      } else if (argv[i].equals("-tend")) {
        region.range.end = Integer.parseInt(argv[++i]);
      } else if (argv[i].equals("-of")) {
        outfile = argv[++i];
      } else if (argv[i].equals("-min-quality")) {
        sc.set_min_quality(Integer.parseInt(argv[++i]));
      } else {
        System.err.println("error: unknown switch " + argv[i]); // debug
        System.exit(1);
      }
    }

    // Validate the combination of options; a targets file replaces the -t* region switches.
    String error = null;
    if (sr == null) {
      error = "specify -bam [file]";
    } else if (target_file == null) {
      if (region.tname == null) {
        error = "specify -tname";
      } else if (region.range.start == -1) {
        error = "specify -tstart";
      } else if (region.range.end == -1) {
        error = "specify -tend";
      }
    }

    // Guarded: without -bam there is no resource, and the usage error below must still be
    // reported instead of failing with a NullPointerException here.
    if (sr != null) {
      sr.set_region(region);
    }

    if (error != null) {
      System.err.println("ERROR: " + error); // debug
    } else if (target_file != null) {
      try {
        File f = new File(target_file);
        BufferedReader br = new BufferedReader(new FileReader(f));
        try {
          String line = br.readLine();
          // An empty file or a header with fewer than four columns is a format error.
          String[] headers = (line == null) ? new String[0] : line.split("\t");

          if (headers.length >= 4
              && headers[0].equals("Name")
              && headers[1].equals("Chromosome")
              && headers[2].equals("Start")
              && headers[3].equals("End")) {

            WorkingFile wf = null;
            if (outfile != null) {
              wf = new WorkingFile(outfile);
              sc.setPrintStream(wf.getPrintStream());
            }

            // The region object handed to sr above is updated in place for each target row.
            while ((line = br.readLine()) != null) {
              String[] row = line.split("\t");

              region.tname = row[1];
              region.range.start = Integer.parseInt(row[2]);
              region.range.end = Integer.parseInt(row[3]);
              //	      sc.set_name(new String(row[0]));
              sc.set_name(new String(row[0]) + "," + new String(row[1]));
              sc.find_coverage(sr);
            }

            if (wf != null) wf.finish();

          } else {
            throw new IOException("file format error");
          }
        } finally {
          br.close();
        }

      } catch (Exception e) {
        System.err.println("ERROR: " + e); // debug
        e.printStackTrace();
        System.exit(1);
      }
    } else {
      sc.set_outfile(outfile);
      sc.find_coverage(sr);
    }
  }
コード例 #10
0
  /**
   * Annotates aligned contigs with per-kmer origin codes.
   *
   * <p>Steps, in order: build the code-to-index and code-to-name maps (names can be overridden
   * through {@code KMER_CODE_NAMES}); load the annotation table {@code ANN} and print two lines
   * per contig to {@code cout}; add one read group per code name to the header of {@code
   * CONTIGS}; for every selected, annotated contig write one BAM record per kmer to {@code bout},
   * a tab-separated line per primary contig to {@code beout}, and a per-contig statistics row to
   * {@code sout}; finally print the collected kmer positions to {@code out}.
   */
  @Override
  public void execute() {
    log.info("Initializing kmer code map...");
    // Integer reported in the "Parentage" column for each kmer code character.
    Map<Character, Integer> kmerCodeIndices = new HashMap<Character, Integer>();
    kmerCodeIndices.put('0', 1);
    kmerCodeIndices.put('A', 3);
    kmerCodeIndices.put('B', 4);
    kmerCodeIndices.put('C', 5);
    kmerCodeIndices.put('_', 6);
    kmerCodeIndices.put('.', 7);
    kmerCodeIndices.put('1', 9);

    // Display name per code; insertion order is kept and drives the read-group order and the
    // column order of the statistics table below.
    Map<Character, String> kmerCodeNames = new LinkedHashMap<Character, String>();
    kmerCodeNames.put('0', "ref0");
    kmerCodeNames.put('A', "repetitive");
    kmerCodeNames.put('B', "both");
    kmerCodeNames.put('C', "lowcoverage");
    kmerCodeNames.put('_', "lowconfidence");
    kmerCodeNames.put('.', "novel");
    kmerCodeNames.put('1', "ref1");

    // Optional overrides: replace the default name of any code present in KMER_CODE_NAMES.
    if (KMER_CODE_NAMES != null) {
      for (Character c : kmerCodeNames.keySet()) {
        String cStr = String.valueOf(c);
        if (KMER_CODE_NAMES.containsKey(cStr)) {
          kmerCodeNames.put(c, KMER_CODE_NAMES.get(cStr));
        }
      }
    }

    for (Character c : kmerCodeNames.keySet()) {
      log.info("  {} {}: {}", c, kmerCodeIndices.get(c), kmerCodeNames.get(c));
    }

    log.info("Loading annotated contigs...");
    // Annotation rows keyed by contig name.
    Map<String, Map<String, String>> annotatedContigs = new HashMap<String, Map<String, String>>();
    int kmerSize = 0;

    if (ANN.length() > 0) {
      TableReader tr = new TableReader(ANN);
      for (Map<String, String> te : tr) {
        String contigName = te.get("contigName");

        // Derived once, from the first row: kmerOrigin holds one character per kmer of seq.
        if (kmerSize == 0) {
          kmerSize = te.get("seq").length() - te.get("kmerOrigin").length() + 1;
        }

        annotatedContigs.put(contigName, te);

        // "NA" and "*:0-0" are normalized to "NA:0-0" so that splitting on ':' or '-' always
        // yields the three pieces indexed below.
        String[] ref0ToCanonicalExact =
            (te.get("ref0ToCanonicalExact").equals("NA")
                        || te.get("ref0ToCanonicalExact").equals("*:0-0")
                    ? "NA:0-0"
                    : te.get("ref0ToCanonicalExact"))
                .split("[:-]");
        String[] ref1ToCanonicalExact =
            (te.get("ref1ToCanonicalExact").equals("NA")
                        || te.get("ref1ToCanonicalExact").equals("*:0-0")
                    ? "NA:0-0"
                    : te.get("ref1ToCanonicalExact"))
                .split("[:-]");

        // One line per reference mapping: "<sample>_<accession>_<contig> <name> <start> <end>"
        // followed by a fixed radius attribute.
        cout.println(
            te.get("sampleName")
                + "_"
                + te.get("accession")
                + "_"
                + contigName
                + " "
                + ref0ToCanonicalExact[0]
                + " "
                + ref0ToCanonicalExact[1]
                + " "
                + ref0ToCanonicalExact[2]
                + " radius1=0.8r");
        cout.println(
            te.get("sampleName")
                + "_"
                + te.get("accession")
                + "_"
                + contigName
                + " "
                + ref1ToCanonicalExact[0]
                + " "
                + ref1ToCanonicalExact[1]
                + " "
                + ref1ToCanonicalExact[2]
                + " radius2=0.6r");
      }
    }

    log.info("    contigs: {}", annotatedContigs.size());
    log.info("  kmer size: {}", kmerSize);

    log.info("Computing kmer inheritance information...");

    // Add one read group per code name (id and sample both set to the name) to the header
    // object returned by CONTIGS.
    SAMFileHeader sfh = CONTIGS.getFileHeader();
    for (Character c : kmerCodeNames.keySet()) {
      SAMReadGroupRecord rgr = new SAMReadGroupRecord(kmerCodeNames.get(c));
      rgr.setSample(kmerCodeNames.get(c));
      sfh.addReadGroup(rgr);
    }

    SAMFileWriterFactory sfwf = new SAMFileWriterFactory();
    sfwf.setCreateIndex(true);
    SAMFileWriter sfw = sfwf.makeBAMWriter(sfh, false, bout);

    TableWriter tw = new TableWriter(sout);

    // TreeSet: entries are de-duplicated and ordered by IGVEntry's own ordering (defined
    // outside this method).
    Set<IGVEntry> igvEntries = new TreeSet<IGVEntry>();
    int numContigs = 0;
    for (SAMRecord contig : CONTIGS) {
      // An absent or empty CONTIG_NAMES means "process every contig".
      if (CONTIG_NAMES == null
          || CONTIG_NAMES.isEmpty()
          || CONTIG_NAMES.contains(contig.getReadName())) {
        Map<String, String> te = annotatedContigs.get(contig.getReadName());

        // Contigs without an annotation row are skipped entirely.
        if (annotatedContigs.containsKey(contig.getReadName())) {
          String seq = contig.getReadString();

          // log.debug("  te: {}", te);

          String annSeq = te.get("seq");
          String kmerOrigin = te.get("kmerOrigin");

          // Map each kmer of the annotated sequence to its origin code character.
          Map<CortexKmer, Character> kmerCodes = new HashMap<CortexKmer, Character>();
          for (int i = 0; i < kmerOrigin.length(); i++) {
            CortexKmer kmer = new CortexKmer(annSeq.substring(i, i + kmerSize));
            Character code = kmerOrigin.charAt(i);

            kmerCodes.put(kmer, code);
          }

          // Per-code kmer counters for this contig, initialized to zero.
          Map<Character, Integer> kmerStats = new HashMap<Character, Integer>();
          for (Character c : kmerCodeNames.keySet()) {
            kmerStats.put(c, 0);
          }

          boolean changed = false;

          // We want to be able to examine soft-clipped regions as well.
          // Rebuild the CIGAR with every soft clip (S) turned into a match (M) of equal length.
          List<CigarElement> ces = new ArrayList<CigarElement>();
          for (CigarElement ce : contig.getCigar().getCigarElements()) {
            if (ce.getOperator().equals(CigarOperator.S)) {
              ces.add(new CigarElement(ce.getLength(), CigarOperator.M));
              changed = true;
            } else {
              ces.add(ce);
            }
          }

          if (changed) {
            CigarElement firstCe = contig.getCigar().getCigarElements().get(0);

            // A leading soft clip now counts as aligned, so the alignment start moves left by
            // its length.
            if (firstCe.getOperator().equals(CigarOperator.S)) {
              contig.setAlignmentStart(contig.getAlignmentStart() - firstCe.getLength());
            }

            contig.setCigar(new Cigar(ces));
          }

          for (AlignmentBlock ab : contig.getAlignmentBlocks()) {
            for (int i = ab.getReadStart() - 1; i < ab.getReadStart() + ab.getLength(); i++) {
              // NOTE(review): the strict '<' skips the kmer that ends exactly at the last base
              // of seq; confirm whether '<=' was intended.
              if (i + kmerSize < seq.length()) {
                CortexKmer kmer = new CortexKmer(seq.substring(i, i + kmerSize));

                // Synthetic single-kmer record placed at the kmer's reference position.
                SAMRecord skmer = new SAMRecord(CONTIGS.getFileHeader());
                skmer.setReadBases(seq.substring(i, i + kmerSize).getBytes());

                List<CigarElement> cigarElements = new ArrayList<CigarElement>();
                cigarElements.add(new CigarElement(kmerSize, CigarOperator.M));
                Cigar cigar = new Cigar(cigarElements);

                skmer.setReadName(contig.getReadName() + "." + kmer.getKmerAsString());
                skmer.setReferenceName(contig.getReferenceName());
                skmer.setCigar(cigar);
                skmer.setReadPairedFlag(false);
                skmer.setDuplicateReadFlag(false);
                skmer.setMateNegativeStrandFlag(false);
                skmer.setAlignmentStart(ab.getReferenceStart() - ab.getReadStart() + 1 + i);
                // Placeholder values; both RG and mapping quality are overwritten further down.
                skmer.setAttribute("RG", "none");
                skmer.setMappingQuality(0);

                // NOTE(review): c is null when this kmer is not among the annotated kmers; the
                // kmerStats update below would then throw a NullPointerException -- confirm
                // that every contig kmer is guaranteed to be annotated.
                Character c = kmerCodes.get(kmer);
                String codeName = kmerCodeNames.get(c);

                // Find the read group whose sample equals the code name, and the one whose
                // sample equals the contig's own sample.
                String parentReadGroupId = null;
                String sampleReadGroupId = null;
                for (SAMReadGroupRecord rgr : sfh.getReadGroups()) {
                  if (rgr.getSample().equals(codeName)) {
                    parentReadGroupId = rgr.getReadGroupId();
                  }

                  if (rgr.getSample().equals(contig.getReadGroup().getSample())) {
                    sampleReadGroupId = rgr.getReadGroupId();
                  }
                }

                // Prefer the code's read group; fall back to the contig's sample read group.
                skmer.setAttribute(
                    "RG", parentReadGroupId != null ? parentReadGroupId : sampleReadGroupId);
                skmer.setMappingQuality(99);

                sfw.addAlignment(skmer);

                kmerStats.put(c, kmerStats.get(c) + 1);

                // Record the kmer position for the table printed at the end; start is one less
                // than the alignment start set on skmer above.
                IGVEntry igvEntry = new IGVEntry();
                igvEntry.chromosome = contig.getReferenceName();
                igvEntry.start = ab.getReferenceStart() - ab.getReadStart() + i;
                igvEntry.parentageName = kmerCodeNames.get(c);
                igvEntry.parentage = kmerCodeIndices.get(c);
                igvEntries.add(igvEntry);
              }
            }
          }

          // Only primary alignments produce an interval line and count toward progress.
          if (!contig.isSecondaryOrSupplementary()) {
            beout.println(
                contig.getReferenceName()
                    + "\t"
                    + contig.getAlignmentStart()
                    + "\t"
                    + contig.getAlignmentEnd()
                    + "\t"
                    + contig.getReadName()
                    + "."
                    + contig.getReadGroup().getSample());

            // Log progress whenever numContigs is a multiple of a tenth of the annotated
            // contigs; the size check keeps the divisor non-zero.
            if (annotatedContigs.size() > 10 && numContigs % (annotatedContigs.size() / 10) == 0) {
              log.info("  processed {}/{} contigs", numContigs, annotatedContigs.size());
            }
            numContigs++;
          }

          // One statistics row per processed alignment: contig, sample, then a count per code.
          Map<String, String> stats = new LinkedHashMap<String, String>();
          stats.put("contigName", contig.getReadName());
          stats.put("sampleName", contig.getReadGroup().getSample());
          for (Character c : kmerCodeNames.keySet()) {
            stats.put(kmerCodeNames.get(c), String.valueOf(kmerStats.get(c)));
          }
          tw.addEntry(stats);
        }
      }
    }

    log.info("Writing kmer inheritance information...");
    // Header row followed by one row per collected kmer position; End is always Start + 1.
    out.printf("%s\t%s\t%s\t%s\t%s\n", "Chromosome", "Start", "End", "Feature", "Parentage");
    for (IGVEntry igvEntry : igvEntries) {
      out.printf(
          "%s\t%d\t%d\t%s\t%d\n",
          igvEntry.chromosome,
          igvEntry.start,
          igvEntry.start + 1,
          igvEntry.parentageName,
          igvEntry.parentage);
    }

    // NOTE(review): only the BAM writer is closed here; tw, cout, beout and out are presumably
    // closed or flushed by the caller -- confirm.
    sfw.close();
  }