Exemplo n.º 1
0
  /**
   * Indexes the k-mers of every contig supplied by {@code reader}, in both orientations.
   *
   * <p>Contigs shorter than {@code kmerSize} are skipped and do not consume a contig index. Each
   * remaining contig is registered in {@code contigMap} under a running integer index, its forward
   * k-mers are recorded in {@code kmerMaps[0]} and the k-mers of its reverse complement in {@code
   * kmerMaps[1]}. Every recorded k-mer gets a {@code ContigCoverage} entry pointing back at the
   * contig index and a position within that contig.
   *
   * <p>The reader is always closed before this method returns, including when reading fails.
   *
   * @param reader source of contig sequences; closed by this method
   * @throws IOException if reading from or closing {@code reader} fails
   */
  private void processContigFile(SequenceReader reader) throws IOException {
    try {
      Sequence seq;
      int contigIdx = 0;
      while ((seq = reader.readNextSequence()) != null) {
        final String seqString = seq.getSeqString();
        final int seqLength = seqString.length();
        if (seqLength < kmerSize) {
          continue;
        }
        // use int to represent seqname in case contig names are too long
        contigMap.put(contigIdx, new Contig(seq.getSeqName(), seqLength - kmerSize + 1));

        // forward direction
        NuclKmerGenerator kmerGenerator = new NuclKmerGenerator(seqString, kmerSize);
        while (kmerGenerator.hasNext()) {
          Kmer kmer = kmerGenerator.next();
          KmerAbund kmerAbund = kmerMaps[0].get(kmer);
          if (kmerAbund == null) {
            kmerAbund = new KmerAbund();
            kmerMaps[0].put(kmer, kmerAbund);
          }
          kmerAbund.contigList.add(
              new ContigCoverage(contigIdx, kmerGenerator.getPosition() - 1));
        }

        // reverse direction
        kmerGenerator = new NuclKmerGenerator(IUBUtilities.reverseComplement(seqString), kmerSize);
        while (kmerGenerator.hasNext()) {
          Kmer kmer = kmerGenerator.next();
          KmerAbund kmerAbund = kmerMaps[1].get(kmer);
          if (kmerAbund == null) {
            kmerAbund = new KmerAbund();
            kmerMaps[1].put(kmer, kmerAbund);
          }
          // Convert the position on the reverse-complemented string back to a position on the
          // original contig (presumably getPosition() is 1-based, matching the "- 1" used in the
          // forward branch -- confirm against NuclKmerGenerator).
          kmerAbund.contigList.add(
              new ContigCoverage(
                  contigIdx, seqLength - kmerGenerator.getPosition() - kmerSize + 1));
        }
        contigIdx++;
      }
    } finally {
      // Release the reader even when reading or k-mer generation throws.
      reader.close();
    }
  }
Exemplo n.º 2
0
  /**
   * Writes the per-contig coverage report and the k-mer abundance histogram.
   *
   * <p>{@code adjustCount()} is invoked first. The coverage report is then written to {@code
   * coverage_out}: three {@code #}-prefixed header lines followed by one tab-separated line per
   * contig in {@code contigMap} (name, mean coverage, median coverage, total positions, covered
   * positions, covered ratio). Contigs with no covered position are reported with zeros.
   *
   * <p>The abundance histogram is written to {@code abundance_out}: for every k-mer in {@code
   * kmerMaps[0]} the forward count is combined with the count stored in {@code kmerMaps[1]} for
   * its reverse complement, and the number of k-mers observed at each combined abundance is
   * printed. The combined value is computed locally; the counters stored in the k-mer maps are
   * not modified, so the method can be called repeatedly with the same result.
   *
   * <p>Both output streams are closed by this method once their section has been attempted.
   *
   * @param coverage_out destination of the contig coverage report; closed by this method
   * @param abundance_out destination of the k-mer abundance histogram; closed by this method
   * @throws IOException declared for callers; may be raised by {@code adjustCount()}
   */
  public void printCovereage(OutputStream coverage_out, OutputStream abundance_out)
      throws IOException {
    adjustCount();
    // print out the weighted kmer coverage
    // we found mean coverage matched the previous biological observation
    PrintStream coverage_outStream = new PrintStream(coverage_out);
    try {
      coverage_outStream.println("#total reads: " + totalReads.intValue());
      coverage_outStream.println("#use mean_cov to adjust the contig abundance, not median_cov ");
      coverage_outStream.println(
          "#seqid\tmean_cov\tmedian_cov\ttotal_pos\tcovered_pos\tcovered_ratio");

      for (Contig contig : contigMap.values()) {
        ArrayList<Double> counts = new ArrayList<Double>();
        int coveredPos = 0;
        for (int pos = 0; pos < contig.coverage.length; pos++) {
          if (contig.coverage[pos] > 0) {
            coveredPos++;
          }
          counts.add(contig.coverage[pos]);
        }
        if (coveredPos > 0) {
          coverage_outStream.println(
              contig.name
                  + "\t"
                  + String.format(dformat, StdevCal.calMean(counts))
                  + "\t"
                  + String.format(dformat, (StdevCal.calMedian(counts)))
                  + "\t"
                  + counts.size()
                  + "\t"
                  + coveredPos
                  + "\t"
                  + String.format(dformat, (double) coveredPos / (double) contig.coverage.length));
        } else { // no coverage
          coverage_outStream.println(
              contig.name
                  + "\t"
                  + 0
                  + "\t"
                  + 0
                  + "\t"
                  + contig.coverage.length
                  + "\t"
                  + 0
                  + "\t"
                  + 0);
        }
      }
    } finally {
      coverage_outStream.close();
    }

    // print kmer abundance
    PrintStream abundance_outStream = new PrintStream(abundance_out);
    try {
      // the frequency of each kmer abundance value
      HashMap<Integer, Integer> abundanceCountMap = new HashMap<Integer, Integer>();
      // need to merge the counts from forward and reverse together.
      HashSet<Kmer> kmerSet = new HashSet<Kmer>();
      kmerSet.addAll(kmerMaps[0].keySet());
      for (Kmer kmer : kmerSet) {
        // Read the value instead of keeping the stored AtomicInteger: adding the reverse count
        // to the shared counter would permanently inflate the forward map on every call.
        int abundance = kmerMaps[0].get(kmer).count.get();

        String reverseKmerStr =
            IUBUtilities.reverseComplement(kmer.decodeLong(kmer.getLongKmers()));
        Kmer reverseKmer = (new NuclKmerGenerator(reverseKmerStr, this.kmerSize)).next();
        KmerAbund reverseAbund = kmerMaps[1].get(reverseKmer);
        if (reverseAbund != null) {
          abundance += reverseAbund.count.get();
        }

        Integer seen = abundanceCountMap.get(abundance);
        if (seen == null) {
          abundanceCountMap.put(abundance, 1);
        } else {
          abundanceCountMap.put(abundance, seen + 1);
        }
      }

      abundance_outStream.println("kmer_abundance\tfrequency");
      for (Integer abundance : abundanceCountMap.keySet()) {
        abundance_outStream.println(abundance + "\t" + abundanceCountMap.get(abundance));
      }
    } finally {
      abundance_outStream.close();
    }
  }