Esempio n. 1
0
  /**
   * Indexes the k-mers of every contig supplied by {@code reader}.
   *
   * <p>Contigs shorter than {@code kmerSize} are skipped. Every other contig is registered in
   * {@code contigMap} under a running integer index, and each of its k-mers is recorded in
   * {@code kmerMaps[0]} (forward strand) and {@code kmerMaps[1]} (reverse complement) together
   * with the contig index and a position.
   *
   * <p>The reader is always closed before this method returns, including when reading or k-mer
   * generation fails part-way through.
   *
   * @param reader source of contig sequences; closed by this method
   * @throws IOException if reading from, or closing, the reader fails
   */
  private void processContigFile(SequenceReader reader) throws IOException {
    try {
      Sequence seq;
      int contigIdx = 0;
      while ((seq = reader.readNextSequence()) != null) {
        final String seqString = seq.getSeqString();
        final int seqLength = seqString.length();
        if (seqLength < kmerSize) {
          continue;
        }
        // use int to represent seqname in case contig names are too long;
        // the second argument is the number of k-mers the contig contains
        contigMap.put(contigIdx, new Contig(seq.getSeqName(), seqLength - kmerSize + 1));

        // forward direction
        NuclKmerGenerator kmerGenerator = new NuclKmerGenerator(seqString, kmerSize);
        while (kmerGenerator.hasNext()) {
          Kmer kmer = kmerGenerator.next();
          KmerAbund kmerAbund = kmerMaps[0].get(kmer);
          if (kmerAbund == null) {
            kmerAbund = new KmerAbund();
            kmerMaps[0].put(kmer, kmerAbund);
          }
          // NOTE(review): presumably getPosition() is 1-based after next(), so this stores a
          // 0-based offset - confirm against NuclKmerGenerator
          kmerAbund.contigList.add(new ContigCoverage(contigIdx, kmerGenerator.getPosition() - 1));
        }

        // reverse direction
        kmerGenerator = new NuclKmerGenerator(IUBUtilities.reverseComplement(seqString), kmerSize);
        while (kmerGenerator.hasNext()) {
          Kmer kmer = kmerGenerator.next();
          KmerAbund kmerAbund = kmerMaps[1].get(kmer);
          if (kmerAbund == null) {
            kmerAbund = new KmerAbund();
            kmerMaps[1].put(kmer, kmerAbund);
          }
          // NOTE(review): the position on the reverse complement is mirrored, presumably so the
          // stored offset is expressed in forward-strand coordinates - confirm
          kmerAbund.contigList.add(
              new ContigCoverage(
                  contigIdx, seqLength - kmerGenerator.getPosition() - kmerSize + 1));
        }
        contigIdx++;
      }
    } finally {
      // release the reader even when an exception escapes the loop above
      reader.close();
    }
  }
Esempio n. 2
0
  /**
   * Maps the k-mers of the reads onto the k-mers of each contig, then writes the per-contig
   * coverage and the k-mer abundance to the two output files.
   *
   * <p>Positional arguments (after option parsing):
   *
   * <ol>
   *   <li>{@code args[0]} - k-mer size, at most {@code Kmer.max_nucl_kmer_size}
   *   <li>{@code args[1]} - query (contig) file
   *   <li>{@code args[2]} - coverage output file
   *   <li>{@code args[3]} - abundance output file
   *   <li>{@code args[4..]} - one or more reads files, processed one at a time
   * </ol>
   *
   * <p>Options read from the command line: {@code match_reads_out} (file handed to
   * {@code processReads} for every read) and {@code threads} (size of the worker pool, default
   * 1).
   *
   * <p>Reads are submitted to a fixed thread pool; the submitting thread pauses while
   * {@code maxTasks} or more tasks are outstanding so that the queue does not grow without bound.
   * Any exception raised here prints the usage text and the stack trace and terminates the JVM
   * with status 1.
   *
   * @param args command-line arguments as described above
   * @throws IOException declared for callers; I/O failures are reported via the usage/exit path
   * @throws InterruptedException declared for callers; see above
   */
  public static void main(String[] args) throws IOException, InterruptedException {
    int kmerSize = 45;
    final int maxThreads;
    final int maxTasks = 1000;
    final PrintStream match_reads_out;
    try {
      CommandLine cmdLine = new PosixParser().parse(options, args);
      args = cmdLine.getArgs();
      if (args.length < 5) {
        throw new Exception("Unexpected number of arguments");
      }
      kmerSize = Integer.parseInt(args[0]);
      if (kmerSize > Kmer.max_nucl_kmer_size) {
        throw new Exception("kmerSize should be less than " + Kmer.max_nucl_kmer_size);
      }
      if (cmdLine.hasOption("match_reads_out")) {
        match_reads_out = new PrintStream(cmdLine.getOptionValue("match_reads_out"));
      } else {
        match_reads_out = null;
      }
      if (cmdLine.hasOption("threads")) {
        maxThreads = Integer.parseInt(cmdLine.getOptionValue("threads"));
        // informational only: the requested pool size is still used as given
        if (maxThreads >= Runtime.getRuntime().availableProcessors()) {
          System.err.println(
              " Runtime.getRuntime().availableProcessors() "
                  + Runtime.getRuntime().availableProcessors());
        }

      } else {
        maxThreads = 1;
      }

      final KmerCoverage kmerCoverage =
          new KmerCoverage(kmerSize, new SequenceReader(new File(args[1])));
      if (kmerCoverage.getTotalContigs() == 0) {
        System.out.println(
            "Found 0 contig with length >= kmer size "
                + kmerSize
                + " in input file "
                + args[1]
                + ". Exit program.");
        return;
      }
      final AtomicInteger outstandingTasks = new AtomicInteger();
      ExecutorService service = Executors.newFixedThreadPool(maxThreads);

      Sequence seq;

      // parse one file at a time
      for (int index = 4; index < args.length; index++) {

        SequenceReader reader = new SequenceReader(new File(args[index]));
        try {
          while ((seq = reader.readNextSequence()) != null) {
            if (seq.getSeqString().length() < kmerSize) {
              continue;
            }
            final Sequence threadSeq = seq;

            Runnable r =
                new Runnable() {

                  public void run() {
                    try {
                      kmerCoverage.processReads(threadSeq, match_reads_out);
                    } catch (Exception e) {
                      e.printStackTrace();
                    } finally {
                      // always release the slot; otherwise failed reads would accumulate and
                      // the submitting thread would wait forever once maxTasks is reached
                      outstandingTasks.decrementAndGet();
                    }
                  }
                };

            outstandingTasks.incrementAndGet();
            service.submit(r);

            // back-pressure: pause instead of spinning while the workers catch up
            while (outstandingTasks.get() >= maxTasks) {
              Thread.sleep(1);
            }
          }
        } finally {
          // release the reads file even if reading or submission fails
          reader.close();
        }
      }
      service.shutdown();
      service.awaitTermination(1, TimeUnit.DAYS);

      kmerCoverage.printCovereage(
          new FileOutputStream(new File(args[2])), new FileOutputStream(new File(args[3])));
      if (match_reads_out != null) {
        match_reads_out.close();
      }
    } catch (Exception e) {
      new HelpFormatter()
          .printHelp(
              "KmerCoverage <kmerSize> <query_file> <coverage_out> <abundance_out> <reads_file> <reads_file>...\nmaximum kmerSize "
                  + Kmer.max_nucl_kmer_size,
              options);
      e.printStackTrace();
      System.exit(1);
    }
  }
  /**
   * Adds the sequences of a reads file to an existing bloom filter and serializes the updated
   * filter to {@code <bloomfilter>.appended}; the input filter file itself is not overwritten.
   *
   * <p>Progress (every 1,000,000 sequences) and timing go to {@code System.err}; the filter
   * statistics are printed to {@code System.out}. The reads reader and the output stream are
   * closed even when processing fails.
   *
   * @param args {@code args[0]} is the bloom filter file, {@code args[1]} the reads file; any
   *     other argument count prints the usage line and exits with status 1
   * @throws Exception if the filter cannot be loaded, a reads file cannot be read, or the
   *     result cannot be written
   */
  public static void main(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("USAGE: BloomFilterAppender <bloomfilter> <read_file>");
      System.exit(1);
    }

    File bloomFilter = new File(args[0]);
    BloomFilter filter = BloomFilter.fromFile(bloomFilter);
    BloomFilter.GraphBuilder graphBuilder = filter.new GraphBuilder();

    long seqCount = 0;

    // drop the filter path so that args holds only the reads file(s)
    args = Arrays.copyOfRange(args, 1, args.length);
    System.err.println("Starting to build bloom filter at " + new Date());
    System.err.println("*  reads file(s):       " + Arrays.asList(args));
    System.err.println("*  bloom output:     " + bloomFilter);
    System.err.println("*  kmer size:        " + filter.getKmerSize());
    System.err.println("*  hash size log2:   " + filter.getHashSizeLog2());
    System.err.println("*  hash count:       " + filter.getHashCount());
    System.err.println("*  bitset size log2: " + filter.getBitsetSize());

    long startTime = System.currentTimeMillis();

    for (String f : args) {
      SequenceReader reader = new SequenceReader(new File(f));
      try {
        Sequence seq;
        while ((seq = reader.readNextSequence()) != null) {

          seqCount++;
          if ((seqCount % 1000000) == 0) {
            System.err.println("p: " + seqCount + " kmers added " + graphBuilder.getKmerAdded());
          }

          graphBuilder.addString(seq.getSeqString().toCharArray());
        }
      } finally {
        // release the reads file even if reading or insertion fails
        reader.close();
      }
    }

    BloomFilterStats.printStats(filter, System.out);
    long endTime = System.currentTimeMillis();

    System.err.println(
        "time to build BloomFilter: " + (endTime - startTime) / 60000.0 + " minutes");

    ObjectOutputStream oos =
        new ObjectOutputStream(
            new BufferedOutputStream(
                new FileOutputStream(bloomFilter.getAbsolutePath() + ".appended")));
    try {
      oos.writeObject(filter);
    } finally {
      // flushes the buffer and releases the file handle even if serialization fails
      oos.close();
    }
  }