예제 #1
0
  /**
   * Command-line entry point: reads every sequence from the input file and writes each sequence
   * returned by {@code ProteinUtils.allTranslate} for it to the output file.
   *
   * <p>Prints a usage line to standard error and exits with status 1 unless exactly two arguments
   * are supplied.
   *
   * @param args {@code args[0]} is the input nucleotide sequence file, {@code args[1]} the output
   *     file
   * @throws IOException if reading the input or writing the output fails
   */
  public static void main(String[] args) throws IOException {
    if (args.length != 2) {
      System.err.println("USAGE: AllTranslate <in_nucl_file> <out_file>");
      System.exit(1);
    }

    // Obtained before any file is opened so a failure here cannot leak a handle.
    ProteinUtils protUtils = ProteinUtils.getInstance();

    SeqReader reader = new SequenceReader(new File(args[0]));
    try {
      FastaWriter writer = new FastaWriter(new PrintStream(args[1]));
      try {
        Sequence seq;
        while ((seq = reader.readNextSequence()) != null) {
          for (Sequence protSeq : protUtils.allTranslate(seq)) {
            writer.writeSeq(protSeq);
          }
        }
      } finally {
        // Close (and thereby flush) the output even when reading or translating fails.
        writer.close();
      }
    } finally {
      reader.close();
    }
  }
예제 #2
0
  /**
   * Reads every sequence from an SFF file and writes two files into {@code workDir}:
   * {@code <outfilePrefix>.fasta}, which receives each sequence, and {@code <outfilePrefix>.qual},
   * which receives each sequence name with an empty description and the result of
   * {@code SeqUtils.translateQualString} applied to the sequence's quality values.
   *
   * <p>All three resources are released even when reading or writing fails part-way through.
   *
   * @param sffFile the SFF file to read
   * @param workDir directory in which the two output files are created
   * @param outfilePrefix file name (without extension) shared by both output files
   * @throws IOException if reading the SFF file or writing either output file fails
   */
  private static void convertToFasta(File sffFile, File workDir, String outfilePrefix)
      throws IOException {
    SFFCore core = new SFFCore(sffFile);
    try {
      FastaWriter fastaWriter = new FastaWriter(new File(workDir, outfilePrefix + ".fasta"));
      try {
        FastaWriter qualWriter = new FastaWriter(new File(workDir, outfilePrefix + ".qual"));
        try {
          QSequence seq;
          while ((seq = core.readNextSeq()) != null) {
            fastaWriter.writeSeq(seq);
            qualWriter.writeSeq(
                seq.getSeqName(), "", SeqUtils.translateQualString(seq.getQuality()));
          }
        } finally {
          qualWriter.close();
        }
      } finally {
        fastaWriter.close();
      }
    } finally {
      core.close();
    }
  }
예제 #3
0
  /**
   * Command-line entry point for the hmmgs search.
   *
   * <p>Optional arguments, which must all precede the positional ones:
   *
   * <ul>
   *   <li>{@code -u} read both HMMs with {@code HMMER3bParser.readUnnormalized} instead of
   *       {@code readModel}
   *   <li>{@code -p <int>} heuristic pruning value passed to {@code HMMGraphSearch}
   *   <li>{@code -w <double>} heuristic weight
   *   <li>{@code -m <static|dynamic|revised_dynamic>} heuristic weight method
   *   <li>{@code -h} print usage and exit with status 1
   * </ul>
   *
   * <p>Positional arguments, in order: number of paths {@code k}, per-kmer time limit in seconds,
   * bloom filter file, forward HMM file, reverse HMM file, kmer starts file.
   *
   * <p>Result rows are printed to standard output and progress messages to standard error.
   * Nucleotide contigs are written to {@code <kmers file name>_nucl.fasta} and, when the forward
   * HMM's alphabet is protein, protein contigs to {@code <kmers file name>_prot.fasta}; both
   * paths are relative. Standard output is closed before the method returns.
   *
   * @param args command-line arguments as described above
   * @throws Exception if an argument cannot be parsed or any input/output file cannot be
   *     processed
   */
  public static void main(String[] args) throws Exception {
    if ((args.length < 6 && args.length != 1) || args.length > 13) {
      printUsage(true);
      System.exit(1);
    }

    // check for optional arguments
    boolean normalized = true;
    int heuristicPruning = HMMGraphSearch.PRUNE_NODE;
    String hweightstr = "static";
    double weight = 1.0;

    // Options are only recognised as a prefix of the argument list because everything after them
    // is addressed by position. optCount doubles as the scan cursor and as the number of leading
    // tokens to strip afterwards. Scanning is bounded by args.length, so a short argument list
    // can no longer index past the end of the array, and an option in the seventh slot is seen.
    int optCount = 0;
    boolean inOptions = true;
    while (inOptions && optCount < args.length) {
      String opt = args[optCount];
      if (opt.equals("-h")) {
        printUsage(true);
        System.exit(1);
      } else if (opt.equals("-u")) {
        normalized = false;
        optCount++;
      } else if (opt.equals("-p") || opt.equals("-w") || opt.equals("-m")) {
        if (optCount + 1 >= args.length) {
          // A value-taking option was given as the very last token.
          printUsage(true);
          System.exit(1);
        }
        String value = args[optCount + 1];
        if (opt.equals("-p")) {
          heuristicPruning = Integer.parseInt(value);
        } else if (opt.equals("-w")) {
          weight = Double.parseDouble(value);
        } else {
          hweightstr = value;
        }
        optCount += 2;
      } else {
        // First token that is not an option: the positional arguments start here.
        inOptions = false;
      }
    }

    // All six positional arguments are read below; fail with the usage text instead of an
    // ArrayIndexOutOfBoundsException when some are missing.
    if (args.length - optCount < 6) {
      printUsage(true);
      System.exit(1);
    }

    if (optCount > 0) {
      args = Arrays.copyOfRange(args, optCount, args.length);
    }

    int k = Integer.parseInt(args[0]);
    long timeLimit = Long.parseLong(args[1]);

    File bloomFile = new File(args[2]);
    File forHMMFile = new File(args[3]);
    File revHMMFile = new File(args[4]);
    File kmersFile = new File(args[5]);

    File nuclOutFile = new File(kmersFile.getName() + "_nucl.fasta");
    File protOutFile = new File(kmersFile.getName() + "_prot.fasta");

    HeuristicWeight hweight;
    if (hweightstr.equals("static")) {
      hweight = new StaticHeuristicWeight(weight);
    } else if (hweightstr.equals("dynamic")) {
      hweight = new DynamicHeuristicWeight(weight);
    } else if (hweightstr.equals("revised_dynamic")) {
      hweight = new RevisedDynamicHeuristicWeight(weight);
    } else {
      throw new RuntimeException("Invalid argument for heuristic weight: " + hweightstr);
    }

    ProfileHMM forHMM;
    ProfileHMM revHMM;

    if (normalized) {
      forHMM = HMMER3bParser.readModel(forHMMFile);
      revHMM = HMMER3bParser.readModel(revHMMFile);
    } else {
      forHMM = HMMER3bParser.readUnnormalized(forHMMFile);
      revHMM = HMMER3bParser.readUnnormalized(revHMMFile);
    }

    boolean isProt = forHMM.getAlphabet() == SequenceType.Protein;

    int kmerCount = 0;
    int contigCount = 0;

    FastaWriter nuclOut = new FastaWriter(nuclOutFile);
    FastaWriter protOut = null;

    // Everything after the first writer is opened runs inside this try so that the writers are
    // released on any failure, not only on failures inside the search loop.
    try {
      if (isProt) {
        protOut = new FastaWriter(protOutFile);
      }

      long startTime;

      startTime = System.currentTimeMillis();
      // NOTE(review): this is Java-native deserialization of a user-supplied file; only bloom
      // files from a trusted source should be loaded.
      BloomFilter bloom;
      FileInputStream bloomIn = new FileInputStream(bloomFile);
      try {
        ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(bloomIn));
        bloom = (BloomFilter) ois.readObject();
      } finally {
        // Closing the underlying file stream releases the handle whether or not the object
        // stream was constructed or read successfully.
        bloomIn.close();
      }
      System.err.println(
          "Bloom filter loaded in " + (System.currentTimeMillis() - startTime) + " ms");

      System.err.println("Starting hmmgs search at " + new Date());
      System.err.println("*  Kmer file:               " + kmersFile);
      System.err.println("*  Bloom file:              " + bloomFile);
      System.err.println("*  Forward hmm file:        " + forHMMFile);
      System.err.println("*  Reverse hmm file:        " + revHMMFile);
      System.err.println("*  Searching prot?:         " + isProt);
      System.err.println("*  # paths:                 " + k);
      System.err.println("*  Nucl contigs out file    " + nuclOutFile);
      System.err.println("*  Prot contigs out file    " + protOutFile);
      System.err.println("*  heuristicPruning         " + heuristicPruning);
      // these are experimental options, not used
      // System.err.println("*  HeuristicWeightMethod    " + hweightstr);
      // System.err.println("*  HeuristicWeight          " + weight);

      startTime = System.currentTimeMillis();
      HMMBloomSearch.printHeader(System.out, isProt);

      KmerStart line;
      // NOTE(review): the kmer reader is never closed here; whether KmerStartsReader exposes a
      // close method is not visible from this method - confirm and release it if so.
      KmerStartsReader reader = new KmerStartsReader(kmersFile);
      HMMGraphSearch search = new HMMGraphSearch(k, heuristicPruning);

      while ((line = reader.readNext()) != null) {
        search.setHWeight(hweight);

        kmerCount++;

        if (line.getMpos() == 0) {
          System.err.println("Skipping line " + line.getKmer());
          continue;
        }

        TimeStamppedFutureTask future =
            new TimeStamppedFutureTask(
                new TimeLimitedSearchThread(
                    search,
                    new SearchTarget(
                        line.getGeneName(),
                        line.getQueryId(),
                        line.getRefId(),
                        line.getNuclKmer(),
                        0,
                        line.getMpos() - 1,
                        forHMM,
                        revHMM,
                        bloom)));

        // Each search runs on its own daemon thread so that it can be abandoned once the time
        // limit passes.
        Thread t = new Thread(future);
        t.setDaemon(true);
        t.setPriority(Thread.MAX_PRIORITY);
        t.start();

        try {
          List<SearchResult> searchResults = future.get(timeLimit, TimeUnit.SECONDS);

          for (SearchResult result : searchResults) {
            String seqid = "contig_" + (contigCount++);

            HMMBloomSearch.printResult(seqid, isProt, result, System.out);

            nuclOut.writeSeq(seqid, result.getNuclSeq());
            if (isProt) {
              protOut.writeSeq(seqid, result.getProtSeq());
            }
          }

        } catch (TimeoutException e) {
          // Emit a placeholder row for the kmer whose search exceeded the time limit.
          System.out.println(
              "-\t" + future.getStartingWord() + (isProt ? "\t-" : "") + "\t-\t-\t-\t-");
          future.cancel(true);
        } catch (Exception e) {
          // Any other failure is reported for this kmer and the run continues with the next.
          System.out.println(
              "-\t" + future.getStartingWord() + (isProt ? "\t-" : "") + "\t-\t-\t-\t-");
          e.printStackTrace();
          if (e.getCause() != null) {
            e.getCause().printStackTrace();
          }
          future.cancel(true);
        }
      }
      System.err.println(
          "Read in "
              + kmerCount
              + " kmers and created "
              + contigCount
              + " contigs in "
              + (System.currentTimeMillis() - startTime) / 1000f
              + " seconds");
    } finally {
      // Nested so that a failure while closing one output cannot skip the remaining closes.
      try {
        nuclOut.close();
      } finally {
        try {
          if (protOut != null) {
            protOut.close();
          }
        } finally {
          System.out.close();
        }
      }
    }
  }