/**
 * Command-line entry point: reads every sequence from the input file and writes
 * each sequence returned by {@code ProteinUtils.allTranslate} for it to the
 * output file in FASTA format.
 *
 * <p>Prints a usage message and exits with status 1 unless exactly two
 * arguments are supplied.
 *
 * @param args {@code args[0]} is the input nucleotide file, {@code args[1]} the output file
 * @throws IOException if reading the input or writing the output fails
 */
public static void main(String[] args) throws IOException {
    if (args.length != 2) {
        System.err.println("USAGE: AllTranslate <in_nucl_file> <out_file>");
        System.exit(1);
    }

    SeqReader reader = new SequenceReader(new File(args[0]));
    try {
        FastaWriter writer = new FastaWriter(new PrintStream(args[1]));
        try {
            ProteinUtils protUtils = ProteinUtils.getInstance();
            Sequence seq;
            while ((seq = reader.readNextSequence()) != null) {
                for (Sequence protSeq : protUtils.allTranslate(seq)) {
                    writer.writeSeq(protSeq);
                }
            }
        } finally {
            // Close (and thereby flush) the output even when reading or translation fails.
            writer.close();
        }
    } finally {
        // The input reader was previously never closed.
        reader.close();
    }
}
/**
 * Converts an SFF file into two files inside {@code workDir}:
 * {@code <outfilePrefix>.fasta} receives every sequence read from the SFF
 * file, and {@code <outfilePrefix>.qual} receives, per sequence, its name, an
 * empty second field and the quality values rendered by
 * {@code SeqUtils.translateQualString}.
 *
 * @param sffFile       the SFF file to read
 * @param workDir       directory in which both output files are created
 * @param outfilePrefix file name (without extension) shared by both output files
 * @throws IOException if reading the SFF file or writing either output fails
 */
private static void convertToFasta(File sffFile, File workDir, String outfilePrefix)
        throws IOException {
    // Nested try/finally so that every resource opened so far is released even
    // when a later open or a read/write fails. Close order matches the original:
    // qual writer, fasta writer, then the SFF reader.
    SFFCore core = new SFFCore(sffFile);
    try {
        FastaWriter fastaWriter = new FastaWriter(new File(workDir, outfilePrefix + ".fasta"));
        try {
            FastaWriter qualWriter = new FastaWriter(new File(workDir, outfilePrefix + ".qual"));
            try {
                QSequence seq;
                while ((seq = core.readNextSeq()) != null) {
                    fastaWriter.writeSeq(seq);
                    qualWriter.writeSeq(
                            seq.getSeqName(), "", SeqUtils.translateQualString(seq.getQuality()));
                }
            } finally {
                qualWriter.close();
            }
        } finally {
            fastaWriter.close();
        }
    } finally {
        core.close();
    }
}
public static void main(String[] args) throws Exception { if ((args.length < 6 && args.length != 1) || args.length > 13) { printUsage(true); System.exit(1); } // check for optional arguments boolean normalized = true; int heuristicPruning = HMMGraphSearch.PRUNE_NODE; String hweightstr = "static"; double weight = 1.0; int optCount = 0; for (int i = 0; i < 6; ++i) { if (args[i].equals("-u")) { normalized = false; optCount++; } else if (args[i].equals("-p")) { heuristicPruning = Integer.parseInt(args[i + 1]); i++; optCount += 2; } else if (args[i].equals("-w")) { weight = Double.parseDouble(args[i + 1]); i++; optCount += 2; } else if (args[i].equals("-m")) { hweightstr = args[i + 1]; i++; optCount += 2; } else if (args[i].equals("-h")) { printUsage(true); System.exit(1); } } if (optCount > 0) { args = Arrays.copyOfRange(args, optCount, args.length); } int k = Integer.valueOf(args[0]); long timeLimit = Long.valueOf(args[1]); File bloomFile = new File(args[2]); File forHMMFile = new File(args[3]); File revHMMFile = new File(args[4]); File kmersFile = new File(args[5]); File nuclOutFile = new File(kmersFile.getName() + "_nucl.fasta"); File protOutFile = new File(kmersFile.getName() + "_prot.fasta"); HeuristicWeight hweight; if (hweightstr.equals("static")) { hweight = new StaticHeuristicWeight(weight); } else if (hweightstr.equals("dynamic")) { hweight = new DynamicHeuristicWeight(weight); } else if (hweightstr.equals("revised_dynamic")) { hweight = new RevisedDynamicHeuristicWeight(weight); } else { throw new RuntimeException("Invalid argument for heuristic weight: " + hweightstr); } ProfileHMM forHMM; ProfileHMM revHMM; if (normalized) { forHMM = HMMER3bParser.readModel(forHMMFile); revHMM = HMMER3bParser.readModel(revHMMFile); } else { forHMM = HMMER3bParser.readUnnormalized(forHMMFile); revHMM = HMMER3bParser.readUnnormalized(revHMMFile); } FastaWriter nuclOut = new FastaWriter(nuclOutFile); FastaWriter protOut = null; boolean isProt = forHMM.getAlphabet() == 
SequenceType.Protein; if (isProt) { protOut = new FastaWriter(protOutFile); } int kmerCount = 0; int contigCount = 0; long startTime; startTime = System.currentTimeMillis(); ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(new FileInputStream(bloomFile))); BloomFilter bloom = (BloomFilter) ois.readObject(); ois.close(); System.err.println( "Bloom filter loaded in " + (System.currentTimeMillis() - startTime) + " ms"); System.err.println("Starting hmmgs search at " + new Date()); System.err.println("* Kmer file: " + kmersFile); System.err.println("* Bloom file: " + bloomFile); System.err.println("* Forward hmm file: " + forHMMFile); System.err.println("* Reverse hmm file: " + revHMMFile); System.err.println("* Searching prot?: " + isProt); System.err.println("* # paths: " + k); System.err.println("* Nucl contigs out file " + nuclOutFile); System.err.println("* Prot contigs out file " + protOutFile); System.err.println("* heuristicPruning " + heuristicPruning); // these are experimental options, not used // System.err.println("* HeuristicWeightMethod " + hweightstr); // System.err.println("* HeuristicWeight " + weight); startTime = System.currentTimeMillis(); HMMBloomSearch.printHeader(System.out, isProt); KmerStart line; KmerStartsReader reader = new KmerStartsReader(kmersFile); HMMGraphSearch search = new HMMGraphSearch(k, heuristicPruning); try { while ((line = reader.readNext()) != null) { search.setHWeight(hweight); kmerCount++; if (line.getMpos() == 0) { System.err.println("Skipping line " + line.getKmer()); continue; } TimeStamppedFutureTask future = new TimeStamppedFutureTask( new TimeLimitedSearchThread( search, new SearchTarget( line.getGeneName(), line.getQueryId(), line.getRefId(), line.getNuclKmer(), 0, line.getMpos() - 1, forHMM, revHMM, bloom))); Thread t = new Thread(future); t.setDaemon(true); t.setPriority(Thread.MAX_PRIORITY); t.start(); try { List<SearchResult> searchResults = future.get(timeLimit, TimeUnit.SECONDS); for 
(SearchResult result : searchResults) { String seqid = "contig_" + (contigCount++); HMMBloomSearch.printResult(seqid, isProt, result, System.out); nuclOut.writeSeq(seqid, result.getNuclSeq()); if (isProt) { protOut.writeSeq(seqid, result.getProtSeq()); } } } catch (TimeoutException e) { System.out.println( "-\t" + future.getStartingWord() + (isProt ? "\t-" : "") + "\t-\t-\t-\t-"); future.cancel(true); } catch (Exception e) { System.out.println( "-\t" + future.getStartingWord() + (isProt ? "\t-" : "") + "\t-\t-\t-\t-"); e.printStackTrace(); if (e.getCause() != null) { e.getCause().printStackTrace(); } future.cancel(true); } } System.err.println( "Read in " + kmerCount + " kmers and created " + contigCount + " contigs in " + (System.currentTimeMillis() - startTime) / 1000f + " seconds"); } finally { nuclOut.close(); if (isProt) { protOut.close(); } System.out.close(); } }