/**
 * Renders the given sequences as FASTA-style text.
 *
 * <p>For every sequence, in argument order, one header line consisting of {@code '>'} followed
 * by the sequence name is written, followed by one line holding the sequence string. Each line is
 * terminated with {@code '\n'}.
 *
 * @param sequences the sequences to render; may be empty, in which case an empty string results
 * @return the concatenated FASTA records
 */
public static String toFasta(Sequence... sequences) {
  StringBuilder fasta = new StringBuilder();
  for (int idx = 0; idx < sequences.length; idx++) {
    Sequence entry = sequences[idx];
    fasta
        .append('>')
        .append(entry.getName())
        .append('\n')
        .append(entry.seqString())
        .append('\n');
  }
  return fasta.toString();
}
/**
 * Builds the name-to-sequence map that is handed to the super class.
 *
 * <p>Each sequence is registered under its own name. If both sequences report the same name, the
 * entry for {@code s2} replaces the one for {@code s1}.
 *
 * @param s1 the first sequence
 * @param s2 the second sequence
 * @return a new mutable map holding both sequences keyed by {@link Sequence#getName()}
 */
private static Map<String, SymbolList> createHashMap(Sequence s1, Sequence s2) {
  Map<String, SymbolList> byName = new HashMap<String, SymbolList>();
  for (Sequence seq : new Sequence[] {s1, s2}) {
    byName.put(seq.getName(), seq);
  }
  return byName;
}
/** * This method provides a BLAST-like formated alignment from the given <code>String</code>s, in * which the sequence coordinates and the information "Query" or "Sbjct", respectively is added to * each line. Each line contains <code>width</code> sequence characters including the gap symbols * plus the meta information. There is one white line between two pairs of sequences. * * @param width the number of symbols to be displayed per line. * @return formated String. * @throws BioException */ public String formatOutput(int width) throws BioException { int i, j; /* * Highlights equal symbols within the alignment, String match/missmatch * representation */ StringBuilder path = new StringBuilder(); for (i = 0; i < Math.min(queryEnd - queryStart, subjectEnd - subjectStart) + 1; i++) { Symbol a = query.symbolAt(i + queryStart); Symbol b = subject.symbolAt(i + subjectStart); if (!a.equals(query.getAlphabet().getGapSymbol()) && !b.equals(subject.getAlphabet().getGapSymbol()) && ((subMatrix.getValueAt(a, b) >= 0) || a.equals(b))) { path.append('|'); } else { path.append(' '); } } int maxLength = path.length(); /* * Math.max(queryEnd - queryStart, subjectEnd - subjectStart) + 1; */ Formatter output = new Formatter(); output.format("%n Time (ms): %s%n", time); output.format(" Length: %d%n", maxLength); output.format(" Score: %d%n", getScore()); output.format(" Query: %s, Length: %d%n", query.getName(), query.length() - nGapsQ); output.format(" Sbjct: %s, Length: %d%n", subject.getName(), subject.length() - nGapsS); output.format( " Identities: %d/%d, i.e., %d %% (query) and %d %% (sbjct)%n", identicals, maxLength, Math.round(getPercentIdentityQuery()), Math.round(getPercentIdentitySubject())); output.format( " Similars: %d/%d, i.e., %d %% (query) and %d %% (sbjct)%n", similars, maxLength, Math.round(getPercentSimilarityQuery()), Math.round(getPercentSimilaritySubject())); output.format( " No. 
gaps: %d (%d %%) in query and %d (%d %%) in sbjct%n", nGapsQ, Math.round(getPercentGapsQuery()), nGapsS, Math.round(getPercentGapsTarget())); int queryLPos = queryStart, queryRPos, pathLPos = 0, pathRPos; int subjectLPos = subjectStart, subjectRPos; int ql = queryLPos - 1, qr = queryLPos - 1, qgaps; int sl = subjectLPos - 1, sr = subjectLPos - 1, sgaps; int widthLeft = String.valueOf(Math.max(queryStart, queryEnd)).length(); int widthRight = String.valueOf(Math.max(queryEnd, subjectEnd)).length() + 1; // Take width of the meta information into account. width = Math.max(width - widthLeft - widthRight - 12, 2); for (i = 1; i <= Math.ceil((double) maxLength / width); i++) { // Query queryRPos = Math.min( queryStart + i * width - 1, Math.min(queryEnd, subjectEnd - subjectStart + queryStart)); qgaps = 0; for (j = queryLPos; j <= queryRPos; j++) { if (!query.symbolAt(j).equals(query.getAlphabet().getGapSymbol())) { qr++; } else { qgaps++; } } if (qgaps <= queryRPos - queryLPos) { ql++; } output.format("%nQuery: %" + widthLeft + "d ", ql); output.format("%s ", query.subStr(queryLPos, queryRPos)); output.format("%-" + widthRight + "d%n", qr); queryLPos = queryRPos + 1; ql = qr; // Path pathRPos = Math.min(i * width, path.length()); output.format( "%-" + (widthLeft + 10) + "c%s", Character.valueOf(' '), path.substring(pathLPos, pathRPos)); pathLPos = pathRPos; // Sbjct subjectRPos = Math.min( subjectStart + i * width - 1, Math.min(queryEnd - queryStart + subjectStart, subjectEnd)); sgaps = 0; for (j = subjectLPos; j <= subjectRPos; j++) { if (!subject.symbolAt(j).equals(subject.getAlphabet().getGapSymbol())) { sr++; } else { sgaps++; } } if (sgaps <= subjectRPos - subjectLPos) { sl++; } output.format("%nSbjct: %" + widthLeft + "d ", sl); output.format("%s ", subject.subStr(subjectLPos, subjectRPos)); output.format("%-" + widthRight + "d%n", sr); subjectLPos = subjectRPos + 1; sl = sr; } return output.toString(); }
/**
 * Randomly samples sequences from the FASTA file {@code seqFile} and writes them, in FASTA
 * format, to the given output files.
 *
 * <p>The {@code count} array must have exactly one entry per output file; entry <i>k</i> is the
 * number of sequences written to output <i>k</i>. If the numbers differ, an error is printed to
 * {@code System.err} and the JVM exits with status 1. When there are no output files (and
 * therefore no counts) {@code System.out} is used as the only output.
 *
 * <p>Without replacement, every input sequence is chosen at most once across all outputs. With
 * replacement, a sequence may be chosen repeatedly and, if {@code length > 0}, a random
 * subsequence of that length is taken instead of the whole sequence. If {@code uniqueNames} is
 * set, each written sequence is renamed to {@code name + "_" + outputIndex}.
 *
 * <p>The input reader and all output files opened by this method are closed before it returns;
 * {@code System.out} is never closed.
 *
 * @param args paths of the output files; {@code null} is treated like an empty array
 * @throws BioException if a sequence cannot be read from the input
 * @throws IOException if the input cannot be read or an output cannot be opened or written
 */
public void main(String[] args) throws BioException, IOException {
  // args may legitimately be null; treat it as "no output files" instead of dereferencing it.
  int outputCount = (args == null) ? 0 : args.length;
  if (count.length != outputCount) {
    System.err.println("The number of counts and output file arguments does not match!");
    System.exit(1);
  }

  OutputStream[] outputStreams =
      (outputCount > 0) ? new OutputStream[outputCount] : new OutputStream[] {System.out};
  try {
    // Open the outputs inside the try so that a failure part-way through still closes the
    // streams that were already opened.
    for (int k = 0; k < outputCount; k++) {
      outputStreams[k] = new BufferedOutputStream(new FileOutputStream(args[k]));
    }

    // Load all input sequences into memory, then release the file handle.
    List<Sequence> seqs = new ArrayList<Sequence>();
    BufferedReader reader = new BufferedReader(new FileReader(seqFile));
    try {
      RichSequenceIterator seqIterator = RichSequence.IOTools.readFastaDNA(reader, null);
      while (seqIterator.hasNext()) {
        seqs.add(seqIterator.nextSequence());
      }
    } finally {
      reader.close();
    }

    List<List<Sequence>> chosenSeqs = new ArrayList<List<Sequence>>();
    if (!sampleWithReplacement) {
      // Sampling without replacement: a chosen sequence is removed from the pool.
      // NOTE(review): 'length' is ignored on this path; confirm that this is intended.
      for (int c : count) {
        List<Sequence> cseqs = new ArrayList<Sequence>();
        chosenSeqs.add(cseqs);
        while (c > 0) {
          int randSeqIndex = random.nextInt(seqs.size());
          cseqs.add(seqs.remove(randSeqIndex));
          c--;
        }
      }
    } else {
      // Sampling with replacement, optionally cutting out a random window of 'length' symbols.
      for (int c : count) {
        int subSeqCounter = 0;
        List<Sequence> cseqs = new ArrayList<Sequence>();
        chosenSeqs.add(cseqs);
        while (c > 0) {
          Sequence randomSeq = seqs.get(random.nextInt(seqs.size()));
          if (length > 0) {
            int startPos = random.nextInt(1 + randomSeq.length() - length);
            cseqs.add(
                new SimpleSequence(
                    randomSeq.subList(startPos, startPos + length),
                    null,
                    randomSeq.getName() + "_" + subSeqCounter++,
                    Annotation.EMPTY_ANNOTATION));
          } else {
            cseqs.add(randomSeq);
          }
          c--;
        }
      }
    }

    // Write the k-th sample to the k-th output.
    int outIndex = 0;
    for (List<Sequence> seqList : chosenSeqs) {
      for (Sequence seq : seqList) {
        Sequence s;
        if (uniqueNames) {
          // NOTE(review): the suffix is the output index, so a sequence written twice to the
          // same output receives the same name both times; confirm that this is intended.
          s =
              new SimpleSequence(
                  seq.subList(1, seq.length()),
                  null,
                  seq.getName() + "_" + outIndex,
                  Annotation.EMPTY_ANNOTATION);
        } else {
          s = seq;
        }
        RichSequence.IOTools.writeFasta(outputStreams[outIndex], s, null);
        outputStreams[outIndex].flush();
      }
      outIndex++;
    }
  } finally {
    // Close only the streams this method opened. Every write is followed by flush(), so a
    // failing close() is reported rather than allowed to mask an earlier exception.
    for (OutputStream out : outputStreams) {
      if ((out != null) && (out != System.out)) {
        try {
          out.close();
        } catch (IOException closeFailure) {
          System.err.println("Could not close output stream: " + closeFailure.getMessage());
        }
      }
    }
  }
}