public ArrayList<Double> mapPDScoresToAlignment( ArrayList<Double> pdScores, Sequence seq, ArrayList<Integer> indicesOfRowsWithSumZero) { ArrayList<Double> result = new ArrayList<Double>(); int seqLength = seq.length(); int nonGapPosition = 0; Symbol gap = seq.getAlphabet().getGapSymbol(); for (int i = 0; i < seqLength; i++) { double oneScore = Double.MIN_VALUE; boolean isSumZero = false; isSumZero = (indicesOfRowsWithSumZero.contains(i)); boolean isGap = false; isGap = (seq.symbolAt(i + 1) == gap); // note: these +1 in indices are because seq starts from one and not zero! if (isSumZero) { oneScore = 0.0; } else { oneScore = pdScores.get(nonGapPosition); nonGapPosition++; } result.add(oneScore); } return result; } /*mapPDScoresToAlignment*/
public ArrayList<Double> mapViterbiPathToAlignment( ArrayList<String> viterbiPath, Sequence seq, ArrayList<Integer> indicesOfRowsWithSumZero) { ArrayList<Double> result = new ArrayList<Double>(); int seqLength = seq.length(); int nonGapPosition = 0; Symbol gap = seq.getAlphabet().getGapSymbol(); for (int i = 0; i < seqLength; i++) { double oneScore = Double.MIN_VALUE; boolean isSumZero = false; isSumZero = (indicesOfRowsWithSumZero.contains(i)); boolean isGap = false; isGap = (seq.symbolAt(i + 1) == gap); // note: these +1 in indices are because seq and viterbi path start from one // and not zero! if (isSumZero) { oneScore = 0.0; } else { if (viterbiPath.get(nonGapPosition).equals("M")) { oneScore = 0.0; } else if (viterbiPath.get(nonGapPosition).equals("R")) { oneScore = 2.0; } else if (viterbiPath.get(nonGapPosition).equals("r")) { oneScore = 2.0; } else if (viterbiPath.get(nonGapPosition).equals("G")) { oneScore = 2.0; } else if (viterbiPath.get(nonGapPosition).equals("g")) { oneScore = 2.0; } else if (viterbiPath.get(nonGapPosition).equals("E")) { oneScore = 1.0; } else if (viterbiPath.get(nonGapPosition).equals("e")) { oneScore = 1.0; } else if (viterbiPath.get(nonGapPosition).equals("J")) { oneScore = 1.5; } else { System.err.println("Unknown charecter detected as a state name!"); } nonGapPosition++; } result.add(oneScore); } return result; } /*mapViterbiPathToAlignment*/
/**
 * Expresses the query gap count as a percentage of the query length.
 *
 * @return {@code nGapsQ} divided by {@code query.length()}, multiplied by 100
 */
public float getPercentGapsQuery() {
  final float queryLength = query.length();
  final float gapFraction = nGapsQ / queryLength;
  return gapFraction * 100;
}
public LinkedHashMap<SimpleAlphabet, SimpleSymbolList> getAlphabetAndSimpleSymbolList( Matrix2D m, Sequence sequence) throws IllegalSymbolException { LinkedHashMap<SimpleAlphabet, SimpleSymbolList> alphabetAndSymbolList = new LinkedHashMap<SimpleAlphabet, SimpleSymbolList>(); SimpleAlphabet alphabet = new SimpleAlphabet(); List<AtomicSymbol> listOfSymbols = new ArrayList<AtomicSymbol>(); alphabet.setName("ObservedSequenceAlphabet"); int numberofRows = m.rows(); int seqLength = sequence.length(); if (numberofRows != seqLength) { System.err.print( "It was assumed your sequence has a length equal to the number of rows of the matrix, but found a case that is not true!"); } Symbol gap = sequence.getAlphabet().getGapSymbol(); for (int i = 0; i < numberofRows; i++) { List<Symbol> oneListOfSymbol = new ArrayList<Symbol>(3); // red is match, green is flanking and blue is background double redValue = m.get(i, 0); double greenValue = m.get(i, 1); double blueValue = m.get(i, 2); double onesum = redValue + greenValue + blueValue; boolean isGap = false; isGap = (sequence.symbolAt(i + 1) == gap); if (onesum == 0) { // sum of this value is supposed to be one, but for some rows it sums up to // zero, this is to ignore those up to time we found out why these naouthy rows // sums up to zero! // continue; // note these three lines is only a dummy solution for positions where red, green and blue // are summed up to zero! 
redValue = 0.3333; greenValue = 0.3333; blueValue = 1 - (redValue + greenValue); } // make one triplet symbol from three symbols Symbol redSymbol = AlphabetManager.createSymbol(Double.toString(redValue)); Symbol greenSymol = AlphabetManager.createSymbol(Double.toString(greenValue)); Symbol blueSymbol = AlphabetManager.createSymbol(Double.toString(blueValue)); oneListOfSymbol.add(redSymbol); oneListOfSymbol.add(greenSymol); oneListOfSymbol.add(blueSymbol); // now create symbol and add it to alphabet AtomicSymbol oneSym = (AtomicSymbol) AlphabetManager.createSymbol( Annotation.EMPTY_ANNOTATION, oneListOfSymbol, alphabet); alphabet.addSymbol(oneSym); listOfSymbols.add(oneSym); } SimpleSymbolList ssl = new SimpleSymbolList(alphabet, listOfSymbols); alphabetAndSymbolList.put(alphabet, ssl); return alphabetAndSymbolList; } /*getAlphabetAndSimpleSymbolList*/
/**
 * Produces a BLAST-like formatted rendering of this alignment.
 *
 * <p>The output starts with a summary header (time, length, score, sequence names with their
 * ungapped lengths, identities, similars and gap counts). It is followed by the alignment in
 * blocks of three lines: a "Query" line, a match line and a "Sbjct" line. Each sequence line
 * carries the coordinate counters for its left and right end; the counters advance only on
 * non-gap symbols.
 *
 * <p>The match line contains {@code '|'} at every column where neither symbol is a gap and the
 * pair either scores {@code >= 0} in the substitution matrix or the two symbols are equal. All
 * other columns contain a blank.
 *
 * @param width the requested total line width. The width needed for the coordinate columns plus
 *     12 further characters is subtracted from it, and at least 2 symbols are shown per line.
 * @return the formatted alignment
 * @throws BioException declared by this method; NOTE(review): presumably raised by the
 *     substitution-matrix lookup, confirm against {@code subMatrix.getValueAt}
 */
public String formatOutput(int width) throws BioException {
  int i, j;
  /*
   * Build the match/mismatch line: one character per aligned column, covering the shorter of
   * the query and subject ranges.
   */
  StringBuilder path = new StringBuilder();
  for (i = 0; i < Math.min(queryEnd - queryStart, subjectEnd - subjectStart) + 1; i++) {
    Symbol a = query.symbolAt(i + queryStart);
    Symbol b = subject.symbolAt(i + subjectStart);
    if (!a.equals(query.getAlphabet().getGapSymbol())
        && !b.equals(subject.getAlphabet().getGapSymbol())
        && ((subMatrix.getValueAt(a, b) >= 0) || a.equals(b))) {
      path.append('|');
    } else {
      path.append(' ');
    }
  }
  // The number of aligned columns; it drives both the header and the number of blocks.
  int maxLength = path.length();
  /*
   * Math.max(queryEnd - queryStart, subjectEnd - subjectStart) + 1;
   */
  // Summary header.
  Formatter output = new Formatter();
  output.format("%n Time (ms): %s%n", time);
  output.format(" Length: %d%n", maxLength);
  output.format(" Score: %d%n", getScore());
  output.format(" Query: %s, Length: %d%n", query.getName(), query.length() - nGapsQ);
  output.format(" Sbjct: %s, Length: %d%n", subject.getName(), subject.length() - nGapsS);
  output.format(
      " Identities: %d/%d, i.e., %d %% (query) and %d %% (sbjct)%n",
      identicals,
      maxLength,
      Math.round(getPercentIdentityQuery()),
      Math.round(getPercentIdentitySubject()));
  output.format(
      " Similars: %d/%d, i.e., %d %% (query) and %d %% (sbjct)%n",
      similars,
      maxLength,
      Math.round(getPercentSimilarityQuery()),
      Math.round(getPercentSimilaritySubject()));
  output.format(
      " No. gaps: %d (%d %%) in query and %d (%d %%) in sbjct%n",
      nGapsQ,
      Math.round(getPercentGapsQuery()),
      nGapsS,
      Math.round(getPercentGapsTarget()));
  // *LPos / *RPos are column positions (gaps included) of the current block's left/right edge.
  int queryLPos = queryStart, queryRPos, pathLPos = 0, pathRPos;
  int subjectLPos = subjectStart, subjectRPos;
  // ql/qr and sl/sr are the printed coordinates; they count non-gap symbols only.
  int ql = queryLPos - 1, qr = queryLPos - 1, qgaps;
  int sl = subjectLPos - 1, sr = subjectLPos - 1, sgaps;
  // NOTE(review): widthLeft looks only at the query coordinates while widthRight considers
  // query and subject. Since these are minimum field widths, a wider subject start would only
  // misalign the columns. Confirm whether the subject start should be included here.
  int widthLeft = String.valueOf(Math.max(queryStart, queryEnd)).length();
  int widthRight = String.valueOf(Math.max(queryEnd, subjectEnd)).length() + 1;
  // Take width of the meta information into account.
  width = Math.max(width - widthLeft - widthRight - 12, 2);
  // One iteration per block of up to `width` columns.
  for (i = 1; i <= Math.ceil((double) maxLength / width); i++) {
    // Query
    queryRPos =
        Math.min(
            queryStart + i * width - 1,
            Math.min(queryEnd, subjectEnd - subjectStart + queryStart));
    qgaps = 0;
    for (j = queryLPos; j <= queryRPos; j++) {
      if (!query.symbolAt(j).equals(query.getAlphabet().getGapSymbol())) {
        qr++;
      } else {
        qgaps++;
      }
    }
    // Advance the left coordinate only if this block holds at least one non-gap symbol.
    if (qgaps <= queryRPos - queryLPos) {
      ql++;
    }
    output.format("%nQuery: %" + widthLeft + "d ", ql);
    output.format("%s ", query.subStr(queryLPos, queryRPos));
    output.format("%-" + widthRight + "d%n", qr);
    queryLPos = queryRPos + 1;
    ql = qr;
    // Path
    pathRPos = Math.min(i * width, path.length());
    // A left-justified blank padded to (widthLeft + 10) characters indents the match line.
    output.format(
        "%-" + (widthLeft + 10) + "c%s",
        Character.valueOf(' '),
        path.substring(pathLPos, pathRPos));
    pathLPos = pathRPos;
    // Sbjct
    subjectRPos =
        Math.min(
            subjectStart + i * width - 1,
            Math.min(queryEnd - queryStart + subjectStart, subjectEnd));
    sgaps = 0;
    for (j = subjectLPos; j <= subjectRPos; j++) {
      if (!subject.symbolAt(j).equals(subject.getAlphabet().getGapSymbol())) {
        sr++;
      } else {
        sgaps++;
      }
    }
    // Advance the left coordinate only if this block holds at least one non-gap symbol.
    if (sgaps <= subjectRPos - subjectLPos) {
      sl++;
    }
    output.format("%nSbjct: %" + widthLeft + "d ", sl);
    output.format("%s ", subject.subStr(subjectLPos, subjectRPos));
    output.format("%-" + widthRight + "d%n", sr);
    subjectLPos = subjectRPos + 1;
    sl = sr;
  }
  return output.toString();
}
/**
 * Expresses the subject gap count as a percentage of the subject length.
 *
 * @return {@code nGapsS} divided by {@code subject.length()}, multiplied by 100
 */
public float getPercentGapsTarget() {
  final float subjectLength = subject.length();
  final float gapFraction = nGapsS / subjectLength;
  return gapFraction * 100;
}
/**
 * Reports the length of the query sequence as returned by {@code query.length()}.
 *
 * @return the length of {@code query}
 */
public int getQueryLength() {
  final int queryLength = query.length();
  return queryLength;
}
/**
 * Reports the length of the subject sequence as returned by {@code subject.length()}.
 *
 * @return the length of {@code subject}
 */
public int getSubjectLength() {
  final int subjectLength = subject.length();
  return subjectLength;
}
/**
 * Expresses the number of similar positions as a percentage of the query length.
 *
 * <p>NOTE(review): {@code getPercentIdentityQuery()} divides by {@code query.length() - nGapsQ},
 * whereas this method divides by the full {@code query.length()}. Confirm that the different
 * denominators are intended.
 *
 * @return {@code similars} divided by {@code query.length()}, multiplied by 100
 */
public float getPercentSimilarityQuery() {
  final float queryLength = query.length();
  final float similarFraction = similars / queryLength;
  return similarFraction * 100;
}
/**
 * Expresses the number of similar positions as a percentage of the subject length.
 *
 * <p>NOTE(review): {@code getPercentIdentitySubject()} divides by {@code subject.length() -
 * nGapsS}, whereas this method divides by the full {@code subject.length()}. Confirm that the
 * different denominators are intended.
 *
 * @return {@code similars} divided by {@code subject.length()}, multiplied by 100
 */
public float getPercentSimilaritySubject() {
  final float subjectLength = subject.length();
  final float similarFraction = similars / subjectLength;
  return similarFraction * 100;
}
/**
 * Expresses the number of identical positions as a percentage of the subject length without its
 * gaps.
 *
 * @return {@code identicals} divided by {@code subject.length() - nGapsS}, multiplied by 100
 */
public float getPercentIdentitySubject() {
  final float ungappedLength = subject.length() - nGapsS;
  final float identicalFraction = identicals / ungappedLength;
  return identicalFraction * 100;
}
/**
 * Expresses the number of identical positions as a percentage of the query length without its
 * gaps.
 *
 * @return {@code identicals} divided by {@code query.length() - nGapsQ}, multiplied by 100
 */
public float getPercentIdentityQuery() {
  final float ungappedLength = query.length() - nGapsQ;
  final float identicalFraction = identicals / ungappedLength;
  return identicalFraction * 100;
}
/**
 * Creates an alignment pair that covers both sequences in full.
 *
 * <p>Delegates to the seven-argument constructor, passing {@code 1} and {@code query.length()}
 * for the query and {@code 1} and {@code subject.length()} for the subject. NOTE(review): these
 * are presumably the 1-based start and end coordinates of each sequence; confirm against the
 * delegated constructor.
 *
 * @param query the query sequence; must not be {@code null} because its length is read here
 * @param subject the subject sequence; must not be {@code null} because its length is read here
 * @param subMatrix the substitution matrix handed on to the delegated constructor
 * @throws IllegalArgumentException declared here; raised, if at all, by the delegated constructor
 * @throws BioException declared here; raised, if at all, by the delegated constructor
 */
public AlignmentPair(Sequence query, Sequence subject, SubstitutionMatrix subMatrix)
    throws IllegalArgumentException, BioException {
  this(query, subject, 1, query.length(), 1, subject.length(), subMatrix);
}
private void dnaCommand(HttpServletRequest req, DazzleResponse resp, DazzleDataSource dds) throws IOException, DataSourceException, ServletException, DazzleException { DazzleReferenceSource drs = (DazzleReferenceSource) dds; List segments = DazzleTools.getSegments(dds, req, resp); if (segments.size() == 0) { throw new DazzleException( DASStatus.STATUS_BAD_COMMAND_ARGUMENTS, "No segments specified for dna command"); } // Fetch and validate the requests. Map segmentResults = new HashMap(); for (Iterator i = segments.iterator(); i.hasNext(); ) { Segment seg = (Segment) i.next(); try { Sequence seq = drs.getSequence(seg.getReference()); if (seq.getAlphabet() != DNATools.getDNA()) { throw new DazzleException( DASStatus.STATUS_SERVER_ERROR, "Sequence " + seg.toString() + " is not in the DNA alphabet"); } if (seg.isBounded()) { if (seg.getMin() < 1 || seg.getMax() > seq.length()) { throw new DazzleException( DASStatus.STATUS_BAD_COORDS, "Segment " + seg.toString() + " doesn't fit sequence of length " + seq.length()); } } segmentResults.put(seg, seq); } catch (NoSuchElementException ex) { throw new DazzleException(DASStatus.STATUS_BAD_REFERENCE, ex); } catch (DataSourceException ex) { throw new DazzleException(DASStatus.STATUS_SERVER_ERROR, ex); } } // // Looks okay -- generate the response document // XMLWriter xw = resp.startDasXML("DASDNA", "dasdna.dtd"); try { xw.openTag("DASDNA"); for (Iterator i = segmentResults.entrySet().iterator(); i.hasNext(); ) { Map.Entry me = (Map.Entry) i.next(); Segment seg = (Segment) me.getKey(); Sequence seq = (Sequence) me.getValue(); xw.openTag("SEQUENCE"); xw.attribute("id", seg.getReference()); xw.attribute("version", drs.getLandmarkVersion(seg.getReference())); if (seg.isBounded()) { xw.attribute("start", "" + seg.getStart()); xw.attribute("stop", "" + seg.getStop()); } else { xw.attribute("start", "" + 1); xw.attribute("stop", "" + seq.length()); } SymbolList syms = seq; if (seg.isBounded()) { syms = syms.subList(seg.getMin(), 
seg.getMax()); } if (seg.isInverted()) { syms = DNATools.reverseComplement(syms); } xw.openTag("DNA"); xw.attribute("length", "" + syms.length()); for (int pos = 1; pos <= syms.length(); pos += 60) { int maxPos = Math.min(syms.length(), pos + 59); xw.println(syms.subStr(pos, maxPos)); } xw.closeTag("DNA"); xw.closeTag("SEQUENCE"); } xw.closeTag("DASDNA"); xw.close(); } catch (Exception ex) { throw new DazzleException(ex, "Error writing DNA document"); } }
/**
 * Randomly samples sequences from the FASTA DNA file {@code seqFile} and writes every sample in
 * FASTA format.
 *
 * <p>One sample is drawn per entry of {@code count}, and sample <i>k</i> is written to the file
 * named by {@code args[k]}. The number of counts must therefore equal the number of arguments;
 * otherwise an error is printed and the JVM exits with status 1. With no arguments and no counts,
 * nothing is sampled or written.
 *
 * <p>Sampling is controlled by these settings, all defined outside this method:
 *
 * <ul>
 *   <li>{@code sampleWithReplacement} false: sequences are drawn without replacement, so a
 *       sequence is used at most once across all samples. NOTE(review): {@code length} is ignored
 *       in this mode; confirm that this is intended.
 *   <li>{@code sampleWithReplacement} true: sequences are drawn with replacement. If {@code
 *       length > 0}, a random window of exactly {@code length} symbols is cut from each drawn
 *       sequence and named {@code <name>_<n>}.
 *   <li>{@code uniqueNames}: the index of the sample is appended to each written sequence name.
 *       NOTE(review): this suffix is the same for all sequences of one sample, so it does not
 *       separate duplicates within a sample; confirm the intended naming.
 * </ul>
 *
 * <p>All files opened here, both the input reader and the output files, are closed before the
 * method returns, including when an exception is thrown.
 *
 * @param args names of the output files, one per entry of {@code count}; {@code null} is treated
 *     like an empty array
 * @throws BioException if a sequence cannot be parsed
 * @throws IOException if the input cannot be read or an output cannot be written or closed
 */
public void main(String[] args) throws BioException, IOException {
  // Treat a null argument array like an empty one instead of failing on args.length.
  final int nOutputs = (args == null) ? 0 : args.length;
  if (count.length != nOutputs) {
    System.err.println("The number of counts and output file arguments does not match!");
    System.exit(1);
  }

  final boolean writeToFiles = nOutputs > 0;
  OutputStream[] outputStreams =
      writeToFiles ? new OutputStream[nOutputs] : new OutputStream[] {System.out};
  try {
    for (int f = 0; f < nOutputs; f++) {
      outputStreams[f] = new BufferedOutputStream(new FileOutputStream(args[f]));
    }

    // Load all input sequences, then release the input file right away.
    List<Sequence> seqs = new ArrayList<Sequence>();
    BufferedReader reader = new BufferedReader(new FileReader(seqFile));
    try {
      RichSequenceIterator seqIterator = RichSequence.IOTools.readFastaDNA(reader, null);
      while (seqIterator.hasNext()) {
        seqs.add(seqIterator.nextSequence());
      }
    } finally {
      reader.close();
    }

    List<List<Sequence>> chosenSeqs = new ArrayList<List<Sequence>>();
    if (!sampleWithReplacement) {
      // Without replacement: every drawn sequence is removed from the pool.
      for (int c : count) {
        List<Sequence> cseqs = new ArrayList<Sequence>();
        chosenSeqs.add(cseqs);
        while (c > 0) {
          int randSeqIndex = random.nextInt(seqs.size());
          cseqs.add(seqs.remove(randSeqIndex));
          c--;
        }
      }
    } else {
      // With replacement: the pool is left untouched and, if a length is given, a random
      // window of that length is cut out of each drawn sequence.
      for (int c : count) {
        int windowNumber = 0;
        List<Sequence> cseqs = new ArrayList<Sequence>();
        chosenSeqs.add(cseqs);
        while (c > 0) {
          Sequence randomSeq = seqs.get(random.nextInt(seqs.size()));
          if (length > 0) {
            // Symbol lists use 1-based, inclusive coordinates. The window therefore starts
            // at 1..(n - length + 1) and ends at start + length - 1.
            int startPos = 1 + random.nextInt(1 + randomSeq.length() - length);
            cseqs.add(
                new SimpleSequence(
                    randomSeq.subList(startPos, startPos + length - 1),
                    null,
                    randomSeq.getName() + "_" + windowNumber++,
                    Annotation.EMPTY_ANNOTATION));
          } else {
            cseqs.add(randomSeq);
          }
          c--;
        }
      }
    }

    // Write sample k to output stream k.
    int sampleIndex = 0;
    for (List<Sequence> seqList : chosenSeqs) {
      for (Sequence seq : seqList) {
        Sequence s;
        if (uniqueNames) {
          s =
              new SimpleSequence(
                  seq.subList(1, seq.length()),
                  null,
                  seq.getName() + "_" + sampleIndex,
                  Annotation.EMPTY_ANNOTATION);
        } else {
          s = seq;
        }
        RichSequence.IOTools.writeFasta(outputStreams[sampleIndex], s, null);
        outputStreams[sampleIndex].flush();
      }
      sampleIndex++;
    }
  } finally {
    // Close only the files opened here, never System.out. Try every stream, then report the
    // first failure.
    if (writeToFiles) {
      IOException closeFailure = null;
      for (OutputStream out : outputStreams) {
        if (out == null) {
          continue;
        }
        try {
          out.close();
        } catch (IOException ex) {
          if (closeFailure == null) {
            closeFailure = ex;
          }
        }
      }
      if (closeFailure != null) {
        throw closeFailure;
      }
    }
  }
}