/** @throws BioException */ private void init() throws BioException { similars = 0; identicals = 0; nGapsQ = 0; nGapsS = 0; for (int i = 0; i < Math.min(queryEnd - queryStart, subjectEnd - subjectStart); i++) { Symbol a = query.symbolAt(i + queryStart); Symbol b = subject.symbolAt(i + subjectStart); boolean gap = false; if (a.equals(b)) { identicals++; } // get score for this pair. if it is positive, they are similar... if (a.equals(query.getAlphabet().getGapSymbol())) { nGapsQ++; gap = true; } if (b.equals(subject.getAlphabet().getGapSymbol())) { nGapsS++; gap = true; } if (!gap && subMatrix != null && subMatrix.getValueAt(a, b) > 0) { similars++; } } }
public ArrayList<Double> mapPDScoresToAlignment( ArrayList<Double> pdScores, Sequence seq, ArrayList<Integer> indicesOfRowsWithSumZero) { ArrayList<Double> result = new ArrayList<Double>(); int seqLength = seq.length(); int nonGapPosition = 0; Symbol gap = seq.getAlphabet().getGapSymbol(); for (int i = 0; i < seqLength; i++) { double oneScore = Double.MIN_VALUE; boolean isSumZero = false; isSumZero = (indicesOfRowsWithSumZero.contains(i)); boolean isGap = false; isGap = (seq.symbolAt(i + 1) == gap); // note: these +1 in indices are because seq starts from one and not zero! if (isSumZero) { oneScore = 0.0; } else { oneScore = pdScores.get(nonGapPosition); nonGapPosition++; } result.add(oneScore); } return result; } /*mapPDScoresToAlignment*/
public ArrayList<Double> mapViterbiPathToAlignment( ArrayList<String> viterbiPath, Sequence seq, ArrayList<Integer> indicesOfRowsWithSumZero) { ArrayList<Double> result = new ArrayList<Double>(); int seqLength = seq.length(); int nonGapPosition = 0; Symbol gap = seq.getAlphabet().getGapSymbol(); for (int i = 0; i < seqLength; i++) { double oneScore = Double.MIN_VALUE; boolean isSumZero = false; isSumZero = (indicesOfRowsWithSumZero.contains(i)); boolean isGap = false; isGap = (seq.symbolAt(i + 1) == gap); // note: these +1 in indices are because seq and viterbi path start from one // and not zero! if (isSumZero) { oneScore = 0.0; } else { if (viterbiPath.get(nonGapPosition).equals("M")) { oneScore = 0.0; } else if (viterbiPath.get(nonGapPosition).equals("R")) { oneScore = 2.0; } else if (viterbiPath.get(nonGapPosition).equals("r")) { oneScore = 2.0; } else if (viterbiPath.get(nonGapPosition).equals("G")) { oneScore = 2.0; } else if (viterbiPath.get(nonGapPosition).equals("g")) { oneScore = 2.0; } else if (viterbiPath.get(nonGapPosition).equals("E")) { oneScore = 1.0; } else if (viterbiPath.get(nonGapPosition).equals("e")) { oneScore = 1.0; } else if (viterbiPath.get(nonGapPosition).equals("J")) { oneScore = 1.5; } else { System.err.println("Unknown charecter detected as a state name!"); } nonGapPosition++; } result.add(oneScore); } return result; } /*mapViterbiPathToAlignment*/
public LinkedHashMap<SimpleAlphabet, SimpleSymbolList> getAlphabetAndSimpleSymbolList( Matrix2D m, Sequence sequence) throws IllegalSymbolException { LinkedHashMap<SimpleAlphabet, SimpleSymbolList> alphabetAndSymbolList = new LinkedHashMap<SimpleAlphabet, SimpleSymbolList>(); SimpleAlphabet alphabet = new SimpleAlphabet(); List<AtomicSymbol> listOfSymbols = new ArrayList<AtomicSymbol>(); alphabet.setName("ObservedSequenceAlphabet"); int numberofRows = m.rows(); int seqLength = sequence.length(); if (numberofRows != seqLength) { System.err.print( "It was assumed your sequence has a length equal to the number of rows of the matrix, but found a case that is not true!"); } Symbol gap = sequence.getAlphabet().getGapSymbol(); for (int i = 0; i < numberofRows; i++) { List<Symbol> oneListOfSymbol = new ArrayList<Symbol>(3); // red is match, green is flanking and blue is background double redValue = m.get(i, 0); double greenValue = m.get(i, 1); double blueValue = m.get(i, 2); double onesum = redValue + greenValue + blueValue; boolean isGap = false; isGap = (sequence.symbolAt(i + 1) == gap); if (onesum == 0) { // sum of this value is supposed to be one, but for some rows it sums up to // zero, this is to ignore those up to time we found out why these naouthy rows // sums up to zero! // continue; // note these three lines is only a dummy solution for positions where red, green and blue // are summed up to zero! redValue = 0.3333; greenValue = 0.3333; blueValue = 1 - (redValue + greenValue); } // make one triplet symbol from three symbols Symbol redSymbol = AlphabetManager.createSymbol(Double.toString(redValue)); Symbol greenSymol = AlphabetManager.createSymbol(Double.toString(greenValue)); Symbol blueSymbol = AlphabetManager.createSymbol(Double.toString(blueValue)); oneListOfSymbol.add(redSymbol); oneListOfSymbol.add(greenSymol); oneListOfSymbol.add(blueSymbol); // now create symbol and add it to alphabet AtomicSymbol oneSym = (AtomicSymbol) AlphabetManager.createSymbol( Annotation.EMPTY_ANNOTATION, oneListOfSymbol, alphabet); alphabet.addSymbol(oneSym); listOfSymbols.add(oneSym); } SimpleSymbolList ssl = new SimpleSymbolList(alphabet, listOfSymbols); alphabetAndSymbolList.put(alphabet, ssl); return alphabetAndSymbolList; } /*getAlphabetAndSimpleSymbolList*/
/** * This method provides a BLAST-like formated alignment from the given <code>String</code>s, in * which the sequence coordinates and the information "Query" or "Sbjct", respectively is added to * each line. Each line contains <code>width</code> sequence characters including the gap symbols * plus the meta information. There is one white line between two pairs of sequences. * * @param width the number of symbols to be displayed per line. * @return formated String. * @throws BioException */ public String formatOutput(int width) throws BioException { int i, j; /* * Highlights equal symbols within the alignment, String match/missmatch * representation */ StringBuilder path = new StringBuilder(); for (i = 0; i < Math.min(queryEnd - queryStart, subjectEnd - subjectStart) + 1; i++) { Symbol a = query.symbolAt(i + queryStart); Symbol b = subject.symbolAt(i + subjectStart); if (!a.equals(query.getAlphabet().getGapSymbol()) && !b.equals(subject.getAlphabet().getGapSymbol()) && ((subMatrix.getValueAt(a, b) >= 0) || a.equals(b))) { path.append('|'); } else { path.append(' '); } } int maxLength = path.length(); /* * Math.max(queryEnd - queryStart, subjectEnd - subjectStart) + 1; */ Formatter output = new Formatter(); output.format("%n Time (ms): %s%n", time); output.format(" Length: %d%n", maxLength); output.format(" Score: %d%n", getScore()); output.format(" Query: %s, Length: %d%n", query.getName(), query.length() - nGapsQ); output.format(" Sbjct: %s, Length: %d%n", subject.getName(), subject.length() - nGapsS); output.format( " Identities: %d/%d, i.e., %d %% (query) and %d %% (sbjct)%n", identicals, maxLength, Math.round(getPercentIdentityQuery()), Math.round(getPercentIdentitySubject())); output.format( " Similars: %d/%d, i.e., %d %% (query) and %d %% (sbjct)%n", similars, maxLength, Math.round(getPercentSimilarityQuery()), Math.round(getPercentSimilaritySubject())); output.format( " No. gaps: %d (%d %%) in query and %d (%d %%) in sbjct%n", nGapsQ, Math.round(getPercentGapsQuery()), nGapsS, Math.round(getPercentGapsTarget())); int queryLPos = queryStart, queryRPos, pathLPos = 0, pathRPos; int subjectLPos = subjectStart, subjectRPos; int ql = queryLPos - 1, qr = queryLPos - 1, qgaps; int sl = subjectLPos - 1, sr = subjectLPos - 1, sgaps; int widthLeft = String.valueOf(Math.max(queryStart, queryEnd)).length(); int widthRight = String.valueOf(Math.max(queryEnd, subjectEnd)).length() + 1; // Take width of the meta information into account. width = Math.max(width - widthLeft - widthRight - 12, 2); for (i = 1; i <= Math.ceil((double) maxLength / width); i++) { // Query queryRPos = Math.min( queryStart + i * width - 1, Math.min(queryEnd, subjectEnd - subjectStart + queryStart)); qgaps = 0; for (j = queryLPos; j <= queryRPos; j++) { if (!query.symbolAt(j).equals(query.getAlphabet().getGapSymbol())) { qr++; } else { qgaps++; } } if (qgaps <= queryRPos - queryLPos) { ql++; } output.format("%nQuery: %" + widthLeft + "d ", ql); output.format("%s ", query.subStr(queryLPos, queryRPos)); output.format("%-" + widthRight + "d%n", qr); queryLPos = queryRPos + 1; ql = qr; // Path pathRPos = Math.min(i * width, path.length()); output.format( "%-" + (widthLeft + 10) + "c%s", Character.valueOf(' '), path.substring(pathLPos, pathRPos)); pathLPos = pathRPos; // Sbjct subjectRPos = Math.min( subjectStart + i * width - 1, Math.min(queryEnd - queryStart + subjectStart, subjectEnd)); sgaps = 0; for (j = subjectLPos; j <= subjectRPos; j++) { if (!subject.symbolAt(j).equals(subject.getAlphabet().getGapSymbol())) { sr++; } else { sgaps++; } } if (sgaps <= subjectRPos - subjectLPos) { sl++; } output.format("%nSbjct: %" + widthLeft + "d ", sl); output.format("%s ", subject.subStr(subjectLPos, subjectRPos)); output.format("%-" + widthRight + "d%n", sr); subjectLPos = subjectRPos + 1; sl = sr; } return output.toString(); }