/**
   * Recomputes the alignment statistics held by this object: the number of identical columns
   * ({@code identicals}), the number of positively scoring columns ({@code similars}) and the
   * number of gap symbols in the query ({@code nGapsQ}) and in the subject ({@code nGapsS}).
   *
   * <p>All four counters are reset to zero first, so the method may be called repeatedly.
   *
   * @throws BioException propagated from the calls made here (presumably the substitution-matrix
   *     lookup; confirm against the matrix implementation)
   */
  private void init() throws BioException {
    similars = 0;
    identicals = 0;
    nGapsQ = 0;
    nGapsS = 0;

    // The gap symbols do not change while iterating, so look them up once.
    Symbol queryGap = query.getAlphabet().getGapSymbol();
    Symbol subjectGap = subject.getAlphabet().getGapSymbol();

    // The end coordinates are inclusive (formatOutput renders the columns up to and including
    // queryEnd/subjectEnd), hence the "+ 1". Without it the last column was never counted.
    int columns = Math.min(queryEnd - queryStart, subjectEnd - subjectStart) + 1;

    for (int i = 0; i < columns; i++) {
      Symbol a = query.symbolAt(i + queryStart);
      Symbol b = subject.symbolAt(i + subjectStart);
      if (a.equals(b)) {
        identicals++;
      }

      boolean gap = false;
      if (a.equals(queryGap)) {
        nGapsQ++;
        gap = true;
      }
      if (b.equals(subjectGap)) {
        nGapsS++;
        gap = true;
      }

      // Only gap-free columns are scored; a strictly positive score marks the pair as similar.
      if (!gap && subMatrix != null && subMatrix.getValueAt(a, b) > 0) {
        similars++;
      }
    }
  }
Beispiel #2
0
  /**
   * Builds one score per position of {@code seq}.
   *
   * <p>Positions whose zero-based index is contained in {@code indicesOfRowsWithSumZero} receive
   * {@code 0.0}. Every other position consumes the next not-yet-used entry of {@code pdScores}, in
   * order.
   *
   * @param pdScores scores for the positions that are not listed in {@code
   *     indicesOfRowsWithSumZero}
   * @param seq sequence whose length determines the size of the result
   * @param indicesOfRowsWithSumZero zero-based positions that are assigned a score of {@code 0.0}
   * @return a list of {@code seq.length()} scores
   * @throws IndexOutOfBoundsException if {@code pdScores} holds fewer entries than there are
   *     positions not listed in {@code indicesOfRowsWithSumZero}
   */
  public ArrayList<Double> mapPDScoresToAlignment(
      ArrayList<Double> pdScores, Sequence seq, ArrayList<Integer> indicesOfRowsWithSumZero) {
    int seqLength = seq.length();
    ArrayList<Double> result = new ArrayList<Double>(seqLength);

    // Index of the next entry of pdScores that has not been handed out yet.
    int nextScore = 0;
    for (int i = 0; i < seqLength; i++) {
      double oneScore;
      if (indicesOfRowsWithSumZero.contains(i)) {
        oneScore = 0.0;
      } else {
        oneScore = pdScores.get(nextScore);
        nextScore++;
      }
      result.add(oneScore);
    }
    return result;
  } /*mapPDScoresToAlignment*/
Beispiel #3
0
  /**
   * Translates a Viterbi state path into one numeric score per position of {@code seq}.
   *
   * <p>Positions whose zero-based index is contained in {@code indicesOfRowsWithSumZero} receive
   * {@code 0.0} and do not consume a path element. Every other position consumes the next element
   * of {@code viterbiPath} and is scored by state name: {@code "M"} gives 0.0; {@code "R"}, {@code
   * "r"}, {@code "G"} and {@code "g"} give 2.0; {@code "E"} and {@code "e"} give 1.0; {@code "J"}
   * gives 1.5. Any other name is reported on {@code System.err} and scored as {@link
   * Double#MIN_VALUE}.
   *
   * @param viterbiPath state names for the positions that are not listed in {@code
   *     indicesOfRowsWithSumZero}
   * @param seq sequence whose length determines the size of the result
   * @param indicesOfRowsWithSumZero zero-based positions that are assigned a score of {@code 0.0}
   * @return a list of {@code seq.length()} scores
   * @throws IndexOutOfBoundsException if {@code viterbiPath} holds fewer entries than there are
   *     positions not listed in {@code indicesOfRowsWithSumZero}
   */
  public ArrayList<Double> mapViterbiPathToAlignment(
      ArrayList<String> viterbiPath, Sequence seq, ArrayList<Integer> indicesOfRowsWithSumZero) {
    int seqLength = seq.length();
    ArrayList<Double> result = new ArrayList<Double>(seqLength);

    // Index of the next element of viterbiPath that has not been consumed yet.
    int pathPosition = 0;
    for (int i = 0; i < seqLength; i++) {
      double oneScore;
      if (indicesOfRowsWithSumZero.contains(i)) {
        oneScore = 0.0;
      } else {
        // Fetch the state once instead of re-reading the list in every comparison.
        String state = viterbiPath.get(pathPosition);
        pathPosition++;

        if (state.equals("M")) {
          oneScore = 0.0;
        } else if (state.equals("R")
            || state.equals("r")
            || state.equals("G")
            || state.equals("g")) {
          oneScore = 2.0;
        } else if (state.equals("E") || state.equals("e")) {
          oneScore = 1.0;
        } else if (state.equals("J")) {
          oneScore = 1.5;
        } else {
          System.err.println("Unknown charecter detected as a state name!");
          // Unrecognised states keep the sentinel value so callers can spot them.
          oneScore = Double.MIN_VALUE;
        }
      }
      result.add(oneScore);
    }
    return result;
  } /*mapViterbiPathToAlignment*/
Beispiel #4
0
    /**
     * Builds a new alphabet named {@code "ObservedSequenceAlphabet"} and a symbol list over it,
     * creating one compound symbol per row of {@code m}.
     *
     * <p>Columns 0, 1 and 2 of each row are read as the red (match), green (flanking) and blue
     * (background) value. A symbol is created from the string form of each value, and the three
     * are combined into one symbol that is added to the alphabet and appended to the list.
     *
     * <p>A row whose three values sum to zero is replaced by the placeholder triplet {@code
     * (0.3333, 0.3333, 1 - 0.6666)}. This is a stop-gap until the cause of such rows is known.
     *
     * <p>{@code sequence} is only used for a length sanity check. A mismatch with the number of
     * rows is reported on {@code System.err} and processing continues.
     *
     * @param m matrix with at least three columns; one output symbol is produced per row
     * @param sequence sequence expected to be as long as {@code m} has rows
     * @return a map with a single entry, from the new alphabet to the symbol list built over it
     * @throws IllegalSymbolException propagated from symbol creation or from adding a symbol to
     *     the alphabet
     */
    public LinkedHashMap<SimpleAlphabet, SimpleSymbolList> getAlphabetAndSimpleSymbolList(
        Matrix2D m, Sequence sequence) throws IllegalSymbolException {
      LinkedHashMap<SimpleAlphabet, SimpleSymbolList> alphabetAndSymbolList =
          new LinkedHashMap<SimpleAlphabet, SimpleSymbolList>();

      SimpleAlphabet alphabet = new SimpleAlphabet();
      List<AtomicSymbol> listOfSymbols = new ArrayList<AtomicSymbol>();
      alphabet.setName("ObservedSequenceAlphabet");
      int numberofRows = m.rows();
      int seqLength = sequence.length();
      if (numberofRows != seqLength) {
        System.err.print(
            "It was assumed your sequence has a length equal to  the number of rows of the matrix, but found a case that is not true!");
      }

      // The sequence is deliberately not indexed by row here. An earlier, unused gap lookup did
      // so and failed with an index error whenever the matrix had more rows than the sequence.
      for (int i = 0; i < numberofRows; i++) {
        List<Symbol> oneListOfSymbol = new ArrayList<Symbol>(3);

        // red is match, green is flanking and blue is background
        double redValue = m.get(i, 0);
        double greenValue = m.get(i, 1);
        double blueValue = m.get(i, 2);
        double onesum = redValue + greenValue + blueValue;
        if (onesum == 0) {
          // The three values are supposed to sum to one, but some rows sum to zero. Until the
          // cause is known, substitute a placeholder triplet for such rows.
          redValue = 0.3333;
          greenValue = 0.3333;
          blueValue = 1 - (redValue + greenValue);
        }

        // make one triplet symbol from three symbols
        Symbol redSymbol = AlphabetManager.createSymbol(Double.toString(redValue));
        Symbol greenSymbol = AlphabetManager.createSymbol(Double.toString(greenValue));
        Symbol blueSymbol = AlphabetManager.createSymbol(Double.toString(blueValue));
        oneListOfSymbol.add(redSymbol);
        oneListOfSymbol.add(greenSymbol);
        oneListOfSymbol.add(blueSymbol);

        // now create the compound symbol and add it to the alphabet
        AtomicSymbol oneSym =
            (AtomicSymbol)
                AlphabetManager.createSymbol(
                    Annotation.EMPTY_ANNOTATION, oneListOfSymbol, alphabet);
        alphabet.addSymbol(oneSym);
        listOfSymbols.add(oneSym);
      }

      SimpleSymbolList ssl = new SimpleSymbolList(alphabet, listOfSymbols);

      alphabetAndSymbolList.put(alphabet, ssl);
      return alphabetAndSymbolList;
    } /*getAlphabetAndSimpleSymbolList*/
  /**
   * This method provides a BLAST-like formatted alignment of the query and subject, in which the
   * sequence coordinates and the information "Query" or "Sbjct", respectively, is added to each
   * line. Each line contains <code>width</code> characters in total: the sequence characters
   * (including gap symbols) plus the meta information. There is one blank line between two pairs
   * of sequences.
   *
   * <p>The output starts with a summary header (time, alignment length, score, names and ungapped
   * lengths of both sequences, identities, similars and gap counts), followed by the alignment
   * in chunks of three lines: query, match path, subject.
   *
   * @param width the number of characters to be displayed per line, including the coordinate
   *     columns and labels; at least two sequence symbols are always shown per line.
   * @return formatted String.
   * @throws BioException propagated from the calls made here (e.g. the substitution-matrix lookup
   *     or the percentage getters)
   */
  public String formatOutput(int width) throws BioException {
    int i, j;
    /*
     * Highlights equal symbols within the alignment, String match/mismatch
     * representation: '|' for a gap-free column that is identical or scores
     * non-negatively, ' ' otherwise.
     */
    StringBuilder path = new StringBuilder();
    for (i = 0; i < Math.min(queryEnd - queryStart, subjectEnd - subjectStart) + 1; i++) {
      Symbol a = query.symbolAt(i + queryStart);
      Symbol b = subject.symbolAt(i + subjectStart);
      // The matrix is optional (init() guards against null as well); without one, only
      // identical symbols are marked.
      if (!a.equals(query.getAlphabet().getGapSymbol())
          && !b.equals(subject.getAlphabet().getGapSymbol())
          && ((subMatrix != null && subMatrix.getValueAt(a, b) >= 0) || a.equals(b))) {
        path.append('|');
      } else {
        path.append(' ');
      }
    }

    // The alignment length is the number of rendered columns, i.e. the shorter of the two
    // inclusive coordinate ranges.
    int maxLength = path.length();
    Formatter output = new Formatter();
    output.format("%n Time (ms):  %s%n", time);
    output.format(" Length:     %d%n", maxLength);
    output.format("  Score:     %d%n", getScore());
    output.format("  Query:     %s, Length: %d%n", query.getName(), query.length() - nGapsQ);
    output.format("  Sbjct:     %s, Length: %d%n", subject.getName(), subject.length() - nGapsS);
    output.format(
        " Identities: %d/%d, i.e., %d %% (query) and %d %% (sbjct)%n",
        identicals,
        maxLength,
        Math.round(getPercentIdentityQuery()),
        Math.round(getPercentIdentitySubject()));
    output.format(
        " Similars:   %d/%d, i.e., %d %% (query) and %d %% (sbjct)%n",
        similars,
        maxLength,
        Math.round(getPercentSimilarityQuery()),
        Math.round(getPercentSimilaritySubject()));
    output.format(
        " No. gaps:   %d (%d %%) in query and %d (%d %%) in sbjct%n",
        nGapsQ, Math.round(getPercentGapsQuery()), nGapsS, Math.round(getPercentGapsTarget()));

    // *LPos/*RPos are the inclusive bounds of the current chunk within the gapped sequences and
    // the path. ql/qr and sl/sr are the coordinates printed left and right of each chunk; they
    // advance only on non-gap symbols.
    int queryLPos = queryStart, queryRPos, pathLPos = 0, pathRPos;
    int subjectLPos = subjectStart, subjectRPos;
    int ql = queryLPos - 1, qr = queryLPos - 1, qgaps;
    int sl = subjectLPos - 1, sr = subjectLPos - 1, sgaps;

    // Column widths of the left and right coordinate fields.
    int widthLeft = String.valueOf(Math.max(queryStart, queryEnd)).length();
    int widthRight = String.valueOf(Math.max(queryEnd, subjectEnd)).length() + 1;

    // Take width of the meta information into account: from here on, width is the number of
    // sequence symbols per line (never fewer than 2). The constant 12 presumably covers the
    // fixed labels and separators -- confirm before changing the line layout.
    width = Math.max(width - widthLeft - widthRight - 12, 2);

    for (i = 1; i <= Math.ceil((double) maxLength / width); i++) {

      // Query
      queryRPos =
          Math.min(
              queryStart + i * width - 1,
              Math.min(queryEnd, subjectEnd - subjectStart + queryStart));
      qgaps = 0;
      for (j = queryLPos; j <= queryRPos; j++) {
        if (!query.symbolAt(j).equals(query.getAlphabet().getGapSymbol())) {
          qr++;
        } else {
          qgaps++;
        }
      }
      // Advance the left coordinate only if the chunk holds at least one non-gap symbol.
      if (qgaps <= queryRPos - queryLPos) {
        ql++;
      }
      output.format("%nQuery:   %" + widthLeft + "d ", ql);
      output.format("%s ", query.subStr(queryLPos, queryRPos));
      output.format("%-" + widthRight + "d%n", qr);
      queryLPos = queryRPos + 1;
      ql = qr;

      // Path (padded on the left so that it lines up under the sequence characters)
      pathRPos = Math.min(i * width, path.length());
      output.format(
          "%-" + (widthLeft + 10) + "c%s",
          Character.valueOf(' '),
          path.substring(pathLPos, pathRPos));
      pathLPos = pathRPos;

      // Sbjct
      subjectRPos =
          Math.min(
              subjectStart + i * width - 1,
              Math.min(queryEnd - queryStart + subjectStart, subjectEnd));
      sgaps = 0;
      for (j = subjectLPos; j <= subjectRPos; j++) {
        if (!subject.symbolAt(j).equals(subject.getAlphabet().getGapSymbol())) {
          sr++;
        } else {
          sgaps++;
        }
      }
      // Advance the left coordinate only if the chunk holds at least one non-gap symbol.
      if (sgaps <= subjectRPos - subjectLPos) {
        sl++;
      }
      output.format("%nSbjct:   %" + widthLeft + "d ", sl);
      output.format("%s ", subject.subStr(subjectLPos, subjectRPos));
      output.format("%-" + widthRight + "d%n", sr);
      subjectLPos = subjectRPos + 1;
      sl = sr;
    }
    return output.toString();
  }