Пример #1
0
  public ArrayList<Double> mapPDScoresToAlignment(
      ArrayList<Double> pdScores, Sequence seq, ArrayList<Integer> indicesOfRowsWithSumZero) {
    ArrayList<Double> result = new ArrayList<Double>();

    int seqLength = seq.length();
    int nonGapPosition = 0;
    Symbol gap = seq.getAlphabet().getGapSymbol();
    for (int i = 0; i < seqLength; i++) {
      double oneScore = Double.MIN_VALUE;
      boolean isSumZero = false;
      isSumZero = (indicesOfRowsWithSumZero.contains(i));
      boolean isGap = false;
      isGap =
          (seq.symbolAt(i + 1)
              == gap); // note: these +1 in indices are because seq  starts from one and not zero!
      if (isSumZero) {
        oneScore = 0.0;
      } else {
        oneScore = pdScores.get(nonGapPosition);
        nonGapPosition++;
      }
      result.add(oneScore);
    }
    return result;
  } /*mapPDScoresToAlignment*/
Пример #2
0
  public ArrayList<Double> mapViterbiPathToAlignment(
      ArrayList<String> viterbiPath, Sequence seq, ArrayList<Integer> indicesOfRowsWithSumZero) {
    ArrayList<Double> result = new ArrayList<Double>();

    int seqLength = seq.length();
    int nonGapPosition = 0;
    Symbol gap = seq.getAlphabet().getGapSymbol();
    for (int i = 0; i < seqLength; i++) {
      double oneScore = Double.MIN_VALUE;
      boolean isSumZero = false;
      isSumZero = (indicesOfRowsWithSumZero.contains(i));
      boolean isGap = false;
      isGap =
          (seq.symbolAt(i + 1)
              == gap); // note: these +1 in indices are because seq and viterbi path start from one
                       // and not zero!
      if (isSumZero) {
        oneScore = 0.0;
      } else {
        if (viterbiPath.get(nonGapPosition).equals("M")) {
          oneScore = 0.0;
        } else if (viterbiPath.get(nonGapPosition).equals("R")) {
          oneScore = 2.0;
        } else if (viterbiPath.get(nonGapPosition).equals("r")) {
          oneScore = 2.0;
        } else if (viterbiPath.get(nonGapPosition).equals("G")) {
          oneScore = 2.0;
        } else if (viterbiPath.get(nonGapPosition).equals("g")) {
          oneScore = 2.0;
        } else if (viterbiPath.get(nonGapPosition).equals("E")) {
          oneScore = 1.0;
        } else if (viterbiPath.get(nonGapPosition).equals("e")) {
          oneScore = 1.0;
        } else if (viterbiPath.get(nonGapPosition).equals("J")) {
          oneScore = 1.5;
        } else {
          System.err.println("Unknown charecter detected as a state name!");
        }

        nonGapPosition++;
      }
      result.add(oneScore);
    }
    return result;
  } /*mapViterbiPathToAlignment*/
Пример #3
0
 /** @return */
 public float getPercentGapsQuery() {
   return nGapsQ / (float) query.length() * 100;
 }
Пример #4
0
    public LinkedHashMap<SimpleAlphabet, SimpleSymbolList> getAlphabetAndSimpleSymbolList(
        Matrix2D m, Sequence sequence) throws IllegalSymbolException {
      LinkedHashMap<SimpleAlphabet, SimpleSymbolList> alphabetAndSymbolList =
          new LinkedHashMap<SimpleAlphabet, SimpleSymbolList>();

      SimpleAlphabet alphabet = new SimpleAlphabet();
      List<AtomicSymbol> listOfSymbols = new ArrayList<AtomicSymbol>();
      alphabet.setName("ObservedSequenceAlphabet");
      int numberofRows = m.rows();
      int seqLength = sequence.length();
      if (numberofRows != seqLength) {
        System.err.print(
            "It was assumed your sequence has a length equal to  the number of rows of the matrix, but found a case that is not true!");
      }

      Symbol gap = sequence.getAlphabet().getGapSymbol();

      for (int i = 0; i < numberofRows; i++) {
        List<Symbol> oneListOfSymbol = new ArrayList<Symbol>(3);

        // red is match, green is flanking and blue is background
        double redValue = m.get(i, 0);
        double greenValue = m.get(i, 1);
        double blueValue = m.get(i, 2);
        double onesum = redValue + greenValue + blueValue;
        boolean isGap = false;
        isGap = (sequence.symbolAt(i + 1) == gap);
        if (onesum
            == 0) { // sum of this value is supposed to be one, but for some rows it sums up to
                    // zero, this is to ignore those up to time we found out why these naouthy rows
                    // sums up to zero!
          // continue;
          // note these three lines is only a dummy solution for positions where red, green and blue
          // are summed up to zero!
          redValue = 0.3333;
          greenValue = 0.3333;
          blueValue = 1 - (redValue + greenValue);
        }

        // make one triplet symbol from three symbols
        Symbol redSymbol = AlphabetManager.createSymbol(Double.toString(redValue));
        Symbol greenSymol = AlphabetManager.createSymbol(Double.toString(greenValue));
        Symbol blueSymbol = AlphabetManager.createSymbol(Double.toString(blueValue));
        oneListOfSymbol.add(redSymbol);
        oneListOfSymbol.add(greenSymol);
        oneListOfSymbol.add(blueSymbol);

        // now create symbol and add it to alphabet
        AtomicSymbol oneSym =
            (AtomicSymbol)
                AlphabetManager.createSymbol(
                    Annotation.EMPTY_ANNOTATION, oneListOfSymbol, alphabet);
        alphabet.addSymbol(oneSym);
        listOfSymbols.add(oneSym);
      }

      SimpleSymbolList ssl = new SimpleSymbolList(alphabet, listOfSymbols);

      alphabetAndSymbolList.put(alphabet, ssl);
      return alphabetAndSymbolList;
    } /*getAlphabetAndSimpleSymbolList*/
Пример #5
0
  /**
   * This method provides a BLAST-like formated alignment from the given <code>String</code>s, in
   * which the sequence coordinates and the information "Query" or "Sbjct", respectively is added to
   * each line. Each line contains <code>width</code> sequence characters including the gap symbols
   * plus the meta information. There is one white line between two pairs of sequences.
   *
   * @param width the number of symbols to be displayed per line.
   * @return formated String.
   * @throws BioException
   */
  public String formatOutput(int width) throws BioException {
    int i, j;
    /*
     * Highlights equal symbols within the alignment, String match/missmatch
     * representation
     */
    StringBuilder path = new StringBuilder();
    for (i = 0; i < Math.min(queryEnd - queryStart, subjectEnd - subjectStart) + 1; i++) {
      Symbol a = query.symbolAt(i + queryStart);
      Symbol b = subject.symbolAt(i + subjectStart);
      if (!a.equals(query.getAlphabet().getGapSymbol())
          && !b.equals(subject.getAlphabet().getGapSymbol())
          && ((subMatrix.getValueAt(a, b) >= 0) || a.equals(b))) {
        path.append('|');
      } else {
        path.append(' ');
      }
    }

    int maxLength = path.length();
    /*
     * Math.max(queryEnd - queryStart, subjectEnd - subjectStart) + 1;
     */
    Formatter output = new Formatter();
    output.format("%n Time (ms):  %s%n", time);
    output.format(" Length:     %d%n", maxLength);
    output.format("  Score:     %d%n", getScore());
    output.format("  Query:     %s, Length: %d%n", query.getName(), query.length() - nGapsQ);
    output.format("  Sbjct:     %s, Length: %d%n", subject.getName(), subject.length() - nGapsS);
    output.format(
        " Identities: %d/%d, i.e., %d %% (query) and %d %% (sbjct)%n",
        identicals,
        maxLength,
        Math.round(getPercentIdentityQuery()),
        Math.round(getPercentIdentitySubject()));
    output.format(
        " Similars:   %d/%d, i.e., %d %% (query) and %d %% (sbjct)%n",
        similars,
        maxLength,
        Math.round(getPercentSimilarityQuery()),
        Math.round(getPercentSimilaritySubject()));
    output.format(
        " No. gaps:   %d (%d %%) in query and %d (%d %%) in sbjct%n",
        nGapsQ, Math.round(getPercentGapsQuery()), nGapsS, Math.round(getPercentGapsTarget()));

    int queryLPos = queryStart, queryRPos, pathLPos = 0, pathRPos;
    int subjectLPos = subjectStart, subjectRPos;
    int ql = queryLPos - 1, qr = queryLPos - 1, qgaps;
    int sl = subjectLPos - 1, sr = subjectLPos - 1, sgaps;

    int widthLeft = String.valueOf(Math.max(queryStart, queryEnd)).length();
    int widthRight = String.valueOf(Math.max(queryEnd, subjectEnd)).length() + 1;

    // Take width of the meta information into account.
    width = Math.max(width - widthLeft - widthRight - 12, 2);

    for (i = 1; i <= Math.ceil((double) maxLength / width); i++) {

      // Query
      queryRPos =
          Math.min(
              queryStart + i * width - 1,
              Math.min(queryEnd, subjectEnd - subjectStart + queryStart));
      qgaps = 0;
      for (j = queryLPos; j <= queryRPos; j++) {
        if (!query.symbolAt(j).equals(query.getAlphabet().getGapSymbol())) {
          qr++;
        } else {
          qgaps++;
        }
      }
      if (qgaps <= queryRPos - queryLPos) {
        ql++;
      }
      output.format("%nQuery:   %" + widthLeft + "d ", ql);
      output.format("%s ", query.subStr(queryLPos, queryRPos));
      output.format("%-" + widthRight + "d%n", qr);
      queryLPos = queryRPos + 1;
      ql = qr;

      // Path
      pathRPos = Math.min(i * width, path.length());
      output.format(
          "%-" + (widthLeft + 10) + "c%s",
          Character.valueOf(' '),
          path.substring(pathLPos, pathRPos));
      pathLPos = pathRPos;

      // Sbjct
      subjectRPos =
          Math.min(
              subjectStart + i * width - 1,
              Math.min(queryEnd - queryStart + subjectStart, subjectEnd));
      sgaps = 0;
      for (j = subjectLPos; j <= subjectRPos; j++) {
        if (!subject.symbolAt(j).equals(subject.getAlphabet().getGapSymbol())) {
          sr++;
        } else {
          sgaps++;
        }
      }
      if (sgaps <= subjectRPos - subjectLPos) {
        sl++;
      }
      output.format("%nSbjct:   %" + widthLeft + "d ", sl);
      output.format("%s ", subject.subStr(subjectLPos, subjectRPos));
      output.format("%-" + widthRight + "d%n", sr);
      subjectLPos = subjectRPos + 1;
      sl = sr;
    }
    return output.toString();
  }
Пример #6
0
 /** @return */
 public float getPercentGapsTarget() {
   return nGapsS / (float) subject.length() * 100;
 }
Пример #7
0
 /** @return */
 public int getQueryLength() {
   return query.length();
 }
Пример #8
0
 /** @return */
 public int getSubjectLength() {
   return subject.length();
 }
Пример #9
0
 /** @return */
 public float getPercentSimilarityQuery() {
   return similars / (float) query.length() * 100;
 }
Пример #10
0
 /** @return */
 public float getPercentSimilaritySubject() {
   return similars / (float) subject.length() * 100;
 }
Пример #11
0
 /** @return */
 public float getPercentIdentitySubject() {
   return identicals / (float) (subject.length() - nGapsS) * 100;
 }
Пример #12
0
 /** @return */
 public float getPercentIdentityQuery() {
   return identicals / (float) (query.length() - nGapsQ) * 100;
 }
Пример #13
0
 /**
  * @param query
  * @param subject
  * @param subMatrix
  * @throws IllegalArgumentException
  * @throws BioException
  */
 public AlignmentPair(Sequence query, Sequence subject, SubstitutionMatrix subMatrix)
     throws IllegalArgumentException, BioException {
   this(query, subject, 1, query.length(), 1, subject.length(), subMatrix);
 }
Пример #14
0
  private void dnaCommand(HttpServletRequest req, DazzleResponse resp, DazzleDataSource dds)
      throws IOException, DataSourceException, ServletException, DazzleException {

    DazzleReferenceSource drs = (DazzleReferenceSource) dds;

    List segments = DazzleTools.getSegments(dds, req, resp);
    if (segments.size() == 0) {
      throw new DazzleException(
          DASStatus.STATUS_BAD_COMMAND_ARGUMENTS, "No segments specified for dna command");
    }

    // Fetch and validate the requests.

    Map segmentResults = new HashMap();
    for (Iterator i = segments.iterator(); i.hasNext(); ) {
      Segment seg = (Segment) i.next();

      try {
        Sequence seq = drs.getSequence(seg.getReference());
        if (seq.getAlphabet() != DNATools.getDNA()) {
          throw new DazzleException(
              DASStatus.STATUS_SERVER_ERROR,
              "Sequence " + seg.toString() + " is not in the DNA alphabet");
        }
        if (seg.isBounded()) {
          if (seg.getMin() < 1 || seg.getMax() > seq.length()) {
            throw new DazzleException(
                DASStatus.STATUS_BAD_COORDS,
                "Segment " + seg.toString() + " doesn't fit sequence of length " + seq.length());
          }
        }
        segmentResults.put(seg, seq);
      } catch (NoSuchElementException ex) {
        throw new DazzleException(DASStatus.STATUS_BAD_REFERENCE, ex);
      } catch (DataSourceException ex) {
        throw new DazzleException(DASStatus.STATUS_SERVER_ERROR, ex);
      }
    }

    //
    // Looks okay -- generate the response document
    //

    XMLWriter xw = resp.startDasXML("DASDNA", "dasdna.dtd");

    try {
      xw.openTag("DASDNA");
      for (Iterator i = segmentResults.entrySet().iterator(); i.hasNext(); ) {
        Map.Entry me = (Map.Entry) i.next();
        Segment seg = (Segment) me.getKey();
        Sequence seq = (Sequence) me.getValue();

        xw.openTag("SEQUENCE");
        xw.attribute("id", seg.getReference());
        xw.attribute("version", drs.getLandmarkVersion(seg.getReference()));
        if (seg.isBounded()) {
          xw.attribute("start", "" + seg.getStart());
          xw.attribute("stop", "" + seg.getStop());
        } else {
          xw.attribute("start", "" + 1);
          xw.attribute("stop", "" + seq.length());
        }

        SymbolList syms = seq;
        if (seg.isBounded()) {
          syms = syms.subList(seg.getMin(), seg.getMax());
        }
        if (seg.isInverted()) {
          syms = DNATools.reverseComplement(syms);
        }

        xw.openTag("DNA");
        xw.attribute("length", "" + syms.length());

        for (int pos = 1; pos <= syms.length(); pos += 60) {
          int maxPos = Math.min(syms.length(), pos + 59);
          xw.println(syms.subStr(pos, maxPos));
        }

        xw.closeTag("DNA");
        xw.closeTag("SEQUENCE");
      }
      xw.closeTag("DASDNA");
      xw.close();
    } catch (Exception ex) {
      throw new DazzleException(ex, "Error writing DNA document");
    }
  }
Пример #15
0
  public void main(String[] args) throws BioException, IOException {

    if (count.length != args.length) {
      System.err.println("The number of counts and output file arguments does not match!");
      System.exit(1);
    }

    OutputStream[] outputStreams;
    if ((args != null) && (args.length > 0)) {
      outputStreams = new OutputStream[args.length];
      for (int i = 0; i < args.length; i++) {
        outputStreams[i] = new BufferedOutputStream(new FileOutputStream(args[i]));
      }
    } else {
      outputStreams = new OutputStream[] {System.out};
    }

    RichSequenceIterator seqIterator =
        RichSequence.IOTools.readFastaDNA(new BufferedReader(new FileReader(seqFile)), null);

    List<Sequence> seqs = new ArrayList<Sequence>();
    while (seqIterator.hasNext()) {
      seqs.add(seqIterator.nextSequence());
    }

    List<List<Sequence>> chosenSeqs = new ArrayList<List<Sequence>>();
    if (!sampleWithReplacement) {
      for (int c : count) {
        List<Sequence> cseqs = new ArrayList<Sequence>();
        chosenSeqs.add(cseqs);
        while (c > 0) {
          int randSeqIndex = random.nextInt(seqs.size());
          cseqs.add(seqs.remove(randSeqIndex));
          c--;
        }
      }
    } else if (sampleWithReplacement || (length > 0)) {
      /*
       * if you want to sample from sequences with replacement
       * or if the wanted length is specified
       */

      for (int c : count) {
        int i = 0;
        List<Sequence> cseqs = new ArrayList<Sequence>();
        chosenSeqs.add(cseqs);

        while (c > 0) {
          Sequence randomSeq = seqs.get(random.nextInt(seqs.size()));
          if (length > 0) {
            int startPos = random.nextInt(1 + randomSeq.length() - length);
            cseqs.add(
                new SimpleSequence(
                    randomSeq.subList(startPos, startPos + length),
                    null,
                    randomSeq.getName() + "_" + i++,
                    Annotation.EMPTY_ANNOTATION));
          } else {
            cseqs.add(randomSeq);
          }
          c--;
        }
      }
    }

    int i = 0;
    for (List<Sequence> seqList : chosenSeqs) {
      for (Sequence seq : seqList) {
        Sequence s;
        if (uniqueNames) {
          s =
              new SimpleSequence(
                  seq.subList(1, seq.length()),
                  null,
                  seq.getName() + "_" + i,
                  Annotation.EMPTY_ANNOTATION);
        } else {
          s = seq;
        }
        RichSequence.IOTools.writeFasta(outputStreams[i], s, null);
        outputStreams[i].flush();
      }
      i++;
    }
    seqs = null;
  }