Пример #1
0
  /**
   * Serializes the given sequences into one FASTA-formatted string.
   *
   * <p>Every sequence contributes two lines: a header made of <code>&gt;</code> followed by the
   * value of <code>getName()</code>, and a line holding the value of <code>seqString()</code>.
   * Both lines are terminated by a line feed character.
   *
   * @param sequences the sequences to serialize, written in the order given
   * @return the concatenated records, or an empty string when no sequence is passed
   */
  public static String toFasta(Sequence... sequences) {
    StringBuilder fasta = new StringBuilder();

    for (int idx = 0; idx < sequences.length; idx++) {
      Sequence current = sequences[idx];
      // Header line first, then the symbols of the sequence on a line of their own.
      fasta.append('>').append(current.getName()).append('\n');
      fasta.append(current.seqString()).append('\n');
    }

    return fasta.toString();
  }
Пример #2
0
 /**
  * Builds the name-to-sequence map required by the super class.
  *
  * <p>Each sequence is stored under the value of its <code>getName()</code>. When both
  * sequences report equal names, the entry for <code>s2</code> replaces the one for
  * <code>s1</code>.
  *
  * @param s1 the first sequence to register
  * @param s2 the second sequence to register
  * @return a newly created map holding both sequences keyed by their names
  */
 private static Map<String, SymbolList> createHashMap(Sequence s1, Sequence s2) {
   Map<String, SymbolList> byName = new HashMap<String, SymbolList>();
   // Insertion order matters only on a name clash: the later sequence wins.
   for (Sequence seq : new Sequence[] {s1, s2}) {
     byName.put(seq.getName(), seq);
   }
   return byName;
 }
Пример #3
0
  /**
   * Renders this alignment as a BLAST-like formatted text block.
   *
   * <p>The output starts with a summary header (time, alignment length, score, name and length
   * of both sequences, identities, similarities and gap counts). It is followed by the aligned
   * region, broken into chunks. Every chunk consists of a "Query:" line, a match line in which
   * <code>|</code> marks matching columns, and a "Sbjct:" line. The sequence lines show a
   * position counter to the left and to the right of the symbols; gap symbols do not advance
   * these counters. Chunks are separated by one empty line.
   *
   * <p>The method reads the alignment state (<code>query</code>, <code>subject</code>, the start
   * and end coordinates, the substitution matrix and the statistics shown in the header) from
   * names that are defined outside of this method.
   *
   * @param width the requested line width. The space taken by the line label and the two
   *     position counters is subtracted from it; at least two symbols are shown per line.
   * @return the formatted String.
   * @throws BioException declared by this method. NOTE(review): presumably propagated from the
   *     symbol or substitution matrix lookups; confirm against the called APIs.
   */
  public String formatOutput(int width) throws BioException {
    int i, j;
    /*
     * Build the match line: one character per aligned column, '|' where the two symbols are
     * regarded as a match and ' ' where they are a mismatch or involve a gap.
     */
    StringBuilder path = new StringBuilder();
    for (i = 0; i < Math.min(queryEnd - queryStart, subjectEnd - subjectStart) + 1; i++) {
      Symbol a = query.symbolAt(i + queryStart);
      Symbol b = subject.symbolAt(i + subjectStart);
      // A column is marked only if neither symbol is the gap symbol of its alphabet and the
      // pair either has a non-negative substitution matrix value or consists of equal symbols.
      if (!a.equals(query.getAlphabet().getGapSymbol())
          && !b.equals(subject.getAlphabet().getGapSymbol())
          && ((subMatrix.getValueAt(a, b) >= 0) || a.equals(b))) {
        path.append('|');
      } else {
        path.append(' ');
      }
    }

    // The number of aligned columns is taken from the match line built above.
    int maxLength = path.length();
    /*
     * Alternative left in place by the original author:
     * Math.max(queryEnd - queryStart, subjectEnd - subjectStart) + 1;
     */
    // Summary header.
    Formatter output = new Formatter();
    output.format("%n Time (ms):  %s%n", time);
    output.format(" Length:     %d%n", maxLength);
    output.format("  Score:     %d%n", getScore());
    output.format("  Query:     %s, Length: %d%n", query.getName(), query.length() - nGapsQ);
    output.format("  Sbjct:     %s, Length: %d%n", subject.getName(), subject.length() - nGapsS);
    output.format(
        " Identities: %d/%d, i.e., %d %% (query) and %d %% (sbjct)%n",
        identicals,
        maxLength,
        Math.round(getPercentIdentityQuery()),
        Math.round(getPercentIdentitySubject()));
    output.format(
        " Similars:   %d/%d, i.e., %d %% (query) and %d %% (sbjct)%n",
        similars,
        maxLength,
        Math.round(getPercentSimilarityQuery()),
        Math.round(getPercentSimilaritySubject()));
    output.format(
        " No. gaps:   %d (%d %%) in query and %d (%d %%) in sbjct%n",
        nGapsQ, Math.round(getPercentGapsQuery()), nGapsS, Math.round(getPercentGapsTarget()));

    // *LPos / *RPos: first and last index of the current chunk within the respective sequence
    // (for the path: begin and exclusive end within the match line).
    int queryLPos = queryStart, queryRPos, pathLPos = 0, pathRPos;
    int subjectLPos = subjectStart, subjectRPos;
    // ql/qr and sl/sr: position counters printed left and right of each chunk. They only count
    // non-gap symbols and start one below the respective start coordinate.
    int ql = queryLPos - 1, qr = queryLPos - 1, qgaps;
    int sl = subjectLPos - 1, sr = subjectLPos - 1, sgaps;

    // Field widths of the left and right position counters.
    // NOTE(review): widthLeft is derived from the query coordinates only, although it is also
    // used for the "Sbjct:" lines; confirm that this is intended.
    int widthLeft = String.valueOf(Math.max(queryStart, queryEnd)).length();
    int widthRight = String.valueOf(Math.max(queryEnd, subjectEnd)).length() + 1;

    // Take width of the meta information into account.
    width = Math.max(width - widthLeft - widthRight - 12, 2);

    // One iteration per chunk of at most 'width' alignment columns.
    for (i = 1; i <= Math.ceil((double) maxLength / width); i++) {

      // Query
      // The chunk ends after 'width' symbols or at the end of the aligned region, whichever
      // comes first.
      queryRPos =
          Math.min(
              queryStart + i * width - 1,
              Math.min(queryEnd, subjectEnd - subjectStart + queryStart));
      qgaps = 0;
      // Advance the right counter for every non-gap symbol and count the gaps of this chunk.
      for (j = queryLPos; j <= queryRPos; j++) {
        if (!query.symbolAt(j).equals(query.getAlphabet().getGapSymbol())) {
          qr++;
        } else {
          qgaps++;
        }
      }
      // The chunk holds queryRPos - queryLPos + 1 symbols, so this is true when at least one of
      // them is not a gap. Only then the left counter moves on to the first non-gap symbol of
      // this chunk.
      if (qgaps <= queryRPos - queryLPos) {
        ql++;
      }
      output.format("%nQuery:   %" + widthLeft + "d ", ql);
      output.format("%s ", query.subStr(queryLPos, queryRPos));
      output.format("%-" + widthRight + "d%n", qr);
      queryLPos = queryRPos + 1;
      // The next chunk continues counting from the last position printed on the right.
      ql = qr;

      // Path
      pathRPos = Math.min(i * width, path.length());
      // A single blank padded to widthLeft + 10 characters indents the match line by the same
      // amount as the "Query:   " label, the left counter and the following blank.
      output.format(
          "%-" + (widthLeft + 10) + "c%s",
          Character.valueOf(' '),
          path.substring(pathLPos, pathRPos));
      pathLPos = pathRPos;

      // Sbjct
      // Same bookkeeping as for the query, applied to the subject sequence.
      subjectRPos =
          Math.min(
              subjectStart + i * width - 1,
              Math.min(queryEnd - queryStart + subjectStart, subjectEnd));
      sgaps = 0;
      for (j = subjectLPos; j <= subjectRPos; j++) {
        if (!subject.symbolAt(j).equals(subject.getAlphabet().getGapSymbol())) {
          sr++;
        } else {
          sgaps++;
        }
      }
      // True when the chunk contains at least one non-gap symbol (see the query part above).
      if (sgaps <= subjectRPos - subjectLPos) {
        sl++;
      }
      output.format("%nSbjct:   %" + widthLeft + "d ", sl);
      output.format("%s ", subject.subStr(subjectLPos, subjectRPos));
      output.format("%-" + widthRight + "d%n", sr);
      subjectLPos = subjectRPos + 1;
      sl = sr;
    }
    return output.toString();
  }
Пример #4
0
  /**
   * Draws random samples from the FASTA file <code>seqFile</code> and writes every sample in
   * FASTA format to its own output.
   *
   * <p>One sample is drawn per entry of <code>count</code>; the entry gives the number of
   * sequences in that sample. The sampling mode depends on settings that are defined outside
   * of this method:
   *
   * <ul>
   *   <li>If <code>sampleWithReplacement</code> is false and <code>length</code> is not
   *       positive, whole sequences are drawn without replacement, i.e. no input sequence is
   *       used twice across all samples.
   *   <li>Otherwise sequences are drawn with replacement. If <code>length</code> is positive, a
   *       randomly placed window of exactly <code>length</code> symbols is cut out of every
   *       drawn sequence and named after the source sequence plus a running number.
   * </ul>
   *
   * <p>If <code>uniqueNames</code> is set, every written sequence is renamed to its original
   * name followed by an underscore and the index of the output it is written to.
   *
   * <p>If the number of output files does not match the number of counts, an error message is
   * printed to <code>System.err</code> and the JVM is terminated with exit status 1.
   *
   * @param args the output file names, one per entry of <code>count</code>. A
   *     <code>null</code> array is treated like an empty one; without file names the output
   *     goes to <code>System.out</code>.
   * @throws BioException declared by this method. NOTE(review): presumably raised while parsing
   *     the input sequences; confirm against the called APIs.
   * @throws IOException if the input file cannot be read or an output cannot be written or
   *     closed
   */
  public void main(String[] args) throws BioException, IOException {

    // A null argument array used to fail with a NullPointerException in the length comparison.
    int nOutputs = (args == null) ? 0 : args.length;
    if (count.length != nOutputs) {
      System.err.println("The number of counts and output file arguments does not match!");
      System.exit(1);
    }

    boolean writeToFiles = nOutputs > 0;
    OutputStream[] outputStreams =
        writeToFiles ? new OutputStream[nOutputs] : new OutputStream[] {System.out};

    boolean completed = false;
    try {
      // The files are opened inside the try block so that the streams which were already
      // opened get closed if a later file cannot be created.
      if (writeToFiles) {
        for (int i = 0; i < nOutputs; i++) {
          outputStreams[i] = new BufferedOutputStream(new FileOutputStream(args[i]));
        }
      }

      // Read all input sequences into memory and release the input file right afterwards.
      List<Sequence> seqs = new ArrayList<Sequence>();
      BufferedReader reader = new BufferedReader(new FileReader(seqFile));
      try {
        RichSequenceIterator seqIterator = RichSequence.IOTools.readFastaDNA(reader, null);
        while (seqIterator.hasNext()) {
          seqs.add(seqIterator.nextSequence());
        }
      } finally {
        reader.close();
      }

      List<List<Sequence>> chosenSeqs = new ArrayList<List<Sequence>>();
      if (!sampleWithReplacement && (length <= 0)) {
        // Whole sequences without replacement: a drawn sequence is removed from the pool.
        for (int c : count) {
          List<Sequence> cseqs = new ArrayList<Sequence>();
          chosenSeqs.add(cseqs);
          while (c > 0) {
            int randSeqIndex = random.nextInt(seqs.size());
            cseqs.add(seqs.remove(randSeqIndex));
            c--;
          }
        }
      } else {
        /*
         * Sampling with replacement, or a window length is specified. The former condition
         * sent every request without replacement to the branch above, so that a specified
         * length was silently ignored there.
         */
        for (int c : count) {
          int windowNo = 0;
          List<Sequence> cseqs = new ArrayList<Sequence>();
          chosenSeqs.add(cseqs);

          while (c > 0) {
            Sequence randomSeq = seqs.get(random.nextInt(seqs.size()));
            if (length > 0) {
              // Sequence coordinates are 1-based and subList includes both ends (compare
              // subList(1, seq.length()) below). Valid window starts therefore range from 1
              // to randomSeq.length() - length + 1, and the window ends at start + length - 1.
              int startPos = 1 + random.nextInt(1 + randomSeq.length() - length);
              cseqs.add(
                  new SimpleSequence(
                      randomSeq.subList(startPos, startPos + length - 1),
                      null,
                      randomSeq.getName() + "_" + windowNo++,
                      Annotation.EMPTY_ANNOTATION));
            } else {
              cseqs.add(randomSeq);
            }
            c--;
          }
        }
      }

      // Write the i-th sample to the i-th output.
      int i = 0;
      for (List<Sequence> seqList : chosenSeqs) {
        for (Sequence seq : seqList) {
          Sequence s;
          if (uniqueNames) {
            // NOTE(review): the suffix is the index of the output, so all sequences written to
            // the same output share it; confirm that this yields the intended unique names.
            s =
                new SimpleSequence(
                    seq.subList(1, seq.length()),
                    null,
                    seq.getName() + "_" + i,
                    Annotation.EMPTY_ANNOTATION);
          } else {
            s = seq;
          }
          RichSequence.IOTools.writeFasta(outputStreams[i], s, null);
          outputStreams[i].flush();
        }
        i++;
      }
      completed = true;
    } finally {
      if (writeToFiles) {
        // Close every file that was opened. System.out is never closed by this method.
        IOException closeFailure = null;
        for (OutputStream out : outputStreams) {
          if (out == null) {
            continue;
          }
          try {
            out.close();
          } catch (IOException e) {
            if (closeFailure == null) {
              closeFailure = e;
            }
          }
        }
        // Report a failed close only if it does not hide an exception that is already
        // propagating out of the try block.
        if (completed && (closeFailure != null)) {
          throw closeFailure;
        }
      }
    }
  }