/**
   * Aligns two sequences by Needleman-Wunsch (global).
   *
   * <p>The traceback table and the gap-length table are allocated here as flat arrays holding
   * {@code (s1.length() + 1) * (s2.length() + 1)} entries each. Their first row and first column
   * are pre-filled, and the rest of the work is delegated to {@code construct} and {@code
   * traceback}. The two sequences are never swapped: {@code s1} always stays sequence #1.
   *
   * @param s1 sequence #1 ({@link Sequence})
   * @param s2 sequence #2 ({@link Sequence})
   * @param matrix scoring matrix ({@link Matrix})
   * @param o open gap penalty
   * @param e extend gap penalty
   * @return alignment object holding the two aligned sequences, the alignment score and the
   *     alignment statistics
   * @see Sequence
   * @see Matrix
   */
  public static Alignment align(Sequence s1, Sequence s2, Matrix matrix, float o, float e) {
    final float[][] scores = matrix.getScores();

    final int rowCount = s1.length() + 1;
    final int colCount = s2.length() + 1;
    final int cellCount = rowCount * colCount;

    final byte[] pointers = new byte[cellCount];
    final short[] lengths = new short[cellCount];

    // The origin cell terminates every traceback.
    pointers[0] = Directions.STOP;

    // First column: each cell points UP and records how many rows lie above it.
    for (int row = 1; row < rowCount; row++) {
      final int index = row * colCount;
      pointers[index] = Directions.UP;
      lengths[index] = (short) row;
    }

    // First row: each cell points LEFT and records how many columns lie before it.
    for (int col = 1; col < colCount; col++) {
      pointers[col] = Directions.LEFT;
      lengths[col] = (short) col;
    }

    final Cell start = construct(s1, s2, scores, o, e, pointers, lengths);
    final Alignment result = traceback(s1, s2, matrix, pointers, start, lengths);

    // Attach the parameters and the untouched input sequences to the result.
    result.setMatrix(matrix);
    result.setOpen(o);
    result.setExtend(e);
    result.setName1(s1.getId());
    result.setName2(s2.getId());
    result.setOriginalSequence1(s1);
    result.setOriginalSequence2(s2);

    return result;
  }
예제 #2
0
  /**
   * Reads the header of the input from {@code fio} and prepares the automaton state.
   *
   * <p>Resets the {@code shortest}/{@code longest} length sentinels, reads {@code N} and {@code
   * M}, creates {@code N} nodes numbered from 0 (node 0 becomes {@code FIRST}), reads the flag
   * {@code Q} (true when the next integer is 1), discards the rest of that line, and then reads
   * the automaton itself followed by the terminals.
   */
  static void read_data() {
    // Extreme sentinels for the running minimum and maximum.
    shortest.length = Integer.MAX_VALUE;
    longest.length = Integer.MIN_VALUE;

    N = fio.nextInt();
    M = fio.nextInt();

    states = new Node[N];

    // Node 0 is created first and doubles as the start node.
    Node initial = new Node(0);
    states[0] = initial;
    FIRST = initial;

    for (int id = 1; id < N; id++) {
      states[id] = new Node(id);
    }

    Q = fio.nextInt() == 1;

    // Skip whatever remains on the current input line.
    fio.nextLine();

    if (Q) {
      read_automata_defined();
    } else {
      read_automata_generated();
    }
    read_terminals();
  }
예제 #3
0
  /**
   * Runs {@code solve_automata_to} once for every terminal state and folds the outcome stored on
   * {@code FIRST} into the global {@code longest} / {@code shortest} records.
   *
   * <p>Before each run all {@code visited} flags are cleared. Once {@code FIRST.hasCycle} is seen,
   * {@code FINITE} is switched off and stays off. While {@code FINITE} holds, {@code FIRST.max}
   * competes for {@code longest}; otherwise {@code FIRST.min} competes for {@code shortest}. On
   * equal length the candidate only wins when the stored data is non-null and the candidate
   * compares greater (for longest) or smaller (for shortest).
   */
  static void solve_automata() {
    for (Node terminal : states) {
      if (terminal.isTerminal) {
        for (Node node : states) {
          node.visited = false;
        }

        solve_automata_to(terminal);

        if (FIRST.hasCycle) {
          FINITE = false;
        }

        if (FINITE) {
          // Finite automaton: track the longest result.
          if (FIRST.max.data != null) {
            if (longest.length < FIRST.max.length) {
              longest.length = FIRST.max.length;
              longest.data = FIRST.max.data;
            } else if (longest.length == FIRST.max.length
                && longest.data != null
                && FIRST.max.data.compareTo(longest.data) > 0) {
              longest.data = FIRST.max.data;
            }
          }
        } else {
          // Infinite automaton: track the shortest result.
          if (FIRST.min.data != null) {
            if (shortest.length > FIRST.min.length) {
              shortest.length = FIRST.min.length;
              shortest.data = FIRST.min.data;
            } else if (shortest.length == FIRST.min.length
                && shortest.data != null
                && FIRST.min.data.compareTo(shortest.data) < 0) {
              shortest.data = FIRST.min.data;
            }
          }
        }
      }
    }
  }
  /**
   * Returns the alignment of two sequences based on the passed array of pointers.
   *
   * <p>The aligned rows are built back to front and reversed at the end. First the part of both
   * sequences that lies beyond the start cell is emitted without consulting {@code pointers}: the
   * surplus residues of the sequence with the longer tail are placed against gaps, and the
   * remaining tail residues are paired position by position. Then the pointers are followed from
   * the start cell until a {@code Directions.STOP} entry is reached.
   *
   * @param s1 sequence #1
   * @param s2 sequence #2
   * @param m scoring matrix
   * @param pointers traceback matrix, flattened row by row with {@code s2.length() + 1} columns
   * @param cell The cell where the traceback starts.
   * @param lengths gap run length for every {@code UP} / {@code LEFT} entry of {@code pointers},
   *     indexed like {@code pointers}; each entry is read as an unsigned 16-bit value
   * @return {@link Alignment} with the two aligned sequences and alignment score.
   * @throws IllegalStateException if a visited entry holds an unknown direction or a gap run of
   *     length zero (either would otherwise keep the traceback from ever advancing)
   * @see Cell
   * @see Alignment
   */
  private static Alignment traceback(
      Sequence s1, Sequence s2, Matrix m, byte[] pointers, Cell cell, short[] lengths) {
    logger.info("Started...");

    char[] array1 = s1.toArray();
    char[] array2 = s2.toArray();
    float[][] scores = m.getScores();

    Alignment alignment = new Alignment();
    alignment.setScore(cell.getScore());

    // maximum length after the aligned sequences
    int maxlen = s1.length() + s2.length();

    char[] reversed1 = new char[maxlen]; // reversed sequence #1
    char[] reversed2 = new char[maxlen]; // reversed sequence #2
    char[] reversed3 = new char[maxlen]; // reversed markup

    int len1 = 0; // length of sequence #1 after alignment
    int len2 = 0; // length of sequence #2 after alignment
    int len3 = 0; // length of the markup line

    int identity = 0; // count of identical pairs
    int similarity = 0; // count of similar pairs
    int gaps = 0; // count of gaps

    char c1, c2;

    int i = cell.getRow(); // traceback start row
    int j = cell.getCol(); // traceback start col
    int n = s2.length() + 1; // number of columns in the flattened tables
    int row = i * n; // offset of row i inside the flattened tables

    // Emit the tails that lie beyond the start cell, walking both sequences from their last
    // residue (a, b) back towards the start cell.
    int a = s1.length() - 1;
    int b = s2.length() - 1;
    if (a - i > b - j) {
      // Sequence #1 has the longer tail: its surplus residues face gaps ...
      for (; a - i > b - j; a--) {
        reversed1[len1++] = array1[a];
        reversed2[len2++] = Alignment.GAP;
        reversed3[len3++] = Markups.GAP;
        gaps++;
      }
      // ... and the rest of both tails is paired one to one.
      for (; b > j - 1; a--, b--) {
        c1 = array1[a];
        c2 = array2[b];

        reversed1[len1++] = c1;
        reversed2[len2++] = c2;

        if (c1 == c2) {
          reversed3[len3++] = Markups.IDENTITY;
          identity++;
          similarity++;
        } else if (scores[c1][c2] > 0) {
          reversed3[len3++] = Markups.SIMILARITY;
          similarity++;
        } else {
          reversed3[len3++] = Markups.MISMATCH;
        }
      }
    } else {
      // Sequence #2 has the longer (or an equally long) tail: mirror image of the branch above.
      for (; b - j > a - i; b--) {
        reversed1[len1++] = Alignment.GAP;
        reversed2[len2++] = array2[b];
        reversed3[len3++] = Markups.GAP;
        gaps++;
      }
      for (; a > i - 1; a--, b--) {
        c1 = array1[a];
        c2 = array2[b];

        reversed1[len1++] = c1;
        reversed2[len2++] = c2;

        if (c1 == c2) {
          reversed3[len3++] = Markups.IDENTITY;
          identity++;
          similarity++;
        } else if (scores[c1][c2] > 0) {
          reversed3[len3++] = Markups.SIMILARITY;
          similarity++;
        } else {
          reversed3[len3++] = Markups.MISMATCH;
        }
      }
    }

    // Traceback flag, where true => continue and false => stop
    boolean stillGoing = true;
    while (stillGoing) {
      int l = row + j;
      switch (pointers[l]) {
        case Directions.UP:
          {
            // Run lengths are stored as short; reading them unsigned keeps runs of
            // 32768..65535 intact instead of turning them into negative loop bounds.
            int verticalRun = lengths[l] & 0xFFFF;
            if (verticalRun == 0) {
              throw new IllegalStateException(
                  "Zero-length vertical gap at row " + i + ", column " + j);
            }
            for (int k = 0; k < verticalRun; k++) {
              reversed1[len1++] = array1[--i];
              reversed2[len2++] = Alignment.GAP;
              reversed3[len3++] = Markups.GAP;
              row -= n;
              gaps++;
            }
            break;
          }
        case Directions.DIAGONAL:
          c1 = array1[--i];
          c2 = array2[--j];
          reversed1[len1++] = c1;
          reversed2[len2++] = c2;
          row -= n;
          if (c1 == c2) {
            reversed3[len3++] = Markups.IDENTITY;
            identity++;
            similarity++;
          } else if (scores[c1][c2] > 0) {
            reversed3[len3++] = Markups.SIMILARITY;
            similarity++;
          } else {
            reversed3[len3++] = Markups.MISMATCH;
          }
          break;
        case Directions.LEFT:
          {
            int horizontalRun = lengths[l] & 0xFFFF; // unsigned read, see UP
            if (horizontalRun == 0) {
              throw new IllegalStateException(
                  "Zero-length horizontal gap at row " + i + ", column " + j);
            }
            for (int k = 0; k < horizontalRun; k++) {
              reversed1[len1++] = Alignment.GAP;
              reversed2[len2++] = array2[--j];
              reversed3[len3++] = Markups.GAP;
              gaps++;
            }
            break;
          }
        case Directions.STOP:
          stillGoing = false;
          break;
        default:
          // Nothing would move the position for an unknown value, so fail instead of looping.
          throw new IllegalStateException(
              "Unknown traceback direction " + pointers[l] + " at row " + i + ", column " + j);
      }
    }

    alignment.setSequence1(reverse(reversed1, len1));
    alignment.setStart1(i);
    alignment.setSequence2(reverse(reversed2, len2));
    alignment.setStart2(j);
    alignment.setMarkupLine(reverse(reversed3, len3));
    alignment.setIdentity(identity);
    alignment.setGaps(gaps);
    alignment.setSimilarity(similarity);

    logger.info("Finished.");

    return alignment;
  }
  /**
   * Constructs directions matrix for the traceback.
   *
   * <p>The tables are filled row by row while only a single row of scores is kept in memory. Gaps
   * are scored affinely: opening a gap subtracts {@code o} from the neighbouring best score, and
   * every further gap position subtracts {@code e}. The top row starts with score 0; column 0 of
   * row {@code i} is scored {@code -o - (i - 1) * e}, while the diagonal step into column 1 always
   * starts from 0. When several moves reach the same best score the diagonal is preferred, then
   * the move from above, then the move from the left.
   *
   * @param s1 sequence #1
   * @param s2 sequence #2
   * @param matrix scoring matrix, indexed by the characters of both sequences
   * @param o open gap penalty
   * @param e extend gap penalty
   * @param pointers traceback matrix, flattened row by row; interior cells are written here
   * @param lengths gap run length for every interior cell whose pointer is UP or LEFT
   * @return The cell where the traceback starts: row and column of the first cell (in row-major
   *     order) holding the highest score, paired with the score of the last column in the last
   *     processed row.
   */
  private static Cell construct(
      Sequence s1,
      Sequence s2,
      float[][] matrix,
      float o,
      float e,
      byte[] pointers,
      short[] lengths) {

    logger.info("Started...");

    final char[] residues1 = s1.toArray();
    final char[] residues2 = s2.toArray();

    final int rowCount = s1.length() + 1; // rows of the similarity matrix
    final int colCount = s2.length() + 1; // columns of the similarity matrix

    // Best score per column for the row processed last (all zero for the top row).
    final float[] best = new float[colCount];

    // Best score per column of an alignment that ends with a gap coming from above.
    final float[] gapFromAbove = new float[colCount];
    for (int col = 0; col < colCount; col++) {
      gapFromAbove[col] = Float.NEGATIVE_INFINITY;
    }

    // Length of the vertical gap run behind gapFromAbove, per column.
    final int[] verticalRun = new int[colCount];

    float topScore = Float.NEGATIVE_INFINITY;
    int topRow = 0;
    int topCol = 0;

    for (int row = 1; row < rowCount; row++) {
      final int rowOffset = row * colCount;

      // Per-row state: nothing lies to the left yet, and the diagonal predecessor counts as 0.
      float diagonalPrev = 0;
      float gapFromLeft = Float.NEGATIVE_INFINITY;
      int horizontalRun = 0;

      best[0] = -o - (row - 1) * e;

      for (int col = 1; col < colCount; col++) {
        final int index = rowOffset + col;

        // Arriving along the diagonal.
        final float fromDiagonal =
            diagonalPrev + matrix[residues1[row - 1]][residues2[col - 1]];

        // Arriving from the left: extend the running gap or open a new one.
        final float extendLeft = gapFromLeft - e;
        final float openLeft = best[col - 1] - o;
        if (extendLeft >= openLeft) {
          gapFromLeft = extendLeft;
          horizontalRun++;
        } else {
          gapFromLeft = openLeft;
          horizontalRun = 1;
        }

        // Arriving from above: same choice, tracked per column.
        final float extendUp = gapFromAbove[col] - e;
        final float openUp = best[col] - o;
        if (extendUp >= openUp) {
          gapFromAbove[col] = extendUp;
          verticalRun[col]++;
        } else {
          gapFromAbove[col] = openUp;
          verticalRun[col] = 1;
        }

        // best[col] still holds the previous row here; it is the next column's diagonal.
        diagonalPrev = best[col];

        final float cellScore = maximum(fromDiagonal, gapFromAbove[col], gapFromLeft);
        best[col] = cellScore;

        if (cellScore > topScore) {
          topScore = cellScore;
          topRow = row;
          topCol = col;
        }

        // Record which move produced the best score.
        if (cellScore == fromDiagonal) {
          pointers[index] = Directions.DIAGONAL;
        } else if (cellScore == gapFromAbove[col]) {
          pointers[index] = Directions.UP;
          lengths[index] = (short) verticalRun[col];
        } else if (cellScore == gapFromLeft) {
          pointers[index] = Directions.LEFT;
          lengths[index] = (short) horizontalRun;
        }
      }
    }

    // NOTE(review): the position is the best-scoring cell, but the stored score is the last
    // column of the final row rather than topScore -- confirm this is intended.
    final Cell start = new Cell();
    start.set(topRow, topCol, best[colCount - 1]);

    logger.info("Finished.");

    return start;
  }
예제 #6
0
  /**
   * Answers a {@code dna} command by writing a {@code DASDNA} document for the requested segments.
   *
   * <p>All segments are resolved and validated before any output is started: each reference must
   * resolve to a sequence whose alphabet is the one returned by {@code DNATools.getDNA()}, and a
   * bounded segment must lie within {@code 1..seq.length()}. The sequences are then written in the
   * order in which {@code DazzleTools.getSegments} returned the segments, 60 symbols per line.
   *
   * @param req the servlet request the segments are taken from
   * @param resp the response the XML document is written to
   * @param dds the data source; it is cast to {@link DazzleReferenceSource}
   * @throws DazzleException if no segment was given, a reference cannot be resolved, a sequence
   *     is not DNA, the coordinates do not fit the sequence, or writing the document fails
   * @throws IOException declared by the signature; see the called helpers
   * @throws DataSourceException declared by the signature; see the called helpers
   * @throws ServletException declared by the signature; see the called helpers
   */
  private void dnaCommand(HttpServletRequest req, DazzleResponse resp, DazzleDataSource dds)
      throws IOException, DataSourceException, ServletException, DazzleException {

    DazzleReferenceSource drs = (DazzleReferenceSource) dds;

    List segments = DazzleTools.getSegments(dds, req, resp);
    if (segments.isEmpty()) {
      throw new DazzleException(
          DASStatus.STATUS_BAD_COMMAND_ARGUMENTS, "No segments specified for dna command");
    }

    // Fetch and validate the requests.

    // Insertion-ordered so the response lists the segments in the order they were returned by
    // getSegments instead of in hash iteration order.
    Map segmentResults = new java.util.LinkedHashMap();
    for (Iterator i = segments.iterator(); i.hasNext(); ) {
      Segment seg = (Segment) i.next();

      try {
        Sequence seq = drs.getSequence(seg.getReference());
        if (seq.getAlphabet() != DNATools.getDNA()) {
          throw new DazzleException(
              DASStatus.STATUS_SERVER_ERROR,
              "Sequence " + seg.toString() + " is not in the DNA alphabet");
        }
        if (seg.isBounded()) {
          if (seg.getMin() < 1 || seg.getMax() > seq.length()) {
            throw new DazzleException(
                DASStatus.STATUS_BAD_COORDS,
                "Segment " + seg.toString() + " doesn't fit sequence of length " + seq.length());
          }
        }
        segmentResults.put(seg, seq);
      } catch (NoSuchElementException ex) {
        // Unknown reference.
        throw new DazzleException(DASStatus.STATUS_BAD_REFERENCE, ex);
      } catch (DataSourceException ex) {
        throw new DazzleException(DASStatus.STATUS_SERVER_ERROR, ex);
      }
    }

    //
    // Looks okay -- generate the response document
    //

    XMLWriter xw = resp.startDasXML("DASDNA", "dasdna.dtd");

    try {
      xw.openTag("DASDNA");
      for (Iterator i = segmentResults.entrySet().iterator(); i.hasNext(); ) {
        Map.Entry me = (Map.Entry) i.next();
        Segment seg = (Segment) me.getKey();
        Sequence seq = (Sequence) me.getValue();

        xw.openTag("SEQUENCE");
        xw.attribute("id", seg.getReference());
        xw.attribute("version", drs.getLandmarkVersion(seg.getReference()));
        if (seg.isBounded()) {
          xw.attribute("start", String.valueOf(seg.getStart()));
          xw.attribute("stop", String.valueOf(seg.getStop()));
        } else {
          // An unbounded segment stands for the whole sequence.
          xw.attribute("start", String.valueOf(1));
          xw.attribute("stop", String.valueOf(seq.length()));
        }

        // Narrow to the requested range first, then flip to the other strand if asked to.
        SymbolList syms = seq;
        if (seg.isBounded()) {
          syms = syms.subList(seg.getMin(), seg.getMax());
        }
        if (seg.isInverted()) {
          syms = DNATools.reverseComplement(syms);
        }

        xw.openTag("DNA");
        xw.attribute("length", String.valueOf(syms.length()));

        // Positions are 1-based and inclusive; emit at most 60 symbols per line.
        for (int pos = 1; pos <= syms.length(); pos += 60) {
          int maxPos = Math.min(syms.length(), pos + 59);
          xw.println(syms.subStr(pos, maxPos));
        }

        xw.closeTag("DNA");
        xw.closeTag("SEQUENCE");
      }
      xw.closeTag("DASDNA");
      xw.close();
    } catch (Exception ex) {
      throw new DazzleException(ex, "Error writing DNA document");
    }
  }