/** * Aligns two sequences by Needleman-Wunsch (global) * * @param s1 sequene #1 ({@link Read}) * @param s2 sequene #2 ({@link Read}) * @param matrix scoring matrix ({@link Matrix}) * @param o open gap penalty * @param e extend gap penalty * @return alignment object contains the two aligned sequences, the alignment score and alignment * statistics * @see Read * @see Matrix */ public static Alignment align(Sequence s1, Sequence s2, Matrix matrix, float o, float e) { float[][] scores = matrix.getScores(); Sequence _s1; Sequence _s2; /* * Commented out this stupid sequence switching. Sequence 1 should STAY sequence 1! */ // if (s1.length() < s2.length()) { // _s1 = s2; // _s2 = s1; // } else { _s1 = s1; _s2 = s2; // } int m = _s1.length() + 1; int n = _s2.length() + 1; byte[] pointers = new byte[m * n]; short[] lengths = new short[m * n]; // Initializes the element (0,0) of the traceback matrix to STOP. pointers[0] = Directions.STOP; // Initializes the boundaries of the traceback matrix. for (int i = 1, k = n; i < m; i++, k += n) { pointers[k] = Directions.UP; lengths[k] = (short) i; } for (int j = 1; j < n; j++) { pointers[j] = Directions.LEFT; lengths[j] = (short) j; } Cell cell = construct(_s1, _s2, scores, o, e, pointers, lengths); Alignment alignment = traceback(_s1, _s2, matrix, pointers, cell, lengths); alignment.setMatrix(matrix); alignment.setOpen(o); alignment.setExtend(e); alignment.setName1(_s1.getId()); alignment.setName2(_s2.getId()); alignment.setOriginalSequence1(_s1); alignment.setOriginalSequence2(_s2); return alignment; }
/**
 * Reads the problem input from {@code fio} and initialises the shared solver state.
 *
 * <p>Resets the {@code shortest}/{@code longest} length trackers to their sentinel values, reads
 * {@code N} and {@code M}, creates {@code N} nodes numbered {@code 0..N-1} (node 0 becomes
 * {@code FIRST}), then reads a flag that selects between an explicitly defined automaton (flag
 * equal to 1) and a generated one, and finally reads the terminal states.
 */
static void read_data() {
  // Sentinels: any real length is smaller than MAX_VALUE and larger than MIN_VALUE.
  shortest.length = Integer.MAX_VALUE;
  longest.length = Integer.MIN_VALUE;

  N = fio.nextInt();
  M = fio.nextInt();

  states = new Node[N];
  states[0] = new Node(0);
  FIRST = states[0];
  int id = 1;
  while (id < N) {
    states[id] = new Node(id);
    id++;
  }

  Q = fio.nextInt() == 1;
  fio.nextLine(); // skip the rest of the header line

  if (Q) {
    read_automata_defined();
  } else {
    read_automata_generated();
  }

  read_terminals();
}
static void solve_automata() { for (Node state : states) { if (!state.isTerminal) continue; for (Node k : states) k.visited = false; solve_automata_to(state); if (FIRST.hasCycle) FINITE = false; // post-solve for FINITE AUTOMATA if (FINITE) { if (FIRST.max.data == null) continue; if (longest.length < FIRST.max.length) { longest.length = FIRST.max.length; longest.data = FIRST.max.data; } else if (longest.length == FIRST.max.length && longest.data != null && FIRST.max.data.compareTo(longest.data) > 0) { longest.data = FIRST.max.data; } } // post-solve for INFINITE AUTOMATA else { if (FIRST.min.data == null) continue; if (shortest.length > FIRST.min.length) { shortest.length = FIRST.min.length; shortest.data = FIRST.min.data; } else if (shortest.length == FIRST.min.length && shortest.data != null && FIRST.min.data.compareTo(shortest.data) < 0) { shortest.data = FIRST.min.data; } } } }
/** * Returns the alignment of two sequences based on the passed array of pointers * * @param s1 sequence #1 * @param s2 sequence #2 * @param m scoring matrix * @param pointers traceback matrix * @param cell The cell where the traceback starts. * @return {@link Alignment} with the two aligned sequences and alignment score. * @see Cell * @see Alignment */ private static Alignment traceback( Sequence s1, Sequence s2, Matrix m, byte[] pointers, Cell cell, short[] lengths) { logger.info("Started..."); char[] array1 = s1.toArray(); char[] array2 = s2.toArray(); float[][] scores = m.getScores(); Alignment alignment = new Alignment(); alignment.setScore(cell.getScore()); // maximum length after the aligned sequences int maxlen = s1.length() + s2.length(); char[] reversed1 = new char[maxlen]; // reversed sequence #1 char[] reversed2 = new char[maxlen]; // reversed sequence #2 char[] reversed3 = new char[maxlen]; // reversed markup int len1 = 0; // length of sequence #1 after alignment int len2 = 0; // length of sequence #2 after alignment int len3 = 0; // length of the markup line int identity = 0; // count of identitcal pairs int similarity = 0; // count of similar pairs int gaps = 0; // count of gaps char c1, c2; int i = cell.getRow(); // traceback start row int j = cell.getCol(); // traceback start col int n = s2.length() + 1; int row = i * n; int a = s1.length() - 1; int b = s2.length() - 1; if (a - i > b - j) { for (; a - i > b - j; a--) { reversed1[len1++] = array1[a]; reversed2[len2++] = Alignment.GAP; reversed3[len3++] = Markups.GAP; gaps++; } for (; b > j - 1; a--, b--) { c1 = array1[a]; c2 = array2[b]; reversed1[len1++] = c1; reversed2[len2++] = c2; if (c1 == c2) { reversed3[len3++] = Markups.IDENTITY; identity++; similarity++; } else if (scores[c1][c2] > 0) { reversed3[len3++] = Markups.SIMILARITY; similarity++; } else { reversed3[len3++] = Markups.MISMATCH; } } } else { for (; b - j > a - i; b--) { reversed1[len1++] = Alignment.GAP; reversed2[len2++] = 
array2[b]; reversed3[len3++] = Markups.GAP; gaps++; } for (; a > i - 1; a--, b--) { c1 = array1[a]; c2 = array2[b]; reversed1[len1++] = c1; reversed2[len2++] = c2; if (c1 == c2) { reversed3[len3++] = Markups.IDENTITY; identity++; similarity++; } else if (scores[c1][c2] > 0) { reversed3[len3++] = Markups.SIMILARITY; similarity++; } else { reversed3[len3++] = Markups.MISMATCH; } } } // Traceback flag, where true => continue and false => stop boolean stillGoing = true; while (stillGoing) { int l = row + j; switch (pointers[l]) { case Directions.UP: for (int k = 0, len = lengths[l]; k < len; k++) { reversed1[len1++] = array1[--i]; reversed2[len2++] = Alignment.GAP; reversed3[len3++] = Markups.GAP; row -= n; gaps++; } break; case Directions.DIAGONAL: c1 = array1[--i]; c2 = array2[--j]; reversed1[len1++] = c1; reversed2[len2++] = c2; row -= n; if (c1 == c2) { reversed3[len3++] = Markups.IDENTITY; identity++; similarity++; } else if (scores[c1][c2] > 0) { reversed3[len3++] = Markups.SIMILARITY; similarity++; } else { reversed3[len3++] = Markups.MISMATCH; } break; case Directions.LEFT: for (int k = 0, len = lengths[l]; k < len; k++) { reversed1[len1++] = Alignment.GAP; reversed2[len2++] = array2[--j]; reversed3[len3++] = Markups.GAP; gaps++; } break; case Directions.STOP: stillGoing = false; } } alignment.setSequence1(reverse(reversed1, len1)); alignment.setStart1(i); alignment.setSequence2(reverse(reversed2, len2)); alignment.setStart2(j); alignment.setMarkupLine(reverse(reversed3, len3)); alignment.setIdentity(identity); alignment.setGaps(gaps); alignment.setSimilarity(similarity); logger.info("Finished."); return alignment; }
/** * Constructs directions matrix for the traceback. * * @param s1 sequence #1 * @param s2 sequence #2 * @param matrix scoring matrix * @param o open gap penalty * @param e extend gap penalty * @param pointers traceback matrix * @return The cell where the traceback starts. */ private static Cell construct( Sequence s1, Sequence s2, float[][] matrix, float o, float e, byte[] pointers, short[] lengths) { logger.info("Started..."); char[] a1 = s1.toArray(); char[] a2 = s2.toArray(); int m = s1.length() + 1; // number of rows in similarity matrix int n = s2.length() + 1; // number of columns in similarity matrix float[] v = new float[n]; float vDiagonal = 0; // Float.NEGATIVE_INFINITY; // best score in cell float f = Float.NEGATIVE_INFINITY; // score from diagonal float h = Float.NEGATIVE_INFINITY; // best score ending with gap from // left float[] g = new float[n]; // best score ending with gap from above // Initialization of v and g g[0] = Float.NEGATIVE_INFINITY; for (int j = 1; j < n; j++) { v[j] = 0; // -o - (j - 1) * e; g[j] = Float.NEGATIVE_INFINITY; } int lengthOfHorizontalGap = 0; int[] lengthOfVerticalGap = new int[n]; float similarityScore; float maximumScore = Float.NEGATIVE_INFINITY; int maxi = 0; int maxj = 0; // Fill the matrices for (int i = 1, k = n; i < m; i++, k += n) { // for all rows v[0] = -o - (i - 1) * e; for (int j = 1, l = k + 1; j < n; j++, l++) { // for all columns similarityScore = matrix[a1[i - 1]][a2[j - 1]]; f = vDiagonal + similarityScore; // from diagonal // Which cell from the left? if (h - e >= v[j - 1] - o) { h -= e; lengthOfHorizontalGap++; } else { h = v[j - 1] - o; lengthOfHorizontalGap = 1; } // Which cell from above? 
if (g[j] - e >= v[j] - o) { g[j] = g[j] - e; lengthOfVerticalGap[j] = lengthOfVerticalGap[j] + 1; } else { g[j] = v[j] - o; lengthOfVerticalGap[j] = 1; } vDiagonal = v[j]; v[j] = maximum(f, g[j], h); // best one if (v[j] > maximumScore) { maximumScore = v[j]; maxi = i; maxj = j; } // Determine the traceback direction if (v[j] == f) { pointers[l] = Directions.DIAGONAL; } else if (v[j] == g[j]) { pointers[l] = Directions.UP; lengths[l] = (short) lengthOfVerticalGap[j]; } else if (v[j] == h) { pointers[l] = Directions.LEFT; lengths[l] = (short) lengthOfHorizontalGap; } } // loop columns // Reset h = Float.NEGATIVE_INFINITY; vDiagonal = 0; // -o - (i - 1) * e; lengthOfHorizontalGap = 0; } // loop rows Cell cell = new Cell(); cell.set(maxi, maxj, v[n - 1]); logger.info("Finished."); return cell; }
private void dnaCommand(HttpServletRequest req, DazzleResponse resp, DazzleDataSource dds) throws IOException, DataSourceException, ServletException, DazzleException { DazzleReferenceSource drs = (DazzleReferenceSource) dds; List segments = DazzleTools.getSegments(dds, req, resp); if (segments.size() == 0) { throw new DazzleException( DASStatus.STATUS_BAD_COMMAND_ARGUMENTS, "No segments specified for dna command"); } // Fetch and validate the requests. Map segmentResults = new HashMap(); for (Iterator i = segments.iterator(); i.hasNext(); ) { Segment seg = (Segment) i.next(); try { Sequence seq = drs.getSequence(seg.getReference()); if (seq.getAlphabet() != DNATools.getDNA()) { throw new DazzleException( DASStatus.STATUS_SERVER_ERROR, "Sequence " + seg.toString() + " is not in the DNA alphabet"); } if (seg.isBounded()) { if (seg.getMin() < 1 || seg.getMax() > seq.length()) { throw new DazzleException( DASStatus.STATUS_BAD_COORDS, "Segment " + seg.toString() + " doesn't fit sequence of length " + seq.length()); } } segmentResults.put(seg, seq); } catch (NoSuchElementException ex) { throw new DazzleException(DASStatus.STATUS_BAD_REFERENCE, ex); } catch (DataSourceException ex) { throw new DazzleException(DASStatus.STATUS_SERVER_ERROR, ex); } } // // Looks okay -- generate the response document // XMLWriter xw = resp.startDasXML("DASDNA", "dasdna.dtd"); try { xw.openTag("DASDNA"); for (Iterator i = segmentResults.entrySet().iterator(); i.hasNext(); ) { Map.Entry me = (Map.Entry) i.next(); Segment seg = (Segment) me.getKey(); Sequence seq = (Sequence) me.getValue(); xw.openTag("SEQUENCE"); xw.attribute("id", seg.getReference()); xw.attribute("version", drs.getLandmarkVersion(seg.getReference())); if (seg.isBounded()) { xw.attribute("start", "" + seg.getStart()); xw.attribute("stop", "" + seg.getStop()); } else { xw.attribute("start", "" + 1); xw.attribute("stop", "" + seq.length()); } SymbolList syms = seq; if (seg.isBounded()) { syms = syms.subList(seg.getMin(), 
seg.getMax()); } if (seg.isInverted()) { syms = DNATools.reverseComplement(syms); } xw.openTag("DNA"); xw.attribute("length", "" + syms.length()); for (int pos = 1; pos <= syms.length(); pos += 60) { int maxPos = Math.min(syms.length(), pos + 59); xw.println(syms.subStr(pos, maxPos)); } xw.closeTag("DNA"); xw.closeTag("SEQUENCE"); } xw.closeTag("DASDNA"); xw.close(); } catch (Exception ex) { throw new DazzleException(ex, "Error writing DNA document"); } }