/** * Returns the length of the longest common sequence. * * @return the length of the longest common sequence. */ public int length() { // case when one of the sequences is empty if (length1 == 0 || length2 == 0) this.length = 0; // normal case if (length < 0) { matrix.setup(length1 + 1, length2 + 1); // allocate storage for array L; for (int i = super.length1; i >= 0; i--) { for (int j = super.length2; j >= 0; j--) { // we reach the end of the sequence (fill with 0) if (i >= super.length1 || j >= super.length2) matrix.set(i, j, 0); else { // the events are the same if (sequence1.getEvent(i).equals(sequence2.getEvent(j))) { matrix.incrementPathBy(i, j, 1); // different events } else matrix.incrementByMaxPath(i, j); } } } this.length = this.matrix.get(0, 0); } if (DEBUG) { System.err.println(); for (int i = 0; i < this.sequence1.size(); i++) System.err.print(ShortStringFormatter.toShortString(this.sequence1.getEvent(i)) + "\t"); System.err.println(); for (int i = 0; i < this.sequence2.size(); i++) System.err.print(ShortStringFormatter.toShortString(this.sequence2.getEvent(i)) + "\n"); System.err.println(); System.err.println(this.matrix); } return this.length; }
/**
 * Dumps diagnostic information to {@code System.err} for a matrix position where the walk could
 * not decide which direction to follow.
 *
 * <p>The dump contains both candidate events, the current event of the state, the neighbouring
 * matrix values, the matrix comparisons for the position and the answers of the state to the
 * format / insert / delete checks.
 *
 * @param i The X position (index in the first sequence).
 * @param j The Y position (index in the second sequence).
 */
private void printLost(int i, int j) {
  final DiffXEvent fromFirst = this.sequence1.getEvent(i);
  final DiffXEvent fromSecond = this.sequence2.getEvent(j);

  // where we are
  final String position = "(" + i + "," + j + ")";
  System.err.println("(!) Ambiguous choice in " + position);

  // the two candidate events and the current event of the state
  System.err.println(" ? +" + ShortStringFormatter.toShortString(fromFirst));
  System.err.println(" ? -" + ShortStringFormatter.toShortString(fromSecond));
  System.err.println(
      " current=" + ShortStringFormatter.toShortString(this.estate.current()));

  // what the matrix says around this position
  System.err.println(" value in X+1=" + matrix.get(i + 1, j));
  System.err.println(" value in Y+1=" + matrix.get(i, j + 1));
  System.err.println(" equals=" + fromFirst.equals(fromSecond));
  System.err.println(" greaterX=" + matrix.isGreaterX(i, j));
  System.err.println(" greaterY=" + matrix.isGreaterY(i, j));
  System.err.println(" sameXY=" + matrix.isSameXY(i, j));

  // what the state would allow
  System.err.println(" okFormat1=" + estate.okFormat(fromFirst));
  System.err.println(" okFormat2=" + estate.okFormat(fromSecond));
  System.err.println(" okInsert=" + estate.okInsert(fromFirst));
  System.err.println(" okDelete=" + estate.okDelete(fromSecond));
}
/** * Writes the diff sequence using the specified formatter. * * @param formatter The formatter that will handle the output. * @throws IOException If thrown by the formatter. */ public void process(DiffXFormatter formatter) throws IOException { // handle the case when one of the two sequences is empty processEmpty(formatter); Docx4jDriver.log("length first: " + this.length1); Docx4jDriver.log("length second: " + this.length2); if (this.length1 == 0 || this.length2 == 0) return; // Phase I: calculate the LCS length to fill the matrix (slow for lengths in order of > 10^2) long startTime = System.currentTimeMillis(); length(); long endTime = System.currentTimeMillis(); long duration = endTime - startTime; Docx4jDriver.log("diffx LCS phase took: " + duration + " ms "); int i = 0; int j = 0; DiffXEvent e1 = sequence1.getEvent(i); DiffXEvent e2 = sequence2.getEvent(j); // Phase II: start walking the matrix (this should be quick) while (i < super.length1 && j < super.length2) { e1 = sequence1.getEvent(i); e2 = sequence2.getEvent(j); // we can only insert or delete, priority to insert if (matrix.isGreaterX(i, j)) { // follow the natural path and insert if (estate.okInsert(e1)) { if (DEBUG) System.err.print(" >i +" + ShortStringFormatter.toShortString(e1)); formatter.insert(e1); estate.insert(e1); i++; // if we can format checking at the stack, let's do it } else if (e1.equals(e2) && estate.okFormat(e1)) { if (DEBUG) System.err.print(" <f " + ShortStringFormatter.toShortString(e1)); formatter.format(e1); estate.format(e1); i++; j++; // go counter current and delete } else if (estate.okDelete(e2)) { if (DEBUG) System.err.print(" >d -" + ShortStringFormatter.toShortString(e2)); formatter.delete(e2); estate.delete(e2); j++; } else { if (DEBUG) System.err.print("\n(i) case greater X"); if (DEBUG) printLost(i, j); break; } // we can only insert or delete, priority to delete } else if (matrix.isGreaterY(i, j)) { // follow the natural and delete if (estate.okDelete(e2)) { if 
(DEBUG) System.err.print(" <d -" + ShortStringFormatter.toShortString(e2)); formatter.delete(e2); estate.delete(e2); j++; // if we can format checking at the stack, let's do it } else if (e1.equals(e2) && estate.okFormat(e1)) { if (DEBUG) System.err.print(" <f " + ShortStringFormatter.toShortString(e1)); formatter.format(e1); estate.format(e1); i++; j++; // insert (counter-current) } else if (estate.okInsert(e1)) { if (DEBUG) System.err.print(" <i +" + ShortStringFormatter.toShortString(e2)); formatter.insert(e1); estate.insert(e1); i++; } else { if (DEBUG) System.err.println("\n(i) case greater Y"); if (DEBUG) printLost(i, j); break; } // elements from i inserted and j deleted // we have to make a choice for where we are going } else if (matrix.isSameXY(i, j)) { // if we can format checking at the stack, let's do it if (e1.equals(e2) && estate.okFormat(e1)) { if (DEBUG) System.err.print(" =f " + ShortStringFormatter.toShortString(e1)); formatter.format(e1); estate.format(e1); i++; j++; // we can insert the closing tag } else if (estate.okInsert(e1) && !(e2 instanceof AttributeEvent && !(e1 instanceof AttributeEvent))) { if (DEBUG) System.err.print(" =i +" + ShortStringFormatter.toShortString(e1)); estate.insert(e1); formatter.insert(e1); i++; // we can delete the closing tag } else if (estate.okDelete(e2) && !(e1 instanceof AttributeEvent && !(e2 instanceof AttributeEvent))) { if (DEBUG) System.err.print(" =d -" + ShortStringFormatter.toShortString(e2)); formatter.delete(e2); estate.delete(e2); j++; } else { if (DEBUG) System.err.println("\n(i) case same"); if (DEBUG) printLost(i, j); break; } } else { if (DEBUG) System.err.println("\n(i) case ???"); if (DEBUG) printLost(i, j); break; } if (DEBUG) System.err.println( " stack=" + estate.currentChange() + ShortStringFormatter.toShortString(estate.current())); } // finish off the events from the first sequence while (i < super.length1) { estate.insert(sequence1.getEvent(i)); formatter.insert(sequence1.getEvent(i)); 
i++; } // finish off the events from the second sequence while (j < super.length2) { estate.delete(sequence2.getEvent(j)); formatter.delete(sequence2.getEvent(j)); j++; } // free some resources // matrix.release(); }