/** * Matched children backtracking algorithm * * <p>Besides finding the matched children using ta's <em>matchedMatrix</em> field, the main job * is to modify the initial <em>matchedMatrix</em> of each matched child. * * @param p The index of the p-th element of the DOM-tree ta * @param matchedMatrix The matched tree path matrix of ta for node p * @param i The x-position in the matchedMatrix to look for the value * @param j The y-position in the matchedMatrix to look for the value */ private void matchedChildrenBacktracking(int p, int[][] matchedMatrix, int i, int j) { // slight modification as if the 1st page is larger than the second or a // segment of the 1st page contains more children than the 2nd page, // j might become negative while i is 0. // To my understanding if either of i or j gets 0 the border of the // matrix was reached. if (i == 0 && j == 0) { return; } if (matchedMatrix[i][j] == MatchedMatrixValue.UP_LEFT.getValue()) { this.matchedChildrenBacktracking(p, matchedMatrix, i - 1, j - 1); // contains elements from 1st page Token child = this.ta.get(p).getChildren()[i - 1]; // contains elements from 2nd page Token comparedNode = this.tb.get(child.getComparedNodes().peek().getParentNo()); while (this.ta.get(p).getMatchedNode() != comparedNode) { int k = comparedNode.getChildren().length; int n = this.ta.get(p).getChildren().length; for (int _i = 0; _i < k - 1; _i++) // Delete child.comparedNodes.firstElement { child.getComparedNodes().poll(); } for (int h = 0; h < n - 1; h++) { this.ta.get(p).getChildren()[h].setComparedNodes(child.getComparedNodes()); for (int _i = 0; _i < k - 1; _i++) // Delete ta[p].children[h].comparedMatrix.firstElement { this.ta.get(p).getChildren()[h].getComparedMatrix().poll(); } } if (child.getComparedNodes().size() > 0) { comparedNode = this.tb.get(child.getComparedNodes().peek().getParentNo()); } else { break; } } // child.setMatchedNode(child.getComparedNodes().get(j-1)); child.setMatchedNode(comparedNode.getChildren()[j - 1]); child.setMatchedMatrix(null); if (child.getComparedMatrix().size() > 0) { child.setMatchedMatrix(child.getComparedMatrix().get(j - 1)); } // Add child to matchedChildren, tm[p].children // matchedChildren.add(new HTMLNode(child)); this.matchedChildren.add(child); // this.tm[p].addChild(new HTMLNode(child)); this.tm.get(p).addChild(child); } else if (matchedMatrix[i][j] == MatchedMatrixValue.UP.getValue()) { this.matchedChildrenBacktracking(p, matchedMatrix, i - 1, j); } else { this.matchedChildrenBacktracking(p, matchedMatrix, i, j - 1); } }
/** * Initializes the <em>matchedMatrix</em>-, <em>comparedNode</em>- and * <em>comparedMatrix</em>-fields of nodes in the DOM-tree of ta. * * <p>It first compares a node taken from both trees for equal names and if there names do not * match their subtree do not match either. But if they match the algorithm recursively finds the * maximum matching between the subtrees rooted by the children of ta[p] and tb[q]. * * <p>Therefore it initializes two matrices: m and f. The primer is the maximum matched nodes * matrix while the latter is the maximum matched path matrix. * * <p>As f is not always the true <em>matchedMatrix</em> because ta[p] may match multiple nodes in * tb, it is only treated as the initial value of matchedMatrix. * * <p>ta[p]'s <em>comparedNode</em> set will store all tb's nodes which have been compared with * ta[p], while ta[p]'s <em>comparedMatrix</em> stores the related matching path flag matrix * <em>f</em> in order to find the true matched Matrix of ta[p] in the sequent algorithms. * * @param p The index of the p-th element of the DOM-tree ta * @param q The index of the q-th element of the DOM-tree tb * @return The value of the last element in the maximum matched nodes matrix plus 1 */ private int improvedSimpleTreeMatching(int p, int q) { this.ta.get(p).addComparedNodes(this.tb.get(q)); // compare names, if they are distinct, the subtree rooted by them do // not match at all if (!this.ta.get(p).getName().equals(this.tb.get(q).getName())) { this.ta.get(p).addComparedMatrices(null); return 0; } // if they match however, the algorithm recursively finds the maximum // matching between the subtrees rooted by the children ta[p] and tb[q] else { int k = this.ta.get(p).getChildren().length; int n = this.tb.get(q).getChildren().length; int[][] m = new int[k + 1][n + 1]; // maximum matched nodes matrix int[][] f = new int[k + 1][n + 1]; // maximum matched path matrix for (int i = 0; i <= k; i++) { m[i][0] = 0; f[i][0] = 0; } for (int j = 1; j <= n; j++) { m[0][j] = 0; f[0][j] = 0; } for (int i = 1; i <= k; i++) { int _p = this.ta.get(p).getChildren()[i - 1].getNo(); for (int j = 1; j <= n; j++) { // the next index of the child int _q = this.tb.get(q).getChildren()[j - 1].getNo(); // recursively seek the maximum match in the child's context int w = this.improvedSimpleTreeMatching(_p, _q); // application of the maximum matching node m[i][j] = Math.max(m[i][j - 1], Math.max(m[i - 1][j], m[i - 1][j - 1] + w)); // set the path according to the maximum matching node if (m[i][j] == m[i - 1][j - 1] + w && w > 0) { f[i][j] = MatchedMatrixValue.UP_LEFT.getValue(); } else if (m[i][j] == m[i - 1][j]) { f[i][j] = MatchedMatrixValue.UP.getValue(); } else if (m[i][j] == m[i][j - 1]) { f[i][j] = MatchedMatrixValue.LEFT.getValue(); } } } // set matched tree path matrices this.ta.get(p).setMatchedMatrix(f); this.ta.get(p).addComparedMatrices(f); return m[k][n] + 1; } }