コード例 #1
0
  /**
   * Backtracks through a matched-path matrix and collects the matched children of {@code ta[p]}.
   *
   * <p>Starting at cell {@code (i, j)} the method follows the direction codes stored in
   * {@code matchedMatrix}: an {@code UP_LEFT} cell continues at {@code (i - 1, j - 1)} and then
   * records child {@code i - 1} of {@code ta[p]} as matched, an {@code UP} cell continues at
   * {@code (i - 1, j)}, and any other value continues at {@code (i, j - 1)}. Because the recursive
   * call is made before the child is recorded, matched children are appended in ascending child
   * order.
   *
   * <p>For every matched child the method also drops entries from the child's (and its siblings')
   * <em>comparedNodes</em> / <em>comparedMatrix</em> queues until the parent of the head compared
   * node is the node matched by {@code ta[p]}, and then sets the child's matched node and matched
   * matrix. The child is finally added to {@code matchedChildren} and to {@code tm[p]}.
   *
   * <p>Backtracking stops as soon as row 0 or column 0 is reached. Those border cells never encode
   * a match, and stepping further from them would leave the matrix.
   *
   * @param p The index of the p-th element of the DOM-tree ta
   * @param matchedMatrix The matched tree path matrix of ta for node p
   * @param i The row (child index of ta[p], 1-based) in the matchedMatrix to look at
   * @param j The column (child index of the compared node, 1-based) in the matchedMatrix to look at
   */
  private void matchedChildrenBacktracking(int p, int[][] matchedMatrix, int i, int j) {
    // Reaching either border ends the walk. Requiring BOTH indices to be 0 (as before) let the
    // recursion continue along the border and step to index -1 when one tree had more children
    // than the other.
    if (i <= 0 || j <= 0) {
      return;
    }

    int direction = matchedMatrix[i][j];
    if (direction == MatchedMatrixValue.UP_LEFT.getValue()) {
      // Handle the earlier cells first so children are recorded in their original order.
      this.matchedChildrenBacktracking(p, matchedMatrix, i - 1, j - 1);

      Token parent = this.ta.get(p);
      // Node from the 1st page.
      Token child = parent.getChildren()[i - 1];
      // Node from the 2nd page: parent of the first node the child has been compared with.
      Token comparedNode = this.tb.get(child.getComparedNodes().peek().getParentNo());

      // Skip compared entries until they belong to the node ta[p] was actually matched with
      // (identity comparison), or until the child's compared nodes are exhausted.
      while (parent.getMatchedNode() != comparedNode) {
        int k = comparedNode.getChildren().length;
        int n = parent.getChildren().length;

        // NOTE(review): both loops below run k - 1 / n - 1 times, i.e. one step short of the
        // respective child counts; kept exactly as in the original - confirm against the
        // reference algorithm.
        for (int skipped = 0; skipped < k - 1; skipped++) {
          // Drop the head of child.comparedNodes.
          child.getComparedNodes().poll();
        }
        for (int h = 0; h < n - 1; h++) {
          // The sibling receives the very same comparedNodes object as the child (no copy).
          parent.getChildren()[h].setComparedNodes(child.getComparedNodes());
          for (int skipped = 0; skipped < k - 1; skipped++) {
            // Drop the head of ta[p].children[h].comparedMatrix.
            parent.getChildren()[h].getComparedMatrix().poll();
          }
        }

        if (child.getComparedNodes().size() > 0) {
          comparedNode = this.tb.get(child.getComparedNodes().peek().getParentNo());
        } else {
          break;
        }
      }

      child.setMatchedNode(comparedNode.getChildren()[j - 1]);
      // The matched matrix stays null unless a compared matrix is available for column j.
      child.setMatchedMatrix(null);
      if (child.getComparedMatrix().size() > 0) {
        child.setMatchedMatrix(child.getComparedMatrix().get(j - 1));
      }

      // Record the child as matched, both globally and below tm[p].
      this.matchedChildren.add(child);
      this.tm.get(p).addChild(child);
    } else if (direction == MatchedMatrixValue.UP.getValue()) {
      this.matchedChildrenBacktracking(p, matchedMatrix, i - 1, j);
    } else {
      this.matchedChildrenBacktracking(p, matchedMatrix, i, j - 1);
    }
  }
コード例 #2
0
  /**
   * Initializes the <em>matchedMatrix</em>, <em>comparedNodes</em> and <em>comparedMatrix</em>
   * fields of the nodes in the DOM-tree ta by matching the subtree rooted at ta[p] against the
   * subtree rooted at tb[q].
   *
   * <p>tb[q] is always recorded as a compared node of ta[p]. If the two nodes have different
   * names, their subtrees do not match at all: {@code null} is recorded as the compared matrix and
   * 0 is returned. Otherwise the method recursively computes the maximum matching between the
   * children of ta[p] and the children of tb[q].
   *
   * <p>Two matrices are filled for that purpose: the maximum matched nodes matrix and the maximum
   * matched path matrix. The latter holds, per cell, the direction ({@code UP_LEFT}, {@code UP} or
   * {@code LEFT}) the maximum was taken from.
   *
   * <p>Because ta[p] may be compared with several nodes of tb, the path matrix computed here is
   * only the initial value of ta[p]'s <em>matchedMatrix</em>. It is additionally appended to
   * ta[p]'s <em>comparedMatrix</em> collection so that subsequent algorithms can pick the true
   * matched matrix.
   *
   * @param p The index of the p-th element of the DOM-tree ta
   * @param q The index of the q-th element of the DOM-tree tb
   * @return 0 if the names of ta[p] and tb[q] differ, otherwise the value of the last element in
   *     the maximum matched nodes matrix plus 1
   */
  private int improvedSimpleTreeMatching(int p, int q) {
    this.ta.get(p).addComparedNodes(this.tb.get(q));

    // Differently named roots: the subtrees cannot match, so there is no path matrix to store.
    if (!this.ta.get(p).getName().equals(this.tb.get(q).getName())) {
      this.ta.get(p).addComparedMatrices(null);
      return 0;
    }

    int rows = this.ta.get(p).getChildren().length;
    int cols = this.tb.get(q).getChildren().length;

    // Row 0 and column 0 of both matrices keep the default value 0 of a fresh int array.
    int[][] matchedNodes = new int[rows + 1][cols + 1]; // maximum matched nodes matrix
    int[][] matchedPath = new int[rows + 1][cols + 1]; // maximum matched path matrix

    for (int i = 1; i <= rows; i++) {
      int childOfP = this.ta.get(p).getChildren()[i - 1].getNo();
      for (int j = 1; j <= cols; j++) {
        int childOfQ = this.tb.get(q).getChildren()[j - 1].getNo();

        // Size of the best matching between the two child subtrees.
        int weight = this.improvedSimpleTreeMatching(childOfP, childOfQ);

        int fromLeft = matchedNodes[i][j - 1];
        int fromUp = matchedNodes[i - 1][j];
        int fromDiagonal = matchedNodes[i - 1][j - 1] + weight;
        int best = Math.max(fromLeft, Math.max(fromUp, fromDiagonal));
        matchedNodes[i][j] = best;

        // Remember where the maximum came from; the diagonal only counts for a real match.
        if (best == fromDiagonal && weight > 0) {
          matchedPath[i][j] = MatchedMatrixValue.UP_LEFT.getValue();
        } else if (best == fromUp) {
          matchedPath[i][j] = MatchedMatrixValue.UP.getValue();
        } else if (best == fromLeft) {
          matchedPath[i][j] = MatchedMatrixValue.LEFT.getValue();
        }
      }
    }

    // Store the path matrix as the initial matched matrix and as a compared matrix of ta[p].
    this.ta.get(p).setMatchedMatrix(matchedPath);
    this.ta.get(p).addComparedMatrices(matchedPath);

    return matchedNodes[rows][cols] + 1;
  }