Java MDAGNode.transition примеры, java.io.MDAGNode.transition Java примеры использования

Пример #1

0

Показать файл

Файл: MDAG.java Проект: liuzl/HanLP

  /**
   * Clones the transition path that starts at a given node and re-points that node's parent (on
   * the path from {@code sourceNode}) at the new copy.
   *
   * <p>Nodes are cloned from the far end of the path back towards the pivot, so that every new
   * clone can have its outgoing transition redirected to the clone that was created just before
   * it.
   *
   * @param pivotConfluenceNode the MDAGNode at which the cloned path begins
   * @param transitionStringToPivotNode the String labelling the transition path from {@code
   *     sourceNode} to {@code pivotConfluenceNode}; its last char is the label of the transition
   *     entering the pivot, so it must not be empty
   * @param str the String labelling the transition path from {@code pivotConfluenceNode} that is
   *     to be cloned
   */
  private void cloneTransitionPath(
      MDAGNode pivotConfluenceNode, String transitionStringToPivotNode, String str) {
    // Original node from which the most recent clone was produced.
    MDAGNode previousOriginal = pivotConfluenceNode.transition(str);
    // Most recently produced clone; stays null until the first clone exists.
    MDAGNode previousClone = null;
    // Label of the transition that enters previousOriginal from its parent on the path of str.
    char labelIntoPrevious = '\0';

    // Walk the prefixes of str from the longest down to the empty one (the pivot itself).
    for (int prefixLength = str.length(); prefixLength >= 0; prefixLength--) {
      final MDAGNode original;
      final MDAGNode copy;

      if (prefixLength > 0) {
        // An inner node of the path: a plain clone is enough.
        original = pivotConfluenceNode.transition(str.substring(0, prefixLength));
        copy = original.clone();
      } else {
        // The pivot itself: clone it so that its parent on the path from sourceNode now
        // transitions to the clone instead of the pivot.
        original = pivotConfluenceNode;
        int lastIndex = transitionStringToPivotNode.length() - 1;
        MDAGNode pivotParent =
            sourceNode.transition(transitionStringToPivotNode.substring(0, lastIndex));
        copy =
            pivotConfluenceNode.clone(pivotParent, transitionStringToPivotNode.charAt(lastIndex));
      }

      transitionCount += copy.getOutgoingTransitionCount();

      // Every clone except the first must point at the previous clone rather than at the node
      // the previous clone was copied from.
      if (previousClone != null) {
        copy.reassignOutgoingTransition(labelIntoPrevious, previousOriginal, previousClone);
        previousOriginal = original;
      }

      // Remember this clone and the label leading into its original for the next iteration,
      // which handles the parent.
      previousClone = copy;
      labelIntoPrevious = prefixLength > 0 ? str.charAt(prefixLength - 1) : '\0';
    }
  }

Пример #2

0

Показать файл

Файл: MDAG.java Проект: liuzl/HanLP

  /**
   * Walks the transition path labelled by a String from a given node and reports the first
   * confluence node (as reported by {@code isConfluenceNode()}) found on it.
   *
   * <p>The walk also stops early when the current node has no outgoing transition for the next
   * char; in that case the index of that char is stored together with a {@code null} node.
   *
   * @param originNode the MDAGNode at which the transition path starts
   * @param str a String labelling a transition path in the MDAG
   * @return a HashMap with two entries: {@code "toConfluenceNodeTransitionCharIndex"} (an Integer
   *     index into {@code str} of the char at which the walk stopped, or {@code null} if the whole
   *     path was walked without stopping) and {@code "confluenceNode"} (the MDAGNode reached at
   *     that point, or {@code null})
   */
  private HashMap<String, Object> getTransitionPathFirstConfluenceNodeData(
      MDAGNode originNode, String str) {
    final int pathLength = str.length();
    MDAGNode node = originNode;
    int index = 0;

    // Follow str one char at a time until a confluence node or a missing transition is hit.
    while (index < pathLength) {
      char label = str.charAt(index);
      node = node.hasOutgoingTransition(label) ? node.transition(label) : null;
      if (node == null || node.isConfluenceNode()) {
        break;
      }
      index++;
    }

    // The walk "stopped early" only if it moved off originNode and did not consume all of str.
    boolean stoppedEarly = node != originNode && index != pathLength;

    HashMap<String, Object> result = new HashMap<String, Object>(2);
    if (stoppedEarly) {
      result.put("toConfluenceNodeTransitionCharIndex", index);
      result.put("confluenceNode", node);
    } else {
      result.put("toConfluenceNodeTransitionCharIndex", null);
      result.put("confluenceNode", null);
    }
    return result;
  }

Пример #3

0

Показать файл

Файл: MDAG.java Проект: liuzl/HanLP

  /**
   * Adds a Collection of Strings to the MDAG.
   *
   * <p>If the MDAG is currently simplified ({@code sourceNode} is {@code null}), it is first
   * un-simplified and the call is retried. Minimization of each String is deferred until the next
   * one has been read, so the last String is minimized after the loop. An empty collection (or a
   * trailing empty String) leaves nothing to minimize and is handled without error.
   *
   * @param strCollection a {@link java.util.Collection} containing Strings to be added to the MDAG
   */
  public final void addStrings(Collection<String> strCollection) {
    if (sourceNode == null) {
      unSimplify();
      addStrings(strCollection);
      return;
    }

    String previousString = "";

    // Add all the Strings in strCollection to the MDAG.
    for (String currentString : strCollection) {
      int mpsIndex = calculateMinimizationProcessingStartIndex(previousString, currentString);

      // If the transition path of previousString needs to be examined for minimization or
      // equivalence class representation after a certain point, call replaceOrRegister to do so.
      if (mpsIndex != -1) {
        String transitionSubstring = previousString.substring(0, mpsIndex);
        String minimizationProcessingSubString = previousString.substring(mpsIndex);
        replaceOrRegister(
            sourceNode.transition(transitionSubstring), minimizationProcessingSubString);
      }

      addStringInternal(currentString);
      previousString = currentString;
    }

    // Minimize the path of the very last String. replaceOrRegister reads the first char of its
    // argument, so it must not be called with an empty String (empty collection, or a final
    // empty element).
    if (!previousString.isEmpty()) {
      replaceOrRegister(sourceNode, previousString);
    }
  }

Пример #4

0

Показать файл

Файл: MDAG.java Проект: liuzl/HanLP

  /**
   * Creates an MDAG from a newline delimited file containing the data of interest.
   *
   * <p>The file is read as UTF-8, through {@code IOAdapter} when one is configured and directly
   * from the file system otherwise. The reader is always closed before the constructor returns.
   * An empty file (or one ending in a blank line) is handled without error.
   *
   * @param dataFile a {@link java.io.File} representation of a file containing the Strings that the
   *     MDAG will contain
   * @throws java.io.IOException if {@code dataFile} cannot be opened, or a read or close operation
   *     on it cannot be carried out
   */
  public MDAG(File dataFile) throws IOException {
    BufferedReader dataFileBufferedReader =
        new BufferedReader(
            new InputStreamReader(
                IOAdapter == null
                    ? new FileInputStream(dataFile)
                    : IOAdapter.open(dataFile.getAbsolutePath()),
                "UTF-8"));
    try {
      String currentString;
      String previousString = "";

      // Read all the lines in dataFile and add the String contained in each to the MDAG.
      while ((currentString = dataFileBufferedReader.readLine()) != null) {
        int mpsIndex = calculateMinimizationProcessingStartIndex(previousString, currentString);

        // If the transition path of previousString needs to be examined for minimization or
        // equivalence class representation after a certain point, call replaceOrRegister to do
        // so.
        if (mpsIndex != -1) {
          String transitionSubstring = previousString.substring(0, mpsIndex); // common prefix
          String minimizationProcessingSubstring =
              previousString.substring(mpsIndex); // differing suffix
          replaceOrRegister(
              sourceNode.transition(transitionSubstring), minimizationProcessingSubstring);
        }

        addStringInternal(currentString);
        previousString = currentString;
      }

      // Minimization of each String is delayed until the next one is read, so the very last
      // String needs a separate call. replaceOrRegister reads the first char of its argument,
      // so it must not be called with an empty String (empty file, or a final blank line).
      if (!previousString.isEmpty()) {
        replaceOrRegister(sourceNode, previousString);
      }
    } finally {
      // Release the underlying stream whether or not reading succeeded.
      dataFileBufferedReader.close();
    }
  }

Пример #5

0

Показать файл

Файл: MDAG.java Проект: liuzl/HanLP

 /**
  * Determines whether a String is present in the MDAG.
  *
  * <p>Works on both forms of the structure: the node graph rooted at {@code sourceNode}, or, when
  * that is {@code null}, the simplified form rooted at {@code simplifiedSourceNode}.
  *
  * @param str the String to be searched for
  * @return true if the transition path labelled by {@code str} exists and ends in an accept node,
  *     and false otherwise
  */
 public boolean contains(String str) {
   char[] labels = str.toCharArray();

   if (sourceNode == null) {
     // The MDAG has been simplified: walk the simplified representation instead.
     SimpleMDAGNode simpleEndNode = simplifiedSourceNode.transition(mdagDataArray, labels);
     return simpleEndNode != null && simpleEndNode.isAcceptNode();
   }

   MDAGNode endNode = sourceNode.transition(labels);
   return endNode != null && endNode.isAcceptNode();
 }

Пример #6

0

Показать файл

Файл: MDAG.java Проект: liuzl/HanLP

  /**
   * Removes a String from the MDAG.
   *
   * <p>If the MDAG is currently simplified ({@code sourceNode} is {@code null}), it is first
   * un-simplified and the removal is then retried, mirroring what {@code addStrings} does. If no
   * transition path labelled by {@code str} exists, the MDAG is left untouched.
   *
   * @param str the String to be removed from the MDAG
   */
  public void removeString(String str) {
    if (sourceNode == null) {
      unSimplify();
      removeString(str);
      return;
    }

    // Bail out before touching the structure if str has no transition path at all; splitting
    // the path or clearing register entries for a missing path would modify (or fail on) nodes
    // that belong to other Strings.
    if (sourceNode.transition(str) == null) {
      return;
    }

    // Split the transition path corresponding to str to ensure that
    // any other transition paths sharing nodes with it are not affected.
    splitTransitionPath(sourceNode, str);

    // Remove from equivalenceClassMDAGNodeHashMap the entries of all the nodes in the
    // transition path corresponding to str.
    removeTransitionPathRegisterEntries(str);

    // Re-fetch the last node of the path: splitting may have replaced nodes with clones.
    MDAGNode strEndNode = sourceNode.transition(str);
    if (strEndNode == null) {
      return;
    }

    if (strEndNode.hasTransitions()) {
      // Other Strings continue past this node: just stop accepting here and re-minimize.
      strEndNode.setAcceptStateStatus(false);
      replaceOrRegister(sourceNode, str);
      return;
    }

    int soleInternalTransitionPathLength = calculateSoleTransitionPathLength(str);
    int internalTransitionPathLength = str.length() - 1;

    if (soleInternalTransitionPathLength == internalTransitionPathLength) {
      // The whole path is used by str alone: drop it at the source node.
      sourceNode.removeOutgoingTransition(str.charAt(0));
      transitionCount -= str.length();
    } else {
      // Remove only the sub-path of str's transition path that is used by str alone.
      int toBeRemovedTransitionLabelCharIndex =
          internalTransitionPathLength - soleInternalTransitionPathLength;
      String sharedPrefix = str.substring(0, toBeRemovedTransitionLabelCharIndex);

      MDAGNode latestNonSoloTransitionPathNode = sourceNode.transition(sharedPrefix);
      latestNonSoloTransitionPathNode.removeOutgoingTransition(
          str.charAt(toBeRemovedTransitionLabelCharIndex));
      transitionCount -= str.length() - toBeRemovedTransitionLabelCharIndex;

      replaceOrRegister(sourceNode, sharedPrefix);
    }
  }

Пример #7

0

Показать файл

Файл: MDAG.java Проект: liuzl/HanLP

  /**
   * Prefix query: retrieves all the Strings in the MDAG that begin with a given String.
   *
   * @param prefixStr a String that is the prefix for all the desired Strings
   * @return a HashSet containing all the Strings present in the MDAG that begin with {@code
   *     prefixStr}; empty if no transition path labelled by {@code prefixStr} exists
   */
  public HashSet<String> getStringsStartingWith(String prefixStr) {
    HashSet<String> matches = new HashSet<String>();

    if (sourceNode == null) {
      // The MDAG has been simplified: follow prefixStr through the simplified representation.
      SimpleMDAGNode simplePrefixEnd =
          SimpleMDAGNode.traverseMDAG(mdagDataArray, simplifiedSourceNode, prefixStr);
      if (simplePrefixEnd == null) {
        return matches; // no stored String begins with prefixStr
      }

      if (simplePrefixEnd.isAcceptNode()) {
        matches.add(prefixStr);
      }
      // Collect every String that extends the transition path denoted by prefixStr.
      getStrings(
          matches, SearchCondition.PREFIX_SEARCH_CONDITION, prefixStr, prefixStr, simplePrefixEnd);
      return matches;
    }

    // Follow prefixStr through the node graph.
    MDAGNode prefixEnd = sourceNode.transition(prefixStr);
    if (prefixEnd == null) {
      return matches; // no stored String begins with prefixStr
    }

    if (prefixEnd.isAcceptNode()) {
      matches.add(prefixStr);
    }
    // Collect every String that extends the transition path denoted by prefixStr.
    getStrings(
        matches,
        SearchCondition.PREFIX_SEARCH_CONDITION,
        prefixStr,
        prefixStr,
        prefixEnd.getOutgoingTransitions());
    return matches;
  }

Пример #8

0

Показать файл

Файл: MDAG.java Проект: liuzl/HanLP

  /**
   * Adds a String to the MDAG; this performs the actual structural changes.
   *
   * <p>The longest prefix of {@code str} already present as a transition path is reused. If that
   * prefix runs through a confluence node, the path from that node onwards is cloned first, so
   * that other transition paths sharing it are not disturbed. The remaining suffix is then
   * appended as a new path.
   *
   * @param str the String to be added to the MDAG
   */
  private void addStringInternal(String str) {
    String prefixString = determineLongestPrefixInMDAG(str);
    String suffixString = str.substring(prefixString.length());

    // Look up the first confluence node on the path from sourceNode along prefixString.
    HashMap<String, Object> confluenceData =
        getTransitionPathFirstConfluenceNodeData(sourceNode, prefixString);
    Integer confluenceCharIndex =
        (Integer) confluenceData.get("toConfluenceNodeTransitionCharIndex");
    MDAGNode confluenceNode = (MDAGNode) confluenceData.get("confluenceNode");

    // Unregister the nodes on the prefix path up to the first confluence node; the nodes past it
    // will be cloned and are therefore unaffected by the addition of suffixString. With no
    // confluence index, unregister the nodes of the entire prefix path.
    String pathToUnregister;
    if (confluenceCharIndex == null) {
      pathToUnregister = prefixString;
    } else {
      pathToUnregister = prefixString.substring(0, confluenceCharIndex);
    }
    removeTransitionPathRegisterEntries(pathToUnregister);

    // If the prefix contains a confluence node, duplicate the prefix path from that node onwards
    // before suffixString is added to the duplicate.
    if (confluenceNode != null) {
      int splitIndex = confluenceCharIndex + 1;
      String pathToConfluenceNode = prefixString.substring(0, splitIndex);
      String pathToDuplicate = prefixString.substring(splitIndex);
      cloneTransitionPath(confluenceNode, pathToConfluenceNode, pathToDuplicate);
    }

    // Append the path for suffixString to the end of the (possibly duplicated) prefix path.
    addTransitionPath(sourceNode.transition(prefixString), suffixString);
  }

Пример #9

0

Показать файл

Файл: MDAG.java Проект: liuzl/HanLP

  /**
   * Removes from equivalenceClassMDAGNodeHashMap the entries of all the nodes in a transition
   * path.
   *
   * <p>If the path breaks off before {@code str} is exhausted (a node has no transition for the
   * next char), processing stops at that point; the nodes reached so far have been handled.
   *
   * @param str a String corresponding to a transition path from sourceNode
   */
  private void removeTransitionPathRegisterEntries(String str) {
    MDAGNode currentNode = sourceNode;
    int charCount = str.length();

    for (int i = 0; i < charCount; i++) {
      currentNode = currentNode.transition(str.charAt(i));

      // No transition for this char: the rest of the path does not exist, and continuing would
      // dereference null on the next iteration.
      if (currentNode == null) {
        break;
      }

      // Only drop the entry if this very node is the registered representative.
      if (equivalenceClassMDAGNodeHashMap.get(currentNode) == currentNode) {
        equivalenceClassMDAGNodeHashMap.remove(currentNode);
      }

      // The hashCode of an MDAGNode is cached the first time a hash is performed without a cache
      // value present. Since we just hashed currentNode, we must clear this regardless of its
      // presence in equivalenceClassMDAGNodeHashMap, since we're not actually declaring
      // equivalence class representatives here.
      currentNode.clearStoredHashCode();
    }
  }

Пример #10

0

Показать файл

Файл: MDAG.java Проект: liuzl/HanLP

  /**
   * Determines the longest prefix of a given String for which a transition path from {@code
   * sourceNode} exists.
   *
   * @param str the String to be processed
   * @return the longest prefix of {@code str} that labels a transition path starting at {@code
   *     sourceNode}; empty if not even the first char can be followed
   */
  private String determineLongestPrefixInMDAG(String str) {
    final int length = str.length();
    MDAGNode node = sourceNode;
    int matched = 0;

    // Follow str char by char until a transition is missing or str is exhausted.
    while (matched < length) {
      char label = str.charAt(matched);
      if (!node.hasOutgoingTransition(label)) {
        break;
      }
      node = node.transition(label);
      matched++;
    }

    return str.substring(0, matched);
  }

Пример #11

0

Показать файл

Файл: MDAG.java Проект: liuzl/HanLP

  /**
   * Performs minimization processing on a transition path starting from a given node.
   *
   * <p>This entails either replacing a node in the path with one that has an equivalent right
   * language/equivalence class (defined as the set of transition paths that can be traversed and
   * nodes able to be reached from it), or making it a representative of a right
   * language/equivalence class if such a node does not already exist.
   *
   * <p>The path is processed from its far end back towards {@code originNode}: the method first
   * recurses on the remainder of {@code str} and only then handles the node reached by the first
   * char. An empty {@code str} denotes no transition and is a no-op.
   *
   * @param originNode the MDAGNode that the transition path corresponding to str starts from
   * @param str a String related to a transition path
   */
  private void replaceOrRegister(MDAGNode originNode, String str) {
    // Nothing to process; also keeps charAt(0) below from throwing on an empty String.
    if (str.isEmpty()) {
      return;
    }

    char transitionLabelChar = str.charAt(0);
    MDAGNode relevantTargetNode = originNode.transition(transitionLabelChar);

    // If relevantTargetNode has transitions and there is at least one char left to process,
    // recurse on the rest of str first so that nodes further down the path are handled before
    // this one.
    if (relevantTargetNode.hasTransitions() && str.length() > 1) {
      replaceOrRegister(relevantTargetNode, str.substring(1));
    }

    // Get the node representing the equivalence class that relevantTargetNode belongs to.
    // MDAGNodes hash on the transition paths that can be traversed from them and the nodes able
    // to be reached from them; nodes with the same equivalence class hash to the same bucket.
    MDAGNode equivalentNode = equivalenceClassMDAGNodeHashMap.get(relevantTargetNode);

    if (equivalentNode == null) {
      // No node with the same right language yet: relevantTargetNode becomes the representative.
      equivalenceClassMDAGNodeHashMap.put(relevantTargetNode, relevantTargetNode);
    } else if (equivalentNode != relevantTargetNode) {
      // Another node already represents this right language: redirect the transition from
      // originNode to that representative instead of relevantTargetNode.
      relevantTargetNode.decrementTargetIncomingTransitionCounts();

      // Since this method is recursive, the outgoing transitions of all of relevantTargetNode's
      // child nodes have already been reassigned, so only relevantTargetNode's own outgoing
      // transitions need to be subtracted from the total.
      transitionCount -= relevantTargetNode.getOutgoingTransitionCount();

      originNode.reassignOutgoingTransition(
          transitionLabelChar, relevantTargetNode, equivalentNode);
    }
  }

Пример #12

0

Показать файл

Файл: MDAG.java Проект: liuzl/HanLP

  /**
   * Gives the transition path labelled by a String its own private copy of every confluence node
   * it passes through.
   *
   * <p>Starting at {@code originNode}, the first confluence node on the path is cloned so that
   * its parent on this path transitions to the clone; the search then continues from the clone
   * with the rest of the String, until no further confluence node is reported.
   *
   * @param originNode the MDAGNode at which the transition path starts
   * @param storedStringSubstr the String labelling the transition path from {@code originNode}
   */
  private void splitTransitionPath(MDAGNode originNode, String storedStringSubstr) {
    MDAGNode pathStart = originNode;
    String remainingPath = storedStringSubstr;

    while (true) {
      HashMap<String, Object> confluenceData =
          getTransitionPathFirstConfluenceNodeData(pathStart, remainingPath);
      MDAGNode confluenceNode = (MDAGNode) confluenceData.get("confluenceNode");
      if (confluenceNode == null) {
        return; // nothing (more) to split on this path
      }

      int confluenceCharIndex =
          (Integer) confluenceData.get("toConfluenceNodeTransitionCharIndex");

      // Clone the confluence node so that its parent on this path points at the clone.
      MDAGNode confluenceParent =
          pathStart.transition(remainingPath.substring(0, confluenceCharIndex));
      MDAGNode confluenceClone =
          confluenceNode.clone(confluenceParent, remainingPath.charAt(confluenceCharIndex));

      transitionCount += confluenceClone.getOutgoingTransitionCount();

      // Continue with the part of the path that lies beyond the clone.
      remainingPath = remainingPath.substring(confluenceCharIndex + 1);
      pathStart = confluenceClone;
    }
  }

Java MDAGNode.transition примеры использования