/** * 从给点节点开始克隆一条路径<br> * Clones a _transition path from a given node. * * @param pivotConfluenceNode the MDAGNode that the cloning operation is to be based from * @param transitionStringToPivotNode a String which corresponds with a _transition path from * souceNode to {@code pivotConfluenceNode} * @param str a String which corresponds to the _transition path from {@code pivotConfluenceNode} * that is to be cloned */ private void cloneTransitionPath( MDAGNode pivotConfluenceNode, String transitionStringToPivotNode, String str) { MDAGNode lastTargetNode = pivotConfluenceNode.transition( str); // Will store the last node which was used as the base of a cloning operation MDAGNode lastClonedNode = null; // Will store the last cloned node char lastTransitionLabelChar = '\0'; // Will store the char which labels the _transition to lastTargetNode from its parent // node in the prefixString's _transition path // Loop backwards through the indices of str, using each as a boundary to create substrings of // str of decreasing length // which will be used to _transition to, and duplicate the nodes in the _transition path of str // from pivotConfluenceNode. for (int i = str.length(); i >= 0; i--) { String currentTransitionString = (i > 0 ? str.substring(0, i) : null); MDAGNode currentTargetNode = (i > 0 ? pivotConfluenceNode.transition(currentTransitionString) : pivotConfluenceNode); MDAGNode clonedNode; if (i == 0) // if we have reached pivotConfluenceNode { // Clone pivotConfluenceNode in a way that reassigns the _transition of its parent node (in // transitionStringToConfluenceNode's path) to the clone. 
String transitionStringToPivotNodeParent = transitionStringToPivotNode.substring(0, transitionStringToPivotNode.length() - 1); char parentTransitionLabelChar = transitionStringToPivotNode.charAt(transitionStringToPivotNode.length() - 1); clonedNode = pivotConfluenceNode.clone( sourceNode.transition(transitionStringToPivotNodeParent), parentTransitionLabelChar); ///// } else clonedNode = currentTargetNode.clone(); // simply clone curentTargetNode transitionCount += clonedNode.getOutgoingTransitionCount(); // If this isn't the first node we've cloned, reassign clonedNode's _transition labeled // with the lastTransitionChar (which points to the last targetNode) to the last clone. if (lastClonedNode != null) { clonedNode.reassignOutgoingTransition( lastTransitionLabelChar, lastTargetNode, lastClonedNode); lastTargetNode = currentTargetNode; } // Store clonedNode and the char which labels the _transition between the node it was cloned // from (currentTargetNode) and THAT node's parent. // These will be used to establish an equivalent _transition to clonedNode from the next clone // to be created (it's clone parent). lastClonedNode = clonedNode; lastTransitionLabelChar = (i > 0 ? str.charAt(i - 1) : '\0'); ///// } ///// }
/** * Determines and retrieves data related to the first confluence node (defined as a node with two * or more incoming transitions) of a _transition path corresponding to a given String from a * given node. * * @param originNode the MDAGNode from which the _transition path corresponding to str starts from * @param str a String corresponding to a _transition path in the MDAG * @return a HashMap of Strings to Objects containing: - an int denoting the length of the path to * the first confluence node in the _transition path of interest - the MDAGNode which is the * first confluence node in the _transition path of interest (or null if one does not exist) */ private HashMap<String, Object> getTransitionPathFirstConfluenceNodeData( MDAGNode originNode, String str) { int currentIndex = 0; int charCount = str.length(); MDAGNode currentNode = originNode; // Loop thorugh the characters in str, sequentially using them to _transition through the MDAG // in search of // (and breaking upon reaching) the first node that is the target of two or more transitions. // The loop is // also broken from if the currently processing node doesn't have a _transition labeled with the // currently processing char. for (; currentIndex < charCount; currentIndex++) { char currentChar = str.charAt(currentIndex); currentNode = (currentNode.hasOutgoingTransition(currentChar) ? currentNode.transition(currentChar) : null); if (currentNode == null || currentNode.isConfluenceNode()) break; } ///// boolean noConfluenceNode = (currentNode == originNode || currentIndex == charCount); // Create a HashMap containing the index of the last char in the substring corresponding // to the transitoin path to the confluence node, as well as the actual confluence node HashMap<String, Object> confluenceNodeDataHashMap = new HashMap<String, Object>(2); confluenceNodeDataHashMap.put( "toConfluenceNodeTransitionCharIndex", (noConfluenceNode ? 
null : currentIndex)); confluenceNodeDataHashMap.put("confluenceNode", noConfluenceNode ? null : currentNode); ///// return confluenceNodeDataHashMap; }
/** * Adds a Collection of Strings to the MDAG. * * @param strCollection a {@link java.util.Collection} containing Strings to be added to the MDAG */ public final void addStrings(Collection<String> strCollection) { if (sourceNode != null) { String previousString = ""; // Add all the Strings in strCollection to the MDAG. for (String currentString : strCollection) { int mpsIndex = calculateMinimizationProcessingStartIndex(previousString, currentString); // If the _transition path of the previousString needs to be examined for minimization or // equivalence class representation after a certain point, call replaceOrRegister to do so. if (mpsIndex != -1) { String transitionSubstring = previousString.substring(0, mpsIndex); String minimizationProcessingSubString = previousString.substring(mpsIndex); replaceOrRegister( sourceNode.transition(transitionSubstring), minimizationProcessingSubString); } ///// addStringInternal(currentString); previousString = currentString; } ///// // Since we delay the minimization of the previously-added String // until after we read the next one, we need to have a seperate // statement to minimize the absolute last String. replaceOrRegister(sourceNode, previousString); } else { unSimplify(); addStrings(strCollection); } }
/**
 * Creates an MDAG from a newline-delimited, UTF-8 encoded file containing the Strings of
 * interest.
 *
 * <p>The file is opened through {@code IOAdapter} when one is set, otherwise directly from the
 * file system. The reader is always closed before the constructor returns, whether it completes
 * normally or throws. An empty file yields an empty MDAG.
 *
 * @param dataFile a {@link java.io.File} representation of a file containing the Strings that the
 *     MDAG will contain
 * @throws java.io.IOException if {@code dataFile} cannot be opened, or a read operation on it
 *     cannot be carried out
 */
public MDAG(File dataFile) throws IOException {
  BufferedReader dataFileBufferedReader =
      new BufferedReader(
          new InputStreamReader(
              IOAdapter == null
                  ? new FileInputStream(dataFile)
                  : IOAdapter.open(dataFile.getAbsolutePath()),
              "UTF-8"));
  try {
    String currentString;
    String previousString = "";

    // Read every line of dataFile and add the String it contains to the MDAG.
    while ((currentString = dataFileBufferedReader.readLine()) != null) {
      int mpsIndex = calculateMinimizationProcessingStartIndex(previousString, currentString);

      // A value other than -1 means the tail of previousString's transition path, starting at
      // mpsIndex, must be examined for minimization before currentString is added.
      if (mpsIndex != -1) {
        String transitionSubstring = previousString.substring(0, mpsIndex); // shared prefix
        String minimizationProcessingSubstring =
            previousString.substring(mpsIndex); // differing suffix
        replaceOrRegister(
            sourceNode.transition(transitionSubstring), minimizationProcessingSubstring);
      }

      addStringInternal(currentString);
      previousString = currentString;
    }

    // Minimization of each String is delayed until the next one is read, so the very last
    // String needs its own pass. Skip it for an empty file (or a trailing empty line):
    // replaceOrRegister reads str.charAt(0) and would throw on "".
    if (!previousString.isEmpty()) {
      replaceOrRegister(sourceNode, previousString);
    }
  } finally {
    // Release the underlying stream even when reading or building fails.
    dataFileBufferedReader.close();
  }
}
/** * 是否包含<br> * Determines whether a String is present in the MDAG. * * @param str the String to be searched for * @return true if {@code str} is present in the MDAG, and false otherwise */ public boolean contains(String str) { if (sourceNode != null) // if the MDAG hasn't been simplified { MDAGNode targetNode = sourceNode.transition(str.toCharArray()); return (targetNode != null && targetNode.isAcceptNode()); } else { SimpleMDAGNode targetNode = simplifiedSourceNode.transition(mdagDataArray, str.toCharArray()); return (targetNode != null && targetNode.isAcceptNode()); } }
/** * Removes a String from the MDAG. * * @param str the String to be removed from the MDAG */ public void removeString(String str) { if (sourceNode != null) { // Split the _transition path corresponding to str to ensure that // any other _transition paths sharing nodes with it are not affected splitTransitionPath(sourceNode, str); // Remove from equivalenceClassMDAGNodeHashMap, the entries of all the nodes in the // _transition path corresponding to str. removeTransitionPathRegisterEntries(str); // Get the last node in the _transition path corresponding to str MDAGNode strEndNode = sourceNode.transition(str); if (strEndNode == null) return; if (!strEndNode.hasTransitions()) { int soleInternalTransitionPathLength = calculateSoleTransitionPathLength(str); int internalTransitionPathLength = str.length() - 1; if (soleInternalTransitionPathLength == internalTransitionPathLength) { sourceNode.removeOutgoingTransition(str.charAt(0)); transitionCount -= str.length(); } else { // Remove the sub-path in str's _transition path that is only used by str int toBeRemovedTransitionLabelCharIndex = (internalTransitionPathLength - soleInternalTransitionPathLength); MDAGNode latestNonSoloTransitionPathNode = sourceNode.transition(str.substring(0, toBeRemovedTransitionLabelCharIndex)); latestNonSoloTransitionPathNode.removeOutgoingTransition( str.charAt(toBeRemovedTransitionLabelCharIndex)); transitionCount -= str.substring(toBeRemovedTransitionLabelCharIndex).length(); ///// replaceOrRegister(sourceNode, str.substring(0, toBeRemovedTransitionLabelCharIndex)); } } else { strEndNode.setAcceptStateStatus(false); replaceOrRegister(sourceNode, str); } } else { unSimplify(); } }
/** * 前缀查询<br> * Retrieves all the Strings in the MDAG that begin with a given String. * * @param prefixStr a String that is the prefix for all the desired Strings * @return a HashSet containing all the Strings present in the MDAG that begin with {@code * prefixString} */ public HashSet<String> getStringsStartingWith(String prefixStr) { HashSet<String> strHashSet = new HashSet<String>(); if (sourceNode != null) // if the MDAG hasn't been simplified { MDAGNode originNode = sourceNode.transition( prefixStr); // attempt to _transition down the path denoted by prefixStr if (originNode != null) // if there a _transition path corresponding to prefixString (one or more stored // Strings begin with prefixString) { if (originNode.isAcceptNode()) strHashSet.add(prefixStr); getStrings( strHashSet, SearchCondition.PREFIX_SEARCH_CONDITION, prefixStr, prefixStr, originNode .getOutgoingTransitions()); // retrieve all Strings that extend the _transition path // denoted by prefixStr } } else { SimpleMDAGNode originNode = SimpleMDAGNode.traverseMDAG( mdagDataArray, simplifiedSourceNode, prefixStr); // attempt to _transition down the path denoted by prefixStr if (originNode != null) // if there a _transition path corresponding to prefixString (one or more stored // Strings begin with prefixStr) { if (originNode.isAcceptNode()) strHashSet.add(prefixStr); getStrings( strHashSet, SearchCondition.PREFIX_SEARCH_CONDITION, prefixStr, prefixStr, originNode); // retrieve all Strings that extend the _transition path denoted by // prefixString } } return strHashSet; }
/** * Adds a String to the MDAG (called by addString to do actual MDAG manipulation). * * @param str the String to be added to the MDAG */ private void addStringInternal(String str) { String prefixString = determineLongestPrefixInMDAG(str); String suffixString = str.substring(prefixString.length()); // Retrive the data related to the first confluence node (a node with two or more incoming // transitions) // in the _transition path from sourceNode corresponding to prefixString. HashMap<String, Object> firstConfluenceNodeDataHashMap = getTransitionPathFirstConfluenceNodeData(sourceNode, prefixString); MDAGNode firstConfluenceNodeInPrefix = (MDAGNode) firstConfluenceNodeDataHashMap.get("confluenceNode"); Integer toFirstConfluenceNodeTransitionCharIndex = (Integer) firstConfluenceNodeDataHashMap.get("toConfluenceNodeTransitionCharIndex"); ///// // Remove the register entries of all the nodes in the prefixString _transition path up to the // first confluence node // (those past the confluence node will not need to be removed since they will be cloned and // unaffected by the // addition of suffixString). If there is no confluence node in prefixString, then remove the // register entries in prefixString's entire _transition path removeTransitionPathRegisterEntries( (toFirstConfluenceNodeTransitionCharIndex == null ? prefixString : prefixString.substring(0, toFirstConfluenceNodeTransitionCharIndex))); // If there is a confluence node in the prefix, we must duplicate the _transition path // of the prefix starting from that node, before we add suffixString (to the duplicate path). // This ensures that we do not disturb the other _transition paths containing this node. 
if (firstConfluenceNodeInPrefix != null) { String transitionStringOfPathToFirstConfluenceNode = prefixString.substring(0, toFirstConfluenceNodeTransitionCharIndex + 1); String transitionStringOfToBeDuplicatedPath = prefixString.substring(toFirstConfluenceNodeTransitionCharIndex + 1); cloneTransitionPath( firstConfluenceNodeInPrefix, transitionStringOfPathToFirstConfluenceNode, transitionStringOfToBeDuplicatedPath); } ///// // Add the _transition based on suffixString to the end of the (possibly duplicated) _transition // path corresponding to prefixString addTransitionPath(sourceNode.transition(prefixString), suffixString); }
/** * 从登记簿中移除路径对应的状态们<br> * Removes from equivalenceClassMDAGNodeHashmap the entries of all the nodes in a _transition * path. * * @param str a String corresponding to a _transition path from sourceNode */ private void removeTransitionPathRegisterEntries(String str) { MDAGNode currentNode = sourceNode; int charCount = str.length(); for (int i = 0; i < charCount; i++) { currentNode = currentNode.transition(str.charAt(i)); if (equivalenceClassMDAGNodeHashMap.get(currentNode) == currentNode) equivalenceClassMDAGNodeHashMap.remove(currentNode); // The hashCode of an MDAGNode is cached the first time a hash is performed without a cache // value present. // Since we just hashed currentNode, we must clear this regardless of its presence in // equivalenceClassMDAGNodeHashMap // since we're not actually declaring equivalence class representatives here. if (currentNode != null) currentNode.clearStoredHashCode(); } }
/** * Determines the longest prefix of a given String that is the prefix of another String previously * added to the MDAG. * * @param str the String to be processed * @return a String of the longest prefix of {@code str} that is also a prefix of a String * contained in the MDAG */ private String determineLongestPrefixInMDAG(String str) { MDAGNode currentNode = sourceNode; int numberOfChars = str.length(); int onePastPrefixEndIndex = 0; // Loop through the characters in str, using them in sequence to _transition // through the MDAG until the currently processing node doesn't have a _transition // labeled with the current processing char, or there are no more characters to process. for (int i = 0; i < numberOfChars; i++, onePastPrefixEndIndex++) { char currentChar = str.charAt(i); if (currentNode.hasOutgoingTransition(currentChar)) currentNode = currentNode.transition(currentChar); else break; } ///// return str.substring(0, onePastPrefixEndIndex); }
/** * 在从给定节点开始的一段路径上执行最小化<br> * Performs minimization processing on a _transition path starting from a given node. * * <p>This entails either replacing a node in the path with one that has an equivalent right * language/equivalence class (defined as set of _transition paths that can be traversed and nodes * able to be reached from it), or making it a representative of a right language/equivalence * class if a such a node does not already exist. * * @param originNode the MDAGNode that the _transition path corresponding to str starts from * @param str a String related to a _transition path */ private void replaceOrRegister(MDAGNode originNode, String str) { char transitionLabelChar = str.charAt(0); MDAGNode relevantTargetNode = originNode.transition(transitionLabelChar); // If relevantTargetNode has transitions and there is at least one char left to process, // recursively call // this on the next char in order to further processing down the _transition path corresponding // to str if (relevantTargetNode.hasTransitions() && !str.substring(1).isEmpty()) replaceOrRegister(relevantTargetNode, str.substring(1)); ///// // Get the node representing the equivalence class that relevantTargetNode belongs to. MDAGNodes // hash on the // transitions paths that can be traversed from them and nodes able to be reached from them; // nodes with the same equivalence classes will hash to the same bucket. 
MDAGNode equivalentNode = equivalenceClassMDAGNodeHashMap.get(relevantTargetNode); if (equivalentNode == null) // if there is no node with the same right language as relevantTargetNode equivalenceClassMDAGNodeHashMap.put(relevantTargetNode, relevantTargetNode); else if (equivalentNode != relevantTargetNode) // if there is another node with the same right language as // relevantTargetNode, reassign the { // _transition between originNode and relevantTargetNode, to originNode and the node // representing the equivalence class of interest relevantTargetNode.decrementTargetIncomingTransitionCounts(); transitionCount -= relevantTargetNode .getOutgoingTransitionCount(); // Since this method is recursive, the outgoing // transitions of all of relevantTargetNode's child // nodes have already been reassigned, // so we only need to decrement the _transition count by the relevantTargetNode's outgoing // _transition count originNode.reassignOutgoingTransition( transitionLabelChar, relevantTargetNode, equivalentNode); } }
/**
 * Clones every confluence node found along a transition path, so that the path no longer passes
 * through nodes that have other incoming transitions.
 *
 * <p>Starting at {@code originNode}, the first confluence node on the path is located and cloned
 * relative to its parent on the path; the search then continues from the clone with the
 * remainder of the String, until no further confluence node is found.
 *
 * @param originNode the MDAGNode that the transition path corresponding to {@code
 *     storedStringSubstr} starts from
 * @param storedStringSubstr a String corresponding to the transition path to be split
 */
private void splitTransitionPath(MDAGNode originNode, String storedStringSubstr) {
  MDAGNode pathStart = originNode;
  String remainingPath = storedStringSubstr;

  while (true) {
    HashMap<String, Object> confluenceData =
        getTransitionPathFirstConfluenceNodeData(pathStart, remainingPath);
    Integer confluenceCharIndex =
        (Integer) confluenceData.get("toConfluenceNodeTransitionCharIndex");
    MDAGNode confluenceNode = (MDAGNode) confluenceData.get("confluenceNode");

    if (confluenceNode == null) {
      // No (further) confluence node on the path: done.
      return;
    }

    // Clone the confluence node relative to its parent on this path and the label between them.
    MDAGNode confluenceParent =
        pathStart.transition(remainingPath.substring(0, confluenceCharIndex));
    MDAGNode confluenceClone =
        confluenceNode.clone(confluenceParent, remainingPath.charAt(confluenceCharIndex));

    transitionCount += confluenceClone.getOutgoingTransitionCount();

    // Continue with the rest of the path, starting from the freshly created clone.
    remainingPath = remainingPath.substring(confluenceCharIndex + 1);
    pathStart = confluenceClone;
  }
}