/**
 * Renders the deterministic FSM graph as a GML document.
 *
 * <p>The output starts with a {@code graph[} header marked as directed, followed by one
 * {@code node[ ... ]} entry per state in {@code fsmStates}, then the edge descriptions
 * returned by each state's {@code getEdgesGML()}, and finally a closing {@code ]}. Every
 * node is labelled with its state index; for final states the label additionally carries
 * {@code ,F}, a literal backslash-n sequence and the state's lookup set.
 *
 * @return the GML text describing all states and their edges
 */
public String getFSMgml() {
  StringBuffer nodeSection = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
  StringBuffer edgeSection = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);

  for (Iterator<?> stateIter = fsmStates.iterator(); stateIter.hasNext(); ) {
    FSMState state = (FSMState) stateIter.next();
    int id = state.getIndex();

    // node entry: the state index doubles as both the GML id and the label
    nodeSection.append("node[ id ").append(id).append(" label \"").append(id);
    if (state.isFinal()) {
      // final states also show their lookup set inside the label
      nodeSection.append(",F\\n").append(state.getLookupSet());
    }
    nodeSection.append("\" ]\n");

    // edges are collected separately so that they are emitted after all the nodes
    edgeSection.append(state.getEdgesGML());
  }

  return "graph[ \ndirected 1\n" + nodeSection + edgeSection + "]\n";
} // getFSMgml
/** * This method runs the gazetteer. It assumes that all the needed parameters are set. If they are * not, an exception will be fired. */ @Override public void execute() throws ExecutionException { interrupted = false; AnnotationSet annotationSet; // check the input if (document == null) { throw new ExecutionException("No document to process!"); } if (annotationSetName == null || annotationSetName.equals("")) { annotationSet = document.getAnnotations(); } else { annotationSet = document.getAnnotations(annotationSetName); } fireStatusChanged("Performing look-up in " + document.getName() + "..."); String content = document.getContent().toString(); int length = content.length(); char currentChar; FSMState currentState = initialState; FSMState nextState; FSMState lastMatchingState = null; int matchedRegionEnd = 0; int matchedRegionStart = 0; int charIdx = 0; int oldCharIdx = 0; FeatureMap fm; Lookup currentLookup; while (charIdx < length) { currentChar = content.charAt(charIdx); if (Character.isSpaceChar(currentChar) || Character.isWhitespace(currentChar)) { currentChar = ' '; } else { currentChar = caseSensitive.booleanValue() ? currentChar : Character.toUpperCase(currentChar); } nextState = currentState.next(currentChar); if (nextState == null) { // the matching stopped // if we had a successful match then act on it; if (lastMatchingState != null) { createLookups(lastMatchingState, matchedRegionStart, matchedRegionEnd, annotationSet); lastMatchingState = null; } // reset the FSM (обходим каждую позицию т.е. 
сначала с 0, потом с 1, потом с 2) charIdx = matchedRegionStart + 1; matchedRegionStart = charIdx; currentState = initialState; } else { // go on with the matching currentState = nextState; // if we have a successful state then store it if (currentState.isFinal() && ((!wholeWordsOnly.booleanValue()) || ((matchedRegionStart == 0 || !isWordInternal(content.charAt(matchedRegionStart - 1))) && (charIdx + 1 >= content.length() || !isWordInternal(content.charAt(charIdx + 1)))))) { // we have a new match // if we had an existing match and we need to annotate prefixes, then // apply it if (!longestMatchOnly && lastMatchingState != null) { createLookups(lastMatchingState, matchedRegionStart, matchedRegionEnd, annotationSet); } matchedRegionEnd = charIdx; lastMatchingState = currentState; } charIdx++; if (charIdx == content.length()) { // we can't go on, use the last matching state and restart matching // from the next char if (lastMatchingState != null) { // let's add the new annotation(s) createLookups(lastMatchingState, matchedRegionStart, matchedRegionEnd, annotationSet); lastMatchingState = null; } // reset the FSM charIdx = matchedRegionStart + 1; matchedRegionStart = charIdx; currentState = initialState; } } // fire the progress event if (charIdx - oldCharIdx > 256) { fireProgressChanged((100 * charIdx) / length); oldCharIdx = charIdx; if (isInterrupted()) throw new ExecutionInterruptedException( "The execution of the " + getName() + " gazetteer has been abruptly interrupted!"); } } // while(charIdx < length) // we've finished. If we had a stored match, then apply it. if (lastMatchingState != null) { createLookups(lastMatchingState, matchedRegionStart, matchedRegionEnd, annotationSet); } fireProcessFinished(); fireStatusChanged("Look-up complete!"); } // execute