/**
 * Creates a {@link Token} for the given character range and adds it to the CAS indexes.
 *
 * <p>The range is first handed to {@code trim} together with the document text; the
 * (presumably adjusted in place - confirm against {@code trim}) boundaries are then used
 * for the new annotation. No token is created when the resulting range is reported as
 * empty by {@code isEmpty} or when {@code isWriteToken()} is {@code false}.
 *
 * @param aJCas
 *            the CAS that supplies the document text and receives the token.
 * @param aBegin
 *            begin offset of the candidate token.
 * @param aEnd
 *            end offset of the candidate token.
 * @param aIndex
 *            not used by this implementation.
 * @return the newly indexed token, or {@code null} if no token was created.
 */
protected Token createToken(final JCas aJCas, final int aBegin, final int aEnd, final int aIndex)
{
    final int[] bounds = { aBegin, aEnd };
    trim(aJCas.getDocumentText(), bounds);

    // Guard clause: nothing to create for an empty range or when token output is disabled.
    // isEmpty is evaluated first so isWriteToken is only consulted for non-empty ranges.
    if (isEmpty(bounds[0], bounds[1]) || !isWriteToken()) {
        return null;
    }

    final Token token = new Token(aJCas, bounds[0], bounds[1]);
    token.addToIndexes(aJCas);
    return token;
}
@Override public void process(JCas aJCas) throws AnalysisEngineProcessException { buf = new StringBuilder(); List<Token> toAdd = new ArrayList<Token>(); List<Token> toRemove = new ArrayList<Token>(); for (Token t : select(aJCas, Token.class)) { String text = t.getCoveredText(); int offset = t.getBegin(); int start = 0; SplitPattern lastPattern = getPattern(text.charAt(0), null); Token firstToken = null; for (int i = 1; i < text.length(); i++) { SplitPattern pattern = getPattern(text.charAt(i), lastPattern); if (pattern != lastPattern) { if (lastPattern == null || lastPattern.includeInOutput) { Token nt = addToken(aJCas, offset, text, start, i, toAdd); firstToken = (firstToken == null) ? nt : firstToken; } start = i; } lastPattern = pattern; } // If we would just create the same token again, better do nothing if (start == 0) { // That is - if the whole token matches something to exclude, we remove it if (lastPattern != null && !lastPattern.includeInOutput) { toRemove.add(t); } continue; } if (deleteCover) { toRemove.add(t); } // The rest goes into the final token if (lastPattern == null || lastPattern.includeInOutput) { addToken(aJCas, offset, text, start, text.length(), toAdd); } } for (Token t : toAdd) { t.addToIndexes(); } for (Token t : toRemove) { t.removeFromIndexes(); } }