Beispiel #1
0
 /**
  * Creates a {@link Token} for the given character range and adds it to the CAS indexes.
  *
  * <p>The range is first passed through {@code trim} together with the document text
  * (presumably this shrinks the span in place to drop surrounding whitespace; confirm against
  * the helper). No token is created when the resulting span is reported empty by
  * {@code isEmpty} or when {@code isWriteToken()} is false.
  *
  * @param aJCas the CAS the token is created in and indexed into
  * @param aBegin begin offset of the candidate token
  * @param aEnd end offset of the candidate token
  * @param aIndex not used by this implementation
  * @return the newly indexed token, or {@code null} if no token was created
  */
 protected Token createToken(
     final JCas aJCas, final int aBegin, final int aEnd, final int aIndex) {
   final int[] bounds = {aBegin, aEnd};
   trim(aJCas.getDocumentText(), bounds);

   // Same short-circuit order as before: the emptiness check runs first and
   // isWriteToken() is only consulted for a non-empty span.
   if (isEmpty(bounds[0], bounds[1]) || !isWriteToken()) {
     return null;
   }

   final Token token = new Token(aJCas, bounds[0], bounds[1]);
   token.addToIndexes(aJCas);
   return token;
 }
  /**
   * Re-segments every {@link Token} in the CAS by walking its covered text character by
   * character and splitting wherever the {@code SplitPattern} returned by {@code getPattern}
   * changes.
   *
   * <p>Segments whose pattern is {@code null} or has {@code includeInOutput} set are handed to
   * {@code addToken}; segments of a pattern without {@code includeInOutput} are dropped. A token
   * that is never split is left alone, unless it consists entirely of an excluded pattern, in
   * which case it is removed. A token that was split is removed only when {@code deleteCover}
   * is set. Tokens with zero-length covered text are skipped unchanged.
   *
   * <p>Index changes are collected in {@code toAdd}/{@code toRemove} and applied after the
   * iteration has finished (presumably so the index is not modified while it is being
   * iterated).
   *
   * @param aJCas the CAS whose tokens are re-segmented
   * @throws AnalysisEngineProcessException declared by the overridden method; nothing in this
   *     body throws it directly
   */
  @Override
  public void process(JCas aJCas) throws AnalysisEngineProcessException {
    // Reset the shared buffer for this CAS; its consumer is not visible in this method.
    buf = new StringBuilder();
    List<Token> toAdd = new ArrayList<Token>();
    List<Token> toRemove = new ArrayList<Token>();

    for (Token t : select(aJCas, Token.class)) {
      String text = t.getCoveredText();

      // A zero-length token has no character to classify; charAt(0) below would throw
      // StringIndexOutOfBoundsException, so leave such tokens untouched.
      if (text.length() == 0) {
        continue;
      }

      int offset = t.getBegin();
      int start = 0;
      SplitPattern lastPattern = getPattern(text.charAt(0), null);
      for (int i = 1; i < text.length(); i++) {
        SplitPattern pattern = getPattern(text.charAt(i), lastPattern);
        if (pattern != lastPattern) {
          // Pattern boundary: emit the segment [start, i) unless its pattern is excluded.
          if (lastPattern == null || lastPattern.includeInOutput) {
            addToken(aJCas, offset, text, start, i, toAdd);
          }
          start = i;
        }
        lastPattern = pattern;
      }

      // If we would just create the same token again, better do nothing
      if (start == 0) {
        // That is - if the whole token matches something to exclude, we remove it
        if (lastPattern != null && !lastPattern.includeInOutput) {
          toRemove.add(t);
        }
        continue;
      }

      // The token was split at least once; drop the original only if configured to.
      if (deleteCover) {
        toRemove.add(t);
      }

      // The rest goes into the final token
      if (lastPattern == null || lastPattern.includeInOutput) {
        addToken(aJCas, offset, text, start, text.length(), toAdd);
      }
    }

    for (Token t : toAdd) {
      t.addToIndexes();
    }

    for (Token t : toRemove) {
      t.removeFromIndexes();
    }
  }