示例#1
0
  /**
   * Builds the list of tokens for every region of {@code text} matched by the given pattern.
   *
   * <p>Each match becomes a {@code Token} constructed from the matched substring, the pattern's
   * char type and the match start offset. The token and its match range are then handed to
   * {@code markFiltered} together with {@code filteredTokenMap}.
   * NOTE(review): {@code markFiltered} is defined elsewhere; whether it mutates {@code text}
   * or only records into the map cannot be told from this method - confirm before reordering.
   *
   * @param text buffer that is scanned for matches
   * @param tokenPattern supplies the compiled pattern and the char type assigned to each token
   * @param filteredTokenMap map forwarded unchanged to {@code markFiltered}
   * @return the tokens in the order the matches were found; empty when nothing matched
   */
  private List<Token> match(
      StringBuilder text, FilterTokenPattern tokenPattern, Map<Integer, Token> filteredTokenMap) {
    List<Token> matched = new ArrayList<Token>();
    Matcher matcher = tokenPattern.getPattern().matcher(text);

    while (matcher.find()) {
      // Read the offsets once per match; the matcher state does not change until the next find().
      int begin = matcher.start();
      int end = matcher.end();

      Token token = new Token(text.substring(begin, end), tokenPattern.getCharType(), begin);
      matched.add(token);
      markFiltered(text, begin, end, token, filteredTokenMap);
    }

    return matched;
  }
示例#2
0
  /**
   * Filters out the parts of {@code buf} that match the predefined patterns
   * (e.g. "ㅜㅜ", numbers, etc.).
   *
   * <p>Runs {@code match} once for each pattern returned by
   * {@code FilterTokenPattern.getPredefinedPatterns()}, in array order, and concatenates the
   * resulting token lists.
   *
   * @param buf buffer that is scanned against each predefined pattern
   * @param filteredTokenMap map forwarded unchanged to {@code match}
   * @return all tokens produced by the predefined patterns, grouped by pattern in array order;
   *     empty when no pattern matched
   */
  private List<Token> filterPredefinedPatterns(
      StringBuilder buf, Map<Integer, Token> filteredTokenMap) {
    List<Token> collected = new ArrayList<Token>();

    for (FilterTokenPattern pattern : FilterTokenPattern.getPredefinedPatterns()) {
      // Appending an empty list is a no-op, so no emptiness check is needed.
      collected.addAll(match(buf, pattern, filteredTokenMap));
    }

    return collected;
  }