// 패턴에 매칭되는 토큰 리스트를 만든다. private List<Token> match( StringBuilder text, FilterTokenPattern tokenPattern, Map<Integer, Token> filteredTokenMap) { List<Token> tokenList = new ArrayList<Token>(); for (Matcher matcher = tokenPattern.getPattern().matcher(text); matcher.find(); ) { Token token = new Token( text.substring(matcher.start(), matcher.end()), tokenPattern.getCharType(), matcher.start()); tokenList.add(token); markFiltered(text, matcher.start(), matcher.end(), token, filteredTokenMap); } return tokenList; }
// 미리 정의된 패턴과 일치하는 부분을 걸러낸다 (ㅜㅜ, 숫자 등) private List<Token> filterPredefinedPatterns( StringBuilder buf, Map<Integer, Token> filteredTokenMap) { List<Token> result = new ArrayList<Token>(); FilterTokenPattern[] predefinedPatterns = FilterTokenPattern.getPredefinedPatterns(); List<Token> filteredTokens; for (FilterTokenPattern each : predefinedPatterns) { filteredTokens = match(buf, each, filteredTokenMap); if (filteredTokens.size() > 0) { result.addAll(filteredTokens); } } return result; }