  /** Applies each pattern/replacement pair to the text in turn and trims the result. */
  private static String rewrite(String text, Set<PatternStringTuple> patterns) {
    String rewritten = text;
    for (PatternStringTuple pair : patterns) {
      rewritten = pair.getPattern().matcher(rewritten).replaceAll(pair.getEntry());
    }
    return rewritten.trim();
  }
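  // A minimal usage sketch for rewrite(), not part of the original class. It assumes
  // PatternStringTuple offers a (Pattern, String) constructor pairing a regex with its
  // replacement string; that constructor and the two normalization rules below are
  // illustrative assumptions only.
  private static String normalizeForExample(String raw) {
    Set<PatternStringTuple> rules = new java.util.LinkedHashSet<>();
    // Hypothetical rule: collapse any run of whitespace to a single space.
    rules.add(new PatternStringTuple(Pattern.compile("\\s+"), " "));
    // Hypothetical rule: strip ASCII control characters.
    rules.add(new PatternStringTuple(Pattern.compile("\\p{Cntrl}"), ""));
    return rewrite(raw, rules);
  }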
  /**
   * Recursively tokenizes the text against the patterns in {@code tupleList}, starting at
   * {@code index}. Spans matching the current pattern are emitted as single tokens carrying
   * that pattern's tag; the text between matches is tokenized against the remaining patterns.
   * Once all patterns are exhausted, the fallback tokenizer is applied and its tokens are
   * left untagged.
   */
  private static List<TokenTagTuple> recursiveTokenize(
      String text, int index, Tokenizer tokenization) {
    if (index < tupleList.size()) {
      PatternStringTuple pst = tupleList.get(index);
      Pattern pattern = pst.getPattern();
      String tag = pst.getEntry();
      Matcher matcher = pattern.matcher(text);
      // Only patterns that declare a capturing group are matched; others are skipped.
      if (matcher.groupCount() > 0) {
        List<List<TokenTagTuple>> arrays = new ArrayList<>();
        int lastEnd = 0;
        while (matcher.find()) {
          if (matcher.start() > lastEnd) {
            // Tokenize the unmatched text between the previous match and this one.
            String textFragment = text.substring(lastEnd, matcher.start()).trim();
            if (!textFragment.isEmpty()) { // the gap may have been all whitespace
              arrays.add(recursiveTokenize(textFragment, index + 1, tokenization));
            }
          }
          // The matched span becomes a single token carrying this pattern's tag.
          List<TokenTagTuple> match = new ArrayList<>();
          match.add(new TokenTagTuple(matcher.group(), tag));
          arrays.add(match);
          lastEnd = matcher.end();
        }
        if (lastEnd < text.length()) {
          // Tokenize whatever trails the last match.
          arrays.add(recursiveTokenize(text.substring(lastEnd).trim(), index + 1, tokenization));
        }
        return concatAll(arrays);
      } else {
        return recursiveTokenize(text.trim(), index + 1, tokenization);
      }
    } else {
      // All patterns exhausted: fall back to the plain tokenizer, leaving tokens untagged.
      List<String> tokenList = tokenization.tokenize(text);
      List<TokenTagTuple> tokens = new ArrayList<>(tokenList.size());
      for (String token : tokenList) {
        tokens.add(new TokenTagTuple(token, null));
      }
      return tokens;
    }
  }
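  // A sketch of how the two helpers above compose, not part of the original class: the raw
  // text is first normalized with rewrite() against a caller-supplied set of cleanup
  // patterns, then tokenized by recursiveTokenize() starting from the first entry of
  // tupleList. The method name and the cleanupPatterns parameter are assumptions for
  // illustration.
  private static List<TokenTagTuple> tokenizeExample(
      String rawText, Set<PatternStringTuple> cleanupPatterns, Tokenizer tokenization) {
    String cleaned = rewrite(rawText, cleanupPatterns);
    // Start the recursion at the first pattern in tupleList; untagged tokens come from the
    // fallback tokenizer once every pattern has been tried.
    return recursiveTokenize(cleaned, 0, tokenization);
  }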