protected void checkAndAppendSyllables( BufferedWriter w, String line, Vector<ClusterProperties> syllables) throws IOException { while (syllables.size() > 0) { int candidateLength = Math.min(syllables.size(), mMaxSyllables); for (; candidateLength > 0; --candidateLength) { ClusterProperties endSyllable = syllables.elementAt(candidateLength - 1); String candidate = line.substring(syllables.firstElement().getStart(), endSyllable.getEnd()); if (mWordList.contains(candidate)) { w.append(candidate); appendSpacer(w, line, endSyllable); List<ClusterProperties> wordCP = new ArrayList<ClusterProperties>(syllables.subList(0, candidateLength)); syllables.removeAll(wordCP); break; } } if (candidateLength == 0) { // no match found, so append first syllable as is w.append( line.substring(syllables.firstElement().getStart(), syllables.firstElement().getEnd())); appendSpacer(w, line, syllables.firstElement()); syllables.remove(0); } } }
/**
 * Parses text from the reader and writes the word-broken output to the writer.
 * <p>
 * The input is read line by line and each line is split into syllables.
 * Syllables are accumulated until one reports a word break, end of line or
 * punctuation status, at which point the accumulated run is handed to
 * {@code checkAndAppendSyllables}. Exactly one line break is written per
 * input line, including empty lines. This method neither flushes nor closes
 * {@code r} or {@code w}.
 *
 * @param r source of the text to break
 * @param w destination for the word-broken text
 * @throws IOException if reading from {@code r} or writing to {@code w} fails
 */
public void parse(BufferedReader r, BufferedWriter w) throws IOException {
    MyanmarParser mp = new MyanmarParser();
    Vector<ClusterProperties> syllables = new Vector<ClusterProperties>();
    for (String line = r.readLine(); line != null; line = r.readLine()) {
        int offset = 0;
        // A plain while loop skips empty lines entirely, so they produce only
        // the single line break written below.
        while (offset < line.length()) {
            ClusterProperties cp = mp.getNextSyllable(line, offset);
            syllables.add(cp);
            MyPairStatus status = cp.getBreakStatus();
            if (status == MyPairStatus.MY_PAIR_WORD_BREAK
                    || status == MyPairStatus.MY_PAIR_EOL
                    || status == MyPairStatus.MY_PAIR_PUNCTUATION) {
                checkAndAppendSyllables(w, line, syllables);
            }
            offset = cp.getEnd();
        }
        // Syllable offsets are only valid for the line they came from, so any
        // run not closed by a break status is written before moving on.
        if (!syllables.isEmpty()) {
            checkAndAppendSyllables(w, line, syllables);
        }
        w.newLine();
    }
}
/**
 * Writes the spacer after a syllable when its break status calls for one.
 * <p>
 * A syllable break always gets the spacer. A word break gets it only when
 * the syllable does not already end in a space separator, is not at the end
 * of the line, and is not immediately followed by a space separator. Any
 * other status writes nothing.
 *
 * @param w           writer receiving the spacer
 * @param line        text that the syllable's offsets index into
 * @param endSyllable syllable after which a spacer may be needed
 * @throws IOException if writing to {@code w} fails
 */
private void appendSpacer(BufferedWriter w, String line, ClusterProperties endSyllable)
        throws IOException {
    final MyPairStatus breakStatus = endSyllable.getBreakStatus();
    if (breakStatus == MyPairStatus.MY_PAIR_SYL_BREAK) {
        w.append(mSpacer);
        return;
    }
    if (breakStatus != MyPairStatus.MY_PAIR_WORD_BREAK) {
        return;
    }

    final int end = endSyllable.getEnd();
    // The text already carries its own space on the left side of the break.
    if (Character.getType(line.charAt(end - 1)) == Character.SPACE_SEPARATOR) {
        return;
    }
    // Nothing follows the syllable, so there is nothing to separate it from.
    if (end == line.length()) {
        return;
    }
    // The text already carries its own space on the right side of the break.
    if (Character.getType(line.charAt(end)) == Character.SPACE_SEPARATOR) {
        return;
    }
    w.append(mSpacer);
}
/**
 * Creates a breaker and loads its word list from the given dictionary.
 * <p>
 * Every line read from {@code dictionary} is added to the word list as one
 * word. While loading, the largest number of segments found in any single
 * word is recorded in {@code mMaxSyllables}. The reader is not closed here.
 *
 * @param spacer     char to place in breaks which are found
 * @param dictionary reader supplying one word per line; may be {@code null},
 *                   in which case no words are loaded
 * @throws IOException if reading from {@code dictionary} fails
 */
public MyanmarBreaker(char spacer, BufferedReader dictionary) throws IOException {
    mSpacer = spacer;
    MyanmarParser parser = new MyanmarParser();
    if (dictionary == null) {
        return;
    }
    for (String entry = dictionary.readLine(); entry != null; entry = dictionary.readLine()) {
        // NOTE(review): segments are counted with getNextLineBreak here, while
        // parse() splits text with getNextSyllable - confirm both yield the
        // same units.
        int segments = 0;
        for (int pos = 0; pos < entry.length();
                pos = parser.getNextLineBreak(entry, pos).getEnd()) {
            ++segments;
        }
        if (segments > mMaxSyllables) {
            mMaxSyllables = segments;
        }
        mWordList.add(entry);
    }
}