Пример #1
1
  /**
   * Writes the pending syllables to {@code w}, grouping them into dictionary words where possible.
   *
   * <p>Starting from the front of {@code syllables}, the longest run of syllables (at most
   * {@code mMaxSyllables} long) whose text is contained in {@code mWordList} is written as one
   * word. If no run starting at the first syllable matches, the first syllable is written on its
   * own. After each write {@link #appendSpacer} decides whether a spacer follows. The consumed
   * syllables are removed, so {@code syllables} is empty when this method returns.
   *
   * @param w writer receiving the output
   * @param line the line the syllables were parsed from; syllable start/end values are used as
   *     indices into this string
   * @param syllables pending syllables in line order; emptied by this call
   * @throws IOException if writing to {@code w} fails
   */
  protected void checkAndAppendSyllables(
      BufferedWriter w, String line, Vector<ClusterProperties> syllables) throws IOException {
    while (!syllables.isEmpty()) {
      ClusterProperties first = syllables.firstElement();
      int candidateLength = Math.min(syllables.size(), mMaxSyllables);

      // Try the longest candidate first and shrink it one syllable at a time.
      for (; candidateLength > 0; --candidateLength) {
        ClusterProperties endSyllable = syllables.elementAt(candidateLength - 1);
        String candidate = line.substring(first.getStart(), endSyllable.getEnd());
        if (mWordList.contains(candidate)) {
          w.append(candidate);
          appendSpacer(w, line, endSyllable);
          // Remove the matched prefix by position. Clearing the sub-list view avoids the
          // equality-based, whole-vector scan of removeAll and the temporary copy it needed.
          syllables.subList(0, candidateLength).clear();
          break;
        }
      }
      if (candidateLength == 0) {
        // No match found, so append the first syllable as is.
        w.append(line.substring(first.getStart(), first.getEnd()));
        appendSpacer(w, line, first);
        syllables.remove(0);
      }
    }
  }
Пример #2
0
 /**
  * Parses text from the reader and writes the word-broken output to the writer.
  *
  * <p>The input is read line by line. Each line is split into syllables with a
  * {@code MyanmarParser}; syllables are collected until one reports a word break, end of line or
  * punctuation status, at which point the collected run is handed to
  * {@code checkAndAppendSyllables}. Exactly one line separator is written per input line,
  * including empty lines.
  *
  * @param r reader supplying the text; consumed until {@code readLine()} returns {@code null}
  * @param w writer receiving the broken text
  * @throws IOException if reading from {@code r} or writing to {@code w} fails
  */
 public void parse(BufferedReader r, BufferedWriter w) throws IOException {
   MyanmarParser mp = new MyanmarParser();
   Vector<ClusterProperties> syllables = new Vector<ClusterProperties>();
   for (String line = r.readLine(); line != null; line = r.readLine()) {
     int offset = 0;
     // A plain while loop skips empty lines entirely, so they produce a single newline below.
     // (A `continue` inside do/while only jumps to the condition and caused a doubled newline.)
     while (offset < line.length()) {
       ClusterProperties cp = mp.getNextSyllable(line, offset);
       syllables.add(cp);
       MyPairStatus status = cp.getBreakStatus();
       if (status == MyPairStatus.MY_PAIR_WORD_BREAK
           || status == MyPairStatus.MY_PAIR_EOL
           || status == MyPairStatus.MY_PAIR_PUNCTUATION) {
         checkAndAppendSyllables(w, line, syllables);
       }
       offset = cp.getEnd();
     }
     // Syllable offsets index into the current line only, so anything still pending must be
     // written now rather than carried over and sliced out of the next line.
     if (!syllables.isEmpty()) {
       checkAndAppendSyllables(w, line, syllables);
     }
     w.newLine();
   }
 }
Пример #3
0
 /**
  * Writes {@code mSpacer} after a syllable when its break status calls for one.
  *
  * <p>For {@code MY_PAIR_SYL_BREAK} the spacer is always written. For
  * {@code MY_PAIR_WORD_BREAK} it is written only when the syllable does not already end in a
  * space-separator character, does not end at the end of the line, and is not immediately
  * followed by a space-separator character. Any other status writes nothing.
  *
  * @param w writer receiving the spacer
  * @param line the line {@code endSyllable} was parsed from
  * @param endSyllable the syllable after which a spacer may be needed
  * @throws IOException if writing to {@code w} fails
  */
 private void appendSpacer(BufferedWriter w, String line, ClusterProperties endSyllable)
     throws IOException {
   MyPairStatus breakKind = endSyllable.getBreakStatus();
   if (breakKind == MyPairStatus.MY_PAIR_SYL_BREAK) {
     w.append(mSpacer);
     return;
   }
   if (breakKind != MyPairStatus.MY_PAIR_WORD_BREAK) {
     return;
   }

   int end = endSyllable.getEnd();
   boolean spaceBefore = Character.getType(line.charAt(end - 1)) == Character.SPACE_SEPARATOR;
   if (spaceBefore || end == line.length()) {
     // Either the text already carries a space, or there is nothing after the syllable.
     return;
   }
   boolean spaceAfter = Character.getType(line.charAt(end)) == Character.SPACE_SEPARATOR;
   if (!spaceAfter) {
     w.append(mSpacer);
   }
 }
Пример #4
0
 /**
  * Creates a breaker and loads its word list from the given dictionary.
  *
  * <p>Every line read from {@code dictionary} is added to {@code mWordList} as one word. While
  * loading, each word is walked with {@code MyanmarParser.getNextLineBreak} and the largest
  * number of steps needed for any word is kept in {@code mMaxSyllables}.
  *
  * @param spacer char to place in breaks which are found
  * @param dictionary reader supplying one word per line; may be {@code null}, in which case no
  *     words are loaded
  * @throws IOException if reading from {@code dictionary} fails
  */
 public MyanmarBreaker(char spacer, BufferedReader dictionary) throws IOException {
   mSpacer = spacer;
   MyanmarParser mp = new MyanmarParser();
   if (dictionary == null) {
     return;
   }
   for (String entry = dictionary.readLine(); entry != null; entry = dictionary.readLine()) {
     // NOTE(review): the count is taken with getNextLineBreak while parse() splits text with
     // getNextSyllable - confirm both yield the same segmentation for dictionary words.
     int segmentCount = 0;
     for (int pos = 0; pos < entry.length(); pos = mp.getNextLineBreak(entry, pos).getEnd()) {
       ++segmentCount;
     }
     mMaxSyllables = Math.max(mMaxSyllables, segmentCount);
     mWordList.add(entry);
   }
 }