예제 #1
0
  /**
   * Builds a set of composed two-char Hangul strings taken from jamo sequences that span a
   * syllable boundary.
   *
   * <p>Three families of jamo sequences are generated, each is composed with
   * {@code Normalizer.compose(sample, false)}, and the composed form is kept only when it is
   * exactly two chars long:
   *
   * <ol>
   *   <li>(U+1100 or U+110B) + V + U+110B + V, for every pair of V jamo;
   *   <li>U+1100 + V + L + U+1161, for every V and every L jamo;
   *   <li>U+1100 U+1161 + T + L + U+1161, for every T and every L jamo.
   * </ol>
   *
   * <p>L, V and T are the sets matched by {@code [:hst=L:]}, {@code [:hst=V:]} and
   * {@code [:hst=T:]} respectively.
   *
   * @return a new set holding every composed string that passed the length check
   */
  private static UnicodeSet getRepresentativeBoundaryHangul() {
    final String kiyeok = "\u1100"; // U+1100 ( ᄀ ) HANGUL CHOSEONG KIYEOK
    final String jungseongA = "\u1161"; // U+1161 ( ᅡ ) HANGUL JUNGSEONG A
    final String kiyeokA = "\u1100\u1161";
    final String ieung = "\u110B"; // used as the "null" leading consonant of the second syllable

    final UnicodeSet leadingJamo = new UnicodeSet("[:hst=L:]");
    final UnicodeSet vowelJamo = new UnicodeSet("[:hst=V:]");
    final UnicodeSet trailingJamo = new UnicodeSet("[:hst=T:]");
    final UnicodeSet firstLeads = new UnicodeSet("[\u1100\u110B]");

    final UnicodeSet boundaries = new UnicodeSet();

    // Family 1: (kiyeok | ieung) + V + ieung + V.
    UnicodeSetIterator leadIt = new UnicodeSetIterator(firstLeads);
    while (leadIt.next()) {
      UnicodeSetIterator firstVowelIt = new UnicodeSetIterator(vowelJamo);
      while (firstVowelIt.next()) {
        final String head = leadIt.getString() + firstVowelIt.getString() + ieung;
        UnicodeSetIterator secondVowelIt = new UnicodeSetIterator(vowelJamo);
        while (secondVowelIt.next()) {
          addIfComposedLengthIsTwo(boundaries, head + secondVowelIt.getString());
        }
      }
    }

    UnicodeSetIterator secondLeadIt = new UnicodeSetIterator(leadingJamo);
    while (secondLeadIt.next()) {
      // Second syllable is always L + "a"; only what precedes it varies below.
      final String tail = secondLeadIt.getString() + jungseongA;

      // Family 2: "g" + V + L + "a".
      UnicodeSetIterator vowelIt = new UnicodeSetIterator(vowelJamo);
      while (vowelIt.next()) {
        addIfComposedLengthIsTwo(boundaries, kiyeok + vowelIt.getString() + tail);
      }

      // Family 3: "ga" + T + L + "a".
      UnicodeSetIterator trailIt = new UnicodeSetIterator(trailingJamo);
      while (trailIt.next()) {
        addIfComposedLengthIsTwo(boundaries, kiyeokA + trailIt.getString() + tail);
      }
    }
    return boundaries;
  }

  /**
   * Composes {@code sample} with {@code Normalizer.compose(sample, false)} and adds the result to
   * {@code target} only when the composed string is exactly two chars long.
   */
  private static void addIfComposedLengthIsTwo(UnicodeSet target, String sample) {
    String composed = Normalizer.compose(sample, false);
    if (composed.length() == 2) {
      target.add(composed);
    }
  }
예제 #2
0
 /**
  * Adds a representative subset of precomposed Hangul syllables to {@code resultToAddTo}.
  *
  * <p>Walks the precomposed Hangul syllables U+AC00..U+D7A3 in ascending order. A syllable is
  * added when its decomposition (via {@code Normalizer.decompose(s, false)}) has the requested
  * length, matches the requested first-consonant filter, and contains at least one character
  * that has not appeared in the decomposition of a previously added syllable.
  *
  * @param resultToAddTo set that receives the selected syllables; modified in place
  * @param leng required length, in chars, of a syllable's decomposition; syllables whose
  *     decomposition has any other length are skipped
  * @param noFirstConsonant when {@code true}, only syllables whose decomposition starts with
  *     "ᄋ" are considered; when {@code false}, only syllables whose decomposition does not start
  *     with it are considered (NOTE(review): presumably U+110B HANGUL CHOSEONG IEUNG, the
  *     silent initial consonant; confirm the literal's code point)
  */
 private static void addRepresentativeHangul(
     UnicodeSet resultToAddTo, int leng, boolean noFirstConsonant) {
   // Characters already covered by the decomposition of some syllable added so far.
   UnicodeSet seen = new UnicodeSet();
   // U+D7A3 is the last precomposed Hangul syllable. The code points after it up to U+D7AF are
   // not syllables and must not be offered as representatives (they would pass a leng == 1
   // filter because they do not decompose).
   for (char c = '\uAC00'; c <= '\uD7A3'; ++c) {
     String charStr = String.valueOf(c);
     String decomp = Normalizer.decompose(charStr, false);
     if (decomp.length() != leng) {
       continue; // only take one length at a time
     }
     if (decomp.startsWith("ᄋ") != noFirstConsonant) {
       continue; // wrong side of the first-consonant filter
     }
     if (!seen.containsAll(decomp)) {
       // This syllable contributes at least one not-yet-covered character: keep it.
       resultToAddTo.add(c);
       seen.addAll(decomp);
     }
   }
 }