示例#1
0
  /**
   * Builds a set of representative Hangul samples.
   *
   * <p>The result is the union of three sources:
   *
   * <ol>
   *   <li>the characters collected by {@code addRepresentativeHangul} for decomposition lengths 2
   *       and 3, first with {@code noFirstConsonant == false} and then with {@code true};
   *   <li>the set returned by {@code getRepresentativeBoundaryHangul()};
   *   <li>a fixed, hand-written list of extra characters and multi-character strings.
   * </ol>
   *
   * @return a newly created {@code UnicodeSet} holding all of the samples above
   */
  public static UnicodeSet getRepresentativeHangul() {
    // Hand-picked extras: one single character plus a list of multi-syllable strings.
    UnicodeSet handPicked =
        new UnicodeSet(
            "[\uCE20{\uAD6C\uB514}{\uAD73\uC774}{\uBB34\uB837}{\uBB3C\uC5FF}{\uC544\uAE4C}{\uC544\uB530}{\uC544\uBE60}{\uC544\uC2F8}{\uC544\uC9DC}{\uC544\uCC28}{\uC545\uC0AC}{\uC545\uC2F8}{\uC546\uCE74}{\uC548\uAC00}{\uC548\uC790}{\uC548\uC9DC}{\uC548\uD558}{\uC54C\uAC00}{\uC54C\uB530}{\uC54C\uB9C8}{\uC54C\uBC14}{\uC54C\uBE60}{\uC54C\uC0AC}{\uC54C\uC2F8}{\uC54C\uD0C0}{\uC54C\uD30C}{\uC54C\uD558}{\uC555\uC0AC}{\uC555\uC2F8}{\uC558\uC0AC}{\uC5C5\uC12F\uC501}{\uC5C6\uC5C8\uC2B5}]");

    UnicodeSet result = new UnicodeSet();

    // Call order is (2,false), (3,false), (2,true), (3,true).
    for (boolean noFirstConsonant : new boolean[] {false, true}) {
      for (int decompLength = 2; decompLength <= 3; ++decompLength) {
        addRepresentativeHangul(result, decompLength, noFirstConsonant);
      }
    }

    // Boundary cases: we want an example of each case of V + L and one example of each case of
    // T + L.
    result.addAll(getRepresentativeBoundaryHangul());
    result.addAll(handPicked);
    return result;
  }
示例#2
0
 /**
  * Adds a covering sample of characters from the range U+AC00 (inclusive) to U+D7AF (exclusive)
  * to {@code resultToAddTo}.
  *
  * <p>Each character is decomposed with {@code Normalizer.decompose(..., false)}. It is considered
  * only if the decomposition has exactly {@code leng} chars and its "starts with the jamo literal
  * tested below" status equals {@code noFirstConsonant}. A considered character is added when its
  * decomposition contains at least one code point not yet contributed by a previously added
  * character, so every component that occurs among the considered characters ends up represented.
  *
  * @param resultToAddTo set that receives the selected characters; it is modified in place
  * @param leng required length of the decomposed form
  * @param noFirstConsonant required result of the leading-jamo test (presumably the literal is
  *     the silent initial consonant - TODO confirm)
  */
 private static void addRepresentativeHangul(
     UnicodeSet resultToAddTo, int leng, boolean noFirstConsonant) {
   // Decomposed code points already covered by some character added during this call.
   UnicodeSet coveredJamo = new UnicodeSet();
   for (char syllable = '\uAC00'; syllable < '\uD7AF'; ++syllable) {
     String jamo = Normalizer.decompose(String.valueOf(syllable), false);
     boolean wrongLength = jamo.length() != leng; // only take one length at a time
     boolean wrongLead = jamo.startsWith("ᄋ") != noFirstConsonant;
     if (wrongLength || wrongLead || coveredJamo.containsAll(jamo)) {
       continue;
     }
     // This character introduces at least one new component: keep it and record its parts.
     resultToAddTo.add(syllable);
     coveredJamo.addAll(jamo);
   }
 }