private static UnicodeSet getRepresentativeBoundaryHangul() { UnicodeSet resultToAddTo = new UnicodeSet(); // U+1100 ( ᄀ ) HANGUL CHOSEONG KIYEOK // U+1161 ( ᅡ ) HANGUL JUNGSEONG A UnicodeSet L = new UnicodeSet("[:hst=L:]"); UnicodeSet V = new UnicodeSet("[:hst=V:]"); UnicodeSet T = new UnicodeSet("[:hst=T:]"); String prefixLV = "\u1100\u1161"; String prefixL = "\u1100"; String suffixV = "\u1161"; String nullL = "\u110B"; UnicodeSet L0 = new UnicodeSet("[\u1100\u110B]"); // do all combinations of L0 + V + nullL + V for (UnicodeSetIterator iL0 = new UnicodeSetIterator(L0); iL0.next(); ) { for (UnicodeSetIterator iV = new UnicodeSetIterator(V); iV.next(); ) { for (UnicodeSetIterator iV2 = new UnicodeSetIterator(V); iV2.next(); ) { String sample = iL0.getString() + iV.getString() + nullL + iV2.getString(); String trial = Normalizer.compose(sample, false); if (trial.length() == 2) { resultToAddTo.add(trial); } } } } for (UnicodeSetIterator iL = new UnicodeSetIterator(L); iL.next(); ) { // do all combinations of "g" + V + L + "a" final String suffix = iL.getString() + suffixV; for (UnicodeSetIterator iV = new UnicodeSetIterator(V); iV.next(); ) { String sample = prefixL + iV.getString() + suffix; String trial = Normalizer.compose(sample, false); if (trial.length() == 2) { resultToAddTo.add(trial); } } // do all combinations of "ga" + T + L + "a" for (UnicodeSetIterator iT = new UnicodeSetIterator(T); iT.next(); ) { String sample = prefixLV + iT.getString() + suffix; String trial = Normalizer.compose(sample, false); if (trial.length() == 2) { resultToAddTo.add(trial); } } } return resultToAddTo; }
private static void addRepresentativeHangul( UnicodeSet resultToAddTo, int leng, boolean noFirstConsonant) { UnicodeSet notYetSeen = new UnicodeSet(); for (char c = '\uAC00'; c < '\uD7AF'; ++c) { String charStr = String.valueOf(c); String decomp = Normalizer.decompose(charStr, false); if (decomp.length() != leng) { continue; // only take one length at a time } if (decomp.startsWith("ᄋ") != noFirstConsonant) { continue; } if (!notYetSeen.containsAll(decomp)) { resultToAddTo.add(c); notYetSeen.addAll(decomp); } } }