Java UnicodeSet Examples

Programming Language: Java

Namespace/Package Name: com.ibm.icu.text

Class/Type: UnicodeSet

Examples at hotexamples.com: 9

Java UnicodeSet - 9 examples found. These are the top-rated real-world Java examples of com.ibm.icu.text.UnicodeSet, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

add(3)

addAll(2)

contains(2)

clear(1)

compact(1)

containsAll(1)

containsSome(1)

freeze(1)

size(1)

toPattern(1)

Example #1

Show file

File: TestTransformsSimple.java Project: hanikesn/cldr

  /**
   * Runs the "Latin-ConjoiningJamo" transform, in both directions, over the representative
   * Hangul sample set and passes everything to {@code writeFile}.
   *
   * <p>The sample set's size and pattern are logged first. The samples are used as they are;
   * no pairwise combinations of samples are generated.
   *
   * @throws IOException declared by this method; presumably raised by {@code writeFile} —
   *     confirm against its declaration
   */
  public void TestJamo() throws IOException {
    final String transformId = "Latin-ConjoiningJamo";
    final Transliterator latinToJamo = Transliterator.getInstance(transformId);
    final Transliterator jamoToLatin =
        Transliterator.getInstance(transformId, Transliterator.REVERSE);

    final UnicodeSet hangulSamples = getRepresentativeHangul();
    logln(hangulSamples.size() + "\t" + hangulSamples.toPattern(false));

    final Transliterator decomposer = Transliterator.getInstance("nfd");

    // writeFile receives a copy of the samples rather than the sample set itself.
    final UnicodeSet testItems = new UnicodeSet(hangulSamples);

    // No special-case strings are supplied; the typed local keeps the argument's static type.
    final UnicodeSet specials = null;
    writeFile(transformId, testItems, decomposer, jamoToLatin, latinToJamo, true, null, specials);
  }

Example #2

Show file

File: TestTransformsSimple.java Project: hanikesn/cldr

 /**
  * Wraps each UTF-16 code unit of {@code source} in an HTML {@code <span>} whose background
  * color depends on which of the class-level sets {@code lead}, {@code vowel} or {@code trail}
  * (declared outside this block) contains it. The sets are checked in that order, and a code
  * unit found in none of them gets the color {@code FFFFFF}.
  *
  * @param source text to decorate
  * @return the concatenated span markup; the characters themselves are not HTML-escaped
  */
 private static String pretty(String source) {
   StringBuilder html = new StringBuilder();
   for (char unit : source.toCharArray()) {
     String background;
     if (lead.contains(unit)) {
       background = "FFcccc";
     } else if (vowel.contains(unit)) {
       background = "ccFFcc";
     } else if (trail.contains(unit)) {
       background = "ccccFF";
     } else {
       background = "FFFFFF";
     }
     html.append("<span style='background-color: #")
         .append(background)
         .append("'>")
         .append(unit)
         .append("</span>");
   }
   return html.toString();
 }

Example #3

Show file

File: UCharacterName.java Project: Rolana/Demo

  /**
   * Rebuilds {@code uset} from the bit-flag character set {@code set}. Equivalent to
   * charSetToUSet.
   *
   * <p>{@code uset} is always cleared first. If {@code initNameSetsLengths()} returns false,
   * nothing is added and {@code uset} stays empty.
   *
   * @param set Set of 256 bit flags corresponding to a set of chars.
   * @param uset USet to receive characters. Existing contents are deleted.
   */
  private void convert(int set[], UnicodeSet uset) {
    uset.clear();
    if (initNameSetsLengths()) {
      // Walk the char values from 255 down to 1 (0 is never tested) and copy each one that
      // is flagged in the bit set.
      char candidate = 255;
      while (candidate > 0) {
        if (contains(set, candidate)) {
          uset.add(candidate);
        }
        candidate--;
      }
    }
  }

Example #4

Show file

File: TestTransformsSimple.java Project: hanikesn/cldr

  /**
   * Collects Hangul strings that exercise syllable-boundary cases.
   *
   * <p>Three families of jamo sequences are generated. Each sequence is composed with
   * {@code Normalizer.compose(..., false)} and kept only when the composed form is exactly two
   * UTF-16 code units long:
   *
   * <ul>
   *   <li>(U+1100 or U+110B) + V + U+110B + V
   *   <li>U+1100 + V + L + U+1161
   *   <li>U+1100 U+1161 + T + L + U+1161
   * </ul>
   *
   * where L, V and T range over the sets selected by the patterns {@code [:hst=L:]},
   * {@code [:hst=V:]} and {@code [:hst=T:]}.
   *
   * @return a new set holding every composed string that passed the length check
   */
  private static UnicodeSet getRepresentativeBoundaryHangul() {
    final UnicodeSet boundaryCases = new UnicodeSet();

    final UnicodeSet leadingJamo = new UnicodeSet("[:hst=L:]");
    final UnicodeSet vowelJamo = new UnicodeSet("[:hst=V:]");
    final UnicodeSet trailingJamo = new UnicodeSet("[:hst=T:]");

    // U+1100 ( ᄀ ) HANGUL CHOSEONG KIYEOK
    // U+1161 ( ᅡ ) HANGUL JUNGSEONG A
    final String kiyeokA = "\u1100\u1161";
    final String kiyeok = "\u1100";
    final String vowelA = "\u1161";
    final String nullLead = "\u110B";

    final UnicodeSet initialLeads = new UnicodeSet("[\u1100\u110B]");

    // All combinations of initialLeads + V + nullLead + V.
    for (UnicodeSetIterator firstIt = new UnicodeSetIterator(initialLeads); firstIt.next(); ) {
      for (UnicodeSetIterator v1It = new UnicodeSetIterator(vowelJamo); v1It.next(); ) {
        // Everything before the final vowel is fixed for the whole innermost loop.
        final String head = firstIt.getString() + v1It.getString() + nullLead;
        for (UnicodeSetIterator v2It = new UnicodeSetIterator(vowelJamo); v2It.next(); ) {
          final String composed = Normalizer.compose(head + v2It.getString(), false);
          if (composed.length() == 2) {
            boundaryCases.add(composed);
          }
        }
      }
    }

    for (UnicodeSetIterator leadIt = new UnicodeSetIterator(leadingJamo); leadIt.next(); ) {
      final String tail = leadIt.getString() + vowelA;

      // All combinations of "g" + V + L + "a".
      for (UnicodeSetIterator vowelIt = new UnicodeSetIterator(vowelJamo); vowelIt.next(); ) {
        final String composed = Normalizer.compose(kiyeok + vowelIt.getString() + tail, false);
        if (composed.length() == 2) {
          boundaryCases.add(composed);
        }
      }

      // All combinations of "ga" + T + L + "a".
      for (UnicodeSetIterator trailIt = new UnicodeSetIterator(trailingJamo); trailIt.next(); ) {
        final String composed = Normalizer.compose(kiyeokA + trailIt.getString() + tail, false);
        if (composed.length() == 2) {
          boundaryCases.add(composed);
        }
      }
    }
    return boundaryCases;
  }

Example #5

Show file

File: TestTransformsSimple.java Project: hanikesn/cldr

  /**
   * Assembles the Hangul sample set used by the transform tests.
   *
   * <p>The result is the union of three sources: the characters chosen by
   * {@code addRepresentativeHangul} (for lengths 2 and 3, with {@code noFirstConsonant} first
   * false and then true), the boundary cases from {@code getRepresentativeBoundaryHangul}
   * (an example of each case of V + L and of T + L), and a fixed list of extra samples.
   *
   * @return a new set containing all samples
   */
  public static UnicodeSet getRepresentativeHangul() {
    final UnicodeSet samples = new UnicodeSet();

    // Call order: (2, false), (3, false), (2, true), (3, true).
    for (boolean noFirstConsonant : new boolean[] {false, true}) {
      for (int decompositionLength = 2; decompositionLength <= 3; ++decompositionLength) {
        addRepresentativeHangul(samples, decompositionLength, noFirstConsonant);
      }
    }

    // Boundary cases: we want an example of each case of V + L and one of each case of T + L.
    samples.addAll(getRepresentativeBoundaryHangul());

    // Fixed extra samples: one single syllable plus a list of multi-syllable strings.
    final UnicodeSet handPicked =
        new UnicodeSet(
            "[\uCE20{\uAD6C\uB514}{\uAD73\uC774}{\uBB34\uB837}{\uBB3C\uC5FF}{\uC544\uAE4C}{\uC544\uB530}{\uC544\uBE60}{\uC544\uC2F8}{\uC544\uC9DC}{\uC544\uCC28}{\uC545\uC0AC}{\uC545\uC2F8}{\uC546\uCE74}{\uC548\uAC00}{\uC548\uC790}{\uC548\uC9DC}{\uC548\uD558}{\uC54C\uAC00}{\uC54C\uB530}{\uC54C\uB9C8}{\uC54C\uBC14}{\uC54C\uBE60}{\uC54C\uC0AC}{\uC54C\uC2F8}{\uC54C\uD0C0}{\uC54C\uD30C}{\uC54C\uD558}{\uC555\uC0AC}{\uC555\uC2F8}{\uC558\uC0AC}{\uC5C5\uC12F\uC501}{\uC5C6\uC5C8\uC2B5}]");
    samples.addAll(handPicked);
    return samples;
  }

Example #6

Show file

File: TestTransformsSimple.java Project: hanikesn/cldr

 /**
  * Adds a covering selection of characters from the range U+AC00 (inclusive) to U+D7AF
  * (exclusive) to {@code resultToAddTo}.
  *
  * <p>A character is considered only if its {@code Normalizer.decompose(..., false)} form has
  * exactly {@code leng} UTF-16 code units and if "starts with ᄋ" equals
  * {@code noFirstConsonant}. Among those, a character is added only when its decomposition
  * contains at least one character not seen in an earlier accepted decomposition during this
  * call.
  *
  * @param resultToAddTo set that receives the selected characters
  * @param leng required length of the decomposed form
  * @param noFirstConsonant whether the decomposition must start with ᄋ
  */
 private static void addRepresentativeHangul(
     UnicodeSet resultToAddTo, int leng, boolean noFirstConsonant) {
   // Characters of every decomposition accepted so far in this call.
   final UnicodeSet coveredJamo = new UnicodeSet();
   for (char syllable = '\uAC00'; syllable < '\uD7AF'; ++syllable) {
     final String decomposed = Normalizer.decompose(String.valueOf(syllable), false);
     if (decomposed.length() == leng
         && decomposed.startsWith("ᄋ") == noFirstConsonant
         && !coveredJamo.containsAll(decomposed)) {
       resultToAddTo.add(syllable);
       coveredJamo.addAll(decomposed);
     }
   }
 }

Example #7

Show file

File: TestTransformsSimple.java Project: hanikesn/cldr

 /**
  * Runs every element of the class-level {@code latin} set (declared outside this block)
  * through {@code fromLatin} and reports the ones whose result still shares at least one
  * character with {@code latin}.
  *
  * @param out writer handed through to {@code showItems}
  * @param fromLatin transliterator applied to each source string
  * @param toLatin transliterator applied to the converted string
  * @return the sum of the values returned by {@code showItems} for the reported items
  */
 private static int checkLatin(PrintWriter out, Transliterator fromLatin, Transliterator toLatin) {
   int failures = 0;
   UnicodeSetIterator cursor = new UnicodeSetIterator(latin);
   while (cursor.next()) {
     final String source = cursor.getString();
     final String to = fromLatin.transliterate(source);
     if (!latin.containsSome(to)) {
       continue; // result shares nothing with the latin set: nothing to report
     }
     final String from = toLatin.transliterate(to);
     // NOTE(review): toLatin is applied a second time here; a round trip would presumably
     // use fromLatin. Behavior kept as found; confirm the intent against showItems.
     final String backto = toLatin.transliterate(from);
     failures += showItems(out, false, source, to, from, backto);
   }
   return failures;
 }

Example #8

Show file

File: LaoBreakIterator.java Project: jibaro/lucene_solr

  /**
   * Moves to the next boundary reported by {@code rules}, possibly pulling it back by one
   * position.
   *
   * <p>If {@code rules.next()} yields {@code BreakIterator.DONE}, that value is returned as is.
   * Otherwise the boundary is offset by {@code workingOffset}. One further {@code rules.next()}
   * call is then made as a lookahead; when it is not {@code DONE}, the rule status is 0,
   * {@code laoSet} contains the character read from {@code working}, and
   * {@code verifyPushBack(current, next)} holds, the working text is re-anchored at
   * {@code next - 1} and {@code next - 1} is returned.
   *
   * @return {@code BreakIterator.DONE}, the offset boundary, or that boundary minus one
   */
  @Override
  public int next() {
    // Captured before rules is advanced; only used as an argument to verifyPushBack.
    int current = current();
    int next = rules.next();
    if (next == BreakIterator.DONE) return next;
    else next += workingOffset;

    // Read before the lookahead call below; the statement order here is significant.
    char c = working.current();
    int following = rules.next(); // lookahead
    if (following != BreakIterator.DONE) {
      // NOTE(review): following is adjusted here but not read again within this method.
      following += workingOffset;
      if (rules.getRuleStatus() == 0 && laoSet.contains(c) && verifyPushBack(current, next)) {
        // Push the boundary back by one and restart the working text from that position.
        workingOffset = next - 1;
        working.setText(
            text.getText(), text.getStart() + workingOffset, text.getLength() - workingOffset);
        return next - 1;
      }
      rules.previous(); // undo the lookahead
    }

    // Reached when no push-back happened; if the lookahead returned DONE it is not undone.
    return next;
  }

Example #9

Show file

File: LaoBreakIterator.java Project: jibaro/lucene_solr

 // Builds the set for the pattern "[:Lao:]", then compacts and freezes it before it is
 // assigned to laoSet.
 static {
   UnicodeSet lao = new UnicodeSet("[:Lao:]");
   lao.compact();
   lao.freeze();
   laoSet = lao;
 }