/**
 * Expands one rule into mapping lines appended to {@code builder}.
 *
 * <p>{@code leftHandSide} is parsed as a {@link UnicodeSet} pattern (whitespace ignored). When
 * {@code rightHandSide} matches {@code NUMERIC_VALUE_PATTERN}, every code point of the set gets
 * its own line of the form {@code XXXX>YYYY   # NAME}, where {@code YYYY} is {@code 0x30} plus
 * the code point's numeric value. Otherwise each range of the set produces a single line
 * {@code XXXX>rhs} or {@code XXXX..YYYY>rhs}.
 *
 * <p>If the set contains a string rather than only code points, an error is printed to
 * {@code System.err} and the JVM is terminated with exit status 1.
 *
 * @param builder receives the generated lines
 * @param leftHandSide UnicodeSet pattern selecting the source code points
 * @param rightHandSide mapping target, or a numeric-value marker
 * @throws IllegalArgumentException declared by the signature; presumably raised by the set
 *     parser on a malformed pattern
 */
private static void expandSingleRule(
     StringBuilder builder, String leftHandSide, String rightHandSide)
     throws IllegalArgumentException {
   final UnicodeSet sourceSet = new UnicodeSet(leftHandSide, UnicodeSet.IGNORE_SPACE);
   final boolean mapToDigit = NUMERIC_VALUE_PATTERN.matcher(rightHandSide).matches();
   final UnicodeSetIterator ranges = new UnicodeSetIterator(sourceSet);
   while (ranges.nextRange()) {
     // Strings inside the set cannot be expressed as code point mappings: give up.
     if (ranges.codepoint == UnicodeSetIterator.IS_STRING) {
       System.err.println("ERROR: String '" + ranges.getString() + "' found in UnicodeSet");
       System.exit(1);
       continue;
     }

     if (!mapToDigit) {
       // One line per range, collapsing the range end when it is a single code point.
       builder.append(String.format(Locale.ROOT, "%04X", ranges.codepoint));
       if (ranges.codepointEnd > ranges.codepoint) {
         builder.append("..").append(String.format(Locale.ROOT, "%04X", ranges.codepointEnd));
       }
       builder.append('>').append(rightHandSide).append("\n");
       continue;
     }

     // Numeric mapping: every code point needs its own target, so the range is unrolled.
     for (int current = ranges.codepoint; current <= ranges.codepointEnd; ++current) {
       final int target = 0x30 + UCharacter.getNumericValue(current);
       builder
           .append(String.format(Locale.ROOT, "%04X", current))
           .append('>')
           .append(String.format(Locale.ROOT, "%04X", target))
           .append("   # ")
           .append(UCharacter.getName(current))
           .append("\n");
     }
   }
 }
Beispiel #2
0
  // If you don't need any file initialization or postprocessing, you only need this one routine
  /**
   * Verifies that {@code value} is cased the way the {@code casing} attribute in
   * {@code fullPath} requests, and adds an error status to {@code result} when it is not.
   *
   * <p>The last {@code casing} attribute found along the path wins; a {@code verbatim} casing
   * anywhere on the path disables the check. Paths without a full path, without a value, or
   * without any "casing" text are skipped.
   *
   * @param path the distinguishing path (not used by this check)
   * @param fullPath the full path carrying the attributes; may be null
   * @param value the value to check; may be null, in which case nothing is checked
   * @param options check options (not used by this check)
   * @param result list that receives any resulting status
   * @return this checker
   */
  public CheckCLDR handleCheck(
      String path, String fullPath, String value, Options options, List<CheckStatus> result) {
    // it helps performance to have a quick reject of most paths
    if (fullPath == null) return this; // skip paths that we don't have
    // A missing value has no casing to verify; the case conversions below would throw on null.
    if (value == null) return this;
    if (fullPath.indexOf("casing") < 0) return this;

    // pick up the casing attributes from the full path
    parts.set(fullPath);

    Case caseTest = Case.mixed;
    for (int i = 0; i < parts.size(); ++i) {
      String casingValue = parts.getAttributeValue(i, "casing");
      if (casingValue == null) {
        continue;
      }
      caseTest = Case.forString(casingValue);
      if (caseTest == Case.verbatim) {
        return this; // we're done
      }
    }

    // Compute what the value would look like with the requested casing applied.
    String newValue = value;
    switch (caseTest) {
      case lowercase_words:
        newValue = UCharacter.toLowerCase(uLocale, value);
        break;
      case titlecase_words:
        newValue = UCharacter.toTitleCase(uLocale, value, null);
        break;
      case titlecase_firstword:
        newValue = TitleCaseFirst(uLocale, value);
        break;
      default:
        break;
    }
    if (!newValue.equals(value)) {
      // the following is how you signal an error or warning (or add a demo....)
      result.add(
          new CheckStatus()
              .setCause(this)
              .setMainType(CheckStatus.errorType)
              .setSubtype(Subtype.incorrectCasing)
              // typically warningType or errorType
              .setMessage(
                  "Casing incorrect: either should have casing=\"verbatim\" or be <{0}>",
                  new Object[] {newValue})); // the message; can be MessageFormat with arguments
    }
    return this;
  }
        /**
         * Trims the source string and collapses every run of consecutive whitespace characters
         * into the first whitespace character of that run.
         *
         * @param module not used
         * @param source the string to normalize; cast to {@code String}
         * @param args not used
         * @param evalEnv not used
         * @return the normalized string
         */
        public Object invoke(
            ModuleInstance module,
            Object source,
            Object[] args,
            QvtOperationalEvaluationEnv evalEnv) {
          final String trimmed = ((String) source).trim();
          final StringBuilder collapsed = new StringBuilder(trimmed.length());
          boolean previousWasWhitespace = false;
          for (int index = 0; index < trimmed.length(); index++) {
            final char current = trimmed.charAt(index);
            final boolean whitespace = UCharacter.isWhitespace(current);
            // Only the 2nd and later whitespace characters of a run are dropped.
            if (!(whitespace && previousWasWhitespace)) {
              collapsed.append(current);
            }
            previousWasWhitespace = whitespace;
          }
          return collapsed.toString();
        }
 /**
  * Looks up the name of {@code codePoint} up to 10000 times, storing each result in
  * {@code actualName}, and stops at the first lookup that differs from {@code correctName}.
  */
 public void run() {
   int remaining = 10000;
   while (remaining-- > 0) {
     actualName = UCharacter.getName(codePoint);
     if (!correctName.equals(actualName)) {
       // Leave the mismatching name in actualName for whoever inspects it afterwards.
       return;
     }
   }
 }
  /**
   * Downloads the normalization data files from the ICU project.
   *
   * <p>{@code NFKC_TXT} and {@code NFKC_CF_TXT} are downloaded as they are. {@code NFC_TXT} is
   * read line by line and written to a local file of the same name, with round-trip mappings
   * that produce diacritics turned into one-way mappings. Progress is reported on
   * {@code System.err}.
   *
   * @throws IOException if a download or the local file write fails
   */
  private static void getNFKCDataFilesFromIcuProject() throws IOException {
    URL icuTagsURL = new URL(ICU_SVN_TAG_URL + "/");
    URL icuReleaseTagURL = new URL(icuTagsURL, ICU_RELEASE_TAG + "/");
    URL norm2url = new URL(icuReleaseTagURL, ICU_DATA_NORM2_PATH + "/");

    System.err.print("Downloading " + NFKC_TXT + " ... ");
    download(new URL(norm2url, NFKC_TXT), NFKC_TXT);
    System.err.println("done.");
    System.err.print("Downloading " + NFKC_CF_TXT + " ... ");
    download(new URL(norm2url, NFKC_CF_TXT), NFKC_CF_TXT);
    System.err.println("done.");

    // The file processed here is NFC_TXT, so that is the name reported.
    System.err.print("Downloading " + NFC_TXT + " and making diacritic rules one-way ... ");
    URLConnection connection = openConnection(new URL(norm2url, NFC_TXT));
    BufferedReader reader =
        new BufferedReader(new InputStreamReader(connection.getInputStream(), "UTF-8"));
    // Nested try/finally so that each stream is closed even if opening or closing the other
    // one throws.
    try {
      Writer writer = new OutputStreamWriter(new FileOutputStream(NFC_TXT), "UTF-8");
      try {
        String line;
        while (null != (line = reader.readLine())) {
          writer.write(makeDiacriticMappingOneWay(line));
          writer.write("\n");
        }
      } finally {
        writer.close();
      }
    } finally {
      reader.close();
    }
    System.err.println("done.");
  }

  /**
   * Rewrites a round-trip mapping line as a one-way mapping when its output contains
   * diacritics; any other line is returned unchanged.
   */
  private static String makeDiacriticMappingOneWay(String line) {
    Matcher matcher = ROUND_TRIP_MAPPING_LINE_PATTERN.matcher(line);
    if (!matcher.matches()) {
      return line;
    }
    final String leftHandSide = matcher.group(1);
    final String rightHandSide = matcher.group(2).trim();

    // Collect the output code points (hex strings) that force the mapping to be one-way.
    List<String> diacritics = new ArrayList<String>();
    for (String outputCodePoint : rightHandSide.split("\\s+")) {
      int ch = Integer.parseInt(outputCodePoint, 16);
      if (UCharacter.hasBinaryProperty(ch, UProperty.DIACRITIC)
          // gennorm2 fails if U+0653-U+0656 are included in round-trip mappings
          || (ch >= 0x653 && ch <= 0x656)) {
        diacritics.add(outputCodePoint);
      }
    }
    if (diacritics.isEmpty()) {
      return line;
    }

    StringBuilder replacementLine = new StringBuilder();
    replacementLine.append(leftHandSide).append(">").append(rightHandSide);
    replacementLine.append("  # one-way: diacritic");
    if (diacritics.size() > 1) {
      replacementLine.append("s");
    }
    for (String diacritic : diacritics) {
      replacementLine.append(" ").append(diacritic);
    }
    return replacementLine.toString();
  }
        /**
         * Tests whether the source string consists only of letters and digits and starts with
         * a letter. An empty source string yields {@code TRUE}.
         *
         * @param module not used
         * @param source the string to test; cast to {@code String}
         * @param args the first element is checked for null / the invalid result
         * @param evalEnv used to obtain the invalid-result value
         * @return {@code Boolean.FALSE} if the first argument is null or the invalid result, or
         *     if a character fails the test; {@code Boolean.TRUE} otherwise
         */
        public Object invoke(
            ModuleInstance module,
            Object source,
            Object[] args,
            QvtOperationalEvaluationEnv evalEnv) {
          Object leftVal = args[0];
          // Either condition alone rejects the argument. The previous '&&' required the
          // argument to be null AND identical to the invalid result at the same time.
          if (leftVal == null || leftVal == CallHandlerAdapter.getInvalidResult(evalEnv)) {
            return Boolean.FALSE;
          }

          String self = (String) source;
          for (int i = 0; i < self.length(); i++) {
            char c = self.charAt(i);
            // First character must be a letter; all others may also be digits.
            if ((i == 0 && !UCharacter.isLetter(c)) || !UCharacter.isLetterOrDigit(c)) {
              return Boolean.FALSE;
            }
          }
          return Boolean.TRUE;
        }
Beispiel #7
0
 /**
  * Title-cases the first segment of {@code value}, as delimited by {@code breaker}, and
  * leaves the remainder of the string untouched.
  *
  * @param locale locale whose casing rules are applied
  * @param value the text to convert; returned as is when empty
  * @return the text with its first segment title-cased
  */
 private String TitleCaseFirst(ULocale locale, String value) {
   if (value.length() == 0) {
     return value;
   }
   breaker.setText(value);
   breaker.first();
   int endOfFirstWord = breaker.next();
   // Honor the locale passed by the caller rather than silently using the uLocale field.
   return UCharacter.toTitleCase(locale, value.substring(0, endOfFirstWord), breaker)
       + value.substring(endOfFirstWord);
 }
 /**
  * Validates a whitespace-separated list of key labels: no label may occur twice and no label
  * may consist of more than one code point.
  *
  * @param literal the whitespace-separated key labels
  * @throws DatatypeException if a label is duplicated or has multiple characters
  */
 @Override
 public void checkValid(CharSequence literal) throws DatatypeException {
   final String[] labels = literal.toString().split("\\s+");
   // Sorting puts equal labels next to each other, so one look-back finds duplicates.
   Arrays.sort(labels);
   String previous = null;
   for (String label : labels) {
     if (label.equals(previous)) {
       throw newDatatypeException("Duplicate key label. Each key label must be unique.");
     }
     // More than one code point: two UTF-16 units are acceptable only as a surrogate pair,
     // and three or more units always hold at least two code points.
     if (label.codePointCount(0, label.length()) > 1) {
       throw newDatatypeException(
           "Key label has multiple characters. Each key label must be a single character.");
     }
     previous = label;
   }
 }
Beispiel #9
0
 /**
  * Returns the extended type of a code point: the general category reported by
  * {@code UCharacter.getType}, refined so that noncharacters and lead/trail surrogates get
  * their own values.
  *
  * @param ch code point to be tested
  * @return {@code NON_CHARACTER_} for noncharacters, {@code LEAD_SURROGATE_} or
  *     {@code TRAIL_SURROGATE_} for surrogates, otherwise the general category
  */
 private static int getType(int ch) {
   if (UCharacterUtility.isNonCharacter(ch)) {
     // Noncharacters are reported with their own extended type value.
     return NON_CHARACTER_;
   }
   final int category = UCharacter.getType(ch);
   if (category != UCharacterCategory.SURROGATE) {
     return category;
   }
   // Split the surrogate category by position relative to the last lead surrogate.
   return ch <= UTF16.LEAD_SURROGATE_MAX_VALUE ? LEAD_SURROGATE_ : TRAIL_SURROGATE_;
 }
 //
 //  Test multi-threaded parallel calls to UCharacter.getName(codePoint)
 //  Regression test for ticket 6264.
 //
 //  Starts 20 threads, one per code point from 47 to 66, each created with the name that was
 //  obtained up front on this thread. After joining each thread, its actualName is compared
 //  with that expected name and any difference is reported through errln.
 //
 public void TestUCharactersGetName() throws InterruptedException {
   // Typed list: no raw types and no cast when reading the threads back.
   List<GetNameThread> threads = new LinkedList<GetNameThread>();
   for (int t = 0; t < 20; t++) {
     int codePoint = 47 + t;
     String correctName = UCharacter.getName(codePoint);
     GetNameThread thread = new GetNameThread(codePoint, correctName);
     thread.start();
     threads.add(thread);
   }
   for (GetNameThread thread : threads) {
     thread.join();
     if (!thread.correctName.equals(thread.actualName)) {
       errln("FAIL, expected \"" + thread.correctName + "\", got \"" + thread.actualName + "\"");
     }
   }
 }
  // TODO: only bubblesort around runs of combining marks, instead of the entire text.
  /**
   * Reorders {@code text[start .. start + length)} in place by repeatedly swapping a character
   * with its predecessor whenever its combining class is non-zero and lower than the
   * combining class tracked for the preceding characters. Passes are repeated until one
   * completes without a swap.
   *
   * @param text buffer to reorder in place
   * @param start index of the first character to consider
   * @param length number of characters to consider
   */
  private void ccReorder(char[] text, int start, int length) {
    final int limit = start + length;
    boolean swapped = true;
    while (swapped) {
      swapped = false;
      int previousClass = 0;
      for (int pos = start; pos < limit; pos++) {
        final char current = text[pos];
        final int currentClass = UCharacter.getCombiningClass(current);
        if (currentClass <= 0 || currentClass >= previousClass) {
          // Already in order relative to what precedes it; remember its class and move on.
          previousClass = currentClass;
          continue;
        }
        // Out of order: move this character one slot to the left. previousClass is left
        // alone because the character it belongs to now sits at pos.
        text[pos] = text[pos - 1];
        text[pos - 1] = current;
        swapped = true;
      }
    }
  }
    /**
     * Determines the casing type of a string from its first meaningful character.
     *
     * <p>Code points are examined in order: a lowercase letter yields {@code lowercase}, an
     * uppercase or titlecase letter yields {@code titlecase}, and any other letter, number or
     * symbol yields {@code other}. Everything else is skipped. A null, empty or entirely
     * skipped string yields {@code other}.
     *
     * @param s the string to classify; may be null
     * @return the casing type of the string
     */
    public static CasingType from(String s) {
      if (s == null || s.length() == 0) {
        return other;
      }
      // Placeholders used to be skipped here, but it works better to have them be 'other'.
      int index = 0;
      while (index < s.length()) {
        final int codePoint = s.codePointAt(index);
        index += Character.charCount(codePoint);
        switch (UCharacter.getType(codePoint)) {
          case UCharacter.LOWERCASE_LETTER:
            return lowercase;

          case UCharacter.UPPERCASE_LETTER:
          case UCharacter.TITLECASE_LETTER:
            return titlecase;

          // other letters / numbers / symbols decide the result as 'other'
          case UCharacter.OTHER_LETTER:
          case UCharacter.DECIMAL_DIGIT_NUMBER:
          case UCharacter.LETTER_NUMBER:
          case UCharacter.OTHER_NUMBER:
          case UCharacter.MATH_SYMBOL:
          case UCharacter.CURRENCY_SYMBOL:
          case UCharacter.MODIFIER_SYMBOL:
          case UCharacter.OTHER_SYMBOL:
            return other;

          default:
            // whitespace, punctuation, etc.: not meaningful, look at the next code point
            break;
        }
      }
      return other;
    }
Beispiel #13
0
 /**
  * Set an identifier to analyze. Afterwards, call methods like getScripts()
  *
  * <p>The identifier is walked code point by code point. For each decimal digit the zero of
  * its digit set is recorded in {@code numerics}. Script extensions (minus Common and
  * Inherited) are recorded either in {@code requiredScripts} when there is exactly one
  * script, or as an alternate set in {@code scriptSetSet} when there are several and none is
  * already required.
  *
  * @param identifier the identifier to analyze
  * @return self
  * @internal
  * @deprecated This API is ICU internal only.
  */
 @Deprecated
 public IdentifierInfo setIdentifier(String identifier) {
   this.identifier = identifier;
   clear();
   BitSet scriptsForCP = new BitSet();
   int cp;
   // Advance by the width of the code point just read (not of the index), so that a
   // supplementary character is consumed as a whole and its trail surrogate is not revisited.
   for (int i = 0; i < identifier.length(); i += Character.charCount(cp)) {
     cp = Character.codePointAt(identifier, i);
     // Store a representative character for each kind of decimal digit
     if (UCharacter.getType(cp) == UCharacterCategory.DECIMAL_DIGIT_NUMBER) {
       // Just store the zero character as a representative for comparison. Unicode guarantees it
       // is cp - value
       numerics.add(cp - UCharacter.getNumericValue(cp));
     }
     UScript.getScriptExtensions(cp, scriptsForCP);
     scriptsForCP.clear(UScript.COMMON);
     scriptsForCP.clear(UScript.INHERITED);
     switch (scriptsForCP.cardinality()) {
       case 0:
         break;
       case 1:
         // Single script, record it.
         requiredScripts.or(scriptsForCP);
         break;
       default:
         // The set now holds this BitSet instance, so a fresh one is needed for the next
         // code point.
         if (!requiredScripts.intersects(scriptsForCP) && scriptSetSet.add(scriptsForCP)) {
           scriptsForCP = new BitSet();
         }
         break;
     }
   }
   // Now make a final pass through to remove alternates that came before singles.
   // [Kana], [Kana Hira] => [Kana]
   // This is relatively infrequent, so doesn't have to be optimized.
   // We also compute any commonalities among the alternates.
   if (scriptSetSet.size() > 0) {
     commonAmongAlternates.set(0, UScript.CODE_LIMIT);
     for (Iterator<BitSet> it = scriptSetSet.iterator(); it.hasNext(); ) {
       final BitSet next = it.next();
       // [Kana], [Kana Hira] => [Kana]
       if (requiredScripts.intersects(next)) {
         it.remove();
       } else {
         // [[Arab Syrc Thaa]; [Arab Syrc]] => [[Arab Syrc]]
         commonAmongAlternates.and(next); // get the intersection.
         for (BitSet other : scriptSetSet) {
           if (next != other && contains(next, other)) {
             it.remove();
             break;
           }
         }
       }
     }
   }
   if (scriptSetSet.size() == 0) {
     commonAmongAlternates.clear();
   }
   return this;
 }