Ejemplo n.º 1
0
  /**
   * Prints every candidate BMP character whose measured blackness lies strictly between the
   * {@code min} and {@code max} properties.
   *
   * <p>Configuration is read through {@code Util}: {@code size} (default 100), {@code min}
   * (default 0.01), {@code max} (default 0.9) and an optional {@code font} property holding the
   * path of a TrueType font file. Without {@code font}, a plain logical "serif" font is used.
   *
   * <p>Each matching character is written to standard output as the character, a space, and its
   * numeric code.
   *
   * @param args unused
   * @throws Exception if the font file cannot be read or parsed, or if measuring fails
   */
  public static void main(String[] args) throws Exception {
    int size = Util.getPropertyInt("size", 100);
    double min = Util.getPropertyDouble("min", 0.01);
    double max = Util.getPropertyDouble("max", 0.9);
    Font font = new Font("serif", Font.PLAIN, size);
    String fpath = Util.getProperty("font", null);
    if (fpath != null) {
      // Font.createFont does not close the stream it is given, so close it here.
      FileInputStream fontStream = new FileInputStream(fpath);
      try {
        // createFont always yields a 1-point font; scale it so that a custom font has the same
        // point size as the default font built above.
        font = Font.createFont(Font.TRUETYPE_FONT, fontStream).deriveFont((float) size);
      } finally {
        fontStream.close();
      }
    }

    // Walks U+0001 .. U+FFFE; the two extreme char values are deliberately left out by the bounds.
    for (char c = Character.MIN_VALUE + 1; c < Character.MAX_VALUE; ++c) {
      int type = Character.getType(c);
      if (type != Character.CONTROL
          && type != Character.FORMAT
          && type != Character.PRIVATE_USE
          && type != Character.SURROGATE
          && type != Character.UNASSIGNED
          && !Character.isMirrored(c)
          && !Character.isSpaceChar(c)) {
        String s = "" + c;
        // TODO: ad hoc filter that drops anything whose NFKC form contains U+0308
        // (combining diaeresis).
        if (Normalizer.normalize(s, NFKC).contains("\u0308")) continue;
        UnigramMetrics m = new UnigramMetrics(s, size, false, true, font);
        // Measure once and reuse the value for both bounds.
        double blackness = m.getBlackness();
        if (min < blackness && blackness < max) {
          System.out.println("" + c + " " + (int) c);
        }
      }
    }
  }
Ejemplo n.º 2
0
 /**
  * Tests whether a code point falls into one of the letter-like Unicode general categories:
  * uppercase, lowercase, titlecase, modifier or other letter, or letter number.
  *
  * <p>Stand-in for {@code Character.isAlphabetic(int)}, which cannot be called because only Java
  * 1.6 is required.
  *
  * <p>NOTE(review): the decision is made from the general category alone; confirm whether the JDK
  * method considers anything beyond that if exact parity matters.
  *
  * @param codePoint The input character.
  * @return True if the character's general category is one of the categories listed above.
  */
 static boolean isAlphabetic(final int codePoint) {
   switch (Character.getType(codePoint)) {
     case Character.UPPERCASE_LETTER:
     case Character.LOWERCASE_LETTER:
     case Character.TITLECASE_LETTER:
     case Character.MODIFIER_LETTER:
     case Character.OTHER_LETTER:
     case Character.LETTER_NUMBER:
       return true;
     default:
       return false;
   }
 }
Ejemplo n.º 3
0
 /**
  * Reports whether the text contains at least one character of the Unicode general category
  * {@code OTHER_LETTER}.
  *
  * <p>The text is examined code point by code point, so letters outside the Basic Multilingual
  * Plane (stored as surrogate pairs) are classified by their real category instead of being seen
  * as two {@code SURROGATE} halves.
  *
  * @param text the text to inspect; may be {@code null}
  * @return {@code true} if any code point is an {@code OTHER_LETTER}; {@code false} otherwise,
  *     including for {@code null} or empty text
  */
 private boolean isNameAddress(String text) {
   if (text == null) {
     return false;
   }
   for (int i = 0; i < text.length(); ) {
     int codePoint = text.codePointAt(i);
     if (Character.getType(codePoint) == Character.OTHER_LETTER) {
       return true;
     }
     // Advance by one or two chars depending on whether this was a surrogate pair.
     i += Character.charCount(codePoint);
   }
   return false;
 }
Ejemplo n.º 4
0
 /**
  * Reports whether every character of the text is either a decimal digit (Unicode general
  * category {@code DECIMAL_DIGIT_NUMBER}) or one of {@code '-'}, {@code '('}, {@code ')'}.
  *
  * @param text the text to inspect; may be {@code null}
  * @return {@code false} for {@code null} or as soon as any other character is found;
  *     {@code true} otherwise (which includes the empty string)
  */
 private boolean isTelephoneZip(String text) {
   if (text == null) {
     return false;
   }
   for (char symbol : text.toCharArray()) {
     boolean digit = Character.getType(symbol) == Character.DECIMAL_DIGIT_NUMBER;
     boolean separator = symbol == '-' || symbol == '(' || symbol == ')';
     if (!digit && !separator) {
       return false;
     }
   }
   return true;
 }
Ejemplo n.º 5
0
 /**
  * Reports whether the text consists solely of characters whose Unicode general category is
  * {@code UPPERCASE_LETTER}, {@code LOWERCASE_LETTER} or {@code DECIMAL_DIGIT_NUMBER}.
  *
  * @param text the text to inspect; may be {@code null}
  * @return {@code false} for {@code null} or as soon as a character of any other category is
  *     found; {@code true} otherwise (which includes the empty string)
  */
 private boolean isId(String text) {
   if (text == null) {
     return false;
   }
   for (char symbol : text.toCharArray()) {
     switch (Character.getType(symbol)) {
       case Character.UPPERCASE_LETTER:
       case Character.LOWERCASE_LETTER:
       case Character.DECIMAL_DIGIT_NUMBER:
         break;
       default:
         return false;
     }
   }
   return true;
 }
Ejemplo n.º 6
0
  /**
   * Converts the Cangjie table in {@code cangjie3.txt} (read as UTF-8 from the working directory)
   * into C source text printed on standard output.
   *
   * <p>Each input line is expected to hold a code, a tab (or, failing that, a space) and then the
   * character the code maps to. A line consisting of {@code #####} switches on the "hk" flag for
   * every entry that follows. Entries are kept only when the character can be displayed by the
   * "Droid Sans Fallback" font and is a letter, punctuation, math/other symbol or space separator;
   * displayable characters of any other category are reported on standard error.
   *
   * <p>The output is a {@code CANGJIE_COLUMN} define, a {@code cangjie} array with one row per
   * kept entry (five code columns, zero-padded, followed by the character value and the hk flag),
   * sorted by code, and declarations for {@code cangjie_index} and {@code cangjie_frequency}
   * sized to the number of rows.
   *
   * <p>Any exception is caught and its stack trace printed; output produced up to that point
   * remains on standard output.
   */
  public static void convertCangjieHK() {
    try {
      // Font's constructor takes (name, style, size), in that order.
      Font font = new Font("Droid Sans Fallback", Font.PLAIN, 16);
      ArrayList<String> codeList = new ArrayList<String>();
      HashMap<String, ArrayList<CangjieChar>> codeMap =
          new HashMap<String, ArrayList<CangjieChar>>();
      int totalCangjieColumn = 7;
      int codeColumns = 5; // code characters emitted per row, before the char value and hk flag
      int total = 0;
      boolean hkchar = false;

      BufferedReader reader =
          new BufferedReader(new InputStreamReader(new FileInputStream("cangjie3.txt"), "UTF-8"));
      try {
        System.out.println("#define CANGJIE_COLUMN " + totalCangjieColumn);
        System.out.println("const jchar cangjie[][CANGJIE_COLUMN] = {");

        String str;
        while ((str = reader.readLine()) != null) {
          if (str.equals("#####")) {
            hkchar = true;
            continue;
          }
          int index = str.indexOf('\t');
          if (index < 0) index = str.indexOf(' ');
          // Skip lines with no code before the separator, or with nothing after it.
          if (index <= 0 || index + 1 >= str.length()) continue;

          char ch = str.charAt(index + 1);
          if (!font.canDisplay(ch)) continue;

          int type = Character.getType(ch);
          boolean accepted =
              Character.isLetter(ch)
                  || type == Character.START_PUNCTUATION
                  || type == Character.END_PUNCTUATION
                  || type == Character.OTHER_PUNCTUATION
                  || type == Character.MATH_SYMBOL
                  || type == Character.DASH_PUNCTUATION
                  || type == Character.CONNECTOR_PUNCTUATION
                  || type == Character.OTHER_SYMBOL
                  || type == Character.INITIAL_QUOTE_PUNCTUATION
                  || type == Character.FINAL_QUOTE_PUNCTUATION
                  || type == Character.SPACE_SEPARATOR;
          if (!accepted) {
            System.err.println("Character Not Found : " + ch + " " + type);
            continue;
          }

          String cangjie = str.substring(0, index).trim();
          ArrayList<CangjieChar> list = codeMap.get(cangjie);
          if (list == null) {
            // First time this code is seen: register it in both the map and the ordering list,
            // which avoids a linear scan of codeList for every input line.
            list = new ArrayList<CangjieChar>();
            codeMap.put(cangjie, list);
            codeList.add(cangjie);
          }
          list.add(new CangjieChar(ch, hkchar));
          total++;
        }
      } finally {
        // Closing the outermost reader also closes the wrapped streams, even on failure.
        reader.close();
      }

      Collections.sort(codeList);

      for (String code : codeList) {
        for (CangjieChar entry : codeMap.get(code)) {
          for (int col = 0; col < codeColumns; col++) {
            if (col < code.length()) System.out.print("'" + code.charAt(col) + "', ");
            else System.out.print("  0, ");
          }
          System.out.println(((int) entry.c) + ", " + (entry.hk ? 1 : 0) + ", ");
        }
      }

      System.out.println("};");
      System.out.println("jint cangjie_index[" + total + "];");
      System.out.println("jint cangjie_frequency[" + total + "];");
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }