public static void main(String[] args) throws Exception { int size = Util.getPropertyInt("size", 100); double min = Util.getPropertyDouble("min", 0.01); double max = Util.getPropertyDouble("max", 0.9); Font font = new Font("serif", Font.PLAIN, size); String fpath = Util.getProperty("font", null); if (fpath != null) { font = Font.createFont(Font.TRUETYPE_FONT, new FileInputStream(fpath)); } for (char c = Character.MIN_VALUE + 1; c < Character.MAX_VALUE; ++c) { int type = Character.getType(c); if (type != Character.CONTROL && type != Character.FORMAT && type != Character.PRIVATE_USE && type != Character.SURROGATE && type != Character.UNASSIGNED && !Character.isMirrored(c) && !Character.isSpaceChar(c)) { String s = "" + c; if (Normalizer.normalize(s, NFKC).contains("\u0308")) continue; // TODO: adhoc UnigramMetrics m = new UnigramMetrics(s, size, false, true, font); if (min < m.getBlackness() && m.getBlackness() < max) { System.out.println("" + c + " " + (int) c); } } } }
/**
 * Stand-in for {@code Character.isAlphabetic(int)}, kept locally because only
 * Java 1.6 is required.
 *
 * <p>A code point is accepted when its general category is one of: uppercase,
 * lowercase, titlecase, modifier or other letter, or letter number.
 *
 * @param codePoint The input character.
 * @return True if the character is a Unicode alphabetic character.
 */
static boolean isAlphabetic(final int codePoint) {
    switch (Character.getType(codePoint)) {
        case Character.UPPERCASE_LETTER:
        case Character.LOWERCASE_LETTER:
        case Character.TITLECASE_LETTER:
        case Character.MODIFIER_LETTER:
        case Character.OTHER_LETTER:
        case Character.LETTER_NUMBER:
            return true;
        default:
            return false;
    }
}
/**
 * Reports whether the text contains at least one character whose general
 * category is {@code Character.OTHER_LETTER}.
 *
 * <p>NOTE(review): judging by the method name this serves as a heuristic for
 * name/address fields - confirm against the callers.
 *
 * @param text the text to inspect; may be {@code null}
 * @return {@code true} if any character is an "other letter"; {@code false}
 *         for {@code null}, empty text, or text without such a character
 */
private boolean isNameAddress(String text) {
    if (text == null) {
        return false;
    }
    for (char ch : text.toCharArray()) {
        if (Character.getType(ch) == Character.OTHER_LETTER) {
            return true;
        }
    }
    return false;
}
/**
 * Reports whether the text consists solely of decimal digits (any character
 * of category {@code Character.DECIMAL_DIGIT_NUMBER}), hyphens and round
 * brackets.
 *
 * <p>An empty string contains no disallowed character and therefore yields
 * {@code true}.
 *
 * @param text the text to inspect; may be {@code null}
 * @return {@code true} if every character is a digit, {@code '-'}, {@code '('}
 *         or {@code ')'}; {@code false} for {@code null} or when any other
 *         character is present
 */
private boolean isTelephoneZip(String text) {
    if (text == null) {
        return false;
    }
    for (char ch : text.toCharArray()) {
        boolean digit = Character.getType(ch) == Character.DECIMAL_DIGIT_NUMBER;
        boolean punctuation = ch == '-' || ch == '(' || ch == ')';
        if (!digit && !punctuation) {
            return false;
        }
    }
    return true;
}
/**
 * Reports whether the text consists solely of uppercase letters, lowercase
 * letters and decimal digits, judged by Unicode general category.
 *
 * <p>An empty string contains no disallowed character and therefore yields
 * {@code true}.
 *
 * @param text the text to inspect; may be {@code null}
 * @return {@code true} if every character is an uppercase letter, lowercase
 *         letter or decimal digit; {@code false} for {@code null} or when any
 *         other character is present
 */
private boolean isId(String text) {
    if (text == null) {
        return false;
    }
    final int length = text.length();
    for (int pos = 0; pos < length; pos++) {
        switch (Character.getType(text.charAt(pos))) {
            case Character.UPPERCASE_LETTER:
            case Character.LOWERCASE_LETTER:
            case Character.DECIMAL_DIGIT_NUMBER:
                break;
            default:
                return false;
        }
    }
    return true;
}
/**
 * Reads the Cangjie table from {@code cangjie3.txt} (UTF-8, in the working
 * directory) and prints a C array definition for it on standard output.
 *
 * <p>Each input line is expected to hold a Cangjie code, a single tab (or, if
 * there is no tab, a space) and the character it produces. A line consisting
 * of {@code #####} is a marker: every character read after it is flagged as
 * {@code hk}. Lines without a separator, with an empty code, with nothing
 * after the separator, or whose character the font cannot display are skipped
 * silently. Displayable characters that are neither letters nor one of the
 * accepted punctuation/symbol categories are reported on standard error.
 *
 * <p>The output is sorted by code. Each row holds the first five code
 * characters (padded with {@code 0}), the numeric character value and the
 * {@code hk} flag, followed by declarations of two {@code jint} arrays sized
 * by the number of accepted characters.
 *
 * <p>Any exception is caught and its stack trace printed; the input file is
 * closed in every case.
 */
public static void convertCangjieHK() {
    final int totalCangjieColumn = 7;
    final int codeColumns = 5;
    FileInputStream fis = null;
    BufferedReader reader = null;
    try {
        // Font(name, style, size): style comes before size.
        Font font = new Font("Droid Sans Fallback", Font.PLAIN, 16);
        ArrayList<String> codeList = new ArrayList<String>();
        HashMap<String, ArrayList<CangjieChar>> codeMap =
                new HashMap<String, ArrayList<CangjieChar>>();

        fis = new FileInputStream("cangjie3.txt");
        reader = new BufferedReader(new InputStreamReader(fis, "UTF-8"));

        int total = 0;
        boolean hkchar = false;

        System.out.println("#define CANGJIE_COLUMN " + totalCangjieColumn);
        System.out.println("const jchar cangjie[][CANGJIE_COLUMN] = {");

        String str;
        while ((str = reader.readLine()) != null) {
            if (str.equals("#####")) {
                hkchar = true;
                continue;
            }

            int index = str.indexOf('\t');
            if (index < 0) {
                index = str.indexOf(' ');
            }
            // Need a non-empty code before the separator and a character after it.
            if (index <= 0 || index + 1 >= str.length()) {
                continue;
            }

            char ch = str.charAt(index + 1);
            if (!font.canDisplay(ch)) {
                continue;
            }

            int type = Character.getType(ch);
            boolean accepted = Character.isLetter(ch)
                    || type == Character.START_PUNCTUATION
                    || type == Character.END_PUNCTUATION
                    || type == Character.OTHER_PUNCTUATION
                    || type == Character.MATH_SYMBOL
                    || type == Character.DASH_PUNCTUATION
                    || type == Character.CONNECTOR_PUNCTUATION
                    || type == Character.OTHER_SYMBOL
                    || type == Character.INITIAL_QUOTE_PUNCTUATION
                    || type == Character.FINAL_QUOTE_PUNCTUATION
                    || type == Character.SPACE_SEPARATOR;
            if (!accepted) {
                System.err.println("Character Not Found : " + ch + " " + type);
                continue;
            }

            String cangjie = str.substring(0, index).trim();
            ArrayList<CangjieChar> list = codeMap.get(cangjie);
            if (list == null) {
                // First occurrence of this code: register it in both structures.
                list = new ArrayList<CangjieChar>();
                codeMap.put(cangjie, list);
                codeList.add(cangjie);
            }
            list.add(new CangjieChar(ch, hkchar));
            total++;
        }

        Collections.sort(codeList);
        for (String code : codeList) {
            // The code columns are identical for every character sharing this code.
            StringBuilder prefix = new StringBuilder();
            for (int col = 0; col < codeColumns; col++) {
                if (col < code.length()) {
                    prefix.append("'").append(code.charAt(col)).append("', ");
                } else {
                    prefix.append(" 0, ");
                }
            }
            for (CangjieChar entry : codeMap.get(code)) {
                System.out.print(prefix);
                System.out.println(((int) entry.c) + ", " + (entry.hk ? 1 : 0) + ", ");
            }
        }

        System.out.println("};");
        System.out.println("jint cangjie_index[" + total + "];");
        System.out.println("jint cangjie_frequency[" + total + "];");
    } catch (Exception ex) {
        ex.printStackTrace();
    } finally {
        // Closing the reader also closes the wrapped streams; fall back to the
        // raw stream if the reader was never created.
        try {
            if (reader != null) {
                reader.close();
            } else if (fis != null) {
                fis.close();
            }
        } catch (Exception closeEx) {
            closeEx.printStackTrace();
        }
    }
}