public static void main(String[] args) throws Exception { int size = Util.getPropertyInt("size", 100); double min = Util.getPropertyDouble("min", 0.01); double max = Util.getPropertyDouble("max", 0.9); Font font = new Font("serif", Font.PLAIN, size); String fpath = Util.getProperty("font", null); if (fpath != null) { font = Font.createFont(Font.TRUETYPE_FONT, new FileInputStream(fpath)); } for (char c = Character.MIN_VALUE + 1; c < Character.MAX_VALUE; ++c) { int type = Character.getType(c); if (type != Character.CONTROL && type != Character.FORMAT && type != Character.PRIVATE_USE && type != Character.SURROGATE && type != Character.UNASSIGNED && !Character.isMirrored(c) && !Character.isSpaceChar(c)) { String s = "" + c; if (Normalizer.normalize(s, NFKC).contains("\u0308")) continue; // TODO: adhoc UnigramMetrics m = new UnigramMetrics(s, size, false, true, font); if (min < m.getBlackness() && m.getBlackness() < max) { System.out.println("" + c + " " + (int) c); } } } }
private StringBuffer getXMLdata(String XMLuri) throws MalformedURLException { // IP's defined in the .ini file are not allowed to act as XML source. (blacklist) // The remote response content header must be set to xml, otherwise the request will not result // in a transformation. URL url = new URL("http://localhost/index.html"); StringBuffer result = new StringBuffer("Error!"); Boolean allowed = true; final String bomChar = "\uFEFF"; try { url = new URL(XMLuri); } catch (MalformedURLException e) { result.append(" Invalid XML uri!"); return (result); } String reqHostIp = ""; // to prevent URL Obfuscation, first get the real IP addr that is requested, then check the // blacklist try { InetAddress resolve = InetAddress.getByName(url.getHost()); reqHostIp = resolve.getHostAddress(); } catch (UnknownHostException e) { result.append("The given XML uri is not allowed!"); return (result); } for (int i = 0; i < this.xml_blacklist.length; i++) { String hostIP = new String( this.xml_blacklist[i].trim().substring(1, this.xml_blacklist[i].trim().length() - 1)); if (reqHostIp.startsWith(hostIP)) { allowed = false; } } if (allowed != true) { for (int i = 0; i < this.xml_whitelist.length; i++) { String uri = new String( this.xml_whitelist[i] .trim() .substring(1, this.xml_whitelist[i].trim().length() - 1)); log.debug(uri); if (XMLuri.startsWith(uri)) { allowed = true; } } } if (allowed != true) { result.append(" The given XML uri is not allowed!"); log.debug( this.working_for + " : Tried an XML source that is on the XML source blacklist : " + XMLuri); return (result); } try { URLConnection con = url.openConnection(); con.connect(); String remoteContentType = con.getContentType(); log.fatal("'" + remoteContentType + "'"); if (remoteContentType == null) { remoteContentType = "xml"; } if (remoteContentType.toLowerCase().indexOf("xml") < 0) { result.append(" The content type of the given XML paramerter is not XML."); return (result); } else { result = new StringBuffer(""); InputStream 
inputStream = con.getInputStream(); BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream, "UTF8")); String line = bufferedReader.readLine(); int i = 0; while (line != null) { Character c = null; try { c = line.charAt(0); } catch (StringIndexOutOfBoundsException e) { } if (i == 0 && Character.getType(c) == 16) { result.append("<?xml version='1.0' encoding='utf-8'?>"); line = bufferedReader.readLine(); } else { result.append(line); line = bufferedReader.readLine(); i = i + 1; } } bufferedReader.close(); } } catch (IOException e) { } if (result.length() == 0) { this.log.debug(this.working_for + " : No data from sru source : " + url); } else { this.log.debug(this.working_for + " : Done getting data from source : " + url); } return (result); }
/**
 * Reads the Cangjie table from {@code cangjie3.txt} (UTF-8) and prints it to standard output as
 * C source: a {@code cangjie} array with one row per character, followed by declarations of
 * {@code cangjie_index} and {@code cangjie_frequency} sized to the number of rows.
 *
 * <p>Each input line is a code, a tab (or, failing that, a space), and then the character. A
 * line consisting of {@code #####} marks every following character as a Hong Kong character.
 * Rows are emitted in sorted code order; each row holds the first five code letters (padded with
 * 0), the character's numeric value and the Hong Kong flag.
 *
 * <p>Characters the "Droid Sans Fallback" font cannot display are skipped silently; displayable
 * characters outside the accepted categories are reported on standard error. Any exception is
 * printed with its stack trace and ends the conversion.
 */
public static void convertCangjieHK() {
  try {
    // Font takes (name, style, size); style and size used to be passed in the wrong order.
    Font font = new Font("Droid Sans Fallback", Font.PLAIN, 16);
    ArrayList<String> codeList = new ArrayList<String>();
    HashMap<String, ArrayList<CangjieChar>> codeMap =
        new HashMap<String, ArrayList<CangjieChar>>();
    int totalCangjieColumn = 7;
    int codeColumns = 5;
    int total = 0;
    boolean hkchar = false;

    FileInputStream fis = new FileInputStream("cangjie3.txt");
    try {
      BufferedReader reader = new BufferedReader(new InputStreamReader(fis, "UTF-8"));

      System.out.println("#define CANGJIE_COLUMN " + totalCangjieColumn);
      System.out.println("const jchar cangjie[][CANGJIE_COLUMN] = {");

      String str;
      while ((str = reader.readLine()) != null) {
        if (str.equals("#####")) {
          hkchar = true;
          continue;
        }

        int index = str.indexOf('\t');
        if (index < 0) index = str.indexOf(' ');
        // Needs a non-empty code before the separator and a character after it.
        if (index <= 0 || index + 1 >= str.length()) continue;

        char ch = str.charAt(index + 1);
        if (!font.canDisplay(ch)) continue;

        if (!isCangjieOutputChar(ch)) {
          System.err.println("Character Not Found : " + ch + " " + Character.getType(ch));
          continue;
        }

        String cangjie = str.substring(0, index).trim();
        ArrayList<CangjieChar> list = codeMap.get(cangjie);
        if (list == null) {
          // First time this code is seen: register it in both the map and the ordered list.
          list = new ArrayList<CangjieChar>();
          codeMap.put(cangjie, list);
          codeList.add(cangjie);
        }
        list.add(new CangjieChar(ch, hkchar));
        total++;
      }

      Collections.sort(codeList);
      for (String code : codeList) {
        for (CangjieChar entry : codeMap.get(code)) {
          for (int col = 0; col < codeColumns; col++) {
            if (col < code.length()) System.out.print("'" + code.charAt(col) + "', ");
            else System.out.print(" 0, ");
          }
          System.out.println(((int) entry.c) + ", " + (entry.hk ? 1 : 0) + ", ");
        }
      }

      System.out.println("};");
      System.out.println("jint cangjie_index[" + total + "];");
      System.out.println("jint cangjie_frequency[" + total + "];");
    } finally {
      // Closing the underlying stream releases the file even when reading or printing failed.
      fis.close();
    }
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}

/** Tells whether {@code ch} is a letter or one of the punctuation/symbol/space types emitted. */
private static boolean isCangjieOutputChar(char ch) {
  if (Character.isLetter(ch)) {
    return true;
  }
  int type = Character.getType(ch);
  return type == Character.START_PUNCTUATION
      || type == Character.END_PUNCTUATION
      || type == Character.OTHER_PUNCTUATION
      || type == Character.MATH_SYMBOL
      || type == Character.DASH_PUNCTUATION
      || type == Character.CONNECTOR_PUNCTUATION
      || type == Character.OTHER_SYMBOL
      || type == Character.INITIAL_QUOTE_PUNCTUATION
      || type == Character.FINAL_QUOTE_PUNCTUATION
      || type == Character.SPACE_SEPARATOR;
}