예제 #1
0
  /**
   * Prints every character in the scanned range whose blackness, as reported by
   * {@code UnigramMetrics}, lies strictly between two thresholds.
   *
   * <p>Settings are obtained through the {@code Util.getProperty*} helpers (presumably backed by
   * system properties; confirm against {@code Util}):
   *
   * <ul>
   *   <li>{@code size} (default 100): font size, also passed to {@code UnigramMetrics};
   *   <li>{@code min} / {@code max} (defaults 0.01 / 0.9): exclusive blackness bounds;
   *   <li>{@code font} (optional): path of a TrueType font file used instead of "serif".
   * </ul>
   *
   * <p>Each accepted character is written to standard output as the character, a space, and its
   * numeric code.
   *
   * @param args not used
   * @throws Exception if the font file cannot be opened or parsed, or if a helper propagates a
   *     failure
   */
  public static void main(String[] args) throws Exception {
    int size = Util.getPropertyInt("size", 100);
    double min = Util.getPropertyDouble("min", 0.01);
    double max = Util.getPropertyDouble("max", 0.9);
    Font font = new Font("serif", Font.PLAIN, size);
    String fpath = Util.getProperty("font", null);
    if (fpath != null) {
      // Font.createFont does not close the stream it is given, so it must be closed here.
      FileInputStream fontStream = new FileInputStream(fpath);
      try {
        // createFont returns a 1pt PLAIN font; derive it to the requested size so a custom
        // font is treated the same way as the default "serif" font above.
        font = Font.createFont(Font.TRUETYPE_FONT, fontStream).deriveFont((float) size);
      } finally {
        fontStream.close();
      }
    }

    // The bound is deliberately "<" rather than "<=": with a char counter, "<= MAX_VALUE"
    // would wrap around and never terminate. U+0000 and U+FFFF are therefore not scanned.
    for (char c = Character.MIN_VALUE + 1; c < Character.MAX_VALUE; ++c) {
      int type = Character.getType(c);
      if (type != Character.CONTROL
          && type != Character.FORMAT
          && type != Character.PRIVATE_USE
          && type != Character.SURROGATE
          && type != Character.UNASSIGNED
          && !Character.isMirrored(c)
          && !Character.isSpaceChar(c)) {
        String s = "" + c;
        // Skip characters whose NFKC form contains a combining diaeresis (U+0308).
        if (Normalizer.normalize(s, NFKC).contains("\u0308")) continue; // TODO: adhoc
        UnigramMetrics m = new UnigramMetrics(s, size, false, true, font);
        // Read the metric once instead of once per comparison.
        double blackness = m.getBlackness();
        if (min < blackness && blackness < max) {
          System.out.println("" + c + " " + (int) c);
        }
      }
    }
  }
예제 #2
0
  /**
   * Downloads the document at {@code XMLuri} and returns its text as one buffer.
   *
   * <p>Processing order:
   *
   * <ol>
   *   <li>The URI is parsed; the host is resolved to an IP address so that the blacklist check
   *       works on the real address rather than on an obfuscated host name.
   *   <li>The request is rejected when the resolved IP starts with any {@code xml_blacklist}
   *       entry, unless {@code XMLuri} itself starts with an {@code xml_whitelist} entry. The
   *       first and last character of every (trimmed) list entry are stripped before comparing.
   *   <li>The remote content type must contain "xml" (case-insensitive); a missing content type
   *       is treated as XML.
   *   <li>The body is read as UTF-8 line by line and concatenated <em>without</em> line
   *       separators. Until the first content line has been emitted, any line whose first
   *       character is of category {@link Character#FORMAT} (e.g. a byte order mark, U+FEFF) is
   *       replaced as a whole by a fixed XML declaration.
   * </ol>
   *
   * <p>On rejection the returned buffer starts with {@code "Error!"} followed by a reason. If an
   * I/O error occurs while fetching, the error is logged and whatever has been accumulated so
   * far is returned.
   *
   * @param XMLuri absolute URI of the XML source
   * @return the fetched text, or an {@code "Error!..."} message
   * @throws MalformedURLException not thrown by the current implementation (a malformed URI
   *     yields an error buffer instead); kept in the signature for existing callers
   */
  private StringBuffer getXMLdata(String XMLuri) throws MalformedURLException {

    // IP's defined in the .ini file are not allowed to act as XML source. (blacklist)
    // The remote response content header must be set to xml, otherwise the request will not result
    // in a transformation.

    StringBuffer result = new StringBuffer("Error!");

    URL url;
    try {
      url = new URL(XMLuri);
    } catch (MalformedURLException e) {
      result.append(" Invalid XML uri!");
      return result;
    }

    // To prevent URL obfuscation, first get the real IP address that is requested, then check
    // the blacklist.
    // NOTE(review): the host is presumably resolved again when the connection is opened, so a
    // host whose DNS answer changes in between could bypass this check - confirm whether that
    // matters for this deployment.
    String reqHostIp;
    try {
      reqHostIp = InetAddress.getByName(url.getHost()).getHostAddress();
    } catch (UnknownHostException e) {
      result.append("The given XML uri is not allowed!");
      return result;
    }

    boolean allowed = true;
    for (String entry : this.xml_blacklist) {
      String trimmed = entry.trim();
      // Drop the first and last character (presumably a quote or bracket from the .ini value).
      String blockedPrefix = trimmed.substring(1, trimmed.length() - 1);
      if (reqHostIp.startsWith(blockedPrefix)) {
        allowed = false;
      }
    }

    if (!allowed) {
      // A blacklisted address may still be reached through an explicitly whitelisted URI prefix.
      for (String entry : this.xml_whitelist) {
        String trimmed = entry.trim();
        String uri = trimmed.substring(1, trimmed.length() - 1);
        log.debug(uri);
        if (XMLuri.startsWith(uri)) {
          allowed = true;
        }
      }
    }

    if (!allowed) {
      result.append(" The given XML uri is not allowed!");
      log.debug(
          this.working_for
              + " : Tried an XML source that is on the XML source blacklist : "
              + XMLuri);
      return result;
    }

    BufferedReader bufferedReader = null;
    try {
      URLConnection con = url.openConnection();
      con.connect();
      String remoteContentType = con.getContentType();
      // Routine diagnostic output, so debug level rather than fatal.
      log.debug("'" + remoteContentType + "'");
      if (remoteContentType == null) {
        remoteContentType = "xml";
      }
      if (remoteContentType.toLowerCase().indexOf("xml") < 0) {
        result.append(" The content type of the given XML paramerter is not XML.");
        return result;
      }

      result = new StringBuffer("");
      bufferedReader =
          new BufferedReader(new InputStreamReader(con.getInputStream(), "UTF8"));
      boolean contentSeen = false;
      String line;
      while ((line = bufferedReader.readLine()) != null) {
        // An empty line has no first character; it is ordinary content, not a BOM line.
        boolean startsWithFormatChar =
            line.length() > 0 && Character.getType(line.charAt(0)) == Character.FORMAT;
        if (!contentSeen && startsWithFormatChar) {
          // The whole line is replaced, not just the leading format character.
          result.append("<?xml version='1.0' encoding='utf-8'?>");
        } else {
          result.append(line);
          contentSeen = true;
        }
      }
    } catch (IOException e) {
      // Keep the best-effort contract (return what we have) but do not hide the failure.
      log.error(this.working_for + " : I/O error while reading XML source " + url + " : " + e);
    } finally {
      if (bufferedReader != null) {
        try {
          bufferedReader.close();
        } catch (IOException ignored) {
          // Nothing useful can be done if closing fails; the data has already been read.
        }
      }
    }

    if (result.length() == 0) {
      this.log.debug(this.working_for + " : No data from sru source : " + url);
    } else {
      this.log.debug(this.working_for + " : Done getting data from source : " + url);
    }

    return result;
  }
예제 #3
0
  /**
   * Converts the Cangjie table in {@code cangjie3.txt} (UTF-8, read from the working directory)
   * into C source text written to standard output.
   *
   * <p>Input format as consumed here: each line is a code, then a tab (or, if the line has no
   * tab, a space), then the character. A line consisting solely of {@code #####} is a marker;
   * every entry after it is flagged (the {@code hk} flag, presumably "Hong Kong character").
   * Lines without a separator, with an empty code, or with nothing after the separator are
   * skipped, as are characters the "Droid Sans Fallback" font cannot display. Displayable
   * characters of an unsupported Unicode category are reported on standard error.
   *
   * <p>Output: a {@code CANGJIE_COLUMN} define, a {@code cangjie} array with one row per
   * character (five code columns padded with 0, the character's numeric value, the flag), sorted
   * by code, followed by declarations of {@code cangjie_index} and {@code cangjie_frequency}
   * sized to the number of rows. Only the first five characters of a code are emitted.
   *
   * <p>Any exception is caught and its stack trace printed; nothing is propagated.
   */
  public static void convertCangjieHK() {
    final int totalCangjieColumn = 7;
    // Number of code columns per row; the remaining two columns are the character and the flag.
    final int codeColumns = 5;
    try {
      // Font(name, style, size): the style comes before the size.
      Font font = new Font("Droid Sans Fallback", Font.PLAIN, 16);
      ArrayList<String> codeList = new ArrayList<String>();
      HashMap<String, ArrayList<CangjieChar>> codeMap =
          new HashMap<String, ArrayList<CangjieChar>>();
      int total = 0;
      boolean hkchar = false;

      BufferedReader reader =
          new BufferedReader(new InputStreamReader(new FileInputStream("cangjie3.txt"), "UTF-8"));
      try {
        System.out.println("#define CANGJIE_COLUMN " + totalCangjieColumn);
        System.out.println("const jchar cangjie[][CANGJIE_COLUMN] = {");

        String str;
        while ((str = reader.readLine()) != null) {
          if (str.equals("#####")) {
            hkchar = true;
            continue;
          }
          int index = str.indexOf('\t');
          if (index < 0) index = str.indexOf(' ');
          // Need a non-empty code before the separator and a character after it.
          if (index <= 0 || index + 1 >= str.length()) continue;

          char ch = str.charAt(index + 1);
          if (!font.canDisplay(ch)) continue;

          if (!isCangjieOutputChar(ch)) {
            System.err.println("Character Not Found : " + ch + " " + Character.getType(ch));
            continue;
          }

          String cangjie = str.substring(0, index).trim();
          ArrayList<CangjieChar> list = codeMap.get(cangjie);
          if (list == null) {
            // First time this code is seen: register it in both the map and the ordered list.
            list = new ArrayList<CangjieChar>();
            codeMap.put(cangjie, list);
            codeList.add(cangjie);
          }
          list.add(new CangjieChar(ch, hkchar));
          total++;
        }

        Collections.sort(codeList);

        for (String code : codeList) {
          for (CangjieChar entry : codeMap.get(code)) {
            StringBuilder row = new StringBuilder();
            for (int col = 0; col < codeColumns; col++) {
              if (col < code.length()) {
                row.append("'").append(code.charAt(col)).append("', ");
              } else {
                row.append("  0, ");
              }
            }
            row.append((int) entry.c).append(", ").append(entry.hk ? 1 : 0).append(", ");
            System.out.println(row);
          }
        }

        System.out.println("};");
        System.out.println("jint cangjie_index[" + total + "];");
        System.out.println("jint cangjie_frequency[" + total + "];");
      } finally {
        // Closing the outermost reader also closes the wrapped streams.
        reader.close();
      }
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }

  /** Returns whether {@code ch} is a letter or one of the punctuation/symbol/space categories emitted into the table. */
  private static boolean isCangjieOutputChar(char ch) {
    if (Character.isLetter(ch)) {
      return true;
    }
    switch (Character.getType(ch)) {
      case Character.START_PUNCTUATION:
      case Character.END_PUNCTUATION:
      case Character.OTHER_PUNCTUATION:
      case Character.MATH_SYMBOL:
      case Character.DASH_PUNCTUATION:
      case Character.CONNECTOR_PUNCTUATION:
      case Character.OTHER_SYMBOL:
      case Character.INITIAL_QUOTE_PUNCTUATION:
      case Character.FINAL_QUOTE_PUNCTUATION:
      case Character.SPACE_SEPARATOR:
        return true;
      default:
        return false;
    }
  }