Java Common 예제들

프로그래밍 언어: Java

네임스페이스/패키지 이름: ir.ac.itrc.qqa.semantic.util

클래스/타입: Common

hotexamples.com에서의 예제들: 4

Java Common - 4개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Java의 ir.ac.itrc.qqa.semantic.util.Common에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

log(2)

canonicalizeString(1)

logInline(1)

openFileForWriting(1)

printInline(1)

removeDiacritic(1)

removeParenthesis(1)

removeParenthesisWithException(1)

removePunctuations(1)

trimAll(1)

예제 #1

파일 보기

파일: Common.java 프로젝트: hasheminamin/HPR

  /**
   * Rewrites Persian religious honorifics in {@code text} into a canonical, abbreviated form.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>diacritics are removed via {@code Common.removeDiacritic};
   *   <li>spelled-out honorific phrases (optionally wrapped in parentheses) are replaced by the
   *       abbreviations {@code ( ع )}, {@code ( س )} and {@code ( ص )};
   *   <li>{@code حضرت NAME ( x )} is reduced to {@code حضرت NAME}, and every {@code حضرت NAME} is
   *       then rewritten to {@code NAME ( ص )} when NAME is {@code محمد}, otherwise to
   *       {@code NAME ( ع )};
   *   <li>the result is passed to {@code Common.removeParenthesisWithException} with the three
   *       abbreviation letters as exceptions;
   *   <li>zero-width non-joiners become spaces, one pass of double-space collapsing is applied and
   *       the string is trimmed.
   * </ol>
   *
   * @param text the text to canonicalize; must not be {@code null}
   * @return the canonicalized text, or an empty string when {@code text} is empty
   */
  public static String canonicalizeString(String text) {
    if (text.isEmpty()) {
      return "";
    }

    text = Common.removeDiacritic(text);

    // Spelled-out honorifics -> abbreviated, parenthesized form. Order matters: longer phrases
    // are handled before their shorter prefixes.
    text = text.replaceAll("(\\(\\s*)?علیها.?لسلام(\\s*\\))?", "( ع )");
    text = text.replaceAll("(\\(\\s*)?علیها.?سلام(\\s*\\))?", "( ع )");
    text = text.replaceAll("(\\(\\s*)?علیها.?السلام(\\s*\\))?", "( ع )");
    text = text.replaceAll("(\\(\\s*)?علیه.?السلام(\\s*\\))?", "( ع )");
    text = text.replaceAll("(\\(\\s*)?سلام.?الله.?علیه(\\(\\s*)?(\\s*\\))?", "( س )");
    // text = text.replaceAll("(\\(\\s*)?صلی.?الله.?علیه.?و.?آله.?و.?سلم(\\s*\\))?", "( ص )");
    text = text.replaceAll("(\\(\\s*)?صلی.?الله.?علیه.?و.?آله(\\s*\\))?", "( ص )");
    text = text.replaceAll("(\\(\\s*)?صلی.?الله(\\s*\\))?", "( ص )");
    text = text.replace("()", "");
    text = text.replace("(  )", ""); // tokenized version

    // converting حضرت محمد (ص) and حضرت محمد to canonical form محمد (ص)

    // First drop an honorific that already follows "حضرت NAME" so that both spellings look alike.
    text = text.replaceAll("حضرت ([^ ]+) \\(\\s*ص\\s*\\)", "حضرت $1");
    text = text.replaceAll("حضرت ([^ ]+) \\(\\s*ع\\s*\\)", "حضرت $1");
    text = text.replaceAll("حضرت ([^ ]+) \\(\\s*س\\s*\\)", "حضرت $1");
    text = text.replace("  ", " ");

    // The pattern below does not capture a trailing honorific parenthesis (original author's
    // note), which is why honorifics are stripped above and re-appended inside the loop.
    // NOTE(review): the negative lookbehind sits before "\\s*", so it is evaluated before any
    // separating whitespace is consumed; confirm that "آن حضرت ..." is really excluded.
    Pattern pattern = Pattern.compile("(?<!آن )\\s*(حضرت [^ ]+)");
    Matcher matcher = pattern.matcher(text);

    String modified = text;

    while (matcher.find()) {
      String search = matcher.group(1);

      // Skip the title and the single space that follows it.
      String name = search.substring("حضرت".length() + 1);

      String honorific;
      if (name.equals("محمد")) {
        honorific = " ( ص )";
      } else {
        honorific = " ( ع )";
      }

      // Replaces every occurrence of the matched phrase, not only the current match.
      modified = modified.replace(search, name + honorific);
    }

    text = Common.removeParenthesisWithException(modified, "ص", "ع", "س");

    return text.replace('\u200C', ' ').replace("  ", " ").trim();
  }

예제 #2

파일 보기

파일: Common.java 프로젝트: hasheminamin/HPR

  /**
   * Produces lexical variants of {@code original}.
   *
   * <p>The canonical form (see {@code Common.canonicalizeString}) is the seed variant. It is then
   * stripped of honorific phrases and abbreviations and handed to
   * {@code probeLexicalTransformation}, whose results are merged in. The input string itself is
   * never part of the result.
   *
   * @param original the text to derive variants from; must not be {@code null}
   * @return the collected variants in no particular order; empty when {@code original} is empty
   */
  public static ArrayList<String> getLexicalTransformations(String original) {
    if (original.isEmpty()) {
      return new ArrayList<String>();
    }

    HashSet<String> variants = new HashSet<String>();
    HashSet<String> discovered = new HashSet<String>();

    String canonical = Common.canonicalizeString(original);
    variants.add(canonical);

    // NOTE(review): the four values below are computed but never added to any set, so they do
    // not influence the result. They look like variants that were meant to be collected;
    // confirm the intent before either wiring them in or deleting them.
    String parenthesisLess = Common.removeParenthesis(canonical);
    String halfSpaceLess = canonical.replace('\u200C', ' ');
    String spaceLess = canonical.replace(' ', '\u200C');
    String puncLess = Common.removePunctuations(canonical);

    // Applied strictly in this order; each match is removed from the candidate.
    String[] honorificPatterns = {
      "\\bعلیهالسلام\\b",
      "\\bعلیها السلام\\b",
      "\\bعلیه السلام\\b",
      "\\bعلیه‌السلام\\b",
      "\\(\\s*ع\\s*\\)",
      "\\(\\s*س\\s*\\)",
      "\\(\\s*ص\\s*\\)",
      "\\bصلی الله علیه وآله\\b",
      "\\bصلی الله علیه و آله\\b",
      "\\bصلی الله علیه و آله و سلم\\b",
      "\\bصلی الله علیه وآله و سلم\\b",
      "\\bصلی الله علیه وآله وسلم\\b"
    };

    for (String variant : variants) {
      String stripped = variant;

      for (String honorificPattern : honorificPatterns) {
        stripped = stripped.replaceAll(honorificPattern, "");
      }

      // Drop parentheses left empty by the removals; the second form is the tokenized version.
      stripped = stripped.replace("()", "").replace("(  )", "");

      discovered.addAll(probeLexicalTransformation(stripped, variants));
    }

    variants.addAll(discovered);
    variants.remove(original);

    return new ArrayList<String>(variants);
  }

예제 #3

파일 보기

파일: Common.java 프로젝트: hasheminamin/HPR

  /**
   * Writes {@code payload} to the file at {@code path}.
   *
   * <p>The writer obtained from {@code Common.openFileForWriting} is always closed, even when the
   * write itself fails. Any {@link IOException} raised while writing or closing is reported
   * through {@code MyError.exit}.
   *
   * @param path location of the file to write
   * @param payload text to store in the file
   */
  public static void putFileContent(String path, String payload) {
    BufferedWriter outFile = Common.openFileForWriting(path);

    try {
      try {
        outFile.write(payload);
      } finally {
        // Close on both the success and the failure path so the file handle is not leaked.
        outFile.close();
      }
    } catch (IOException e) {
      MyError.exit("Couldn't write to file '" + path + "'!");
    }
  }

예제 #4

파일 보기

파일: Common.java 프로젝트: hasheminamin/HPR

  /**
   * Normalizes text that has not been tokenized yet.
   *
   * <p>Arabic letter variants of kaf and yeh are replaced by their Persian counterparts, carriage
   * returns and line feeds become spaces, zero-width spaces (U+200B) become zero-width non-joiners
   * (U+200C), and the Persian and Arabic-Indic digit strings are handed to
   * {@code replaceCorresponding} together with the ASCII digits. Finally one pass of double-space
   * collapsing is applied and the text is passed to {@code Common.trimAll} with double quote,
   * space and U+200C as the trim set.
   *
   * @param text input text; must not be {@code null}
   * @return normalized text
   */
  public static String normalizeNotTokenized(String text) {
    // TODO: some concepts contain '\r\n' and need it. Find a way to drop the "\r" and "\n"
    // replacements below. Known issue if that is done: the permanent concept ids file.
    //
    // The pairs are applied top to bottom. The second group is kept exactly as in the original
    // source even where it looks redundant, since the literals may differ by code point.
    String[][] letterFixes = {
      {"ك", "ک"},
      {"ي", "ی"},
      {"ى", "ی"},
      {"\r", " "},
      {"\n", " "},
      {"ي", "ی"},
      {"ی", "ی"},
      {"ى", "ی"},
      {"ك", "ک"},
      {"ک", "ک"}
    };

    String normalized = text;
    for (String[] fix : letterFixes) {
      normalized = normalized.replace(fix[0], fix[1]);
    }

    // zero-width space -> zero-width non-joiner
    normalized = normalized.replace('\u200B', '\u200C');
    // arabic letter ye maksura
    normalized = normalized.replace("\u0649", "ی");

    normalized = replaceCorresponding(normalized, "۰۱۲۳۴۵۶۷۸۹", "0123456789");
    normalized = replaceCorresponding(normalized, "٠١٢٣٤٥٦٧٨٩", "0123456789");

    // Punctuation spacing is deliberately not corrected here because doing so contradicts the
    // tokenizer's output.

    normalized = normalized.replace("  ", " ");

    return Common.trimAll(normalized, "\" \u200C");
  }