Пример #1
0
  /**
   * Character class treated as "part of a word" when checking that an honorific phrase stands on
   * its own. Spelled out explicitly instead of using {@code \b}: since JDK 19 {@code \b} only
   * recognises ASCII word characters by default, so it no longer detects boundaries around
   * Persian letters.
   */
  private static final String HONORIFIC_WORD_CHAR = "[\\p{L}\\p{Mn}\\p{Nd}_]";

  /**
   * Honorific patterns, applied in array order. Longer phrases come before the shorter phrases
   * that are their prefixes; otherwise the short form would strip the prefix first and leave a
   * dangling "و سلم" behind.
   */
  private static final java.util.regex.Pattern[] HONORIFIC_PATTERNS = {
    honorificPhrase("علیهالسلام"),
    honorificPhrase("علیها السلام"),
    honorificPhrase("علیه السلام"),
    honorificPhrase("علیه\u200Cالسلام"), // joined with a zero-width non-joiner
    java.util.regex.Pattern.compile("\\(\\s*ع\\s*\\)"),
    java.util.regex.Pattern.compile("\\(\\s*س\\s*\\)"),
    java.util.regex.Pattern.compile("\\(\\s*ص\\s*\\)"),
    honorificPhrase("صلی الله علیه و آله و سلم"),
    honorificPhrase("صلی الله علیه وآله و سلم"),
    honorificPhrase("صلی الله علیه وآله وسلم"),
    honorificPhrase("صلی الله علیه وآله"),
    honorificPhrase("صلی الله علیه و آله"),
  };

  /** Compiles a pattern matching {@code phrase} only when it is not glued to a neighbouring word. */
  private static java.util.regex.Pattern honorificPhrase(String phrase) {
    return java.util.regex.Pattern.compile(
        "(?<!" + HONORIFIC_WORD_CHAR + ")"
            + java.util.regex.Pattern.quote(phrase)
            + "(?!" + HONORIFIC_WORD_CHAR + ")");
  }

  /** Removes every known honorific from {@code text}, plus the empty parentheses left behind. */
  private static String stripHonorifics(String text) {
    String stripped = text;
    for (java.util.regex.Pattern honorific : HONORIFIC_PATTERNS) {
      stripped = honorific.matcher(stripped).replaceAll("");
    }
    stripped = stripped.replace("()", "");
    return stripped.replace("(  )", ""); // tokenized version
  }

  /**
   * Builds the set of lexical variants of {@code original}.
   *
   * <p>The input is canonicalized with {@code Common.canonicalizeString}; honorifics are then
   * stripped from the canonical form and the result is handed to
   * {@code probeLexicalTransformation}, whose output is merged into the result. The original
   * string itself is never part of the returned list.
   *
   * <p>NOTE(review): the previous version also computed parenthesis-less, half-space-less,
   * space-less and punctuation-less variants of the canonical form but never added them to the
   * result. Those dead computations were dropped; if the variants were meant to be emitted, add
   * them to {@code outs} before the loop.
   *
   * @param original the text to derive variants from; {@code null} or empty yields an empty list
   * @return a new list of distinct variants, in no particular order, excluding {@code original}
   */
  public static ArrayList<String> getLexicalTransformations(String original) {
    HashSet<String> outs = new HashSet<String>();

    if (original == null || original.isEmpty()) {
      return new ArrayList<String>(outs);
    }

    outs.add(Common.canonicalizeString(original));

    // Results are collected separately and merged after the loop so that outs is not modified
    // by this method while it is being iterated.
    HashSet<String> news = new HashSet<String>();
    for (String probe : outs) {
      news.addAll(probeLexicalTransformation(stripHonorifics(probe), outs));
    }
    outs.addAll(news);

    outs.remove(original);

    return new ArrayList<String>(outs);
  }