public static ArrayList<String> getLexicalTransformations(String original) { HashSet<String> outs = new HashSet<String>(); if (original.isEmpty()) return new ArrayList<String>(outs); HashSet<String> news = new HashSet<String>(); String canonical = Common.canonicalizeString(original); outs.add(canonical); // now other things String parenthesisLess = Common.removeParenthesis(canonical); String halfSpaceLess = canonical.replace('\u200C', ' '); String spaceLess = canonical.replace(' ', '\u200C'); String puncLess = Common.removePunctuations(canonical); outs.addAll(news); news.clear(); for (String probe : outs) { probe = probe.replaceAll("\\bعلیهالسلام\\b", ""); probe = probe.replaceAll("\\bعلیها السلام\\b", ""); probe = probe.replaceAll("\\bعلیه السلام\\b", ""); probe = probe.replaceAll("\\bعلیهالسلام\\b", ""); probe = probe.replaceAll("\\(\\s*ع\\s*\\)", ""); probe = probe.replaceAll("\\(\\s*س\\s*\\)", ""); probe = probe.replaceAll("\\(\\s*ص\\s*\\)", ""); probe = probe.replaceAll("\\bصلی الله علیه وآله\\b", ""); probe = probe.replaceAll("\\bصلی الله علیه و آله\\b", ""); probe = probe.replaceAll("\\bصلی الله علیه و آله و سلم\\b", ""); probe = probe.replaceAll("\\bصلی الله علیه وآله و سلم\\b", ""); probe = probe.replaceAll("\\bصلی الله علیه وآله وسلم\\b", ""); probe = probe.replace("()", ""); probe = probe.replace("( )", ""); // tokenized version news.addAll(probeLexicalTransformation(probe, outs)); } outs.addAll(news); news.clear(); outs.remove(original); return new ArrayList<String>(outs); }