public static ArrayList<ArrayList<TaggedWord>> getPhrasesNaive( String sentence, LexicalizedParser lp, AbstractSequenceClassifier<CoreLabel> classifier) { ArrayList<ArrayList<TaggedWord>> newList = new ArrayList<ArrayList<TaggedWord>>(); ArrayList<TaggedWord> taggedWords = StanfordNER.parse(sentence, lp, classifier); HashMap<String, String> phraseBoundaries = new HashMap<String, String>(); phraseBoundaries.put(",", ","); phraseBoundaries.put("\"", "\""); phraseBoundaries.put("''", "''"); phraseBoundaries.put("``", "``"); phraseBoundaries.put("--", "--"); // List<Tree> leaves = parse.getLeaves(); ArrayList<TaggedWord> temp = new ArrayList<TaggedWord>(); int index = 0; while (index < taggedWords.size()) { if ((phraseBoundaries.containsKey(taggedWords.get(index).word()))) { if (temp.size() > 0) { // System.out.println(temp); ArrayList<TaggedWord> tempCopy = new ArrayList<TaggedWord>(temp); newList.add(Preprocess(tempCopy)); } temp.clear(); } else { // System.out.println(taggedWords.get(index).toString()); temp.add(taggedWords.get(index)); } index += 1; } if (temp.size() > 0) { ArrayList<TaggedWord> tempCopy = new ArrayList<TaggedWord>(temp); newList.add(Preprocess(tempCopy)); } // System.out.println(newList); return newList; }
// mode - 2level public static double LexicalSimilarityScore( String sentence1, String sentence2, DISCOSimilarity discoRAM, LexicalizedParser lp, AbstractSequenceClassifier<CoreLabel> classifier) { // ArrayList<TaggedWord> taggedWords1 = Preprocess(StanfordNER.parse(sentence1, lp, // classifier)); // ArrayList<TaggedWord> taggedWords2 = Preprocess(StanfordNER.parse(sentence2, lp, // classifier)); ArrayList<TaggedWord> taggedWords1 = Preprocess(StanfordNER.parseNERMWE(sentence1, lp, classifier)); ArrayList<TaggedWord> taggedWords2 = Preprocess(StanfordNER.parseNERMWE(sentence2, lp, classifier)); // return LexicalSimilarityScore(taggedWords1, taggedWords2, discoRAM, lp); return LexicalSimilarityScoreMax(taggedWords1, taggedWords2, discoRAM, lp); }