/**
 * Check whether the keyword is in the dictionary.
 *
 * @param word the keyword
 * @return <tt>true</tt> if it is in the dictionary, <tt>false</tt> otherwise
 */
public static boolean isAWord(String word) {
  WordNetDatabase database = WordNetDatabase.getFileInstance();
  Synset[] synsetNoun = database.getSynsets(word, SynsetType.NOUN);
  Synset[] synsetVerb = database.getSynsets(word, SynsetType.VERB);
  return synsetNoun.length + synsetVerb.length > 0;
}
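A minimal, self-contained sketch of how a dictionary check like isAWord can be exercised with the JAWS API; the class name and dictionary path are placeholders and must be adapted to a local WordNet installation.

import edu.smu.tspell.wordnet.Synset;
import edu.smu.tspell.wordnet.SynsetType;
import edu.smu.tspell.wordnet.WordNetDatabase;

public class IsAWordDemo {
  public static void main(String[] args) {
    // Placeholder path: point this at your local WordNet "dict" directory.
    System.setProperty("wordnet.database.dir", "/path/to/WordNet/dict");
    WordNetDatabase database = WordNetDatabase.getFileInstance();
    // Mirrors isAWord: a token counts as a dictionary word if it has any noun or verb synset.
    Synset[] nouns = database.getSynsets("search", SynsetType.NOUN);
    Synset[] verbs = database.getSynsets("search", SynsetType.VERB);
    System.out.println("in dictionary: " + (nouns.length + verbs.length > 0));
  }
}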
public boolean isNoun(String word) {
  for (Synset s : database.getSynsets(word)) {
    if (s.getType().equals(SynsetType.NOUN)) {
      return true;
    }
  }
  // Fall back to the singular form for plural words that have no synset of their own.
  if (LexicalInflector.isPlural(word)) {
    return isNoun(LexicalInflector.singularize(word));
  }
  return false;
}
/**
 * Looks up a suitable synonym for the word at the given index of the sentence.
 * Multi-word forms (containing spaces) are skipped, candidates are sorted longest
 * first, and each is checked with the lexical analyzer; if no acceptable synonym
 * is found, the original word is returned unchanged.
 */
private String findSynonym(ArrayList<Word> sentence, int indexToReplace) {
  String output = "";
  String word = sentence.get(indexToReplace).getValue();
  Synset[] synsets = database.getSynsets(word);
  synonyms.clear();
  if (synsets.length > 0) {
    for (int i = 0; i < synsets.length; i++) {
      String[] wordForms = synsets[i].getWordForms();
      for (int j = 0; j < wordForms.length; j++) {
        if (wordForms[j].contains(" ")) {
          // Skip multi-word forms; only single-token synonyms are usable here.
          continue;
        } else if (!wordForms[j].equals(word)) {
          synonyms.add(wordForms[j]);
        }
      }
    }
    // TODO: handle plural/singular agreement before substitution.
    Collections.sort(synonyms, myLengthComparator); // longest candidates first
    if (synonyms.size() == 0) {
      return word;
    }
    // Check candidates with the lexical analyzer until one is accepted.
    while (!LexicalAnalyzer(sentence, indexToReplace, synonyms.get(0))) {
      synonyms.remove(0);
      if (synonyms.size() == 0) {
        return word; // no acceptable synonym found
      }
      if (synonyms.get(0).length() <= word.length()) {
        return word; // do not replace: remaining synonyms are no longer than the word
      }
    }
    // Placeholder logic: take the first accepted candidate.
    if (synonyms.size() > 0) {
      output = synonyms.get(0);
    } else {
      output = word;
    }
  } else {
    // No other known synonyms in the WordNet database.
    output = word;
  }
  return output;
}
public ParseEssay() {
  System.setProperty("wordnet.database.dir", "../war/dict");
  synonyms = new ArrayList<String>();
  database = WordNetDatabase.getFileInstance();
  baos = new ByteArrayOutputStream();
  lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
}
/**
 * Gets the synonyms of a lemma for the given synset type.
 *
 * @param lemma the word to look up
 * @param type the synset type (noun, verb, adjective, adverb)
 * @return the word forms of every matching synset
 */
public Set<String> getSynonym(String lemma, SynsetType type) {
  Set<String> synonyms = new HashSet<String>();
  Synset[] synsets = wordnet.getSynsets(lemma, type);
  for (Synset synset : synsets) {
    String[] wordforms = synset.getWordForms();
    synonyms.addAll(Arrays.asList(wordforms));
  }
  return synonyms;
}
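For context, a small self-contained usage sketch of the same synonym lookup using the JAWS calls above; the class name, lemma, and dictionary path are illustrative only.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import edu.smu.tspell.wordnet.Synset;
import edu.smu.tspell.wordnet.SynsetType;
import edu.smu.tspell.wordnet.WordNetDatabase;

public class SynonymLookupDemo {
  public static void main(String[] args) {
    System.setProperty("wordnet.database.dir", "/path/to/WordNet/dict"); // placeholder path
    WordNetDatabase wordnet = WordNetDatabase.getFileInstance();
    // Collect every word form from every synset of the lemma, as getSynonym does.
    Set<String> synonyms = new HashSet<String>();
    for (Synset synset : wordnet.getSynsets("quick", SynsetType.ADJECTIVE)) {
      synonyms.addAll(Arrays.asList(synset.getWordForms()));
    }
    System.out.println(synonyms); // contents depend on the installed WordNet version
  }
}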
public List<String> getExamples(String concept) {
  List<String> examples = new ArrayList<String>();
  Synset[] synsets = database.getSynsets(concept);
  for (Synset synset : synsets) {
    for (String s : synset.getUsageExamples()) {
      examples.add(s);
    }
  }
  return examples;
}
public Wordnet() {
  super();
  // String wordnet_location = getPathDict();
  // System.setProperty("wordnet.database.dir", wordnet_location);
  System.setProperty(
      "wordnet.database.dir", "/Users/angel/Desktop/workspace-miso/metaRDF.core/dict");
  database = WordNetDatabase.getFileInstance();
}
public List<String> getMeanings(String concept) {
  List<String> meanings = new ArrayList<String>();
  Synset[] synsets = database.getSynsets(concept);
  for (Synset synset : synsets) {
    meanings.add(synset.getDefinition());
  }
  return meanings;
}
public List<String> getDefinitions(String concept) {
  Synset[] synsets = database.getSynsets(concept);
  if (synsets.length == 0) {
    return null;
  }
  List<String> definitions = new ArrayList<String>();
  for (int i = 0; i < synsets.length; i++) {
    definitions.add(synsets[i].getDefinition());
  }
  return definitions;
}
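A short usage sketch of the same definition lookup done directly against the JAWS API, including the empty-result case that getDefinitions signals with null; the word and path are placeholders.

import edu.smu.tspell.wordnet.Synset;
import edu.smu.tspell.wordnet.WordNetDatabase;

public class DefinitionDemo {
  public static void main(String[] args) {
    System.setProperty("wordnet.database.dir", "/path/to/WordNet/dict"); // placeholder path
    WordNetDatabase database = WordNetDatabase.getFileInstance();
    Synset[] synsets = database.getSynsets("tree");
    if (synsets.length == 0) {
      System.out.println("no synsets found"); // getDefinitions would return null here
    } else {
      for (Synset synset : synsets) {
        System.out.println(synset.getDefinition()); // one gloss per synset
      }
    }
  }
}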
public List<String> getDerivation(String concept1, String concept2) {
  List<String> derivation = new ArrayList<String>();
  Synset[] synsets = database.getSynsets(concept1);
  for (Synset synset : synsets) {
    for (WordSense wordsense : synset.getDerivationallyRelatedForms(concept2)) {
      derivation.add(wordsense.getWordForm());
    }
  }
  return derivation;
}
/**
 * Gets the derivationally related forms of a lemma for the given synset type.
 *
 * @param lemma the word to look up
 * @param type the synset type
 * @return the related word forms
 */
public Set<String> getDerivationallyRelatedForms(String lemma, SynsetType type) {
  Set<String> derivationallyForm = new HashSet<String>();
  Synset[] synsets = wordnet.getSynsets(lemma, type);
  for (Synset synset : synsets) {
    WordSense[] senses = synset.getDerivationallyRelatedForms(lemma);
    for (WordSense sense : senses) {
      derivationallyForm.add(sense.getWordForm());
    }
  }
  return derivationallyForm;
}
/**
 * Gets the hypernyms of a lemma's noun synsets.
 *
 * @param lemma the noun to look up
 * @return the word forms of all hypernym synsets
 */
public Set<String> getNounHypernym(String lemma) {
  Set<String> hypernyms = new HashSet<String>();
  Synset[] synsets = wordnet.getSynsets(lemma, SynsetType.NOUN);
  for (Synset synset : synsets) {
    NounSynset nounSynset = (NounSynset) synset;
    NounSynset[] hypernymSynset = nounSynset.getHypernyms();
    for (NounSynset set : hypernymSynset) {
      hypernyms.addAll(Arrays.asList(set.getWordForms()));
    }
  }
  return hypernyms;
}
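A hedged sketch of the same hypernym walk using the NounSynset cast directly; the word and dictionary path are placeholders.

import java.util.Arrays;

import edu.smu.tspell.wordnet.NounSynset;
import edu.smu.tspell.wordnet.Synset;
import edu.smu.tspell.wordnet.SynsetType;
import edu.smu.tspell.wordnet.WordNetDatabase;

public class HypernymDemo {
  public static void main(String[] args) {
    System.setProperty("wordnet.database.dir", "/path/to/WordNet/dict"); // placeholder path
    WordNetDatabase wordnet = WordNetDatabase.getFileInstance();
    for (Synset synset : wordnet.getSynsets("dog", SynsetType.NOUN)) {
      // Noun synsets can be cast to NounSynset to reach the hypernym relation.
      NounSynset nounSynset = (NounSynset) synset;
      for (NounSynset hypernym : nounSynset.getHypernyms()) {
        System.out.println(Arrays.toString(hypernym.getWordForms()));
      }
    }
  }
}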
/**
 * Gets the hypernyms of a lemma's verb synsets.
 *
 * @param lemma the verb to look up
 * @return the word forms of all hypernym synsets
 */
public Set<String> getVerbHypernym(String lemma) {
  Set<String> hypernyms = new HashSet<String>();
  Synset[] synsets = wordnet.getSynsets(lemma, SynsetType.VERB);
  for (Synset synset : synsets) {
    VerbSynset verbSynset = (VerbSynset) synset;
    VerbSynset[] hypernymSynset = verbSynset.getHypernyms();
    for (VerbSynset set : hypernymSynset) {
      hypernyms.addAll(Arrays.asList(set.getWordForms()));
    }
  }
  return hypernyms;
}
public List<String> getSynonymsByDefition(String concept, String definition) {
  Synset[] synsets = database.getSynsets(concept);
  if (synsets.length == 0) {
    return null;
  }
  List<String> synonyms = new ArrayList<String>();
  for (int i = 0; i < synsets.length; i++) {
    if (synsets[i].getDefinition().equals(definition)) {
      String[] words = synsets[i].getWordForms();
      for (int j = 0; j < words.length; j++) {
        synonyms.add(words[j]);
      }
      break;
    }
  }
  return synonyms;
}
public Map<String, Tuple<String[], String[]>> getSynonymsProposal(String concept) {
  Map<String, Tuple<String[], String[]>> synonyms =
      new HashMap<String, Tuple<String[], String[]>>();
  Synset[] synsets = database.getSynsets(concept);
  for (Synset synset : synsets) {
    int tagCount = 0;
    try {
      tagCount = synset.getTagCount(concept);
    } catch (edu.smu.tspell.wordnet.WordNetException e) {
      // The queried form may not appear in this synset; fall back to its first word form.
      tagCount = synset.getTagCount(synset.getWordForms()[0]);
    }
    Tuple<String[], String[]> wordformsAndExamples =
        new Tuple<String[], String[]>(synset.getWordForms(), synset.getUsageExamples(), tagCount);
    synonyms.put(synset.getDefinition(), wordformsAndExamples);
  }
  return synonyms;
}
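A small sketch of the same per-synset traversal, printing the definition, word forms, usage examples, and tag count without the project's Tuple type; the word and path are placeholders, and the tag-count fallback mirrors the catch block above.

import java.util.Arrays;

import edu.smu.tspell.wordnet.Synset;
import edu.smu.tspell.wordnet.WordNetDatabase;
import edu.smu.tspell.wordnet.WordNetException;

public class SynonymsProposalDemo {
  public static void main(String[] args) {
    System.setProperty("wordnet.database.dir", "/path/to/WordNet/dict"); // placeholder path
    WordNetDatabase database = WordNetDatabase.getFileInstance();
    String concept = "bank";
    for (Synset synset : database.getSynsets(concept)) {
      int tagCount;
      try {
        tagCount = synset.getTagCount(concept);
      } catch (WordNetException e) {
        // Fall back to the synset's first word form when the queried form is absent.
        tagCount = synset.getTagCount(synset.getWordForms()[0]);
      }
      System.out.println(synset.getDefinition());
      System.out.println("  forms:    " + Arrays.toString(synset.getWordForms()));
      System.out.println("  examples: " + Arrays.toString(synset.getUsageExamples()));
      System.out.println("  tag count: " + tagCount);
    }
  }
}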
public Result evaluateModel(
    ModelFile solution,
    ModelFile studentModel,
    Result mistakes,
    ArrayList<EvaluationCriteria> markers,
    double totalMarks)
    throws Exception {
  String wordNetpath =
      this.getClass().getClassLoader().getResource("").getPath() + "/WordNetDic";
  System.setProperty("wordnet.database.dir", wordNetpath);
  SystemSequenceDiagramReader reader = new SystemSequenceDiagramReader();
  SSD solutionDetails = reader.getRefModelDetails(solution);
  EvaluationResult evaluationResult = new EvaluationResult();
  Double studentMarks = totalMarks;
  try {
    /*
     * Evaluate the student's model by comparing it with the reference model
     * and deducting marks for missing essential elements.
     */
    for (EvaluationCriteria marker : markers) {
      if (marker.getType().equals("LifeLine")) {
        for (String lifeline : solutionDetails.getLifelines()) {
          /*
           * If a lifeline/class name in the student's model does not match the
           * reference model's name exactly, look for a WordNet synonym.
           */
          if (!lifeline.toLowerCase().contains(marker.getElementName().toLowerCase())
              || !lifeline.toLowerCase().equals(marker.getElementName().toLowerCase())) {
            boolean variationFound = false;
            WordNetDatabase database = WordNetDatabase.getFileInstance();
            Synset[] synsets = database.getSynsets(lifeline.toLowerCase());
            for (int k = 0; k < synsets.length; k++) {
              String[] wordForms = synsets[k].getWordForms();
              for (int j = 0; j < wordForms.length; j++) {
                if (wordForms[j].toLowerCase().equals(marker.getElementName().toLowerCase())) {
                  variationFound = true;
                  break;
                }
              }
            }
            if (!variationFound) {
              studentMarks = deductMarks(studentMarks, marker.isEssential(), marker.getMarks());
              break;
            }
          } else {
            solutionDetails.getLifelines().remove(lifeline);
            break;
          }
        }
      }
    }

    /*
     * Compare the operations (and their sequence) of the student's model with
     * the evaluator's model.
     */
    for (EvaluationCriteria marker : markers) {
      if (marker.getType().equals("Operation")) {
        for (String operation : solutionDetails.getOperations()) {
          boolean found = false;
          // Look for an exact match first.
          if (operation.toLowerCase().equals(marker.getElementName().toLowerCase())) {
            found = true;
          }
          // Otherwise look for an occurrence of the reference operation name
          // (the part before the parameter list) in the student's operation.
          else if (marker.getElementName().contains("(")) {
            String[] values = marker.getElementName().split("\\(");
            Pattern pattern = Pattern.compile(values[0]);
            Matcher matcher = pattern.matcher(operation);
            if (matcher.find()) {
              found = true;
            }
          }
          if (!found) {
            studentMarks = deductMarks(studentMarks, marker.isEssential(), marker.getMarks());
          } else {
            solutionDetails.getOperations().remove(operation);
            break;
          }
        }
      }
    }

    /*
     * Evaluate the student's model for syntactic and semantic mistakes and
     * deduct marks for those mistakes the evaluator marked as essential.
     */
    HashMap<String, ArrayList<String>> mistakesResult = new HashMap<String, ArrayList<String>>();
    for (EvaluationCriteria marker : markers) {
      if (marker.getType().equals("Mistake")) {
        for (Errors mistake : mistakes.getErrors()) {
          if (mistake.getErrorDiscrption().contains(marker.getElementName())
              && marker.isEssential()) {
            studentMarks = deductMarks(studentMarks, marker.isEssential(), marker.getMarks());
            String error = mistake.getErrorName();
            // Errors and warnings are recorded the same way, grouped by error name.
            if (mistake.getType().equals("Error") || mistake.getType().equals("Warning")) {
              if (mistakesResult.containsKey(error)) {
                mistakesResult
                    .get(error)
                    .add(mistake.getElementName() + "%" + mistake.getErrorDiscrption());
              } else {
                ArrayList<String> list = new ArrayList<>();
                list.add(mistake.getElementName() + "%" + mistake.getErrorDiscrption());
                mistakesResult.put(error, list);
              }
            }
          }
        }
      }
    }

    for (String error : mistakesResult.keySet()) {
      int count = 0;
      EvaluationResultError resultError = new EvaluationResultError();
      for (String string : mistakesResult.get(error)) {
        String[] token = string.split("%");
        EvaluationResultErrorsDetail detail = new EvaluationResultErrorsDetail();
        detail.setElementName(token[0]);
        detail.setErrorDiscption(token[1]);
        resultError.addDetail(detail);
        count++;
      }
      resultError.setErrorName(error);
      resultError.setErrorCount(count);
      evaluationResult.addErrors(resultError);
    }

    // Record the marks for this student.
    evaluationResult.setTotalMarks(totalMarks);
    evaluationResult.setStudentMarks(studentMarks);
  } catch (Exception e) {
    e.printStackTrace();
  }
  Result result = new Result();
  result.setEvaluationResult(evaluationResult);
  return result;
}
public boolean isVerb(String word) {
  for (Synset s : database.getSynsets(word)) {
    if (s.getType().equals(SynsetType.VERB)) {
      return true;
    }
  }
  return false;
}
public class QueryExpansion {

  private Tagger tagger = new Tagger();
  private static Logger logger = Logger.getLogger(QueryExpansion.class);
  private WordNetDatabase database = WordNetDatabase.getFileInstance();

  public QueryExpansion() throws IOException {
    tagger.loadModel("/cmu/arktweetnlp/model.20120919");
    System.setProperty("wordnet.database.dir", GlobalProperty.getInstance().getWordNetPath());
    logger.info("Done initializing QueryExpansion");
  }

  /**
   * Expands a query by appending up to three WordNet word forms (from the first
   * synset of the matching part of speech) after each noun, verb, adjective, and
   * adverb token.
   */
  public String expandQuery(String originalQuery) {
    StringBuilder result = new StringBuilder();
    List<TaggedToken> taggedTokens = tagger.tokenizeAndTag(originalQuery);
    for (TaggedToken token : taggedTokens) {
      // ARK TweetNLP tags: nominal tags are expanded as nouns, V/T as verbs,
      // A as adjectives, and R as adverbs. The "^" (proper noun) tag must be
      // escaped in the regex, otherwise it never matches.
      if (token.tag.matches("N|O|\\^|S|Z|L|M|Y|X|!")) {
        appendTokenWithSynonyms(result, token.token, SynsetType.NOUN);
      } else if (token.tag.matches("V|T")) {
        appendTokenWithSynonyms(result, token.token, SynsetType.VERB);
      } else if (token.tag.matches("A")) {
        appendTokenWithSynonyms(result, token.token, SynsetType.ADJECTIVE);
      } else if (token.tag.matches("R")) {
        appendTokenWithSynonyms(result, token.token, SynsetType.ADVERB);
      }
    }
    logger.info(
        "Before expansion, query = "
            + originalQuery
            + ", after expansion, query = "
            + result.toString().trim());
    return result.toString().trim();
  }

  /** Appends the token followed by at most three word forms from its first synset. */
  private void appendTokenWithSynonyms(StringBuilder result, String token, SynsetType type) {
    result.append(" ");
    result.append(token);
    Synset[] synsets = database.getSynsets(token, type);
    if (synsets.length > 0) {
      String[] wordForms = synsets[0].getWordForms();
      for (int j = 0; j < Math.min(wordForms.length, 3); ++j) {
        result.append(" ");
        result.append(wordForms[j]);
      }
    }
  }

  public static void main(String[] args) throws IOException {
    QueryExpansion qe = new QueryExpansion();
    System.out.println(
        qe.expandQuery(
            "The Political Power of Social Media | Foreign Affairs: http://fam.ag/i5A7Av"));
  }
}
public boolean isAdjective(String word) {
  for (Synset s : database.getSynsets(word)) {
    if (s.getType().equals(SynsetType.ADJECTIVE)) {
      return true;
    }
  }
  return false;
}
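A minimal sketch of how the part-of-speech checks above can be combined, done directly against the JAWS API rather than the wrapper's `database` field; the word and dictionary path are placeholders.

import edu.smu.tspell.wordnet.Synset;
import edu.smu.tspell.wordnet.SynsetType;
import edu.smu.tspell.wordnet.WordNetDatabase;

public class PosCheckDemo {
  public static void main(String[] args) {
    System.setProperty("wordnet.database.dir", "/path/to/WordNet/dict"); // placeholder path
    WordNetDatabase database = WordNetDatabase.getFileInstance();
    String word = "run";
    boolean noun = false, verb = false, adjective = false;
    // Same scan as isNoun/isVerb/isAdjective: inspect the type of every synset of the word.
    for (Synset s : database.getSynsets(word)) {
      if (s.getType().equals(SynsetType.NOUN)) noun = true;
      if (s.getType().equals(SynsetType.VERB)) verb = true;
      if (s.getType().equals(SynsetType.ADJECTIVE)) adjective = true;
    }
    System.out.println(word + ": noun=" + noun + " verb=" + verb + " adjective=" + adjective);
  }
}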
public Wordnet() {
  wordnet = WordNetDatabase.getFileInstance();
}