/**
 * Checks that when the reader supplies the line {@code "DictionnaireTest"}, the parsed
 * dictionary reports that string as its name and reports itself as empty.
 */
@Test
public void loadTranslations_OnlyName() throws IOException {
    final String expectedName = "DictionnaireTest";
    // NOTE(review): a single thenReturn makes the mock answer every readLine() call with
    // this same value (it never yields null) - confirm the parser terminates as intended.
    when(bufferedReaderMock.readLine()).thenReturn(expectedName);

    IDictionary parsed = dictionaryParser.loadTranslations(bufferedReaderMock);

    assertThat(parsed.getName(), is(expectedName));
    assertThat(parsed.isEmpty(), is(true));
}
/**
 * Checks that a name line followed by the line {@code "contre = against"} yields a
 * non-empty dictionary named {@code "DictionnaireTest"} whose translations for
 * {@code "contre"} include {@code "against"}.
 */
@Test
public void loadTranslations_containsTranslation() throws IOException {
    // Consecutive readLine() answers: the name, one translation line, then an empty string.
    when(bufferedReaderMock.readLine())
            .thenReturn("DictionnaireTest", "contre = against", "");

    IDictionary parsed = dictionaryParser.loadTranslations(bufferedReaderMock);

    assertThat(parsed.getName(), is("DictionnaireTest"));
    assertThat(parsed.getTranslation("contre"), hasItem("against"));
    assertThat(parsed.isEmpty(), is(false));
}
private List<DictionaryEntry> doLookUp(IDictionary dict, String word) { String[] stemmed = tokenizer.tokenizeWordsToStrings(word, StemmingMode.MATCHING); if (stemmed.length == 0) { // Stop word. Skip. return Collections.<DictionaryEntry>emptyList(); } try { List<DictionaryEntry> result = dict.readArticles(word); if (!result.isEmpty()) { return result; } // The verbatim word didn't get any hits; try the stem. if (stemmed.length > 1 && doFuzzyMatching()) { return dict.readArticlesPredictive(stemmed[0]); } } catch (Exception ex) { Log.log(ex); } return Collections.<DictionaryEntry>emptyList(); }
private Duple<CrownOperations.Reason, ISynset> getEstimatedSynonym( String targetLemma, Set<String> synonyms, POS pos, String gloss) { Counter<ISynset> synsetCounts = new ObjectCounter<ISynset>(); List<String> lemmasInWn = new ArrayList<String>(); for (String lemma : synonyms) { // Get the WordNet sysnet if it exists Set<ISynset> senses = WordNetUtils.getSynsets(dict, lemma, pos); if (senses.isEmpty()) continue; lemmasInWn.add(lemma); synsetCounts.countAll(senses); // Get the hypernyms of the synset and count their occurrence too for (ISynset synset : senses) { // Do a sanity check that avoids attaching this Entry if its // lemma appears anywhere near the synonoyms. This check // potentially has some false positives since we might avoid // putting the lemma somewhere valid (in which case it would // have more than would valid location) but is used to avoid // noisy integration if (WordNetUtils.isAlreadyInWordNet(dict, targetLemma, pos, synset)) { return null; } for (ISynsetID hyper : synset.getRelatedSynsets(Pointer.HYPERNYM)) { ISynset hyperSyn = dict.getSynset(hyper); if (WordNetUtils.isAlreadyInWordNet(dict, targetLemma, pos, hyperSyn)) { return null; } synsetCounts.count(hyperSyn); } } } // Return null if we couldn't find any of the lemma's synonyms or // hyponyms in WordNet if (synsetCounts.items().isEmpty()) return null; // If there was only one lemma in this list in WordNet, try comparing // the glosses for just that word to find a match if (lemmasInWn.size() == 1) { double maxScore = 0; ISynset best = null; String bestGloss = null; Set<ISynset> candidateSynonymSynsets = WordNetUtils.getSynsets(dict, lemmasInWn.get(0), pos); for (ISynset candidate : candidateSynonymSynsets) { String wnExtendedGloss = WordNetUtils.getGlossWithoutExamples(candidate); double score = simFunc.compare(gloss, wnExtendedGloss); if (maxScore < score) { maxScore = score; best = candidate; bestGloss = wnExtendedGloss; } } CrownOperations.Reason r = new 
CrownOperations.Reason(getClass()); r.set("relation_type", "synonym"); r.set("heuristic", "single-synonym"); r.set("max_score", maxScore); return new Duple<CrownOperations.Reason, ISynset>(r, best); } else { // Check for whether there were ties in the max ISynset mostFreq = synsetCounts.max(); int mostFreqCount = synsetCounts.getCount(mostFreq); List<ISynset> ties = new ArrayList<ISynset>(); for (ISynset syn : synsetCounts.items()) { int c = synsetCounts.getCount(syn); if (c == mostFreqCount) ties.add(syn); } // If there was only one synset that had the maximum count, then we // report this if (ties.size() == 1) { CrownOperations.Reason r = new CrownOperations.Reason(getClass()); r.set("relation_type", "synonym"); r.set("heuristic", "unambiguous-max"); r.set("count", mostFreqCount); return new Duple<CrownOperations.Reason, ISynset>(r, mostFreq); } // Otherwise, we try breaking ties between the synsets using gloss // similarity else { double maxScore = 0; ISynset best = null; String bestGloss = null; for (ISynset candidate : ties) { String wnExtendedGloss = WordNetUtils.getGlossWithoutExamples(candidate); double score = simFunc.compare(gloss, wnExtendedGloss); if (maxScore < score) { maxScore = score; best = candidate; bestGloss = wnExtendedGloss; } } CrownOperations.Reason r = new CrownOperations.Reason(getClass()); r.set("relation_type", "synonym"); r.set("heuristic", "tied-synonyms"); r.set("max_score", maxScore); return new Duple<CrownOperations.Reason, ISynset>(r, best); } } }
/**
 * Self-test driver for the {@code Dictionary} table: inserts the keys "0".."19" mapped
 * to "100".."119", then verifies string rendering, lookups and deletions through
 * {@code check}. Prints "Begin Test..." at the start and "Success!" once every step
 * has run.
 *
 * @param args unused
 */
public static void main(String[] args) {
    System.out.println("Begin Test...");

    // NOTE(review): the constructor argument 5 is presumably the bucket count - confirm.
    IDictionary<String, String> table = new Dictionary<String, String>(5);

    // Every insert of a fresh key is expected to report false.
    for (int key = 0; key < 20; key++) {
        boolean insertResult = table.insert(String.valueOf(key), String.valueOf(key + 100));
        check(!insertResult);
    }

    String afterInserts = table.toString();
    check(
            afterInserts.equals(
                    "[ (2 -> 102) (7 -> 107) (13 -> 113) (18 -> 118) (3 -> 103) (8 -> 108) (14 -> 114) (19 -> 119) (4 -> 104) (9 -> 109) (10 -> 110) (15 -> 115) (0 -> 100) (5 -> 105) (11 -> 111) (16 -> 116) (1 -> 101) (6 -> 106) (12 -> 112) (17 -> 117) ]"));
    check(table.search("0").equals("100"));
    check(table.search("10").equals("110"));

    // Remove keys "5".."19"; each removal of a present key is expected to report true.
    for (int key = 5; key < 20; key++) {
        boolean deleteResult = table.delete(String.valueOf(key));
        check(deleteResult);
    }

    String afterPartialDelete = table.toString();
    check(afterPartialDelete.equals("[ (2 -> 102) (3 -> 103) (4 -> 104) (0 -> 100) (1 -> 101) ]"));
    check(table.search("0").equals("100"));
    check(table.search("50") == null);

    // Remove the remaining keys "0".."4", leaving the table empty.
    for (int key = 0; key < 5; key++) {
        boolean deleteResult = table.delete(String.valueOf(key));
        check(deleteResult);
    }

    String afterFullDelete = table.toString();
    check(afterFullDelete.equals("[ ]"));
    check(table.search("0") == null);
    check(table.search("50") == null);
    // Deleting a key that is no longer present is expected to report false.
    check(!table.delete("0"));

    System.out.println("Success!");
}