/**
 * Calculates the similarity score between T and H based on VerbOcean relations.
 *
 * <p>Builds an expanded bag of words for T: every T word is copied over, and for each
 * VerbOcean rule whose left-hand side matches a T word, the rule's right-hand lemma is
 * added with the T word's count (counts accumulate when the lemma is already present).
 * The expanded bag is then scored against H.
 *
 * @param tBag the bag of words of T (word -> occurrence count)
 * @param hBag the bag of words of H (word -> occurrence count)
 * @param volr the VerbOcean relations resource
 * @return the similarity score (first element of {@code calculateSimilarity})
 * @throws ScoringComponentException if the lexical resource lookup fails
 */
protected double calculateSingleLexScoreWithVORelations(
    HashMap<String, Integer> tBag, HashMap<String, Integer> hBag, VerbOceanLexicalResource volr)
    throws ScoringComponentException {
  HashMap<String, Integer> tWordBag = new HashMap<String, Integer>();
  for (Entry<String, Integer> entry : tBag.entrySet()) {
    final String word = entry.getKey();
    final int counts = entry.getValue().intValue();
    try {
      // NOTE(review): if 'word' was already inserted as the RHS of an earlier rule,
      // this put overwrites the accumulated count — confirm whether that is intended.
      tWordBag.put(word, counts);
      for (LexicalRule<? extends RuleInfo> rule : volr.getRulesForLeft(word, null)) {
        // Accumulate the count for the entailed lemma (single lookup instead of
        // the original containsKey + get + put sequence).
        Integer previous = tWordBag.get(rule.getRLemma());
        tWordBag.put(rule.getRLemma(), previous == null ? counts : previous + counts);
      }
    } catch (LexicalResourceException e) {
      // NOTE(review): only the message is propagated; if ScoringComponentException
      // has a (String, Throwable) constructor, prefer chaining the cause.
      throw new ScoringComponentException(e.getMessage());
    }
  }
  return calculateSimilarity(tWordBag, hBag).get(0);
}
/**
 * Fill up the one sided rule maps from the pair-keyed rule map.
 *
 * <p>For every verb pair, the (single, best-scoring) rule is indexed three ways:
 * by entailing verb (LHS), by entailed verb (RHS), and by the ordered entailment pair.
 * Bidirectional rules are additionally inserted in inverted form.
 *
 * @param mapRulesByUnorderedPair rules keyed by unordered verb pair
 * @param verbPairs the set of verb pairs that have a mapped rule
 * @throws LexicalResourceException if inverting a rule fails
 */
private void fillTheRuleMaps(
    PairMap<String, LexicalRule<? extends VerbOceanRuleInfo>> mapRulesByUnorderedPair,
    Set<Pair<String>> verbPairs)
    throws LexicalResourceException {
  for (Pair<String> verbPair : verbPairs) {
    LexicalRule<? extends VerbOceanRuleInfo> rule = mapRulesByUnorderedPair.getValueOf(verbPair);
    addToMappedList(mapRulesByEntailingVerb, rule.getLLemma(), rule);
    addToMappedList(mapRulesByEntailedVerb, rule.getRLemma(), rule);
    mapRulesByEntailmentPair.put(new EntailmentPair(rule.getLLemma(), rule.getRLemma()), rule);
    if (rule.getInfo()
        .getRelationType()
        .isBidirectional()) // bidirectional rules are symmetrically duplicated
    {
      LexicalRule<VerbOceanRuleInfo> inverseRule = invertRule(rule);
      // NOTE(review): the inverse rule is added to mapRulesByEntailedVerb and to the
      // pair map, but NOT to mapRulesByEntailingVerb — confirm this asymmetry is
      // intended (left-side lookups may miss the inverted direction).
      addToMappedList(mapRulesByEntailedVerb, inverseRule.getRLemma(), inverseRule);
      mapRulesByEntailmentPair.put(
          new EntailmentPair(inverseRule.getLLemma(), inverseRule.getRLemma()), inverseRule);
    }
  }
  // sort each little list according to score
  sortMappedLists(mapRulesByEntailingVerb);
  sortMappedLists(mapRulesByEntailedVerb);
}
/** * Adds a rule to the list of rules, only if there exists no other rule with the same rule info * and a lower confidence * * @param rules The list of rules * @param rule The new rule to add */ private void addRuleToList( List<LexicalRule<? extends RuleInfo>> rules, LexicalRule<? extends RuleInfo> rule) { boolean addRule = true; for (int otherIndex = 0; otherIndex < rules.size(); ++otherIndex) { LexicalRule<? extends RuleInfo> otherRule = rules.get(otherIndex); if (getLinkInfo(rule).equals(getLinkInfo(otherRule))) { addRule = false; // Replace the rule with the same info and a lower confidence if (rule.getConfidence() > otherRule.getConfidence()) { rules.set(otherIndex, rule); } break; } } if (addRule) { rules.add(rule); } }
/**
 * Builds the inverse of the given rule: lemmas swapped, same confidence, relation,
 * resource name and rule info.
 *
 * @param rule the rule to invert
 * @return a new rule with left and right lemmas exchanged
 * @throws LexicalResourceException if the rule cannot be constructed
 */
private LexicalRule<VerbOceanRuleInfo> invertRule(LexicalRule<? extends VerbOceanRuleInfo> rule)
    throws LexicalResourceException {
  final String invertedLeft = rule.getRLemma();
  final String invertedRight = rule.getLLemma();
  return new LexicalRule<VerbOceanRuleInfo>(
      invertedLeft,
      VERB,
      invertedRight,
      VERB,
      rule.getConfidence(),
      rule.getRelation(),
      RESOURCE_NAME,
      rule.getInfo());
}
/** * Get rules of type leftSide -> rightSide, using the given lexical resource * * @param resource The lexical resource to use * @param leftSide The phrase that will be looked for as lhs of a rule * @param rightSide The phrase that will be looked for as rhs of a rule * @param partOfSpeech2 * @param partOfSpeech * @return The list of rules leftSide -> rightSide * @throws LexicalResourceException */ private List<LexicalRule<? extends RuleInfo>> getRules( LexicalResource<? extends RuleInfo> resource, String leftSide, String rightSide, PartOfSpeech leftSidePOS, PartOfSpeech rightSidePOS) throws LexicalResourceException { List<LexicalRule<? extends RuleInfo>> rules = new ArrayList<LexicalRule<? extends RuleInfo>>(); try { // WordNet workaround: // Make sure the synsets of the right and left sides of the rule // are equal to the right and left phrases. // (WN returns rules associated with any of the words in the phrase) if (resource.getClass().getName().toLowerCase().contains(WORDNET)) { for (LexicalRule<? extends RuleInfo> rule : resource.getRules(leftSide, leftSidePOS, rightSide, rightSidePOS)) { WordnetRuleInfo ruleInfo = (WordnetRuleInfo) rule.getInfo(); if ((ruleInfo.getLeftSense().getWords().contains(leftSide)) && (ruleInfo.getRightSense().getWords().contains(rightSide))) { addRuleToList(rules, rule); } } } else { // Get rules from t to h for (LexicalRule<? extends RuleInfo> rule : resource.getRules(leftSide, leftSidePOS, rightSide, rightSidePOS)) { addRuleToList(rules, rule); } } } catch (Exception e) { logger.warn( "Could not add rules from " + resource.getClass().getSimpleName() + " for " + leftSide + "->" + rightSide, e); } return rules; }
/** * Receives a rule and return the type of the rule, such as "synonym" or "hypernym" for WordNet, * "redirect" for Wikipedia, etc. The default value is "local-entailment".<br> * A better solution is to add an abstract class implementing RuleInfo, that all the concrete * RuleInfos will extend. This class will contain a field "relation" with a default of * "local-entailment". Then we can call: rule.getInfo().getRelation() without having to know which * resource the rule belongs to. * * @param rule * @return The type of the rule */ private String getLinkInfo(LexicalRule<? extends RuleInfo> rule) { String type = "local-entailment"; // WordNet if (rule.getResourceName().equals("WORDNET")) { type = ((WordnetRuleInfo) rule.getInfo()).getTypedRelation().name(); } // VerbOcean else if (rule.getResourceName().equals("VerbOcean")) { type = ((VerbOceanRuleInfo) rule.getInfo()).getRelationType().name(); } return type; }
/**
 * Fetches rules for the given left-hand lemma/POS from the wrapped lexical resource and
 * converts them to the local {@code LexicalRule} representation.
 *
 * <p>Each rule's confidence is either taken from the resource or replaced by a configured
 * constant (see {@code Constants.LEXICAL_RESOURCES_USE_CONSTANT_SCORE_FOR_ALL_RULES}), and
 * must lie strictly inside (0, 1).
 *
 * @param lhsLemma the left-hand-side lemma to look up
 * @param lhsPos the left-hand-side part of speech
 * @return an immutable set of converted rules (empty if the resource returns null)
 * @throws RuleBaseException on lexical-resource failure or out-of-range confidence
 */
@Override
protected ImmutableSet<LexicalRule> getRulesNotInCache(String lhsLemma, PartOfSpeech lhsPos)
    throws RuleBaseException {
  try {
    List<
            ? extends eu.excitementproject.eop.common.component.lexicalknowledge.LexicalRule<
                ? extends RuleInfo>>
        rulesFromResource = realLexicalResource.getRulesForLeft(lhsLemma, lhsPos);
    Set<LexicalRule> ret = new LinkedHashSet<LexicalRule>();
    if (rulesFromResource != null) {
      for (eu.excitementproject.eop.common.component.lexicalknowledge.LexicalRule<
              ? extends RuleInfo>
          ruleFromResource : rulesFromResource) {
        // Either a globally configured constant score, or the resource's own confidence.
        double confidence =
            Constants.LEXICAL_RESOURCES_USE_CONSTANT_SCORE_FOR_ALL_RULES
                ? Constants.LEXICAL_RESOURCE_CONSTANT_SCORE_WHEN_USING_CONSTANT_SCORE
                : ruleFromResource.getConfidence();
        // Confidence must be a proper probability-like score in (0, 1).
        if ((confidence <= 0) || (confidence >= 1)) {
          // Fixed typo in the original message: "confidene" -> "confidence".
          throw new RuleBaseException(
              "Bad confidence for rule from "
                  + this.realLexicalResource.getClass().getSimpleName()
                  + ". The confidence is: "
                  + String.format("%-4.4f", confidence));
        }
        ret.add(
            new LexicalRule(
                ruleFromResource.getLLemma(),
                ruleFromResource.getLPos(),
                ruleFromResource.getRLemma(),
                ruleFromResource.getRPos(),
                confidence));
      }
    }
    return new ImmutableSetWrapper<LexicalRule>(ret);
  } catch (LexicalResourceException e) {
    throw new RuleBaseException("Lexical resource failure. See nested exception.", e);
  }
}
/**
 * Orders rules by ascending confidence.
 *
 * <p>Fix: the original {@code rule1.getConfidence() > rule2.getConfidence() ? 1 : -1}
 * never returned 0, so two rules with equal confidence compared as both "less than"
 * each other — a violation of the {@link java.util.Comparator} contract that can make
 * {@code Collections.sort}/TimSort throw
 * "Comparison method violates its general contract!". {@code Double.compare} returns 0
 * for equal confidences and handles the comparison safely.
 *
 * @param rule1 first rule
 * @param rule2 second rule
 * @return negative/zero/positive as rule1's confidence is less than/equal to/greater
 *     than rule2's
 */
@Override
public int compare(
    LexicalRule<? extends VerbOceanRuleInfo> rule1,
    LexicalRule<? extends VerbOceanRuleInfo> rule2) {
  return Double.compare(rule1.getConfidence(), rule2.getConfidence());
}
/** * Ctor read and map all rules from the given verb ocean file, but, keep only rules with allowed * relation types, and, for each verb pair, keep only the highest scoring rule. The rules are then * mapped. * * @param scoreThreshold rules with thresholds not higher than this will be screened * @param verbOceanRelationsFile e.g. Data\RESOURCES\VerbOcean\verbocean.unrefined.2004-05-20.txt * @param allowedRelationTypes only rules with these relations will be returned. others will be * screened. If they contain any of the {@link #FORBIDDEN_RELATION_TYPES}, a * LexicalResourceException is thrown. Cannot be null, can be empty. * @throws LexicalResourceException */ public VerbOceanLexicalResource( double scoreThreshold, File verbOceanRelationsFile, Set<RelationType> allowedRelationTypes) throws LexicalResourceException { if (scoreThreshold <= 0) throw new LexicalResourceException( "the score threshold must be positive. I got " + scoreThreshold); if (verbOceanRelationsFile == null) throw new LexicalResourceException("got null relations file"); if (!verbOceanRelationsFile.exists()) throw new LexicalResourceException(verbOceanRelationsFile + " doesn't exist"); if (allowedRelationTypes == null) throw new LexicalResourceException("allowedRelationTypes is null"); for (RelationType forbiddenRelationType : FORBIDDEN_RELATION_TYPES) if (allowedRelationTypes.contains(forbiddenRelationType)) throw new LexicalResourceException( "The given allowed relation types set " + allowedRelationTypes + " contains a forbidden relation type " + forbiddenRelationType); try { VERB = new BySimplerCanonicalPartOfSpeech(SimplerCanonicalPosTag.VERB); } catch (UnsupportedPosTagStringException e) { throw new LexicalResourceException("Internal error", e); } PairMap<String, LexicalRule<? extends VerbOceanRuleInfo>> mapRulesByUnorderedPair = new PairMap<String, LexicalRule<? 
extends VerbOceanRuleInfo>>(); Set<Pair<String>> verbPairs = new LinkedHashSet<Pair<String>>(); // read and map all rules, but, keep only rules with allowed relation types, and, for each verb // pair, keep only the highest scoring rule try { BufferedReader reader = new BufferedReader(new FileReader(verbOceanRelationsFile)); String line; while ((line = reader.readLine()) != null) { if (line.length() != 0 && line.charAt(0) != '#') // skip empty and commented lines { String[] parts = line.split(" "); RelationType relationType = RelationType.parse(parts[1]); double score = Double.parseDouble(parts[4]); if (allowedRelationTypes.contains(relationType) && score > scoreThreshold) // screen out unallowed relation types and low scores { String leftVerb = parts[0]; String rightVerb = parts[2]; Pair<String> verbPair = new Pair<String>(leftVerb, rightVerb); LexicalRule<? extends VerbOceanRuleInfo> comparedRule = mapRulesByUnorderedPair.getValueOf(verbPair); if (comparedRule == null || score > comparedRule .getConfidence()) // if there is a better rule for the same verb pair, skip // this rule mapRulesByUnorderedPair.put( verbPair, makeRule(leftVerb, rightVerb, score, relationType)); if (comparedRule == null) verbPairs.add(verbPair); } } } reader.close(); } catch (FileNotFoundException e) { throw new LexicalResourceException("file not found: " + verbOceanRelationsFile, e); } catch (IOException e) { throw new LexicalResourceException("IO error reading: " + verbOceanRelationsFile, e); } // fill up the one sided rule maps fillTheRuleMaps(mapRulesByUnorderedPair, verbPairs); }
/**
 * Returns true if these two rules are opposite, meaning that: the first rule is w1->w2, with
 * confidence c and relation r; the second rule is w2->w1, with confidence c and relation r.
 *
 * <p>Confidences are considered equal when they differ by at most 1e-6.
 *
 * @param firstRule The first rule
 * @param secondRule The second rule
 * @return Whether the rules are opposite
 */
private boolean areOppositeLinks(
    LexicalRule<? extends RuleInfo> firstRule, LexicalRule<? extends RuleInfo> secondRule) {
  // Different link types can never be opposites.
  if (!getLinkInfo(firstRule).equals(getLinkInfo(secondRule))) {
    return false;
  }
  // Same type: opposite iff the confidences match within a small tolerance.
  return Math.abs(firstRule.getConfidence() - secondRule.getConfidence()) <= 0.000001;
}
/**
 * Receives a list of rules of type t->h and h->t and creates the alignment links for them.
 *
 * <p>Rules that appear in both directions with the same link info and (near-)equal confidence
 * are merged into a single bidirectional link; the remaining rules produce directional links.
 *
 * @param aJCas The JCas object
 * @param textStart The index of the first token in T in this alignment link
 * @param textEnd The index of the last token in T in this alignment link
 * @param hypoStart The index of the first token in H in this alignment link
 * @param hypoEnd The index of the last token in H in this alignment link
 * @param rulesFromLeft The list of rules t->h (matched entries are removed by this method)
 * @param rulesFromRight The list of rules h->t (matched entries are removed by this method)
 * @param lexicalResourceVersion The lexical resource version
 * @throws CASException on CAS annotation failure
 */
private void createAlignmentLinks(
    JCas aJCas,
    int textStart,
    int textEnd,
    int hypoStart,
    int hypoEnd,
    List<LexicalRule<? extends RuleInfo>> rulesFromLeft,
    List<LexicalRule<? extends RuleInfo>> rulesFromRight,
    String lexicalResourceVersion)
    throws CASException {
  // Find rules that match by rule info and make them bidirectional.
  // Both loops iterate in reverse so that remove() does not shift the indices
  // of elements not yet visited.
  for (int leftRuleIndex = rulesFromLeft.size() - 1; leftRuleIndex >= 0; --leftRuleIndex) {
    for (int rightRuleIndex = rulesFromRight.size() - 1; rightRuleIndex >= 0; --rightRuleIndex) {
      if (areOppositeLinks(
          rulesFromLeft.get(leftRuleIndex), rulesFromRight.get(rightRuleIndex))) {
        // Remove these rules from the list (they are replaced by one bidirectional link)
        LexicalRule<? extends RuleInfo> rightRule = rulesFromRight.remove(rightRuleIndex);
        LexicalRule<? extends RuleInfo> leftRule = rulesFromLeft.remove(leftRuleIndex);
        // Add the annotation; confidence is the max of the two directions
        addAlignmentAnnotations(
            aJCas,
            textStart,
            textEnd,
            hypoStart,
            hypoEnd,
            rightRule.getResourceName(),
            lexicalResourceVersion,
            Math.max(rightRule.getConfidence(), leftRule.getConfidence()),
            Direction.Bidirection,
            getLinkInfo(rightRule));
        // Each left rule pairs with at most one right rule.
        break;
      }
    }
  }
  // Add rules from t to h (those left unmatched above)
  for (LexicalRule<? extends RuleInfo> rule : rulesFromLeft) {
    addAlignmentAnnotations(
        aJCas,
        textStart,
        textEnd,
        hypoStart,
        hypoEnd,
        rule.getResourceName(),
        lexicalResourceVersion,
        rule.getConfidence(),
        Direction.TtoH,
        getLinkInfo(rule));
  }
  // Add rules from h to t (those left unmatched above)
  for (LexicalRule<? extends RuleInfo> rule : rulesFromRight) {
    addAlignmentAnnotations(
        aJCas,
        textStart,
        textEnd,
        hypoStart,
        hypoEnd,
        rule.getResourceName(),
        lexicalResourceVersion,
        rule.getConfidence(),
        Direction.HtoT,
        getLinkInfo(rule));
  }
}