/** * Adds a rule to the list of rules, only if there exists no other rule with the same rule info * and a lower confidence * * @param rules The list of rules * @param rule The new rule to add */ private void addRuleToList( List<LexicalRule<? extends RuleInfo>> rules, LexicalRule<? extends RuleInfo> rule) { boolean addRule = true; for (int otherIndex = 0; otherIndex < rules.size(); ++otherIndex) { LexicalRule<? extends RuleInfo> otherRule = rules.get(otherIndex); if (getLinkInfo(rule).equals(getLinkInfo(otherRule))) { addRule = false; // Replace the rule with the same info and a lower confidence if (rule.getConfidence() > otherRule.getConfidence()) { rules.set(otherIndex, rule); } break; } } if (addRule) { rules.add(rule); } }
/**
 * Builds the mirror image of a rule: the right lemma of the given rule becomes the left lemma of
 * the result and vice versa. Both sides use the {@code VERB} part of speech, and the confidence,
 * relation and info are copied from the given rule; the resource name is {@code RESOURCE_NAME}.
 *
 * @param rule the rule to invert
 * @return a new rule with the two lemmas swapped
 * @throws LexicalResourceException if constructing the new rule fails
 */
private LexicalRule<VerbOceanRuleInfo> invertRule(LexicalRule<? extends VerbOceanRuleInfo> rule)
    throws LexicalResourceException {
  final String invertedLeftLemma = rule.getRLemma();
  final String invertedRightLemma = rule.getLLemma();

  return new LexicalRule<VerbOceanRuleInfo>(
      invertedLeftLemma,
      VERB,
      invertedRightLemma,
      VERB,
      rule.getConfidence(),
      rule.getRelation(),
      RESOURCE_NAME,
      rule.getInfo());
}
/**
 * Fetches the rules for the given left-hand side from the wrapped lexical resource and converts
 * each of them into a local {@code LexicalRule} built from the resource rule's left/right lemma,
 * left/right part of speech, and a confidence value.
 *
 * <p>The confidence is either the constant score from {@code Constants} (when
 * {@code Constants.LEXICAL_RESOURCES_USE_CONSTANT_SCORE_FOR_ALL_RULES} is set) or the confidence
 * reported by the resource rule. It must lie strictly between 0 and 1.
 *
 * @param lhsLemma the left-hand-side lemma to look up
 * @param lhsPos the left-hand-side part of speech to look up
 * @return the converted rules, in the order returned by the resource; empty if the resource
 *     returned {@code null} or no rules
 * @throws RuleBaseException if a confidence is not strictly inside (0, 1) (including NaN), or if
 *     the underlying lexical resource fails
 */
@Override
protected ImmutableSet<LexicalRule> getRulesNotInCache(String lhsLemma, PartOfSpeech lhsPos)
    throws RuleBaseException {
  try {
    List<
            ? extends
                eu.excitementproject.eop.common.component.lexicalknowledge.LexicalRule<
                    ? extends RuleInfo>>
        rulesFromResource = realLexicalResource.getRulesForLeft(lhsLemma, lhsPos);
    Set<LexicalRule> ret = new LinkedHashSet<LexicalRule>();
    if (rulesFromResource != null) {
      for (eu.excitementproject.eop.common.component.lexicalknowledge.LexicalRule<
              ? extends RuleInfo>
          ruleFromResource : rulesFromResource) {
        final double confidence =
            Constants.LEXICAL_RESOURCES_USE_CONSTANT_SCORE_FOR_ALL_RULES
                ? Constants.LEXICAL_RESOURCE_CONSTANT_SCORE_WHEN_USING_CONSTANT_SCORE
                : ruleFromResource.getConfidence();

        // Written as a negated "inside the range" test so that NaN is rejected as well:
        // every ordered comparison with NaN is false, so "<= 0 || >= 1" would let it through.
        if (!(confidence > 0 && confidence < 1)) {
          throw new RuleBaseException(
              "Bad confidence for rule from "
                  + this.realLexicalResource.getClass().getSimpleName()
                  + ". The confidene is: "
                  + String.format("%-4.4f", confidence));
        }

        ret.add(
            new LexicalRule(
                ruleFromResource.getLLemma(),
                ruleFromResource.getLPos(),
                ruleFromResource.getRLemma(),
                ruleFromResource.getRPos(),
                confidence));
      }
    }
    return new ImmutableSetWrapper<LexicalRule>(ret);
  } catch (LexicalResourceException e) {
    throw new RuleBaseException("Lexical resource failure. See nested exception.", e);
  }
}
/**
 * Orders rules by ascending confidence.
 *
 * <p>Uses {@link Double#compare(double, double)} so that rules with equal confidence compare as
 * equal (returning 0). Returning a non-zero value for equal confidences would break the
 * {@code Comparator} contract ({@code compare(a, a)} must be 0 and the sign must flip when the
 * arguments are swapped), which sorting routines are allowed to detect and reject.
 *
 * <p>NOTE(review): if this comparator is ever used as the ordering of a sorted set or map,
 * rules with equal confidence are now treated as duplicates there — confirm against callers.
 *
 * @param rule1 the first rule
 * @param rule2 the second rule
 * @return a negative value, zero, or a positive value as the confidence of {@code rule1} is less
 *     than, equal to, or greater than the confidence of {@code rule2}
 */
@Override
public int compare(
    LexicalRule<? extends VerbOceanRuleInfo> rule1,
    LexicalRule<? extends VerbOceanRuleInfo> rule2) {
  return Double.compare(rule1.getConfidence(), rule2.getConfidence());
}
/** * Ctor read and map all rules from the given verb ocean file, but, keep only rules with allowed * relation types, and, for each verb pair, keep only the highest scoring rule. The rules are then * mapped. * * @param scoreThreshold rules with thresholds not higher than this will be screened * @param verbOceanRelationsFile e.g. Data\RESOURCES\VerbOcean\verbocean.unrefined.2004-05-20.txt * @param allowedRelationTypes only rules with these relations will be returned. others will be * screened. If they contain any of the {@link #FORBIDDEN_RELATION_TYPES}, a * LexicalResourceException is thrown. Cannot be null, can be empty. * @throws LexicalResourceException */ public VerbOceanLexicalResource( double scoreThreshold, File verbOceanRelationsFile, Set<RelationType> allowedRelationTypes) throws LexicalResourceException { if (scoreThreshold <= 0) throw new LexicalResourceException( "the score threshold must be positive. I got " + scoreThreshold); if (verbOceanRelationsFile == null) throw new LexicalResourceException("got null relations file"); if (!verbOceanRelationsFile.exists()) throw new LexicalResourceException(verbOceanRelationsFile + " doesn't exist"); if (allowedRelationTypes == null) throw new LexicalResourceException("allowedRelationTypes is null"); for (RelationType forbiddenRelationType : FORBIDDEN_RELATION_TYPES) if (allowedRelationTypes.contains(forbiddenRelationType)) throw new LexicalResourceException( "The given allowed relation types set " + allowedRelationTypes + " contains a forbidden relation type " + forbiddenRelationType); try { VERB = new BySimplerCanonicalPartOfSpeech(SimplerCanonicalPosTag.VERB); } catch (UnsupportedPosTagStringException e) { throw new LexicalResourceException("Internal error", e); } PairMap<String, LexicalRule<? extends VerbOceanRuleInfo>> mapRulesByUnorderedPair = new PairMap<String, LexicalRule<? 
extends VerbOceanRuleInfo>>(); Set<Pair<String>> verbPairs = new LinkedHashSet<Pair<String>>(); // read and map all rules, but, keep only rules with allowed relation types, and, for each verb // pair, keep only the highest scoring rule try { BufferedReader reader = new BufferedReader(new FileReader(verbOceanRelationsFile)); String line; while ((line = reader.readLine()) != null) { if (line.length() != 0 && line.charAt(0) != '#') // skip empty and commented lines { String[] parts = line.split(" "); RelationType relationType = RelationType.parse(parts[1]); double score = Double.parseDouble(parts[4]); if (allowedRelationTypes.contains(relationType) && score > scoreThreshold) // screen out unallowed relation types and low scores { String leftVerb = parts[0]; String rightVerb = parts[2]; Pair<String> verbPair = new Pair<String>(leftVerb, rightVerb); LexicalRule<? extends VerbOceanRuleInfo> comparedRule = mapRulesByUnorderedPair.getValueOf(verbPair); if (comparedRule == null || score > comparedRule .getConfidence()) // if there is a better rule for the same verb pair, skip // this rule mapRulesByUnorderedPair.put( verbPair, makeRule(leftVerb, rightVerb, score, relationType)); if (comparedRule == null) verbPairs.add(verbPair); } } } reader.close(); } catch (FileNotFoundException e) { throw new LexicalResourceException("file not found: " + verbOceanRelationsFile, e); } catch (IOException e) { throw new LexicalResourceException("IO error reading: " + verbOceanRelationsFile, e); } // fill up the one sided rule maps fillTheRuleMaps(mapRulesByUnorderedPair, verbPairs); }
/**
 * Tells whether two rules can be treated as the two directions of one link.
 *
 * <p>The rules qualify when their link infos are equal and their confidences differ by at most
 * 0.000001. The direction of each rule is not inspected here; the caller is expected to pass one
 * rule from each direction (w1->w2 and w2->w1).
 *
 * @param firstRule The first rule
 * @param secondRule The second rule
 * @return Whether the rules are opposite
 */
private boolean areOppositeLinks(
    LexicalRule<? extends RuleInfo> firstRule, LexicalRule<? extends RuleInfo> secondRule) {
  if (!getLinkInfo(firstRule).equals(getLinkInfo(secondRule))) {
    return false;
  }

  // Confidences are doubles, so they are compared with a small tolerance.
  final double tolerance = 0.000001;
  final double confidenceGap = Math.abs(firstRule.getConfidence() - secondRule.getConfidence());
  return confidenceGap <= tolerance;
}
/** * Receives a list of rules of type t->h and h->t and creates the alignment links for them * * @param aJCas The JCas object * @param textStart The index of the first token in T in this alignment link * @param textEnd The index of the last token in T in this alignment link * @param hypoStart The index of the first token in H in this alignment link * @param hypoEnd The index of the last token in H in this alignment link * @param rulesFromLeft The list of rules t->h * @param rulesFromRight The list of rules h->t * @param lexicalResourceVersion The lexical resource version * @throws CASException */ private void createAlignmentLinks( JCas aJCas, int textStart, int textEnd, int hypoStart, int hypoEnd, List<LexicalRule<? extends RuleInfo>> rulesFromLeft, List<LexicalRule<? extends RuleInfo>> rulesFromRight, String lexicalResourceVersion) throws CASException { // Find rules that match by rule info and make them bidirectional for (int leftRuleIndex = rulesFromLeft.size() - 1; leftRuleIndex >= 0; --leftRuleIndex) { for (int rightRuleIndex = rulesFromRight.size() - 1; rightRuleIndex >= 0; --rightRuleIndex) { if (areOppositeLinks( rulesFromLeft.get(leftRuleIndex), rulesFromRight.get(rightRuleIndex))) { // Remove these rules from the list LexicalRule<? extends RuleInfo> rightRule = rulesFromRight.remove(rightRuleIndex); LexicalRule<? extends RuleInfo> leftRule = rulesFromLeft.remove(leftRuleIndex); // Add the annotation addAlignmentAnnotations( aJCas, textStart, textEnd, hypoStart, hypoEnd, rightRule.getResourceName(), lexicalResourceVersion, Math.max(rightRule.getConfidence(), leftRule.getConfidence()), Direction.Bidirection, getLinkInfo(rightRule)); break; } } } // Add rules from t to h for (LexicalRule<? extends RuleInfo> rule : rulesFromLeft) { addAlignmentAnnotations( aJCas, textStart, textEnd, hypoStart, hypoEnd, rule.getResourceName(), lexicalResourceVersion, rule.getConfidence(), Direction.TtoH, getLinkInfo(rule)); } // Add rules from h to t for (LexicalRule<? 
extends RuleInfo> rule : rulesFromRight) { addAlignmentAnnotations( aJCas, textStart, textEnd, hypoStart, hypoEnd, rule.getResourceName(), lexicalResourceVersion, rule.getConfidence(), Direction.HtoT, getLinkInfo(rule)); } }