/**
   * Scores T against H after expanding T's bag of words with VerbOcean relations.
   *
   * <p>Every word of T is copied into a working bag with its own count (this overwrites any count
   * that was accumulated for the same word earlier as the right-hand side of a rule). Then, for
   * each rule returned by {@code volr.getRulesForLeft(word, null)}, the count of the word is added
   * to the entry of the rule's right-hand lemma. The expanded bag is finally compared with
   * {@code hBag} through {@code calculateSimilarity}, and the first element of its result is
   * returned.
   *
   * @param tBag the bag of words of T (word to count); it is only read
   * @param hBag the bag of words of H (word to count)
   * @param volr the VerbOcean lexical resource queried for rules
   * @return the first value produced by {@code calculateSimilarity} for the expanded bag and H
   * @throws ScoringComponentException if the lexical resource fails; only the message of the
   *     underlying {@link LexicalResourceException} is carried over
   */
  protected double calculateSingleLexScoreWithVORelations(
      HashMap<String, Integer> tBag, HashMap<String, Integer> hBag, VerbOceanLexicalResource volr)
      throws ScoringComponentException {
    HashMap<String, Integer> expandedBag = new HashMap<String, Integer>();

    for (Entry<String, Integer> tEntry : tBag.entrySet()) {
      final String tWord = tEntry.getKey();
      final int tCount = tEntry.getValue().intValue();
      try {
        // The word itself always enters the bag with its plain count.
        expandedBag.put(tWord, tCount);

        // Every lemma reachable through a rule inherits the count of the source word.
        for (LexicalRule<? extends RuleInfo> rule : volr.getRulesForLeft(tWord, null)) {
          final String entailedLemma = rule.getRLemma();
          final Integer countSoFar = expandedBag.get(entailedLemma);
          expandedBag.put(entailedLemma, countSoFar == null ? tCount : countSoFar + tCount);
        }
      } catch (LexicalResourceException e) {
        throw new ScoringComponentException(e.getMessage());
      }
    }

    return calculateSimilarity(expandedBag, hBag).get(0);
  }
  /**
   * Fills the one-sided rule maps from the per-pair rule map.
   *
   * <p>For every verb pair the selected rule is registered in {@code mapRulesByEntailingVerb}
   * (keyed by its left lemma), in {@code mapRulesByEntailedVerb} (keyed by its right lemma) and
   * in {@code mapRulesByEntailmentPair}. If the rule's relation type is bidirectional, the
   * inverted rule is registered in the same three maps, so that it can be found from either side
   * as well as by its pair.
   *
   * @param mapRulesByUnorderedPair the rule retained for each verb pair
   * @param verbPairs the verb pairs to process; each is expected to have a rule in
   *     {@code mapRulesByUnorderedPair}
   * @throws LexicalResourceException if a helper (e.g. rule inversion) fails
   */
  private void fillTheRuleMaps(
      PairMap<String, LexicalRule<? extends VerbOceanRuleInfo>> mapRulesByUnorderedPair,
      Set<Pair<String>> verbPairs)
      throws LexicalResourceException {
    for (Pair<String> verbPair : verbPairs) {
      LexicalRule<? extends VerbOceanRuleInfo> rule = mapRulesByUnorderedPair.getValueOf(verbPair);
      addToMappedList(mapRulesByEntailingVerb, rule.getLLemma(), rule);
      addToMappedList(mapRulesByEntailedVerb, rule.getRLemma(), rule);

      mapRulesByEntailmentPair.put(new EntailmentPair(rule.getLLemma(), rule.getRLemma()), rule);

      // Bidirectional rules are symmetrically duplicated: the inverse rule must be reachable
      // from its own left side too, not only from its right side and by pair.
      if (rule.getInfo().getRelationType().isBidirectional()) {
        LexicalRule<VerbOceanRuleInfo> inverseRule = invertRule(rule);
        addToMappedList(mapRulesByEntailingVerb, inverseRule.getLLemma(), inverseRule);
        addToMappedList(mapRulesByEntailedVerb, inverseRule.getRLemma(), inverseRule);
        mapRulesByEntailmentPair.put(
            new EntailmentPair(inverseRule.getLLemma(), inverseRule.getRLemma()), inverseRule);
      }
    }

    // Sort each per-verb list (sortMappedLists is expected to order them by score).
    sortMappedLists(mapRulesByEntailingVerb);
    sortMappedLists(mapRulesByEntailedVerb);
  }
コード例 #3
0
  /**
   * Adds a rule to the list while keeping at most one rule per link info.
   *
   * <p>The list is scanned for the first rule whose link info (see {@code getLinkInfo}) equals
   * that of the new rule. If one is found, it is replaced by the new rule only when the new rule
   * has a strictly higher confidence; otherwise the list is left as it is. If none is found, the
   * new rule is appended.
   *
   * @param rules The list of rules; modified in place
   * @param rule The new rule to add
   */
  private void addRuleToList(
      List<LexicalRule<? extends RuleInfo>> rules, LexicalRule<? extends RuleInfo> rule) {

    for (int position = 0; position < rules.size(); position++) {
      LexicalRule<? extends RuleInfo> existing = rules.get(position);

      if (!getLinkInfo(rule).equals(getLinkInfo(existing))) {
        continue;
      }

      // Same link info: keep whichever of the two rules is the more confident one.
      if (rule.getConfidence() > existing.getConfidence()) {
        rules.set(position, rule);
      }
      return;
    }

    // No rule with this link info yet.
    rules.add(rule);
  }
 /**
  * Builds the inverse of a rule: a new rule whose left lemma is the given rule's right lemma and
  * vice versa. Both sides get the {@code VERB} part of speech; confidence, relation and rule info
  * are taken over from the given rule, and the resource name is {@code RESOURCE_NAME}.
  *
  * @param rule the rule to invert
  * @return a new rule pointing in the opposite direction
  * @throws LexicalResourceException declared for the rule construction
  */
 private LexicalRule<VerbOceanRuleInfo> invertRule(LexicalRule<? extends VerbOceanRuleInfo> rule)
     throws LexicalResourceException {
   // Swap the two lemmas; everything else is carried over unchanged.
   final String invertedLeft = rule.getRLemma();
   final String invertedRight = rule.getLLemma();
   final double confidence = rule.getConfidence();

   return new LexicalRule<VerbOceanRuleInfo>(
       invertedLeft,
       VERB,
       invertedRight,
       VERB,
       confidence,
       rule.getRelation(),
       RESOURCE_NAME,
       rule.getInfo());
 }
コード例 #5
0
  /**
   * Collects the rules leftSide -> rightSide offered by the given lexical resource.
   *
   * <p>Rules are accumulated through {@code addRuleToList}. When the class name of the resource
   * contains {@code WORDNET} (compared in lower case), a rule is kept only if the words of its
   * left sense contain {@code leftSide} and the words of its right sense contain
   * {@code rightSide}; the original author notes that WordNet otherwise returns rules associated
   * with any of the words in the phrase. Any exception raised while querying or filtering is
   * logged as a warning and the rules gathered so far are returned.
   *
   * @param resource The lexical resource to use
   * @param leftSide The phrase that will be looked for as lhs of a rule
   * @param rightSide The phrase that will be looked for as rhs of a rule
   * @param leftSidePOS The part of speech passed to the resource for the left side
   * @param rightSidePOS The part of speech passed to the resource for the right side
   * @return The list of rules leftSide -> rightSide (possibly empty, never null)
   * @throws LexicalResourceException declared by the signature; failures inside the body are
   *     caught and logged
   */
  private List<LexicalRule<? extends RuleInfo>> getRules(
      LexicalResource<? extends RuleInfo> resource,
      String leftSide,
      String rightSide,
      PartOfSpeech leftSidePOS,
      PartOfSpeech rightSidePOS)
      throws LexicalResourceException {

    List<LexicalRule<? extends RuleInfo>> collected =
        new ArrayList<LexicalRule<? extends RuleInfo>>();

    try {
      // WordNet workaround: its rules need an extra sense check before they are accepted.
      final boolean fromWordNet =
          resource.getClass().getName().toLowerCase().contains(WORDNET);

      for (LexicalRule<? extends RuleInfo> candidate :
          resource.getRules(leftSide, leftSidePOS, rightSide, rightSidePOS)) {

        if (fromWordNet) {
          WordnetRuleInfo senses = (WordnetRuleInfo) candidate.getInfo();

          // Skip rules whose synsets do not contain exactly the requested phrases.
          if (!senses.getLeftSense().getWords().contains(leftSide)
              || !senses.getRightSense().getWords().contains(rightSide)) {
            continue;
          }
        }

        addRuleToList(collected, candidate);
      }

    } catch (Exception e) {
      logger.warn(
          "Could not add rules from "
              + resource.getClass().getSimpleName()
              + " for "
              + leftSide
              + "->"
              + rightSide,
          e);
    }

    return collected;
  }
コード例 #6
0
  /**
   * Returns the type of a rule as a string. For rules whose resource name is "WORDNET" this is
   * the name of the typed relation in the WordNet rule info; for rules whose resource name is
   * "VerbOcean" it is the name of the relation type in the VerbOcean rule info. Every other rule
   * gets the default "local-entailment".<br>
   * A cleaner design, as noted by the original author, would be an abstract class implementing
   * RuleInfo with a "relation" field defaulting to "local-entailment", so that callers could use
   * rule.getInfo().getRelation() without knowing which resource the rule belongs to.
   *
   * @param rule the rule to inspect
   * @return The type of the rule
   */
  private String getLinkInfo(LexicalRule<? extends RuleInfo> rule) {

    // WordNet
    if (rule.getResourceName().equals("WORDNET")) {
      return ((WordnetRuleInfo) rule.getInfo()).getTypedRelation().name();
    }

    // VerbOcean
    if (rule.getResourceName().equals("VerbOcean")) {
      return ((VerbOceanRuleInfo) rule.getInfo()).getRelationType().name();
    }

    // Any other resource
    return "local-entailment";
  }
  /**
   * Fetches from the wrapped lexical resource the rules whose left side is the given lemma and
   * part of speech, and converts them into this rule base's own {@code LexicalRule} objects.
   *
   * <p>The confidence of each converted rule is either the configured constant score (when
   * {@code Constants.LEXICAL_RESOURCES_USE_CONSTANT_SCORE_FOR_ALL_RULES} is set) or the
   * confidence reported by the resource. It must lie strictly between 0 and 1; any other value,
   * including NaN, is rejected.
   *
   * @param lhsLemma the left-hand-side lemma to look up
   * @param lhsPos the part of speech of the left-hand side
   * @return the converted rules, in the order returned by the resource; empty if the resource
   *     returned {@code null} or no rules
   * @throws RuleBaseException if a confidence is outside the open interval (0, 1), or if the
   *     lexical resource fails (the original exception is attached as the cause)
   */
  @Override
  protected ImmutableSet<LexicalRule> getRulesNotInCache(String lhsLemma, PartOfSpeech lhsPos)
      throws RuleBaseException {
    try {
      List<
              ? extends
                  eu.excitementproject.eop.common.component.lexicalknowledge.LexicalRule<
                      ? extends RuleInfo>>
          rulesFromResource = realLexicalResource.getRulesForLeft(lhsLemma, lhsPos);
      Set<LexicalRule> ret = new LinkedHashSet<LexicalRule>();
      if (rulesFromResource != null) {
        for (eu.excitementproject.eop.common.component.lexicalknowledge.LexicalRule<
                ? extends RuleInfo>
            ruleFromResource : rulesFromResource) {
          double confidence;
          if (Constants.LEXICAL_RESOURCES_USE_CONSTANT_SCORE_FOR_ALL_RULES) {
            confidence = Constants.LEXICAL_RESOURCE_CONSTANT_SCORE_WHEN_USING_CONSTANT_SCORE;
          } else {
            confidence = ruleFromResource.getConfidence();
          }

          // Written as a negated in-range test on purpose: every comparison with NaN is false,
          // so "(c <= 0) || (c >= 1)" would silently accept a NaN confidence.
          if (!(confidence > 0 && confidence < 1)) {
            throw new RuleBaseException(
                "Bad confidence for rule from "
                    + this.realLexicalResource.getClass().getSimpleName()
                    + ". The confidene is: "
                    + String.format("%-4.4f", confidence));
          }

          ret.add(
              new LexicalRule(
                  ruleFromResource.getLLemma(),
                  ruleFromResource.getLPos(),
                  ruleFromResource.getRLemma(),
                  ruleFromResource.getRPos(),
                  confidence));
        }
      }

      return new ImmutableSetWrapper<LexicalRule>(ret);
    } catch (LexicalResourceException e) {
      throw new RuleBaseException("Lexical resource failure. See nested exception.", e);
    }
  }
 /**
  * Orders two rules by ascending confidence.
  *
  * <p>Uses {@link Double#compare(double, double)} so that rules with equal confidence compare as
  * equal (result 0). A comparator that never returns 0 breaks the {@code Comparator} contract
  * (the sign of {@code compare(a, b)} must be the opposite of {@code compare(b, a)}) and can
  * make sorting fail with "Comparison method violates its general contract!".
  *
  * @param rule1 the first rule
  * @param rule2 the second rule
  * @return a negative value, zero or a positive value as the confidence of {@code rule1} is
  *     less than, equal to or greater than that of {@code rule2}
  */
 @Override
 public int compare(
     LexicalRule<? extends VerbOceanRuleInfo> rule1,
     LexicalRule<? extends VerbOceanRuleInfo> rule2) {
   return Double.compare(rule1.getConfidence(), rule2.getConfidence());
 }
  /**
   * Ctor: reads and maps all rules from the given VerbOcean file, keeping only rules with allowed
   * relation types and a score above the threshold, and, for each verb pair, only the highest
   * scoring rule. The retained rules are then mapped by {@code fillTheRuleMaps}.
   *
   * <p>Each non-empty line that does not start with '#' is split on single spaces; field 0 is
   * read as the left verb, field 1 as the relation type, field 2 as the right verb and field 4
   * as the score.
   *
   * @param scoreThreshold rules with scores not higher than this will be screened out; must be
   *     positive
   * @param verbOceanRelationsFile e.g. Data\RESOURCES\VerbOcean\verbocean.unrefined.2004-05-20.txt
   * @param allowedRelationTypes only rules with these relations will be kept, others will be
   *     screened. If they contain any of the {@link #FORBIDDEN_RELATION_TYPES}, a
   *     LexicalResourceException is thrown. Cannot be null, can be empty.
   * @throws LexicalResourceException if an argument is invalid, the file does not exist or
   *     cannot be read, or the internal part-of-speech object cannot be created
   */
  public VerbOceanLexicalResource(
      double scoreThreshold, File verbOceanRelationsFile, Set<RelationType> allowedRelationTypes)
      throws LexicalResourceException {
    if (scoreThreshold <= 0) {
      throw new LexicalResourceException(
          "the score threshold must be positive. I got " + scoreThreshold);
    }
    if (verbOceanRelationsFile == null) {
      throw new LexicalResourceException("got null relations file");
    }
    if (!verbOceanRelationsFile.exists()) {
      throw new LexicalResourceException(verbOceanRelationsFile + " doesn't exist");
    }
    if (allowedRelationTypes == null) {
      throw new LexicalResourceException("allowedRelationTypes  is null");
    }
    for (RelationType forbiddenRelationType : FORBIDDEN_RELATION_TYPES) {
      if (allowedRelationTypes.contains(forbiddenRelationType)) {
        throw new LexicalResourceException(
            "The given allowed relation types set "
                + allowedRelationTypes
                + " contains a forbidden relation type "
                + forbiddenRelationType);
      }
    }

    try {
      VERB = new BySimplerCanonicalPartOfSpeech(SimplerCanonicalPosTag.VERB);
    } catch (UnsupportedPosTagStringException e) {
      throw new LexicalResourceException("Internal error", e);
    }

    PairMap<String, LexicalRule<? extends VerbOceanRuleInfo>> mapRulesByUnorderedPair =
        new PairMap<String, LexicalRule<? extends VerbOceanRuleInfo>>();
    Set<Pair<String>> verbPairs = new LinkedHashSet<Pair<String>>();

    // read and map all rules, but, keep only rules with allowed relation types, and, for each verb
    // pair, keep only the highest scoring rule
    try {
      BufferedReader reader = new BufferedReader(new FileReader(verbOceanRelationsFile));
      try {
        String line;
        while ((line = reader.readLine()) != null) {
          // skip empty and commented lines
          if (line.length() != 0 && line.charAt(0) != '#') {
            String[] parts = line.split(" ");
            RelationType relationType = RelationType.parse(parts[1]);
            double score = Double.parseDouble(parts[4]);

            // screen out unallowed relation types and low scores
            if (allowedRelationTypes.contains(relationType) && score > scoreThreshold) {
              String leftVerb = parts[0];
              String rightVerb = parts[2];
              Pair<String> verbPair = new Pair<String>(leftVerb, rightVerb);

              LexicalRule<? extends VerbOceanRuleInfo> comparedRule =
                  mapRulesByUnorderedPair.getValueOf(verbPair);

              // store this rule only if the pair has no rule yet or this one scores higher;
              // if there is a better rule for the same verb pair, skip this rule
              if (comparedRule == null || score > comparedRule.getConfidence()) {
                mapRulesByUnorderedPair.put(
                    verbPair, makeRule(leftVerb, rightVerb, score, relationType));
              }

              // remember each pair once, the first time it gets a rule
              if (comparedRule == null) {
                verbPairs.add(verbPair);
              }
            }
          }
        }
      } finally {
        // release the file handle even when reading or parsing a line fails
        reader.close();
      }
    } catch (FileNotFoundException e) {
      throw new LexicalResourceException("file not found: " + verbOceanRelationsFile, e);
    } catch (IOException e) {
      throw new LexicalResourceException("IO error reading: " + verbOceanRelationsFile, e);
    }

    // fill up the one sided rule maps
    fillTheRuleMaps(mapRulesByUnorderedPair, verbPairs);
  }
コード例 #10
0
  /**
   * Decides whether two rules can be treated as the two directions of one link. The check made
   * here is that both rules have the same link info (see {@code getLinkInfo}) and that their
   * confidences differ by at most 0.000001. The lemmas themselves are not compared in this
   * method; callers are expected to pass one rule w1->w2 and one rule w2->w1.
   *
   * @param firstRule The first rule
   * @param secondRule The second rule
   * @return Whether the rules are opposite
   */
  private boolean areOppositeLinks(
      LexicalRule<? extends RuleInfo> firstRule, LexicalRule<? extends RuleInfo> secondRule) {

    // Different relation type: cannot be the same link seen from both sides.
    if (!getLinkInfo(firstRule).equals(getLinkInfo(secondRule))) {
      return false;
    }

    // Same relation: require (almost) identical confidence.
    final double confidenceGap =
        Math.abs(firstRule.getConfidence() - secondRule.getConfidence());
    return confidenceGap <= 0.000001;
  }
コード例 #11
0
  /**
   * Creates the alignment links for a list of t->h rules and a list of h->t rules.
   *
   * <p>First, every t->h rule is paired with an h->t rule that {@code areOppositeLinks} accepts.
   * Each such pair is removed from both lists and annotated once as a bidirectional link, using
   * the resource name and link info of the h->t rule and the larger of the two confidences. The
   * rules left in the lists are then annotated one by one as T-to-H and H-to-T links.
   *
   * @param aJCas The JCas object
   * @param textStart The index of the first token in T in this alignment link
   * @param textEnd The index of the last token in T in this alignment link
   * @param hypoStart The index of the first token in H in this alignment link
   * @param hypoEnd The index of the last token in H in this alignment link
   * @param rulesFromLeft The list of rules t->h; matched rules are removed from it
   * @param rulesFromRight The list of rules h->t; matched rules are removed from it
   * @param lexicalResourceVersion The lexical resource version
   * @throws CASException
   */
  private void createAlignmentLinks(
      JCas aJCas,
      int textStart,
      int textEnd,
      int hypoStart,
      int hypoEnd,
      List<LexicalRule<? extends RuleInfo>> rulesFromLeft,
      List<LexicalRule<? extends RuleInfo>> rulesFromRight,
      String lexicalResourceVersion)
      throws CASException {

    // Walk both lists backwards so that removing an element never shifts an index that is
    // still to be visited.
    for (int l = rulesFromLeft.size() - 1; l >= 0; l--) {
      final LexicalRule<? extends RuleInfo> tToH = rulesFromLeft.get(l);

      for (int r = rulesFromRight.size() - 1; r >= 0; r--) {
        if (!areOppositeLinks(tToH, rulesFromRight.get(r))) {
          continue;
        }

        // Matching pair: take both rules out and emit a single bidirectional link.
        final LexicalRule<? extends RuleInfo> matchedRight = rulesFromRight.remove(r);
        final LexicalRule<? extends RuleInfo> matchedLeft = rulesFromLeft.remove(l);
        final double strongerConfidence =
            Math.max(matchedRight.getConfidence(), matchedLeft.getConfidence());

        addAlignmentAnnotations(
            aJCas,
            textStart,
            textEnd,
            hypoStart,
            hypoEnd,
            matchedRight.getResourceName(),
            lexicalResourceVersion,
            strongerConfidence,
            Direction.Bidirection,
            getLinkInfo(matchedRight));

        break;
      }
    }

    // Whatever is left has no counterpart: rules from t to h first, then rules from h to t.
    addOneWayLinks(
        aJCas, textStart, textEnd, hypoStart, hypoEnd, rulesFromLeft, lexicalResourceVersion,
        Direction.TtoH);
    addOneWayLinks(
        aJCas, textStart, textEnd, hypoStart, hypoEnd, rulesFromRight, lexicalResourceVersion,
        Direction.HtoT);
  }

  /** Adds one alignment annotation per rule, all in the given direction. */
  private void addOneWayLinks(
      JCas aJCas,
      int textStart,
      int textEnd,
      int hypoStart,
      int hypoEnd,
      List<LexicalRule<? extends RuleInfo>> rules,
      String lexicalResourceVersion,
      Direction direction)
      throws CASException {

    for (LexicalRule<? extends RuleInfo> rule : rules) {
      addAlignmentAnnotations(
          aJCas,
          textStart,
          textEnd,
          hypoStart,
          hypoEnd,
          rule.getResourceName(),
          lexicalResourceVersion,
          rule.getConfidence(),
          direction,
          getLinkInfo(rule));
    }
  }