コード例 #1
0
  /**
   * Releases the lexical resources held by the aligner.
   *
   * <p>Call this method once the aligner will no longer be used. Every resource in {@code
   * lexicalResources} is closed in turn; a resource that fails to close is reported with a warning
   * and the remaining resources are still closed.
   */
  public void cleanUp() {
    for (LexicalResource<? extends RuleInfo> openResource : lexicalResources) {
      closeAndLogFailure(openResource);
    }
  }

  /**
   * Closes a single lexical resource, logging (rather than propagating) a close failure.
   *
   * @param openResource the resource to close
   */
  private void closeAndLogFailure(LexicalResource<? extends RuleInfo> openResource) {
    try {
      openResource.close();
    } catch (LexicalResourceCloseException e) {
      // Best-effort: a failed close must not prevent the other resources from being closed
      logger.warn("Closing the resource failed.", e);
    }
  }
コード例 #2
0
  /**
   * Gets the rules of type leftSide -> rightSide from the given lexical resource.
   *
   * <p>When the resource's class name contains {@code WORDNET} (after lower-casing the class
   * name), a rule is kept only if the words of its left sense contain {@code leftSide} and the
   * words of its right sense contain {@code rightSide}, because WordNet returns rules associated
   * with any of the words in a phrase. For every other resource all returned rules are kept.
   *
   * <p>The lookup is best-effort: any exception raised while retrieving or filtering the rules is
   * logged as a warning, and the rules collected up to that point are returned.
   *
   * @param resource the lexical resource to use
   * @param leftSide the phrase that will be looked for as lhs of a rule
   * @param rightSide the phrase that will be looked for as rhs of a rule
   * @param leftSidePOS the part of speech passed to the resource for the lhs
   * @param rightSidePOS the part of speech passed to the resource for the rhs
   * @return the list of rules leftSide -> rightSide; possibly empty, never {@code null}
   * @throws LexicalResourceException kept in the signature for callers; failures raised during the
   *     lookup itself are logged rather than propagated
   */
  private List<LexicalRule<? extends RuleInfo>> getRules(
      LexicalResource<? extends RuleInfo> resource,
      String leftSide,
      String rightSide,
      PartOfSpeech leftSidePOS,
      PartOfSpeech rightSidePOS)
      throws LexicalResourceException {

    List<LexicalRule<? extends RuleInfo>> rules = new ArrayList<LexicalRule<? extends RuleInfo>>();

    try {

      // Lower-case with a fixed locale so the class-name match does not depend on the JVM's
      // default locale.
      boolean isWordNet =
          resource.getClass().getName().toLowerCase(java.util.Locale.ROOT).contains(WORDNET);

      for (LexicalRule<? extends RuleInfo> rule :
          resource.getRules(leftSide, leftSidePOS, rightSide, rightSidePOS)) {

        if (isWordNet) {

          // WordNet workaround:
          // Make sure the synsets of the right and left sides of the rule
          // are equal to the right and left phrases.
          // (WN returns rules associated with any of the words in the phrase)
          WordnetRuleInfo ruleInfo = (WordnetRuleInfo) rule.getInfo();

          boolean sensesMatchPhrases =
              ruleInfo.getLeftSense().getWords().contains(leftSide)
                  && ruleInfo.getRightSense().getWords().contains(rightSide);

          if (!sensesMatchPhrases) {
            continue;
          }
        }

        addRuleToList(rules, rule);
      }

    } catch (Exception e) {
      // Deliberately broad: a failing resource must not abort the alignment of the pair
      logger.warn(
          "Could not add rules from "
              + resource.getClass().getSimpleName()
              + " for "
              + leftSide
              + "->"
              + rightSide,
          e);
    }

    return rules;
  }
コード例 #3
0
  /**
   * Initializes the aligner from the configuration. Call this method once before starting to align
   * sentence pairs.
   *
   * <p>Reads {@code maxPhrase} from the general parameters section, then goes over the lexical
   * resources section: every entry maps a resource name to a class name, and the resource is
   * created from that class name and the module configuration of the same name. For each resource
   * that was created, its version, its use-lemma flag and its optional left/right side POS are
   * recorded in {@code lexicalResourcesInformation} under the resource's class name.
   *
   * @param config a CommonConfig instance. The aligner retrieves the lexical resources
   *     configuration values.
   * @throws LexicalResourceException if initialization of a resource failed
   * @throws ConfigurationException if the configuration is invalid
   */
  private void init(CommonConfig config) throws LexicalResourceException, ConfigurationException {

    // Get the general parameters configuration section
    NameValueTable paramsSection = config.getSection(GENERAL_PARAMS_CONF_SECTION);
    maxPhrase = paramsSection.getInteger(MAX_PHRASE_KEY);

    // Get the Lexical Resources configuration section
    NameValueTable lexicalResourcesSection = config.getSection(LEXICAL_RESOURCES_CONF_SECTION);

    lexicalResources = new ArrayList<LexicalResource<? extends RuleInfo>>();
    ConfigurationFile configFile = new ConfigurationFile(config);

    // Get each resource and create it using the configuration section related to it
    for (String resourceName : lexicalResourcesSection.keySet()) {

      // Get the class name
      String resourceClassName = lexicalResourcesSection.getString(resourceName);

      // Get the configuration params
      ConfigurationParams resourceParams = configFile.getModuleConfiguration(resourceName);
      resourceParams.setExpandingEnvironmentVariables(true);
      LexicalResource<? extends RuleInfo> lexicalResource =
          createLexicalResource(resourceClassName, resourceParams);

      // Nothing to register when no resource was created
      if (lexicalResource == null) {
        continue;
      }

      lexicalResources.add(lexicalResource);

      // Get the right and left side POS, in case it's mentioned
      PartOfSpeech leftSidePOS = readSidePos(resourceParams, LEFT_SIDE_POS_PARAM, "left");
      PartOfSpeech rightSidePOS = readSidePos(resourceParams, RIGHT_SIDE_POS_PARAM, "right");

      // Add the information about this resource
      lexicalResourcesInformation.put(
          lexicalResource.getClass().getName(),
          new LexicalResourceInformation(
              resourceParams.getString(VERSION_PARAM),
              resourceParams.getBoolean(USE_LEMMA_PARAM),
              leftSidePOS,
              rightSidePOS));
    }
  }

  /**
   * Reads the optional POS of one rule side from a resource's configuration.
   *
   * @param resourceParams the configuration parameters of the resource
   * @param posParam the name of the parameter that holds the POS
   * @param side the side name ("left" or "right"), used only in the warning message
   * @return the configured POS, or {@code null} when the parameter is absent or its value is not
   *     a supported POS tag
   * @throws ConfigurationException if the parameter value cannot be read
   */
  private PartOfSpeech readSidePos(ConfigurationParams resourceParams, String posParam, String side)
      throws ConfigurationException {

    if (!resourceParams.keySet().contains(posParam)) {
      return null;
    }

    try {
      return new ByCanonicalPartOfSpeech(resourceParams.getString(posParam));
    } catch (UnsupportedPosTagStringException e) {
      // An unsupported tag is not fatal: the side is left without a POS, and the cause is logged
      logger.warn(
          "Could not load POS for "
              + side
              + " side: "
              + resourceParams.getString(posParam)
              + ". Alignment links of all POS will be retreived.",
          e);
      return null;
    }
  }
コード例 #4
0
  /**
   * Aligns the text and the hypothesis of a pair.
   *
   * <p>The given JCAS object is expected to contain two views, a text view and a hypothesis view,
   * both already annotated with a tokenizer.
   *
   * <p>For every lexical resource, every phrase t of up to {@code maxPhrase} tokens in the text is
   * paired with every phrase h of up to {@code maxPhrase} tokens in the hypothesis. The resource is
   * queried for rules t -> h and h -> t, and alignment links are created from the rules found.
   *
   * @param aJCas the JCAS object with the text and hypothesis view.
   * @throws AlignmentComponentException if a CAS or lexical resource failure occurs while aligning
   */
  @Override
  public void annotate(JCas aJCas) throws AlignmentComponentException {

    try {

      logger.info("Started annotating a text and hypothesis pair using lexical aligner");

      // Get the tokens and lemmas of the text and hypothesis
      getTokenAnnotations(aJCas);

      // Query each resource in turn for rules between text phrases and hypothesis phrases
      for (LexicalResource<? extends RuleInfo> lexResource : lexicalResources) {

        LexicalResourceInformation info =
            lexicalResourcesInformation.get(lexResource.getClass().getName());

        // Enumerate every text phrase [tBegin, tLast] of at most maxPhrase tokens
        for (int tBegin = 0; tBegin < textTokens.size(); tBegin++) {
          for (int tLast = tBegin;
              tLast < textTokens.size() && tLast < tBegin + maxPhrase;
              tLast++) {

            String tPhrase = getPhrase(textTokens, tBegin, tLast, info.useLemma());

            alignWithHypothesisPhrases(aJCas, lexResource, info, tBegin, tLast, tPhrase);
          }
        }
      }

      logger.info("Finished annotating a text and hypothesis pair using lexical aligner");

    } catch (CASException | LexicalResourceException e) {

      throw new AlignmentComponentException("LexicalAligner failed aligning the sentence pair.", e);
    }
  }

  /**
   * Pairs one text phrase with every hypothesis phrase of at most {@code maxPhrase} tokens, looks
   * up the rules in both directions and creates the alignment links for them.
   *
   * @param aJCas the JCAS object with the text and hypothesis view
   * @param lexResource the lexical resource to query
   * @param info the information recorded for {@code lexResource}
   * @param tBegin index of the first token of the text phrase
   * @param tLast index of the last token of the text phrase
   * @param tPhrase the text phrase itself
   */
  private void alignWithHypothesisPhrases(
      JCas aJCas,
      LexicalResource<? extends RuleInfo> lexResource,
      LexicalResourceInformation info,
      int tBegin,
      int tLast,
      String tPhrase)
      throws CASException, LexicalResourceException, AlignmentComponentException {

    for (int hBegin = 0; hBegin < hypoTokens.size(); hBegin++) {
      for (int hLast = hBegin;
          hLast < hypoTokens.size() && hLast < hBegin + maxPhrase;
          hLast++) {

        String hPhrase = getPhrase(hypoTokens, hBegin, hLast, info.useLemma());

        // Rules text phrase -> hypothesis phrase
        List<LexicalRule<? extends RuleInfo>> textToHypoRules =
            getRules(lexResource, tPhrase, hPhrase, info.getLeftSidePOS(), info.getRightSidePOS());

        // Rules hypothesis phrase -> text phrase
        List<LexicalRule<? extends RuleInfo>> hypoToTextRules =
            getRules(lexResource, hPhrase, tPhrase, info.getLeftSidePOS(), info.getRightSidePOS());

        // Create the alignment links for the rules
        createAlignmentLinks(
            aJCas,
            tBegin,
            tLast,
            hBegin,
            hLast,
            textToHypoRules,
            hypoToTextRules,
            info.getVersion());
      }
    }
  }