/** * Cleans up any resources that were used by the aligner. * * <p>Call this method when the aligner is no longer to be used. */ public void cleanUp() { // Close the lexical resources for (LexicalResource<? extends RuleInfo> lexicalResource : lexicalResources) { try { lexicalResource.close(); } catch (LexicalResourceCloseException e) { logger.warn("Closing the resource failed.", e); } } }
/** * Get rules of type leftSide -> rightSide, using the given lexical resource * * @param resource The lexical resource to use * @param leftSide The phrase that will be looked for as lhs of a rule * @param rightSide The phrase that will be looked for as rhs of a rule * @param partOfSpeech2 * @param partOfSpeech * @return The list of rules leftSide -> rightSide * @throws LexicalResourceException */ private List<LexicalRule<? extends RuleInfo>> getRules( LexicalResource<? extends RuleInfo> resource, String leftSide, String rightSide, PartOfSpeech leftSidePOS, PartOfSpeech rightSidePOS) throws LexicalResourceException { List<LexicalRule<? extends RuleInfo>> rules = new ArrayList<LexicalRule<? extends RuleInfo>>(); try { // WordNet workaround: // Make sure the synsets of the right and left sides of the rule // are equal to the right and left phrases. // (WN returns rules associated with any of the words in the phrase) if (resource.getClass().getName().toLowerCase().contains(WORDNET)) { for (LexicalRule<? extends RuleInfo> rule : resource.getRules(leftSide, leftSidePOS, rightSide, rightSidePOS)) { WordnetRuleInfo ruleInfo = (WordnetRuleInfo) rule.getInfo(); if ((ruleInfo.getLeftSense().getWords().contains(leftSide)) && (ruleInfo.getRightSense().getWords().contains(rightSide))) { addRuleToList(rules, rule); } } } else { // Get rules from t to h for (LexicalRule<? extends RuleInfo> rule : resource.getRules(leftSide, leftSidePOS, rightSide, rightSidePOS)) { addRuleToList(rules, rule); } } } catch (Exception e) { logger.warn( "Could not add rules from " + resource.getClass().getSimpleName() + " for " + leftSide + "->" + rightSide, e); } return rules; }
/** * Call this method once before starting to align sentence pairs. * * @param config a CommonConfig instance. The aligner retrieves the lexical resources * configuration values. * @throws LexicalResourceException if initialization of a resource failed * @throws ConfigurationException if the configuration is invalid */ private void init(CommonConfig config) throws LexicalResourceException, ConfigurationException { // Get the general parameters configuration section NameValueTable paramsSection = null; try { paramsSection = config.getSection(GENERAL_PARAMS_CONF_SECTION); } catch (ConfigurationException e) { throw new ConfigurationException(e); } maxPhrase = paramsSection.getInteger(MAX_PHRASE_KEY); // Get the Lexical Resources configuration section NameValueTable lexicalResourcesSection = null; try { lexicalResourcesSection = config.getSection(LEXICAL_RESOURCES_CONF_SECTION); } catch (ConfigurationException e) { throw new ConfigurationException(e); } lexicalResources = new ArrayList<LexicalResource<? extends RuleInfo>>(); ConfigurationFile configFile = new ConfigurationFile(config); // Get each resource and create it using the configuration section related to it for (String resourceName : lexicalResourcesSection.keySet()) { // Get the class name String resourceClassName = lexicalResourcesSection.getString(resourceName); // Get the configuration params ConfigurationParams resourceParams = configFile.getModuleConfiguration(resourceName); resourceParams.setExpandingEnvironmentVariables(true); LexicalResource<? extends RuleInfo> lexicalResource = createLexicalResource(resourceClassName, resourceParams); if (lexicalResource != null) { lexicalResources.add(lexicalResource); PartOfSpeech leftSidePOS = null, rightSidePOS = null; // Add the information about this resource // Get the right and left side POS, in case it's mentioned if (resourceParams.keySet().contains(LEFT_SIDE_POS_PARAM)) { try { leftSidePOS = new ByCanonicalPartOfSpeech(resourceParams.getString(LEFT_SIDE_POS_PARAM)); } catch (UnsupportedPosTagStringException e) { logger.warn( "Could not load POS for left side: " + resourceParams.getString(LEFT_SIDE_POS_PARAM) + ". Alignment links of all POS will be retreived."); } } if (resourceParams.keySet().contains(RIGHT_SIDE_POS_PARAM)) { try { rightSidePOS = new ByCanonicalPartOfSpeech(resourceParams.getString(RIGHT_SIDE_POS_PARAM)); } catch (UnsupportedPosTagStringException e) { logger.warn( "Could not load POS for right side: " + resourceParams.getString(RIGHT_SIDE_POS_PARAM) + ". Alignment links of all POS will be retreived."); } } lexicalResourcesInformation.put( lexicalResource.getClass().getName(), new LexicalResourceInformation( resourceParams.getString(VERSION_PARAM), resourceParams.getBoolean(USE_LEMMA_PARAM), leftSidePOS, rightSidePOS)); } } }
/** * Align the text and the hypothesis. * * <p>This method receives a JCAS object containing two views: Hypothesis and text views. The * method assumes that the views were already annotated with a tokenizer. * * <p>The lexical aligner looks at every phrase t in the text and every phrase h in the * hypothesis, and uses the lexical resources to find rules with lhs = t and rhs = h. * * @param aJCas the JCAS object with the text and hypothesis view. * @throws AlignmentComponentException */ @Override public void annotate(JCas aJCas) throws AlignmentComponentException { try { logger.info("Started annotating a text and hypothesis pair using lexical aligner"); // Get the tokens and lemmas of the text and hypothesis getTokenAnnotations(aJCas); // Check in all the resources for rules of type textPhrase -> hypoPhrase for (LexicalResource<? extends RuleInfo> resource : lexicalResources) { LexicalResourceInformation resourceInfo = lexicalResourcesInformation.get(resource.getClass().getName()); // For every phrase t in T and phrase h in H, check the lexical // resources if they contain a rule t->h String textPhrase = "", hypoPhrase = ""; for (int textStart = 0; textStart < textTokens.size(); ++textStart) { for (int textEnd = textStart; textEnd < Math.min(textTokens.size(), textStart + maxPhrase); ++textEnd) { textPhrase = getPhrase(textTokens, textStart, textEnd, resourceInfo.useLemma()); for (int hypoStart = 0; hypoStart < hypoTokens.size(); ++hypoStart) { for (int hypoEnd = hypoStart; hypoEnd < Math.min(hypoTokens.size(), hypoStart + maxPhrase); ++hypoEnd) { hypoPhrase = getPhrase(hypoTokens, hypoStart, hypoEnd, resourceInfo.useLemma()); // Get the rules textPhrase -> hypoPhrase List<LexicalRule<? extends RuleInfo>> ruleFromLeft = getRules( resource, textPhrase, hypoPhrase, resourceInfo.getLeftSidePOS(), resourceInfo.getRightSidePOS()); // Get the rules hypoPhrase -> textPhrase List<LexicalRule<? extends RuleInfo>> ruleFromRight = getRules( resource, hypoPhrase, textPhrase, resourceInfo.getLeftSidePOS(), resourceInfo.getRightSidePOS()); // Create the alignment links for the rules createAlignmentLinks( aJCas, textStart, textEnd, hypoStart, hypoEnd, ruleFromLeft, ruleFromRight, resourceInfo.getVersion()); } } } } } logger.info("Finished annotating a text and hypothesis pair using lexical aligner"); } catch (CASException | LexicalResourceException e) { throw new AlignmentComponentException("LexicalAligner failed aligning the sentence pair.", e); } }