/**
   * Reads the decision threshold from the configuration section named after this class.
   *
   * <p>We use a threshold to decide whether it's an entailment (above the threshold) or not
   * (below). This is intended for illustration purposes only, as the similarity scores are not
   * normally distributed.
   *
   * @param config the configuration; the section is looked up under this object's runtime class
   *     name and its {@code threshold} entry is read as a double
   * @throws ConfigurationException if the section cannot be retrieved or is missing
   * @throws ComponentException declared for callers; not raised by the code in this method
   */
  private void initializeThreshold(CommonConfig config)
      throws ConfigurationException, ComponentException {
    String sectionName = this.getClass().getName();

    NameValueTable edaSection;
    try {
      edaSection = config.getSection(sectionName);
    } catch (ConfigurationException e) {
      // Chain the original exception so its stack trace is not lost.
      throw new ConfigurationException(e.getMessage() + " No EDA section.", e);
    }
    if (edaSection == null) {
      // Fail with a configuration error instead of a bare NullPointerException below.
      throw new ConfigurationException("No EDA section: " + sectionName);
    }

    threshold = edaSection.getDouble("threshold");
  }
  /**
   * Runs a {@code MaxEntClassificationEDA} over every {@code .xmi} file in its test directory and
   * writes one tab-separated line per pair (pair ID, gold label, decision, confidence) to a UTF-8
   * result file.
   *
   * <p>The result file name is the configuration file name with {@code "configuration-file"}
   * replaced by {@code "results"} and {@code "_Result.txt"} appended. The number of pairs whose
   * decision matches the gold label (ignoring case) is logged at the end.
   *
   * <p>Any exception is logged rather than propagated. The writer is always closed, and the EDA
   * is shut down whenever its initialization succeeded, even if processing fails midway.
   *
   * @param config the configuration used to initialize the EDA and to derive the output file name
   */
  public void testTesting_MultiTH_AND_Output(CommonConfig config) {
    MaxEntClassificationEDA meceda = new MaxEntClassificationEDA();

    BufferedWriter output = null;
    // Only shut the EDA down if initialize() completed.
    boolean initialized = false;

    try {
      meceda.initialize(config);
      initialized = true;
      // check the test data directory
      meceda.initializeData(config, false);

      String resultFileName =
          config.getConfigurationFileName().replace("configuration-file", "results")
              + "_Result.txt";
      output =
          new BufferedWriter(
              new OutputStreamWriter(new FileOutputStream(resultFileName), "UTF-8"));
      logger.info("build CASes for input sentence pairs:");

      // listFiles() returns null when the path is not a directory or cannot be read.
      File[] testFiles = new File(meceda.getTestDIR()).listFiles();
      if (testFiles == null) {
        logger.info("Cannot list the test data directory: " + meceda.getTestDIR());
        return;
      }

      int correct = 0;
      int sum = 0;
      for (File file : testFiles) {
        // ignore all the non-xmi files
        if (!file.getName().endsWith(".xmi")) {
          continue;
        }
        JCas cas = PlatformCASProber.probeXmi(file, null);
        ClassificationTEDecision decision = meceda.process(cas);
        String goldLabel = meceda.getGoldLabel(cas);
        String predictedLabel = decision.getDecision().toString();

        output.write(decision.getPairID());
        output.write("\t");
        output.write(goldLabel.toUpperCase());
        output.write("\t");
        output.write(predictedLabel.toUpperCase());
        output.write("\t");
        output.write(String.valueOf(decision.getConfidence()));
        output.newLine();
        logger.info("Pair " + decision.getPairID() + " is done.");
        if (goldLabel.equalsIgnoreCase(predictedLabel)) {
          correct++;
        }
        sum++;
      }
      logger.info("The correctly predicted pairs are " + correct + " / " + sum);
    } catch (Exception e) {
      logger.info(e.getMessage());
    } finally {
      // Release resources on every path, including failures during processing.
      if (output != null) {
        try {
          output.close();
        } catch (Exception e) {
          logger.info(e.getMessage());
        }
      }
      if (initialized) {
        try {
          meceda.shutdown();
          logger.info("EDA shuts down.");
        } catch (Exception e) {
          logger.info(e.getMessage());
        }
      }
    }
  }
  /**
   * Builds the component from a {@code CommonConfig}.
   *
   * <p>The {@code BagOfLexesScoring} section must name at least one of {@code
   * WordnetLexicalResource} and {@code VerbOceanLexicalResource}; each value is a comma-separated
   * list of relation names (surrounding whitespace is ignored, matching is case-insensitive).
   * Optional sections of the same names can override {@code isCollapsed} (default {@code true}),
   * the first-sense flags (default {@code false}) and the resource paths. When collapsed, one
   * resource covering all relations is created; otherwise one resource per relation. {@code
   * numOfFeats} is incremented once for every resource created.
   *
   * @param config the configuration
   * @throws ConfigurationException if no resource is configured, no valid relation is listed, or
   *     a resource path does not exist
   * @throws LexicalResourceException declared for failures raised while creating a resource
   */
  public BagOfLexesScoringEN(CommonConfig config)
      throws ConfigurationException, LexicalResourceException {
    NameValueTable comp = config.getSection("BagOfLexesScoring");
    String wnRelationSpec = comp.getString("WordnetLexicalResource");
    String voRelationSpec = comp.getString("VerbOceanLexicalResource");

    if (null == wnRelationSpec && null == voRelationSpec) {
      throw new ConfigurationException(
          "Wrong configuation: didn't find any lexical resources for the BagOfLexesScoring component");
    }

    if (null != wnRelationSpec) {
      Set<WordNetRelation> wnRelSet = parseWordNetRelations(wnRelationSpec);

      boolean isCollapsed = true;
      boolean useFirstSenseOnlyLeft = false;
      boolean useFirstSenseOnlyRight = false;
      String wnPath = "/ontologies/EnglishWordNet-dict/";
      NameValueTable wnComp = config.getSection("WordnetLexicalResource");
      if (null != wnComp) {
        isCollapsed = readBooleanSetting(wnComp, "isCollapsed", isCollapsed);
        useFirstSenseOnlyLeft =
            readBooleanSetting(wnComp, "useFirstSenseOnlyLeft", useFirstSenseOnlyLeft);
        useFirstSenseOnlyRight =
            readBooleanSetting(wnComp, "useFirstSenseOnlyRight", useFirstSenseOnlyRight);
        String configuredPath = wnComp.getString("wordNetFilesPath");
        if (null != configuredPath) {
          wnPath = configuredPath;
        }
      }

      wnlrSet = new HashSet<WordnetLexicalResource>();
      File wnFile = new File(wnPath);
      if (!wnFile.exists()) {
        throw new ConfigurationException("cannot find WordNet at: " + wnPath);
      }
      if (isCollapsed) {
        // One resource answering for all configured relations at once.
        wnlrSet.add(
            new WordnetLexicalResource(
                wnFile, useFirstSenseOnlyLeft, useFirstSenseOnlyRight, wnRelSet));
        numOfFeats++;
      } else {
        // One resource (and therefore one feature) per relation.
        for (WordNetRelation wnr : wnRelSet) {
          wnlrSet.add(
              new WordnetLexicalResource(
                  wnFile,
                  useFirstSenseOnlyLeft,
                  useFirstSenseOnlyRight,
                  Collections.singleton(wnr)));
          numOfFeats++;
        }
      }
      logger.info("Load WordNet done.");
    }

    if (null != voRelationSpec) {
      Set<RelationType> voRelSet = parseVerbOceanRelations(voRelationSpec);

      boolean isCollapsed = true;
      String voPath = "/VerbOcean/verbocean.unrefined.2004-05-20.txt";
      NameValueTable voComp = config.getSection("VerbOceanLexicalResource");
      if (null != voComp) {
        isCollapsed = readBooleanSetting(voComp, "isCollapsed", isCollapsed);
        String configuredPath = voComp.getString("verbOceanFilePath");
        if (null != configuredPath) {
          voPath = configuredPath;
        }
      }

      volrSet = new HashSet<VerbOceanLexicalResource>();
      File voFile = new File(voPath);
      if (!voFile.exists()) {
        throw new ConfigurationException("cannot find VerbOcean at: " + voPath);
      }
      if (isCollapsed) {
        volrSet.add(new VerbOceanLexicalResource(1, voFile, voRelSet));
        numOfFeats++;
      } else {
        for (RelationType vor : voRelSet) {
          volrSet.add(new VerbOceanLexicalResource(1, voFile, Collections.singleton(vor)));
          numOfFeats++;
        }
      }
      logger.info("Load VerbOcean done.");
    }
  }

  /**
   * Parses a comma-separated list of WordNet relation names (HYPERNYM, SYNONYM, PART_HOLONYM).
   * Unknown names are logged and skipped.
   */
  private Set<WordNetRelation> parseWordNetRelations(String relationSpec)
      throws ConfigurationException {
    String[] names = relationSpec.split(",");
    if (0 == names.length) {
      throw new ConfigurationException(
          "Wrong configuation: didn't find any relations for the WordNet");
    }
    Set<WordNetRelation> relations = new HashSet<WordNetRelation>();
    for (String rawName : names) {
      // Tolerate whitespace around the separators, e.g. "HYPERNYM, SYNONYM".
      String name = rawName.trim();
      if (name.equalsIgnoreCase("HYPERNYM")) {
        relations.add(WordNetRelation.HYPERNYM);
      } else if (name.equalsIgnoreCase("SYNONYM")) {
        relations.add(WordNetRelation.SYNONYM);
      } else if (name.equalsIgnoreCase("PART_HOLONYM")) {
        relations.add(WordNetRelation.PART_HOLONYM);
      } else {
        logger.warning("Warning: wrong relation names for the WordNet: " + name);
      }
    }
    if (relations.isEmpty()) {
      throw new ConfigurationException(
          "Wrong configuation: didn't find any (correct) relations for the WordNet");
    }
    return relations;
  }

  /**
   * Parses a comma-separated list of VerbOcean relation names (strongerthan, canresultin,
   * similar). Unknown names are logged and skipped.
   */
  private Set<RelationType> parseVerbOceanRelations(String relationSpec)
      throws ConfigurationException {
    String[] names = relationSpec.split(",");
    if (0 == names.length) {
      throw new ConfigurationException(
          "Wrong configuation: didn't find any relations for the VerbOcean");
    }
    Set<RelationType> relations = new HashSet<RelationType>();
    for (String rawName : names) {
      // Tolerate whitespace around the separators, e.g. "similar, strongerthan".
      String name = rawName.trim();
      if (name.equalsIgnoreCase("strongerthan")) {
        relations.add(RelationType.STRONGER_THAN);
      } else if (name.equalsIgnoreCase("canresultin")) {
        relations.add(RelationType.CAN_RESULT_IN);
      } else if (name.equalsIgnoreCase("similar")) {
        relations.add(RelationType.SIMILAR);
      } else {
        logger.warning("Warning: wrong relation names for the VerbOcean: " + name);
      }
    }
    if (relations.isEmpty()) {
      throw new ConfigurationException(
          "Wrong configuation: didn't find any (correct) relations for the VerbOcean");
    }
    return relations;
  }

  /** Returns the boolean stored under {@code key}, or {@code defaultValue} when it is absent. */
  private boolean readBooleanSetting(NameValueTable section, String key, boolean defaultValue)
      throws ConfigurationException {
    String value = section.getString(key);
    return null == value ? defaultValue : Boolean.parseBoolean(value);
  }
  // Example no. 4 (separator text left over from the original code listing, followed by a stray "0")
  /**
   * Sets the aligner up; must run once before any sentence pair is aligned.
   *
   * <p>Reads the maximal phrase length from the general parameters section, then walks the
   * lexical resources section: every entry maps a resource name to a class name, and the resource
   * is created from the module configuration registered under that name. For each resource that
   * was created, its version, use-lemma flag and optional left/right part-of-speech restrictions
   * are recorded under the resource's class name.
   *
   * @param config a CommonConfig instance from which the lexical resources configuration values
   *     are retrieved
   * @throws LexicalResourceException if initialization of a resource failed
   * @throws ConfigurationException if the configuration is invalid
   */
  private void init(CommonConfig config) throws LexicalResourceException, ConfigurationException {

    // General parameters section
    NameValueTable generalParams;
    try {
      generalParams = config.getSection(GENERAL_PARAMS_CONF_SECTION);
    } catch (ConfigurationException e) {
      throw new ConfigurationException(e);
    }

    maxPhrase = generalParams.getInteger(MAX_PHRASE_KEY);

    // Lexical resources section: resource name -> resource class name
    NameValueTable resourcesTable;
    try {
      resourcesTable = config.getSection(LEXICAL_RESOURCES_CONF_SECTION);
    } catch (ConfigurationException e) {
      throw new ConfigurationException(e);
    }

    lexicalResources = new ArrayList<LexicalResource<? extends RuleInfo>>();
    ConfigurationFile moduleConfigurations = new ConfigurationFile(config);

    for (String name : resourcesTable.keySet()) {
      String className = resourcesTable.getString(name);

      // Each resource is built from the module configuration carrying its own name
      ConfigurationParams params = moduleConfigurations.getModuleConfiguration(name);
      params.setExpandingEnvironmentVariables(true);
      LexicalResource<? extends RuleInfo> resource = createLexicalResource(className, params);

      if (resource == null) {
        continue;
      }
      lexicalResources.add(resource);

      // Optional POS restrictions; null when not configured or not parsable
      PartOfSpeech leftPos = readSidePos(params, LEFT_SIDE_POS_PARAM, "left");
      PartOfSpeech rightPos = readSidePos(params, RIGHT_SIDE_POS_PARAM, "right");

      LexicalResourceInformation information =
          new LexicalResourceInformation(
              params.getString(VERSION_PARAM),
              params.getBoolean(USE_LEMMA_PARAM),
              leftPos,
              rightPos);
      lexicalResourcesInformation.put(resource.getClass().getName(), information);
    }
  }

  /**
   * Reads the part of speech configured under {@code posParam}; returns null when the parameter is
   * absent, or when its value is not a supported tag (a warning is logged in that case).
   */
  private PartOfSpeech readSidePos(ConfigurationParams params, String posParam, String side)
      throws LexicalResourceException, ConfigurationException {
    if (!params.keySet().contains(posParam)) {
      return null;
    }
    try {
      return new ByCanonicalPartOfSpeech(params.getString(posParam));
    } catch (UnsupportedPosTagStringException e) {
      logger.warn(
          "Could not load POS for "
              + side
              + " side: "
              + params.getString(posParam)
              + ". Alignment links of all POS will be retreived.");
      return null;
    }
  }