/**
   * Command-line entry point: compiles a folder of entailment rule files and serializes the
   * resulting rules to a single output file.
   *
   * <p>The configuration file named by {@code args[0]} supplies the rules directory and the rule
   * file suffix (rule-compiler module) and the output file name (syntactic module). The compiled
   * rules are copied into a {@link LinkedHashSet} before serialization, so duplicates are dropped
   * while the first-seen order is kept.
   *
   * @param args {@code args[0]} is the path of the configuration file; further arguments are
   *     ignored
   * @throws EntailmentCompilationException if no argument is given, or wrapping the
   *     {@code CompilationException} raised while serializing the rules
   * @throws UnsupportedPosTagStringException declared by the calls made here; the exact source is
   *     not visible in this method
   * @throws FileNotFoundException declared by the calls made here; the exact source is not
   *     visible in this method
   * @throws IOException declared by the calls made here; the exact source is not visible in this
   *     method
   * @throws ConfigurationException declared by the configuration accessors used here
   */
  public static void main(String[] args)
      throws UnsupportedPosTagStringException, EntailmentCompilationException,
          FileNotFoundException, IOException, ConfigurationException {
    if (args.length == 0) {
      throw new EntailmentCompilationException(
          "usage: EntailmentRuleCompiler configurationFile.xml");
    }

    // Load the configuration with environment-variable expansion switched on.
    final File configurationPath = new File(args[0]);
    final ConfigurationFile configuration = new ConfigurationFile(configurationPath);
    configuration.setExpandingEnvironmentVariables(true);

    final ConfigurationParams compilerSettings =
        configuration.getModuleConfiguration(RuleCompilerParameterNames.RULE_COMPILER_PARAMS_MODULE);
    final ConfigurationParams syntacticSettings =
        configuration.getModuleConfiguration(RuleCompilerParameterNames.SYNTACTIC_PARAMS_MODULE);

    final File rulesDirectory =
        compilerSettings.getDirectory(RuleCompilerParameterNames.ENTAILMENT_RULES_DIRECTORY);
    final String ruleFileSuffix = compilerSettings.get(RuleCompilerParameterNames.RULE_FILE_SUFFIX);

    // Compile every rule file of the folder, then de-duplicate while keeping encounter order.
    final List<RuleWithConfidenceAndDescription<Info, BasicNode>> compiledRules =
        new EntailmentRuleCompiler().compileFolder(rulesDirectory, ruleFileSuffix);
    final Set<RuleWithConfidenceAndDescription<Info, BasicNode>> uniqueRules =
        new LinkedHashSet<RuleWithConfidenceAndDescription<Info, BasicNode>>(compiledRules);

    // Serialize the rule set; a utility-level failure is re-thrown as this tool's exception type.
    final String outputPath =
        syntacticSettings.get(TransformationsConfigurationParametersNames.SYNTACTIC_RULES_FILE);
    try {
      RuleCompilerUtils.serializeToFile(uniqueRules, outputPath);
    } catch (CompilationException e) {
      throw new EntailmentCompilationException("see nested", e);
    }

    System.out.println("\n\nMade " + uniqueRules.size() + " rules.");
    System.out.println("Serialized them into " + outputPath);
  }
  /**
   * Command-line entry point: transposes a file of per-right-element similarity maps into
   * per-left-element similarity maps.
   *
   * <p>Each record read from the input file is a pair of a right element id and a map from left
   * element id to score. For every such entry the score is stored under
   * {@code leftElementId -> (rightElementId -> score)}. Entries are accumulated in an in-memory
   * buffer and handed to {@code writeToRedis} whenever {@code memoryLeft()} reports false, and once
   * more after the input is exhausted. Finally every entry of the Redis-backed map is written to
   * the output file, with its score map passed through {@code SortUtil.sortMapByValue(..., true)}.
   *
   * <p>Both storage files are closed in {@code finally} blocks so that a failure while reading or
   * writing does not leave them open. Any exception is printed to standard error; the method does
   * not rethrow.
   *
   * @param args exactly one element: the path of the configuration file
   */
  public static void main(String[] args) {

    if (args.length != 1) {
      System.err.println("Usage: Right2LeftSimilarities <configuration file>");
      // NOTE(review): a usage error exits with status 0; kept as-is because callers may rely on it.
      System.exit(0);
    }

    try {

      ConfigurationFile confFile = new ConfigurationFile(new ImplCommonConfig(new File(args[0])));

      // Logging is configured from the properties file named in the logging module.
      ConfigurationParams loggingParams = confFile.getModuleConfiguration(Configuration.LOGGING);
      PropertyConfigurator.configure(loggingParams.get(Configuration.PROPERTIES_FILE));
      final Logger logger = Logger.getLogger(MemoryBasedRight2LeftSimilarities.class);

      final ConfigurationParams confParams =
          confFile.getModuleConfiguration(Configuration.RIGHT_TO_LEFT_SIMILARITIES);

      // The input is opened before the Redis-backed map is created and cleared (original order).
      eu.excitementproject.eop.distsim.storage.File rightSimilaritiesFile =
          new eu.excitementproject.eop.distsim.storage.File(
              new File(confParams.get(Configuration.INFILE)), true);
      rightSimilaritiesFile.open();

      RedisBasedIDKeyPersistentBasicMap<HashMap<Integer, Double>> leftSimilarities;
      try {
        leftSimilarities =
            new RedisBasedIDKeyPersistentBasicMap<HashMap<Integer, Double>>(
                confParams.getString(Configuration.REDIS_FILE));
        leftSimilarities.clear();

        // set the right similarities at the left similarities map
        logger.info("Loading right similarities");
        Pair<Integer, Serializable> pair = null;
        int i = 0;

        // In-memory buffer of left element id -> (right element id -> score).
        TIntObjectMap<HashMap<Integer, Double>> tmpLeftSimilarities =
            new TIntObjectHashMap<HashMap<Integer, Double>>();

        while ((pair = rightSimilaritiesFile.read()) != null) {
          int rightElementId = pair.getFirst();

          @SuppressWarnings("unchecked")
          HashMap<Integer, Double> similarities = (HashMap<Integer, Double>) pair.getSecond();

          for (Entry<Integer, Double> entry : similarities.entrySet()) {
            int leftElementId = entry.getKey();
            double score = entry.getValue();

            // Look in the buffer first, then in Redis, and only then start a fresh map.
            HashMap<Integer, Double> scores = tmpLeftSimilarities.get(leftElementId);
            if (scores == null) {
              scores = leftSimilarities.get(leftElementId);
            }
            if (scores == null) {
              scores = new HashMap<Integer, Double>();
            }
            scores.put(rightElementId, score);

            tmpLeftSimilarities.put(leftElementId, scores);
          }
          i++;
          if (i % 1000 == 0) {
            logger.info(i);
          }

          if (!memoryLeft()) {
            logger.info("writing data to redis");
            // NOTE(review): the buffer is not cleared here; presumably writeToRedis empties it.
            // Confirm, otherwise the buffer keeps growing after each flush.
            writeToRedis(tmpLeftSimilarities, leftSimilarities);
            System.gc();
            Thread.sleep(10000);
          }
        }

        // Flush whatever is still buffered once the input is exhausted.
        logger.info("writing data to redis");
        writeToRedis(tmpLeftSimilarities, leftSimilarities);
      } finally {
        // Close the input on failure as well as on success.
        rightSimilaritiesFile.close();
      }

      // NOTE(review): the message below says "right", but the map written is leftSimilarities.
      java.io.File outfile = new File(confParams.get(Configuration.OUTFILE));
      logger.info("Saving right similarities to file: " + outfile.getPath());
      eu.excitementproject.eop.distsim.storage.File leftSimilaritiesFile =
          new eu.excitementproject.eop.distsim.storage.File(outfile, false);
      leftSimilaritiesFile.open();
      try {
        ImmutableIterator<Pair<Integer, HashMap<Integer, Double>>> it = leftSimilarities.iterator();
        while (it.hasNext()) {
          Pair<Integer, HashMap<Integer, Double>> idData = it.next();
          leftSimilaritiesFile.write(
              idData.getFirst(), SortUtil.sortMapByValue(idData.getSecond(), true));
        }
      } finally {
        // Close the output on failure as well as on success.
        leftSimilaritiesFile.close();
      }
    } catch (InterruptedException e) {
      // Restore the interrupt flag so the interruption is not lost.
      Thread.currentThread().interrupt();
      e.printStackTrace();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
Example #3
0
  /**
   * Prepares the aligner for use. Call this method once before starting to align sentence pairs.
   *
   * <p>Reads the maximum phrase length from the general parameters section, then walks the
   * lexical resources section: every key is treated as a module name whose value is the resource
   * class name. Each resource is created through {@code createLexicalResource}; resources for
   * which that call returns {@code null} are skipped. For every created resource its version,
   * use-lemma flag and optional left/right part-of-speech restrictions are recorded in
   * {@code lexicalResourcesInformation} under the resource's class name.
   *
   * @param config a CommonConfig instance. The aligner retrieves the lexical resources
   *     configuration values from it.
   * @throws LexicalResourceException if initialization of a resource failed
   * @throws ConfigurationException if the configuration is invalid
   */
  private void init(CommonConfig config) throws LexicalResourceException, ConfigurationException {

    // General parameters section: a lookup failure is re-wrapped in a new ConfigurationException.
    final NameValueTable generalParams;
    try {
      generalParams = config.getSection(GENERAL_PARAMS_CONF_SECTION);
    } catch (ConfigurationException e) {
      throw new ConfigurationException(e);
    }
    maxPhrase = generalParams.getInteger(MAX_PHRASE_KEY);

    // Lexical resources section: maps a module name to the resource class name.
    final NameValueTable resourcesTable;
    try {
      resourcesTable = config.getSection(LEXICAL_RESOURCES_CONF_SECTION);
    } catch (ConfigurationException e) {
      throw new ConfigurationException(e);
    }

    lexicalResources = new ArrayList<LexicalResource<? extends RuleInfo>>();
    final ConfigurationFile moduleConfigurations = new ConfigurationFile(config);

    for (String moduleName : resourcesTable.keySet()) {
      final String className = resourcesTable.getString(moduleName);

      // Each resource is built from the module configuration that carries its own name.
      final ConfigurationParams moduleParams =
          moduleConfigurations.getModuleConfiguration(moduleName);
      moduleParams.setExpandingEnvironmentVariables(true);

      final LexicalResource<? extends RuleInfo> resource =
          createLexicalResource(className, moduleParams);
      if (resource == null) {
        continue;
      }
      lexicalResources.add(resource);

      // Optional POS restrictions; null means no restriction was loaded for that side.
      final PartOfSpeech leftPos =
          readOptionalPartOfSpeech(
              moduleParams, LEFT_SIDE_POS_PARAM, "Could not load POS for left side: ");
      final PartOfSpeech rightPos =
          readOptionalPartOfSpeech(
              moduleParams, RIGHT_SIDE_POS_PARAM, "Could not load POS for right side: ");

      final String resourceKey = resource.getClass().getName();
      final LexicalResourceInformation resourceInfo =
          new LexicalResourceInformation(
              moduleParams.getString(VERSION_PARAM),
              moduleParams.getBoolean(USE_LEMMA_PARAM),
              leftPos,
              rightPos);
      lexicalResourcesInformation.put(resourceKey, resourceInfo);
    }
  }

  /**
   * Reads an optional part-of-speech parameter from a resource's configuration.
   *
   * @param params the configuration parameters of the resource
   * @param posParamName the name of the parameter holding the POS string
   * @param warningPrefix the beginning of the warning logged when the POS string is unsupported
   * @return the parsed part of speech, or {@code null} when the parameter is absent or its value
   *     is not a supported POS string (a warning is logged in the latter case)
   * @throws ConfigurationException if reading the parameter value fails
   */
  private PartOfSpeech readOptionalPartOfSpeech(
      ConfigurationParams params, String posParamName, String warningPrefix)
      throws ConfigurationException {
    if (!params.keySet().contains(posParamName)) {
      return null;
    }
    try {
      return new ByCanonicalPartOfSpeech(params.getString(posParamName));
    } catch (UnsupportedPosTagStringException e) {
      logger.warn(
          warningPrefix
              + params.getString(posParamName)
              + ". Alignment links of all POS will be retreived.");
      return null;
    }
  }