/**
  * Creates the resource from a configuration module.
  *
  * <p>The part-of-speech, the module name (used as the resource name) and the relation name are
  * passed to the superclass constructor; the remaining values are stored in this instance's
  * fields.
  *
  * <p>NOTE(review): this documentation originally listed only {@code table_file} and {@code
  * table_separator}, yet the constructor body itself reads the keys {@code file} and {@code
  * separator}. The {@code table_*} keys are presumably consumed by {@link
  * ValueSetMapFromStringCreator#mapFromConfigurationParams} - confirm against that method and
  * against existing configuration files.
  *
  * @param params configuration params for initialization. Should include:
  *     <ul>
  *       <li>file - read directly by this constructor and stored as the file to load from.
  *       <li>separator - read directly by this constructor and stored as the column separator.
  *       <li>table_file - path to the file that contains the rules, in table format. Can also be
  *           a URL. (Not read in this constructor; presumably read by the map creator - TODO
  *           confirm.)
  *       <li>table_separator - pattern of column-separator, e.g. "->". (Not read in this
  *           constructor; presumably read by the map creator - TODO confirm.)
  *       <li>part_of_speech - canonical name of the part-of-speech for this rule-base. For
  *           possible values, see {@link SimplerCanonicalPosTag}.
  *       <li>relation_name - name of relation to put in rules (the same for all rules).
  *       <li>minimum_seconds_between_loads (int) - The minimum number of seconds between each
  *           consecutive loads from the file.
  *       <li>(NOTE: The params.getModuleName() is used as the resource_name).
  *     </ul>
  *
  * @throws UnsupportedPosTagStringException declared by this constructor; presumably raised for
  *     an unsupported part-of-speech value - confirm
  * @throws IOException declared by this constructor; presumably raised while building the initial
  *     map - confirm
  * @throws ConfigurationException declared by this constructor; presumably raised when a
  *     parameter is missing or malformed - confirm
  */
 public OnlineFileBasedLexicalResource(ConfigurationParams params)
     throws UnsupportedPosTagStringException, IOException, ConfigurationException {
   super(
       null,
       params.getEnum(SimplerCanonicalPosTag.class, "part_of_speech"),
       params.getModuleName(),
       params.getString("relation_name"));
   // These keys are "file" and "separator", not "table_file" / "table_separator".
   this.file = params.getString("file");
   this.separator = params.getString("separator");
   this.minimumSecondsBetweenLoads = params.getInt("minimum_seconds_between_loads");
   // The initial map is built from the same params object by the shared creator.
   this.map = ValueSetMapFromStringCreator.mapFromConfigurationParams(params);
 }
  /**
   * Command-line entry point that inverts a similarity file.
   *
   * <p>The single argument is the path of a configuration file. The program:
   *
   * <ol>
   *   <li>configures log4j from the logging module of that configuration;
   *   <li>reads (id, map) records from the configured input file, where each map goes from an
   *       element id to a score;
   *   <li>for every (leftId, score) entry of a record with id rightId, stores {@code score} under
   *       {@code leftId -> rightId}, buffering in memory and flushing to the Redis-backed map
   *       whenever {@code memoryLeft()} returns false;
   *   <li>iterates over the Redis-backed map and writes each id with its score map, passed through
   *       {@code SortUtil.sortMapByValue(map, true)}, to the configured output file.
   * </ol>
   *
   * <p>Both storage files are closed in {@code finally} blocks, so they are released even when
   * processing fails. Any exception is printed to standard error; an interrupt additionally
   * restores the thread's interrupt flag.
   *
   * @param args exactly one element: the configuration file path. Any other argument count prints
   *     a usage message and terminates the JVM with exit status 1.
   */
  public static void main(String[] args) {

    if (args.length != 1) {
      System.err.println("Usage: Right2LeftSimilarities <configuration file>");
      // A usage error is a failure, so it is reported with a non-zero exit status.
      System.exit(1);
    }

    try {
      ConfigurationFile confFile = new ConfigurationFile(new ImplCommonConfig(new File(args[0])));

      ConfigurationParams loggingParams = confFile.getModuleConfiguration(Configuration.LOGGING);
      PropertyConfigurator.configure(loggingParams.get(Configuration.PROPERTIES_FILE));
      final Logger logger = Logger.getLogger(MemoryBasedRight2LeftSimilarities.class);

      final ConfigurationParams confParams =
          confFile.getModuleConfiguration(Configuration.RIGHT_TO_LEFT_SIMILARITIES);

      eu.excitementproject.eop.distsim.storage.File rightSimilaritiesFile =
          new eu.excitementproject.eop.distsim.storage.File(
              new File(confParams.get(Configuration.INFILE)), true);

      // Declared outside the try block because the output phase below still needs it; it is
      // assigned only after the input file has been opened, as in the original ordering.
      final RedisBasedIDKeyPersistentBasicMap<HashMap<Integer, Double>> leftSimilarities;

      rightSimilaritiesFile.open();
      try {
        leftSimilarities =
            new RedisBasedIDKeyPersistentBasicMap<HashMap<Integer, Double>>(
                confParams.getString(Configuration.REDIS_FILE));
        leftSimilarities.clear();

        // set the right similarities at the left similarities map
        logger.info("Loading right similarities");
        Pair<Integer, Serializable> pair = null;
        int i = 0;

        // In-memory buffer of inverted scores, handed to writeToRedis when memory runs low.
        TIntObjectMap<HashMap<Integer, Double>> tmpLeftSimilarities =
            new TIntObjectHashMap<HashMap<Integer, Double>>();

        while ((pair = rightSimilaritiesFile.read()) != null) {
          int rightElementId = pair.getFirst();

          @SuppressWarnings("unchecked")
          HashMap<Integer, Double> similarities = (HashMap<Integer, Double>) pair.getSecond();

          for (Entry<Integer, Double> entry : similarities.entrySet()) {
            int leftElementId = entry.getKey();
            double score = entry.getValue();

            // Look in the buffer first, then in Redis, and only then start a fresh map.
            HashMap<Integer, Double> scores = tmpLeftSimilarities.get(leftElementId);
            if (scores == null) {
              scores = leftSimilarities.get(leftElementId);
            }
            if (scores == null) {
              scores = new HashMap<Integer, Double>();
            }
            scores.put(rightElementId, score);

            tmpLeftSimilarities.put(leftElementId, scores);
          }
          i++;
          if (i % 1000 == 0) {
            logger.info(i);
          }

          if (!memoryLeft()) {
            logger.info("writing data to redis");
            // NOTE(review): presumably writeToRedis also empties the buffer - confirm.
            writeToRedis(tmpLeftSimilarities, leftSimilarities);
            System.gc();
            Thread.sleep(10000);
          }
        }

        logger.info("writing data to redis");
        writeToRedis(tmpLeftSimilarities, leftSimilarities);
      } finally {
        // Release the input file even if reading or flushing failed.
        rightSimilaritiesFile.close();
      }

      // save the right element similarity file
      java.io.File outfile = new File(confParams.get(Configuration.OUTFILE));
      logger.info("Saving right similarities to file: " + outfile.getPath());
      eu.excitementproject.eop.distsim.storage.File leftSimilaritiesFile =
          new eu.excitementproject.eop.distsim.storage.File(outfile, false);
      leftSimilaritiesFile.open();
      try {
        ImmutableIterator<Pair<Integer, HashMap<Integer, Double>>> it =
            leftSimilarities.iterator();
        while (it.hasNext()) {
          Pair<Integer, HashMap<Integer, Double>> idData = it.next();
          leftSimilaritiesFile.write(
              idData.getFirst(), SortUtil.sortMapByValue(idData.getSecond(), true));
        }
      } finally {
        // Release the output file even if a write failed part-way through.
        leftSimilaritiesFile.close();
      }
    } catch (InterruptedException e) {
      // Restore the interrupt flag so code further up the stack can observe the interruption.
      Thread.currentThread().interrupt();
      e.printStackTrace();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
  // Example #3
  /**
   * Prepares the aligner for use; invoke it once before any sentence pair is aligned.
   *
   * <p>Reads the maximal phrase length from the general-parameters section, then walks the
   * lexical-resources section. Each entry there maps a configuration-module name to a resource
   * class name; the resource is created from that module's parameters, with environment-variable
   * expansion switched on. Every resource that is successfully created is added to the resource
   * list, and its version, use-lemma flag and optional left/right part-of-speech restrictions are
   * recorded under the resource's class name.
   *
   * @param config a CommonConfig instance. The aligner retrieves the lexical resources
   *     configuration values from it.
   * @throws LexicalResourceException if initialization of a resource failed
   * @throws ConfigurationException if the configuration is invalid
   */
  private void init(CommonConfig config) throws LexicalResourceException, ConfigurationException {

    // General parameters section
    final NameValueTable generalSection;
    try {
      generalSection = config.getSection(GENERAL_PARAMS_CONF_SECTION);
    } catch (ConfigurationException e) {
      throw new ConfigurationException(e);
    }
    maxPhrase = generalSection.getInteger(MAX_PHRASE_KEY);

    // Lexical resources section: module name -> resource class name
    final NameValueTable resourcesSection;
    try {
      resourcesSection = config.getSection(LEXICAL_RESOURCES_CONF_SECTION);
    } catch (ConfigurationException e) {
      throw new ConfigurationException(e);
    }

    lexicalResources = new ArrayList<LexicalResource<? extends RuleInfo>>();
    final ConfigurationFile configurationFile = new ConfigurationFile(config);

    for (String moduleName : resourcesSection.keySet()) {
      final String className = resourcesSection.getString(moduleName);

      // The module named like the entry holds the resource's own parameters.
      final ConfigurationParams moduleParams = configurationFile.getModuleConfiguration(moduleName);
      moduleParams.setExpandingEnvironmentVariables(true);

      final LexicalResource<? extends RuleInfo> resource =
          createLexicalResource(className, moduleParams);
      if (resource == null) {
        // Nothing was created for this entry, so there is nothing to register.
        continue;
      }
      lexicalResources.add(resource);

      // Optional POS restrictions; null means no restriction on that side.
      final PartOfSpeech leftPos =
          readOptionalPos(
              moduleParams, LEFT_SIDE_POS_PARAM, "Could not load POS for left side: ");
      final PartOfSpeech rightPos =
          readOptionalPos(
              moduleParams, RIGHT_SIDE_POS_PARAM, "Could not load POS for right side: ");

      final LexicalResourceInformation information =
          new LexicalResourceInformation(
              moduleParams.getString(VERSION_PARAM),
              moduleParams.getBoolean(USE_LEMMA_PARAM),
              leftPos,
              rightPos);
      lexicalResourcesInformation.put(resource.getClass().getName(), information);
    }
  }

  /**
   * Reads an optional part-of-speech parameter.
   *
   * @param params the parameters of one resource module
   * @param posKey name of the parameter holding the canonical POS string
   * @param warningPrefix start of the warning logged when the POS string is not supported
   * @return the parsed part of speech, or {@code null} when the parameter is absent or its value
   *     is not a supported POS tag (a warning is logged in the latter case)
   * @throws ConfigurationException if reading the parameter value fails
   */
  private PartOfSpeech readOptionalPos(
      ConfigurationParams params, String posKey, String warningPrefix)
      throws ConfigurationException {
    if (!params.keySet().contains(posKey)) {
      return null;
    }
    try {
      return new ByCanonicalPartOfSpeech(params.getString(posKey));
    } catch (UnsupportedPosTagStringException e) {
      logger.warn(
          warningPrefix
              + params.getString(posKey)
              + ". Alignment links of all POS will be retreived.");
      return null;
    }
  }