/**
 * Builds the resource from configuration parameters by delegating to another constructor of
 * this class.
 *
 * <p>Three values are read from {@code params}, in this order: a double under {@code
 * PARAM_SCORE_THRESHOLD}, a file under {@code PARAM_FILE}, and an enum set of {@link
 * RelationType} under {@code PARAM_ALLOWED_RELATIONS}.
 *
 * @param params configuration parameters that hold the three keys listed above
 * @throws LexicalResourceException declared here; presumably raised by the delegated
 *     constructor, which is not visible in this section
 * @throws ConfigurationException declared here; presumably raised when one of the parameters
 *     cannot be read - TODO confirm against ConfigurationParams
 */
public VerbOceanLexicalResource(ConfigurationParams params)
     throws LexicalResourceException, ConfigurationException {
   this(
       params.getDouble(PARAM_SCORE_THRESHOLD),
       params.getFile(PARAM_FILE),
       params.getEnumSet(RelationType.class, PARAM_ALLOWED_RELATIONS));
 }
 public UnlimitedMemoryBasedCountableIdentifiableStorage(ConfigurationParams params)
     throws LoadingStateException {
   this();
   try {
     initRedis(
         params.get(Configuration.REDIS_HOST),
         Integer.parseInt(params.get(Configuration.REDIS_PORT)));
     tmpDIR = params.get(Configuration.TMP_CONTENT_DIR);
     tmpFileIndex = 0;
   } catch (Exception e) {
     throw new LoadingStateException(e);
   }
 }
  /**
   * Command-line entry point: compiles all rule files of a configured directory and serializes
   * the resulting set of rules to a configured output file.
   *
   * <p>The rules directory and the rule-file suffix come from the {@code
   * RuleCompilerParameterNames.RULE_COMPILER_PARAMS_MODULE} section; the output file name comes
   * from the {@code RuleCompilerParameterNames.SYNTACTIC_PARAMS_MODULE} section. Environment
   * variable expansion is switched on for the configuration file.
   *
   * @param args {@code args[0]} is the path of the XML configuration file
   * @throws UnsupportedPosTagStringException declared; presumably raised during rule compilation
   * @throws EntailmentCompilationException if no argument is given, or if serializing the rules
   *     fails (the original {@code CompilationException} is nested)
   * @throws IOException declared; presumably raised while reading the rule files
   * @throws FileNotFoundException declared; presumably raised when an input file is missing
   * @throws ConfigurationException declared; presumably raised when a configuration value
   *     cannot be read
   */
  public static void main(String[] args)
      throws UnsupportedPosTagStringException, EntailmentCompilationException,
          FileNotFoundException, IOException, ConfigurationException {
    if (args.length < 1) {
      throw new EntailmentCompilationException(
          "usage: EntailmentRuleCompiler configurationFile.xml");
    }

    ConfigurationFile configuration = new ConfigurationFile(new File(args[0]));
    configuration.setExpandingEnvironmentVariables(true);
    ConfigurationParams compilerSection =
        configuration.getModuleConfiguration(
            RuleCompilerParameterNames.RULE_COMPILER_PARAMS_MODULE);
    ConfigurationParams syntacticSection =
        configuration.getModuleConfiguration(RuleCompilerParameterNames.SYNTACTIC_PARAMS_MODULE);

    File rulesDirectory =
        compilerSection.getDirectory(RuleCompilerParameterNames.ENTAILMENT_RULES_DIRECTORY);
    final String ruleFileSuffix = compilerSection.get(RuleCompilerParameterNames.RULE_FILE_SUFFIX);

    // Compile every matching rule file found in the directory.
    List<RuleWithConfidenceAndDescription<Info, BasicNode>> compiledRules =
        new EntailmentRuleCompiler().compileFolder(rulesDirectory, ruleFileSuffix);

    // The insertion-ordered set drops duplicate rules while keeping the compilation order.
    Set<RuleWithConfidenceAndDescription<Info, BasicNode>> uniqueRules =
        new LinkedHashSet<RuleWithConfidenceAndDescription<Info, BasicNode>>(compiledRules);

    String targetPath =
        syntacticSection.get(TransformationsConfigurationParametersNames.SYNTACTIC_RULES_FILE);
    try {
      RuleCompilerUtils.serializeToFile(uniqueRules, targetPath);
    } catch (CompilationException failure) {
      throw new EntailmentCompilationException("see nested", failure);
    }

    System.out.println("\n\nMade " + uniqueRules.size() + " rules.");
    System.out.println("Serialized them into " + targetPath);
  }
  public BasicVectorTruncate(ConfigurationParams params) throws ConfigurationException {
    double minScore = Double.MIN_VALUE;
    try {
      minScore = params.getDouble(Configuration.MIN_SCORE);
    } catch (ConfigurationException e) {
    }

    double percent = 1;
    try {
      percent = params.getDouble(Configuration.PERCENT);
    } catch (ConfigurationException e) {

    }

    init(percent, params.getInt(Configuration.TOPN), minScore);
  }
 /**
  * Loads the configuration file named by {@code configurationFileName}, enables environment
  * variable expansion on it, and reads the {@code RTE_PAIRS_PREPROCESS_MODULE_NAME} section.
  *
  * <p>From that section it obtains the default instruments and the two optional switches
  * {@code PREPROCESS_DO_NER} and {@code PREPROCESS_DO_TEXT_NORMALIZATION}; a switch that is
  * absent from the section defaults to {@code true}.
  */
 private void readConfigurationFile()
     throws ConfigurationFileDuplicateKeyException, ConfigurationException, NumberFormatException,
         TeEngineMlException, ParserRunException, NamedEntityRecognizerException,
         TextPreprocessorException {
   configurationFile = SystemInitialization.loadConfigurationFile(this.configurationFileName);
   configurationFile.setExpandingEnvironmentVariables(true);

   ConfigurationParams preprocessSection =
       configurationFile.getModuleConfiguration(RTE_PAIRS_PREPROCESS_MODULE_NAME);
   instruments = new InstrumentsFactory().getDefaultInstruments(preprocessSection);

   // A missing key means "enabled"; the configured value is consulted only when the key exists.
   doNer =
       !preprocessSection.containsKey(PREPROCESS_DO_NER)
           || preprocessSection.getBoolean(PREPROCESS_DO_NER);
   doTextNormalization =
       !preprocessSection.containsKey(PREPROCESS_DO_TEXT_NORMALIZATION)
           || preprocessSection.getBoolean(PREPROCESS_DO_TEXT_NORMALIZATION);
 }
 /**
  * Creates the resource from configuration parameters.
  *
  * <p>Keys read directly by this constructor:
  *
  * <ul>
  *   <li>part_of_speech - canonical name of the part-of-speech for this rule-base. For possible
  *       values, see {@link SimplerCanonicalPosTag}.
  *   <li>relation_name - name of relation to put in rules (the same for all rules).
  *   <li>file - path to the file that contains the rules, in table format. Can also be a URL.
  *       (Description carried over from the earlier documentation, which named this key
  *       table_file; the code reads "file" - TODO confirm which name configurations use.)
  *   <li>separator - pattern of column-separator, e.g. "->". (The earlier documentation named
  *       this key table_separator; the code reads "separator" - TODO confirm.)
  *   <li>minimum_seconds_between_loads (int) - The minimum number of seconds between each
  *       consecutive loads from the file.
  * </ul>
  *
  * <p>NOTE: {@code params.getModuleName()} is passed to the superclass constructor and is used
  * as the resource name. The whole {@code params} object is also handed to {@code
  * ValueSetMapFromStringCreator.mapFromConfigurationParams}, which may read further keys that
  * are not visible here.
  *
  * @param params configuration params for initialization, as described above
  * @throws UnsupportedPosTagStringException declared; presumably raised for an unsupported
  *     part-of-speech value
  * @throws IOException declared; presumably raised while loading the rule table
  * @throws ConfigurationException declared; presumably raised when a parameter cannot be read
  */
 public OnlineFileBasedLexicalResource(ConfigurationParams params)
     throws UnsupportedPosTagStringException, IOException, ConfigurationException {
   super(
       null,
       params.getEnum(SimplerCanonicalPosTag.class, "part_of_speech"),
       params.getModuleName(),
       params.getString("relation_name"));
   this.file = params.getString("file");
   this.separator = params.getString("separator");
   this.minimumSecondsBetweenLoads = params.getInt("minimum_seconds_between_loads");
   this.map = ValueSetMapFromStringCreator.mapFromConfigurationParams(params);
 }
  /**
   * Command-line entry point: reads a file that maps each right element id to its similarity
   * scores (left element id to score), inverts the mapping so that each left element id maps to
   * (right element id to score), and writes the inverted entries, sorted by value, to an output
   * file.
   *
   * <p>The inverted map is accumulated in memory and flushed to a Redis-backed map whenever
   * {@code memoryLeft()} reports that memory is exhausted, and once more after the input has
   * been fully read. Both similarity files are closed even when processing fails. Any exception
   * is printed to standard error; the method itself does not rethrow.
   *
   * @param args exactly one argument: the path of the configuration file. With any other
   *     argument count a usage line is printed and the JVM exits with status 1.
   */
  public static void main(String[] args) {

    if (args.length != 1) {
      System.err.println("Usage: Right2LeftSimilarities <configuration file>");
      // A usage error is a failure, so report it with a non-zero exit status.
      System.exit(1);
    }

    try {
      ConfigurationFile confFile = new ConfigurationFile(new ImplCommonConfig(new File(args[0])));

      ConfigurationParams loggingParams = confFile.getModuleConfiguration(Configuration.LOGGING);
      PropertyConfigurator.configure(loggingParams.get(Configuration.PROPERTIES_FILE));
      final Logger logger = Logger.getLogger(MemoryBasedRight2LeftSimilarities.class);

      final ConfigurationParams confParams =
          confFile.getModuleConfiguration(Configuration.RIGHT_TO_LEFT_SIMILARITIES);

      eu.excitementproject.eop.distsim.storage.File rightSimilaritiesFile =
          new eu.excitementproject.eop.distsim.storage.File(
              new File(confParams.get(Configuration.INFILE)), true);

      // Assigned inside the try block (after the input file is opened, as before) but needed
      // again when the output file is written.
      RedisBasedIDKeyPersistentBasicMap<HashMap<Integer, Double>> leftSimilarities;

      rightSimilaritiesFile.open();
      try {
        leftSimilarities =
            new RedisBasedIDKeyPersistentBasicMap<HashMap<Integer, Double>>(
                confParams.getString(Configuration.REDIS_FILE));
        leftSimilarities.clear();

        // set the right similarities at the left similarities map
        logger.info("Loading right similarities");
        Pair<Integer, Serializable> pair = null;
        int i = 0;

        // In-memory buffer of inverted entries that have not been flushed to Redis yet.
        TIntObjectMap<HashMap<Integer, Double>> tmpLeftSimilarities =
            new TIntObjectHashMap<HashMap<Integer, Double>>();

        while ((pair = rightSimilaritiesFile.read()) != null) {
          int rightElementId = pair.getFirst();

          @SuppressWarnings("unchecked")
          HashMap<Integer, Double> similarities = (HashMap<Integer, Double>) pair.getSecond();

          for (Entry<Integer, Double> entry : similarities.entrySet()) {
            int leftElementId = entry.getKey();
            double score = entry.getValue();

            // Prefer the in-memory buffer, then the entry already stored in Redis, and only
            // then start a fresh score map for this left element.
            HashMap<Integer, Double> scores = tmpLeftSimilarities.get(leftElementId);
            if (scores == null) {
              scores = leftSimilarities.get(leftElementId);
            }
            if (scores == null) {
              scores = new HashMap<Integer, Double>();
            }
            scores.put(rightElementId, score);

            tmpLeftSimilarities.put(leftElementId, scores);
          }
          i++;
          if (i % 1000 == 0) {
            logger.info(i);
          }

          if (!memoryLeft()) {
            logger.info("writing data to redis");
            writeToRedis(tmpLeftSimilarities, leftSimilarities);
            System.gc();
            Thread.sleep(10000);
          }
        }

        logger.info("writing data to redis");
        writeToRedis(tmpLeftSimilarities, leftSimilarities);
      } finally {
        // Release the input file even when reading or the Redis flush fails.
        rightSimilaritiesFile.close();
      }

      // Write the inverted similarities to the output file.
      // NOTE(review): the log text below says "right similarities" although the entries written
      // come from leftSimilarities - confirm the intended wording.
      java.io.File outfile = new File(confParams.get(Configuration.OUTFILE));
      logger.info("Saving right similarities to file: " + outfile.getPath());
      eu.excitementproject.eop.distsim.storage.File leftSimilaritiesFile =
          new eu.excitementproject.eop.distsim.storage.File(outfile, false);
      leftSimilaritiesFile.open();
      try {
        ImmutableIterator<Pair<Integer, HashMap<Integer, Double>>> it =
            leftSimilarities.iterator();
        while (it.hasNext()) {
          Pair<Integer, HashMap<Integer, Double>> idData = it.next();
          leftSimilaritiesFile.write(
              idData.getFirst(), SortUtil.sortMapByValue(idData.getSecond(), true));
        }
      } finally {
        // Release the output file even when a write fails part-way through.
        leftSimilaritiesFile.close();
      }
    } catch (InterruptedException e) {
      // Restore the interrupt flag so code further up the stack can still observe it.
      Thread.currentThread().interrupt();
      e.printStackTrace();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
Пример #8
0
  /**
   * One-time initialization; invoke it before aligning any sentence pair.
   *
   * <p>Reads {@code MAX_PHRASE_KEY} from the general parameters section, then walks the lexical
   * resources section: each key names a module whose configuration is used to instantiate the
   * resource class given as the key's value. For every resource that could be created, the
   * resource is registered and its version, lemma usage and optional left/right part-of-speech
   * restrictions are recorded under the resource's class name.
   *
   * @param config a CommonConfig instance from which the general parameters and the lexical
   *     resources configuration values are retrieved
   * @throws LexicalResourceException if initialization of a resource failed
   * @throws ConfigurationException if the configuration is invalid
   */
  private void init(CommonConfig config) throws LexicalResourceException, ConfigurationException {

    // General parameters section
    final NameValueTable generalSection;
    try {
      generalSection = config.getSection(GENERAL_PARAMS_CONF_SECTION);
    } catch (ConfigurationException cause) {
      throw new ConfigurationException(cause);
    }
    maxPhrase = generalSection.getInteger(MAX_PHRASE_KEY);

    // Lexical resources section: resource (module) name -> implementing class name
    final NameValueTable resourcesSection;
    try {
      resourcesSection = config.getSection(LEXICAL_RESOURCES_CONF_SECTION);
    } catch (ConfigurationException cause) {
      throw new ConfigurationException(cause);
    }

    lexicalResources = new ArrayList<LexicalResource<? extends RuleInfo>>();
    ConfigurationFile moduleConfigurations = new ConfigurationFile(config);

    for (String resourceName : resourcesSection.keySet()) {
      String className = resourcesSection.getString(resourceName);

      // Each resource is configured by the module that carries its name.
      ConfigurationParams resourceParams = moduleConfigurations.getModuleConfiguration(resourceName);
      resourceParams.setExpandingEnvironmentVariables(true);

      LexicalResource<? extends RuleInfo> resource =
          createLexicalResource(className, resourceParams);
      if (resource == null) {
        // Nothing was created for this entry, so there is nothing to register.
        continue;
      }
      lexicalResources.add(resource);

      // Optional POS restriction for the left side; null means "all POS".
      PartOfSpeech leftPos = null;
      if (resourceParams.keySet().contains(LEFT_SIDE_POS_PARAM)) {
        try {
          leftPos = new ByCanonicalPartOfSpeech(resourceParams.getString(LEFT_SIDE_POS_PARAM));
        } catch (UnsupportedPosTagStringException unsupported) {
          logger.warn(
              "Could not load POS for left side: "
                  + resourceParams.getString(LEFT_SIDE_POS_PARAM)
                  + ". Alignment links of all POS will be retreived.");
        }
      }

      // Optional POS restriction for the right side; null means "all POS".
      PartOfSpeech rightPos = null;
      if (resourceParams.keySet().contains(RIGHT_SIDE_POS_PARAM)) {
        try {
          rightPos = new ByCanonicalPartOfSpeech(resourceParams.getString(RIGHT_SIDE_POS_PARAM));
        } catch (UnsupportedPosTagStringException unsupported) {
          logger.warn(
              "Could not load POS for right side: "
                  + resourceParams.getString(RIGHT_SIDE_POS_PARAM)
                  + ". Alignment links of all POS will be retreived.");
        }
      }

      // Record the details of this resource under its implementing class name.
      String resourceKey = resource.getClass().getName();
      LexicalResourceInformation details =
          new LexicalResourceInformation(
              resourceParams.getString(VERSION_PARAM),
              resourceParams.getBoolean(USE_LEMMA_PARAM),
              leftPos,
              rightPos);
      lexicalResourcesInformation.put(resourceKey, details);
    }
  }