/** * @param params configuration params for initialization. Should include: * <li>table_file - path to the file that contains the rules, in table format. Can also be a * URL. * <li>table_separator - pattern of column-separator, e.g. "->". * <li>part_of_speech - canonical name of the part-of-speech for this rule-base. For possible * values, see {@link SimplerCanonicalPosTag}. * <li>relation_name - name of relation to put in rules (the same for all rules). * <li>minimum_seconds_between_loads (int) - The minimum number of seconds between each * consecutive loads from the file. * <li>(NOTE: The params.getModuleName() is used as the resource_name). */ public OnlineFileBasedLexicalResource(ConfigurationParams params) throws UnsupportedPosTagStringException, IOException, ConfigurationException { super( null, params.getEnum(SimplerCanonicalPosTag.class, "part_of_speech"), params.getModuleName(), params.getString("relation_name")); this.file = params.getString("file"); this.separator = params.getString("separator"); this.minimumSecondsBetweenLoads = params.getInt("minimum_seconds_between_loads"); this.map = ValueSetMapFromStringCreator.mapFromConfigurationParams(params); }
public static void main(String[] args) { if (args.length != 1) { System.err.println("Usage: Right2LeftSimilarities <configuration file>"); System.exit(0); } try { // ConfigurationFile confFile = new ConfigurationFile(args[0]); ConfigurationFile confFile = new ConfigurationFile(new ImplCommonConfig(new File(args[0]))); ConfigurationParams loggingParams = confFile.getModuleConfiguration(Configuration.LOGGING); PropertyConfigurator.configure(loggingParams.get(Configuration.PROPERTIES_FILE)); final Logger logger = Logger.getLogger(MemoryBasedRight2LeftSimilarities.class); final ConfigurationParams confParams = confFile.getModuleConfiguration(Configuration.RIGHT_TO_LEFT_SIMILARITIES); eu.excitementproject.eop.distsim.storage.File rightSimilaritiesFile = new eu.excitementproject.eop.distsim.storage.File( new File(confParams.get(Configuration.INFILE)), true); rightSimilaritiesFile.open(); RedisBasedIDKeyPersistentBasicMap<HashMap<Integer, Double>> leftSimilarities = new RedisBasedIDKeyPersistentBasicMap<HashMap<Integer, Double>>( confParams.getString(Configuration.REDIS_FILE)); leftSimilarities.clear(); // set the right similarities at the left similarities map logger.info("Loading right similarities"); Pair<Integer, Serializable> pair = null; int i = 0; TIntObjectMap<HashMap<Integer, Double>> tmpLeftSimilarities = new TIntObjectHashMap<HashMap<Integer, Double>>(); while ((pair = rightSimilaritiesFile.read()) != null) { int rightElementId = pair.getFirst(); // System.out.println("rightElementId = " + rightElementId); @SuppressWarnings("unchecked") HashMap<Integer, Double> similarities = (HashMap<Integer, Double>) pair.getSecond(); // System.out.println("left similarities: " + similarities.size()); for (Entry<Integer, Double> entry : similarities.entrySet()) { int leftElementId = entry.getKey(); double score = entry.getValue(); // System.out.println("\t" + leftElementId + "\t" + score); HashMap<Integer, Double> scores = tmpLeftSimilarities.get(leftElementId); if (scores == null) scores = leftSimilarities.get(leftElementId); if (scores == null) scores = new HashMap<Integer, Double>(); scores.put(rightElementId, score); // System.out.println("\t" + scores); tmpLeftSimilarities.put(leftElementId, scores); } i++; if (i % 1000 == 0) logger.info(i); if (!memoryLeft()) { logger.info("writing data to redis"); writeToRedis(tmpLeftSimilarities, leftSimilarities); System.gc(); Thread.sleep(10000); } } logger.info("writing data to redis"); writeToRedis(tmpLeftSimilarities, leftSimilarities); rightSimilaritiesFile.close(); // save the right element similarity file java.io.File outfile = new File(confParams.get(Configuration.OUTFILE)); logger.info("Saving right similarities to file: " + outfile.getPath()); eu.excitementproject.eop.distsim.storage.File leftSimilaritiesFile = new eu.excitementproject.eop.distsim.storage.File(outfile, false); leftSimilaritiesFile.open(); ImmutableIterator<Pair<Integer, HashMap<Integer, Double>>> it = leftSimilarities.iterator(); while (it.hasNext()) { Pair<Integer, HashMap<Integer, Double>> idData = it.next(); leftSimilaritiesFile.write( idData.getFirst(), SortUtil.sortMapByValue(idData.getSecond(), true)); } leftSimilaritiesFile.close(); } catch (Exception e) { e.printStackTrace(); } }
/** * Call this method once before starting to align sentence pairs. * * @param config a CommonConfig instance. The aligner retrieves the lexical resources * configuration values. * @throws LexicalResourceException if initialization of a resource failed * @throws ConfigurationException if the configuration is invalid */ private void init(CommonConfig config) throws LexicalResourceException, ConfigurationException { // Get the general parameters configuration section NameValueTable paramsSection = null; try { paramsSection = config.getSection(GENERAL_PARAMS_CONF_SECTION); } catch (ConfigurationException e) { throw new ConfigurationException(e); } maxPhrase = paramsSection.getInteger(MAX_PHRASE_KEY); // Get the Lexical Resources configuration section NameValueTable lexicalResourcesSection = null; try { lexicalResourcesSection = config.getSection(LEXICAL_RESOURCES_CONF_SECTION); } catch (ConfigurationException e) { throw new ConfigurationException(e); } lexicalResources = new ArrayList<LexicalResource<? extends RuleInfo>>(); ConfigurationFile configFile = new ConfigurationFile(config); // Get each resource and create it using the configuration section related to it for (String resourceName : lexicalResourcesSection.keySet()) { // Get the class name String resourceClassName = lexicalResourcesSection.getString(resourceName); // Get the configuration params ConfigurationParams resourceParams = configFile.getModuleConfiguration(resourceName); resourceParams.setExpandingEnvironmentVariables(true); LexicalResource<? extends RuleInfo> lexicalResource = createLexicalResource(resourceClassName, resourceParams); if (lexicalResource != null) { lexicalResources.add(lexicalResource); PartOfSpeech leftSidePOS = null, rightSidePOS = null; // Add the information about this resource // Get the right and left side POS, in case it's mentioned if (resourceParams.keySet().contains(LEFT_SIDE_POS_PARAM)) { try { leftSidePOS = new ByCanonicalPartOfSpeech(resourceParams.getString(LEFT_SIDE_POS_PARAM)); } catch (UnsupportedPosTagStringException e) { logger.warn( "Could not load POS for left side: " + resourceParams.getString(LEFT_SIDE_POS_PARAM) + ". Alignment links of all POS will be retreived."); } } if (resourceParams.keySet().contains(RIGHT_SIDE_POS_PARAM)) { try { rightSidePOS = new ByCanonicalPartOfSpeech(resourceParams.getString(RIGHT_SIDE_POS_PARAM)); } catch (UnsupportedPosTagStringException e) { logger.warn( "Could not load POS for right side: " + resourceParams.getString(RIGHT_SIDE_POS_PARAM) + ". Alignment links of all POS will be retreived."); } } lexicalResourcesInformation.put( lexicalResource.getClass().getName(), new LexicalResourceInformation( resourceParams.getString(VERSION_PARAM), resourceParams.getBoolean(USE_LEMMA_PARAM), leftSidePOS, rightSidePOS)); } } }