/** * @param args * @throws UnsupportedPosTagStringException * @throws EntailmentCompilationException * @throws IOException * @throws FileNotFoundException * @throws ConfigurationException */ public static void main(String[] args) throws UnsupportedPosTagStringException, EntailmentCompilationException, FileNotFoundException, IOException, ConfigurationException { if (args.length < 1) throw new EntailmentCompilationException( "usage: EntailmentRuleCompiler configurationFile.xml"); ConfigurationFile confFile = new ConfigurationFile(new File(args[0])); confFile.setExpandingEnvironmentVariables(true); ConfigurationParams compilationParams = confFile.getModuleConfiguration(RuleCompilerParameterNames.RULE_COMPILER_PARAMS_MODULE); // ConfigurationParams applictionParams = // confFile.getModuleConfiguration(KnowledgeResource.SYNTACTIC.getModuleName()); ConfigurationParams applictionParams = confFile.getModuleConfiguration(RuleCompilerParameterNames.SYNTACTIC_PARAMS_MODULE); File dir = compilationParams.getDirectory( RuleCompilerParameterNames .ENTAILMENT_RULES_DIRECTORY); // new // File(props.getProperty("directoryName").trim()); final String ruleFileSuffix = compilationParams.get( RuleCompilerParameterNames .RULE_FILE_SUFFIX); // props.getProperty("graphFileSuffix").trim(); // create an english node rule compliler EntailmentRuleCompiler compiler = new EntailmentRuleCompiler(); List<RuleWithConfidenceAndDescription<Info, BasicNode>> rulesWithCD; rulesWithCD = compiler.compileFolder(dir, ruleFileSuffix); // EnglishRulesViewer rv = new EnglishRulesViewer(rulesWithCD); // ExtendedRulesViewer rv = new ExtendedRulesViewer(rulesWithCD.subList(0,1)); // rv.view(); // serialize rules to file Set<RuleWithConfidenceAndDescription<Info, BasicNode>> rules = new LinkedHashSet<RuleWithConfidenceAndDescription<Info, BasicNode>>(rulesWithCD); String outFile = applictionParams.get(TransformationsConfigurationParametersNames.SYNTACTIC_RULES_FILE); try { RuleCompilerUtils.serializeToFile(rules, outFile); } catch (CompilationException e) { throw new EntailmentCompilationException("see nested", e); } System.out.println("\n\nMade " + rules.size() + " rules."); System.out.println("Serialized them into " + outFile); }
public static void main(String[] args) { if (args.length != 1) { System.err.println("Usage: Right2LeftSimilarities <configuration file>"); System.exit(0); } try { // ConfigurationFile confFile = new ConfigurationFile(args[0]); ConfigurationFile confFile = new ConfigurationFile(new ImplCommonConfig(new File(args[0]))); ConfigurationParams loggingParams = confFile.getModuleConfiguration(Configuration.LOGGING); PropertyConfigurator.configure(loggingParams.get(Configuration.PROPERTIES_FILE)); final Logger logger = Logger.getLogger(MemoryBasedRight2LeftSimilarities.class); final ConfigurationParams confParams = confFile.getModuleConfiguration(Configuration.RIGHT_TO_LEFT_SIMILARITIES); eu.excitementproject.eop.distsim.storage.File rightSimilaritiesFile = new eu.excitementproject.eop.distsim.storage.File( new File(confParams.get(Configuration.INFILE)), true); rightSimilaritiesFile.open(); RedisBasedIDKeyPersistentBasicMap<HashMap<Integer, Double>> leftSimilarities = new RedisBasedIDKeyPersistentBasicMap<HashMap<Integer, Double>>( confParams.getString(Configuration.REDIS_FILE)); leftSimilarities.clear(); // set the right similarities at the left similarities map logger.info("Loading right similarities"); Pair<Integer, Serializable> pair = null; int i = 0; TIntObjectMap<HashMap<Integer, Double>> tmpLeftSimilarities = new TIntObjectHashMap<HashMap<Integer, Double>>(); while ((pair = rightSimilaritiesFile.read()) != null) { int rightElementId = pair.getFirst(); // System.out.println("rightElementId = " + rightElementId); @SuppressWarnings("unchecked") HashMap<Integer, Double> similarities = (HashMap<Integer, Double>) pair.getSecond(); // System.out.println("left similarities: " + similarities.size()); for (Entry<Integer, Double> entry : similarities.entrySet()) { int leftElementId = entry.getKey(); double score = entry.getValue(); // System.out.println("\t" + leftElementId + "\t" + score); HashMap<Integer, Double> scores = tmpLeftSimilarities.get(leftElementId); if (scores == null) scores = leftSimilarities.get(leftElementId); if (scores == null) scores = new HashMap<Integer, Double>(); scores.put(rightElementId, score); // System.out.println("\t" + scores); tmpLeftSimilarities.put(leftElementId, scores); } i++; if (i % 1000 == 0) logger.info(i); if (!memoryLeft()) { logger.info("writing data to redis"); writeToRedis(tmpLeftSimilarities, leftSimilarities); System.gc(); Thread.sleep(10000); } } logger.info("writing data to redis"); writeToRedis(tmpLeftSimilarities, leftSimilarities); rightSimilaritiesFile.close(); // save the right element similarity file java.io.File outfile = new File(confParams.get(Configuration.OUTFILE)); logger.info("Saving right similarities to file: " + outfile.getPath()); eu.excitementproject.eop.distsim.storage.File leftSimilaritiesFile = new eu.excitementproject.eop.distsim.storage.File(outfile, false); leftSimilaritiesFile.open(); ImmutableIterator<Pair<Integer, HashMap<Integer, Double>>> it = leftSimilarities.iterator(); while (it.hasNext()) { Pair<Integer, HashMap<Integer, Double>> idData = it.next(); leftSimilaritiesFile.write( idData.getFirst(), SortUtil.sortMapByValue(idData.getSecond(), true)); } leftSimilaritiesFile.close(); } catch (Exception e) { e.printStackTrace(); } }
/** * Call this method once before starting to align sentence pairs. * * @param config a CommonConfig instance. The aligner retrieves the lexical resources * configuration values. * @throws LexicalResourceException if initialization of a resource failed * @throws ConfigurationException if the configuration is invalid */ private void init(CommonConfig config) throws LexicalResourceException, ConfigurationException { // Get the general parameters configuration section NameValueTable paramsSection = null; try { paramsSection = config.getSection(GENERAL_PARAMS_CONF_SECTION); } catch (ConfigurationException e) { throw new ConfigurationException(e); } maxPhrase = paramsSection.getInteger(MAX_PHRASE_KEY); // Get the Lexical Resources configuration section NameValueTable lexicalResourcesSection = null; try { lexicalResourcesSection = config.getSection(LEXICAL_RESOURCES_CONF_SECTION); } catch (ConfigurationException e) { throw new ConfigurationException(e); } lexicalResources = new ArrayList<LexicalResource<? extends RuleInfo>>(); ConfigurationFile configFile = new ConfigurationFile(config); // Get each resource and create it using the configuration section related to it for (String resourceName : lexicalResourcesSection.keySet()) { // Get the class name String resourceClassName = lexicalResourcesSection.getString(resourceName); // Get the configuration params ConfigurationParams resourceParams = configFile.getModuleConfiguration(resourceName); resourceParams.setExpandingEnvironmentVariables(true); LexicalResource<? extends RuleInfo> lexicalResource = createLexicalResource(resourceClassName, resourceParams); if (lexicalResource != null) { lexicalResources.add(lexicalResource); PartOfSpeech leftSidePOS = null, rightSidePOS = null; // Add the information about this resource // Get the right and left side POS, in case it's mentioned if (resourceParams.keySet().contains(LEFT_SIDE_POS_PARAM)) { try { leftSidePOS = new ByCanonicalPartOfSpeech(resourceParams.getString(LEFT_SIDE_POS_PARAM)); } catch (UnsupportedPosTagStringException e) { logger.warn( "Could not load POS for left side: " + resourceParams.getString(LEFT_SIDE_POS_PARAM) + ". Alignment links of all POS will be retreived."); } } if (resourceParams.keySet().contains(RIGHT_SIDE_POS_PARAM)) { try { rightSidePOS = new ByCanonicalPartOfSpeech(resourceParams.getString(RIGHT_SIDE_POS_PARAM)); } catch (UnsupportedPosTagStringException e) { logger.warn( "Could not load POS for right side: " + resourceParams.getString(RIGHT_SIDE_POS_PARAM) + ". Alignment links of all POS will be retreived."); } } lexicalResourcesInformation.put( lexicalResource.getClass().getName(), new LexicalResourceInformation( resourceParams.getString(VERSION_PARAM), resourceParams.getBoolean(USE_LEMMA_PARAM), leftSidePOS, rightSidePOS)); } } }