/**
 * Loads the configuration file named by {@code configurationFileName} and
 * initializes this object's preprocessing settings from it.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>load the file through {@code SystemInitialization.loadConfigurationFile}
 *       and store it in {@code configurationFile};</li>
 *   <li>switch on environment-variable expansion for that file;</li>
 *   <li>fetch the module section named by
 *       {@code RTE_PAIRS_PREPROCESS_MODULE_NAME};</li>
 *   <li>build the default {@code instruments} from that section;</li>
 *   <li>read the two optional boolean switches {@code PREPROCESS_DO_NER} and
 *       {@code PREPROCESS_DO_TEXT_NORMALIZATION}; each one defaults to
 *       {@code true} when its key is absent from the section.</li>
 * </ol>
 *
 * <p>All declared exceptions are propagated unchanged from the calls made here.
 */
private void readConfigurationFile() throws ConfigurationFileDuplicateKeyException, ConfigurationException, NumberFormatException, TeEngineMlException, ParserRunException, NamedEntityRecognizerException, TextPreprocessorException {
    configurationFile = SystemInitialization.loadConfigurationFile(this.configurationFileName);
    configurationFile.setExpandingEnvironmentVariables(true);

    ConfigurationParams moduleParams =
            configurationFile.getModuleConfiguration(RTE_PAIRS_PREPROCESS_MODULE_NAME);
    instruments = new InstrumentsFactory().getDefaultInstruments(moduleParams);

    // Named-entity recognition is enabled unless the configuration says otherwise.
    doNer = moduleParams.containsKey(PREPROCESS_DO_NER)
            ? moduleParams.getBoolean(PREPROCESS_DO_NER)
            : true;

    // Text normalization follows the same "on by default" rule.
    doTextNormalization = moduleParams.containsKey(PREPROCESS_DO_TEXT_NORMALIZATION)
            ? moduleParams.getBoolean(PREPROCESS_DO_TEXT_NORMALIZATION)
            : true;
}
/** * Call this method once before starting to align sentence pairs. * * @param config a CommonConfig instance. The aligner retrieves the lexical resources * configuration values. * @throws LexicalResourceException if initialization of a resource failed * @throws ConfigurationException if the configuration is invalid */ private void init(CommonConfig config) throws LexicalResourceException, ConfigurationException { // Get the general parameters configuration section NameValueTable paramsSection = null; try { paramsSection = config.getSection(GENERAL_PARAMS_CONF_SECTION); } catch (ConfigurationException e) { throw new ConfigurationException(e); } maxPhrase = paramsSection.getInteger(MAX_PHRASE_KEY); // Get the Lexical Resources configuration section NameValueTable lexicalResourcesSection = null; try { lexicalResourcesSection = config.getSection(LEXICAL_RESOURCES_CONF_SECTION); } catch (ConfigurationException e) { throw new ConfigurationException(e); } lexicalResources = new ArrayList<LexicalResource<? extends RuleInfo>>(); ConfigurationFile configFile = new ConfigurationFile(config); // Get each resource and create it using the configuration section related to it for (String resourceName : lexicalResourcesSection.keySet()) { // Get the class name String resourceClassName = lexicalResourcesSection.getString(resourceName); // Get the configuration params ConfigurationParams resourceParams = configFile.getModuleConfiguration(resourceName); resourceParams.setExpandingEnvironmentVariables(true); LexicalResource<? 
extends RuleInfo> lexicalResource = createLexicalResource(resourceClassName, resourceParams); if (lexicalResource != null) { lexicalResources.add(lexicalResource); PartOfSpeech leftSidePOS = null, rightSidePOS = null; // Add the information about this resource // Get the right and left side POS, in case it's mentioned if (resourceParams.keySet().contains(LEFT_SIDE_POS_PARAM)) { try { leftSidePOS = new ByCanonicalPartOfSpeech(resourceParams.getString(LEFT_SIDE_POS_PARAM)); } catch (UnsupportedPosTagStringException e) { logger.warn( "Could not load POS for left side: " + resourceParams.getString(LEFT_SIDE_POS_PARAM) + ". Alignment links of all POS will be retreived."); } } if (resourceParams.keySet().contains(RIGHT_SIDE_POS_PARAM)) { try { rightSidePOS = new ByCanonicalPartOfSpeech(resourceParams.getString(RIGHT_SIDE_POS_PARAM)); } catch (UnsupportedPosTagStringException e) { logger.warn( "Could not load POS for right side: " + resourceParams.getString(RIGHT_SIDE_POS_PARAM) + ". Alignment links of all POS will be retreived."); } } lexicalResourcesInformation.put( lexicalResource.getClass().getName(), new LexicalResourceInformation( resourceParams.getString(VERSION_PARAM), resourceParams.getBoolean(USE_LEMMA_PARAM), leftSidePOS, rightSidePOS)); } } }