private POSTagger getPOSTagger(String language) { String modelName = languageConfig.getParameter(language, MODEL_NAME_PARAM); try { POSModel model; if (modelName == null) { // use the default model = openNLP.getPartOfSpeechModel(language); } else { model = openNLP.getModel(POSModel.class, modelName, null); } if (model != null) { log.debug( "POS Tagger Model {} for lanugage '{}' version: {}", new Object[] { model.getClass().getSimpleName(), model.getLanguage(), model.getVersion() != null ? model.getVersion() : "undefined" }); return new POSTaggerME(model); } } catch (Exception e) { log.warn("Unable to load POS model for language '" + language + "'!", e); } log.debug("POS tagging Model for Language '{}' not available.", language); return null; }
/** * Indicate if this engine can enhance supplied ContentItem, and if it suggests enhancing it * synchronously or asynchronously. The {@link * org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager} can force sync/async mode if * desired, it is just a suggestion from the engine. * * <p>Returns ENHANCE_ASYNC in case there is a text/plain content part and a tagger for the * language identified for the content item, CANNOT_ENHANCE otherwise. * * @throws org.apache.stanbol.enhancer.servicesapi.EngineException if the introspecting process of * the content item fails */ @Override public int canEnhance(ContentItem ci) throws EngineException { // check if content is present Map.Entry<UriRef, Blob> entry = NlpEngineHelper.getPlainText(this, ci, false); if (entry == null || entry.getValue() == null) { return CANNOT_ENHANCE; } String language = getLanguage(this, ci, false); if (language == null) { return CANNOT_ENHANCE; } if (!languageConfig.isLanguage(language)) { log.trace( " > can NOT enhance ContentItem {} because language {} is " + "not enabled by this engines configuration", ci, language); return CANNOT_ENHANCE; } if (getPOSTagger(language) == null) { log.trace( " > can NOT enhance ContentItem {} because no POSTagger is" + "is present for language {}", ci, language); return CANNOT_ENHANCE; } log.trace(" > can enhance ContentItem {} with language {}", ci, language); return ENHANCE_ASYNC; }
/**
 * Activates the engine: delegates to the parent's {@code activate} and then hands the component
 * properties to the language configuration. No POS tagger is created in this method.
 *
 * @param ce the {@link org.osgi.service.component.ComponentContext}
 * @throws ConfigurationException declared for the parent activation and the parsing of the
 *     language configuration
 */
@Activate
protected void activate(ComponentContext ce) throws ConfigurationException {
  log.info("activating POS tagging engine");
  super.activate(ce);

  // ComponentContext#getProperties() returns a raw Dictionary
  @SuppressWarnings("unchecked")
  final Dictionary<String, Object> componentConfig = ce.getProperties();
  languageConfig.setConfiguration(componentConfig);
}
@Activate @SuppressWarnings("unchecked") protected void activate(ComponentContext ctx) throws ConfigurationException { log.info("activate {}", getClass().getSimpleName()); this.bundleContext = ctx.getBundleContext(); Dictionary<String, Object> properties = ctx.getProperties(); // (0) The name for the Enhancement Engine and the basic metadata Object value = properties.get(PROPERTY_NAME); if (value == null || value.toString().isEmpty()) { throw new ConfigurationException( PROPERTY_NAME, "The EnhancementEngine name MUST BE configured!"); } else { this.engineName = value.toString(); } engineMetadata = new Hashtable<String, Object>(); engineMetadata.put(PROPERTY_NAME, this.engineName); value = properties.get(Constants.SERVICE_RANKING); engineMetadata.put(Constants.SERVICE_RANKING, value == null ? Integer.valueOf(0) : value); // (1) parse the TextProcessing configuration // TODO: decide if we should use the TextProcessingConfig for this engine textProcessingConfig = TextProcessingConfig.createInstance(properties); // change default for EntityLinkerConfig.MIN_FOUND_TOKENS value = properties.get(EntityLinkerConfig.MIN_FOUND_TOKENS); entityLinkerConfig = EntityLinkerConfig.createInstance(properties, prefixService); if (value == null) { // no MIN_FOUND_TOKENS config present // manually set the default to the value used by this engine entityLinkerConfig.setMinFoundTokens(FST_DEFAULT_MIN_FOUND_TOKENS); } // (2) parse the configured IndexReference value = properties.get(SOLR_CORE); if (value == null) { throw new ConfigurationException(SOLR_CORE, "Missing required configuration of the SolrCore"); } else { indexReference = IndexReference.parse(value.toString()); } value = properties.get(IndexConfiguration.FIELD_ENCODING); if (value == null) { throw new ConfigurationException( IndexConfiguration.FIELD_ENCODING, "Missing required configuration of the Solr Field Encoding"); } else { try { fieldEncoding = FieldEncodingEnum.valueOf(value.toString().trim()); } catch 
(IllegalArgumentException e) { throw new ConfigurationException( IndexConfiguration.FIELD_ENCODING, "The configured " + "FieldEncoding MUST BE a member of " + Arrays.toString(FieldEncodingEnum.values()), e); } } value = properties.get(IndexConfiguration.SKIP_ALT_TOKENS); if (value instanceof Boolean) { skipAltTokensConfig = ((Boolean) value); } else if (value != null) { skipAltTokensConfig = Boolean.valueOf(value.toString()); } // else no config -> will use the default // (4) init the FST configuration // We can create the default configuration only here, as it depends on the // name of the solrIndex String defaultConfig = "*;" + IndexConfiguration.PARAM_FST + "=" + indexReference.getIndex() + ";" + IndexConfiguration.PARAM_FIELD + "=" + IndexConfiguration.DEFAULT_FIELD; fstConfig = new LanguageConfiguration(IndexConfiguration.FST_CONFIG, new String[] {defaultConfig}); // now set the actual configuration parsed to the engine value = properties.get(IndexConfiguration.FST_CONFIG); if (value != null && !StringUtils.isBlank(value.toString())) { fstConfig.setConfiguration(properties); } // else keep the default value = properties.get(IndexConfiguration.FST_FOLDER); if (value instanceof String) { this.fstFolder = ((String) value).trim(); if (this.fstFolder.isEmpty()) { this.fstFolder = null; } } else if (value == null) { this.fstFolder = null; } else { throw new ConfigurationException( IndexConfiguration.FST_FOLDER, "Values MUST BE of type String" + "(found: " + value.getClass().getName() + ")!"); } // (5) Create the ThreadPool used for the runtime creation of FST models value = properties.get(FST_THREAD_POOL_SIZE); int tpSize; if (value instanceof Number) { tpSize = ((Number) value).intValue(); } else if (value != null) { try { tpSize = Integer.parseInt(value.toString()); } catch (NumberFormatException e) { throw new ConfigurationException( FST_THREAD_POOL_SIZE, "Unable to parse the integer FST thread pool size from the " + "configured " + 
value.getClass().getSimpleName() + " '" + value + "'!", e); } } else { tpSize = -1; } if (tpSize <= 0) { // if configured value <= 0 we use the default tpSize = DEFAULT_FST_THREAD_POOL_SIZE; } // build a ThreadFactoryBuilder for low priority daemon threads that // do use a meaningful name ThreadFactoryBuilder tfBuilder = new ThreadFactoryBuilder(); tfBuilder.setDaemon(true); // should be stopped if the VM closes tfBuilder.setPriority(Thread.MIN_PRIORITY); // low priority tfBuilder.setNameFormat(engineName + "-FstRuntimeCreation-thread-%d"); if (fstCreatorService != null && !fstCreatorService.isTerminated()) { // NOTE: We can not call terminateNow, because to interrupt threads // here would also close FileChannels used by the SolrCore // and produce java.nio.channels.ClosedByInterruptException // exceptions followed by java.nio.channels.ClosedChannelException // on following calls to affected files of the SolrIndex. // Because of that we just log a warning and let uncompleted tasks // complete! log.warn( "some items in a previouse FST Runtime Creation Threadpool have " + "still not finished!"); } fstCreatorService = Executors.newFixedThreadPool(tpSize, tfBuilder.build()); // (6) Parse the EntityCache config int entityCacheSize; value = properties.get(ENTITY_CACHE_SIZE); if (value instanceof Number) { entityCacheSize = ((Number) value).intValue(); } else if (value != null) { try { entityCacheSize = Integer.parseInt(value.toString()); } catch (NumberFormatException e) { throw new ConfigurationException( ENTITY_CACHE_SIZE, "Unable to parse the integer EntityCacheSize from the " + "configured " + value.getClass().getSimpleName() + " '" + value + "'!", e); } } else { entityCacheSize = -1; } if (entityCacheSize == 0) { log.info(" ... EntityCache deactivated"); this.entityCacheSize = entityCacheSize; } else { this.entityCacheSize = entityCacheSize < 0 ? DEFAULT_ENTITY_CACHE_SIZE : entityCacheSize; log.info(" ... 
EntityCache enabled (size: {})", this.entityCacheSize); } // (7) parse the Entity type field value = properties.get(IndexConfiguration.SOLR_TYPE_FIELD); if (value == null || StringUtils.isBlank(value.toString())) { solrTypeField = null; } else { solrTypeField = value.toString().trim(); } // (8) parse the Entity Ranking field value = properties.get(IndexConfiguration.SOLR_RANKING_FIELD); if (value == null) { solrRankingField = null; } else { solrRankingField = value.toString().trim(); } // (9) start tracking the SolrCore try { solrServerTracker = new RegisteredSolrServerTracker(bundleContext, indexReference, null) { @Override public void removedService(ServiceReference reference, Object service) { log.info(" ... SolrCore for {} was removed!", reference); // try to get an other serviceReference from the tracker updateEngineRegistration(solrServerTracker.getServiceReference(), null); super.removedService(reference, service); } @Override public void modifiedService(ServiceReference reference, Object service) { log.info(" ... SolrCore for {} was updated!", indexReference); updateEngineRegistration(solrServerTracker.getServiceReference(), null); super.modifiedService(reference, service); } @Override public SolrServer addingService(ServiceReference reference) { SolrServer server = super.addingService(reference); if (solrCore != null) { log.info( "Multiple SolrCores for name {}! Will update engine " + "with the newly added {}!", new Object[] {solrCore.getName(), indexReference, reference}); } updateEngineRegistration(reference, server); return server; } }; } catch (InvalidSyntaxException e) { throw new ConfigurationException( SOLR_CORE, "parsed SolrCore name '" + value.toString() + "' is invalid (expected: '[{server-name}:]{indexname}'"); } solrServerTracker.open(); }
/**
 * Deactivates the engine: calls {@code setDefault()} on the language configuration and then
 * delegates to the parent's {@code deactivate}.
 *
 * @param context the {@link org.osgi.service.component.ComponentContext}
 */
@Deactivate
protected void deactivate(ComponentContext context) {
  // drop the configuration applied during activation before the parent cleans up
  languageConfig.setDefault();
  super.deactivate(context);
}