コード例 #1
0
 /**
  * Creates a POS tagger for the given language.
  *
  * <p>The model name is read from the {@code MODEL_NAME_PARAM} parameter of the language
  * configuration. When no name is configured, the default part-of-speech model that {@code
  * openNLP} provides for the language is requested instead. A new {@link POSTaggerME} is created
  * on every successful call.
  *
  * <p>Loading is best effort: any exception raised while obtaining the model is logged as a
  * warning and treated the same as a missing model.
  *
  * @param language the language to obtain a tagger for
  * @return a new tagger backed by the loaded model, or {@code null} if no model is available or
  *     loading the model failed
  */
 private POSTagger getPOSTagger(String language) {
   String modelName = languageConfig.getParameter(language, MODEL_NAME_PARAM);
   try {
     POSModel model;
     if (modelName == null) { // use the default
       model = openNLP.getPartOfSpeechModel(language);
     } else {
       model = openNLP.getModel(POSModel.class, modelName, null);
     }
     if (model != null) {
       // fixed the misspelled "lanugage" so the message can be found when searching the logs
       log.debug(
           "POS Tagger Model {} for language '{}' version: {}",
           new Object[] {
             model.getClass().getSimpleName(),
             model.getLanguage(),
             model.getVersion() != null ? model.getVersion() : "undefined"
           });
       return new POSTaggerME(model);
     }
   } catch (Exception e) {
     // deliberately broad: a model that fails to load must not break the caller
     log.warn("Unable to load POS model for language '" + language + "'!", e);
   }
   // reached both when no model was found and after a load failure
   log.debug("POS tagging Model for Language '{}' not available.", language);
   return null;
 }
コード例 #2
0
  /**
   * Indicate if this engine can enhance supplied ContentItem, and if it suggests enhancing it
   * synchronously or asynchronously. The {@link
   * org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager} can force sync/async mode if
   * desired, it is just a suggestion from the engine.
   *
   * <p>Returns ENHANCE_ASYNC in case there is a text/plain content part, a language identified
   * for the content item that is enabled in the language configuration of this engine, and a POS
   * tagger available for that language. Returns CANNOT_ENHANCE otherwise.
   *
   * @param ci the content item to check
   * @return {@code ENHANCE_ASYNC} if all checks pass, {@code CANNOT_ENHANCE} otherwise
   * @throws org.apache.stanbol.enhancer.servicesapi.EngineException if the introspecting process of
   *     the content item fails
   */
  @Override
  public int canEnhance(ContentItem ci) throws EngineException {
    // check if content is present
    Map.Entry<UriRef, Blob> entry = NlpEngineHelper.getPlainText(this, ci, false);
    if (entry == null || entry.getValue() == null) {
      return CANNOT_ENHANCE;
    }

    // a language must be known for the content item ...
    String language = getLanguage(this, ci, false);
    if (language == null) {
      return CANNOT_ENHANCE;
    }
    // ... and it must be enabled by the configuration of this engine
    if (!languageConfig.isLanguage(language)) {
      log.trace(
          " > can NOT enhance ContentItem {} because language {} is "
              + "not enabled by this engines configuration",
          ci,
          language);
      return CANNOT_ENHANCE;
    }

    if (getPOSTagger(language) == null) {
      // message fixed: the two literals used to concatenate to "... no POSTagger isis present"
      log.trace(
          " > can NOT enhance ContentItem {} because no POSTagger is "
              + "present for language {}",
          ci,
          language);
      return CANNOT_ENHANCE;
    }

    log.trace(" > can enhance ContentItem {} with language {}", ci, language);
    return ENHANCE_ASYNC;
  }
コード例 #3
0
  /**
   * Activates this engine: logs the activation, delegates to the activation of the super class
   * and then hands the component properties to the language configuration.
   *
   * @param ce the {@link org.osgi.service.component.ComponentContext} providing the properties
   * @throws ConfigurationException if raised by the super class activation or while applying the
   *     properties to the language configuration
   */
  @Activate
  protected void activate(ComponentContext ce) throws ConfigurationException {
    log.info("activating POS tagging engine");
    super.activate(ce);

    // the component context exposes its properties without generic type information
    @SuppressWarnings("unchecked")
    final Dictionary<String, Object> componentProperties = ce.getProperties();
    languageConfig.setConfiguration(componentProperties);
  }
コード例 #4
0
  /**
   * Activates the engine: parses the component configuration and starts tracking the configured
   * SolrCore.
   *
   * <p>The following steps are performed in order:
   *
   * <ol>
   *   <li>read the engine name (required) and the service ranking into the engine metadata
   *   <li>create the text processing and entity linker configuration; if no {@code
   *       MIN_FOUND_TOKENS} value is configured, {@code FST_DEFAULT_MIN_FOUND_TOKENS} is applied
   *   <li>parse the SolrCore reference (required), the field encoding (required) and the optional
   *       skip-alt-tokens flag
   *   <li>build the FST configuration, using a default derived from the index name unless an
   *       explicit configuration is present, and read the optional FST folder
   *   <li>create the thread pool used for the runtime creation of FST models
   *   <li>parse the entity cache size (0 disables the cache, negative or absent selects the
   *       default)
   *   <li>read the optional Solr type and ranking fields (blank values are treated as absent)
   *   <li>create and open the tracker for the SolrCore
   * </ol>
   *
   * @param ctx the component context providing the bundle context and the configuration
   * @throws ConfigurationException if the engine name, the SolrCore or the field encoding is
   *     missing, if the field encoding is not a member of {@code FieldEncodingEnum}, if the FST
   *     folder is not a String, if the thread pool size or entity cache size cannot be parsed as
   *     an integer, or if creating the SolrCore tracker fails with an {@code
   *     InvalidSyntaxException}
   */
  @Activate
  @SuppressWarnings("unchecked")
  protected void activate(ComponentContext ctx) throws ConfigurationException {
    log.info("activate {}", getClass().getSimpleName());
    this.bundleContext = ctx.getBundleContext();
    Dictionary<String, Object> properties = ctx.getProperties();
    // (0) The name for the Enhancement Engine and the basic metadata
    Object value = properties.get(PROPERTY_NAME);
    if (value == null || value.toString().isEmpty()) {
      throw new ConfigurationException(
          PROPERTY_NAME, "The EnhancementEngine name MUST BE configured!");
    } else {
      this.engineName = value.toString();
    }
    engineMetadata = new Hashtable<String, Object>();
    engineMetadata.put(PROPERTY_NAME, this.engineName);
    value = properties.get(Constants.SERVICE_RANKING);
    engineMetadata.put(Constants.SERVICE_RANKING, value == null ? Integer.valueOf(0) : value);

    // (1) parse the TextProcessing configuration
    // TODO: decide if we should use the TextProcessingConfig for this engine
    textProcessingConfig = TextProcessingConfig.createInstance(properties);
    // change default for EntityLinkerConfig.MIN_FOUND_TOKENS
    value = properties.get(EntityLinkerConfig.MIN_FOUND_TOKENS);
    entityLinkerConfig = EntityLinkerConfig.createInstance(properties, prefixService);
    if (value == null) { // no MIN_FOUND_TOKENS config present
      // manually set the default to the value used by this engine
      entityLinkerConfig.setMinFoundTokens(FST_DEFAULT_MIN_FOUND_TOKENS);
    }

    // (2) parse the configured IndexReference
    value = properties.get(SOLR_CORE);
    if (value == null) {
      throw new ConfigurationException(SOLR_CORE, "Missing required configuration of the SolrCore");
    }
    // Kept in its own variable because 'value' is reused for other properties below, while
    // the error message of step (9) needs exactly this configured SolrCore name.
    final String solrCoreConfig = value.toString();
    indexReference = IndexReference.parse(solrCoreConfig);

    value = properties.get(IndexConfiguration.FIELD_ENCODING);
    if (value == null) {
      throw new ConfigurationException(
          IndexConfiguration.FIELD_ENCODING,
          "Missing required configuration of the Solr Field Encoding");
    } else {
      try {
        fieldEncoding = FieldEncodingEnum.valueOf(value.toString().trim());
      } catch (IllegalArgumentException e) {
        throw new ConfigurationException(
            IndexConfiguration.FIELD_ENCODING,
            "The configured "
                + "FieldEncoding MUST BE a member of "
                + Arrays.toString(FieldEncodingEnum.values()),
            e);
      }
    }
    value = properties.get(IndexConfiguration.SKIP_ALT_TOKENS);
    if (value instanceof Boolean) {
      skipAltTokensConfig = ((Boolean) value);
    } else if (value != null) {
      skipAltTokensConfig = Boolean.valueOf(value.toString());
    } // else no config -> will use the default

    // (4) init the FST configuration
    // We can create the default configuration only here, as it depends on the
    // name of the solrIndex
    String defaultConfig =
        "*;"
            + IndexConfiguration.PARAM_FST
            + "="
            + indexReference.getIndex()
            + ";"
            + IndexConfiguration.PARAM_FIELD
            + "="
            + IndexConfiguration.DEFAULT_FIELD;
    fstConfig =
        new LanguageConfiguration(IndexConfiguration.FST_CONFIG, new String[] {defaultConfig});
    // now set the actual configuration parsed to the engine
    value = properties.get(IndexConfiguration.FST_CONFIG);
    if (value != null && !StringUtils.isBlank(value.toString())) {
      fstConfig.setConfiguration(properties);
    } // else keep the default

    value = properties.get(IndexConfiguration.FST_FOLDER);
    if (value instanceof String) {
      this.fstFolder = ((String) value).trim();
      if (this.fstFolder.isEmpty()) {
        this.fstFolder = null;
      }
    } else if (value == null) {
      this.fstFolder = null;
    } else {
      throw new ConfigurationException(
          IndexConfiguration.FST_FOLDER,
          "Values MUST BE of type String" + "(found: " + value.getClass().getName() + ")!");
    }

    // (5) Create the ThreadPool used for the runtime creation of FST models
    int tpSize =
        parseIntegerConfig(
            FST_THREAD_POOL_SIZE, properties.get(FST_THREAD_POOL_SIZE), "FST thread pool size");
    if (tpSize <= 0) { // if configured value <= 0 (or absent) we use the default
      tpSize = DEFAULT_FST_THREAD_POOL_SIZE;
    }
    // build a ThreadFactoryBuilder for low priority daemon threads that
    // do use a meaningful name
    ThreadFactoryBuilder tfBuilder = new ThreadFactoryBuilder();
    tfBuilder.setDaemon(true); // should be stopped if the VM closes
    tfBuilder.setPriority(Thread.MIN_PRIORITY); // low priority
    tfBuilder.setNameFormat(engineName + "-FstRuntimeCreation-thread-%d");
    if (fstCreatorService != null && !fstCreatorService.isTerminated()) {
      // NOTE: We can not call terminateNow, because to interrupt threads
      //      here would also close FileChannels used by the SolrCore
      //      and produce java.nio.channels.ClosedByInterruptException
      //      exceptions followed by java.nio.channels.ClosedChannelException
      //      on following calls to affected files of the SolrIndex.

      // Because of that we just log a warning and let uncompleted tasks
      // complete!
      log.warn(
          "some items in a previous FST Runtime Creation Threadpool have "
              + "still not finished!");
    }
    fstCreatorService = Executors.newFixedThreadPool(tpSize, tfBuilder.build());

    // (6) Parse the EntityCache config
    int configuredCacheSize =
        parseIntegerConfig(ENTITY_CACHE_SIZE, properties.get(ENTITY_CACHE_SIZE), "EntityCacheSize");
    if (configuredCacheSize == 0) {
      log.info(" ... EntityCache deactivated");
      this.entityCacheSize = configuredCacheSize;
    } else {
      // negative (including "not configured") selects the default size
      this.entityCacheSize =
          configuredCacheSize < 0 ? DEFAULT_ENTITY_CACHE_SIZE : configuredCacheSize;
      log.info(" ... EntityCache enabled (size: {})", this.entityCacheSize);
    }

    // (7) parse the Entity type field
    value = properties.get(IndexConfiguration.SOLR_TYPE_FIELD);
    if (value == null || StringUtils.isBlank(value.toString())) {
      solrTypeField = null;
    } else {
      solrTypeField = value.toString().trim();
    }
    // (8) parse the Entity Ranking field
    // a blank value is treated like a missing one, consistent with the type field above
    value = properties.get(IndexConfiguration.SOLR_RANKING_FIELD);
    if (value == null || StringUtils.isBlank(value.toString())) {
      solrRankingField = null;
    } else {
      solrRankingField = value.toString().trim();
    }

    // (9) start tracking the SolrCore
    try {
      solrServerTracker =
          new RegisteredSolrServerTracker(bundleContext, indexReference, null) {

            @Override
            public void removedService(ServiceReference reference, Object service) {
              log.info(" ... SolrCore for {} was removed!", reference);
              // try to get an other serviceReference from the tracker
              updateEngineRegistration(solrServerTracker.getServiceReference(), null);
              super.removedService(reference, service);
            }

            @Override
            public void modifiedService(ServiceReference reference, Object service) {
              log.info(" ... SolrCore for {} was updated!", indexReference);
              updateEngineRegistration(solrServerTracker.getServiceReference(), null);
              super.modifiedService(reference, service);
            }

            @Override
            public SolrServer addingService(ServiceReference reference) {
              SolrServer server = super.addingService(reference);
              if (solrCore != null) {
                // three placeholders for three arguments: previously the newly added
                // reference was passed but never rendered
                log.info(
                    "Multiple SolrCores for name {} (index: {})! Will update engine "
                        + "with the newly added {}!",
                    new Object[] {solrCore.getName(), indexReference, reference});
              }
              updateEngineRegistration(reference, server);
              return server;
            }
          };
    } catch (InvalidSyntaxException e) {
      // report the configured SolrCore value (not whatever 'value' currently holds, which
      // may even be null at this point) and keep the original exception as cause
      throw new ConfigurationException(
          SOLR_CORE,
          "parsed SolrCore name '"
              + solrCoreConfig
              + "' is invalid (expected: '[{server-name}:]{indexname}')",
          e);
    }
    solrServerTracker.open();
  }

  /**
   * Converts an optional integer configuration value.
   *
   * @param property the name of the configuration property, used for error reporting
   * @param value the configured value; may be {@code null}
   * @param label human readable name of the setting used within the error message
   * @return the {@code int} value of a {@link Number}, the parsed {@code toString()} value of any
   *     other non-null object, or {@code -1} if {@code value} is {@code null}
   * @throws ConfigurationException if a non-numeric value cannot be parsed as an integer
   */
  private int parseIntegerConfig(String property, Object value, String label)
      throws ConfigurationException {
    if (value == null) {
      return -1; // callers map non-positive/negative values to their defaults
    }
    if (value instanceof Number) {
      return ((Number) value).intValue();
    }
    try {
      return Integer.parseInt(value.toString());
    } catch (NumberFormatException e) {
      throw new ConfigurationException(
          property,
          "Unable to parse the integer "
              + label
              + " from the configured "
              + value.getClass().getSimpleName()
              + " '"
              + value
              + "'!",
          e);
    }
  }
コード例 #5
0
 /**
  * Deactivates this engine: calls {@code setDefault()} on the language configuration and then
  * delegates to the deactivation of the super class.
  *
  * @param context the component context, passed on to the super class
  */
 @Deactivate
 protected void deactivate(ComponentContext context) {
   languageConfig.setDefault();
   super.deactivate(context);
 }