Beispiel #1
0
 /**
  * Breaks a search string up into its individual terms.
  *
  * <p>The tokenizer is obtained from the text engine of the context's broker;
  * the text of every token it yields is collected in the order produced.
  *
  * @param searchString the search expression to tokenize
  * @return the text of each token; an empty array if the tokenizer yields none
  */
 protected String[] getSearchTerms(String searchString) {
   Tokenizer tok = context.getBroker().getTextEngine().getTokenizer();
   tok.setText(searchString);
   List words = new ArrayList();
   // nextToken(true) returns null once the input is exhausted.
   for (org.exist.storage.analysis.TextToken current = tok.nextToken(true);
       current != null;
       current = tok.nextToken(true)) {
     words.add(current.getText());
   }
   return (String[]) words.toArray(new String[words.size()]);
 }
  /**
   * Construct a new instance and configure it.
   *
   * <p>Every setting is optional. A property that is absent from the
   * configuration leaves the corresponding field at the value it already has.
   * The following properties are read:
   * <ul>
   *   <li>{@code PROPERTY_INDEX_NUMBERS}, {@code PROPERTY_STEM} and
   *       {@code PROPERTY_STORE_TERM_FREQUENCY} - boolean switches;</li>
   *   <li>{@code Serializer.PROPERTY_TAG_MATCHING_ELEMENTS} and
   *       {@code Serializer.PROPERTY_TAG_MATCHING_ATTRIBUTES} - "yes" (case
   *       insensitive) enables the respective match-tagging flag;</li>
   *   <li>{@code PROPERTY_TOKENIZER} - class name of the tokenizer to
   *       instantiate; a {@link SimpleTokenizer} is used if no tokenizer is set
   *       afterwards (property absent or instantiation failed);</li>
   *   <li>{@code PROPERTY_STOPWORD_FILE} - path of a file whose word tokens are
   *       added to the stoplist.</li>
   * </ul>
   * Failures while loading the tokenizer class or reading the stopword file
   * are logged at debug level and otherwise ignored.
   *
   * @param broker the broker, stored in the {@code broker} field
   * @param conf the configuration the settings above are read from
   */
  public TextSearchEngine(DBBroker broker, Configuration conf) {
    this.broker = broker;
    this.config = conf;
    String stopword, tokenizerClass;
    Boolean num, stemming, termFrequencies;
    if ((num = (Boolean) config.getProperty(PROPERTY_INDEX_NUMBERS)) != null)
      indexNumbers = num.booleanValue();
    if ((stemming = (Boolean) config.getProperty(PROPERTY_STEM)) != null)
      stem = stemming.booleanValue();
    if ((termFrequencies = (Boolean) config.getProperty(PROPERTY_STORE_TERM_FREQUENCY)) != null)
      termFreq = termFrequencies.booleanValue();

    // Element tagging replaces the current flags; attribute tagging is OR-ed on top.
    String track = (String) config.getProperty(Serializer.PROPERTY_TAG_MATCHING_ELEMENTS);
    if (track != null)
      trackMatches =
          track.equalsIgnoreCase("yes") ? Serializer.TAG_ELEMENT_MATCHES : Serializer.TAG_NONE;
    track = (String) config.getProperty(Serializer.PROPERTY_TAG_MATCHING_ATTRIBUTES);
    if (track != null && track.equalsIgnoreCase("yes"))
      trackMatches = trackMatches | Serializer.TAG_ATTRIBUTE_MATCHES;

    if ((tokenizerClass = (String) config.getProperty(PROPERTY_TOKENIZER)) != null) {
      try {
        Class tokClass = Class.forName(tokenizerClass);
        tokenizer = (Tokenizer) tokClass.newInstance();
        LOG.debug("using tokenizer: " + tokenizerClass);
      } catch (ClassNotFoundException e) {
        LOG.debug(e);
      } catch (InstantiationException e) {
        LOG.debug(e);
      } catch (IllegalAccessException e) {
        LOG.debug(e);
      }
    }
    // Fall back to the simple tokenizer when none was configured or loading failed.
    if (tokenizer == null) {
      LOG.debug("using simple tokenizer");
      tokenizer = new SimpleTokenizer();
    }

    if (stem) stemmer = new PorterStemmer();
    tokenizer.setStemming(stem);
    if ((stopword = (String) config.getProperty(PROPERTY_STOPWORD_FILE)) != null) {
      try {
        FileReader in = new FileReader(stopword);
        try {
          StreamTokenizer tok = new StreamTokenizer(in);
          // Advance on every iteration: skipping a non-word token (number,
          // punctuation, quoted string) must still read the next token,
          // otherwise the loop never terminates.
          for (int next = tok.nextToken();
              next != StreamTokenizer.TT_EOF;
              next = tok.nextToken()) {
            if (next == StreamTokenizer.TT_WORD) stoplist.add(tok.sval);
          }
        } finally {
          // Always release the file handle, even if reading fails midway.
          in.close();
        }
      } catch (FileNotFoundException e) {
        LOG.debug(e);
      } catch (IOException e) {
        LOG.debug(e);
      }
    }
  }