/**
 * Breaks a search string into its individual terms.
 *
 * The tokenizer is obtained from the text engine of the broker attached to
 * the current context. Every token it produces contributes its text, in the
 * order the tokens are returned.
 *
 * @param searchString the raw search expression to tokenize
 * @return the text of each token, in tokenizer order; an empty array if the
 *         tokenizer yields no tokens
 */
protected String[] getSearchTerms(String searchString) {
    Tokenizer tok = context.getBroker().getTextEngine().getTokenizer();
    tok.setText(searchString);

    List collected = new ArrayList();
    // The 'true' flag is handed straight to the tokenizer; its exact meaning
    // is defined by the Tokenizer implementation (not visible here).
    for (org.exist.storage.analysis.TextToken current = tok.nextToken(true);
            current != null;
            current = tok.nextToken(true)) {
        collected.add(current.getText());
    }

    return (String[]) collected.toArray(new String[collected.size()]);
}
/** * Construct a new instance and configure it. * * @param broker * @param conf */ public TextSearchEngine(DBBroker broker, Configuration conf) { this.broker = broker; this.config = conf; String stopword, tokenizerClass; Boolean num, stemming, termFrequencies; if ((num = (Boolean) config.getProperty(PROPERTY_INDEX_NUMBERS)) != null) indexNumbers = num.booleanValue(); if ((stemming = (Boolean) config.getProperty(PROPERTY_STEM)) != null) stem = stemming.booleanValue(); if ((termFrequencies = (Boolean) config.getProperty(PROPERTY_STORE_TERM_FREQUENCY)) != null) termFreq = termFrequencies.booleanValue(); String track = (String) config.getProperty(Serializer.PROPERTY_TAG_MATCHING_ELEMENTS); if (track != null) trackMatches = track.equalsIgnoreCase("yes") ? Serializer.TAG_ELEMENT_MATCHES : Serializer.TAG_NONE; track = (String) config.getProperty(Serializer.PROPERTY_TAG_MATCHING_ATTRIBUTES); if (track != null && track.equalsIgnoreCase("yes")) trackMatches = trackMatches | Serializer.TAG_ATTRIBUTE_MATCHES; if ((tokenizerClass = (String) config.getProperty(PROPERTY_TOKENIZER)) != null) { try { Class tokClass = Class.forName(tokenizerClass); tokenizer = (Tokenizer) tokClass.newInstance(); LOG.debug("using tokenizer: " + tokenizerClass); } catch (ClassNotFoundException e) { LOG.debug(e); } catch (InstantiationException e) { LOG.debug(e); } catch (IllegalAccessException e) { LOG.debug(e); } } if (tokenizer == null) { LOG.debug("using simple tokenizer"); tokenizer = new SimpleTokenizer(); } if (stem) stemmer = new PorterStemmer(); tokenizer.setStemming(stem); if ((stopword = (String) config.getProperty(PROPERTY_STOPWORD_FILE)) != null) { try { FileReader in = new FileReader(stopword); StreamTokenizer tok = new StreamTokenizer(in); int next = tok.nextToken(); while (next != StreamTokenizer.TT_EOF) { if (next != StreamTokenizer.TT_WORD) continue; stoplist.add(tok.sval); next = tok.nextToken(); } } catch (FileNotFoundException e) { LOG.debug(e); } catch (IOException e) { 
LOG.debug(e); } } }