// Helper method to transform an IndexTank Document to a Lucene Document private static org.apache.lucene.document.Document asLuceneDocument(Document itd) { org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document(); for (String field : itd.getFieldNames()) { doc.add(new Field(field, itd.getField(field), Field.Store.NO, Field.Index.ANALYZED)); } return doc; }
/**
 * {@inheritDoc}
 *
 * <p>The IndexTank document is converted to a Lucene document, and the document id is attached
 * twice: as an analyzed field named {@code LsiIndex.PAYLOAD_TERM_FIELD} and as a non-analyzed
 * {@code documentId} field. The result is handed to {@code writer.updateDocument} keyed on
 * {@code docIdTerm(docId)}.
 *
 * <p>A {@code null} docId is logged as an error and the call is ignored. An
 * {@link IOException} raised by the writer is logged together with its stack trace and is not
 * rethrown.
 *
 * @param docId identifier of the document being added; the call is a no-op when {@code null}
 * @param itdoc the IndexTank document whose fields are indexed
 */
public synchronized void add(final String docId, final Document itdoc) {
    if (null == docId) {
        logger.error("No documentId specified. Ignoring addition.");
        return;
    }

    org.apache.lucene.document.Document doc = asLuceneDocument(itdoc);
    org.apache.lucene.document.Field docidPayloadField =
            new org.apache.lucene.document.Field(
                    LsiIndex.PAYLOAD_TERM_FIELD, docId, Field.Store.NO, Field.Index.ANALYZED);
    doc.add(docidPayloadField);
    doc.add(new Field("documentId", docId, Field.Store.NO, Field.Index.NOT_ANALYZED));

    try {
        if (logger.isDebugEnabled()) {
            logger.debug("Adding document with docId=" + docId + ". Doc is " + itdoc.getFieldNames());
        }
        // NOTE(review): presumably writer is a Lucene IndexWriter, in which case updateDocument
        // replaces any earlier document matching the docId term - confirm.
        writer.updateDocument(docIdTerm(docId), doc);
    } catch (IOException e) {
        // Pass the exception as the throwable argument so the stack trace is kept, and say
        // which document failed; logging the bare exception object loses both.
        logger.error("Failed to add document with docId=" + docId, e);
    }
}
/** * @see AbstractDocumentSearcher#search(Query query, int start, int limit, int * scoringFunctionIndex, Map<String, String> extraParameters). * @param extraParameters: It will process 'fetch_fields', 'snippet_fields' and 'snippet_type'. * 'fetch_fields' and 'snippet_fields' are comma-separated lists of field names to fetch an * snippet. 'snippet_type' can be either 'html' or 'lines'. 'html' is the default. */ @Override public SearchResults search( Query query, int start, int limit, int scoringFunctionIndex, Map<String, String> extraParameters) throws InterruptedException { // call delegate searcher SearchResults results = this.delegate.search(query, start, limit, scoringFunctionIndex, extraParameters); long startTime = System.currentTimeMillis(); String[] fetchFields = parseFields(extraParameters, "fetch"); String[] snippetFields = parseFields(extraParameters, "snippet"); Set<TermQuery> positiveTerms = query.getRoot().getPositiveTerms(); // find out which snippeter type is the right one for this query String snType = extraParameters.get("snippet_type"); Snippeter sn = null; if (null == snType || "html".equalsIgnoreCase(snType)) { sn = this.snippeters.get(SnippeterType.HTML_AWARE); } else if ("lines".equalsIgnoreCase(snType)) { sn = this.snippeters.get(SnippeterType.LINE_AWARE); } else { throw new IllegalArgumentException("snippet_type has to be either 'html' or 'lines'"); } if (fetchFields.length + snippetFields.length > 0) { for (SearchResult result : results.getResults()) { Document data = storage.getDocument(result.getDocId()); // fetch fields for (String field : fetchFields) { // handle '*', as a fetch all if ("*".equals(field.trim())) { // assume we get the actual fields, not a copy. 
result.getFields().putAll(data.asMap()); break; } String text = data.getField(field); if (null != text) { result.setField(field, text); } } // snippet fields for (String field : snippetFields) { String text = data.getField(field); if (null != text) { result.setField("snippet_" + field, sn.snippet(positiveTerms, field, text)); } } } } long endTime = System.currentTimeMillis(); logger.debug("(search) fetching & snippeting took: " + (endTime - startTime) + " ms."); return results; }