Esempio n. 1
0
  /**
   * Builds a Lucene document out of an IndexTank document.
   *
   * <p>Each field name reported by {@code itd.getFieldNames()} becomes one Lucene field carrying
   * the value returned by {@code itd.getField(name)}; the fields are analyzed but not stored.
   *
   * @param itd the IndexTank document to convert
   * @return a newly created Lucene document with one field per field name of {@code itd}
   */
  private static org.apache.lucene.document.Document asLuceneDocument(Document itd) {
    final org.apache.lucene.document.Document luceneDoc =
        new org.apache.lucene.document.Document();

    for (final String name : itd.getFieldNames()) {
      final String value = itd.getField(name);
      final Field analyzedField = new Field(name, value, Field.Store.NO, Field.Index.ANALYZED);
      luceneDoc.add(analyzedField);
    }

    return luceneDoc;
  }
Esempio n. 2
0
  /**
   * {@inheritDoc}
   *
   * <p>Converts {@code itdoc} to a Lucene document, adds the document id to it twice (as an
   * analyzed field named {@code LsiIndex.PAYLOAD_TERM_FIELD} and as a non-analyzed field named
   * {@code "documentId"}), and hands it to {@code writer.updateDocument} keyed by the term
   * produced by {@code docIdTerm(docId)}.
   *
   * <p>A {@code null} {@code docId} is logged as an error and the call returns without touching
   * the writer. An {@link IOException} raised by the writer is logged, including its stack
   * trace, and is not propagated to the caller.
   *
   * @param docId identifier of the document being added; the call is ignored when {@code null}
   * @param itdoc the IndexTank document whose fields are indexed
   */
  public synchronized void add(final String docId, final Document itdoc) {
    if (null == docId) {
      logger.error("No documentId specified. Ignoring addition.");
      return;
    }

    org.apache.lucene.document.Document doc = asLuceneDocument(itdoc);

    // The id is indexed twice: analyzed under the payload term field, and verbatim under
    // "documentId".
    doc.add(new Field(LsiIndex.PAYLOAD_TERM_FIELD, docId, Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field("documentId", docId, Field.Store.NO, Field.Index.NOT_ANALYZED));

    try {
      if (logger.isDebugEnabled()) {
        logger.debug("Adding document with docId=" + docId + ". Doc is " + itdoc.getFieldNames());
      }
      writer.updateDocument(docIdTerm(docId), doc);
    } catch (IOException e) {
      // Pass the exception as the throwable argument (not as the message) so the logger
      // records the stack trace, and include the docId so the failure can be traced.
      logger.error("Failed to add document with docId=" + docId, e);
    }
  }
  /**
   * Delegates the search and then enriches every result with fetched fields and snippets.
   *
   * <p>The query is first executed by the delegate searcher with the arguments unchanged. If any
   * fetch or snippet field was requested, the document of each result is loaded from storage
   * and:
   *
   * <ul>
   *   <li>each fetch field present in the document is copied onto the result under its own name;
   *       a field name of {@code *} copies all fields of the document and stops processing the
   *       remaining fetch fields;
   *   <li>each snippet field present in the document is snippeted against the positive terms of
   *       the query and stored on the result as {@code snippet_<field>}.
   * </ul>
   *
   * @param query the query to run; its positive terms drive the snippeting
   * @param start passed through to the delegate searcher
   * @param limit passed through to the delegate searcher
   * @param scoringFunctionIndex passed through to the delegate searcher
   * @param extraParameters request options. The fetch and snippet field lists are obtained
   *     through {@code parseFields} (documented as the comma-separated 'fetch_fields' and
   *     'snippet_fields' entries). 'snippet_type' selects the snippeter: 'html' (also used when
   *     the entry is absent) or 'lines', compared case-insensitively.
   * @return the results produced by the delegate, with the requested fields and snippets added
   * @throws IllegalArgumentException if 'snippet_type' is present and is neither 'html' nor
   *     'lines'
   * @throws InterruptedException declared for the delegate search call
   */
  @Override
  public SearchResults search(
      Query query,
      int start,
      int limit,
      int scoringFunctionIndex,
      Map<String, String> extraParameters)
      throws InterruptedException {
    // The delegate does the actual searching; everything below only decorates its results.
    final SearchResults found =
        this.delegate.search(query, start, limit, scoringFunctionIndex, extraParameters);

    final long startedAt = System.currentTimeMillis();

    final String[] toFetch = parseFields(extraParameters, "fetch");
    final String[] toSnippet = parseFields(extraParameters, "snippet");
    final Set<TermQuery> positiveTerms = query.getRoot().getPositiveTerms();

    // Resolve the snippeter: a missing type means 'html'; anything other than 'html' or 'lines'
    // is rejected.
    final String requestedType = extraParameters.get("snippet_type");
    final boolean htmlRequested = null == requestedType || "html".equalsIgnoreCase(requestedType);
    if (!htmlRequested && !"lines".equalsIgnoreCase(requestedType)) {
      throw new IllegalArgumentException("snippet_type has to be either 'html' or 'lines'");
    }
    final Snippeter snippeter =
        this.snippeters.get(htmlRequested ? SnippeterType.HTML_AWARE : SnippeterType.LINE_AWARE);

    final boolean nothingRequested = toFetch.length == 0 && toSnippet.length == 0;
    if (!nothingRequested) {
      for (SearchResult hit : found.getResults()) {
        final Document stored = storage.getDocument(hit.getDocId());

        // Copy the requested fields onto the result.
        for (String name : toFetch) {
          if ("*".equals(name.trim())) {
            // '*' means fetch everything. This relies on getFields() exposing the result's
            // live field map rather than a copy.
            hit.getFields().putAll(stored.asMap());
            break;
          }
          final String value = stored.getField(name);
          if (null == value) {
            continue;
          }
          hit.setField(name, value);
        }

        // Attach a snippet for every requested field the document actually has.
        for (String name : toSnippet) {
          final String value = stored.getField(name);
          if (null == value) {
            continue;
          }
          final String snippetKey = "snippet_" + name;
          hit.setField(snippetKey, snippeter.snippet(positiveTerms, name, value));
        }
      }
    }

    final long elapsedMillis = System.currentTimeMillis() - startedAt;
    logger.debug("(search) fetching & snippeting took: " + elapsedMillis + " ms.");

    return found;
  }