Java LuceneIndex Examples, net.sourceforge.docfetcher.model.LuceneIndex Java Examples

Example #1

0

Show file

File: Searcher.java Project: yewkwanghooi/FetchDoc

  /**
   * Runs the given query string against all available indexes and returns the hits as fully
   * loaded result documents, limited to {@code MAX_RESULTS} entries and kept in the order in which
   * Lucene returned them.
   *
   * @param queryString the query text handed to {@code createQuery}
   * @return the result documents, one per hit
   * @throws SearchException if there are no indexes or an index folder is missing (see {@code
   *     checkIndexesExist}), or if Lucene reports an I/O error during the search
   * @throws CheckedOutOfMemoryError if an {@link OutOfMemoryError} occurs while searching
   */
  @ImmutableCopy
  @NotNull
  @ThreadSafe
  public List<ResultDocument> search(@NotNull String queryString)
      throws SearchException, CheckedOutOfMemoryError {
    /*
     * Note: For the desktop interface, we'll always search in all available
     * indexes, even those which are unchecked on the filter panel. This
     * allows the user to re-check the unchecked indexes and see previously
     * hidden results without starting another search.
     */

    // Create Lucene query
    QueryWrapper queryWrapper = createQuery(queryString);
    Query query = queryWrapper.query;
    boolean isPhraseQuery = queryWrapper.isPhraseQuery;

    /*
     * Notes regarding the following code:
     *
     * 1) Lucene will throw an IOException if the user deletes one or more
     * indexes while a search is running over the affected indexes. This can
     * happen when two DocFetcher instances are running.
     *
     * 2) All the information needed for displaying the results must be
     * loaded and returned immediately rather than lazily, because after the
     * search the user might delete one or more indexes. This also means the
     * result documents must not access the indexes later on.
     */

    readLock.lock();
    try {
      checkIndexesExist();

      // Perform search; might throw OutOfMemoryError
      ScoreDoc[] scoreDocs = luceneSearcher.search(query, MAX_RESULTS).scoreDocs;

      // Create result documents
      ResultDocument[] results = new ResultDocument[scoreDocs.length];
      for (int i = 0; i < scoreDocs.length; i++) {
        int docId = scoreDocs[i].doc;
        float score = scoreDocs[i].score;
        Document doc = luceneSearcher.doc(docId);
        // subSearcher() maps a document number (not the hit's rank) to the position of the
        // sub-searcher that holds it, so it must be given the document ID.
        LuceneIndex index = indexes.get(luceneSearcher.subSearcher(docId));
        IndexingConfig config = index.getConfig();
        results[i] =
            new ResultDocument(
                doc, score, query, isPhraseQuery, config, fileFactory, outlookMailFactory);
      }
      return Arrays.asList(results);
    } catch (IllegalArgumentException e) {
      // Translated by wrapEmptyIndexException before being rethrown
      throw wrapEmptyIndexException(e);
    } catch (IOException e) {
      throw new SearchException(e.getMessage()); // TODO i18n
    } catch (OutOfMemoryError e) {
      throw new CheckedOutOfMemoryError(e);
    } finally {
      readLock.unlock();
    }
  }

Example #2

0

Show file

File: Searcher.java Project: yewkwanghooi/FetchDoc

  /**
   * Returns the documents whose UID field matches one of the given UIDs, sorted by title using a
   * case-insensitive alphanumeric comparison. At most {@code MAX_RESULTS} documents are returned.
   *
   * @param uids the UIDs of the documents to look up
   * @return the matching result documents, sorted by title
   * @throws SearchException if there are no indexes or an index folder is missing (see {@code
   *     checkIndexesExist}), or if Lucene reports an I/O error during the lookup
   * @throws CheckedOutOfMemoryError if an {@link OutOfMemoryError} occurs while searching
   */
  @ImmutableCopy
  @NotNull
  @ThreadSafe
  public List<ResultDocument> list(@NotNull Set<String> uids)
      throws SearchException, CheckedOutOfMemoryError {
    // Construct a filter that only matches documents with the given UIDs
    TermsFilter uidFilter = new TermsFilter();
    String fieldName = Fields.UID.key();
    for (String uid : uids) {
      uidFilter.addTerm(new Term(fieldName, uid));
    }

    // Match everything; the UID filter does the actual selection
    Query query = new MatchAllDocsQuery();

    readLock.lock();
    try {
      checkIndexesExist();

      // Perform search; might throw OutOfMemoryError
      ScoreDoc[] scoreDocs = luceneSearcher.search(query, uidFilter, MAX_RESULTS).scoreDocs;

      // Create result documents
      ResultDocument[] results = new ResultDocument[scoreDocs.length];
      for (int i = 0; i < results.length; i++) {
        int docId = scoreDocs[i].doc;
        float score = scoreDocs[i].score;
        Document doc = luceneSearcher.doc(docId);
        // subSearcher() maps a document number (not the hit's rank) to the position of the
        // sub-searcher that holds it, so it must be given the document ID.
        LuceneIndex index = indexes.get(luceneSearcher.subSearcher(docId));
        IndexingConfig config = index.getConfig();
        results[i] =
            new ResultDocument(doc, score, query, true, config, fileFactory, outlookMailFactory);
      }

      // Sort results by title
      Arrays.sort(
          results,
          new Comparator<ResultDocument>() {
            public int compare(ResultDocument o1, ResultDocument o2) {
              return AlphanumComparator.ignoreCaseInstance.compare(o1.getTitle(), o2.getTitle());
            }
          });

      return Arrays.asList(results);
    } catch (IllegalArgumentException e) {
      // Translated by wrapEmptyIndexException before being rethrown
      throw wrapEmptyIndexException(e);
    } catch (IOException e) {
      throw new SearchException(e.getMessage()); // TODO i18n
    } catch (OutOfMemoryError e) {
      throw new CheckedOutOfMemoryError(e);
    } finally {
      readLock.unlock();
    }
  }

Example #3

0

Show file

File: Searcher.java Project: yewkwanghooi/FetchDoc

 // Verifies that there is something to search in. Fails if the list of
 // indexes is empty, or as soon as the first index is found whose directory
 // path is non-null but does not point to an existing directory.
 @NotNull
 @NotThreadSafe
 private void checkIndexesExist() throws SearchException {
   if (indexes.isEmpty()) {
     String nothingToSearch =
         "Nothing to search in: No indexes have been created yet."; // TODO i18n
     throw new SearchException(nothingToSearch);
   }
   for (LuceneIndex candidate : indexes) {
     File dir = candidate.getIndexDirPath().getCanonicalFile();
     // A null directory is tolerated; an existing directory is the good case
     if (dir == null || dir.isDirectory()) {
       continue;
     }
     // Only the first missing folder is reported
     throw new SearchException("Folders not found:" + "\n" + dir); // TODO i18n folders_not_found
   }
 }

Example #4

0

Show file

File: Searcher.java Project: yewkwanghooi/FetchDoc

 // Remembers the given indexes, opens one searcher per index and combines
 // them into a new MultiSearcher that is stored in the luceneSearcher field.
 // An index that cannot be opened gets a DummySearchable in its slot, so the
 // positions in the searcher array stay aligned with the index list; each
 // such index is reported in the returned list together with its exception.
 // NOTE(review): the searcher is stored in a field, not returned; presumably
 // the caller is responsible for closing it - confirm against callers.
 @NotNull
 @NotThreadSafe
 private List<CorruptedIndex> setLuceneSearcher(@NotNull List<LuceneIndex> indexes)
     throws IOException {
   this.indexes = Util.checkNotNull(indexes);
   LazyList<CorruptedIndex> broken = new LazyList<CorruptedIndex>();
   Searchable[] perIndex = new Searchable[indexes.size()];
   int slot = 0;
   for (LuceneIndex current : indexes) {
     Searchable opened;
     try {
       opened = new IndexSearcher(current.getLuceneDir());
     } catch (IOException failure) {
       // Log the problem and substitute a placeholder for the unreadable index
       Util.printErr(failure);
       opened = new DummySearchable();
       broken.add(new CorruptedIndex(current, failure));
     }
     perIndex[slot++] = opened;
   }
   luceneSearcher = new MultiSearcher(perIndex);
   return broken;
 }

Example #5

0

Show file

File: Searcher.java Project: yewkwanghooi/FetchDoc

  /**
   * For the given query, returns the requested page of results. This method should not be called
   * anymore after {@link #shutdown()} has been called, otherwise an IOException will be thrown.
   *
   * <p>The size, parser and index restrictions of the query are each turned into a filter, and
   * the filters are AND-combined. If fewer hits are available than needed to reach the requested
   * page, the last available page is returned instead and its index is reported in the result.
   *
   * @param webQuery the query together with its filter settings and the requested page index
   * @return the page of result documents along with page index, page count and total hit count
   * @throws IOException if a stored {@code ioException} is pending or Lucene reports an I/O error
   * @throws SearchException if there are no indexes or an index folder is missing (see {@code
   *     checkIndexesExist})
   * @throws CheckedOutOfMemoryError if an {@link OutOfMemoryError} occurs while searching
   */
  @NotNull
  @ThreadSafe
  public ResultPage search(@NotNull WebQuery webQuery)
      throws IOException, SearchException, CheckedOutOfMemoryError {
    Util.checkNotNull(webQuery);

    if (ioException != null) throw ioException;

    List<Filter> filters = new ArrayList<Filter>(3);

    // Add size filter to filter chain
    if (webQuery.minSize != null || webQuery.maxSize != null) {
      filters.add(
          NumericRangeFilter.newLongRange(
              Fields.SIZE.key(), webQuery.minSize, webQuery.maxSize, true, true));
    }

    // Add type filter to filter chain
    if (webQuery.parsers != null) {
      TermsFilter typeFilter = new TermsFilter();
      String fieldName = Fields.PARSER.key();
      // The email parser term is always included, independent of the selected parsers
      typeFilter.addTerm(new Term(fieldName, Fields.EMAIL_PARSER));
      for (Parser parser : webQuery.parsers) {
        String parserName = parser.getClass().getSimpleName();
        typeFilter.addTerm(new Term(fieldName, parserName));
      }
      filters.add(typeFilter);
    }

    // Add location filter to filter chain
    if (webQuery.indexes != null) {
      Filter[] indexFilters = new Filter[webQuery.indexes.size()];
      int i = 0;
      for (LuceneIndex index : webQuery.indexes) {
        Path path = index.getRootFolder().getPath();
        String uid = index.getDocumentType().createUniqueId(path);
        Term prefix = new Term(Fields.UID.key(), uid + "/");
        indexFilters[i++] = new PrefixFilter(prefix);
      }
      filters.add(new ChainedFilter(indexFilters, ChainedFilter.OR));
    }

    // Construct filter chain
    Filter filter =
        filters.size() == 0
            ? null
            : new ChainedFilter(filters.toArray(new Filter[filters.size()]), ChainedFilter.AND);

    // Create query
    QueryWrapper queryWrapper = createQuery(webQuery.query);
    Query query = queryWrapper.query;
    boolean isPhraseQuery = queryWrapper.isPhraseQuery;

    readLock.lock();
    try {
      checkIndexesExist();

      // Perform search; might throw OutOfMemoryError
      int maxResults = (webQuery.pageIndex + 1) * PAGE_SIZE;
      TopDocs topDocs = luceneSearcher.search(query, filter, maxResults);
      ScoreDoc[] scoreDocs = topDocs.scoreDocs;

      // Compute start and end indices of returned page: the page is the last (possibly partial)
      // block of PAGE_SIZE hits among the hits that were actually returned
      int start;
      int end = scoreDocs.length;
      if (end <= PAGE_SIZE) {
        start = 0;
      } else {
        int r = end % PAGE_SIZE;
        start = end - (r == 0 ? PAGE_SIZE : r);
      }

      // Create and fill list of result documents to return
      ResultDocument[] results = new ResultDocument[end - start];
      for (int i = start; i < end; i++) {
        int docId = scoreDocs[i].doc;
        float score = scoreDocs[i].score;
        Document doc = luceneSearcher.doc(docId);
        // subSearcher() maps a document number (not a position in scoreDocs) to the position of
        // the sub-searcher that holds it, so it must be given the document ID.
        LuceneIndex index = indexes.get(luceneSearcher.subSearcher(docId));
        IndexingConfig config = index.getConfig();
        results[i - start] =
            new ResultDocument(
                doc, score, query, isPhraseQuery, config, fileFactory, outlookMailFactory);
      }

      int hitCount = topDocs.totalHits;
      int newPageIndex = start / PAGE_SIZE;
      // Integer ceiling division; float division would lose precision for large hit counts
      int pageCount = hitCount / PAGE_SIZE + (hitCount % PAGE_SIZE == 0 ? 0 : 1);

      return new ResultPage(Arrays.asList(results), newPageIndex, pageCount, hitCount);
    } catch (IllegalArgumentException e) {
      // Translated by wrapEmptyIndexException before being rethrown
      throw wrapEmptyIndexException(e);
    } catch (OutOfMemoryError e) {
      throw new CheckedOutOfMemoryError(e);
    } finally {
      readLock.unlock();
    }
  }