Esempio n. 1
0
  /**
   * Searches the index and returns the IDs of the documents matched by the query.
   *
   * <p>Both queries are escaped and wrapped in quotes so the parser builds phrase queries. The
   * {@code title} and {@code description} fields are searched with the concatenation of
   * {@code searchQuery} and {@code fullSearchQuery}; the {@code content} field is searched with
   * {@code fullSearchQuery} only. A hit of type {@code "document"} contributes its {@code id}
   * field, a hit of type {@code "file"} contributes its {@code document_id} field. Hits with any
   * other (or missing) type, or without the expected ID field, are ignored.
   *
   * @param userId User ID to filter on
   * @param searchQuery Search query on title and description
   * @param fullSearchQuery Search query on all fields
   * @return Set of document IDs; empty when the directory reader is not available
   * @throws Exception if parsing the query or reading the index fails
   */
  public Set<String> search(String userId, String searchQuery, String fullSearchQuery)
      throws Exception {
    // Escape the queries and add quotes so the query parser generates a PhraseQuery
    searchQuery = "\"" + QueryParserUtil.escape(searchQuery + " " + fullSearchQuery) + "\"";
    fullSearchQuery = "\"" + QueryParserUtil.escape(fullSearchQuery) + "\"";

    // Build search query
    StandardQueryParser qpHelper =
        new StandardQueryParser(new DocsStandardAnalyzer(Version.LUCENE_42));
    // Very large slop: presumably so the phrase terms do not have to be adjacent — TODO confirm
    qpHelper.setPhraseSlop(100000);

    // Search on documents and files
    BooleanQuery query = new BooleanQuery();
    query.add(qpHelper.parse(searchQuery, "title"), Occur.SHOULD);
    query.add(qpHelper.parse(searchQuery, "description"), Occur.SHOULD);
    query.add(qpHelper.parse(fullSearchQuery, "content"), Occur.SHOULD);

    // Filter on provided user ID
    // NOTE(review): with a null userId the filter is built from an empty term list; confirm how
    // TermsFilter treats that in the Lucene version in use before relying on null here.
    List<Term> terms = new ArrayList<Term>();
    if (userId != null) {
      terms.add(new Term("user_id", userId));
    }
    TermsFilter userFilter = new TermsFilter(terms);

    // Search
    DirectoryReader directoryReader =
        AppContext.getInstance().getIndexingService().getDirectoryReader();
    Set<String> documentIdList = new HashSet<String>();
    if (directoryReader == null) {
      // The directory reader is not yet initialized (probably because there is nothing indexed)
      return documentIdList;
    }
    IndexSearcher searcher = new IndexSearcher(directoryReader);
    TopDocs topDocs = searcher.search(query, userFilter, Integer.MAX_VALUE);
    ScoreDoc[] docs = topDocs.scoreDocs;

    // Extract document IDs
    for (ScoreDoc scoreDoc : docs) {
      org.apache.lucene.document.Document document = searcher.doc(scoreDoc.doc);
      // Constant-first equals: a hit without a stored "type" field must not raise an NPE
      String type = document.get("type");
      String documentId = null;
      if ("document".equals(type)) {
        documentId = document.get("id");
      } else if ("file".equals(type)) {
        documentId = document.get("document_id");
      }
      // Never leak a null entry into the result set for unknown types or missing ID fields
      if (documentId != null) {
        documentIdList.add(documentId);
      }
    }

    return documentIdList;
  }
  /**
   * Runs {@code query} against the index, clusters the hits with Carrot2 and returns the cluster
   * labels.
   *
   * <p>The hits are clustered twice: once with {@code ByUrlClusteringAlgorithm} and once with
   * {@code LingoClusteringAlgorithm}. The returned list holds the labels of the first clustering
   * followed by the labels of the second one.
   *
   * <p>This method is best-effort: if searching or clustering fails, the failure is logged and
   * the labels collected so far (normally none) are returned.
   *
   * @param query the raw user query; it is escaped before being parsed
   * @return the cluster labels, never {@code null}
   */
  @SuppressWarnings("unchecked")
  @Override
  public List<String> getClusterByCarrot2(String query) {
    List<String> strs = new ArrayList<String>();
    final Controller controller = ControllerFactory.createCachingPooling(IDocumentSource.class);
    try {
      final List<org.carrot2.core.Document> documents = Lists.newArrayList();
      q = getParser().parse(QueryParserUtil.escape(query));
      docs = getIndexSearcher().search(q, Integer.MAX_VALUE);
      hits = docs.scoreDocs;
      for (int i = 0; i < hits.length; i++) {
        Document doc = getIndexSearcher().doc(hits[i].doc);
        // NOTE(review): the Carrot2 Document is built from (contents, title, user) in that
        // order — confirm this matches the constructor's intended argument meaning.
        documents.add(
            new org.carrot2.core.Document(
                doc.get(CONTENTS_FIELD), doc.get(TITLE_FIELD), doc.get(USER_FIELD)));
      }
      final ProcessingResult byTopicClusters =
          controller.process(documents, query, LingoClusteringAlgorithm.class);
      final List<Cluster> clustersByTopic = byTopicClusters.getClusters();
      final ProcessingResult byDomainClusters =
          controller.process(documents, query, ByUrlClusteringAlgorithm.class);
      final List<Cluster> clustersByDomain = byDomainClusters.getClusters();
      for (Cluster c : clustersByDomain) {
        strs.add(c.getLabel());
      }
      for (Cluster c : clustersByTopic) {
        strs.add(c.getLabel());
      }
    } catch (Exception ex) {
      // Keep the best-effort contract, but do not hide the failure from operators.
      java.util.logging.Logger.getLogger(getClass().getName())
          .log(java.util.logging.Level.WARNING, "Clustering failed for query: " + query, ex);
    } finally {
      // A controller is created on every call, so it must be released on every call as well.
      controller.dispose();
    }
    return strs;
  }
 /**
  * Runs {@code query} against the index and fills the shared search result with every hit.
  *
  * <p>The shared {@code results} list is cleared and refilled on each call, and the same
  * {@code searchResult} instance is updated and returned. The reported search time is in seconds
  * and covers the index search and the loading of the hit documents, not the query parsing.
  *
  * <p>NOTE(review): {@code hitsPerPage} is not read by this method; all hits are loaded.
  *
  * @param query the raw user query; it is escaped before being parsed
  * @param hitsPerPage currently unused
  * @return the search result holding the query, the total hit count, the elapsed time and the hits
  * @throws Exception if parsing the query or reading the index fails
  */
 @Override
 public SearchResult runSearch(String query, int hitsPerPage) throws Exception {
   results.clear();
   q = getParser().parse(QueryParserUtil.escape(query));
   // nanoTime is monotonic, so the elapsed time cannot be skewed by wall-clock adjustments
   long start = System.nanoTime();
   docs = getIndexSearcher().search(q, Integer.MAX_VALUE);
   hits = docs.scoreDocs;
   int totalHits = docs.totalHits;
   for (int i = 0; i < hits.length; i++) {
     Document doc = getIndexSearcher().doc(hits[i].doc);
     Result res =
         new Result(
             doc.get(CONTENTS_FIELD),
             doc.get(TITLE_FIELD),
             doc.get(ID_FIELD),
             doc.get(USER_FIELD),
             doc.get(CONTENT_TYPE_FIELD),
             doc.get(DATE_FIELD),
             doc.get(TIME_FIELD));
     results.add(res);
   }
   // Elapsed nanoseconds converted to seconds
   searchResult.setSearchTime((System.nanoTime() - start) / 1e9);
   searchResult.setQuery(query);
   searchResult.setHits(totalHits);
   searchResult.setResults(results);
   return searchResult;
 }