/** * Search files. * * @param userId User ID to filter on * @param searchQuery Search query on title and description * @param fullSearchQuery Search query on all fields * @return List of document IDs * @throws Exception */ public Set<String> search(String userId, String searchQuery, String fullSearchQuery) throws Exception { // Escape query and add quotes so QueryParser generate a PhraseQuery searchQuery = "\"" + QueryParserUtil.escape(searchQuery + " " + fullSearchQuery) + "\""; fullSearchQuery = "\"" + QueryParserUtil.escape(fullSearchQuery) + "\""; // Build search query StandardQueryParser qpHelper = new StandardQueryParser(new DocsStandardAnalyzer(Version.LUCENE_42)); qpHelper.setPhraseSlop(100000); // PhraseQuery add terms // Search on documents and files BooleanQuery query = new BooleanQuery(); query.add(qpHelper.parse(searchQuery, "title"), Occur.SHOULD); query.add(qpHelper.parse(searchQuery, "description"), Occur.SHOULD); query.add(qpHelper.parse(fullSearchQuery, "content"), Occur.SHOULD); // Filter on provided user ID List<Term> terms = new ArrayList<Term>(); if (userId != null) { terms.add(new Term("user_id", userId)); } TermsFilter userFilter = new TermsFilter(terms); // Search DirectoryReader directoryReader = AppContext.getInstance().getIndexingService().getDirectoryReader(); Set<String> documentIdList = new HashSet<String>(); if (directoryReader == null) { // The directory reader is not yet initialized (probably because there is nothing indexed) return documentIdList; } IndexSearcher searcher = new IndexSearcher(directoryReader); TopDocs topDocs = searcher.search(query, userFilter, Integer.MAX_VALUE); ScoreDoc[] docs = topDocs.scoreDocs; // Extract document IDs for (int i = 0; i < docs.length; i++) { org.apache.lucene.document.Document document = searcher.doc(docs[i].doc); String type = document.get("type"); String documentId = null; if (type.equals("document")) { documentId = document.get("id"); } else if (type.equals("file")) { documentId = document.get("document_id"); } documentIdList.add(documentId); } return documentIdList; }
@SuppressWarnings("unchecked") @Override public List<String> getClusterByCarrot2(String query) { // TODO Auto-generated method stub List<String> strs = new ArrayList<String>(); final Controller controller = ControllerFactory.createCachingPooling(IDocumentSource.class); final List<org.carrot2.core.Document> documents = Lists.newArrayList(); try { q = getParser().parse(QueryParserUtil.escape(query)); docs = getIndexSearcher().search(q, Integer.MAX_VALUE); hits = docs.scoreDocs; for (int i = 0; i < hits.length; i++) { Document doc = getIndexSearcher().doc(hits[i].doc); documents.add( new org.carrot2.core.Document( doc.get(CONTENTS_FIELD), doc.get(TITLE_FIELD), doc.get(USER_FIELD))); } final ProcessingResult byTopicClusters = controller.process(documents, query, LingoClusteringAlgorithm.class); final List<Cluster> clustersByTopic = byTopicClusters.getClusters(); final ProcessingResult byDomainClusters = controller.process(documents, query, ByUrlClusteringAlgorithm.class); final List<Cluster> clustersByDomain = byDomainClusters.getClusters(); for (Cluster c : clustersByDomain) { strs.add(c.getLabel()); } for (Cluster c : clustersByTopic) { strs.add(c.getLabel()); } } catch (Exception ex) { } return strs; }
@Override public SearchResult runSearch(String query, int hitsPerPage) throws Exception { // TODO Auto-generated method stub results.clear(); q = getParser().parse(QueryParserUtil.escape(query)); Long start = System.currentTimeMillis(); docs = getIndexSearcher().search(q, Integer.MAX_VALUE); hits = docs.scoreDocs; int totalHits = docs.totalHits; for (int i = 0; i < hits.length; i++) { Document doc = getIndexSearcher().doc(hits[i].doc); Result res = new Result( doc.get(CONTENTS_FIELD), doc.get(TITLE_FIELD), doc.get(ID_FIELD), doc.get(USER_FIELD), doc.get(CONTENT_TYPE_FIELD), doc.get(DATE_FIELD), doc.get(TIME_FIELD)); results.add(res); } searchResult.setSearchTime((System.currentTimeMillis() - start) / 1000.0000); searchResult.setQuery(query); searchResult.setHits(totalHits); searchResult.setResults(results); return searchResult; }