/**
 * Searches the index of the given company for documents of this portlet.
 *
 * <p>The query has two parts that must both match: a context part (portlet id, and
 * optionally group, categories and thread) and, when keywords are given, a keyword part
 * over user name, title, content and tag entries.
 *
 * @param companyId   company whose searcher is obtained from {@code LuceneUtil}
 * @param groupId     group restriction; only applied when greater than zero
 * @param categoryIds category restriction (any one of them has to match); ignored when
 *                    {@code null} or empty
 * @param threadId    thread term; only added when greater than zero
 * @param keywords    free-text keywords; ignored when {@code Validator.isNotNull} is false
 * @return the recorded hits, or whatever {@code LuceneUtil.closeSearcher} returns when
 *         anything fails
 * @throws SystemException declared by the signature; presumably raised by
 *                         {@code LuceneUtil.closeSearcher} (not visible here)
 */
public Hits search(
        long companyId, long groupId, long[] categoryIds, long threadId, String keywords)
        throws SystemException {

    Searcher searcher = null;

    try {
        HitsImpl hits = new HitsImpl();

        // Context part: narrows down which documents are eligible at all.
        BooleanQuery contextQuery = new BooleanQuery();
        LuceneUtil.addRequiredTerm(contextQuery, LuceneFields.PORTLET_ID, Indexer.PORTLET_ID);

        if (groupId > 0) {
            LuceneUtil.addRequiredTerm(contextQuery, LuceneFields.GROUP_ID, groupId);
        }

        boolean hasCategories = (categoryIds != null) && (categoryIds.length > 0);
        if (hasCategories) {
            // One optional clause per category, wrapped in a single mandatory clause.
            BooleanQuery anyCategoryQuery = new BooleanQuery();
            for (long categoryId : categoryIds) {
                anyCategoryQuery.add(
                        new TermQuery(new Term("categoryId", String.valueOf(categoryId))),
                        BooleanClause.Occur.SHOULD);
            }
            contextQuery.add(anyCategoryQuery, BooleanClause.Occur.MUST);
        }

        if (threadId > 0) {
            LuceneUtil.addTerm(contextQuery, "threadId", threadId);
        }

        // Keyword part: the same keywords are applied to every searchable field.
        BooleanQuery searchQuery = new BooleanQuery();
        if (Validator.isNotNull(keywords)) {
            LuceneUtil.addTerm(searchQuery, LuceneFields.USER_NAME, keywords);
            LuceneUtil.addTerm(searchQuery, LuceneFields.TITLE, keywords);
            LuceneUtil.addTerm(searchQuery, LuceneFields.CONTENT, keywords);
            LuceneUtil.addTerm(searchQuery, LuceneFields.TAG_ENTRY, keywords);
        }

        BooleanQuery fullQuery = new BooleanQuery();
        fullQuery.add(contextQuery, BooleanClause.Occur.MUST);
        if (!searchQuery.clauses().isEmpty()) {
            // Only require the keyword part when it actually contains clauses.
            fullQuery.add(searchQuery, BooleanClause.Occur.MUST);
        }

        searcher = LuceneUtil.getSearcher(companyId);
        hits.recordHits(searcher.search(fullQuery), searcher);
        return hits;
    } catch (Exception e) {
        return LuceneUtil.closeSearcher(searcher, keywords, e);
    }
}
public TopFieldDocsSearchResult searchBySession(String queryStr, int startFrom, String operator) { try { queryStr = queryStr.trim(); QueryParser parser = new QueryParser("contents", analyzer); Operator op = QueryParser.AND_OPERATOR; if (QueryParser.AND_OPERATOR.toString().equalsIgnoreCase(operator)) { parser.setDefaultOperator(QueryParser.AND_OPERATOR); } else { parser.setDefaultOperator(QueryParser.OR_OPERATOR); } Query query; query = parser.parse(queryStr); Sort sort = new Sort("summary", true); TopFieldDocs tfd = searcher.search(query, null, startFrom + 10, sort); TopFieldDocsSearchResult result = new TopFieldDocsSearchResult(tfd, searcher); return result; } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return null; }
/**
 * Serializes {@code expr} to a Lucene query, runs it and verifies the outcome.
 *
 * <p>The hit count is checked first, then the textual form of the generated query.
 *
 * @param expr          expression to serialize
 * @param expectedQuery expected {@code toString()} of the generated query
 * @param expectedHits  expected total number of hits (up to 100 documents are requested)
 * @throws Exception if serialization or the search fails
 */
private void testQuery(Expression<?> expr, String expectedQuery, int expectedHits)
        throws Exception {
    Query luceneQuery = serializer.toQuery(expr, metadata);
    TopDocs topHundred = searcher.search(luceneQuery, 100);

    assertEquals(expectedHits, topHundred.totalHits);

    String actualQuery = luceneQuery.toString();
    assertEquals(expectedQuery, actualQuery);
}
/**
 * Returns a number of random songs.
 *
 * <p>All songs matching the criteria (media type MUSIC, optional genre, optional year
 * range, one of the given music folders) are collected, then documents are drawn at random
 * without replacement until enough songs are found or the candidates are exhausted.
 * Candidates whose media file cannot be loaded are logged and skipped.
 *
 * @param criteria Search criteria.
 * @return List of random songs; empty (or partially filled) if the search fails.
 */
public List<MediaFile> getRandomSongs(RandomSearchCriteria criteria) {
    List<MediaFile> result = new ArrayList<MediaFile>();

    IndexReader reader = null;
    try {
        reader = createIndexReader(SONG);
        Searcher searcher = new IndexSearcher(reader);

        BooleanQuery query = new BooleanQuery();
        query.add(
                new TermQuery(
                        new Term(FIELD_MEDIA_TYPE, MediaFile.MediaType.MUSIC.name().toLowerCase())),
                BooleanClause.Occur.MUST);

        if (criteria.getGenre() != null) {
            String genre = normalizeGenre(criteria.getGenre());
            query.add(new TermQuery(new Term(FIELD_GENRE, genre)), BooleanClause.Occur.MUST);
        }

        // A missing bound is passed through as null, which leaves that side of the range open.
        if (criteria.getFromYear() != null || criteria.getToYear() != null) {
            NumericRangeQuery<Integer> rangeQuery =
                    NumericRangeQuery.newIntRange(
                            FIELD_YEAR, criteria.getFromYear(), criteria.getToYear(), true, true);
            query.add(rangeQuery, BooleanClause.Occur.MUST);
        }

        List<SpanTermQuery> musicFolderQueries = new ArrayList<SpanTermQuery>();
        for (MusicFolder musicFolder : criteria.getMusicFolders()) {
            musicFolderQueries.add(
                    new SpanTermQuery(new Term(FIELD_FOLDER, musicFolder.getPath().getPath())));
        }
        query.add(
                new SpanOrQuery(
                        musicFolderQueries.toArray(new SpanQuery[musicFolderQueries.size()])),
                BooleanClause.Occur.MUST);

        TopDocs topDocs = searcher.search(query, null, Integer.MAX_VALUE);
        List<ScoreDoc> scoreDocs = Lists.newArrayList(topDocs.scoreDocs);

        // Default seeding: a millisecond-clock seed repeats the same sequence for calls made
        // within the same millisecond.
        Random random = new Random();

        while (!scoreDocs.isEmpty() && result.size() < criteria.getCount()) {
            int index = random.nextInt(scoreDocs.size());
            Document doc = searcher.doc(scoreDocs.remove(index).doc);
            int id = Integer.valueOf(doc.get(FIELD_ID));
            try {
                addIfNotNull(mediaFileService.getMediaFile(id), result);
            } catch (Exception x) {
                // Keep the cause in the log so the failure can be diagnosed.
                LOG.warn("Failed to get media file " + id, x);
            }
        }
    } catch (Throwable x) {
        LOG.error("Failed to search for random songs.", x);
    } finally {
        FileUtil.closeQuietly(reader);
    }
    return result;
}
/**
 * Parses {@code queryString} with a {@code StandardAnalyzer} and runs it against the index
 * stored at {@code indexPath(db)}.
 *
 * <p>NOTE(review): the directory, reader and searcher opened here are never closed in this
 * method; the returned {@code Hits} is handed out while they are still open, so closing
 * them is left to nobody visible here. Confirm whether callers are expected to clean up.
 *
 * @param db           name passed to {@code indexPath} to locate the index
 * @param defaultField field searched by terms that do not name a field themselves
 * @param queryString  query in query-parser syntax
 * @return the hits of the search
 * @throws IOException           if the index cannot be opened or searched
 * @throws CorruptIndexException if the index is corrupt
 * @throws ParseException        if {@code queryString} cannot be parsed
 */
protected Hits query(String db, String defaultField, String queryString)
        throws IOException, CorruptIndexException, ParseException {
    // The index is opened before the query is parsed, exactly in this order.
    Searcher indexSearcher =
            new IndexSearcher(IndexReader.open(FSDirectory.getDirectory(indexPath(db))));

    Query parsedQuery = new QueryParser(defaultField, new StandardAnalyzer()).parse(queryString);

    return indexSearcher.search(parsedQuery);
}
/**
 * Runs {@code query} on the shared searcher and collects the top {@code startFrom + 10}
 * documents.
 *
 * @param query     query to execute
 * @param startFrom offset of the first hit the caller is interested in
 * @return the collector holding the collected hits
 * @throws IOException if the search fails
 */
private TopDocCollector doPagingSearch(Query query, int startFrom) throws IOException {
    final int wantedHits = startFrom + 10;
    TopDocCollector hitCollector = new TopDocCollector(wantedHits);
    searcher.search(query, hitCollector);

    if (!logger.isDebugEnabled()) {
        return hitCollector;
    }

    // Debug output only: the query and the collector in their textual form.
    logger.debug(query.toString());
    logger.debug(hitCollector.toString());
    return hitCollector;
}
// Make sure the documents returned by the search match the expected list // Copied from TestSort.java private void assertMatches(Searcher searcher, Query query, Sort sort, String expectedResult) throws IOException { ScoreDoc[] result = searcher.search(query, null, 1000, sort).scoreDocs; StringBuilder buff = new StringBuilder(10); int n = result.length; for (int i = 0; i < n; ++i) { Document doc = searcher.doc(result[i].doc); String[] v = doc.getValues("tracer"); for (int j = 0; j < v.length; ++j) { buff.append(v[j]); } } assertEquals(expectedResult, buff.toString()); }
/** @see org.wyona.yarep.impl.repo.vfs.DateIndexerSearcher#getRevisionYoungerThan(Date) */ public Revision getRevisionYoungerThan(Date date) throws Exception { // log.debug("Get revision younger than: " + DateIndexerSearcherImplV1.format(date)); Date youngerThanDate = new Date(date.getTime() + 1); // log.debug("Get revision for date: " + DateIndexerSearcherImplV1.format(youngerThanDate)); try { org.apache.lucene.search.Searcher searcher = new IndexSearcher(indexDir.getAbsolutePath()); if (searcher != null) { org.apache.lucene.search.Query query = org.apache.lucene.search.NumericRangeQuery.newLongRange( CREATION_DATE_FIELD_NAME, new Long(youngerThanDate.getTime()), new Long(new Date().getTime()), true, true); org.apache.lucene.search.Hits hits = searcher.search( query, new Sort(new SortField(CREATION_DATE_FIELD_NAME, SortField.LONG))); // log.debug("Query \"" + query + "\" on field '" + CREATION_DATE_FIELD_NAME + "' returned " // + hits.length() + " hits"); String revisionName = null; if (hits != null && hits.length() > 0) { /* DEBUG for (int i = 0; i < hits.length();i++) { revisionName = hits.doc(i).getField(REVISION_NAME_FIELD_NAME).stringValue(); log.warn("DEBUG: Found revision name '" + revisionName + "' (Creation date: " + new Date(new Long(revisionName).longValue()) + ")"); } */ revisionName = hits.doc(0).getField(REVISION_NAME_FIELD_NAME).stringValue(); } searcher.close(); if (revisionName != null) { return node.getRevision(revisionName); } else { return null; } } else { log.error("Searcher could not be initialized for index directory '" + indexDir + "'!"); return null; } } catch (Exception e) { log.error(e, e); return null; } }
/**
 * Performs a search using Lucene.
 *
 * <p>The query is parsed with a {@code StandardAnalyzer}. The searcher is closed before
 * this method returns, on success and on failure alike.
 * NOTE(review): the result object is built before the searcher is closed, so it presumably
 * copies what it needs from the hits in its constructor; confirm against
 * {@code LuceneSearchResult}.
 *
 * @param index with the documents to search
 * @param query to search
 * @param fieldName field where search inside the documents
 * @return the search result, or {@code null} if opening the index, parsing the query,
 *         searching or closing the searcher fails (the failure is logged)
 */
public static LuceneSearchResult search(LuceneIndex index, String query, String fieldName) {
    try {
        Searcher searcher = new IndexSearcher(index.getDirectory());
        try {
            Analyzer analyzer = new StandardAnalyzer();
            QueryParser parser = new QueryParser(fieldName, analyzer);
            Query q = parser.parse(query);
            Hits hits = searcher.search(q);
            return new LuceneSearchResult(hits, index);
        } finally {
            // Runs for parse and search failures too, so the index files are always released.
            searcher.close();
        }
    } catch (Exception e) {
        // Pass the exception as the cause so the stack trace ends up in the log.
        org.apache.commons.logging.LogFactory.getLog(LuceneSearcher.class)
                .error("Lucene search failed for query '" + query + "' on field '" + fieldName + "'", e);
    }
    return null;
}
/**
 * Looks up the single document whose key field equals the value of {@code keyField}.
 *
 * <p>The lookup is expressed as the query string {@code name:value} and parsed with the
 * query analyzer, using AND as default operator.
 * NOTE(review): the field value is placed into the query string unescaped; confirm that key
 * values never contain query-syntax characters.
 *
 * @param reader   reader of the index to search
 * @param keyField field whose name and string value identify the document
 * @return the matching document, or {@code null} unless exactly one document matches
 * @throws ParseException if the generated query string cannot be parsed
 * @throws IOException    if the index cannot be read
 */
private Document findByKey(IndexReader reader, Field keyField) throws ParseException, IOException {
    Searcher indexSearcher = new IndexSearcher(reader);

    QueryParser keyParser = new QueryParser(luceneVersion, keyField.name(), queryAnalyzer);
    keyParser.setDefaultOperator(QueryParser.Operator.AND);
    Query keyQuery = keyParser.parse(keyField.name() + ":" + keyField.stringValue());

    ScoreDoc[] matches = indexSearcher.search(keyQuery, 10000).scoreDocs;
    // Zero or several matches both count as "not found".
    return matches.length == 1 ? indexSearcher.doc(matches[0].doc) : null;
}
/**
 * Serves one search request on {@code searchSocket}.
 *
 * <p>Reads a single line from the socket, parses it as a query on the {@code "data"} field,
 * searches the writer's current reader sorted by the {@code "date"} field and writes a
 * summary line followed by one entry per returned document. At most 1000 documents are
 * returned even if the total hit count is higher. The searcher, the reader and the socket
 * are closed on every path.
 */
public void run() {
    IndexReader reader = null;
    Searcher searcher = null;
    try {
        String requestLine =
                (new BufferedReader(new InputStreamReader(searchSocket.getInputStream())))
                        .readLine();
        if (requestLine == null) {
            // The client disconnected without sending a query; nothing to answer.
            return;
        }
        String searchQuery = requestLine.trim();

        reader = writer.getReader();
        searcher = new IndexSearcher(reader);
        QueryParser indexParser = new QueryParser(Version.LUCENE_30, "data", analyzer);
        SortField hitSortField = new SortField("date", SortField.LONG);
        Sort hitSort = new Sort(hitSortField);
        TopFieldDocs hits = searcher.search(indexParser.parse(searchQuery), null, 1000, hitSort);

        PrintWriter searchReply = new PrintWriter(searchSocket.getOutputStream(), true);
        searchReply.println(hits.totalHits + " Hits for " + searchQuery);
        // Iterate over the documents actually returned: totalHits may exceed the 1000
        // requested, in which case indexing scoreDocs up to totalHits would overrun it.
        for (int i = 0; i < hits.scoreDocs.length; i++) {
            Document document = searcher.doc(hits.scoreDocs[i].doc);
            String host = document.get("hostname");
            String date = document.get("date");
            String data = document.get("data");
            searchReply.print("host: " + host + ", date: " + date + ", data: " + data + "\n\n");
        }
        searchReply.close();
    } catch (Exception ex) {
        System.out.print("Exception: " + ex + "\n");
    } finally {
        // Close each resource independently so one failing close does not leak the others.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (Exception ex) {
                System.out.print("Exception: " + ex + "\n");
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ex) {
                System.out.print("Exception: " + ex + "\n");
            }
        }
        try {
            searchSocket.close();
        } catch (Exception ex) {
            System.out.print("Exception: " + ex + "\n");
        }
    }
}
/** @see org.wyona.yarep.impl.repo.vfs.DateIndexerSearcher#getMostRecentRevision() */ public Revision getMostRecentRevision() { // log.debug("Get most recent revision..."); try { org.apache.lucene.search.Searcher searcher = new IndexSearcher(indexDir.getAbsolutePath()); if (searcher != null) { /* String q = "*:*"; org.apache.lucene.search.Query query = new org.apache.lucene.queryParser.QueryParser(CREATION_DATE_FIELD_NAME, getAnalyzer()).parse(q); */ org.apache.lucene.search.Query query = new org.apache.lucene.search.MatchAllDocsQuery(); org.apache.lucene.search.Hits hits = searcher.search( query, new Sort(new SortField(CREATION_DATE_FIELD_NAME, SortField.LONG))); // log.debug("Query \"" + query + "\" on field '" + CREATION_DATE_FIELD_NAME + "' returned " // + hits.length() + " hits"); String revisionName = null; if (hits != null && hits.length() > 0) { /* DEBUG for (int i = 0; i < hits.length();i++) { revisionName = hits.doc(i).getField(REVISION_NAME_FIELD_NAME).stringValue(); log.warn("DEBUG: Found revision name '" + revisionName + "' (Creation date: " + new Date(new Long(revisionName).longValue()) + ")"); } */ revisionName = hits.doc(hits.length() - 1).getField(REVISION_NAME_FIELD_NAME).stringValue(); } searcher.close(); if (revisionName != null) { return node.getRevision(revisionName); } else { return null; } } else { log.error("Searcher could not be initialized for index directory '" + indexDir + "'!"); return null; } } catch (Exception e) { log.error(e, e); return null; } }
/**
 * Picks random albums (ID3-tag based) from the given music folders.
 *
 * <p>Every album document belonging to one of the folders is collected; documents are then
 * drawn at random without replacement until {@code count} albums were found or no
 * candidates are left. Albums that cannot be loaded are logged and skipped.
 *
 * @param count Maximum number of albums to return.
 * @param musicFolders Only albums from these folders are considered.
 * @return The randomly chosen albums; empty (or partially filled) if the search fails.
 */
public List<Album> getRandomAlbumsId3(int count, List<MusicFolder> musicFolders) {
    List<Album> albums = new ArrayList<Album>();

    IndexReader indexReader = null;
    try {
        indexReader = createIndexReader(ALBUM_ID3);
        Searcher albumSearcher = new IndexSearcher(indexReader);

        // One span clause per music folder, combined with OR.
        SpanQuery[] folderClauses = new SpanQuery[musicFolders.size()];
        int slot = 0;
        for (MusicFolder folder : musicFolders) {
            Term folderTerm =
                    new Term(FIELD_FOLDER_ID, NumericUtils.intToPrefixCoded(folder.getId()));
            folderClauses[slot++] = new SpanTermQuery(folderTerm);
        }
        Query folderQuery = new SpanOrQuery(folderClauses);

        TopDocs allMatches = albumSearcher.search(folderQuery, null, Integer.MAX_VALUE);
        List<ScoreDoc> candidates = Lists.newArrayList(allMatches.scoreDocs);
        Random picker = new Random(System.currentTimeMillis());

        while (albums.size() < count && !candidates.isEmpty()) {
            ScoreDoc picked = candidates.remove(picker.nextInt(candidates.size()));
            int albumId = Integer.parseInt(albumSearcher.doc(picked.doc).get(FIELD_ID));
            try {
                addIfNotNull(albumDao.getAlbum(albumId), albums);
            } catch (Exception x) {
                LOG.warn("Failed to get album file " + albumId, x);
            }
        }
    } catch (Throwable x) {
        LOG.error("Failed to search for random albums.", x);
    } finally {
        FileUtil.closeQuietly(indexReader);
    }
    return albums;
}
/**
 * Do the search.
 *
 * <p>Returns an empty result set (columns only) when the connection URL starts with
 * {@code jdbc:columnlist:} or when {@code text} is null or blank. Otherwise the text is
 * parsed as a Lucene query on {@code LUCENE_FIELD_DATA} and one row is added per hit,
 * starting at {@code offset}.
 *
 * <p>With {@code limit == 0} at most {@code 100 + offset} documents are requested from
 * Lucene, so no more than 100 rows are produced in that case.
 *
 * @param conn the database connection
 * @param text the query
 * @param limit the limit (0 means "no explicit limit", see above)
 * @param offset the offset
 * @param data whether the raw data should be returned
 * @return the result set
 * @throws SQLException any exception raised while searching, converted by
 *         {@code convertException}
 */
protected static ResultSet search(
        Connection conn, String text, int limit, int offset, boolean data) throws SQLException {
    SimpleResultSet result = createResultSet(data);
    if (conn.getMetaData().getURL().startsWith("jdbc:columnlist:")) {
        // this is just to query the result set columns
        return result;
    }
    if (text == null || text.trim().length() == 0) {
        return result;
    }
    try {
        IndexAccess access = getIndexAccess(conn);
        // NOTE(review): the LUCENE2 / LUCENE3 markers below look like source-switching
        // toggles; keep the comment delimiters exactly where they are.
        /*## LUCENE2 ##
        access.modifier.flush();
        String path = getIndexPath(conn);
        IndexReader reader = IndexReader.open(path);
        Analyzer analyzer = new StandardAnalyzer();
        Searcher searcher = new IndexSearcher(reader);
        QueryParser parser = new QueryParser(LUCENE_FIELD_DATA, analyzer);
        Query query = parser.parse(text);
        Hits hits = searcher.search(query);
        int max = hits.length();
        if (limit == 0) {
            limit = max;
        }
        for (int i = 0; i < limit && i + offset < max; i++) {
            Document doc = hits.doc(i + offset);
            float score = hits.score(i + offset);
        //*/
        // ## LUCENE3 ##
        // take a reference as the searcher may change
        Searcher searcher = access.searcher;
        // reuse the same analyzer; it's thread-safe;
        // also allows subclasses to control the analyzer used.
        Analyzer analyzer = access.writer.getAnalyzer();
        QueryParser parser = new QueryParser(Version.LUCENE_30, LUCENE_FIELD_DATA, analyzer);
        Query query = parser.parse(text);
        // Lucene 3 insists on a hard limit and will not provide
        // a total hits value. Take at least 100 which is
        // an optimal limit for Lucene as any more
        // will trigger writing results to disk.
        int maxResults = (limit == 0 ? 100 : limit) + offset;
        TopDocs docs = searcher.search(query, maxResults);
        if (limit == 0) {
            limit = docs.totalHits;
        }
        // The loop is bounded three ways: by the requested limit, by the total number of
        // hits, and by the number of documents actually returned (len).
        for (int i = 0, len = docs.scoreDocs.length;
                i < limit && i + offset < docs.totalHits && i + offset < len;
                i++) {
            ScoreDoc sd = docs.scoreDocs[i + offset];
            Document doc = searcher.doc(sd.doc);
            float score = sd.score;
            // */
            String q = doc.get(LUCENE_FIELD_QUERY);
            if (data) {
                // The stored query text is split at " WHERE ": the part before it is parsed
                // as a column expression yielding schema and table name, the part after it
                // is handed to parseKey to obtain the key columns and values.
                int idx = q.indexOf(" WHERE ");
                JdbcConnection c = (JdbcConnection) conn;
                Session session = (Session) c.getSession();
                Parser p = new Parser(session);
                String tab = q.substring(0, idx);
                ExpressionColumn expr = (ExpressionColumn) p.parseExpression(tab);
                String schemaName = expr.getOriginalTableAliasName();
                String tableName = expr.getColumnName();
                q = q.substring(idx + " WHERE ".length());
                Object[][] columnData = parseKey(conn, q);
                result.addRow(schemaName, tableName, columnData[0], columnData[1], score);
            } else {
                // Without raw data only the stored query text and the score are returned.
                result.addRow(q, score);
            }
        }
        /*## LUCENE2 ##
        // TODO keep it open if possible
        reader.close();
        //*/
    } catch (Exception e) {
        throw convertException(e);
    }
    return result;
}
/**
 * Runs a full-text search on the index of the given type, restricted to the given music
 * folders, and returns the requested page of results.
 *
 * <p>The total hit count is always reported; only the hits in
 * {@code [offset, offset + count)} are resolved to domain objects. Depending on the index
 * type these are media files, ID3 artists or ID3 albums; objects that resolve to
 * {@code null} are left out.
 *
 * @param criteria     query text, offset and count
 * @param musicFolders folders the hits have to belong to
 * @param indexType    index to search; also selects the fields, boosts and result type
 * @return the search result; if the search fails the error is logged and the result
 *         contains whatever had been collected up to that point
 */
public SearchResult search(
        SearchCriteria criteria, List<MusicFolder> musicFolders, IndexType indexType) {
    SearchResult searchResult = new SearchResult();
    final int offset = criteria.getOffset();
    final int count = criteria.getCount();
    searchResult.setOffset(offset);

    IndexReader indexReader = null;
    try {
        indexReader = createIndexReader(indexType);
        Searcher indexSearcher = new IndexSearcher(indexReader);
        Analyzer analyzer = new SubsonicAnalyzer();

        MultiFieldQueryParser parser =
                new MultiFieldQueryParser(
                        LUCENE_VERSION, indexType.getFields(), analyzer, indexType.getBoosts());

        BooleanQuery combined = new BooleanQuery();
        combined.add(parser.parse(analyzeQuery(criteria.getQuery())), BooleanClause.Occur.MUST);

        // ID3 indexes identify the folder by its numeric id, the others by its path.
        boolean byFolderId = indexType == ALBUM_ID3 || indexType == ARTIST_ID3;
        List<SpanTermQuery> folderClauses = new ArrayList<SpanTermQuery>();
        for (MusicFolder folder : musicFolders) {
            Term folderTerm;
            if (byFolderId) {
                folderTerm = new Term(FIELD_FOLDER_ID, NumericUtils.intToPrefixCoded(folder.getId()));
            } else {
                folderTerm = new Term(FIELD_FOLDER, folder.getPath().getPath());
            }
            folderClauses.add(new SpanTermQuery(folderTerm));
        }
        combined.add(
                new SpanOrQuery(folderClauses.toArray(new SpanQuery[folderClauses.size()])),
                BooleanClause.Occur.MUST);

        TopDocs topDocs = indexSearcher.search(combined, null, offset + count);
        searchResult.setTotalHits(topDocs.totalHits);

        // Clamp the requested window to the hits that exist.
        int first = Math.min(offset, topDocs.totalHits);
        int pastLast = Math.min(first + count, topDocs.totalHits);

        for (int hit = first; hit < pastLast; hit++) {
            Document doc = indexSearcher.doc(topDocs.scoreDocs[hit].doc);
            switch (indexType) {
                case SONG:
                case ARTIST:
                case ALBUM:
                    addIfNotNull(
                            mediaFileService.getMediaFile(Integer.valueOf(doc.get(FIELD_ID))),
                            searchResult.getMediaFiles());
                    break;
                case ARTIST_ID3:
                    addIfNotNull(
                            artistDao.getArtist(Integer.valueOf(doc.get(FIELD_ID))),
                            searchResult.getArtists());
                    break;
                case ALBUM_ID3:
                    addIfNotNull(
                            albumDao.getAlbum(Integer.valueOf(doc.get(FIELD_ID))),
                            searchResult.getAlbums());
                    break;
                default:
                    break;
            }
        }
    } catch (Throwable x) {
        LOG.error("Failed to execute Lucene search.", x);
    } finally {
        FileUtil.closeQuietly(indexReader);
    }
    return searchResult;
}
/** * Index the fileset. * * @exception IOException if Lucene I/O exception TODO: refactor!!!!! */ private void indexDocs() throws IOException { Date start = new Date(); boolean create = overwrite; // If the index directory doesn't exist, // create it and force create mode if (indexDir.mkdirs() && !overwrite) { create = true; } FSDirectory dir = FSDirectory.open(indexDir); try { Searcher searcher = null; boolean checkLastModified = false; if (!create) { try { searcher = new IndexSearcher(dir, true); checkLastModified = true; } catch (IOException ioe) { log("IOException: " + ioe.getMessage()); // Empty - ignore, which indicates to index all // documents } } log("checkLastModified = " + checkLastModified, Project.MSG_VERBOSE); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer) .setOpenMode(create ? OpenMode.CREATE : OpenMode.APPEND); LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy(); lmp.setUseCompoundFile(useCompoundIndex); lmp.setMergeFactor(mergeFactor); IndexWriter writer = new IndexWriter(dir, conf); int totalFiles = 0; int totalIndexed = 0; int totalIgnored = 0; try { for (int i = 0; i < rcs.size(); i++) { ResourceCollection rc = rcs.elementAt(i); if (rc.isFilesystemOnly()) { Iterator resources = rc.iterator(); while (resources.hasNext()) { Resource r = (Resource) resources.next(); if (!r.isExists() || !(r instanceof FileResource)) { continue; } totalFiles++; File file = ((FileResource) r).getFile(); if (!file.exists() || !file.canRead()) { throw new BuildException( "File \"" + file.getAbsolutePath() + "\" does not exist or is not readable."); } boolean indexIt = true; if (checkLastModified) { Term pathTerm = new Term("path", file.getPath()); TermQuery query = new TermQuery(pathTerm); ScoreDoc[] hits = searcher.search(query, null, 1).scoreDocs; // if document is found, compare the // indexed last modified time with the // current file // - don't index if up to date if (hits.length > 0) { Document doc = 
searcher.doc(hits[0].doc); String indexModified = doc.get("modified").trim(); if (indexModified != null) { long lastModified = 0; try { lastModified = DateTools.stringToTime(indexModified); } catch (ParseException e) { // if modified time is not parsable, skip } if (lastModified == file.lastModified()) { // TODO: remove existing document indexIt = false; } } } } if (indexIt) { try { log("Indexing " + file.getPath(), Project.MSG_VERBOSE); Document doc = handler.getDocument(file); if (doc == null) { totalIgnored++; } else { // Add the path of the file as a field named "path". Use a Keyword field, so // that the index stores the path, and so that the path is searchable doc.add( new Field( "path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); // Add the last modified date of the file a field named "modified". Use a // Keyword field, so that it's searchable, but so that no attempt is made // to tokenize the field into words. doc.add( new Field( "modified", DateTools.timeToString( file.lastModified(), DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.addDocument(doc); totalIndexed++; } } catch (DocumentHandlerException e) { throw new BuildException(e); } } } // for j } // if (fs != null) } // for i writer.optimize(); } // try finally { // always make sure everything gets closed, // no matter how we exit. writer.close(); if (searcher != null) { searcher.close(); } } Date end = new Date(); log( totalIndexed + " out of " + totalFiles + " indexed (" + totalIgnored + " ignored) in " + (end.getTime() - start.getTime()) + " milliseconds"); } finally { dir.close(); } }