/** * Returns a number of random songs. * * @param criteria Search criteria. * @return List of random songs. */ public List<MediaFile> getRandomSongs(RandomSearchCriteria criteria) { List<MediaFile> result = new ArrayList<MediaFile>(); IndexReader reader = null; try { reader = createIndexReader(SONG); Searcher searcher = new IndexSearcher(reader); BooleanQuery query = new BooleanQuery(); query.add( new TermQuery(new Term(FIELD_MEDIA_TYPE, MediaFile.MediaType.MUSIC.name().toLowerCase())), BooleanClause.Occur.MUST); if (criteria.getGenre() != null) { String genre = normalizeGenre(criteria.getGenre()); query.add(new TermQuery(new Term(FIELD_GENRE, genre)), BooleanClause.Occur.MUST); } if (criteria.getFromYear() != null || criteria.getToYear() != null) { NumericRangeQuery<Integer> rangeQuery = NumericRangeQuery.newIntRange( FIELD_YEAR, criteria.getFromYear(), criteria.getToYear(), true, true); query.add(rangeQuery, BooleanClause.Occur.MUST); } List<SpanTermQuery> musicFolderQueries = new ArrayList<SpanTermQuery>(); for (MusicFolder musicFolder : criteria.getMusicFolders()) { musicFolderQueries.add( new SpanTermQuery(new Term(FIELD_FOLDER, musicFolder.getPath().getPath()))); } query.add( new SpanOrQuery(musicFolderQueries.toArray(new SpanQuery[musicFolderQueries.size()])), BooleanClause.Occur.MUST); TopDocs topDocs = searcher.search(query, null, Integer.MAX_VALUE); List<ScoreDoc> scoreDocs = Lists.newArrayList(topDocs.scoreDocs); Random random = new Random(System.currentTimeMillis()); while (!scoreDocs.isEmpty() && result.size() < criteria.getCount()) { int index = random.nextInt(scoreDocs.size()); Document doc = searcher.doc(scoreDocs.remove(index).doc); int id = Integer.valueOf(doc.get(FIELD_ID)); try { addIfNotNull(mediaFileService.getMediaFile(id), result); } catch (Exception x) { LOG.warn("Failed to get media file " + id); } } } catch (Throwable x) { LOG.error("Failed to search or random songs.", x); } finally { FileUtil.closeQuietly(reader); } return result; }
// Make sure the documents returned by the search match the expected list // Copied from TestSort.java private void assertMatches(Searcher searcher, Query query, Sort sort, String expectedResult) throws IOException { ScoreDoc[] result = searcher.search(query, null, 1000, sort).scoreDocs; StringBuilder buff = new StringBuilder(10); int n = result.length; for (int i = 0; i < n; ++i) { Document doc = searcher.doc(result[i].doc); String[] v = doc.getValues("tracer"); for (int j = 0; j < v.length; ++j) { buff.append(v[j]); } } assertEquals(expectedResult, buff.toString()); }
public HitDetails getDetails(Hit hit) throws IOException { Document doc = luceneSearcher.doc(Integer.valueOf(hit.getUniqueKey())); List docFields = doc.getFields(); String[] fields = new String[docFields.size()]; String[] values = new String[docFields.size()]; for (int i = 0; i < docFields.size(); i++) { Field field = (Field) docFields.get(i); fields[i] = field.name(); values[i] = field.stringValue(); } return new HitDetails(fields, values); }
private Document findByKey(IndexReader reader, Field keyField) throws ParseException, IOException { Searcher searcher = new IndexSearcher(reader); QueryParser queryParser = new QueryParser(luceneVersion, keyField.name(), queryAnalyzer); queryParser.setDefaultOperator(QueryParser.Operator.AND); String queryString = keyField.name() + ":" + keyField.stringValue(); Query query = queryParser.parse(queryString); TopDocs docs = searcher.search(query, 10000); ScoreDoc[] scoreDocs = docs.scoreDocs; if (scoreDocs.length != 1) { return null; } ScoreDoc doc = scoreDocs[0]; return searcher.doc(doc.doc); }
public void run() { try { String searchQuery = (new BufferedReader(new InputStreamReader(searchSocket.getInputStream()))) .readLine() .trim(); IndexReader reader = writer.getReader(); Searcher searcher = new IndexSearcher(reader); QueryParser indexParser = new QueryParser(Version.LUCENE_30, "data", analyzer); SortField hitSortField = new SortField("date", SortField.LONG); Sort hitSort = new Sort(hitSortField); TopFieldDocs hits = searcher.search(indexParser.parse(searchQuery), null, 1000, hitSort); PrintWriter searchReply = new PrintWriter(searchSocket.getOutputStream(), true); searchReply.println(hits.totalHits + " Hits for " + searchQuery); for (int i = 0; i < hits.totalHits; i++) { Document document = searcher.doc(hits.scoreDocs[i].doc); String host = document.get("hostname"); String date = document.get("date"); String data = document.get("data"); searchReply.print("host: " + host + ", date: " + date + ", data: " + data + "\n\n"); } searchReply.close(); searcher.close(); reader.close(); searchSocket.close(); } catch (Exception ex) { System.out.print("Exception: " + ex + "\n"); } }
/** * Returns a number of random albums, using ID3 tag. * * @param count Number of albums to return. * @param musicFolders Only return albums from these folders. * @return List of random albums. */ public List<Album> getRandomAlbumsId3(int count, List<MusicFolder> musicFolders) { List<Album> result = new ArrayList<Album>(); IndexReader reader = null; try { reader = createIndexReader(ALBUM_ID3); Searcher searcher = new IndexSearcher(reader); List<SpanTermQuery> musicFolderQueries = new ArrayList<SpanTermQuery>(); for (MusicFolder musicFolder : musicFolders) { musicFolderQueries.add( new SpanTermQuery( new Term(FIELD_FOLDER_ID, NumericUtils.intToPrefixCoded(musicFolder.getId())))); } Query query = new SpanOrQuery(musicFolderQueries.toArray(new SpanQuery[musicFolderQueries.size()])); TopDocs topDocs = searcher.search(query, null, Integer.MAX_VALUE); List<ScoreDoc> scoreDocs = Lists.newArrayList(topDocs.scoreDocs); Random random = new Random(System.currentTimeMillis()); while (!scoreDocs.isEmpty() && result.size() < count) { int index = random.nextInt(scoreDocs.size()); Document doc = searcher.doc(scoreDocs.remove(index).doc); int id = Integer.valueOf(doc.get(FIELD_ID)); try { addIfNotNull(albumDao.getAlbum(id), result); } catch (Exception x) { LOG.warn("Failed to get album file " + id, x); } } } catch (Throwable x) { LOG.error("Failed to search for random albums.", x); } finally { FileUtil.closeQuietly(reader); } return result; }
/** * Do the search. * * @param conn the database connection * @param text the query * @param limit the limit * @param offset the offset * @param data whether the raw data should be returned * @return the result set */ protected static ResultSet search( Connection conn, String text, int limit, int offset, boolean data) throws SQLException { SimpleResultSet result = createResultSet(data); if (conn.getMetaData().getURL().startsWith("jdbc:columnlist:")) { // this is just to query the result set columns return result; } if (text == null || text.trim().length() == 0) { return result; } try { IndexAccess access = getIndexAccess(conn); /*## LUCENE2 ## access.modifier.flush(); String path = getIndexPath(conn); IndexReader reader = IndexReader.open(path); Analyzer analyzer = new StandardAnalyzer(); Searcher searcher = new IndexSearcher(reader); QueryParser parser = new QueryParser(LUCENE_FIELD_DATA, analyzer); Query query = parser.parse(text); Hits hits = searcher.search(query); int max = hits.length(); if (limit == 0) { limit = max; } for (int i = 0; i < limit && i + offset < max; i++) { Document doc = hits.doc(i + offset); float score = hits.score(i + offset); //*/ // ## LUCENE3 ## // take a reference as the searcher may change Searcher searcher = access.searcher; // reuse the same analyzer; it's thread-safe; // also allows subclasses to control the analyzer used. Analyzer analyzer = access.writer.getAnalyzer(); QueryParser parser = new QueryParser(Version.LUCENE_30, LUCENE_FIELD_DATA, analyzer); Query query = parser.parse(text); // Lucene 3 insists on a hard limit and will not provide // a total hits value. Take at least 100 which is // an optimal limit for Lucene as any more // will trigger writing results to disk. int maxResults = (limit == 0 ? 100 : limit) + offset; TopDocs docs = searcher.search(query, maxResults); if (limit == 0) { limit = docs.totalHits; } for (int i = 0, len = docs.scoreDocs.length; i < limit && i + offset < docs.totalHits && i + offset < len; i++) { ScoreDoc sd = docs.scoreDocs[i + offset]; Document doc = searcher.doc(sd.doc); float score = sd.score; // */ String q = doc.get(LUCENE_FIELD_QUERY); if (data) { int idx = q.indexOf(" WHERE "); JdbcConnection c = (JdbcConnection) conn; Session session = (Session) c.getSession(); Parser p = new Parser(session); String tab = q.substring(0, idx); ExpressionColumn expr = (ExpressionColumn) p.parseExpression(tab); String schemaName = expr.getOriginalTableAliasName(); String tableName = expr.getColumnName(); q = q.substring(idx + " WHERE ".length()); Object[][] columnData = parseKey(conn, q); result.addRow(schemaName, tableName, columnData[0], columnData[1], score); } else { result.addRow(q, score); } } /*## LUCENE2 ## // TODO keep it open if possible reader.close(); //*/ } catch (Exception e) { throw convertException(e); } return result; }
public SearchResult search( SearchCriteria criteria, List<MusicFolder> musicFolders, IndexType indexType) { SearchResult result = new SearchResult(); int offset = criteria.getOffset(); int count = criteria.getCount(); result.setOffset(offset); IndexReader reader = null; try { reader = createIndexReader(indexType); Searcher searcher = new IndexSearcher(reader); Analyzer analyzer = new SubsonicAnalyzer(); MultiFieldQueryParser queryParser = new MultiFieldQueryParser( LUCENE_VERSION, indexType.getFields(), analyzer, indexType.getBoosts()); BooleanQuery query = new BooleanQuery(); query.add(queryParser.parse(analyzeQuery(criteria.getQuery())), BooleanClause.Occur.MUST); List<SpanTermQuery> musicFolderQueries = new ArrayList<SpanTermQuery>(); for (MusicFolder musicFolder : musicFolders) { if (indexType == ALBUM_ID3 || indexType == ARTIST_ID3) { musicFolderQueries.add( new SpanTermQuery( new Term(FIELD_FOLDER_ID, NumericUtils.intToPrefixCoded(musicFolder.getId())))); } else { musicFolderQueries.add( new SpanTermQuery(new Term(FIELD_FOLDER, musicFolder.getPath().getPath()))); } } query.add( new SpanOrQuery(musicFolderQueries.toArray(new SpanQuery[musicFolderQueries.size()])), BooleanClause.Occur.MUST); TopDocs topDocs = searcher.search(query, null, offset + count); result.setTotalHits(topDocs.totalHits); int start = Math.min(offset, topDocs.totalHits); int end = Math.min(start + count, topDocs.totalHits); for (int i = start; i < end; i++) { Document doc = searcher.doc(topDocs.scoreDocs[i].doc); switch (indexType) { case SONG: case ARTIST: case ALBUM: MediaFile mediaFile = mediaFileService.getMediaFile(Integer.valueOf(doc.get(FIELD_ID))); addIfNotNull(mediaFile, result.getMediaFiles()); break; case ARTIST_ID3: Artist artist = artistDao.getArtist(Integer.valueOf(doc.get(FIELD_ID))); addIfNotNull(artist, result.getArtists()); break; case ALBUM_ID3: Album album = albumDao.getAlbum(Integer.valueOf(doc.get(FIELD_ID))); addIfNotNull(album, result.getAlbums()); break; default: break; } } } catch (Throwable x) { LOG.error("Failed to execute Lucene search.", x); } finally { FileUtil.closeQuietly(reader); } return result; }
/** * Index the fileset. * * @exception IOException if Lucene I/O exception TODO: refactor!!!!! */ private void indexDocs() throws IOException { Date start = new Date(); boolean create = overwrite; // If the index directory doesn't exist, // create it and force create mode if (indexDir.mkdirs() && !overwrite) { create = true; } FSDirectory dir = FSDirectory.open(indexDir); try { Searcher searcher = null; boolean checkLastModified = false; if (!create) { try { searcher = new IndexSearcher(dir, true); checkLastModified = true; } catch (IOException ioe) { log("IOException: " + ioe.getMessage()); // Empty - ignore, which indicates to index all // documents } } log("checkLastModified = " + checkLastModified, Project.MSG_VERBOSE); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer) .setOpenMode(create ? OpenMode.CREATE : OpenMode.APPEND); LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy(); lmp.setUseCompoundFile(useCompoundIndex); lmp.setMergeFactor(mergeFactor); IndexWriter writer = new IndexWriter(dir, conf); int totalFiles = 0; int totalIndexed = 0; int totalIgnored = 0; try { for (int i = 0; i < rcs.size(); i++) { ResourceCollection rc = rcs.elementAt(i); if (rc.isFilesystemOnly()) { Iterator resources = rc.iterator(); while (resources.hasNext()) { Resource r = (Resource) resources.next(); if (!r.isExists() || !(r instanceof FileResource)) { continue; } totalFiles++; File file = ((FileResource) r).getFile(); if (!file.exists() || !file.canRead()) { throw new BuildException( "File \"" + file.getAbsolutePath() + "\" does not exist or is not readable."); } boolean indexIt = true; if (checkLastModified) { Term pathTerm = new Term("path", file.getPath()); TermQuery query = new TermQuery(pathTerm); ScoreDoc[] hits = searcher.search(query, null, 1).scoreDocs; // if document is found, compare the // indexed last modified time with the // current file // - don't index if up to date if (hits.length > 0) { Document doc = searcher.doc(hits[0].doc); String indexModified = doc.get("modified").trim(); if (indexModified != null) { long lastModified = 0; try { lastModified = DateTools.stringToTime(indexModified); } catch (ParseException e) { // if modified time is not parsable, skip } if (lastModified == file.lastModified()) { // TODO: remove existing document indexIt = false; } } } } if (indexIt) { try { log("Indexing " + file.getPath(), Project.MSG_VERBOSE); Document doc = handler.getDocument(file); if (doc == null) { totalIgnored++; } else { // Add the path of the file as a field named "path". Use a Keyword field, so // that the index stores the path, and so that the path is searchable doc.add( new Field( "path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); // Add the last modified date of the file a field named "modified". Use a // Keyword field, so that it's searchable, but so that no attempt is made // to tokenize the field into words. doc.add( new Field( "modified", DateTools.timeToString( file.lastModified(), DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.addDocument(doc); totalIndexed++; } } catch (DocumentHandlerException e) { throw new BuildException(e); } } } // for j } // if (fs != null) } // for i writer.optimize(); } // try finally { // always make sure everything gets closed, // no matter how we exit. writer.close(); if (searcher != null) { searcher.close(); } } Date end = new Date(); log( totalIndexed + " out of " + totalFiles + " indexed (" + totalIgnored + " ignored) in " + (end.getTime() - start.getTime()) + " milliseconds"); } finally { dir.close(); } }
// private static int[] oldToNew(IndexReader reader, Searcher searcher) throws IOException { private static DocScore[] newToOld(IndexReader reader, Searcher searcher) throws IOException { int readerMax = reader.maxDoc(); DocScore[] newToOld = new DocScore[readerMax]; // use site, an indexed, un-tokenized field to get boost // byte[] boosts = reader.norms("site"); TODO MC /* TODO MC */ Document docMeta; Pattern includes = Pattern.compile("\\|"); String value = NutchConfiguration.create().get(INCLUDE_EXTENSIONS_KEY, ""); String includeExtensions[] = includes.split(value); Hashtable<String, Boolean> validExtensions = new Hashtable<String, Boolean>(); for (int i = 0; i < includeExtensions.length; i++) { validExtensions.put(includeExtensions[i], true); System.out.println("extension boosted " + includeExtensions[i]); } /* TODO MC */ for (int oldDoc = 0; oldDoc < readerMax; oldDoc++) { float score; if (reader.isDeleted(oldDoc)) { // score = 0.0f; score = -1f; // TODO MC } else { // score = Similarity.decodeNorm(boosts[oldDoc]); TODO MC /* TODO MC */ docMeta = searcher.doc(oldDoc); if (validExtensions.get(docMeta.get("subType")) == null) { // searched extensions will have higher scores score = -0.5f; } else { score = Integer.parseInt(docMeta.get("inlinks")); /* if (score==0) { score=0.001f; // TODO MC - to not erase } */ } /* TODO MC */ // System.out.println("Score for old document "+oldDoc+" is "+score+" and type // "+docMeta.get("subType")); // TODO MC debug remove } DocScore docScore = new DocScore(); docScore.doc = oldDoc; docScore.score = score; newToOld[oldDoc] = docScore; } System.out.println("Sorting " + newToOld.length + " documents."); Arrays.sort(newToOld); // HeapSorter.sort(newToOld); // TODO MC - due to the lack of space /* TODO MC int[] oldToNew = new int[readerMax]; for (int newDoc = 0; newDoc < readerMax; newDoc++) { DocScore docScore = newToOld[newDoc]; //oldToNew[docScore.oldDoc] = docScore.score > 0.0f ? newDoc : -1; // TODO MC oldToNew[docScore.oldDoc] = newDoc; // TODO MC } */ /* TODO MC * for (int newDoc = 0; newDoc < readerMax; newDoc++) { DocScore docScore = newToOld[newDoc]; System.out.println("Score for new document "+newDoc+" is "+docScore.score); // TODO MC debug remove } * TODO MC */ // return oldToNew; TODO MC return newToOld; // TODO MC }