/**
 * Note: if you use a counting {@link Facets} implementation, you can amortize the sampled counts
 * by calling this method. Uses the {@link FacetsConfig} and the {@link IndexSearcher} to
 * determine the upper bound for each facet value.
 */
public FacetResult amortizeFacetCounts(
    FacetResult res, FacetsConfig config, IndexSearcher searcher) throws IOException {
  if (res == null || totalHits <= sampleSize) {
    return res;
  }

  LabelAndValue[] fixedLabelValues = new LabelAndValue[res.labelValues.length];
  IndexReader reader = searcher.getIndexReader();
  DimConfig dimConfig = config.getDimConfig(res.dim);

  // +2 to prepend dimension, append child label
  String[] childPath = new String[res.path.length + 2];
  childPath[0] = res.dim;

  System.arraycopy(res.path, 0, childPath, 1, res.path.length); // reuse

  for (int i = 0; i < res.labelValues.length; i++) {
    childPath[res.path.length + 1] = res.labelValues[i].label;
    String fullPath = FacetsConfig.pathToString(childPath, childPath.length);
    int max = reader.docFreq(new Term(dimConfig.indexFieldName, fullPath));
    int correctedCount = (int) (res.labelValues[i].value.doubleValue() / samplingRate);
    correctedCount = Math.min(max, correctedCount);
    fixedLabelValues[i] = new LabelAndValue(res.labelValues[i].label, correctedCount);
  }

  // cap the total count on the total number of non-deleted documents in the reader
  int correctedTotalCount = res.value.intValue();
  if (correctedTotalCount > 0) {
    correctedTotalCount =
        Math.min(reader.numDocs(), (int) (res.value.doubleValue() / samplingRate));
  }

  return new FacetResult(
      res.dim, res.path, correctedTotalCount, fixedLabelValues, res.childCount);
}

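A brief usage sketch (not part of the snippet above) showing where a call to amortizeFacetCounts typically sits in a sampled facet search. It assumes the method belongs to a sampling collector such as Lucene's RandomSamplingFacetsCollector and that the searcher, taxonomy reader, config, and query are supplied by the caller; the sample size, dimension name, and exact signatures are illustrative and may vary between Lucene versions.

static FacetResult sampledAndAmortized(
    IndexSearcher searcher, TaxonomyReader taxoReader, FacetsConfig config, Query query)
    throws IOException {
  // Collect hits with a sampling collector (sample size is an arbitrary example value).
  RandomSamplingFacetsCollector sampling = new RandomSamplingFacetsCollector(10000);
  FacetsCollector.search(searcher, query, 10, sampling);

  // Compute facet counts over the sampled hits only.
  Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, sampling);
  FacetResult sampled = facets.getTopChildren(10, "Author"); // "Author" is a hypothetical dimension

  // Scale the sampled counts back up; per-value docFreq serves as the upper bound.
  return sampling.amortizeFacetCounts(sampled, config, searcher);
}
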
public void testMoreThan32ProhibitedClauses() throws Exception {
  final Directory d = newDirectory();
  final RandomIndexWriter w = new RandomIndexWriter(random(), d);
  Document doc = new Document();
  doc.add(
      new TextField(
          "field",
          "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33",
          Field.Store.NO));
  w.addDocument(doc);
  doc = new Document();
  doc.add(new TextField("field", "33", Field.Store.NO));
  w.addDocument(doc);
  final IndexReader r = w.getReader();
  w.close();
  final IndexSearcher s = newSearcher(r);

  final BooleanQuery q = new BooleanQuery();
  for (int term = 0; term < 33; term++) {
    q.add(
        new BooleanClause(
            new TermQuery(new Term("field", "" + term)), BooleanClause.Occur.MUST_NOT));
  }
  q.add(new BooleanClause(new TermQuery(new Term("field", "33")), BooleanClause.Occur.SHOULD));

  final int[] count = new int[1];
  s.search(
      q,
      new Collector() {
        private Scorer scorer;

        @Override
        public void setScorer(Scorer scorer) {
          // Make sure we got BooleanScorer:
          this.scorer = scorer;
          assertEquals(
              "Scorer is implemented by wrong class",
              BooleanScorer.class.getName() + "$BucketScorer",
              scorer.getClass().getName());
        }

        @Override
        public void collect(int doc) {
          count[0]++;
        }

        @Override
        public void setNextReader(AtomicReaderContext context) {}

        @Override
        public boolean acceptsDocsOutOfOrder() {
          return true;
        }
      });
  assertEquals(1, count[0]);
  r.close();
  d.close();
}

private void dumpDocuments() throws IOException {
  outputBanner("Documents");

  int totalDocs = mIndexReader.numDocs();
  outputLn();
  outputLn("There are " + totalDocs + " documents in this index.");

  mConsole.debug("Total number of documents: " + totalDocs);
  for (int i = 0; i < totalDocs; i++) {
    Document doc = null;
    try {
      doc = mIndexReader.document(i, null);
    } catch (IllegalArgumentException e) {
      if ("attempt to access a deleted document".equals(e.getMessage())) {
        mConsole.warn(
            "encountered exception while dumping document " + i + ": " + e.getMessage());
      } else {
        throw e;
      }
    }
    dumpDocument(i, doc);

    if ((i + 1) % 100 == 0) {
      mConsole.debug("Dumped " + (i + 1) + " documents");
    }
  }
}

private void remove(Class entity, Serializable id) {
  log.trace("remove from Lucene index: " + entity + "#" + id);
  DocumentBuilder builder = workspace.getDocumentBuilder(entity);
  Term term = builder.getTerm(id);
  IndexReader reader = workspace.getIndexReader(entity);
  TermDocs termDocs = null;
  try {
    // TODO is there a faster way?
    // TODO include TermDocs into the workspace?
    termDocs = reader.termDocs(term);
    String entityName = entity.getName();
    while (termDocs.next()) {
      int docIndex = termDocs.doc();
      if (entityName.equals(reader.document(docIndex).get(DocumentBuilder.CLASS_FIELDNAME))) {
        // remove only the one of the right class
        // loop all to remove all the matches (defensive code)
        reader.deleteDocument(docIndex);
      }
    }
  } catch (Exception e) {
    throw new HibernateException("Unable to remove from Lucene index: " + entity + "#" + id, e);
  } finally {
    if (termDocs != null) {
      try {
        termDocs.close();
      } catch (IOException e) {
        log.warn("Unable to close termDocs properly", e);
      }
    }
  }
}

public void testFarsiRangeFilterCollating(
    Analyzer analyzer, String firstBeg, String firstEnd, String secondBeg, String secondEnd)
    throws Exception {
  Directory dir = newDirectory();
  IndexWriter writer =
      new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
  Document doc = new Document();
  doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.ANALYZED));
  doc.add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
  writer.addDocument(doc);
  writer.close();
  IndexReader reader = IndexReader.open(dir);
  IndexSearcher searcher = new IndexSearcher(reader);
  Query query = new TermQuery(new Term("body", "body"));

  // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
  // orders the U+0698 character before the U+0633 character, so the single
  // index Term below should NOT be returned by a TermRangeFilter with a Farsi
  // Collator (or an Arabic one for the case when Farsi is not supported).
  ScoreDoc[] result =
      searcher.search(query, new TermRangeFilter("content", firstBeg, firstEnd, true, true), 1)
          .scoreDocs;
  assertEquals("The index Term should not be included.", 0, result.length);

  result =
      searcher.search(query, new TermRangeFilter("content", secondBeg, secondEnd, true, true), 1)
          .scoreDocs;
  assertEquals("The index Term should be included.", 1, result.length);

  searcher.close();
  reader.close();
  dir.close();
}

/**
 * Return the list of matching sentence ids from the Lucene index.
 *
 * @param input input word
 * @param catalogName catalog (domain) name we'd like to search in
 * @param limit how many hits are needed (0 means all)
 */
public List<String> query(String input, String catalogName, int limit) {
  List<String> res = new ArrayList<String>();
  try {
    catalog c = catalogs.get(catalogName);
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(c.indexPath)));
    IndexSearcher searcher = new IndexSearcher(reader);
    QueryParser parser = new QueryParser("contents", analyzer);
    Query query = parser.parse(QueryParser.escape(input));

    int n = limit > 0 ? limit : searcher.count(query);
    if (n == 0) n = 1;
    TopDocs results = searcher.search(query, n);

    int endPos;
    if (limit != 0) endPos = Math.min(results.totalHits, limit); // 1st n hits
    else endPos = results.totalHits; // all hits

    for (int i = 0; i < endPos; i++) {
      int id = results.scoreDocs[i].doc;
      Document doc = searcher.doc(id);
      res.add(doc.get("filename"));
    }
    reader.close();
    return res;
  } catch (ParseException e) {
    log(e.getMessage());
  } catch (IOException e) {
    log(e.getMessage());
  }
  return res;
}

public void testSpanNot() throws Exception {
  SpanQuery[] clauses = new SpanQuery[2];
  clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
  clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "three"));
  SpanQuery spq = new SpanNearQuery(clauses, 5, true);
  SpanNotQuery snq =
      new SpanNotQuery(spq, new SpanTermQuery(new Term(PayloadHelper.FIELD, "two")));

  Directory directory = newDirectory();
  RandomIndexWriter writer =
      new RandomIndexWriter(
          random(),
          directory,
          newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(similarity));
  Document doc = new Document();
  doc.add(newTextField(PayloadHelper.FIELD, "one two three one four three", Field.Store.YES));
  writer.addDocument(doc);
  IndexReader reader = writer.getReader();
  writer.close();

  checkSpans(
      MultiSpansWrapper.wrap(reader, snq, SpanWeight.Postings.PAYLOADS), 1, new int[] {2});

  reader.close();
  directory.close();
}

public void test() throws Exception {
  BaseDirectoryWrapper d = newDirectory();
  d.setCheckIndexOnClose(false); // we nuke files, but verify the reader still works
  RandomIndexWriter w = new RandomIndexWriter(random(), d);
  int numDocs = atLeast(100);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(newField("foo", "bar", TextField.TYPE_NOT_STORED));
    w.addDocument(doc);
  }

  IndexReader r = w.getReader();
  w.commit();
  w.close();

  for (String fileName : d.listAll()) {
    try {
      d.deleteFile(fileName);
      // may succeed, e.g. if the file is completely read into RAM.
    } catch (IOException ioe) {
      // ignore: this means codec (correctly) is holding the file open
    }
  }

  for (LeafReaderContext cxt : r.leaves()) {
    TestUtil.checkReader(cxt.reader());
  }

  r.close();
  d.close();
}

public void testEvilSearcherFactory() throws Exception {
  final Directory dir = newDirectory();
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  w.commit();

  final IndexReader other = DirectoryReader.open(dir);

  final SearcherFactory theEvilOne =
      new SearcherFactory() {
        @Override
        public IndexSearcher newSearcher(IndexReader ignored) {
          return LuceneTestCase.newSearcher(other);
        }
      };

  try {
    new SearcherManager(w.w, false, theEvilOne);
    fail("didn't hit expected exception");
  } catch (IllegalStateException ise) {
    // expected
  }
  w.close();
  other.close();
  dir.close();
}

public void search01() {
  try {
    IndexReader reader = IndexReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(reader);
    TermQuery query = new TermQuery(new Term("email", "*****@*****.**"));
    TopDocs tds = searcher.search(query, 10);
    for (ScoreDoc sd : tds.scoreDocs) {
      Document doc = searcher.doc(sd.doc);
      System.out.println(
          "(" + sd.doc + "-" + doc.getBoost() + "-" + sd.score + ")"
              + doc.get("name") + "[" + doc.get("email") + "]-->"
              + doc.get("id") + "," + doc.get("attach") + ","
              + doc.get("date") + "," + doc.getValues("email")[1]);
    }
    reader.close();
  } catch (CorruptIndexException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  }
}

public void doTest(int[] docs) throws Exception {
  Directory dir = makeIndex();
  IndexReader reader = IndexReader.open(dir, true);
  for (int i = 0; i < docs.length; i++) {
    Document d = reader.document(docs[i], SELECTOR);
    d.get(MAGIC_FIELD);

    List<Fieldable> fields = d.getFields();
    for (Iterator<Fieldable> fi = fields.iterator(); fi.hasNext(); ) {
      Fieldable f = null;
      try {
        f = fi.next();
        String fname = f.name();
        String fval = f.stringValue();
        assertNotNull(docs[i] + " FIELD: " + fname, fval);
        String[] vals = fval.split("#");
        if (!dataset.contains(vals[0]) || !dataset.contains(vals[1])) {
          fail("FIELD:" + fname + ",VAL:" + fval);
        }
      } catch (Exception e) {
        throw new Exception(docs[i] + " WTF: " + f.name(), e);
      }
    }
  }
  reader.close();
  dir.close();
}

public void test() throws IOException {
  assertTrue(dir != null);
  assertTrue(fieldInfos != null);
  IndexReader reader = DirectoryReader.open(dir);
  Document doc = reader.document(0);
  assertTrue(doc != null);
  assertTrue(doc.getField(DocHelper.TEXT_FIELD_1_KEY) != null);

  Field field = (Field) doc.getField(DocHelper.TEXT_FIELD_2_KEY);
  assertTrue(field != null);
  assertTrue(field.fieldType().storeTermVectors());
  assertFalse(field.fieldType().omitNorms());
  assertTrue(field.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

  field = (Field) doc.getField(DocHelper.TEXT_FIELD_3_KEY);
  assertTrue(field != null);
  assertFalse(field.fieldType().storeTermVectors());
  assertTrue(field.fieldType().omitNorms());
  assertTrue(field.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

  field = (Field) doc.getField(DocHelper.NO_TF_KEY);
  assertTrue(field != null);
  assertFalse(field.fieldType().storeTermVectors());
  assertFalse(field.fieldType().omitNorms());
  assertTrue(field.fieldType().indexOptions() == IndexOptions.DOCS);

  DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(DocHelper.TEXT_FIELD_3_KEY);
  reader.document(0, visitor);
  final List<IndexableField> fields = visitor.getDocument().getFields();
  assertEquals(1, fields.size());
  assertEquals(DocHelper.TEXT_FIELD_3_KEY, fields.get(0).name());
  reader.close();
}

// LUCENE-1262
public void testExceptions() throws Throwable {
  Path indexDir = createTempDir("testfieldswriterexceptions");
  Directory fsDir = newFSDirectory(indexDir);
  FaultyFSDirectory dir = new FaultyFSDirectory(fsDir);
  IndexWriterConfig iwc =
      newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE);
  IndexWriter writer = new IndexWriter(dir, iwc);
  for (int i = 0; i < 2; i++) {
    writer.addDocument(testDoc);
  }
  writer.forceMerge(1);
  writer.close();

  IndexReader reader = DirectoryReader.open(dir);
  dir.startFailing();

  boolean exc = false;
  for (int i = 0; i < 2; i++) {
    try {
      reader.document(i);
    } catch (IOException ioe) {
      // expected
      exc = true;
    }
    try {
      reader.document(i);
    } catch (IOException ioe) {
      // expected
      exc = true;
    }
  }
  assertTrue(exc);
  reader.close();
  dir.close();
}

/*
 * Index all child directories (only first-level directories) of the parent directory;
 * the indexed data is stored in a directory with the same name as the source directory.
 */
private long indexDirectories(String parent, String[] dirs, String index, SetupParameters Pa)
    throws FileHandlerException, IOException {
  long sumDocs = 0;
  // index each directory in parent directory
  for (int i = 0; i < dirs.length; i++) {
    System.out.println("\t-----FOLDER----- :" + dirs[i].toUpperCase());
    String dir_index = index + "/" + dirs[i];
    if ((index.endsWith("\\")) || (index.endsWith("/"))) {
      dir_index = index + dirs[i];
    }
    Directory di = FSDirectory.getDirectory(new File(dir_index), true);
    Pa.setDir(di);
    Pa.setWriter(new IndexWriter(Pa.getDir(), Pa.getAnalyzer(), true));

    // //get name of directory contains website to index
    // int begin=dirs[i].lastIndexOf("\\");
    // if(begin==-1) begin=dirs[i].lastIndexOf("/");
    // int end=dirs[i].length()-1;
    // String dir_site=dirs[i].substring(begin, end);

    this.index(dirs[i].toLowerCase(), Pa.getWriter(), new File(parent + "\\" + dirs[i]));
    Pa.getWriter().optimize();
    Pa.getWriter().close();

    IndexReader reader = Pa.getReader().open(Pa.getDir());
    sumDocs += reader.numDocs();
    reader.close();
  }
  return sumDocs;
}

public String getSpecificFreqTermInIndex(
    int KIntopK,
    ArrayList<String> sentQueries,
    int specificFrec,
    boolean allranges,
    boolean versionOld) {
  IndexReader indexReader = null;
  try {
    indexReader = IndexReader.open(indexDirectory);
  } catch (CorruptIndexException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  }

  String mostFreqTerm = "";
  try {
    mostFreqTerm =
        freqTermsFinderInIndex.SpecificFreqTerms(
            indexDirectory,
            analyzer,
            indexReader,
            KIntopK,
            sentQueries,
            specificFrec,
            allranges,
            versionOld);
    indexReader.close();
  } catch (Exception e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  }
  return mostFreqTerm;
}

public Query percolateQuery(
    String documentType,
    PercolateQuery.QueryStore queryStore,
    BytesReference documentSource,
    IndexSearcher searcher)
    throws IOException {
  IndexReader indexReader = searcher.getIndexReader();
  Query candidateMatchesQuery = createCandidateQuery(indexReader);
  Query verifiedMatchesQuery;
  // We can only skip the MemoryIndex verification when percolating a single document.
  // When the document being percolated contains a nested object field then the MemoryIndex
  // contains multiple documents. In this case the term query that indicates whether memory
  // index verification can be skipped can incorrectly indicate that non nested queries would
  // match, while their nested variants would not.
  if (indexReader.maxDoc() == 1) {
    verifiedMatchesQuery =
        new TermQuery(new Term(extractionResultField.name(), EXTRACTION_COMPLETE));
  } else {
    verifiedMatchesQuery = new MatchNoDocsQuery("nested docs, so no verified matches");
  }
  return new PercolateQuery(
      documentType,
      queryStore,
      documentSource,
      candidateMatchesQuery,
      searcher,
      verifiedMatchesQuery);
}

@Override
public void collect(int doc) throws IOException {
  BytesWrap parentId = typeCache.parentIdByDoc(doc);
  if (parentId == null) {
    return;
  }
  for (Tuple<IndexReader, IdReaderTypeCache> tuple : readers) {
    IndexReader indexReader = tuple.v1();
    IdReaderTypeCache idReaderTypeCache = tuple.v2();
    if (idReaderTypeCache == null) {
      // might be if we don't have that doc with that type in this reader
      continue;
    }
    int parentDocId = idReaderTypeCache.docById(parentId);
    if (parentDocId != -1 && !indexReader.isDeleted(parentDocId)) {
      OpenBitSet docIdSet = parentDocs().get(indexReader.getCoreCacheKey());
      if (docIdSet == null) {
        docIdSet = new OpenBitSet(indexReader.maxDoc());
        parentDocs.put(indexReader.getCoreCacheKey(), docIdSet);
      }
      docIdSet.fastSet(parentDocId);
      return;
    }
  }
}

public void testUpdateSameDoc() throws Exception {
  final Directory dir = newDirectory();

  final LineFileDocs docs = new LineFileDocs(random());
  for (int r = 0; r < 3; r++) {
    final IndexWriter w =
        new IndexWriter(
            dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(2));
    final int numUpdates = atLeast(20);
    int numThreads = TestUtil.nextInt(random(), 2, 6);
    IndexingThread[] threads = new IndexingThread[numThreads];
    for (int i = 0; i < numThreads; i++) {
      threads[i] = new IndexingThread(docs, w, numUpdates);
      threads[i].start();
    }

    for (int i = 0; i < numThreads; i++) {
      threads[i].join();
    }

    w.close();
  }

  IndexReader open = DirectoryReader.open(dir);
  assertEquals(1, open.numDocs());
  open.close();
  docs.close();
  dir.close();
}

@Override
public ScoredDocuments rerank(ScoredDocuments docs, RerankerContext context) {
  IndexReader reader = context.getIndexSearcher().getIndexReader();

  for (int i = 0; i < docs.documents.length; i++) {
    Terms terms = null;
    try {
      terms = reader.getTermVector(docs.ids[i], StatusField.TEXT.name);
    } catch (IOException e) {
      continue;
    }

    String qid = context.getQueryId().replaceFirst("^MB0*", "");
    String docid = docs.documents[i].getField(StatusField.ID.name).stringValue();

    out.print(qrels.getRelevanceGrade(qid, docid));
    out.print(" qid:" + qid);
    out.print(" 1:" + docs.scores[i]);

    float[] intFeatures = this.extractorChain.extractAll(docs.documents[i], terms, context);
    for (int j = 0; j < intFeatures.length; j++) {
      out.print(" " + (j + 2) + ":" + intFeatures[j]);
    }

    out.print(" # docid:" + docid);
    out.print("\n");
  }
  return docs;
}

public void testCachingWorks() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  writer.close();

  IndexReader reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir));
  AtomicReaderContext context = (AtomicReaderContext) reader.getContext();
  MockFilter filter = new MockFilter();
  CachingWrapperFilter cacher = new CachingWrapperFilter(filter);

  // first time, nested filter is called
  DocIdSet strongRef = cacher.getDocIdSet(context, context.reader().getLiveDocs());
  assertTrue("first time", filter.wasCalled());

  // make sure no exception if cache is holding the wrong docIdSet
  cacher.getDocIdSet(context, context.reader().getLiveDocs());

  // second time, nested filter should not be called
  filter.clear();
  cacher.getDocIdSet(context, context.reader().getLiveDocs());
  assertFalse("second time", filter.wasCalled());

  reader.close();
  dir.close();
}

private static Map<String, List<String>> generate_result(Directory directory) {
  Map<String, List<String>> result_map = new HashMap<String, List<String>>();
  try {
    IndexReader reader = IndexReader.open(directory);
    TermEnum termEnum = reader.terms();
    while (termEnum.next()) {
      String termEnumString = termEnum.term().toString();
      if (termEnumString.startsWith("content:")) {
        String term = termEnumString.substring(termEnumString.lastIndexOf(":") + 1);
        TermDocs termDocs = reader.termDocs(termEnum.term());
        while (termDocs.next()) {
          Document doc = reader.document(termDocs.doc());
          String relative_path = doc.get("relative_path");
          if (!result_map.containsKey(relative_path)) {
            result_map.put(relative_path, new ArrayList<String>());
          }
          // record the term together with its frequency in this document
          result_map.get(relative_path).add(term + termDocs.freq());
        }
      }
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
  return result_map;
}

public void testNullDocIdSetIterator() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  writer.close();

  IndexReader reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir));
  AtomicReaderContext context = (AtomicReaderContext) reader.getContext();

  final Filter filter =
      new Filter() {
        @Override
        public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) {
          return new DocIdSet() {
            @Override
            public DocIdSetIterator iterator() {
              return null;
            }
          };
        }
      };
  CachingWrapperFilter cacher = new CachingWrapperFilter(filter);

  // the caching filter should return the empty set constant
  assertNull(cacher.getDocIdSet(context, context.reader().getLiveDocs()));

  reader.close();
  dir.close();
}

/**
 * Find words for a more-like-this query former.
 *
 * @param docNum the id of the lucene document from which to find terms
 */
private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
  Map<String, Map<String, Int>> field2termFreqMap = new HashMap<>();
  for (String fieldName : fieldNames) {
    final Fields vectors = ir.getTermVectors(docNum);
    final Terms vector;
    if (vectors != null) {
      vector = vectors.terms(fieldName);
    } else {
      vector = null;
    }

    // field does not store term vector info
    if (vector == null) {
      Document d = ir.document(docNum);
      IndexableField[] fields = d.getFields(fieldName);
      for (IndexableField field : fields) {
        final String stringValue = field.stringValue();
        if (stringValue != null) {
          addTermFrequencies(new StringReader(stringValue), field2termFreqMap, fieldName);
        }
      }
    } else {
      addTermFrequencies(field2termFreqMap, vector, fieldName);
    }
  }

  return createQueue(field2termFreqMap);
}

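For context (not part of the snippet above): if this helper lives in a class shaped like Lucene's MoreLikeThis, callers normally go through the public like(int) entry point, which builds on the terms gathered here. A minimal, hedged sketch under that assumption; the field name, thresholds, and surrounding reader/searcher variables are illustrative.

// Assumes `reader` is an open IndexReader and `searcher` wraps it; both are hypothetical here.
MoreLikeThis mlt = new MoreLikeThis(reader);
mlt.setFieldNames(new String[] {"contents"}); // restrict term extraction to one field
mlt.setMinTermFreq(2);                        // skip terms that are rare within the source doc
mlt.setMinDocFreq(5);                         // skip terms that are rare across the index
Query likeThis = mlt.like(docNum);            // builds a query from the doc's most interesting terms
TopDocs similar = searcher.search(likeThis, 10);
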
private static void assertDocIdSetCacheable(
    IndexReader reader, Filter filter, boolean shouldCacheable) throws IOException {
  assertTrue(reader.getContext() instanceof AtomicReaderContext);
  AtomicReaderContext context = (AtomicReaderContext) reader.getContext();
  final CachingWrapperFilter cacher = new CachingWrapperFilter(filter);
  final DocIdSet originalSet = filter.getDocIdSet(context, context.reader().getLiveDocs());
  final DocIdSet cachedSet = cacher.getDocIdSet(context, context.reader().getLiveDocs());
  if (originalSet == null) {
    assertNull(cachedSet);
  }
  if (cachedSet == null) {
    assertTrue(originalSet == null || originalSet.iterator() == null);
  } else {
    assertTrue(cachedSet.isCacheable());
    assertEquals(shouldCacheable, originalSet.isCacheable());
    // System.out.println("Original: " + originalSet.getClass().getName()
    //     + " -- cached: " + cachedSet.getClass().getName());
    if (originalSet.isCacheable()) {
      assertEquals(
          "Cached DocIdSet must be of same class like uncached, if cacheable",
          originalSet.getClass(),
          cachedSet.getClass());
    } else {
      assertTrue(
          "Cached DocIdSet must be an FixedBitSet if the original one was not cacheable",
          cachedSet instanceof FixedBitSet || cachedSet == null);
    }
  }
}

public static Map<String, Integer> termFrequencies(
    IndexSearcher indexSearcher,
    Query documentFilterQuery,
    String fieldName,
    String propName,
    String altName) {
  try {
    String luceneField = ComplexFieldUtil.propertyField(fieldName, propName, altName);
    Weight weight = indexSearcher.createNormalizedWeight(documentFilterQuery, false);
    Map<String, Integer> freq = new HashMap<>();
    IndexReader indexReader = indexSearcher.getIndexReader();
    for (LeafReaderContext arc : indexReader.leaves()) {
      if (weight == null) throw new RuntimeException("weight == null");
      if (arc == null) throw new RuntimeException("arc == null");
      if (arc.reader() == null) throw new RuntimeException("arc.reader() == null");
      Scorer scorer = weight.scorer(arc, arc.reader().getLiveDocs());
      if (scorer != null) {
        while (scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          getFrequenciesFromTermVector(
              indexReader, scorer.docID() + arc.docBase, luceneField, freq);
        }
      }
    }
    return freq;
  } catch (IOException e) {
    throw ExUtil.wrapRuntimeException(e);
  }
}

/** @return the indexes */
public List<Index> getIndexes() {
  List<Index> indexes = new ArrayList<Index>();
  // Method[] methods = Index.class.getDeclaredMethods();
  int numDocs = reader.numDocs();
  // System.out.println(numDocs);
  for (int i = 0; i < numDocs; i++) {
    try {
      Document document = reader.document(i);
      List<Fieldable> f = document.getFields();

      Index index = new Index();
      for (Fieldable fieldable : f) {
        Field field = (Field) fieldable;
        Method m =
            Index.class.getDeclaredMethod("set" + field.name(), new Class[] {String.class});
        m.invoke(index, new Object[] {field.stringValue()});
        // Method m2 = Index.class.getDeclaredMethod("get" + field.name(), new Class[] {});
        // Object val = m2.invoke(index, new Object[] {});
        // System.out.println(m2.getName() + " = " + val);
        // System.out.println(m.getName() + " " + field.stringValue());
      }
      // System.out.println("RHAAR-" + i + " = " + index.getRHaarFeature());
      indexes.add(index);
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
  return indexes;
}

public void testMethod() throws Exception {
  Directory directory = newDirectory();

  String[] values = new String[] {"1", "2", "3", "4"};

  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
  for (int i = 0; i < values.length; i++) {
    Document doc = new Document();
    doc.add(newStringField(FIELD, values[i], Field.Store.YES));
    writer.addDocument(doc);
  }
  IndexReader ir = writer.getReader();
  writer.close();

  BooleanQuery booleanQuery1 = new BooleanQuery();
  booleanQuery1.add(new TermQuery(new Term(FIELD, "1")), BooleanClause.Occur.SHOULD);
  booleanQuery1.add(new TermQuery(new Term(FIELD, "2")), BooleanClause.Occur.SHOULD);

  BooleanQuery query = new BooleanQuery();
  query.add(booleanQuery1, BooleanClause.Occur.MUST);
  query.add(new TermQuery(new Term(FIELD, "9")), BooleanClause.Occur.MUST_NOT);

  IndexSearcher indexSearcher = newSearcher(ir);
  ScoreDoc[] hits = indexSearcher.search(query, null, 1000).scoreDocs;
  assertEquals("Number of matched documents", 2, hits.length);

  ir.close();
  directory.close();
}

public TermFreqVector searchIndexReturnFreqTerms(String searchString, String termString) {
  System.out.println("Searching for '" + searchString + "'");
  // Directory directory = FSDirectory.getDirectory();
  IndexReader indexReader;
  TermFreqVector termFreqDoc = null;
  try {
    indexReader = IndexReader.open(indexDirectory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    Term term = new Term(termString, searchString);
    TermQuery query = new TermQuery(term);

    TopDocs topDocs = indexSearcher.search(query, 10);
    if (topDocs.scoreDocs.length > 0) {
      // while(it.hasNext()){
      int docId = topDocs.scoreDocs[0].doc;
      Document doc = indexSearcher.doc(docId);
      // textOfURL = doc.get("text");
      // sourceCodeOfURL = doc.get("html");
      // this.docId = docID;
      termFreqDoc = indexReader.getTermFreqVector(docId, "text");
    }
  } catch (CorruptIndexException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  }
  return termFreqDoc;
}

public static BoboIndexReader getBoboIndexReader(Directory idxDir) throws BrowseException {
  try {
    if (!BoboIndexReader.indexExists(idxDir)) {
      throw new BrowseException("Index does not exist at: " + idxDir);
    }
  } catch (IOException ioe) {
    throw new BrowseException(ioe.getMessage(), ioe);
  }

  IndexReader reader = null;
  try {
    reader = IndexReader.open(idxDir, true);
  } catch (IOException ioe) {
    throw new BrowseException(ioe.getMessage(), ioe);
  }

  BoboIndexReader bReader = null;
  try {
    bReader = BoboIndexReader.getInstance(reader);
  } catch (IOException ioe) {
    if (reader != null) {
      try {
        reader.close();
      } catch (IOException e) {
        logger.error(e.getMessage(), e);
      }
    }
    throw new BrowseException(ioe.getMessage(), ioe);
  }
  return bReader;
}

public static void main(String[] args) throws IOException, ParseException {
  String indexDir = "C:/lucenedir";
  Directory directory = FSDirectory.open(Paths.get(indexDir));
  IndexReader reader = DirectoryReader.open(directory);
  IndexSearcher searcher = new IndexSearcher(reader);

  int day = (int) (new Date().getTime() / Constans.DAY_MILLIS);
  QueryParser parser = new QueryParser("contents", new StandardAnalyzer());
  Query query = parser.parse("java in action");
  Query customScoreQuery =
      new RecencyBoostCustomScoreQuery(query, 2.0, day, 6 * 365, "pubmonthAsDay");
  Sort sort =
      new Sort(
          new SortField[] {
            SortField.FIELD_SCORE, new SortField("title2", SortField.Type.STRING)
          });
  TopDocs hits = searcher.search(customScoreQuery, null, Integer.MAX_VALUE, sort, true, false);

  for (int i = 0; i < hits.scoreDocs.length; i++) {
    // Either way of fetching the Document works; searcher.doc internally delegates to
    // reader.document.
    // Document doc = reader.document(hits.scoreDocs[i].doc);
    Document doc = searcher.doc(hits.scoreDocs[i].doc);
    System.out.println(
        (1 + i)
            + ": "
            + doc.get("title")
            + ": pubmonth="
            + doc.get("pubmonth")
            + " score="
            + hits.scoreDocs[i].score);
  }

  reader.close();
  directory.close();
}