@Override
protected void setUp() throws Exception {
  super.setUp();
  RAMDirectory dirA = new RAMDirectory();
  RAMDirectory dirB = new RAMDirectory();
  IndexWriter wA =
      new IndexWriter(dirA, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
  IndexWriter wB =
      new IndexWriter(dirB, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
  long theLong = Long.MAX_VALUE;
  double theDouble = Double.MAX_VALUE;
  byte theByte = Byte.MAX_VALUE;
  short theShort = Short.MAX_VALUE;
  int theInt = Integer.MAX_VALUE;
  float theFloat = Float.MAX_VALUE;
  for (int i = 0; i < NUM_DOCS; i++) {
    Document doc = new Document();
    doc.add(
        new Field("theLong", String.valueOf(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED));
    doc.add(
        new Field(
            "theDouble", String.valueOf(theDouble--), Field.Store.NO, Field.Index.NOT_ANALYZED));
    doc.add(
        new Field("theByte", String.valueOf(theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED));
    doc.add(
        new Field(
            "theShort", String.valueOf(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED));
    doc.add(
        new Field("theInt", String.valueOf(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED));
    doc.add(
        new Field(
            "theFloat", String.valueOf(theFloat--), Field.Store.NO, Field.Index.NOT_ANALYZED));
    if (0 == i % 3) {
      wA.addDocument(doc);
    } else {
      wB.addDocument(doc);
    }
  }
  wA.close();
  wB.close();
  readerA = IndexReader.open(dirA, true);
  readerB = IndexReader.open(dirB, true);
  readerX = new MultiReader(new IndexReader[] {readerA, readerB});
}
public void testDuellMemIndex() throws IOException {
  LineFileDocs lineFileDocs = new LineFileDocs(random());
  int numDocs = atLeast(10);
  MemoryIndex memory = randomMemoryIndex();
  for (int i = 0; i < numDocs; i++) {
    Directory dir = newDirectory();
    MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
    mockAnalyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
    Document nextDoc = lineFileDocs.nextDoc();
    Document doc = new Document();
    for (IndexableField field : nextDoc.getFields()) {
      if (field.fieldType().indexOptions() != IndexOptions.NONE) {
        doc.add(field);
        if (random().nextInt(3) == 0) {
          doc.add(field); // randomly add the same field twice
        }
      }
    }
    writer.addDocument(doc);
    writer.close();
    for (IndexableField field : doc) {
      memory.addField(field.name(), ((Field) field).stringValue(), mockAnalyzer);
    }
    DirectoryReader competitor = DirectoryReader.open(dir);
    LeafReader memIndexReader = (LeafReader) memory.createSearcher().getIndexReader();
    TestUtil.checkReader(memIndexReader);
    duellReaders(competitor, memIndexReader);
    IOUtils.close(competitor, memIndexReader);
    memory.reset();
    dir.close();
  }
  lineFileDocs.close();
}
public void index() throws IOException {
  final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(this.analyzer);
  indexWriterConfig.setOpenMode(OpenMode.CREATE);
  final IndexWriter indexWriter = new IndexWriter(this.dir, indexWriterConfig);
  indexDocs(indexWriter);
  indexWriter.close();
}
@Override
public void index(List<AgeObject> aol) {
  try {
    IndexWriter iWriter =
        new IndexWriter(
            index, analyzer, objectList == null, IndexWriter.MaxFieldLength.UNLIMITED);

    if (objectList == null) objectList = aol;
    else objectList.addAll(aol);

    for (AgeObject ao : aol) {
      Document doc = new Document();
      for (TextFieldExtractor tfe : extractors)
        doc.add(
            new Field(
                tfe.getName(),
                tfe.getExtractor().getValue(ao),
                Field.Store.NO,
                Field.Index.ANALYZED));
      iWriter.addDocument(doc);
    }

    iWriter.close();
    defaultFieldName = extractors.iterator().next().getName();
  } catch (CorruptIndexException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  }
}
// TODO: randomize
public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
  Directory directory = new MockDirectoryWrapper(random, new RAMDirectory());
  PayloadAnalyzer analyzer = new PayloadAnalyzer();
  IndexWriter writer =
      new IndexWriter(
          directory,
          new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity));
  // writer.infoStream = System.out;
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new Field(FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
    doc.add(
        new Field(
            MULTI_FIELD,
            English.intToEnglish(i) + " " + English.intToEnglish(i),
            Field.Store.YES,
            Field.Index.ANALYZED));
    doc.add(
        new Field(
            NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);
  }
  reader = IndexReader.open(writer, true);
  writer.close();
  IndexSearcher searcher = LuceneTestCase.newSearcher(reader);
  searcher.setSimilarity(similarity);
  return searcher;
}
@Before
public void setUp() throws Exception {
  serializer = new LuceneSerializer(true, true);
  entityPath = new PathBuilder<Object>(Object.class, "obj");
  title = entityPath.getString("title");
  author = entityPath.getString("author");
  text = entityPath.getString("text");
  publisher = entityPath.getString("publisher");
  year = entityPath.getNumber("year", Integer.class);
  rating = entityPath.getString("rating");
  gross = entityPath.getNumber("gross", Double.class);
  titles = entityPath.getCollection("title", String.class, StringPath.class);
  longField = entityPath.getNumber("longField", Long.class);
  shortField = entityPath.getNumber("shortField", Short.class);
  byteField = entityPath.getNumber("byteField", Byte.class);
  floatField = entityPath.getNumber("floatField", Float.class);
  idx = new RAMDirectory();
  config =
      new IndexWriterConfig(new StandardAnalyzer())
          .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  writer = new IndexWriter(idx, config);
  writer.addDocument(createDocument());
  writer.close();
  IndexReader reader = DirectoryReader.open(idx);
  searcher = new IndexSearcher(reader);
}
public void createIndex() {
  loadTweets("datasets/sentiment-short.csv", 100);
  directory = new RAMDirectory();
  try {
    IndexWriter writer = getWriter();
    for (int i = 0; i < tweets.size(); i++) {
      Document doc = new Document();
      doc.add(
          new Field(
              "tweet",
              tweets.get(i).getText(),
              Field.Store.YES,
              Field.Index.ANALYZED,
              TermVector.YES));
      writer.addDocument(doc);
    }
    System.out.println("Docs: " + writer.numDocs());
    writer.close();
  } catch (Exception e) {
    e.printStackTrace();
  }
}
private void initializeIndex(String[] values) throws IOException {
  IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
  for (int i = 0; i < values.length; i++) {
    insertDoc(writer, values[i]);
  }
  writer.close();
}
/** @see org.wyona.yarep.impl.repo.vfs.DateIndexerSearcher#addRevision(String) */
public void addRevision(String revisionName) throws Exception {
  // WARN: Older creation dates might not have milliseconds and hence do not correspond
  // exactly with the revision name; to build the date index correctly one needs to use
  // the creation date.
  Date creationDate = node.getRevision(revisionName).getCreationDate();
  log.debug(
      "Add revision '" + revisionName + "' with creation date '" + creationDate
          + "' to date index ...");

  Document doc = new Document();
  doc.add(
      new NumericField(CREATION_DATE_FIELD_NAME, Field.Store.YES, true)
          .setLongValue(creationDate.getTime()));
  // doc.add(new Field(CREATION_DATE_FIELD_NAME,
  //     org.apache.lucene.document.DateTools.dateToString(creationDate,
  //         org.apache.lucene.document.DateTools.Resolution.MILLISECOND),
  //     Field.Store.YES, Field.Index.NOT_ANALYZED));
  doc.add(
      new Field(
          REVISION_NAME_FIELD_NAME, revisionName, Field.Store.YES, Field.Index.NOT_ANALYZED));

  IndexWriter iw = getIndexWriter();
  Term revisionNameTerm = new Term(REVISION_NAME_FIELD_NAME, revisionName);
  iw.updateDocument(revisionNameTerm, doc);
  iw.optimize();
  iw.close();
}
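// A minimal companion sketch, not part of the original DateIndexerSearcher: because the
// creation date is indexed above as a NumericField long, revisions within a date range
// can be matched with a NumericRangeQuery (Lucene 3.x API). The method name and
// parameters are assumptions for illustration only.
private Query createCreationDateRangeQuery(Date from, Date to) {
  return NumericRangeQuery.newLongRange(
      CREATION_DATE_FIELD_NAME, from.getTime(), to.getTime(), true, true);
}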
private void indexDoc(File doc) {
  if (doc.exists() && doc.canRead()) {
    IndexWriter indexWriter = null;
    try {
      indexWriter = new IndexWriter(this.dir, this.iwc);
      this.indexDocs(indexWriter, doc);
      indexWriter.commit();
    } catch (IOException e) {
      if (indexWriter != null) {
        try {
          indexWriter.rollback();
        } catch (IOException e2) {
          // TODO Auto-generated catch block
          e2.printStackTrace();
        }
      }
      // TODO Auto-generated catch block
      e.printStackTrace();
    } finally {
      if (indexWriter != null) {
        try {
          indexWriter.close();
        } catch (IOException e2) {
          // TODO Auto-generated catch block
          e2.printStackTrace();
        }
      }
    }
  }
}
public static void main(String[] args) throws Exception {
  // setup Lucene to use an in-memory index
  Directory directory = new RAMDirectory();
  Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
  IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer);
  IndexWriter writer = new IndexWriter(directory, iwc);

  // index a few documents
  writer.addDocument(createDocument("1", "foo bar baz"));
  writer.addDocument(createDocument("2", "red green blue"));
  writer.addDocument(createDocument("3", "The Lucene was made by Doug Cutting"));
  writer.close();

  IndexReader reader = DirectoryReader.open(directory);
  IndexSearcher searcher = new IndexSearcher(reader);
  // a prefix query implicitly appends a trailing * to the term; since StandardAnalyzer
  // lowercases at index time, "cut" matches the token "cutting"
  Query query = new PrefixQuery(new Term(FIELD, "cut"));

  // display search results
  TopDocs topDocs = searcher.search(query, 10);
  for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
    Document doc = searcher.doc(scoreDoc.doc);
    System.out.println(doc);
  }
}
/**
 * Gracefully shuts down the writer for a db, ignoring any IOException raised on close.
 *
 * @param db Database name
 */
protected void close(String db) {
  IndexWriter writer = writers.get(db);
  try {
    if (writer != null) writer.close();
  } catch (IOException e) {
    // best-effort close: swallow the exception so shutdown can proceed
  }
}
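// A hedged variant sketch, not from the original source: close(db) above leaves the
// closed writer in the cache, so a later lookup could hand out a closed instance.
// Evicting first avoids that. The 'writers' map is the only name carried over from
// the original; everything else is an assumption.
protected void closeAndEvict(String db) {
  IndexWriter writer = writers.remove(db);
  try {
    if (writer != null) writer.close();
  } catch (IOException e) {
    // best-effort close: ignore
  }
}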
/*
 * Test per field codec support - adding fields with random codecs
 */
@Test
public void testStressPerFieldCodec() throws IOException {
  Directory dir = newDirectory(random());
  final int docsPerRound = 97;
  int numRounds = atLeast(1);
  for (int i = 0; i < numRounds; i++) {
    int num = TestUtil.nextInt(random(), 30, 60);
    IndexWriterConfig config =
        newIndexWriterConfig(random(), TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    config.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = newWriter(dir, config);
    for (int j = 0; j < docsPerRound; j++) {
      final Document doc = new Document();
      for (int k = 0; k < num; k++) {
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setTokenized(random().nextBoolean());
        customType.setOmitNorms(random().nextBoolean());
        Field field =
            newField("" + k, TestUtil.randomRealisticUnicodeString(random(), 128), customType);
        doc.add(field);
      }
      writer.addDocument(doc);
    }
    if (random().nextBoolean()) {
      writer.forceMerge(1);
    }
    writer.commit();
    assertEquals((i + 1) * docsPerRound, writer.maxDoc());
    writer.close();
  }
  dir.close();
}
private void indexFiles(ArrayList<String> images, DocumentBuilder builder, String indexPath)
    throws IOException {
  // eventually check if the directory is there or not ...
  IndexWriter iw = LuceneUtils.createIndexWriter(testIndex, false);
  int count = 0;
  long time = System.currentTimeMillis();
  for (String identifier : images) {
    // TODO: cut toes from the image ... -> doesn't work out very well. Stable at first,
    // decreasing then.
    // TODO: Joint Histogram ...
    // TODO: LSA / PCA on the vectors ... -> this looks like a job for me :-D
    // TODO: local features ...
    Document doc = null;
    if (cutImages) {
      BufferedImage bimg =
          ImageUtils.cropImage(ImageIO.read(new FileInputStream(identifier)), 0, 0, 200, 69);
      doc = builder.createDocument(bimg, identifier);
    } else doc = builder.createDocument(new FileInputStream(identifier), identifier);
    iw.addDocument(doc);
    count++;
    if (count % 100 == 0) {
      int percent = (int) Math.floor(((double) count * 100.0) / (double) images.size());
      double timeTemp = (double) (System.currentTimeMillis() - time) / 1000d;
      int secsLeft =
          (int) Math.round(((timeTemp / (double) count) * (double) images.size()) - timeTemp);
      System.out.println(percent + "% finished (" + count + " files), " + secsLeft + " s left");
    }
  }
  long timeTaken = (System.currentTimeMillis() - time);
  float sec = ((float) timeTaken) / 1000f;
  System.out.println(sec + " seconds taken, " + (timeTaken / count) + " ms per image.");
  iw.commit();
  iw.close();
}
protected void indexList(List<AgeObject> aol, boolean append) {
  try {
    if (searcher != null) {
      searcher.getIndexReader().close();
      searcher.close();
    }

    IndexWriterConfig idxCfg = new IndexWriterConfig(Version.LUCENE_36, analyzer);
    idxCfg.setRAMBufferSizeMB(50);
    idxCfg.setOpenMode(append ? OpenMode.APPEND : OpenMode.CREATE);

    IndexWriter iWriter = new IndexWriter(index, idxCfg);

    for (Document d : new DocCollection(aol, extractors)) iWriter.addDocument(d);

    iWriter.close();

    searcher = new IndexSearcher(IndexReader.open(index));
  } catch (CorruptIndexException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  }
}
public void testDemo() throws IOException, ParseException {
  Analyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);

  // Store the index in memory:
  Directory directory = new RAMDirectory();
  // To store an index on disk, use this instead:
  // Directory directory = FSDirectory.open("/tmp/testindex");
  IndexWriter iwriter =
      new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
  Document doc = new Document();
  String text = "This is the text to be indexed.";
  doc.add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
  iwriter.addDocument(doc);
  iwriter.close();

  // Now search the index:
  IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
  // Parse a simple query that searches for "text":
  QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "fieldname", analyzer);
  Query query = parser.parse("text");
  ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
  assertEquals(1, hits.length);
  // Iterate through the results:
  for (int i = 0; i < hits.length; i++) {
    Document hitDoc = isearcher.doc(hits[i].doc);
    assertEquals("This is the text to be indexed.", hitDoc.get("fieldname"));
  }
  isearcher.close();
  directory.close();
}
public void testListenerCalled() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
  final AtomicBoolean afterRefreshCalled = new AtomicBoolean(false);
  SearcherManager sm = new SearcherManager(iw, true, new SearcherFactory());
  sm.addListener(
      new ReferenceManager.RefreshListener() {
        @Override
        public void beforeRefresh() {}

        @Override
        public void afterRefresh(boolean didRefresh) {
          if (didRefresh) {
            afterRefreshCalled.set(true);
          }
        }
      });
  iw.addDocument(new Document());
  iw.commit();
  assertFalse(afterRefreshCalled.get());
  sm.maybeRefreshBlocking();
  assertTrue(afterRefreshCalled.get());
  sm.close();
  iw.close();
  dir.close();
}
// Verify: do stress test, by opening IndexReaders and
// IndexWriters over & over in 2 threads and making sure
// no unexpected exceptions are raised:
public void testStressLocks() throws Exception {
  Path tempPath = createTempDir();
  assumeFalse("cannot handle buggy Files.delete", TestUtil.hasWindowsFS(tempPath));
  Directory dir = getDirectory(tempPath);

  // First create a 1 doc index:
  IndexWriter w =
      new IndexWriter(
          dir, new IndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));
  addDoc(w);
  w.close();

  WriterThread writer = new WriterThread(100, dir);
  SearcherThread searcher = new SearcherThread(100, dir);
  writer.start();
  searcher.start();

  while (writer.isAlive() || searcher.isAlive()) {
    Thread.sleep(1000);
  }

  assertTrue("IndexWriter hit unexpected exceptions", !writer.hitException);
  assertTrue("IndexSearcher hit unexpected exceptions", !searcher.hitException);

  dir.close();
}
public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir)
    throws Exception {
  int[] freq = new int[nTerms];
  terms = new Term[nTerms];
  for (int i = 0; i < nTerms; i++) {
    int f = (nTerms + 1) - i; // make first terms less frequent
    freq[i] = (int) Math.ceil(Math.pow(f, power));
    terms[i] = new Term("f", Character.toString((char) ('A' + i)));
  }

  IndexWriter iw =
      new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
  for (int i = 0; i < nDocs; i++) {
    Document d = new Document();
    for (int j = 0; j < nTerms; j++) {
      if (r.nextInt(freq[j]) == 0) {
        d.add(new Field("f", terms[j].text(), Field.Store.NO, Field.Index.NOT_ANALYZED));
        // System.out.println(d);
      }
    }
    iw.addDocument(d);
  }
  iw.optimize();
  iw.close();
}
/** Build the example index. */
private void index() throws IOException {
  IndexWriterConfig iwc =
      new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE);
  IndexWriter indexWriter = new IndexWriter(indexDir, iwc);

  // Writes facet ords to a separate directory from the main index
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

  Document doc = new Document();
  // 3 occurrences for tag 'lucene'
  doc.add(new IntAssociationFacetField(3, "tags", "lucene"));
  // 87% confidence level of genre 'computing'
  doc.add(new FloatAssociationFacetField(0.87f, "genre", "computing"));
  indexWriter.addDocument(config.build(taxoWriter, doc));

  doc = new Document();
  // 1 occurrence for tag 'lucene'
  doc.add(new IntAssociationFacetField(1, "tags", "lucene"));
  // 2 occurrences for tag 'solr'
  doc.add(new IntAssociationFacetField(2, "tags", "solr"));
  // 75% confidence level of genre 'computing'
  doc.add(new FloatAssociationFacetField(0.75f, "genre", "computing"));
  // 34% confidence level of genre 'software'
  doc.add(new FloatAssociationFacetField(0.34f, "genre", "software"));
  indexWriter.addDocument(config.build(taxoWriter, doc));

  indexWriter.close();
  taxoWriter.close();
}
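// A minimal read-side sketch, not part of the original example: it assumes this class's
// FacetsConfig maps the "tags" dimension to index field "$tags" (as the Lucene
// associations demo does), that 'searcher' and 'taxoReader' are already open, and that
// the pre-9.x TaxonomyFacetSumIntAssociations API is available.
private FacetResult sumTagAssociations() throws IOException {
  FacetsCollector fc = new FacetsCollector();
  FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
  Facets tags = new TaxonomyFacetSumIntAssociations("$tags", taxoReader, config, fc);
  // Sums the per-document int associations, e.g. lucene=4 (3+1) and solr=2 for the
  // two documents indexed above
  return tags.getTopChildren(10, "tags");
}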
/**
 * Close the given writer.
 *
 * @param writer the IndexWriter to close; may be null
 */
public static void closeWriter(IndexWriter writer) {
  try {
    if (writer != null) writer.close();
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}
public void merge() {
  IndexWriter writer = null;
  try {
    writer =
        new IndexWriter(
            directory,
            new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
    // Merges the index down to two segments; documents marked as deleted in those
    // segments are purged.
    // Note: since Lucene 3.5 explicit merging is discouraged because it is very
    // expensive; Lucene schedules merges on its own as needed.
    writer.forceMerge(2);
  } catch (CorruptIndexException e) {
    e.printStackTrace();
  } catch (LockObtainFailedException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  } finally {
    try {
      if (writer != null) writer.close();
    } catch (CorruptIndexException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
private static Directory index(Analyzer analyzer, String processingPath) {
  RAMDirectory directory = null;
  IndexWriter indexWriter = null;
  try {
    directory = new RAMDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, analyzer);
    indexWriter = new IndexWriter(directory, iwc);
    File file = new File(processingPath);
    index_h("", file, indexWriter);
  } catch (IOException e) {
    e.printStackTrace();
  } finally {
    if (indexWriter != null) {
      try {
        indexWriter.close();
      } catch (CorruptIndexException e1) {
        // TODO Auto-generated catch block
        e1.printStackTrace();
      } catch (IOException e1) {
        // TODO Auto-generated catch block
        e1.printStackTrace();
      }
    }
  }
  return directory;
}
public void forceDelete() {
  IndexWriter writer = null;
  try {
    writer =
        new IndexWriter(
            directory,
            new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
    writer.forceMergeDeletes();
  } catch (CorruptIndexException e) {
    e.printStackTrace();
  } catch (LockObtainFailedException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  } finally {
    try {
      if (writer != null) writer.close();
    } catch (CorruptIndexException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
/** Build the example index. */
public void index() throws IOException {
  IndexWriter writer =
      new IndexWriter(
          indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

  // TODO: we could index in radians instead ... saves all the conversions in
  // getBoundingBoxFilter

  // Add documents with latitude/longitude location:
  // we index these both as DoublePoints (for bounding box/ranges) and as
  // NumericDocValuesFields (for scoring)
  Document doc = new Document();
  doc.add(new DoublePoint("latitude", 40.759011));
  doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.759011)));
  doc.add(new DoublePoint("longitude", -73.9844722));
  doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-73.9844722)));
  writer.addDocument(doc);

  doc = new Document();
  doc.add(new DoublePoint("latitude", 40.718266));
  doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.718266)));
  doc.add(new DoublePoint("longitude", -74.007819));
  doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.007819)));
  writer.addDocument(doc);

  doc = new Document();
  doc.add(new DoublePoint("latitude", 40.7051157));
  doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.7051157)));
  doc.add(new DoublePoint("longitude", -74.0088305));
  doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.0088305)));
  writer.addDocument(doc);

  // Open near-real-time searcher
  searcher = new IndexSearcher(DirectoryReader.open(writer));
  writer.close();
}
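// A minimal follow-up sketch, not part of the original example: because latitude and
// longitude are indexed as DoublePoints above, a bounding box is simply two range
// queries ANDed together as filters. The box coordinates and the method name are
// arbitrary illustrative values.
private TopDocs searchBoundingBox() throws IOException {
  Query box =
      new BooleanQuery.Builder()
          .add(DoublePoint.newRangeQuery("latitude", 40.70, 40.76), BooleanClause.Occur.FILTER)
          .add(DoublePoint.newRangeQuery("longitude", -74.01, -73.98), BooleanClause.Occur.FILTER)
          .build();
  return searcher.search(box, 10);
}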
public void delete() {
  IndexWriter writer = null;
  try {
    writer =
        new IndexWriter(
            directory,
            new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
    // deleteDocuments takes either a Query or a Term; a Term is an exact-match value.
    // Deleted documents are not removed right away: they sit in something like a
    // recycle bin and can still be recovered until the deletes are committed.
    writer.deleteAll(); // delete everything
    // writer.deleteDocuments(new Term("id","1"));
    writer.commit(); // commit the deletes
  } catch (CorruptIndexException e) {
    e.printStackTrace();
  } catch (LockObtainFailedException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  } finally {
    try {
      if (writer != null) writer.close();
    } catch (CorruptIndexException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
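// A hedged companion to delete() above: pending deletes can be abandoned before commit()
// by calling rollback(), which discards all uncommitted changes and closes the writer.
// This method is a sketch assuming the same 'directory' field, not part of the original class.
public void deleteAllThenRollback() throws IOException {
  IndexWriter writer =
      new IndexWriter(
          directory,
          new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
  writer.deleteAll(); // marks every document as deleted, but nothing is committed yet
  writer.rollback(); // drops the uncommitted deleteAll and closes the writer
}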
public void testFarsiRangeFilterCollating(
    Analyzer analyzer, String firstBeg, String firstEnd, String secondBeg, String secondEnd)
    throws Exception {
  Directory dir = newDirectory();
  IndexWriter writer =
      new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
  Document doc = new Document();
  doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.ANALYZED));
  doc.add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
  writer.addDocument(doc);
  writer.close();
  IndexReader reader = IndexReader.open(dir);
  IndexSearcher searcher = new IndexSearcher(reader);
  Query query = new TermQuery(new Term("body", "body"));

  // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
  // orders the U+0698 character before the U+0633 character, so the single
  // index Term below should NOT be returned by a TermRangeFilter with a Farsi
  // Collator (or an Arabic one for the case when Farsi searcher not
  // supported).
  ScoreDoc[] result =
      searcher.search(query, new TermRangeFilter("content", firstBeg, firstEnd, true, true), 1)
          .scoreDocs;
  assertEquals("The index Term should not be included.", 0, result.length);

  result =
      searcher.search(query, new TermRangeFilter("content", secondBeg, secondEnd, true, true), 1)
          .scoreDocs;
  assertEquals("The index Term should be included.", 1, result.length);

  searcher.close();
  reader.close();
  dir.close();
}
public void update() {
  IndexWriter writer = null;
  try {
    writer =
        new IndexWriter(
            directory,
            new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
    /*
     * Lucene has no in-place update; updateDocument is really the combination of
     * two operations: delete the old document, then add the new one.
     */
    Document doc = new Document();
    doc.add(new Field("id", "11", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field("email", emails[0], Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("content", contents[0], Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field("name", names[0], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
    writer.updateDocument(new Term("id", "1"), doc);
  } catch (CorruptIndexException e) {
    e.printStackTrace();
  } catch (LockObtainFailedException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  } finally {
    try {
      if (writer != null) writer.close();
    } catch (CorruptIndexException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
private void indexFiles(String dir, String index, int featureIndex, boolean createNewIndex)
    throws IOException {
  ArrayList<String> images = FileUtils.getAllImages(new File(dir), true);
  IndexWriter iw =
      LuceneUtils.createIndexWriter(
          index, createNewIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
  // select one feature for the large index:
  int count = 0;
  long ms = System.currentTimeMillis();
  DocumentBuilder builder = new ChainedDocumentBuilder();
  ((ChainedDocumentBuilder) builder).addBuilder(builders[featureIndex]);
  // ((ChainedDocumentBuilder) builder).addBuilder(builders[0]);
  for (Iterator<String> iterator = images.iterator(); iterator.hasNext(); ) {
    count++;
    if (count > 100 && count % 5000 == 0) {
      System.out.println(
          count
              + " files indexed. "
              + (System.currentTimeMillis() - ms) / (count)
              + " ms per file");
    }
    String file = iterator.next();
    try {
      iw.addDocument(builder.createDocument(new FileInputStream(file), file));
    } catch (Exception e) {
      System.err.println("Error: " + e.getMessage());
    }
  }
  iw.close();
}
@Test
public void testDirectoryCleaned() throws Exception {
  final RAMDirectory directory = new RAMDirectory();
  final StandardAnalyzer analyzer = new StandardAnalyzer(DefaultIndexManager.LUCENE_VERSION);
  {
    IndexWriterConfig conf = new IndexWriterConfig(DefaultIndexManager.LUCENE_VERSION, analyzer);
    final IndexWriter writer = new IndexWriter(directory, conf);
    writer.addDocument(new Document());
    writer.close();
  }
  final DefaultConfiguration configuration = new DefaultConfiguration(directory, analyzer);
  final DefaultIndexEngine engine =
      new DefaultIndexEngine(
          new Supplier<IndexSearcher>() {
            public IndexSearcher get() {
              throw new AssertionFailedError("no searcher required");
            }
          },
          new Function<Index.UpdateMode, Writer>() {
            public Writer get(final Index.UpdateMode mode) {
              throw new AssertionFailedError("no writer required");
            }
          },
          configuration,
          FlushPolicy.NONE);
  assertEquals(1, new IndexSearcher(directory).getIndexReader().numDocs());
  engine.clean();
  assertEquals(0, new IndexSearcher(directory).getIndexReader().numDocs());
}