public DirectoryReader getReader(boolean applyDeletions) throws IOException {
  getReaderCalled = true;
  if (r.nextInt(20) == 2) {
    doRandomForceMerge();
  }
  if (!applyDeletions || r.nextBoolean()) {
    if (LuceneTestCase.VERBOSE) {
      System.out.println("RIW.getReader: use NRT reader");
    }
    if (r.nextInt(5) == 1) {
      w.commit();
    }
    return w.getReader(applyDeletions);
  } else {
    if (LuceneTestCase.VERBOSE) {
      System.out.println("RIW.getReader: open new reader");
    }
    w.commit();
    if (r.nextBoolean()) {
      return DirectoryReader.open(w.getDirectory());
    } else {
      return w.getReader(applyDeletions);
    }
  }
}
@Test
public void baseUIMAAnalyzerIntegrationTest() throws Exception {
  Directory dir = new RAMDirectory();
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer));
  // add the first doc
  Document doc = new Document();
  String dummyTitle = "this is a dummy title ";
  doc.add(new TextField("title", dummyTitle, Field.Store.YES));
  String dummyContent = "there is some content written here";
  doc.add(new TextField("contents", dummyContent, Field.Store.YES));
  writer.addDocument(doc);
  writer.commit();
  // try the search over the first doc
  DirectoryReader directoryReader = DirectoryReader.open(dir);
  IndexSearcher indexSearcher = newSearcher(directoryReader);
  TopDocs result = indexSearcher.search(new MatchAllDocsQuery(), 1);
  assertTrue(result.totalHits > 0);
  Document d = indexSearcher.doc(result.scoreDocs[0].doc);
  assertNotNull(d);
  assertNotNull(d.getField("title"));
  assertEquals(dummyTitle, d.getField("title").stringValue());
  assertNotNull(d.getField("contents"));
  assertEquals(dummyContent, d.getField("contents").stringValue());
  // add a second doc
  doc = new Document();
  String dogmasTitle = "dogmas";
  doc.add(new TextField("title", dogmasTitle, Field.Store.YES));
  String dogmasContents = "white men can't jump";
  doc.add(new TextField("contents", dogmasContents, Field.Store.YES));
  writer.addDocument(doc);
  writer.commit();
  // reopen the reader so the second doc is visible
  directoryReader.close();
  directoryReader = DirectoryReader.open(dir);
  indexSearcher = newSearcher(directoryReader);
  result = indexSearcher.search(new MatchAllDocsQuery(), 2);
  Document d1 = indexSearcher.doc(result.scoreDocs[1].doc);
  assertNotNull(d1);
  assertNotNull(d1.getField("title"));
  assertEquals(dogmasTitle, d1.getField("title").stringValue());
  assertNotNull(d1.getField("contents"));
  assertEquals(dogmasContents, d1.getField("contents").stringValue());
  // do a MatchAllDocsQuery to retrieve both docs
  result = indexSearcher.search(new MatchAllDocsQuery(), 2);
  assertEquals(2, result.totalHits);
  writer.close();
  indexSearcher.getIndexReader().close();
  dir.close();
}
public void index(List<CObj> l, boolean onlynew) throws IOException {
  for (CObj o : l) {
    indexNoCommit(o, onlynew);
  }
  writer.commit();
}
@Override
public void deleteOnCreator(Integer creator) throws Exception {
  boolean create = true;
  File indexDir = new File(getIndexPath());
  if (!indexDir.exists()) {
    indexDir.mkdirs();
  } else if (indexDir.list().length > 0) {
    create = false;
  }
  Directory dir = FSDirectory.open(indexDir);
  IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
  iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
  IndexWriter writer = null;
  try {
    writer = new IndexWriter(dir, iwc);
    writer.deleteDocuments(
        new Term(
            FIELD_LABEL_CREATE_USER,
            StringUtils.zeroPadding(creator, IndexingValue.ID_ZEROPADDING_DIGIT)));
    writer.commit();
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
@Override
public void deleteItem(String id) throws Exception {
  boolean create = true;
  File indexDir = new File(getIndexPath());
  if (!indexDir.exists()) {
    indexDir.mkdirs();
  } else if (indexDir.list().length > 0) {
    create = false;
  }
  Directory dir = FSDirectory.open(indexDir);
  IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
  iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
  IndexWriter writer = null;
  try {
    writer = new IndexWriter(dir, iwc);
    writer.deleteDocuments(new Term(FIELD_LABEL_ID, id));
    writer.commit();
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
@Test
public void indexTest()
    throws CorruptIndexException, LockObtainFailedException, IOException, ParseException {
  MyStaticValue.userLibrary = "/home/ansj/workspace/ansj_seg/library/default.dic";
  HashSet<String> hs = new HashSet<String>();
  hs.add("的");
  Analyzer analyzer = new AnsjAnalyzer(TYPE.dic);
  Directory directory = null;
  IndexWriter iwriter = null;
  String text = "季德胜蛇药片 10片*6板 ";
  UserDefineLibrary.insertWord("蛇药片", "n", 1000);
  IndexWriterConfig ic = new IndexWriterConfig(analyzer);
  // build the index in memory
  directory = new RAMDirectory();
  iwriter = new IndexWriter(directory, ic);
  addContent(iwriter, text);
  iwriter.commit();
  iwriter.close();
  System.out.println("index built");
  Analyzer queryAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.dic, hs);
  System.out.println("index ok to search!");
  search(queryAnalyzer, directory, "\"季德胜蛇药片\"");
}
public void dump(OutputStream outputStream, IndexWriter indexWriter, Lock commitLock)
    throws IOException {
  IndexCommit indexCommit = null;
  String segmentsFileName = null;
  // Lock to stop external commits while recording the dump IndexCommit
  commitLock.lock();
  try {
    // Commit all pending changes to ensure the dumped index is complete
    indexWriter.commit();
    // Record the IndexCommit, then release the lock so other dumpers and
    // committers can move on
    indexCommit = _lastIndexCommit;
    segmentsFileName = indexCommit.getSegmentsFileName();
    _dumpingSegmentsFileNames.add(segmentsFileName);
  } finally {
    commitLock.unlock();
  }
  try {
    IndexCommitSerializationUtil.serializeIndex(indexCommit, outputStream);
  } finally {
    // Clear the dumping segments file name so the old index can be removed.
    _dumpingSegmentsFileNames.remove(segmentsFileName);
  }
}
/**
 * Batch-delete documents by primary key.
 *
 * @param ids primary-key values of the documents to delete
 * @param clazz the entity class whose index is updated
 * @return "error" when input validation fails, otherwise null
 * @throws Exception
 */
public static String deleteDocument(List<String> ids, Class clazz) throws Exception {
  List<String> luceneFields = ClassUtils.getLuceneFields(clazz);
  if (CollectionUtils.isEmpty(luceneFields)) {
    return "error";
  }
  if (CollectionUtils.isEmpty(ids)) {
    return "error";
  }
  String pkName = ClassUtils.getEntityInfoByClass(clazz).getPkName();
  // index writer configuration
  IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
  // open the index directory
  Directory directory = getDirectory(clazz);
  if (directory == null) {
    return null;
  }
  IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
  for (String t : ids) {
    /*
     * Term term = new Term(pkName, t);
     * indexWriter.deleteDocuments(term);
     */
    // parse the key as a query against the primary-key field
    // QueryParser parser = new QueryParser(field, analyzer);
    QueryParser parser = new MultiFieldQueryParser(new String[] {pkName}, analyzer);
    Query query = parser.parse(t);
    indexWriter.deleteDocuments(query);
  }
  indexWriter.commit();
  indexWriter.close(); // remember to close, or the deletes never reach the index files
  directory.close(); // close the directory
  return null;
}
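The commented-out Term-based path above is usually the safer way to delete by primary key, because the raw key is never run through the query parser. A minimal sketch under that assumption (the helper name is illustrative; it presumes the primary-key field is indexed as an untokenized string):

import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

// Delete documents whose primary-key field exactly matches each id.
// Unlike the QueryParser path, ids containing query syntax characters
// (e.g. "a-b" or "1:2") cannot be misparsed.
static void deleteByPk(IndexWriter writer, String pkName, List<String> ids)
    throws IOException {
  for (String id : ids) {
    writer.deleteDocuments(new Term(pkName, id));
  }
  writer.commit();
}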
public void index(List<CObj> l) throws IOException {
  for (CObj o : l) {
    indexNoCommit(o, false);
  }
  writer.commit();
}
private void indexDoc(File doc) {
  if (doc.exists() && doc.canRead()) {
    IndexWriter indexWriter = null;
    try {
      indexWriter = new IndexWriter(this.dir, this.iwc);
      this.indexDocs(indexWriter, doc);
      indexWriter.commit();
    } catch (IOException e) {
      // discard the uncommitted changes before giving up
      if (indexWriter != null) {
        try {
          indexWriter.rollback();
        } catch (IOException e2) {
          e2.printStackTrace();
        }
      }
      e.printStackTrace();
    } finally {
      if (indexWriter != null) {
        try {
          indexWriter.close();
        } catch (IOException e2) {
          e2.printStackTrace();
        }
      }
    }
  }
}
/*
 * Test per-field codec support - adding fields with random codecs
 */
@Test
public void testStressPerFieldCodec() throws IOException {
  Directory dir = newDirectory(random());
  final int docsPerRound = 97;
  int numRounds = atLeast(1);
  for (int i = 0; i < numRounds; i++) {
    int num = TestUtil.nextInt(random(), 30, 60);
    IndexWriterConfig config =
        newIndexWriterConfig(random(), TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    config.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = newWriter(dir, config);
    for (int j = 0; j < docsPerRound; j++) {
      final Document doc = new Document();
      for (int k = 0; k < num; k++) {
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setTokenized(random().nextBoolean());
        customType.setOmitNorms(random().nextBoolean());
        Field field =
            newField("" + k, TestUtil.randomRealisticUnicodeString(random(), 128), customType);
        doc.add(field);
      }
      writer.addDocument(doc);
    }
    if (random().nextBoolean()) {
      writer.forceMerge(1);
    }
    writer.commit();
    assertEquals((i + 1) * docsPerRound, writer.maxDoc());
    writer.close();
  }
  dir.close();
}
private void indexFiles(ArrayList<String> images, DocumentBuilder builder, String indexPath)
    throws IOException {
  // eventually check if the directory is there or not ...
  IndexWriter iw = LuceneUtils.createIndexWriter(testIndex, false);
  int count = 0;
  long time = System.currentTimeMillis();
  for (String identifier : images) {
    // TODO: cut toes from the image ... -> doesn't work out very well. Stable at first,
    // decreasing then.
    // TODO: Joint Histogram ...
    // TODO: LSA / PCA on the vectors ... -> this looks like a job for me :-D
    // TODO: local features ...
    Document doc = null;
    if (cutImages) {
      BufferedImage bimg =
          ImageUtils.cropImage(ImageIO.read(new FileInputStream(identifier)), 0, 0, 200, 69);
      doc = builder.createDocument(bimg, identifier);
    } else {
      doc = builder.createDocument(new FileInputStream(identifier), identifier);
    }
    iw.addDocument(doc);
    count++;
    if (count % 100 == 0) {
      int percent = (int) Math.floor(((double) count * 100.0) / (double) images.size());
      double timeTemp = (double) (System.currentTimeMillis() - time) / 1000d;
      int secsLeft =
          (int) Math.round(((timeTemp / (double) count) * (double) images.size()) - timeTemp);
      System.out.println(percent + "% finished (" + count + " files), " + secsLeft + " s left");
    }
  }
  long timeTaken = (System.currentTimeMillis() - time);
  float sec = ((float) timeTaken) / 1000f;
  System.out.println(sec + " seconds taken, " + (timeTaken / count) + " ms per image.");
  iw.commit();
  iw.close();
}
@Override
protected void commitVersion(String version) throws IOException {
  HashMap<String, String> versionMap = new HashMap<String, String>();
  versionMap.put(VERSION_NAME, version);
  _idxWriter.commit(versionMap);
  updateReader();
}
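The version string written here travels with the commit as user data, so it can be read back from the index later. A minimal sketch of that round trip, assuming the same Lucene 4.x-era API in which commit(Map) stores commit user data (the method and parameter names are illustrative):

import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.Directory;

// Read the version recorded by commitVersion() from the most recent commit.
static String readCommittedVersion(Directory dir, String versionName) throws IOException {
  DirectoryReader reader = DirectoryReader.open(dir);
  try {
    Map<String, String> userData = reader.getIndexCommit().getUserData();
    return userData.get(versionName);
  } finally {
    reader.close();
  }
}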
@Test
public void testSuggestOnMostlyDeletedDocuments() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // using IndexWriter instead of RandomIndexWriter
  IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field"));
  int num = Math.min(1000, atLeast(10));
  for (int i = 1; i <= num; i++) {
    Document document = new Document();
    document.add(new SuggestField("suggest_field", "abc_" + i, i));
    document.add(new StoredField("weight_fld", i));
    document.add(new DimensionalIntField("weight_fld", i));
    iw.addDocument(document);
    if (usually()) {
      iw.commit();
    }
  }
  iw.deleteDocuments(DimensionalRangeQuery.new1DIntRange("weight_fld", 2, true, null, false));
  DirectoryReader reader = DirectoryReader.open(iw, true);
  SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
  PrefixCompletionQuery query =
      new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
  TopSuggestDocs suggest = indexSearcher.suggest(query, 1);
  assertSuggestions(suggest, new Entry("abc_1", 1));
  reader.close();
  iw.close();
}
@Test
public void createIndex() throws Exception {
  IndexWriter indexWriter = LuceneUtil.getIndexWriter();
  indexWriter.addDocument(getDocument());
  indexWriter.commit();
}
private void deleteDocument()
    throws CorruptIndexException, LockObtainFailedException, IOException {
  IndexWriter indexWriter = getWriter();
  indexWriter.deleteDocuments(new Term("ids", "1"));
  indexWriter.commit();
  indexWriter.close();
}
@PostConstruct
public void createOrVerifyIndex() throws Exception {
  LOGGER.info("Initializing Index..........................please Wait..0%");
  index = new File(appproperties.getLuceneIndexPath());
  suggest = new File(appproperties.getLiceneSuggestIndexPath());
  directory = FSDirectory.open(index, NoLockFactory.getNoLockFactory());
  suggestDirectory = FSDirectory.open(suggest, NoLockFactory.getNoLockFactory());
  iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
  writer = new IndexWriter(directory, iwc);
  writer.commit();
  indexReader = DirectoryReader.open(directory);
  indexSearcher = new IndexSearcher(indexReader, executorService);
  parser = new MultiFieldQueryParser(new String[] {TITLE_FIELD, CONTENTS_FIELD}, analyzer);
  suggester =
      new AnalyzingInfixSuggester(
          Version.LATEST,
          suggestDirectory,
          analyzer,
          analyzer,
          AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS);
  if (!suggest.exists() && !suggest.isDirectory()) {
    LOGGER.info(
        "Lucene Suggest did not exist.........................................Building Please wait.........0%");
    suggester.build(new IndexFileIterator(new ArrayList<IndexFile>().iterator()));
    suggester.refresh();
    LOGGER.info(
        "Lucene Suggest Build Complete...................................................................100%");
  }
  LOGGER.info("Lucene Ready............................................100%");
}
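Once built, the suggester can serve infix lookups. A minimal sketch of how this instance might be queried, assuming the four-argument AnalyzingInfixSuggester.lookup variant from this Lucene line (the query string is illustrative):

import java.util.List;
import org.apache.lucene.search.suggest.Lookup;

// Ask for up to 5 suggestions whose indexed text contains "repor",
// requiring all terms to match and highlighting the matched fragment.
List<Lookup.LookupResult> results = suggester.lookup("repor", 5, true, true);
for (Lookup.LookupResult result : results) {
  LOGGER.info("suggestion: " + result.key + " (weight=" + result.value + ")");
}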
/**
 * Merges the given taxonomy and index directories and commits the changes to the given writers.
 */
public static void merge(
    Directory srcIndexDir,
    Directory srcTaxDir,
    OrdinalMap map,
    IndexWriter destIndexWriter,
    DirectoryTaxonomyWriter destTaxWriter)
    throws IOException {
  // merge the taxonomies
  destTaxWriter.addTaxonomy(srcTaxDir, map);
  int[] ordinalMap = map.getMap();
  FacetIndexingParams params = new DefaultFacetIndexingParams();
  DirectoryReader reader = DirectoryReader.open(srcIndexDir, -1);
  List<AtomicReaderContext> leaves = reader.leaves();
  AtomicReader[] wrappedLeaves = new AtomicReader[leaves.size()];
  for (int i = 0; i < leaves.size(); i++) {
    wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordinalMap, params);
  }
  try {
    destIndexWriter.addIndexes(new MultiReader(wrappedLeaves));
    // commit changes to taxonomy and index, respectively
    destTaxWriter.commit();
    destIndexWriter.commit();
  } finally {
    reader.close();
  }
}
/**
 * Optimizes the index, forcing the merge of all segments that have deleted documents. This
 * operation may block until all merging completes.
 *
 * @param doWait {@code true} if the call should block until the operation completes.
 * @throws IOException If Lucene throws IO errors.
 */
@Override
public void forceMergeDeletes(boolean doWait) throws IOException {
  Log.info("%s merging index segments with deletions", logName);
  indexWriter.forceMergeDeletes(doWait);
  indexWriter.commit();
  Log.info("%s merging index segments with deletions completed", logName);
}
public OperationResponse commitAndOptimize() {
  try {
    if (logger.isDebugEnabled()) {
      logger.debug("committing...");
    }
    indexWriter.commit();
    if (logger.isDebugEnabled()) {
      logger.debug("commit finished.");
    }
    if (logger.isDebugEnabled()) {
      logger.debug("optimizing...");
    }
    indexWriter.forceMerge(defaultMergeSize);
    if (logger.isDebugEnabled()) {
      logger.debug("optimize finished.");
    }
    reopenSearcher();
  } catch (IOException e) {
    logger.error("optimize error", e);
    return new OperationResponse(e.getMessage(), ResultCodes.COMMON_ERROR);
  } catch (OutOfMemoryError e) {
    CloseUtil.close(indexWriter);
    logger.error("OOM error", e);
    return new OperationResponse(e.getMessage(), ResultCodes.COMMON_ERROR);
  }
  return new OperationResponse();
}
/**
 * Optimizes the index, forcing a merge that leaves the specified number of segments. This
 * operation may block until all merging completes.
 *
 * @param maxNumSegments The maximum number of segments left in the index after merging finishes.
 * @param doWait {@code true} if the call should block until the operation completes.
 * @throws IOException If Lucene throws IO errors.
 */
@Override
public void forceMerge(int maxNumSegments, boolean doWait) throws IOException {
  Log.info("%s merging index segments to %d", logName, maxNumSegments);
  indexWriter.forceMerge(maxNumSegments, doWait);
  indexWriter.commit();
  Log.info("%s segments merge completed", logName);
}
public void delete() {
  IndexWriter writer = null;
  try {
    writer =
        new IndexWriter(
            directory,
            new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
    // The argument selects what to delete: either a Query or a Term,
    // where a Term matches an exact value.
    // Deleted documents are not removed outright; they are kept in a
    // recycle-bin-like state and can still be recovered before the commit.
    writer.deleteAll(); // delete everything
    // writer.deleteDocuments(new Term("id", "1"));
    writer.commit(); // make the deletion effective
  } catch (CorruptIndexException e) {
    e.printStackTrace();
  } catch (LockObtainFailedException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  } finally {
    try {
      if (writer != null) {
        writer.close();
      }
    } catch (CorruptIndexException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
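The "recycle bin" behavior the comment describes only holds until commit(): pending deletes can still be discarded with rollback(). A minimal sketch under the same Lucene 3.5-era setup (directory and analyzer construction as above):

// Delete everything, then change our mind: rollback() discards all
// changes made since the last commit and closes the writer, so the
// previously committed documents remain searchable.
IndexWriter writer =
    new IndexWriter(
        directory,
        new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
writer.deleteAll();
writer.rollback(); // nothing was committed, so the index is unchanged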
public void testListenerCalled() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
  final AtomicBoolean afterRefreshCalled = new AtomicBoolean(false);
  SearcherManager sm = new SearcherManager(iw, true, new SearcherFactory());
  sm.addListener(
      new ReferenceManager.RefreshListener() {
        @Override
        public void beforeRefresh() {}

        @Override
        public void afterRefresh(boolean didRefresh) {
          if (didRefresh) {
            afterRefreshCalled.set(true);
          }
        }
      });
  iw.addDocument(new Document());
  iw.commit();
  assertFalse(afterRefreshCalled.get());
  sm.maybeRefreshBlocking();
  assertTrue(afterRefreshCalled.get());
  sm.close();
  iw.close();
  dir.close();
}
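Outside of tests, the usual SearcherManager pattern is to acquire a searcher per request and always release it. A minimal sketch against the same sm instance (the query is illustrative):

// Acquire a (possibly refreshed) searcher, use it, and always release it
// so the underlying reader's reference count is decremented.
IndexSearcher searcher = sm.acquire();
try {
  TopDocs hits = searcher.search(new MatchAllDocsQuery(), 10);
  System.out.println("hits: " + hits.totalHits);
} finally {
  sm.release(searcher);
}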
@Test
public void testSuggestOnAllDeletedDocuments() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // using IndexWriter instead of RandomIndexWriter
  IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field"));
  int num = Math.min(1000, atLeast(10));
  for (int i = 0; i < num; i++) {
    Document document = new Document();
    document.add(new SuggestField("suggest_field", "abc_" + i, i));
    document.add(newStringField("delete", "delete", Field.Store.NO));
    iw.addDocument(document);
    if (usually()) {
      iw.commit();
    }
  }
  iw.deleteDocuments(new Term("delete", "delete"));
  DirectoryReader reader = DirectoryReader.open(iw, true);
  SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
  PrefixCompletionQuery query =
      new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
  TopSuggestDocs suggest = indexSearcher.suggest(query, num);
  assertThat(suggest.totalHits, equalTo(0));
  reader.close();
  iw.close();
}
/**
 * Remove a stale file (uidIter.term().text()) from the index database (and the xref file).
 *
 * @throws java.io.IOException if an error occurs
 */
private void removeFile() throws IOException {
  String path = Util.uid2url(uidIter.term().utf8ToString());
  for (IndexChangedListener listener : listeners) {
    listener.fileRemove(path);
  }
  writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term()));
  writer.prepareCommit();
  writer.commit();
  File xrefFile;
  if (RuntimeEnvironment.getInstance().isCompressXref()) {
    xrefFile = new File(xrefDir, path + ".gz");
  } else {
    xrefFile = new File(xrefDir, path);
  }
  File parent = xrefFile.getParentFile();
  if (!xrefFile.delete() && xrefFile.exists()) {
    log.log(Level.INFO, "Failed to remove obsolete xref-file: {0}", xrefFile.getAbsolutePath());
  }
  // Remove the parent directory if it's empty
  if (parent.delete()) {
    log.log(Level.FINE, "Removed empty xref dir: {0}", parent.getAbsolutePath());
  }
  setDirty();
  for (IndexChangedListener listener : listeners) {
    listener.fileRemoved(path);
  }
}
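The prepareCommit()/commit() pair above is Lucene's two-phase commit: prepareCommit() does the expensive flush-and-fsync work and stages the new commit point, and commit() then publishes it (or rollback() abandons it). A minimal sketch of the full pattern, assuming some external resource must commit in the same transaction (updateExternalResource is a hypothetical hook, not part of the snippet's codebase):

import org.apache.lucene.index.IndexWriter;

static void twoPhaseCommit(IndexWriter writer) throws Exception {
  try {
    writer.prepareCommit(); // phase 1: flush and fsync, but don't publish
    updateExternalResource(); // hypothetical: commit the co-transactional state
    writer.commit(); // phase 2: publish the staged commit point
  } catch (Exception e) {
    writer.rollback(); // abandon the staged commit; the index stays at the last commit
    throw e;
  }
}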
public void init() throws IOException {
  // analyzer = new StandardAnalyzer();
  analyzer = new GenenskapAnalyzer();
  IndexWriterConfig idxconf = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
  SimpleFSDirectory fsdir = new SimpleFSDirectory(indexdir);
  writer = new IndexWriter(fsdir, idxconf);
  writer.commit();
}
@PreDestroy
public void closeAll() throws Exception {
  writer.commit();
  writer.close();
  indexReader.close();
  suggester.close();
  directory.close();
}
/** Commit the pending documents to the index and close it. */
public void finish() {
  try {
    indexWriter.commit();
    indexWriter.close();
  } catch (IOException ex) {
    System.err.println("We had a problem closing the index: " + ex.getMessage());
  }
}
/** Delete a user's index data. */
public void deleteIndexData(UserAllParamsDomain bozhu) {
  try {
    writer.deleteDocuments(new Term("username", bozhu.getUsername()));
    // commit the deletion
    writer.commit();
  } catch (IOException e) {
    logger.info("User: " + bozhu.getUsername() + "'s indexed data delete failed.");
  }
}
/** Update a user's index data. */
public void updateIndexData(UserAllParamsDomain bozhu) {
  try {
    writer.updateDocument(new Term("username", bozhu.getUsername()), getDoc(bozhu));
    // commit the update
    writer.commit();
  } catch (IOException e) {
    logger.info("User: " + bozhu.getUsername() + "'s indexed data update failed.");
  }
}