private void _initIndexWriter() {
    try {
        Analyzer analyzer = new LimitTokenCountAnalyzer(
            LuceneHelperUtil.getAnalyzer(), PropsValues.LUCENE_ANALYZER_MAX_TOKENS);

        IndexWriterConfig indexWriterConfig =
            new IndexWriterConfig(LuceneHelperUtil.getVersion(), analyzer);

        indexWriterConfig.setIndexDeletionPolicy(_dumpIndexDeletionPolicy);
        indexWriterConfig.setMergePolicy(_getMergePolicy());
        indexWriterConfig.setMergeScheduler(_getMergeScheduler());
        indexWriterConfig.setRAMBufferSizeMB(PropsValues.LUCENE_BUFFER_SIZE);

        _indexWriter = new IndexWriter(getLuceneDir(), indexWriterConfig);

        if (!IndexReader.indexExists(getLuceneDir())) {

            // Workaround for LUCENE-2386

            if (_log.isDebugEnabled()) {
                _log.debug("Creating missing index");
            }

            _indexWriter.commit();
        }
    }
    catch (Exception e) {
        _log.error("Initializing Lucene writer failed for " + _companyId, e);
    }
}
static IndexWriter createWriter(String filename) throws IOException {
    IndexWriterConfig indexWriterConfig =
        new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
    indexWriterConfig.setRAMBufferSizeMB(100);
    indexWriterConfig.setOpenMode(OpenMode.CREATE);
    return new IndexWriter(FSDirectory.open(new File("output/" + filename)), indexWriterConfig);
}
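A minimal usage sketch for the factory above, assuming a Lucene 4.8 classpath; the method name, the field name "body", and the sample text are illustrative assumptions, not part of the original snippet:

static void demoCreateWriter() throws IOException {
    IndexWriter writer = createWriter("demo"); // index lands in output/demo (OpenMode.CREATE)
    Document doc = new Document();
    doc.add(new TextField("body", "hello lucene", Field.Store.YES)); // "body" is an assumed field name
    writer.addDocument(doc);
    writer.close(); // commits and releases the write lock
}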
private IndexWriter createWriter(boolean create) throws IOException {
    try {
        final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
        iwc.setCommitOnClose(false); // we by default don't commit on close
        iwc.setOpenMode(create
            ? IndexWriterConfig.OpenMode.CREATE
            : IndexWriterConfig.OpenMode.APPEND);
        iwc.setIndexDeletionPolicy(deletionPolicy);

        // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
        boolean verbose = false;
        try {
            verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
        } catch (Throwable ignore) {
        }
        iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));

        iwc.setMergeScheduler(mergeScheduler);
        MergePolicy mergePolicy = config().getMergePolicy();
        // Give us the opportunity to upgrade old segments while performing
        // background merges
        mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
        iwc.setMergePolicy(mergePolicy);
        iwc.setSimilarity(engineConfig.getSimilarity());
        iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().mbFrac());
        iwc.setCodec(engineConfig.getCodec());
        iwc.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh
        return new IndexWriter(store.directory(), iwc);
    } catch (LockObtainFailedException ex) {
        logger.warn("could not lock IndexWriter", ex);
        throw ex;
    }
}
protected void indexList(List<AgeObject> aol, boolean append) {
    try {
        if (searcher != null) {
            searcher.getIndexReader().close();
            searcher.close();
        }

        IndexWriterConfig idxCfg = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        idxCfg.setRAMBufferSizeMB(50);
        idxCfg.setOpenMode(append ? OpenMode.APPEND : OpenMode.CREATE);

        IndexWriter iWriter = new IndexWriter(index, idxCfg);
        for (Document d : new DocCollection(aol, extractors)) {
            iWriter.addDocument(d);
        }
        iWriter.close();

        searcher = new IndexSearcher(IndexReader.open(index));
    } catch (IOException e) {
        // CorruptIndexException is a subclass of IOException, so one handler covers both
        e.printStackTrace();
    }
}
private IndexWriter getIndexWriter(File file) throws IOException {
    FSDirectory dir = FSDirectory.open(file);
    IndexWriterConfig config =
        new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40));
    config.setOpenMode(OpenMode.CREATE_OR_APPEND);
    config.setRAMBufferSizeMB(RamBufferSizeMB);
    return new IndexWriter(dir, config);
}
/**
 * Builds a new {@code RowDirectory} using the specified directory path and analyzer.
 *
 * @param keyspace The keyspace name.
 * @param table The table name.
 * @param name The index name.
 * @param path The path of the directory where the Lucene files will be stored.
 * @param ramBufferMB The index writer buffer size in MB.
 * @param maxMergeMB NRTCachingDirectory max merge size in MB.
 * @param maxCachedMB NRTCachingDirectory max cached MB.
 * @param analyzer The default {@link Analyzer}.
 * @param refreshSeconds The index readers refresh time in seconds. Writes are not visible
 *     until this time has passed.
 * @param refreshCallback A runnable to be run on index refresh.
 * @throws IOException If Lucene throws IO errors.
 */
public LuceneIndex(
    String keyspace,
    String table,
    String name,
    Path path,
    Integer ramBufferMB,
    Integer maxMergeMB,
    Integer maxCachedMB,
    Analyzer analyzer,
    Double refreshSeconds,
    Runnable refreshCallback)
    throws IOException {
    this.path = path;
    this.refreshCallback = refreshCallback;
    this.logName = String.format("Lucene index %s.%s.%s", keyspace, table, name);

    // Open or create directory
    FSDirectory fsDirectory = FSDirectory.open(path);
    directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB);

    // Set up index writer
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setRAMBufferSizeMB(ramBufferMB);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    config.setUseCompoundFile(true);
    config.setMergePolicy(new TieredMergePolicy());
    indexWriter = new IndexWriter(directory, config);

    // Set up NRT search
    SearcherFactory searcherFactory = new SearcherFactory() {
        public IndexSearcher newSearcher(IndexReader reader) throws IOException {
            LuceneIndex.this.refreshCallBack();
            IndexSearcher searcher = new IndexSearcher(reader);
            searcher.setSimilarity(new NoIDFSimilarity());
            return searcher;
        }
    };
    TrackingIndexWriter trackingIndexWriter = new TrackingIndexWriter(indexWriter);
    searcherManager = new SearcherManager(indexWriter, true, searcherFactory);
    searcherReopener = new ControlledRealTimeReopenThread<>(
        trackingIndexWriter, searcherManager, refreshSeconds, refreshSeconds);
    searcherReopener.start(); // Start the refresher thread

    // Register the JMX MBean
    try {
        objectName = new ObjectName(String.format(
            "com.stratio.cassandra.lucene:type=LuceneIndexes,keyspace=%s,table=%s,index=%s",
            keyspace, table, name));
        ManagementFactory.getPlatformMBeanServer().registerMBean(this, objectName);
    } catch (MBeanException | OperationsException e) {
        Log.error(e, "Error while registering MBean");
    }
}
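A hypothetical construction of the index above; every literal here (keyspace/table/index names, storage path, buffer sizes, refresh interval) is an illustrative assumption, and the lambda requires Java 8:

// Hedged sketch: values are placeholders, not recommendations.
LuceneIndex index = new LuceneIndex(
    "my_keyspace", "my_table", "my_index",  // assumed identifiers
    Paths.get("/var/lib/cassandra/lucene"), // assumed storage path
    64,                                     // ramBufferMB
    5, 30,                                  // maxMergeMB, maxCachedMB (NRTCachingDirectory)
    new StandardAnalyzer(),
    60D,                                    // refreshSeconds: writes become visible after ~60 s
    () -> {});                              // no-op refresh callback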
public CreateIndex(String pPath) throws IOException {
    analyzer = new StandardAnalyzer();
    iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(ConfigConstant.INDEX_BUFFER_SIZE_IN_MB);
    // iwc.setCommitOnClose(true);
    writer = new IndexWriter(FSDirectory.open(Paths.get(pPath)), iwc);
    LOCAL_CACHE = new HashMap<>();
}
private static IndexWriter create_index_writer(
    String indexPath, IndexWriterConfig.OpenMode openMode) throws IOException {
    Directory dir = FSDirectory.open(new File(indexPath));
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    iwc.setOpenMode(openMode);
    iwc.setRAMBufferSizeMB(256.0);
    return new IndexWriter(dir, iwc);
}
public static IndexWriterConfig getIndexWriterConfig(Analyzer analyzer, boolean create) {
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
    config.setRAMBufferSizeMB(150); // larger buffer for faster indexing

    // Raise the merge factor if the merge policy is a LogMergePolicy. LogMergePolicy was
    // the default only up to Lucene 3.2; later versions default to TieredMergePolicy,
    // so this branch is skipped there.
    MergePolicy mp = config.getMergePolicy();
    if (mp instanceof LogMergePolicy) {
        ((LogMergePolicy) mp).setMergeFactor(40); // fewer, larger merge rounds for faster indexing
    }
    return config;
}
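A minimal sketch of wiring this config helper into a writer, assuming a Lucene 5+ classpath; the "index" path, field name, and analyzer choice are assumptions for illustration:

IndexWriterConfig config = getIndexWriterConfig(new StandardAnalyzer(), true);
try (Directory dir = FSDirectory.open(Paths.get("index"));
     IndexWriter writer = new IndexWriter(dir, config)) {
    Document doc = new Document();
    doc.add(new TextField("contents", "sample text", Field.Store.NO)); // assumed field name
    writer.addDocument(doc);
} // try-with-resources closes (and, by default, commits) the writer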
public NTriplesFileLuceneSyntacticIndexCreator(
    InputStream nTriplesStream, String indexPath, String searchField) throws IOException {
    // set up the index directory
    Directory directory = FSDirectory.open(new File(indexPath));

    // set up the index analyzer and writer
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer);
    indexWriterConfig.setRAMBufferSizeMB(1024.0);
    indexWriterConfig.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(directory, indexWriterConfig);
    System.out.println("Creating index ...");

    // set up the index fields, here two fields: one for the URI and one for the text
    FieldType stringType = new FieldType(StringField.TYPE_STORED);
    stringType.setStoreTermVectors(false);
    FieldType textType = new FieldType(TextField.TYPE_STORED);
    textType.setStoreTermVectors(false);

    Iterator<Triple> iterator =
        RiotReader.createIteratorTriples(nTriplesStream, Lang.NTRIPLES, null);
    Triple triple;
    String text;
    String uri;
    Document doc;
    int i = 0;
    while (iterator.hasNext()) {
        triple = iterator.next();
        uri = triple.getSubject().getURI();
        text = triple.getObject().getLiteralLexicalForm();

        doc = new Document();
        doc.add(new Field("uri", uri, stringType));
        doc.add(new Field(searchField, text, textType));
        writer.addDocument(doc);

        if (i++ % 10000 == 0) {
            System.out.println(i); // simple progress output
        }
    }
    writer.commit();
    writer.close();
}
public void applySetting(IndexWriterConfig writerConfig, int value) {
    writerConfig.setRAMBufferSizeMB(value);
}
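For context, setRAMBufferSizeMB accepts a double, so the int here widens implicitly; a hypothetical call site (the 256 MB value is an illustrative assumption) might look like:

IndexWriterConfig writerConfig = new IndexWriterConfig(new StandardAnalyzer());
applySetting(writerConfig, 256); // flush once buffered documents exceed ~256 MB of RAM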
/**
 * Update the content of this index database.
 *
 * @throws IOException if an error occurs
 * @throws HistoryException if an error occurs when accessing the history
 */
public void update() throws IOException, HistoryException {
    synchronized (lock) {
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }

    String ctgs = RuntimeEnvironment.getInstance().getCtags();
    if (ctgs != null) {
        ctags = new Ctags();
        ctags.setBinary(ctgs);
    }
    if (ctags == null) {
        LOGGER.severe("Unable to run ctags! Searching definitions will not work!");
    }

    if (ctags != null) {
        String filename = RuntimeEnvironment.getInstance().getCTagsExtraOptionsFile();
        if (filename != null) {
            ctags.setCTagsExtraOptionsFile(filename);
        }
    }

    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(RuntimeEnvironment.getInstance().getRamBufferSize());
        writer = new IndexWriter(indexDirectory, iwc);
        writer.commit(); // to make sure the index exists on disk

        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }

        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = RuntimeEnvironment.getInstance().getSourceRootFile();
            } else {
                sourceRoot = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), dir);
            }

            HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);

            String startuid = Util.path2uid(dir, "");
            IndexReader reader = DirectoryReader.open(indexDirectory); // open existing index
            Terms terms = null;
            int numDocs = reader.numDocs();
            if (numDocs > 0) {
                Fields uFields = MultiFields.getFields(reader);
                terms = uFields.terms(QueryBuilder.U);
            }

            try {
                if (numDocs > 0) {
                    uidIter = terms.iterator();
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); // init uid
                    if (stat == TermsEnum.SeekStatus.END) {
                        uidIter = null;
                        LOGGER.log(Level.WARNING,
                            "Couldn't find a start term for {0}, empty u field?", startuid);
                    }
                }

                // The code below traverses the source tree to get the total file count.
                int file_cnt = 0;
                if (RuntimeEnvironment.getInstance().isPrintProgress()) {
                    LOGGER.log(Level.INFO, "Counting files in {0} ...", dir);
                    file_cnt = indexDown(sourceRoot, dir, true, 0, 0);
                    LOGGER.log(Level.INFO,
                        "Need to process: {0} files for {1}", new Object[] {file_cnt, dir});
                }

                indexDown(sourceRoot, dir, false, 0, file_cnt);

                while (uidIter != null
                        && uidIter.term() != null
                        && uidIter.term().utf8ToString().startsWith(startuid)) {
                    removeFile();
                    BytesRef next = uidIter.next();
                    if (next == null) {
                        uidIter = null;
                    }
                }
            } finally {
                reader.close();
            }
        }
    } finally {
        if (writer != null) {
            try {
                writer.prepareCommit();
                writer.commit();
                writer.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occurred while closing writer", e);
            }
        }
        if (ctags != null) {
            try {
                ctags.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occurred while closing ctags process", e);
            }
        }
        synchronized (lock) {
            running = false;
        }
    }

    if (!isInterrupted() && isDirty()) {
        if (RuntimeEnvironment.getInstance().isOptimizeDatabase()) {
            optimize();
        }
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File timestamp = new File(env.getDataRootFile(), "timestamp");
        String purpose = "used for timestamping the index database.";
        if (timestamp.exists()) {
            if (!timestamp.setLastModified(System.currentTimeMillis())) {
                LOGGER.log(Level.WARNING,
                    "Failed to set last modified time on ''{0}'', {1}",
                    new Object[] {timestamp.getAbsolutePath(), purpose});
            }
        } else {
            if (!timestamp.createNewFile()) {
                LOGGER.log(Level.WARNING,
                    "Failed to create file ''{0}'', {1}",
                    new Object[] {timestamp.getAbsolutePath(), purpose});
            }
        }
    }
}
public static void main(String[] args) {
    String usage =
        "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
            + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_41, analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size of the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(512.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
        int num = reader.numDocs();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
        System.out.println("Contains " + num + " Documents");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}