/**
 * Opens a Lucene writer over {@code output/<filename>}, always recreating the
 * index from scratch (OpenMode.CREATE) with a 100 MB RAM buffer.
 *
 * @param filename directory name under {@code output/} holding the index
 * @return a new {@link IndexWriter}; the caller is responsible for closing it
 * @throws IOException if the directory cannot be opened
 */
static IndexWriter createWriter(String filename) throws IOException {
  IndexWriterConfig config =
      new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
  config.setOpenMode(OpenMode.CREATE);
  config.setRAMBufferSizeMB(100);
  FSDirectory outputDirectory = FSDirectory.open(new File("output/" + filename));
  return new IndexWriter(outputDirectory, config);
}
@BeforeClass public static void beforeClassCountingFacetsAggregatorTest() throws Exception { indexDir = newDirectory(); taxoDir = newDirectory(); // create an index which has: // 1. Segment with no categories, but matching results // 2. Segment w/ categories, but no results // 3. Segment w/ categories and results // 4. Segment w/ categories, but only some results IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())); conf.setMergePolicy( NoMergePolicy.INSTANCE); // prevent merges, so we can control the index segments IndexWriter indexWriter = new IndexWriter(indexDir, conf); TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); allExpectedCounts = newCounts(); termExpectedCounts = newCounts(); // segment w/ no categories indexDocsNoFacets(indexWriter); // segment w/ categories, no content indexDocsWithFacetsNoTerms(indexWriter, taxoWriter, allExpectedCounts); // segment w/ categories and content indexDocsWithFacetsAndTerms(indexWriter, taxoWriter, allExpectedCounts); // segment w/ categories and some content indexDocsWithFacetsAndSomeTerms(indexWriter, taxoWriter, allExpectedCounts); IOUtils.close(indexWriter, taxoWriter); }
/*
 * Test per field codec support - adding fields with random codecs.
 * Each round appends 97 docs with a random number (30-60) of randomly
 * configured text fields, optionally force-merges, then checks maxDoc grows
 * by exactly one round's worth of documents.
 */
@Test
public void testStressPerFieldCodec() throws IOException {
  Directory dir = newDirectory(random());
  final int docsPerRound = 97;
  int numRounds = atLeast(1);
  for (int i = 0; i < numRounds; i++) {
    // Number of fields per document for this round.
    int num = TestUtil.nextInt(random(), 30, 60);
    IndexWriterConfig config =
        newIndexWriterConfig(random(), TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    // Append to the same directory across rounds.
    config.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = newWriter(dir, config);
    for (int j = 0; j < docsPerRound; j++) {
      final Document doc = new Document();
      for (int k = 0; k < num; k++) {
        // Randomize tokenization/norms per field to stress codec handling.
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setTokenized(random().nextBoolean());
        customType.setOmitNorms(random().nextBoolean());
        Field field =
            newField("" + k, TestUtil.randomRealisticUnicodeString(random(), 128), customType);
        doc.add(field);
      }
      writer.addDocument(doc);
    }
    if (random().nextBoolean()) {
      writer.forceMerge(1);
    }
    writer.commit();
    // Docs accumulate across rounds (CREATE_OR_APPEND + no deletes).
    assertEquals((i + 1) * docsPerRound, writer.maxDoc());
    writer.close();
  }
  dir.close();
}
/** * @Title: createIndex @Description: 建立索引 * * @param @param documentList * @param @throws IOException * @return void * @throws */ public static void createIndex(List<Document> documentList, String path) throws IOException { // 在当前路径下创建一个叫indexDir的目录 File file = new File(path); String pathAll = file.getParentFile().getParentFile().toString() + "\\index"; File indexDir = new File(pathAll); // 创建索引目录 Directory directory = FSDirectory.open(indexDir); // 创建一个分词器 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36); // 创建索引配置器 IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer); LogMergePolicy mergePolicy = new LogByteSizeMergePolicy(); // 设置segment添加文档(Document)时的合并频率 // 值较小,建立索引的速度就较慢 // 值较大,建立索引的速度就较快,>10适合批量建立索引 mergePolicy.setMergeFactor(50); // 设置segment最大合并文档(Document)数 // 值较小有利于追加索引的速度 // 值较大,适合批量建立索引和更快的搜索 mergePolicy.setMaxMergeDocs(5000); // 启用复合式索引文件格式,合并多个segment mergePolicy.setUseCompoundFile(true); indexWriterConfig.setMergePolicy(mergePolicy); // 设置索引的打开模式 indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); // 创建索引器 IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig); for (Document document : documentList) { // 把文档添加到索引库 indexWriter.addDocument(document); } // 提交索引到磁盘上的索引库,关闭索引器 indexWriter.close(); }
/** Generate a spelling suggestion for the definitions stored in defs */ public void createSpellingSuggestions() { IndexReader indexReader = null; SpellChecker checker; try { log.info("Generating spelling suggestion index ... "); indexReader = DirectoryReader.open(indexDirectory); checker = new SpellChecker(spellDirectory); // TODO below seems only to index "defs" , possible bug ? Analyzer analyzer = AnalyzerGuru.getAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); checker.indexDictionary(new LuceneDictionary(indexReader, QueryBuilder.DEFS), iwc, false); log.info("done"); } catch (IOException e) { log.log(Level.SEVERE, "ERROR: Generating spelling: {0}", e); } finally { if (indexReader != null) { try { indexReader.close(); } catch (IOException e) { log.log(Level.WARNING, "An error occured while closing reader", e); } } if (spellDirectory != null) { spellDirectory.close(); } } }
protected void indexList(List<AgeObject> aol, boolean append) { try { if (searcher != null) { searcher.getIndexReader().close(); searcher.close(); } IndexWriterConfig idxCfg = new IndexWriterConfig(Version.LUCENE_36, analyzer); idxCfg.setRAMBufferSizeMB(50); idxCfg.setOpenMode(append ? OpenMode.APPEND : OpenMode.CREATE); IndexWriter iWriter = new IndexWriter(index, idxCfg); for (Document d : new DocCollection(aol, extractors)) iWriter.addDocument(d); iWriter.close(); searcher = new IndexSearcher(IndexReader.open(index)); } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
/**
 * Deletes every indexed document created by the given user.
 *
 * @param creator user id whose documents are removed from the index
 * @throws Exception on index access failure
 */
@Override
public void deleteOnCreator(Integer creator) throws Exception {
  boolean create = true;
  File indexDir = new File(getIndexPath());
  if (!indexDir.exists()) {
    indexDir.mkdirs();
  } else if (indexDir.list().length > 0) {
    // Directory already holds index files: append instead of recreating.
    create = false;
  }
  Directory dir = FSDirectory.open(indexDir);
  try {
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
    iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = null;
    try {
      writer = new IndexWriter(dir, iwc);
      writer.deleteDocuments(
          new Term(
              FIELD_LABEL_CREATE_USER,
              StringUtils.zeroPadding(creator, IndexingValue.ID_ZEROPADDING_DIGIT)));
      writer.commit();
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  } finally {
    // Fix: the Directory (and its underlying file handles) was never closed.
    dir.close();
  }
}
/**
 * Adds the given value to the index, creating the index directory when it does
 * not exist yet and appending otherwise.
 *
 * @param indexingValue value to index
 * @throws Exception on index access failure
 */
public void writeIndex(IndexingValue indexingValue) throws Exception {
  boolean create = true;
  File indexDir = new File(getIndexPath());
  if (!indexDir.exists()) {
    indexDir.mkdirs();
  } else if (indexDir.list().length > 0) {
    // Directory already holds index files: append instead of recreating.
    create = false;
  }
  Directory dir = FSDirectory.open(indexDir);
  try {
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
    iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = null;
    try {
      writer = new IndexWriter(dir, iwc);
      addDoc(writer, indexingValue);
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  } finally {
    // Fix: the Directory (and its underlying file handles) was never closed.
    dir.close();
  }
}
/**
 * Rebuilds the index from scratch: opens a writer over {@code this.dir} in
 * CREATE mode, indexes all documents, and closes the writer.
 *
 * @throws IOException if the index cannot be written
 */
public void index() throws IOException {
  final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(this.analyzer);
  indexWriterConfig.setOpenMode(OpenMode.CREATE);
  // Fix: try-with-resources so the writer (and its lock) is released even when
  // indexDocs throws — the original leaked the writer on failure.
  try (IndexWriter indexWriter = new IndexWriter(this.dir, indexWriterConfig)) {
    indexDocs(indexWriter);
  }
}
/**
 * Deletes the indexed document with the given id.
 *
 * @param id value of the id field identifying the document to remove
 * @throws Exception on index access failure
 */
@Override
public void deleteItem(String id) throws Exception {
  boolean create = true;
  File indexDir = new File(getIndexPath());
  if (!indexDir.exists()) {
    indexDir.mkdirs();
  } else if (indexDir.list().length > 0) {
    // Directory already holds index files: append instead of recreating.
    create = false;
  }
  Directory dir = FSDirectory.open(indexDir);
  try {
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
    iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = null;
    try {
      writer = new IndexWriter(dir, iwc);
      writer.deleteDocuments(new Term(FIELD_LABEL_ID, id));
      writer.commit();
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  } finally {
    // Fix: the Directory (and its underlying file handles) was never closed.
    dir.close();
  }
}
/** Test that core cache key (needed for NRT) is working */ public void testCoreCacheKey() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(null); iwc.setMaxBufferedDocs(100); iwc.setMergePolicy(NoMergePolicy.INSTANCE); IndexWriter iw = new IndexWriter(dir, iwc); // add two docs, id:0 and id:1 Document doc = new Document(); Field idField = new StringField("id", "", Field.Store.NO); doc.add(idField); idField.setStringValue("0"); iw.addDocument(doc); idField.setStringValue("1"); iw.addDocument(doc); // open reader ShardId shardId = new ShardId("fake", "_na_", 1); DirectoryReader ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(iw, true), shardId); assertEquals(2, ir.numDocs()); assertEquals(1, ir.leaves().size()); // delete id:0 and reopen iw.deleteDocuments(new Term("id", "0")); DirectoryReader ir2 = DirectoryReader.openIfChanged(ir); // we should have the same cache key as before assertEquals(1, ir2.numDocs()); assertEquals(1, ir2.leaves().size()); assertSame( ir.leaves().get(0).reader().getCoreCacheKey(), ir2.leaves().get(0).reader().getCoreCacheKey()); IOUtils.close(ir, ir2, iw, dir); }
private void _initIndexWriter() { try { Analyzer analyzer = new LimitTokenCountAnalyzer( LuceneHelperUtil.getAnalyzer(), PropsValues.LUCENE_ANALYZER_MAX_TOKENS); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LuceneHelperUtil.getVersion(), analyzer); indexWriterConfig.setIndexDeletionPolicy(_dumpIndexDeletionPolicy); indexWriterConfig.setMergePolicy(_getMergePolicy()); indexWriterConfig.setMergeScheduler(_getMergeScheduler()); indexWriterConfig.setRAMBufferSizeMB(PropsValues.LUCENE_BUFFER_SIZE); _indexWriter = new IndexWriter(getLuceneDir(), indexWriterConfig); if (!IndexReader.indexExists(getLuceneDir())) { // Workaround for LUCENE-2386 if (_log.isDebugEnabled()) { _log.debug("Creating missing index"); } _indexWriter.commit(); } } catch (Exception e) { _log.error("Initializing Lucene writer failed for " + _companyId, e); } }
/**
 * Opens an {@link IndexWriter} on the given directory using the shared analyzer
 * and similarity.
 *
 * @param create true to recreate the index from scratch, false to append to an
 *     existing one
 * @param directory directory in which the index lives
 * @return a new writer; the caller must close it
 * @throws IOException if the writer cannot be opened
 */
IndexWriter createIndexWriter(final boolean create, final Directory directory) throws IOException {
  final IndexWriterConfig config = new IndexWriterConfig(ANALYZER);
  if (create) {
    config.setOpenMode(OpenMode.CREATE);
  } else {
    config.setOpenMode(OpenMode.APPEND);
  }
  config.setSimilarity(SIMILARITY);
  return new IndexWriter(directory, config);
}
/**
 * Opens a writer over the given index directory, creating or appending as
 * needed, with the configured RAM buffer size.
 *
 * @param file filesystem location of the index
 * @return a new writer; the caller must close it
 * @throws IOException if the directory or writer cannot be opened
 */
private IndexWriter getIndexWriter(File file) throws IOException {
  FSDirectory indexDirectory = FSDirectory.open(file);
  IndexWriterConfig writerConfig =
      new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40));
  writerConfig.setRAMBufferSizeMB(RamBufferSizeMB);
  writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
  return new IndexWriter(indexDirectory, writerConfig);
}
/**
 * Builds a new {@code RowDirectory} using the specified directory path and analyzer.
 *
 * @param keyspace The keyspace name.
 * @param table The table name.
 * @param name The index name.
 * @param path The path of the directory in where the Lucene files will be stored.
 * @param ramBufferMB The index writer buffer size in MB.
 * @param maxMergeMB NRTCachingDirectory max merge size in MB.
 * @param maxCachedMB NRTCachingDirectory max cached MB.
 * @param analyzer The default {@link Analyzer}.
 * @param refreshSeconds The index readers refresh time in seconds. Writings are not visible until
 *     this time.
 * @param refreshCallback A runnable to be run on index refresh.
 * @throws IOException If Lucene throws IO errors.
 */
public LuceneIndex(
    String keyspace,
    String table,
    String name,
    Path path,
    Integer ramBufferMB,
    Integer maxMergeMB,
    Integer maxCachedMB,
    Analyzer analyzer,
    Double refreshSeconds,
    Runnable refreshCallback)
    throws IOException {
  this.path = path;
  this.refreshCallback = refreshCallback;
  this.logName = String.format("Lucene index %s.%s.%s", keyspace, table, name);

  // Open or create directory; the FS directory is wrapped in an NRT cache so
  // small, recent segments stay in memory.
  FSDirectory fsDirectory = FSDirectory.open(path);
  directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB);

  // Setup index writer
  IndexWriterConfig config = new IndexWriterConfig(analyzer);
  config.setRAMBufferSizeMB(ramBufferMB);
  config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
  config.setUseCompoundFile(true);
  config.setMergePolicy(new TieredMergePolicy());
  indexWriter = new IndexWriter(directory, config);

  // Setup NRT search: every new searcher triggers the refresh callback and
  // uses a similarity without IDF scoring.
  SearcherFactory searcherFactory =
      new SearcherFactory() {
        public IndexSearcher newSearcher(IndexReader reader) throws IOException {
          LuceneIndex.this.refreshCallBack();
          IndexSearcher searcher = new IndexSearcher(reader);
          searcher.setSimilarity(new NoIDFSimilarity());
          return searcher;
        }
      };
  TrackingIndexWriter trackingIndexWriter = new TrackingIndexWriter(indexWriter);
  searcherManager = new SearcherManager(indexWriter, true, searcherFactory);
  // Reopen searchers at most (and at least) every refreshSeconds.
  searcherReopener =
      new ControlledRealTimeReopenThread<>(
          trackingIndexWriter, searcherManager, refreshSeconds, refreshSeconds);
  searcherReopener.start(); // Start the refresher thread

  // Register JMX MBean; registration failure is logged but does not abort
  // index construction.
  try {
    objectName =
        new ObjectName(
            String.format(
                "com.stratio.cassandra.lucene:type=LuceneIndexes,keyspace=%s,table=%s,index=%s",
                keyspace, table, name));
    ManagementFactory.getPlatformMBeanServer().registerMBean(this, objectName);
  } catch (MBeanException | OperationsException e) {
    Log.error(e, "Error while registering MBean");
  }
}
/**
 * Retrieve an IndexWriter configuration object.
 *
 * @param create true to rebuild the index from scratch, false to create it only
 *     when missing and append otherwise
 */
private IndexWriterConfig retrieveIndexWriterConfig(boolean create) {
  IndexWriterConfig config =
      new IndexWriterConfig(USE_LUCENE_VERSION, new StandardAnalyzer(USE_LUCENE_VERSION));
  if (create) {
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  } else {
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
  }
  return config;
}
/**
 * Initializes a writer that recreates the index at {@code pPath} from scratch
 * (OpenMode.CREATE) and prepares the local document cache.
 *
 * @param pPath filesystem path of the index directory
 * @throws IOException if the index directory cannot be opened
 */
public CreateIndex(String pPath) throws IOException {
  LOCAL_CACHE = new HashMap<>();
  analyzer = new StandardAnalyzer();
  iwc = new IndexWriterConfig(analyzer);
  iwc.setOpenMode(OpenMode.CREATE);
  iwc.setRAMBufferSizeMB(ConfigConstant.INDEX_BUFFER_SIZE_IN_MB);
  writer = new IndexWriter(FSDirectory.open(Paths.get(pPath)), iwc);
}
/** * index the given catalog (a domain of the corpus: newspaper, literature, stc) * * @param catalogName name of the catalog (domain) * @param create create index (removing any previous index) or just update */ public boolean index(String catalogName, boolean create) { try { catalog c = catalogs.get(catalogName); if (c == null) { log("unknown catalog: " + catalogName); return false; } String docsPath = c.docPath; String indexPath = c.indexPath; final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { log( "Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); return false; } Date start = new Date(); log("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); writer.close(); Date end = new Date(); log(new String(" " + (end.getTime() - start.getTime()) + " total milliseconds")); } catch (IOException e) { log(e.getMessage()); return false; } return true; }
public void mergeIndex() throws IOException { File indexDir = new File(FILE_INDEX); FSDirectory fsdir = FSDirectory.open(indexDir); Analyzer luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_47); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, luceneAnalyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter fswriter = new IndexWriter(fsdir, config); fswriter.addIndexes(new Directory[] {ramdir}); // 合并数据 fswriter.close(); }
IndexWriter getIndexWriter(Directory luceneDir) throws CorruptIndexException, LockObtainFailedException, IOException, ProviderException { IndexWriter writer = null; IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36, getLuceneAnalyzer()); writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); writer = new IndexWriter(luceneDir, writerConfig); // writer.setInfoStream( System.out ); return writer; }
/*
 * Builds an index of MAX_DOCS high-priority documents, then verifies that a
 * single-term query and an OR (high OR med) boolean query both return every
 * document, printing and checking hits in deterministic score-then-id order.
 */
private void doTest(Random random, PrintWriter out, boolean useCompoundFiles, int MAX_DOCS)
    throws Exception {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random);
  IndexWriterConfig conf = newIndexWriterConfig(analyzer);
  final MergePolicy mp = conf.getMergePolicy();
  // Force (ratio 1.0) or forbid (0.0) compound-file segments per the test flag.
  mp.setNoCFSRatio(useCompoundFiles ? 1.0 : 0.0);
  IndexWriter writer = new IndexWriter(directory, conf);
  if (VERBOSE) {
    System.out.println("TEST: now build index MAX_DOCS=" + MAX_DOCS);
  }
  // Every document gets the same HIGH priority plus a unique id.
  for (int j = 0; j < MAX_DOCS; j++) {
    Document d = new Document();
    d.add(newTextField(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES));
    d.add(newTextField(ID_FIELD, Integer.toString(j), Field.Store.YES));
    writer.addDocument(d);
  }
  writer.close();

  // try a search without OR
  IndexReader reader = DirectoryReader.open(directory);
  IndexSearcher searcher = newSearcher(reader);
  Query query = new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY));
  out.println("Query: " + query.toString(PRIORITY_FIELD));
  if (VERBOSE) {
    System.out.println("TEST: search query=" + query);
  }
  // Sort by score first, then numeric id, so hit order is deterministic.
  final Sort sort = new Sort(SortField.FIELD_SCORE, new SortField(ID_FIELD, SortField.Type.INT));
  ScoreDoc[] hits = searcher.search(query, null, MAX_DOCS, sort).scoreDocs;
  printHits(out, hits, searcher);
  checkHits(hits, MAX_DOCS, searcher);

  // try a new search with OR — same reader, fresh searcher.
  searcher = newSearcher(reader);
  hits = null;
  BooleanQuery booleanQuery = new BooleanQuery();
  booleanQuery.add(
      new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY)), BooleanClause.Occur.SHOULD);
  booleanQuery.add(
      new TermQuery(new Term(PRIORITY_FIELD, MED_PRIORITY)), BooleanClause.Occur.SHOULD);
  out.println("Query: " + booleanQuery.toString(PRIORITY_FIELD));
  hits = searcher.search(booleanQuery, null, MAX_DOCS, sort).scoreDocs;
  printHits(out, hits, searcher);
  checkHits(hits, MAX_DOCS, searcher);

  reader.close();
  directory.close();
}
/**
 * (Re)opens the index writer and refreshes the reader if this instance was
 * previously closed; a no-op otherwise.
 *
 * @throws IOException if the writer cannot be opened
 */
public void open() throws IOException {
  if (!_closed) {
    return;
  }
  IndexWriterConfig config =
      new IndexWriterConfig(Version.LUCENE_34, new StandardAnalyzer(Version.LUCENE_34));
  config.setMergePolicy(new ZoieMergePolicy());
  config.setOpenMode(OpenMode.CREATE_OR_APPEND);
  _idxWriter = new IndexWriter(_dir, config);
  updateReader();
  _closed = false;
}
/**
 * Opens an {@link IndexWriter} on the given directory using a byte-size log
 * merge policy capped at {@code maxMergeDocs} documents per merged segment.
 *
 * @param directory directory in which the index lives
 * @param maxMergeDocs maximum number of documents per merged segment
 * @param useSerialMerger when true, use the shared serial merge scheduler
 */
public static IndexWriter openWriter(
    Directory directory, int maxMergeDocs, boolean useSerialMerger)
    throws CorruptIndexException, LockObtainFailedException, IOException {
  IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
  if (useSerialMerger) {
    config.setMergeScheduler(mergeScheduler);
  }
  LogMergePolicy policy = new LogByteSizeMergePolicy();
  policy.setMaxMergeDocs(maxMergeDocs);
  config.setMergePolicy(policy);
  return new IndexWriter(directory, config);
}
private static IndexWriter create_index_writer( String indexPath, IndexWriterConfig.OpenMode openMode) throws IOException { Directory dir = FSDirectory.open(new File(indexPath)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer); iwc.setOpenMode(openMode); iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); return writer; // indexDocs(writer, docDir); }
/**
 * Indexes the given test docs into an in-memory Lucene index with full term
 * vectors, and returns a reader over the result. Fields flagged with
 * storedPayloads get a dedicated analyzer that stores the token type as the
 * payload; all other fields use a StandardAnalyzer with no stopwords.
 */
protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {
  Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
  // Field settings are taken from the first doc — assumes all docs share the
  // same field configuration.
  for (TestFieldSetting field : testDocs[0].fieldSettings) {
    if (field.storedPayloads) {
      mapping.put(
          field.name,
          new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
              Tokenizer tokenizer = new StandardTokenizer(Version.CURRENT.luceneVersion, reader);
              TokenFilter filter = new LowerCaseFilter(Version.CURRENT.luceneVersion, tokenizer);
              // Stores each token's type attribute as its payload.
              filter = new TypeAsPayloadTokenFilter(filter);
              return new TokenStreamComponents(tokenizer, filter);
            }
          });
    }
  }
  PerFieldAnalyzerWrapper wrapper =
      new PerFieldAnalyzerWrapper(
          new StandardAnalyzer(Version.CURRENT.luceneVersion, CharArraySet.EMPTY_SET), mapping);

  Directory dir = new RAMDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(Version.CURRENT.luceneVersion, wrapper);
  conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  IndexWriter writer = new IndexWriter(dir, conf);

  for (TestDoc doc : testDocs) {
    Document d = new Document();
    d.add(new Field("id", doc.id, StringField.TYPE_STORED));
    for (int i = 0; i < doc.fieldContent.length; i++) {
      FieldType type = new FieldType(TextField.TYPE_STORED);
      TestFieldSetting fieldSetting = doc.fieldSettings[i];
      type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
      type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
      // Positions are required when payloads or offsets are stored.
      type.setStoreTermVectorPositions(
          fieldSetting.storedPositions || fieldSetting.storedPayloads || fieldSetting.storedOffset);
      type.setStoreTermVectors(true);
      type.freeze();
      d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
    }
    // Upsert keyed by id; committing per doc puts each doc in its own segment.
    writer.updateDocument(new Term("id", doc.id), d);
    writer.commit();
  }
  writer.close();

  return DirectoryReader.open(dir);
}
public static IndexWriterConfig getIndexWriterConfig(Analyzer analyzer, boolean create) { IndexWriterConfig config = new IndexWriterConfig(analyzer); config.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND); config.setRAMBufferSizeMB(150); // faster indexing // Set merge factor (if using LogMergePolicy, which is the default up to version LUCENE_32, // so yes) MergePolicy mp = config.getMergePolicy(); if (mp instanceof LogMergePolicy) { ((LogMergePolicy) mp).setMergeFactor(40); // faster indexing } return config; }
/** Override this to customize index settings, e.g. which codec to use. */ protected IndexWriterConfig getIndexWriterConfig( Version matchVersion, Analyzer indexAnalyzer, IndexWriterConfig.OpenMode openMode) { IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer); iwc.setCodec(new Lucene46Codec()); iwc.setOpenMode(openMode); // This way all merged segments will be sorted at // merge time, allow for per-segment early termination // when those segments are searched: iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), SORT)); return iwc; }
/**
 * Builds a DefaultIndexEngine backed by a fresh in-memory (RAM) directory that
 * already contains a valid empty index.
 */
private DefaultIndexEngine getRamDirectory() throws IOException {
  final RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf =
      new IndexWriterConfig(
          DefaultIndexManager.LUCENE_VERSION,
          new StandardAnalyzer(DefaultIndexManager.LUCENE_VERSION));
  conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  // Open and immediately close a writer so the directory holds an empty index.
  new IndexWriter(directory, conf).close();
  final DefaultConfiguration configuration =
      new DefaultConfiguration(directory, new StandardAnalyzer(DefaultIndexManager.LUCENE_VERSION));
  return new DefaultIndexEngine(configuration, FlushPolicy.FLUSH);
}
private static void adicionaMensagemJogadorAppMIndice( String id, String hashtag_jogador, String utilizador, String imagem, String mensagem, String data_ano_mes_dia, String data_hora_minuto, String data_amd_formatada, String data_hm_formatada, String lingua, String tipo_mensagem, String fonte) throws IOException { // TODO Auto-generated method stub StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_34); File file = new File("C:\\Workspace\\SocialSports\\Indices_LuceneAppM\\" + hashtag_jogador); Directory index = new SimpleFSDirectory(file); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_34, analyzer); if (!file.exists()) { iwc.setOpenMode(OpenMode.CREATE); } else { iwc.setOpenMode(OpenMode.APPEND); } IndexWriter index_writer = new IndexWriter(index, iwc); // Um campo indexado "INDEX.ANALYZED" significa que é pesquisável ou // que podem ser feitas pesquisas com base nesse campo. // Um campo guardado "STORE.YES" (no índice) significa que o seu conteúdo pode ser visto // como resultado de pesquisas Document doc = new Document(); doc.add(new Field("id", id, Field.Store.YES, Index.ANALYZED)); doc.add(new Field("hashtag", hashtag_jogador, Field.Store.YES, Index.ANALYZED)); doc.add(new Field("utilizador", utilizador, Field.Store.YES, Index.NOT_ANALYZED)); doc.add(new Field("imagem", imagem, Field.Store.YES, Index.NOT_ANALYZED)); doc.add(new Field("mensagem", mensagem, Field.Store.YES, Index.NOT_ANALYZED)); doc.add(new Field("data_ano_mes_dia", data_ano_mes_dia, Field.Store.YES, Index.ANALYZED)); doc.add(new Field("data_hora_minuto", data_hora_minuto, Field.Store.YES, Index.ANALYZED)); doc.add( new Field("data_amd_formatada", data_amd_formatada, Field.Store.YES, Index.NOT_ANALYZED)); doc.add(new Field("data_hm_formatada", data_hm_formatada, Field.Store.YES, Index.NOT_ANALYZED)); doc.add(new Field("lingua", lingua, Field.Store.YES, Index.ANALYZED)); doc.add(new Field("tipo_mensagem", tipo_mensagem, Field.Store.YES, Index.ANALYZED)); doc.add(new 
Field("fonte", fonte, Field.Store.YES, Index.ANALYZED)); index_writer.addDocument(doc); index_writer.optimize(); index_writer.close(); }
public NTriplesFileLuceneSyntacticIndexCreator( InputStream nTriplesStream, String indexPath, String searchField) throws IOException { // setup the index Directory directory = FSDirectory.open(new File(indexPath)); // setup the index analyzer Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); indexWriterConfig.setRAMBufferSizeMB(1024.0); indexWriterConfig.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(directory, indexWriterConfig); System.out.println("Creating index ..."); // setup the index fields, here two fields, for URI and text FieldType stringType = new FieldType(StringField.TYPE_STORED); stringType.setStoreTermVectors(false); FieldType textType = new FieldType(TextField.TYPE_STORED); textType.setStoreTermVectors(false); Set<Document> documents = new HashSet<Document>(); Iterator<Triple> iterator = RiotReader.createIteratorTriples(nTriplesStream, Lang.NTRIPLES, null); Triple triple; String text; String uri; Document doc; int i = 0; while (iterator.hasNext()) { triple = iterator.next(); uri = triple.getSubject().getURI(); text = triple.getObject().getLiteralLexicalForm(); doc = new Document(); doc.add(new Field("uri", uri, stringType)); doc.add(new Field(searchField, text, textType)); writer.addDocument(doc); if (i++ % 10000 == 0) { // writer.commit(); System.out.println(i); } } writer.commit(); writer.close(); }