/** create a RandomIndexWriter with the provided config */
public RandomIndexWriter(Random r, Directory dir, IndexWriterConfig c) throws IOException {
  // TODO: this should be solved in a different way; Random should not be shared (!).
  this.r = new Random(r.nextLong());
  w = mockIndexWriter(dir, c, r);
  flushAt = _TestUtil.nextInt(r, 10, 1000);
  codec = w.getConfig().getCodec();
  if (LuceneTestCase.VERBOSE) {
    System.out.println("RIW dir=" + dir + " config=" + w.getConfig());
    System.out.println("codec default=" + codec.getName());
  }

  // Make sure we sometimes test indices that don't get
  // any forced merges:
  doRandomForceMerge = !(c.getMergePolicy() instanceof NoMergePolicy) && r.nextBoolean();
}
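// A minimal usage sketch for the constructor above, assuming the Lucene test
// framework: random(), newDirectory(), newIndexWriterConfig(), MockAnalyzer, and
// TEST_VERSION_CURRENT come from LuceneTestCase and are not part of the snippet above.
public void testAddAndSearch() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter riw =
      new RandomIndexWriter(
          random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  Document doc = new Document();
  doc.add(new StringField("id", "1", Field.Store.YES));
  riw.addDocument(doc);
  IndexReader reader = riw.getReader(); // makes pending docs visible
  assertEquals(1, reader.numDocs());
  reader.close();
  riw.close();
  dir.close();
}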
@Override
protected final void writerSegmentStats(SegmentsStats stats) {
  stats.addVersionMapMemoryInBytes(versionMap.ramBytesUsed());
  stats.addIndexWriterMemoryInBytes(indexWriter.ramBytesUsed());
  stats.addIndexWriterMaxMemoryInBytes(
      (long) (indexWriter.getConfig().getRAMBufferSizeMB() * 1024 * 1024));
}
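// Illustrative arithmetic for the conversion above: with Lucene's default RAM
// buffer of 16.0 MB (IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB), the reported
// maximum is (long) (16.0 * 1024 * 1024) = 16777216 bytes.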
/**
 * Indexes the given file using the given writer, or if a directory is given, recurses over files
 * and directories found under the given directory.
 *
 * <p>NOTE: This method indexes one document per input file. This is slow. For good throughput,
 * put multiple documents into your input file(s). An example of this is in the benchmark module,
 * which can create "line doc" files, one document per line, using the <a
 * href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
private void indexDocs(IndexWriter writer, File file) throws IOException {
  // do not try to index files that cannot be read
  if (file.canRead()) {
    if (file.isDirectory()) {
      // recurse into the directory, as promised by the Javadoc above
      String[] files = file.list();
      // an IO error could occur, in which case list() returns null
      if (files != null) {
        for (String child : files) {
          indexDocs(writer, new File(file, child));
        }
      }
    } else {
      FileInputStream fis;
      try {
        fis = new FileInputStream(file);
      } catch (FileNotFoundException fnfe) {
        // at least on windows, some temporary files raise this exception with an
        // "access denied" message; checking if the file can be read doesn't help
        return;
      }

      try {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter). This indexes to millisecond resolution, which
        // is often too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that the file is expected to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(
            new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
          // New index, so we just add the document (no old document can be there):
          System.out.println("adding " + file);
          writer.addDocument(doc);
        } else {
          // Existing index (an old copy of this document may have been indexed) so
          // we use updateDocument instead to replace the old one matching the exact
          // path, if present:
          System.out.println("updating " + file);
          writer.updateDocument(new Term("path", file.getPath()), doc);
        }
      } finally {
        fis.close();
      }
    }
  }
}
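// A hedged sketch of how the method above is typically driven (modeled on the
// Lucene 4.x IndexFiles demo; indexPath, docDir, and create are illustrative
// values, not part of the method above):
Directory dir = FSDirectory.open(new File(indexPath));
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
// CREATE routes into the addDocument branch above; CREATE_OR_APPEND routes into
// the updateDocument branch once the index already exists:
iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
try (IndexWriter writer = new IndexWriter(dir, iwc)) {
  indexDocs(writer, new File(docDir));
}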
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
  try (InputStream stream = Files.newInputStream(file)) {
    // make a new, empty document
    Document doc = new Document();

    // Add the path of the file as a field named "path". Use a
    // field that is indexed (i.e. searchable), but don't tokenize
    // the field into separate words and don't index term frequency
    // or positional information:
    Field pathField = new StringField("path", file.toString(), Field.Store.YES);
    doc.add(pathField);

    // Add the file name, minus any ".txt" extension, as a field named "filename":
    String f = file.getFileName().toString();
    f = f.replaceFirst("\\.txt", "");
    doc.add(new StringField("filename", f, Field.Store.YES));

    // Add the last modified date of the file as a field named "modified".
    // Use a LongField that is indexed (i.e. efficiently filterable with
    // NumericRangeFilter). This indexes to millisecond resolution, which
    // is often too fine. You could instead create a number based on
    // year/month/day/hour/minutes/seconds, down to the resolution you require.
    // For example the long value 2011021714 would mean
    // February 17, 2011, 2-3 PM.
    doc.add(new LongField("modified", lastModified, Field.Store.NO));

    // Add the contents of the file to a field named "contents". Specify a Reader,
    // so that the text of the file is tokenized and indexed, but not stored.
    // Note that the file is expected to be in UTF-8 encoding.
    // If that's not the case searching for special characters will fail.
    doc.add(
        new TextField(
            "contents",
            new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
      // New index, so we just add the document (no old document can be there):
      // log("adding " + file);
      writer.addDocument(doc);
    } else {
      // Existing index (an old copy of this document may have been indexed) so
      // we use updateDocument instead to replace the old one matching the exact
      // path, if present:
      // log("updating " + file);
      writer.updateDocument(new Term("path", file.toString()), doc);
    }
  }
}
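// A companion sketch showing how indexDoc is usually driven (modeled on the
// Lucene IndexFiles demo; requires java.nio.file.* and
// java.nio.file.attribute.BasicFileAttributes):
static void indexDocs(final IndexWriter writer, Path path) throws IOException {
  if (Files.isDirectory(path)) {
    Files.walkFileTree(
        path,
        new SimpleFileVisitor<Path>() {
          @Override
          public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
              throws IOException {
            try {
              indexDoc(writer, file, attrs.lastModifiedTime().toMillis());
            } catch (IOException ignore) {
              // don't index files that can't be read
            }
            return FileVisitResult.CONTINUE;
          }
        });
  } else {
    indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis());
  }
}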
private void addDoc(IndexWriter writer, IndexingValue indexingValue) throws Exception {
  Document doc = new Document();

  // type
  Field typeField = new IntField(FIELD_LABEL_TYPE, indexingValue.getType(), Field.Store.YES);
  doc.add(typeField);

  // id
  Field idField = new StringField(FIELD_LABEL_ID, indexingValue.getId(), Field.Store.YES);
  doc.add(idField);

  // title
  doc.add(new TextField(FIELD_LABEL_TITLE, indexingValue.getTitle(), Field.Store.YES));

  // contents
  doc.add(new TextField(FIELD_LABEL_CONTENTS, indexingValue.getContents(), Field.Store.YES));

  // tags
  Field tagField = new TextField(FIELD_LABEL_TAGS, indexingValue.getTags(), Field.Store.YES);
  doc.add(tagField);

  // users allowed to access
  Field userField =
      new TextField(FIELD_LABEL_USERS, indexingValue.getUsers().toString(), Field.Store.YES);
  doc.add(userField);

  // groups allowed to access
  Field groupField =
      new TextField(FIELD_LABEL_GROUPS, indexingValue.getGroups().toString(), Field.Store.YES);
  doc.add(groupField);

  // creator
  Field creatorField =
      new StringField(FIELD_LABEL_CREATE_USER, indexingValue.getCreator(), Field.Store.YES);
  doc.add(creatorField);

  // timestamp
  Field timeField = new LongField(FIELD_LABEL_TIME, indexingValue.getTime(), Field.Store.YES);
  doc.add(timeField);

  if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
    log.debug("adding " + indexingValue.getId());
    writer.addDocument(doc);
  } else {
    log.debug("updating " + indexingValue.getId());
    writer.updateDocument(new Term(FIELD_LABEL_ID, indexingValue.getId()), doc);
  }
}
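// A hedged companion sketch: because addDoc keys updates on FIELD_LABEL_ID (a
// StringField, i.e. an untokenized exact term), a delete must use the same exact
// term. deleteDoc is a hypothetical helper, not part of the class above.
private void deleteDoc(IndexWriter writer, String id) throws IOException {
  writer.deleteDocuments(new Term(FIELD_LABEL_ID, id));
  log.debug("deleted " + id);
}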
@Override
public void forceMerge(
    final boolean flush,
    int maxNumSegments,
    boolean onlyExpungeDeletes,
    final boolean upgrade,
    final boolean upgradeOnlyAncientSegments)
    throws EngineException, EngineClosedException, IOException {
  /*
   * We do NOT acquire the readlock here, since we are waiting on the merges to finish.
   * That's fine, since IW.rollback should stop all the threads and trigger an IOException,
   * causing us to fail the forceMerge.
   *
   * The way we implement upgrades is a bit hackish in the sense that we set an instance
   * variable and that this setting will thus apply to the next forced merge that will be run.
   * This is ok because (1) this is the only place we call forceMerge, (2) we have a single
   * thread for optimize, with the 'optimizeLock' guarding this code, and (3) ConcurrentMergeScheduler
   * syncs calls to findForcedMerges.
   */
  assert indexWriter.getConfig().getMergePolicy() instanceof ElasticsearchMergePolicy
      : "MergePolicy is " + indexWriter.getConfig().getMergePolicy().getClass().getName();
  ElasticsearchMergePolicy mp =
      (ElasticsearchMergePolicy) indexWriter.getConfig().getMergePolicy();
  optimizeLock.lock();
  try {
    ensureOpen();
    if (upgrade) {
      logger.info(
          "starting segment upgrade upgradeOnlyAncientSegments={}", upgradeOnlyAncientSegments);
      mp.setUpgradeInProgress(true, upgradeOnlyAncientSegments);
    }
    store.incRef(); // increment the ref just to ensure nobody closes the store while we optimize
    try {
      if (onlyExpungeDeletes) {
        assert upgrade == false;
        indexWriter.forceMergeDeletes(true /* blocks and waits for merges */);
      } else if (maxNumSegments <= 0) {
        assert upgrade == false;
        indexWriter.maybeMerge();
      } else {
        indexWriter.forceMerge(maxNumSegments, true /* blocks and waits for merges */);
      }
      if (flush) {
        if (tryRenewSyncCommit() == false) {
          flush(false, true);
        }
      }
      if (upgrade) {
        logger.info("finished segment upgrade");
      }
    } finally {
      store.decRef();
    }
  } catch (Throwable t) {
    maybeFailEngine("force merge", t);
    throw t;
  } finally {
    try {
      mp.setUpgradeInProgress(false, false); // reset it just to make sure, in case of an error
    } finally {
      optimizeLock.unlock();
    }
  }
}
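// Illustrative call patterns for the method above ('engine' is a hypothetical
// reference; the parameter combinations are chosen to satisfy the asserts above):
engine.forceMerge(true, -1, true, false, false); // expunge deletes only, then flush
engine.forceMerge(true, 1, false, false, false); // merge down to a single segment
engine.forceMerge(false, 1, false, true, true);  // upgrade only ancient segments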
LiveIndexWriterConfig getCurrentIndexWriterConfig() {
  return indexWriter.getConfig();
}
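// Usage note: IndexWriter.getConfig() returns a live view, so settings exposed on
// LiveIndexWriterConfig take effect on the running writer, e.g. (illustrative):
getCurrentIndexWriterConfig().setRAMBufferSizeMB(64);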