/* * Test a deletion policy that keeps last N commits. */ public void testKeepLastNDeletionPolicy() throws IOException { final int N = 5; for (int pass = 0; pass < 2; pass++) { boolean useCompoundFile = (pass % 2) != 0; Directory dir = newDirectory(); if (dir instanceof MockDirectoryWrapper) { // test manually deletes files ((MockDirectoryWrapper) dir).setEnableVirusScanner(false); } KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N); for (int j = 0; j < N + 1; j++) { IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.CREATE) .setIndexDeletionPolicy(policy) .setMaxBufferedDocs(10); MergePolicy mp = conf.getMergePolicy(); mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0); IndexWriter writer = new IndexWriter(dir, conf); policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); for (int i = 0; i < 17; i++) { addDoc(writer); } writer.forceMerge(1); writer.close(); } assertTrue(policy.numDelete > 0); assertEquals(N + 1, policy.numOnInit); assertEquals(N + 1, policy.numOnCommit); // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.getLastCommitGeneration(dir); for (int i = 0; i < N + 1; i++) { try { IndexReader reader = DirectoryReader.open(dir); reader.close(); if (i == N) { fail("should have failed on commits prior to last " + N); } } catch (IOException e) { if (i != N) { throw e; } } if (i < N) { dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.close(); } }
final void finishCommit(Directory dir) throws IOException { if (pendingSegnOutput == null) throw new IllegalStateException("prepareCommit was not called"); boolean success = false; try { pendingSegnOutput.finishCommit(); pendingSegnOutput.close(); pendingSegnOutput = null; success = true; } finally { if (!success) rollbackCommit(dir); } // NOTE: if we crash here, we have left a segments_N // file in the directory in a possibly corrupt state (if // some bytes made it to stable storage and others // didn't). But, the segments_N file includes checksum // at the end, which should catch this case. So when a // reader tries to read it, it will throw a // CorruptIndexException, which should cause the retry // logic in SegmentInfos to kick in and load the last // good (previous) segments_N-1 file. final String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation); success = false; try { dir.sync(Collections.singleton(fileName)); success = true; } finally { if (!success) { try { dir.deleteFile(fileName); } catch (Throwable t) { // Suppress so we keep throwing the original exception } } } lastGeneration = generation; try { IndexOutput genOutput = dir.createOutput(IndexFileNames.SEGMENTS_GEN); try { genOutput.writeInt(FORMAT_LOCKLESS); genOutput.writeLong(generation); genOutput.writeLong(generation); } finally { genOutput.close(); } } catch (ThreadInterruptedException t) { throw t; } catch (Throwable t) { // It's OK if we fail to write this file since it's // used only as one of the retry fallbacks. } }
/**
 * Get the next segments_N filename that will be written.
 *
 * <p>A current generation of -1 maps to generation 1; otherwise the current generation plus one
 * is used.
 */
public String getNextSegmentFileName() {
  final long upcoming = (generation == -1) ? 1 : generation + 1;
  return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", upcoming);
}
/**
 * Sets some otherwise hard-to-test properties: random segment names, ID values, document count,
 * etc and round-trips
 */
public void testRandom() throws Exception {
  final Codec codec = getCodec();
  final Version[] candidateVersions = getVersions();

  for (int iter = 0; iter < 10; iter++) {
    Directory directory = newDirectory();

    // Randomized scalar properties (the order of random() calls matters for reproducibility).
    Version version = candidateVersions[random().nextInt(candidateVersions.length)];
    String segmentName =
        "_" + Integer.toString(random().nextInt(Integer.MAX_VALUE), Character.MAX_RADIX);
    int maxDoc = TestUtil.nextInt(random(), 1, IndexWriter.MAX_DOCS);
    boolean compound = random().nextBoolean();

    // Create a random number of (empty) files belonging to the segment.
    Set<String> segmentFiles = new HashSet<>();
    int fileCount = random().nextInt(10);
    for (int n = 0; n < fileCount; n++) {
      String fileName = IndexFileNames.segmentFileName(segmentName, "", Integer.toString(n));
      segmentFiles.add(fileName);
      dir(directory, fileName);
    }

    Map<String, String> diagnostics = new HashMap<>();
    int diagnosticCount = random().nextInt(10);
    for (int n = 0; n < diagnosticCount; n++) {
      diagnostics.put(
          TestUtil.randomUnicodeString(random()), TestUtil.randomUnicodeString(random()));
    }

    byte[] segmentId = new byte[StringHelper.ID_LENGTH];
    random().nextBytes(segmentId);

    Map<String, String> attributes = new HashMap<>();
    int attributeCount = random().nextInt(10);
    for (int n = 0; n < attributeCount; n++) {
      attributes.put(
          TestUtil.randomUnicodeString(random()), TestUtil.randomUnicodeString(random()));
    }

    SegmentInfo written =
        new SegmentInfo(
            directory,
            version,
            segmentName,
            maxDoc,
            compound,
            codec,
            diagnostics,
            segmentId,
            attributes,
            null);
    written.setFiles(segmentFiles);

    // Round-trip through the codec's segment info format.
    codec.segmentInfoFormat().write(directory, written, IOContext.DEFAULT);
    SegmentInfo readBack =
        codec.segmentInfoFormat().read(directory, segmentName, segmentId, IOContext.DEFAULT);
    assertEquals(written, readBack);

    directory.close();
  }
}

/** Creates an empty file with the given name in the directory. */
private static void dir(Directory directory, String fileName) throws IOException {
  directory.createOutput(fileName, IOContext.DEFAULT).close();
}
/** Returns the committed segments_N filename. */ final String finishCommit(Directory dir) throws IOException { if (pendingCommit == false) { throw new IllegalStateException("prepareCommit was not called"); } boolean success = false; final String dest; try { final String src = IndexFileNames.fileNameFromGeneration(IndexFileNames.PENDING_SEGMENTS, "", generation); dest = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation); dir.renameFile(src, dest); success = true; } finally { if (!success) { // deletes pending_segments_N: rollbackCommit(dir); } } pendingCommit = false; lastGeneration = generation; return dest; }
final void rollbackCommit(Directory dir) { if (pendingCommit) { pendingCommit = false; // we try to clean up our pending_segments_N // Must carefully compute fileName from "generation" // since lastGeneration isn't incremented: final String pending = IndexFileNames.fileNameFromGeneration(IndexFileNames.PENDING_SEGMENTS, "", generation); // Suppress so we keep throwing the original exception // in our caller IOUtils.deleteFilesIgnoringExceptions(dir, pending); } }
final void rollbackCommit(Directory dir) throws IOException { if (pendingSegnOutput != null) { try { pendingSegnOutput.close(); } catch (Throwable t) { // Suppress so we keep throwing the original exception // in our caller } // Must carefully compute fileName from "generation" // since lastGeneration isn't incremented: try { final String segmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation); dir.deleteFile(segmentFileName); } catch (Throwable t) { // Suppress so we keep throwing the original exception // in our caller } pendingSegnOutput = null; } }
/**
 * Get the filename of the segments_N file for the most recent commit in the list of index files.
 *
 * @param files -- array of file names to check
 * @return the segments file name built from the last commit generation found in {@code files}
 */
public static String getLastCommitSegmentsFileName(String[] files) {
  final long lastCommitGeneration = getLastCommitGeneration(files);
  return IndexFileNames.fileNameFromGeneration(
      IndexFileNames.SEGMENTS, "", lastCommitGeneration);
}
/* * Test a deletion policy that keeps last N commits * around, through creates. */ public void testKeepLastNDeletionPolicyWithCreates() throws IOException { final int N = 10; for (int pass = 0; pass < 2; pass++) { boolean useCompoundFile = (pass % 2) != 0; Directory dir = newDirectory(); if (dir instanceof MockDirectoryWrapper) { // test manually deletes files ((MockDirectoryWrapper) dir).setEnableVirusScanner(false); } IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.CREATE) .setIndexDeletionPolicy(new KeepLastNDeletionPolicy(N)) .setMaxBufferedDocs(10); MergePolicy mp = conf.getMergePolicy(); mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0); IndexWriter writer = new IndexWriter(dir, conf); KeepLastNDeletionPolicy policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); writer.close(); Term searchTerm = new Term("content", "aaa"); Query query = new TermQuery(searchTerm); for (int i = 0; i < N + 1; i++) { conf = newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND) .setIndexDeletionPolicy(policy) .setMaxBufferedDocs(10); mp = conf.getMergePolicy(); mp.setNoCFSRatio(useCompoundFile ? 
1.0 : 0.0); writer = new IndexWriter(dir, conf); policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); for (int j = 0; j < 17; j++) { addDocWithID(writer, i * (N + 1) + j); } // this is a commit writer.close(); conf = new IndexWriterConfig(new MockAnalyzer(random())) .setIndexDeletionPolicy(policy) .setMergePolicy(NoMergePolicy.INSTANCE); writer = new IndexWriter(dir, conf); policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); writer.deleteDocuments(new Term("id", "" + (i * (N + 1) + 3))); // this is a commit writer.close(); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = newSearcher(reader); ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs; assertEquals(16, hits.length); reader.close(); writer = new IndexWriter( dir, newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.CREATE) .setIndexDeletionPolicy(policy)); policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); // This will not commit: there are no changes // pending because we opened for "create": writer.close(); } assertEquals(3 * (N + 1) + 1, policy.numOnInit); assertEquals(3 * (N + 1) + 1, policy.numOnCommit); IndexReader rwReader = DirectoryReader.open(dir); IndexSearcher searcher = newSearcher(rwReader); ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs; assertEquals(0, hits.length); // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.getLastCommitGeneration(dir); int expectedCount = 0; rwReader.close(); for (int i = 0; i < N + 1; i++) { try { IndexReader reader = DirectoryReader.open(dir); // Work backwards in commits on what the expected // count should be. 
searcher = newSearcher(reader); hits = searcher.search(query, 1000).scoreDocs; assertEquals(expectedCount, hits.length); if (expectedCount == 0) { expectedCount = 16; } else if (expectedCount == 16) { expectedCount = 17; } else if (expectedCount == 17) { expectedCount = 0; } reader.close(); if (i == N) { fail("should have failed on commits before last " + N); } } catch (IOException e) { if (i != N) { throw e; } } if (i < N) { dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.close(); } }
/** Run {@link #doBody} on the provided commit. */ public T run(IndexCommit commit) throws IOException { if (commit != null) { if (directory != commit.getDirectory()) throw new IOException("the specified commit does not match the specified Directory"); return doBody(commit.getSegmentsFileName()); } long lastGen = -1; long gen = -1; IOException exc = null; // Loop until we succeed in calling doBody() without // hitting an IOException. An IOException most likely // means an IW deleted our commit while opening // the time it took us to load the now-old infos files // (and segments files). It's also possible it's a // true error (corrupt index). To distinguish these, // on each retry we must see "forward progress" on // which generation we are trying to load. If we // don't, then the original error is real and we throw // it. for (; ; ) { lastGen = gen; String files[] = directory.listAll(); String files2[] = directory.listAll(); Arrays.sort(files); Arrays.sort(files2); if (!Arrays.equals(files, files2)) { // listAll() is weakly consistent, this means we hit "concurrent modification exception" continue; } gen = getLastCommitGeneration(files); if (infoStream != null) { message("directory listing gen=" + gen); } if (gen == -1) { throw new IndexNotFoundException( "no segments* file found in " + directory + ": files: " + Arrays.toString(files)); } else if (gen > lastGen) { String segmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); try { T t = doBody(segmentFileName); if (infoStream != null) { message("success on " + segmentFileName); } return t; } catch (IOException err) { // Save the original root cause: if (exc == null) { exc = err; } if (infoStream != null) { message( "primary Exception on '" + segmentFileName + "': " + err + "'; will retry: gen = " + gen); } } } else { throw exc; } } }
/*
 * Test a silly deletion policy that keeps all commits around.
 *
 * Runs two passes. After the writer(s) are closed it asserts that the policy's numOnInit and
 * numOnCommit counters match the expected values (which depend on needsMerging), that
 * DirectoryReader.listCommits returns the same number of commits, and that a reader can be
 * opened on every listed commit. It then walks the commit generations downwards from the last
 * one: a reader is opened on the directory, the segments file of that generation is deleted
 * and, while older generations remain, an APPEND writer using the same policy is opened and
 * closed, which must leave fewer files in the directory listing than before.
 */
public void testKeepAllDeletionPolicy() throws IOException { for (int pass = 0; pass < 2; pass++) { if (VERBOSE) { System.out.println("TEST: cycle pass="******"TEST: open writer for forceMerge"); } writer = new IndexWriter(dir, conf); policy = (KeepAllDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); writer.forceMerge(1); writer.close(); } assertEquals(needsMerging ? 2 : 1, policy.numOnInit); // If we are not auto committing then there should // be exactly 2 commits (one per close above): assertEquals(1 + (needsMerging ? 1 : 0), policy.numOnCommit); // Test listCommits Collection<IndexCommit> commits = DirectoryReader.listCommits(dir); // 2 from closing writer assertEquals(1 + (needsMerging ? 1 : 0), commits.size()); // Make sure we can open a reader on each commit: for (final IndexCommit commit : commits) { IndexReader r = DirectoryReader.open(commit); r.close(); } // Simplistic check: just verify all segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.getLastCommitGeneration(dir); while (gen > 0) { IndexReader reader = DirectoryReader.open(dir); reader.close(); dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; if (gen > 0) { // Now that we've removed a commit point, which // should have orphan'd at least one index file. // Open & close a writer and assert that it // actually removed something: int preCount = dir.listAll().length; writer = new IndexWriter( dir, newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND) .setIndexDeletionPolicy(policy)); writer.close(); int postCount = dir.listAll().length; assertTrue(postCount < preCount); } } dir.close(); } }
/* * Test "by time expiration" deletion policy: */ public void testExpirationTimeDeletionPolicy() throws IOException, InterruptedException { final double SECONDS = 2.0; Directory dir = newDirectory(); if (dir instanceof MockDirectoryWrapper) { // test manually deletes files ((MockDirectoryWrapper) dir).setEnableVirusScanner(false); } IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())) .setIndexDeletionPolicy(new ExpirationTimeDeletionPolicy(dir, SECONDS)); MergePolicy mp = conf.getMergePolicy(); mp.setNoCFSRatio(1.0); IndexWriter writer = new IndexWriter(dir, conf); ExpirationTimeDeletionPolicy policy = (ExpirationTimeDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); Map<String, String> commitData = new HashMap<>(); commitData.put("commitTime", String.valueOf(System.currentTimeMillis())); writer.setCommitData(commitData); writer.commit(); writer.close(); long lastDeleteTime = 0; final int targetNumDelete = TestUtil.nextInt(random(), 1, 5); while (policy.numDelete < targetNumDelete) { // Record last time when writer performed deletes of // past commits lastDeleteTime = System.currentTimeMillis(); conf = newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND) .setIndexDeletionPolicy(policy); mp = conf.getMergePolicy(); mp.setNoCFSRatio(1.0); writer = new IndexWriter(dir, conf); policy = (ExpirationTimeDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); for (int j = 0; j < 17; j++) { addDoc(writer); } commitData = new HashMap<>(); commitData.put("commitTime", String.valueOf(System.currentTimeMillis())); writer.setCommitData(commitData); writer.commit(); writer.close(); Thread.sleep((int) (1000.0 * (SECONDS / 5.0))); } // Then simplistic check: just verify that the // segments_N's that still exist are in fact within SECONDS // seconds of the last one's mod time, and, that I can // open a reader on each: long gen = SegmentInfos.getLastCommitGeneration(dir); String fileName = 
IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); boolean oneSecondResolution = true; while (gen > 0) { try { IndexReader reader = DirectoryReader.open(dir); reader.close(); fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); // if we are on a filesystem that seems to have only // 1 second resolution, allow +1 second in commit // age tolerance: SegmentInfos sis = SegmentInfos.readCommit(dir, fileName); long modTime = Long.parseLong(sis.getUserData().get("commitTime")); oneSecondResolution &= (modTime % 1000) == 0; final long leeway = (long) ((SECONDS + (oneSecondResolution ? 1.0 : 0.0)) * 1000); assertTrue( "commit point was older than " + SECONDS + " seconds (" + (lastDeleteTime - modTime) + " msec) but did not get deleted ", lastDeleteTime - modTime <= leeway); } catch (IOException e) { // OK break; } dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; } dir.close(); }
/**
 * Locates a segments file and runs {@code doBody} on it, retrying while the index appears to
 * be changing underneath.
 *
 * <p>If {@code commit} is non-null its segments file is used directly; the commit must come
 * from the very same {@code Directory} instance (compared by identity), otherwise an
 * {@code IOException} is thrown.
 *
 * <p>If {@code commit} is null the generation to load is determined as follows:
 *
 * <ol>
 *   <li>While {@code useFirstMethod} is true: take the larger of the generation found in the
 *       directory listing (genA) and the generation read from the segments.gen file (genB). The
 *       segments.gen read is attempted up to {@code defaultGenFileRetryCount} times, sleeping
 *       {@code defaultGenFileRetryPauseMsec} between attempts, and is only accepted when the
 *       two longs stored in the file agree. A missing segments.gen ends those attempts early.
 *   <li>Once the same generation has been seen again with {@code retryCount >= 2}, the first
 *       method is abandoned and the generation is simply incremented, at most
 *       {@code defaultGenLookaheadCount} times; after that the first recorded
 *       {@code IOException} is thrown.
 * </ol>
 *
 * <p>When {@code doBody} fails with an {@code IOException} the first such exception is
 * remembered. On the second attempt at the same generation ({@code retryCount == 1}, still
 * using the first method, {@code gen > 1}) the previous generation's segments file is tried as
 * a fallback if it exists.
 *
 * @param commit the commit to load, or null to discover the current one
 * @return the value returned by {@code doBody}
 * @throws IndexNotFoundException if neither the directory listing nor segments.gen yields a
 *     generation
 * @throws IOException if the commit belongs to another directory, or the first failure once
 *     all look-ahead attempts are used up
 */
public Object run(IndexCommit commit) throws CorruptIndexException, IOException { if (commit != null) { if (directory != commit.getDirectory()) throw new IOException("the specified commit does not match the specified Directory"); return doBody(commit.getSegmentsFileName()); } String segmentFileName = null; long lastGen = -1; long gen = 0; int genLookaheadCount = 0; IOException exc = null; int retryCount = 0; boolean useFirstMethod = true; // Loop until we succeed in calling doBody() without // hitting an IOException. An IOException most likely // means a commit was in process and has finished, in // the time it took us to load the now-old infos files // (and segments files). It's also possible it's a // true error (corrupt index). To distinguish these, // on each retry we must see "forward progress" on // which generation we are trying to load. If we // don't, then the original error is real and we throw // it. // We have three methods for determining the current // generation. We try the first two in parallel (when // useFirstMethod is true), and fall back to the third // when necessary. while (true) { if (useFirstMethod) { // List the directory and use the highest // segments_N file. This method works well as long // as there is no stale caching on the directory // contents (NOTE: NFS clients often have such stale // caching): String[] files = null; long genA = -1; files = directory.listAll(); if (files != null) { genA = getCurrentSegmentGeneration(files); } if (infoStream != null) { message("directory listing genA=" + genA); } // Also open segments.gen and read its // contents. Then we take the larger of the two // gens. This way, if either approach is hitting // a stale cache (NFS) we have a better chance of // getting the right generation. 
long genB = -1; for (int i = 0; i < defaultGenFileRetryCount; i++) { IndexInput genInput = null; try { genInput = directory.openInput(IndexFileNames.SEGMENTS_GEN); } catch (FileNotFoundException e) { if (infoStream != null) { message("segments.gen open: FileNotFoundException " + e); } break; } catch (IOException e) { if (infoStream != null) { message("segments.gen open: IOException " + e); } } if (genInput != null) { try { int version = genInput.readInt(); if (version == FORMAT_LOCKLESS) { long gen0 = genInput.readLong(); long gen1 = genInput.readLong(); if (infoStream != null) { message("fallback check: " + gen0 + "; " + gen1); } if (gen0 == gen1) { // The file is consistent. genB = gen0; break; } } } catch (IOException err2) { // will retry } finally { genInput.close(); } } try { Thread.sleep(defaultGenFileRetryPauseMsec); } catch (InterruptedException ie) { throw new ThreadInterruptedException(ie); } } if (infoStream != null) { message(IndexFileNames.SEGMENTS_GEN + " check: genB=" + genB); } // Pick the larger of the two gen's: if (genA > genB) gen = genA; else gen = genB; if (gen == -1) { // Neither approach found a generation throw new IndexNotFoundException( "no segments* file found in " + directory + ": files: " + Arrays.toString(files)); } } if (useFirstMethod && lastGen == gen && retryCount >= 2) { // Give up on first method -- this is 3rd cycle on // listing directory and checking gen file to // attempt to locate the segments file. useFirstMethod = false; } // Second method: since both directory cache and // file contents cache seem to be stale, just // advance the generation. if (!useFirstMethod) { if (genLookaheadCount < defaultGenLookaheadCount) { gen++; genLookaheadCount++; if (infoStream != null) { message("look ahead increment gen to " + gen); } } else { // All attempts have failed -- throw first exc: throw exc; } } else if (lastGen == gen) { // This means we're about to try the same // segments_N last tried. 
retryCount++; } else { // Segment file has advanced since our last loop // (we made "progress"), so reset retryCount: retryCount = 0; } lastGen = gen; segmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); try { Object v = doBody(segmentFileName); if (infoStream != null) { message("success on " + segmentFileName); } return v; } catch (IOException err) { // Save the original root cause: if (exc == null) { exc = err; } if (infoStream != null) { message( "primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retryCount=" + retryCount + "; gen = " + gen); } if (gen > 1 && useFirstMethod && retryCount == 1) { // This is our second time trying this same segments // file (because retryCount is 1), and, there is // possibly a segments_(N-1) (because gen > 1). // So, check if the segments_(N-1) exists and // try it if so: String prevSegmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen - 1); final boolean prevExists; prevExists = directory.fileExists(prevSegmentFileName); if (prevExists) { if (infoStream != null) { message("fallback to prior segment file '" + prevSegmentFileName + "'"); } try { Object v = doBody(prevSegmentFileName); if (infoStream != null) { message("success on fallback " + prevSegmentFileName); } return v; } catch (IOException err2) { if (infoStream != null) { message( "secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry"); } } } } } } }
/** * Read a particular segmentFileName. Note that this may throw an IOException if a commit is in * process. * * @param directory -- directory containing the segments file * @param segmentFileName -- segment file to load * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public final void read(Directory directory, String segmentFileName) throws CorruptIndexException, IOException { boolean success = false; // Clear any previous segments: this.clear(); ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName)); generation = generationFromSegmentsFileName(segmentFileName); lastGeneration = generation; try { int format = input.readInt(); if (format < 0) { // file contains explicit format info // check that it is a format we can understand if (format < CURRENT_FORMAT) throw new CorruptIndexException("Unknown format version: " + format); version = input.readLong(); // read version counter = input.readInt(); // read counter } else { // file is in old format without explicit format info counter = format; } for (int i = input.readInt(); i > 0; i--) { // read segmentInfos SegmentInfo si = new SegmentInfo(directory, format, input); if (si.getVersion() == null) { // It's a pre-3.1 segment, upgrade its version to either 3.0 or 2.x Directory dir = directory; if (si.getDocStoreOffset() != -1) { if (si.getDocStoreIsCompoundFile()) { dir = new CompoundFileReader( dir, IndexFileNames.segmentFileName( si.getDocStoreSegment(), IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), 1024); } } else if (si.getUseCompoundFile()) { dir = new CompoundFileReader( dir, IndexFileNames.segmentFileName(si.name, IndexFileNames.COMPOUND_FILE_EXTENSION), 1024); } try { String store = si.getDocStoreOffset() != -1 ? 
si.getDocStoreSegment() : si.name; si.setVersion(FieldsReader.detectCodeVersion(dir, store)); } finally { // If we opened the directory, close it if (dir != directory) dir.close(); } } add(si); } if (format >= 0) { // in old format the version number may be at the end of the file if (input.getFilePointer() >= input.length()) version = System.currentTimeMillis(); // old file format without version number else version = input.readLong(); // read version } if (format <= FORMAT_USER_DATA) { if (format <= FORMAT_DIAGNOSTICS) { userData = input.readStringStringMap(); } else if (0 != input.readByte()) { userData = Collections.singletonMap("userData", input.readString()); } else { userData = Collections.<String, String>emptyMap(); } } else { userData = Collections.<String, String>emptyMap(); } if (format <= FORMAT_CHECKSUM) { final long checksumNow = input.getChecksum(); final long checksumThen = input.readLong(); if (checksumNow != checksumThen) throw new CorruptIndexException("checksum mismatch in segments file"); } success = true; } finally { input.close(); if (!success) { // Clear any segment infos we had loaded so we // have a clean slate on retry: this.clear(); } } }
private void write(Directory directory) throws IOException { long nextGeneration = getNextPendingGeneration(); String segmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.PENDING_SEGMENTS, "", nextGeneration); // Always advance the generation on write: generation = nextGeneration; IndexOutput segnOutput = null; boolean success = false; try { segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT); CodecUtil.writeIndexHeader( segnOutput, "segments", VERSION_CURRENT, StringHelper.randomId(), Long.toString(nextGeneration, Character.MAX_RADIX)); segnOutput.writeVInt(Version.LATEST.major); segnOutput.writeVInt(Version.LATEST.minor); segnOutput.writeVInt(Version.LATEST.bugfix); segnOutput.writeLong(version); segnOutput.writeInt(counter); // write counter segnOutput.writeInt(size()); if (size() > 0) { Version minSegmentVersion = null; // We do a separate loop up front so we can write the minSegmentVersion before // any SegmentInfo; this makes it cleaner to throw IndexFormatTooOldExc at read time: for (SegmentCommitInfo siPerCommit : this) { Version segmentVersion = siPerCommit.info.getVersion(); if (minSegmentVersion == null || segmentVersion.onOrAfter(minSegmentVersion) == false) { minSegmentVersion = segmentVersion; } } segnOutput.writeVInt(minSegmentVersion.major); segnOutput.writeVInt(minSegmentVersion.minor); segnOutput.writeVInt(minSegmentVersion.bugfix); } // write infos for (SegmentCommitInfo siPerCommit : this) { SegmentInfo si = siPerCommit.info; segnOutput.writeString(si.name); byte segmentID[] = si.getId(); // TODO: remove this in lucene 6, we don't need to include 4.x segments in commits anymore if (segmentID == null) { segnOutput.writeByte((byte) 0); } else { if (segmentID.length != StringHelper.ID_LENGTH) { throw new IllegalStateException( "cannot write segment: invalid id segment=" + si.name + "id=" + StringHelper.idToString(segmentID)); } segnOutput.writeByte((byte) 1); segnOutput.writeBytes(segmentID, segmentID.length); 
} segnOutput.writeString(si.getCodec().getName()); segnOutput.writeLong(siPerCommit.getDelGen()); int delCount = siPerCommit.getDelCount(); if (delCount < 0 || delCount > si.maxDoc()) { throw new IllegalStateException( "cannot write segment: invalid maxDoc segment=" + si.name + " maxDoc=" + si.maxDoc() + " delCount=" + delCount); } segnOutput.writeInt(delCount); segnOutput.writeLong(siPerCommit.getFieldInfosGen()); segnOutput.writeLong(siPerCommit.getDocValuesGen()); segnOutput.writeSetOfStrings(siPerCommit.getFieldInfosFiles()); final Map<Integer, Set<String>> dvUpdatesFiles = siPerCommit.getDocValuesUpdatesFiles(); segnOutput.writeInt(dvUpdatesFiles.size()); for (Entry<Integer, Set<String>> e : dvUpdatesFiles.entrySet()) { segnOutput.writeInt(e.getKey()); segnOutput.writeSetOfStrings(e.getValue()); } } segnOutput.writeMapOfStrings(userData); CodecUtil.writeFooter(segnOutput); segnOutput.close(); directory.sync(Collections.singleton(segmentFileName)); success = true; } finally { if (success) { pendingCommit = true; } else { // We hit an exception above; try to close the file // but suppress any exception: IOUtils.closeWhileHandlingException(segnOutput); // Try not to leave a truncated segments_N file in // the index: IOUtils.deleteFilesIgnoringExceptions(directory, segmentFileName); } } }
/**
 * Get the segments_N filename in use by this segment infos.
 *
 * @return the segments file name built from {@code lastGeneration}
 */
public String getCurrentSegmentFileName() {
  final long inUseGeneration = lastGeneration;
  return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", inUseGeneration);
}
/**
 * Get the filename of the current segments_N file in the directory.
 *
 * @param directory -- directory to search for the latest segments_N file
 * @return the segments file name built from the directory's current segment generation
 */
public static String getCurrentSegmentFileName(Directory directory) throws IOException {
  final long currentGeneration = getCurrentSegmentGeneration(directory);
  return IndexFileNames.fileNameFromGeneration(
      IndexFileNames.SEGMENTS, "", currentGeneration);
}
/**
 * Get the filename of the current segments_N file from a list of files.
 *
 * @param files -- array of file names to check
 * @return the segments file name built from the current segment generation found in
 *     {@code files}
 */
public static String getCurrentSegmentFileName(String[] files) throws IOException {
  final long currentGeneration = getCurrentSegmentGeneration(files);
  return IndexFileNames.fileNameFromGeneration(
      IndexFileNames.SEGMENTS, "", currentGeneration);
}
public void testDocsStuckInRAMForever() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); iwc.setRAMBufferSizeMB(.2); Codec codec = Codec.forName("Lucene49"); iwc.setCodec(codec); iwc.setMergePolicy(NoMergePolicy.INSTANCE); final IndexWriter w = new IndexWriter(dir, iwc); final CountDownLatch startingGun = new CountDownLatch(1); Thread[] threads = new Thread[2]; for (int i = 0; i < threads.length; i++) { final int threadID = i; threads[i] = new Thread() { @Override public void run() { try { startingGun.await(); for (int j = 0; j < 1000; j++) { Document doc = new Document(); doc.add(newStringField("field", "threadID" + threadID, Field.Store.NO)); w.addDocument(doc); } } catch (Exception e) { throw new RuntimeException(e); } } }; threads[i].start(); } startingGun.countDown(); for (Thread t : threads) { t.join(); } Set<String> segSeen = new HashSet<>(); int thread0Count = 0; int thread1Count = 0; // At this point the writer should have 2 thread states w/ docs; now we index with only 1 thread // until we see all 1000 thread0 & thread1 // docs flushed. 
If the writer incorrectly holds onto previously indexed docs forever then this // will run forever: while (thread0Count < 1000 || thread1Count < 1000) { Document doc = new Document(); doc.add(newStringField("field", "threadIDmain", Field.Store.NO)); w.addDocument(doc); for (String fileName : dir.listAll()) { if (fileName.endsWith(".si")) { String segName = IndexFileNames.parseSegmentName(fileName); if (segSeen.contains(segName) == false) { segSeen.add(segName); SegmentInfo si = new Lucene46SegmentInfoFormat() .getSegmentInfoReader() .read(dir, segName, IOContext.DEFAULT); si.setCodec(codec); SegmentCommitInfo sci = new SegmentCommitInfo(si, 0, -1, -1, -1); SegmentReader sr = new SegmentReader(sci, 1, IOContext.DEFAULT); try { thread0Count += sr.docFreq(new Term("field", "threadID0")); thread1Count += sr.docFreq(new Term("field", "threadID1")); } finally { sr.close(); } } } } } w.close(); dir.close(); }