@Override
public void onInit(List<? extends IndexCommit> commits) throws IOException {
  verifyCommitOrder(commits);
  numOnInit++;
  // On init, delete all commit points:
  for (final IndexCommit commit : commits) {
    commit.delete();
    assertTrue(commit.isDeleted());
  }
}
private synchronized IndexReader doOpenNoWriter(final boolean openReadOnly, IndexCommit commit)
    throws CorruptIndexException, IOException {

  if (commit == null) {
    if (hasChanges) {
      // We have changes, which means we are not readOnly:
      assert readOnly == false;
      // and we hold the write lock:
      assert writeLock != null;
      // so no other writer holds the write lock, which
      // means no changes could have been done to the index:
      assert isCurrent();

      if (openReadOnly) {
        return clone(openReadOnly);
      } else {
        return null;
      }
    } else if (isCurrent()) {
      if (openReadOnly != readOnly) {
        // Just fallback to clone
        return clone(openReadOnly);
      } else {
        return null;
      }
    }
  } else {
    if (directory != commit.getDirectory()) {
      throw new IOException("the specified commit does not match the specified Directory");
    }
    if (segmentInfos != null
        && commit.getSegmentsFileName().equals(segmentInfos.getCurrentSegmentFileName())) {
      if (readOnly != openReadOnly) {
        // Just fallback to clone
        return clone(openReadOnly);
      } else {
        return null;
      }
    }
  }

  return (IndexReader)
      new SegmentInfos.FindSegmentsFile(directory) {
        @Override
        protected Object doBody(String segmentFileName) throws CorruptIndexException, IOException {
          SegmentInfos infos = new SegmentInfos();
          infos.read(directory, segmentFileName);
          return doOpenIfChanged(infos, false, openReadOnly);
        }
      }.run(commit);
}
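// Hedged usage sketch of the reopen path that doOpenNoWriter() backs:
// callers typically go through the public reopen API, which returns null
// when the reader is already current, matching the null returns above.
// The helper method and its argument are illustrative assumptions.
IndexReader reopenIfNeeded(IndexReader reader) throws IOException {
  IndexReader newReader = IndexReader.openIfChanged(reader);
  if (newReader != null) {
    // The index changed, so a fresh reader was produced; retire the old one.
    reader.close();
    return newReader;
  }
  // null means the reader was already current; keep using it.
  return reader;
}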
@Override
public void onCommit(List<? extends IndexCommit> commits) throws IOException {
  IndexCommit lastCommit = commits.get(commits.size() - 1);
  DirectoryReader r = DirectoryReader.open(dir);
  assertEquals(
      "lastCommit.segmentCount()=" + lastCommit.getSegmentCount()
          + " vs IndexReader.segmentCount=" + r.leaves().size(),
      r.leaves().size(),
      lastCommit.getSegmentCount());
  r.close();
  verifyCommitOrder(commits);
  numOnCommit++;
}
@Override
public void onCommit(List<? extends IndexCommit> commits) throws IOException {
  verifyCommitOrder(commits);
  IndexCommit lastCommit = commits.get(commits.size() - 1);
  // Any commit older than expireTime should be deleted:
  double expireTime = getCommitTime(lastCommit) / 1000.0 - expirationTimeSeconds;
  for (final IndexCommit commit : commits) {
    double modTime = getCommitTime(commit) / 1000.0;
    if (commit != lastCommit && modTime < expireTime) {
      commit.delete();
      numDelete += 1;
    }
  }
}
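// Hedged sketch (assumed setup, not part of the policy above): this policy
// reads a "commitTime" entry from each commit's user data, so whatever code
// drives the IndexWriter must record that entry at commit time for
// getCommitTime() below to work. The helper method and the wall-clock choice
// are illustrative assumptions; commit(Map) is the same user-data commit API
// used in testFutureCommit() below.
void commitWithTimestamp(IndexWriter writer) throws IOException {
  Map<String, String> userData = new HashMap<String, String>();
  userData.put("commitTime", Long.toString(System.currentTimeMillis()));
  writer.commit(userData); // stamps the commit so the policy can expire it later
}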
public synchronized void onInit(List<? extends IndexCommit> commits) throws IOException {
  primary.onInit(wrapCommits(commits));
  lastCommit = commits.get(commits.size() - 1);

  /*
   * First, assign snapshotted IndexCommits to their correct snapshot IDs as
   * specified in the constructor.
   */
  for (IndexCommit commit : commits) {
    Set<String> ids = segmentsFileToIDs.get(commit.getSegmentsFileName());
    if (ids != null) {
      for (String id : ids) {
        idToSnapshot.get(id).commit = commit;
      }
    }
  }

  /*
   * Second, see if there are any instances where a snapshot ID was specified
   * in the constructor but an IndexCommit doesn't exist. In this case, the ID
   * should be removed.
   *
   * Note: This code is protective for extreme cases where IDs point to
   * non-existent segments. As the constructor should have received its
   * information via a call to getSnapshots(), the data should be well-formed.
   */
  // Find lost snapshots
  ArrayList<String> idsToRemove = null;
  for (Entry<String, SnapshotInfo> e : idToSnapshot.entrySet()) {
    if (e.getValue().commit == null) {
      if (idsToRemove == null) {
        idsToRemove = new ArrayList<String>();
      }
      idsToRemove.add(e.getKey());
    }
  }
  // Finally, remove those 'lost' snapshots.
  if (idsToRemove != null) {
    for (String id : idsToRemove) {
      SnapshotInfo info = idToSnapshot.remove(id);
      segmentsFileToIDs.remove(info.segmentsFileName);
    }
  }
}
@Override
public void delete() {
  synchronized (SnapshotDeletionPolicy.this) {
    // Suppress the delete request if this commit point is
    // currently snapshotted.
    if (shouldDelete(getSegmentsFileName())) {
      cp.delete();
    }
  }
}
/**
 * Snapshots the last commit. Once a commit is 'snapshotted,' it is protected from deletion (as
 * long as this {@link IndexDeletionPolicy} is used). The snapshot can be removed by calling
 * {@link #release(String)} with the same ID parameter, followed by a call to {@link
 * IndexWriter#deleteUnusedFiles()}.
 *
 * <p><b>NOTE:</b> the ID must be unique in the system. If the same ID is used twice, an {@link
 * IllegalStateException} is thrown.
 *
 * <p><b>NOTE:</b> while the snapshot is held, the files it references will not be deleted, which
 * will consume additional disk space in your index. If you take a snapshot at a particularly bad
 * time (say just before you call forceMerge) then in the worst case this could consume an extra
 * 1X of your total index size, until you release the snapshot.
 *
 * @param id a unique identifier of the commit that is being snapshotted.
 * @throws IllegalStateException if either there is no 'last commit' to snapshot, or if the
 *     parameter 'ID' refers to an already snapshotted commit.
 * @return the {@link IndexCommit} that was snapshotted.
 */
public synchronized IndexCommit snapshot(String id) throws IOException {
  if (lastCommit == null) {
    // No commit exists. Really shouldn't happen, but might occur if SDP is
    // accessed before onInit or onCommit were called.
    throw new IllegalStateException("No index commit to snapshot");
  }
  // Can't use the same snapshot ID twice...
  checkSnapshotted(id);
  registerSnapshotInfo(id, lastCommit.getSegmentsFileName(), lastCommit);
  return lastCommit;
}
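// Hedged usage sketch of the snapshot/release cycle documented above. The
// helper method, its arguments, the ID, and the backup step are illustrative
// assumptions; only snapshot(), release(), getFileNames() and
// IndexWriter.deleteUnusedFiles() come from the surrounding code.
void backupLastCommit(IndexWriter writer, SnapshotDeletionPolicy sdp) throws IOException {
  IndexCommit snapshot = sdp.snapshot("backup-1"); // IDs must be unique
  try {
    for (String fileName : snapshot.getFileNames()) {
      // Copy fileName to backup storage; the referenced files cannot be
      // deleted out from under us while the snapshot is held.
    }
  } finally {
    sdp.release("backup-1"); // release by the same ID
    writer.deleteUnusedFiles(); // now the snapshotted files may be reclaimed
  }
}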
private void verifyCommitOrder(List<? extends IndexCommit> commits) {
  if (commits.isEmpty()) {
    return;
  }
  final IndexCommit firstCommit = commits.get(0);
  long last = SegmentInfos.generationFromSegmentsFileName(firstCommit.getSegmentsFileName());
  assertEquals(last, firstCommit.getGeneration());
  for (int i = 1; i < commits.size(); i++) {
    final IndexCommit commit = commits.get(i);
    long now = SegmentInfos.generationFromSegmentsFileName(commit.getSegmentsFileName());
    assertTrue("SegmentInfos commits are out-of-order", now > last);
    assertEquals(now, commit.getGeneration());
    last = now;
  }
}
/* Uses KeepAllDeletionPolicy to keep all commits around,
 * then, opens a new IndexWriter on a previous commit
 * point. */
public void testOpenPriorSnapshot() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer =
      new IndexWriter(
          dir,
          newIndexWriterConfig(new MockAnalyzer(random()))
              .setIndexDeletionPolicy(new KeepAllDeletionPolicy(dir))
              .setMaxBufferedDocs(2)
              .setMergePolicy(newLogMergePolicy(10)));
  KeepAllDeletionPolicy policy =
      (KeepAllDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
  for (int i = 0; i < 10; i++) {
    addDoc(writer);
    if ((1 + i) % 2 == 0) {
      writer.commit();
    }
  }
  writer.close();

  Collection<IndexCommit> commits = DirectoryReader.listCommits(dir);
  assertEquals(5, commits.size());
  IndexCommit lastCommit = null;
  for (final IndexCommit commit : commits) {
    if (lastCommit == null || commit.getGeneration() > lastCommit.getGeneration()) {
      lastCommit = commit;
    }
  }
  assertTrue(lastCommit != null);

  // Now add 1 doc and merge
  writer =
      new IndexWriter(
          dir, newIndexWriterConfig(new MockAnalyzer(random())).setIndexDeletionPolicy(policy));
  addDoc(writer);
  assertEquals(11, writer.numDocs());
  writer.forceMerge(1);
  writer.close();

  assertEquals(6, DirectoryReader.listCommits(dir).size());

  // Now open writer on the commit just before merge:
  writer =
      new IndexWriter(
          dir,
          newIndexWriterConfig(new MockAnalyzer(random()))
              .setIndexDeletionPolicy(policy)
              .setIndexCommit(lastCommit));
  assertEquals(10, writer.numDocs());

  // Should undo our rollback:
  writer.rollback();

  DirectoryReader r = DirectoryReader.open(dir);
  // Still merged, still 11 docs
  assertEquals(1, r.leaves().size());
  assertEquals(11, r.numDocs());
  r.close();

  writer =
      new IndexWriter(
          dir,
          newIndexWriterConfig(new MockAnalyzer(random()))
              .setIndexDeletionPolicy(policy)
              .setIndexCommit(lastCommit));
  assertEquals(10, writer.numDocs());
  // Commits the rollback:
  writer.close();

  // Now 7 because we made another commit
  assertEquals(7, DirectoryReader.listCommits(dir).size());

  r = DirectoryReader.open(dir);
  // Not fully merged because we rolled it back, and now only
  // 10 docs
  assertTrue(r.leaves().size() > 1);
  assertEquals(10, r.numDocs());
  r.close();

  // Re-merge
  writer =
      new IndexWriter(
          dir, newIndexWriterConfig(new MockAnalyzer(random())).setIndexDeletionPolicy(policy));
  writer.forceMerge(1);
  writer.close();

  r = DirectoryReader.open(dir);
  assertEquals(1, r.leaves().size());
  assertEquals(10, r.numDocs());
  r.close();

  // Now open writer on the commit just before merging,
  // but this time keeping only the last commit:
  writer =
      new IndexWriter(
          dir, newIndexWriterConfig(new MockAnalyzer(random())).setIndexCommit(lastCommit));
  assertEquals(10, writer.numDocs());

  // Reader still sees fully merged index, because writer
  // opened on the prior commit has not yet committed:
  r = DirectoryReader.open(dir);
  assertEquals(1, r.leaves().size());
  assertEquals(10, r.numDocs());
  r.close();

  writer.close();

  // Now reader sees not-fully-merged index:
  r = DirectoryReader.open(dir);
  assertTrue(r.leaves().size() > 1);
  assertEquals(10, r.numDocs());
  r.close();

  dir.close();
}
@Override
public long getGeneration() {
  return cp.getGeneration();
}
static long getCommitTime(IndexCommit commit) throws IOException {
  return Long.parseLong(commit.getUserData().get("commitTime"));
}
@Override
public String getSegmentsFileName() {
  return cp.getSegmentsFileName();
}
@Override
public Map<String, String> getUserData() throws IOException {
  return cp.getUserData();
}
public void testEqualsHashCode() throws Exception {
  // LUCENE-2417: equals and hashCode() impl was inconsistent
  final Directory dir = new RAMDirectory();
  IndexCommit ic1 =
      new IndexCommit() {
        @Override
        public String getSegmentsFileName() {
          return "a";
        }

        @Override
        public long getVersion() {
          return 12;
        }

        @Override
        public Directory getDirectory() {
          return dir;
        }

        @Override
        public Collection<String> getFileNames() throws IOException {
          return null;
        }

        @Override
        public void delete() {}

        @Override
        public long getGeneration() {
          return 0;
        }

        @Override
        public long getTimestamp() throws IOException {
          return 1;
        }

        @Override
        public Map<String, String> getUserData() throws IOException {
          return null;
        }

        @Override
        public boolean isDeleted() {
          return false;
        }

        @Override
        public boolean isOptimized() {
          return false;
        }
      };
  IndexCommit ic2 =
      new IndexCommit() {
        @Override
        public String getSegmentsFileName() {
          return "b";
        }

        @Override
        public long getVersion() {
          return 12;
        }

        @Override
        public Directory getDirectory() {
          return dir;
        }

        @Override
        public Collection<String> getFileNames() throws IOException {
          return null;
        }

        @Override
        public void delete() {}

        @Override
        public long getGeneration() {
          return 0;
        }

        @Override
        public long getTimestamp() throws IOException {
          return 1;
        }

        @Override
        public Map<String, String> getUserData() throws IOException {
          return null;
        }

        @Override
        public boolean isDeleted() {
          return false;
        }

        @Override
        public boolean isOptimized() {
          return false;
        }
      };
  assertEquals(ic1, ic2);
  assertEquals("hash codes are not equal", ic1.hashCode(), ic2.hashCode());
}
public Object run(IndexCommit commit) throws CorruptIndexException, IOException {
  if (commit != null) {
    if (directory != commit.getDirectory())
      throw new IOException("the specified commit does not match the specified Directory");
    return doBody(commit.getSegmentsFileName());
  }

  String segmentFileName = null;
  long lastGen = -1;
  long gen = 0;
  int genLookaheadCount = 0;
  IOException exc = null;
  int retryCount = 0;

  boolean useFirstMethod = true;

  // Loop until we succeed in calling doBody() without
  // hitting an IOException. An IOException most likely
  // means a commit was in process and has finished, in
  // the time it took us to load the now-old infos files
  // (and segments files). It's also possible it's a
  // true error (corrupt index). To distinguish these,
  // on each retry we must see "forward progress" on
  // which generation we are trying to load. If we
  // don't, then the original error is real and we throw
  // it.

  // We have three methods for determining the current
  // generation. We try the first two in parallel (when
  // useFirstMethod is true), and fall back to the third
  // when necessary.

  while (true) {

    if (useFirstMethod) {

      // List the directory and use the highest
      // segments_N file. This method works well as long
      // as there is no stale caching on the directory
      // contents (NOTE: NFS clients often have such stale
      // caching):
      String[] files = null;

      long genA = -1;

      files = directory.listAll();

      if (files != null) {
        genA = getCurrentSegmentGeneration(files);
      }

      if (infoStream != null) {
        message("directory listing genA=" + genA);
      }

      // Also open segments.gen and read its
      // contents. Then we take the larger of the two
      // gens. This way, if either approach is hitting
      // a stale cache (NFS) we have a better chance of
      // getting the right generation.
      long genB = -1;
      for (int i = 0; i < defaultGenFileRetryCount; i++) {
        IndexInput genInput = null;
        try {
          genInput = directory.openInput(IndexFileNames.SEGMENTS_GEN);
        } catch (FileNotFoundException e) {
          if (infoStream != null) {
            message("segments.gen open: FileNotFoundException " + e);
          }
          break;
        } catch (IOException e) {
          if (infoStream != null) {
            message("segments.gen open: IOException " + e);
          }
        }

        if (genInput != null) {
          try {
            int version = genInput.readInt();
            if (version == FORMAT_LOCKLESS) {
              long gen0 = genInput.readLong();
              long gen1 = genInput.readLong();
              if (infoStream != null) {
                message("fallback check: " + gen0 + "; " + gen1);
              }
              if (gen0 == gen1) {
                // The file is consistent.
                genB = gen0;
                break;
              }
            }
          } catch (IOException err2) {
            // will retry
          } finally {
            genInput.close();
          }
        }
        try {
          Thread.sleep(defaultGenFileRetryPauseMsec);
        } catch (InterruptedException ie) {
          throw new ThreadInterruptedException(ie);
        }
      }

      if (infoStream != null) {
        message(IndexFileNames.SEGMENTS_GEN + " check: genB=" + genB);
      }

      // Pick the larger of the two gen's:
      if (genA > genB) {
        gen = genA;
      } else {
        gen = genB;
      }

      if (gen == -1) {
        // Neither approach found a generation
        throw new IndexNotFoundException(
            "no segments* file found in " + directory + ": files: " + Arrays.toString(files));
      }
    }

    if (useFirstMethod && lastGen == gen && retryCount >= 2) {
      // Give up on first method -- this is 3rd cycle on
      // listing directory and checking gen file to
      // attempt to locate the segments file.
      useFirstMethod = false;
    }

    // Second method: since both directory cache and
    // file contents cache seem to be stale, just
    // advance the generation.
    if (!useFirstMethod) {
      if (genLookaheadCount < defaultGenLookaheadCount) {
        gen++;
        genLookaheadCount++;
        if (infoStream != null) {
          message("look ahead increment gen to " + gen);
        }
      } else {
        // All attempts have failed -- throw first exc:
        throw exc;
      }
    } else if (lastGen == gen) {
      // This means we're about to try the same
      // segments_N last tried.
      retryCount++;
    } else {
      // Segment file has advanced since our last loop
      // (we made "progress"), so reset retryCount:
      retryCount = 0;
    }

    lastGen = gen;

    segmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);

    try {
      Object v = doBody(segmentFileName);
      if (infoStream != null) {
        message("success on " + segmentFileName);
      }
      return v;
    } catch (IOException err) {

      // Save the original root cause:
      if (exc == null) {
        exc = err;
      }

      if (infoStream != null) {
        message(
            "primary Exception on '"
                + segmentFileName
                + "': "
                + err
                + "'; will retry: retryCount="
                + retryCount
                + "; gen = "
                + gen);
      }

      if (gen > 1 && useFirstMethod && retryCount == 1) {

        // This is our second time trying this same segments
        // file (because retryCount is 1), and, there is
        // possibly a segments_(N-1) (because gen > 1).
        // So, check if the segments_(N-1) exists and
        // try it if so:
        String prevSegmentFileName =
            IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen - 1);

        final boolean prevExists;
        prevExists = directory.fileExists(prevSegmentFileName);

        if (prevExists) {
          if (infoStream != null) {
            message("fallback to prior segment file '" + prevSegmentFileName + "'");
          }
          try {
            Object v = doBody(prevSegmentFileName);
            if (infoStream != null) {
              message("success on fallback " + prevSegmentFileName);
            }
            return v;
          } catch (IOException err2) {
            if (infoStream != null) {
              message(
                  "secondary Exception on '"
                      + prevSegmentFileName
                      + "': "
                      + err2
                      + "'; will retry");
            }
          }
        }
      }
    }
  }
}
/** Run {@link #doBody} on the provided commit. */
public T run(IndexCommit commit) throws IOException {
  if (commit != null) {
    if (directory != commit.getDirectory())
      throw new IOException("the specified commit does not match the specified Directory");
    return doBody(commit.getSegmentsFileName());
  }

  long lastGen = -1;
  long gen = -1;
  IOException exc = null;

  // Loop until we succeed in calling doBody() without
  // hitting an IOException. An IOException most likely
  // means an IW deleted our commit while we were opening
  // it, i.e. in the time it took us to load the now-old
  // infos files (and segments files). It's also possible
  // it's a true error (corrupt index). To distinguish
  // these, on each retry we must see "forward progress"
  // on which generation we are trying to load. If we
  // don't, then the original error is real and we throw
  // it.
  for (; ; ) {
    lastGen = gen;
    String[] files = directory.listAll();
    String[] files2 = directory.listAll();
    Arrays.sort(files);
    Arrays.sort(files2);
    if (!Arrays.equals(files, files2)) {
      // listAll() is weakly consistent; differing listings mean we raced
      // with a concurrent commit, so list again
      continue;
    }
    gen = getLastCommitGeneration(files);

    if (infoStream != null) {
      message("directory listing gen=" + gen);
    }

    if (gen == -1) {
      throw new IndexNotFoundException(
          "no segments* file found in " + directory + ": files: " + Arrays.toString(files));
    } else if (gen > lastGen) {
      String segmentFileName =
          IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);

      try {
        T t = doBody(segmentFileName);
        if (infoStream != null) {
          message("success on " + segmentFileName);
        }
        return t;
      } catch (IOException err) {
        // Save the original root cause:
        if (exc == null) {
          exc = err;
        }

        if (infoStream != null) {
          message(
              "primary Exception on '"
                  + segmentFileName
                  + "': "
                  + err
                  + "'; will retry: gen = "
                  + gen);
        }
      }
    } else {
      throw exc;
    }
  }
}
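// Hedged usage sketch: callers subclass FindSegmentsFile and implement
// doBody() to load whatever they need from the segments file that run()
// settles on, mirroring the anonymous subclass in doOpenNoWriter() above.
// The helper method is an illustrative assumption; the generic parameter
// follows the run() signature shown here.
SegmentInfos readLatestInfos(final Directory directory) throws IOException {
  return new SegmentInfos.FindSegmentsFile<SegmentInfos>(directory) {
    @Override
    protected SegmentInfos doBody(String segmentFileName) throws IOException {
      SegmentInfos sis = new SegmentInfos();
      sis.read(directory, segmentFileName);
      return sis;
    }
  }.run(null); // a null commit means: retry against the latest generation
}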
@Override
public Collection<String> getFileNames() throws IOException {
  return cp.getFileNames();
}
@Override
public long getVersion() {
  return cp.getVersion();
}
@Override
public int getSegmentCount() {
  return cp.getSegmentCount();
}
@Override
public boolean isDeleted() {
  return cp.isDeleted();
}
@Override
public Directory getDirectory() {
  return cp.getDirectory();
}
public void testFutureCommit() throws Exception {
  Directory dir = newDirectory();

  IndexWriter w =
      new IndexWriter(
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
              .setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE));
  Document doc = new Document();
  w.addDocument(doc);

  // commit to "first"
  Map<String, String> commitData = new HashMap<String, String>();
  commitData.put("tag", "first");
  w.commit(commitData);

  // commit to "second"
  w.addDocument(doc);
  commitData.put("tag", "second");
  w.commit(commitData);
  w.close();

  // open "first" with IndexWriter
  IndexCommit commit = null;
  for (IndexCommit c : DirectoryReader.listCommits(dir)) {
    if (c.getUserData().get("tag").equals("first")) {
      commit = c;
      break;
    }
  }

  assertNotNull(commit);

  w =
      new IndexWriter(
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
              .setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE)
              .setIndexCommit(commit));

  assertEquals(1, w.numDocs());

  // commit IndexWriter to "third"
  w.addDocument(doc);
  commitData.put("tag", "third");
  w.commit(commitData);
  w.close();

  // make sure "second" commit is still there
  commit = null;
  for (IndexCommit c : DirectoryReader.listCommits(dir)) {
    if (c.getUserData().get("tag").equals("second")) {
      commit = c;
      break;
    }
  }

  assertNotNull(commit);

  dir.close();
}