private void recoverFromTranslog(TranslogRecoveryPerformer handler) throws IOException { Translog.TranslogGeneration translogGeneration = translog.getGeneration(); final int opsRecovered; try { Translog.Snapshot snapshot = translog.newSnapshot(); opsRecovered = handler.recoveryFromSnapshot(this, snapshot); } catch (Throwable e) { throw new EngineException(shardId, "failed to recover from translog", e); } // flush if we recovered something or if we have references to older translogs // note: if opsRecovered == 0 and we have older translogs it means they are corrupted or 0 // length. assert allowCommits.get() == false : "commits are allowed but shouldn't"; allowCommits.set(true); // we are good - now we can commit if (opsRecovered > 0) { logger.trace( "flushing post recovery from translog. ops recovered [{}]. committed translog id [{}]. current id [{}]", opsRecovered, translogGeneration == null ? null : translogGeneration.translogFileGeneration, translog.currentFileGeneration()); flush(true, true); } else if (translog.isCurrent(translogGeneration) == false) { commitIndexWriter( indexWriter, translog, lastCommittedSegmentInfos.getUserData().get(Engine.SYNC_COMMIT_ID)); } }
@Override protected boolean maybeFailEngine(String source, Throwable t) { boolean shouldFail = super.maybeFailEngine(source, t); if (shouldFail) { return true; } // Check for AlreadyClosedException if (t instanceof AlreadyClosedException) { // if we are already closed due to some tragic exception // we need to fail the engine. it might have already been failed before // but we are double-checking it's failed and closed if (indexWriter.isOpen() == false && indexWriter.getTragicException() != null) { failEngine( "already closed by tragic event on the index writer", indexWriter.getTragicException()); } else if (translog.isOpen() == false && translog.getTragicException() != null) { failEngine("already closed by tragic event on the translog", translog.getTragicException()); } return true; } else if (t != null && ((indexWriter.isOpen() == false && indexWriter.getTragicException() == t) || (translog.isOpen() == false && translog.getTragicException() == t))) { // this spot on - we are handling the tragic event exception here so we have to fail the // engine // right away failEngine(source, t); return true; } return false; }
/**
 * Applies every non-null operation of {@code bulk} to the index writer and the translog while
 * holding the read lock, collecting per-operation failures instead of aborting.
 *
 * @param bulk the operations to apply; {@code null} entries are skipped
 * @return {@code null} if no operation failed; otherwise an array as long as {@code bulk.ops()}
 *     holding, at the position of each failed operation, the exception describing the failure
 * @throws EngineClosedException if there is no index writer
 */
@Override
public EngineException[] bulk(Bulk bulk) throws EngineException {
  EngineException[] failures = null;
  rwl.readLock().lock();
  try {
    final IndexWriter activeWriter = this.indexWriter;
    if (activeWriter == null) {
      throw new EngineClosedException(shardId);
    }

    final Operation[] ops = bulk.ops();
    for (int slot = 0; slot < ops.length; slot++) {
      final Operation op = ops[slot];
      if (op != null) {
        try {
          switch (op.opType()) {
            case CREATE:
              final Create create = (Create) op;
              activeWriter.addDocument(create.doc(), create.analyzer());
              translog.add(new Translog.Create(create));
              break;
            case INDEX:
              final Index index = (Index) op;
              activeWriter.updateDocument(index.uid(), index.doc(), index.analyzer());
              translog.add(new Translog.Index(index));
              break;
            case DELETE:
              final Delete delete = (Delete) op;
              activeWriter.deleteDocuments(delete.uid());
              translog.add(new Translog.Delete(delete));
              break;
          }
        } catch (Exception cause) {
          // Allocate the failure array lazily, on the first failing operation only.
          if (failures == null) {
            failures = new EngineException[ops.length];
          }
          switch (op.opType()) {
            case CREATE:
              failures[slot] = new CreateFailedEngineException(shardId, (Create) op, cause);
              break;
            case INDEX:
              failures[slot] = new IndexFailedEngineException(shardId, (Index) op, cause);
              break;
            case DELETE:
              failures[slot] = new DeleteFailedEngineException(shardId, (Delete) op, cause);
              break;
          }
        }
      }
    }
    dirty = true;
  } finally {
    rwl.readLock().unlock();
  }
  return failures;
}
@Override public void flush(Flush flush) throws EngineException { if (indexWriter == null) { throw new EngineClosedException(shardId); } // check outside the lock as well so we can check without blocking on the write lock if (disableFlushCounter > 0) { throw new FlushNotAllowedEngineException( shardId, "Recovery is in progress, flush is not allowed"); } rwl.writeLock().lock(); try { if (indexWriter == null) { throw new EngineClosedException(shardId); } if (disableFlushCounter > 0) { throw new FlushNotAllowedEngineException( shardId, "Recovery is in progress, flush is not allowed"); } if (flush.full()) { // disable refreshing, not dirty dirty = false; refreshMutex.set(true); try { // that's ok if the index writer failed and is in inconsistent state // we will get an exception on a dirty operation, and will cause the shard // to be allocated to a different node indexWriter.close(); indexWriter = createWriter(); AcquirableResource<ReaderSearcherHolder> current = nrtResource; nrtResource = buildNrtResource(indexWriter); current.markForClose(); translog.newTranslog(newTransactionLogId()); } catch (IOException e) { throw new FlushFailedEngineException(shardId, e); } finally { refreshMutex.set(false); } } else { try { indexWriter.commit(); translog.newTranslog(newTransactionLogId()); } catch (IOException e) { throw new FlushFailedEngineException(shardId, e); } } } finally { rwl.writeLock().unlock(); } if (flush.refresh()) { refresh(new Refresh(false)); } }
private void maybeFSyncTranslogs() { if (indexSettings.getTranslogDurability() == Translog.Durability.ASYNC) { for (IndexShard shard : this.shards.values()) { try { Translog translog = shard.getTranslog(); if (translog.syncNeeded()) { translog.sync(); } } catch (EngineClosedException | AlreadyClosedException ex) { // fine - continue; } catch (IOException e) { logger.warn("failed to sync translog", e); } } } }
final boolean tryRenewSyncCommit() { boolean renewed = false; try (ReleasableLock lock = writeLock.acquire()) { ensureOpen(); ensureCanFlush(); String syncId = lastCommittedSegmentInfos.getUserData().get(SYNC_COMMIT_ID); if (syncId != null && translog.totalOperations() == 0 && indexWriter.hasUncommittedChanges()) { logger.trace("start renewing sync commit [{}]", syncId); commitIndexWriter(indexWriter, translog, syncId); logger.debug("successfully sync committed. sync id [{}].", syncId); lastCommittedSegmentInfos = store.readLastCommittedSegmentsInfo(); renewed = true; } } catch (IOException ex) { maybeFailEngine("renew sync commit", ex); throw new EngineException(shardId, "failed to renew sync commit", ex); } if (renewed) { // refresh outside of the write lock refresh("renew sync commit"); } return renewed; }
@Override public GetResult get(Get get, Function<String, Searcher> searcherFactory) throws EngineException { try (ReleasableLock lock = readLock.acquire()) { ensureOpen(); if (get.realtime()) { VersionValue versionValue = versionMap.getUnderLock(get.uid().bytes()); if (versionValue != null) { if (versionValue.delete()) { return GetResult.NOT_EXISTS; } if (get.versionType().isVersionConflictForReads(versionValue.version(), get.version())) { Uid uid = Uid.createUid(get.uid().text()); throw new VersionConflictEngineException( shardId, uid.type(), uid.id(), get.versionType().explainConflictForReads(versionValue.version(), get.version())); } Translog.Operation op = translog.read(versionValue.translogLocation()); if (op != null) { return new GetResult(true, versionValue.version(), op.getSource()); } } } // no version, get the version from the index, we know that we refresh on flush return getFromSearcher(get, searcherFactory); } }
private boolean innerIndex(Index index) throws IOException { synchronized (dirtyLock(index.uid())) { lastWriteNanos = index.startTime(); final long currentVersion; final boolean deleted; VersionValue versionValue = versionMap.getUnderLock(index.uid().bytes()); if (versionValue == null) { currentVersion = loadCurrentVersionFromIndex(index.uid()); deleted = currentVersion == Versions.NOT_FOUND; } else { deleted = versionValue.delete(); if (engineConfig.isEnableGcDeletes() && versionValue.delete() && (engineConfig.getThreadPool().estimatedTimeInMillis() - versionValue.time()) > getGcDeletesInMillis()) { currentVersion = Versions.NOT_FOUND; // deleted, and GC } else { currentVersion = versionValue.version(); } } long expectedVersion = index.version(); if (isVersionConflictForWrites(index, currentVersion, deleted, expectedVersion)) { if (index.origin() != Operation.Origin.RECOVERY) { throw new VersionConflictEngineException( shardId, index.type(), index.id(), index .versionType() .explainConflictForWrites(currentVersion, expectedVersion, deleted)); } return false; } long updatedVersion = index.versionType().updateVersion(currentVersion, expectedVersion); final boolean created; index.updateVersion(updatedVersion); if (currentVersion == Versions.NOT_FOUND) { // document does not exists, we can optimize for create created = true; index(index, indexWriter); } else { created = update(index, versionValue, indexWriter); } Translog.Location translogLocation = translog.add(new Translog.Index(index)); versionMap.putUnderLock( index.uid().bytes(), new VersionValue(updatedVersion, translogLocation)); index.setTranslogLocation(translogLocation); return created; } }
/**
 * Estimates the memory held by the index writer plus the translog, under the read lock.
 *
 * @return the combined estimate, or {@code null} if computing it throws an exception
 */
@Override
public ByteSizeValue estimateFlushableMemorySize() {
  rwl.readLock().lock();
  try {
    final long writerBytes = IndexWriters.estimateRamSize(indexWriter);
    final long translogBytes = translog.estimateMemorySize().bytes();
    return new ByteSizeValue(writerBytes + translogBytes);
  } catch (Exception e) {
    // No estimate available; callers receive null.
    return null;
  } finally {
    rwl.readLock().unlock();
  }
}
/**
 * Deletes all documents matching the query of {@code delete}, records the operation in the
 * translog and marks the engine dirty, all under the read lock.
 *
 * @param delete the delete-by-query operation
 * @throws EngineClosedException if there is no index writer
 * @throws DeleteByQueryFailedEngineException if an {@code IOException} occurs
 */
@Override
public void delete(DeleteByQuery delete) throws EngineException {
  rwl.readLock().lock();
  try {
    final IndexWriter activeWriter = this.indexWriter;
    if (activeWriter == null) {
      throw new EngineClosedException(shardId);
    }
    activeWriter.deleteDocuments(delete.query());
    final Translog.DeleteByQuery logEntry = new Translog.DeleteByQuery(delete);
    translog.add(logEntry);
    dirty = true;
  } catch (IOException e) {
    throw new DeleteByQueryFailedEngineException(shardId, delete, e);
  } finally {
    rwl.readLock().unlock();
  }
}
@Override public void start() throws EngineException { rwl.writeLock().lock(); try { if (indexWriter != null) { throw new EngineAlreadyStartedException(shardId); } if (logger.isDebugEnabled()) { logger.debug( "Starting engine with ram_buffer_size[" + indexingBufferSize + "], refresh_interval[" + refreshInterval + "]"); } try { this.indexWriter = createWriter(); } catch (IOException e) { throw new EngineCreationFailureException(shardId, "Failed to create engine", e); } try { translog.newTranslog(newTransactionLogId()); this.nrtResource = buildNrtResource(indexWriter); } catch (IOException e) { try { indexWriter.rollback(); } catch (IOException e1) { // ignore } finally { try { indexWriter.close(); } catch (IOException e1) { // ignore } } throw new EngineCreationFailureException(shardId, "Failed to open reader on writer", e); } } finally { rwl.writeLock().unlock(); } }
/**
 * Updates the document identified by the uid of {@code index}, records the operation in the
 * translog and marks the engine dirty, all under the read lock. A refresh follows when the
 * operation requests one.
 *
 * @param index the index operation
 * @throws EngineClosedException if there is no index writer
 * @throws IndexFailedEngineException if an {@code IOException} occurs
 */
@Override
public void index(Index index) throws EngineException {
  rwl.readLock().lock();
  try {
    final IndexWriter activeWriter = this.indexWriter;
    if (activeWriter == null) {
      throw new EngineClosedException(shardId);
    }
    activeWriter.updateDocument(index.uid(), index.doc(), index.analyzer());
    final Translog.Index logEntry = new Translog.Index(index);
    translog.add(logEntry);
    dirty = true;
    if (index.refresh()) {
      refresh(new Refresh(false));
    }
  } catch (IOException e) {
    throw new IndexFailedEngineException(shardId, index, e);
  } finally {
    rwl.readLock().unlock();
  }
}
/**
 * Adds the document of {@code create} to the index writer, records the operation in the translog
 * and marks the engine dirty, all under the read lock. A refresh follows when the operation
 * requests one.
 *
 * @param create the create operation
 * @throws EngineClosedException if there is no index writer
 * @throws CreateFailedEngineException if an {@code IOException} occurs
 */
@Override
public void create(Create create) throws EngineException {
  rwl.readLock().lock();
  try {
    final IndexWriter activeWriter = this.indexWriter;
    if (activeWriter == null) {
      throw new EngineClosedException(shardId);
    }
    activeWriter.addDocument(create.doc(), create.analyzer());
    final Translog.Create logEntry = new Translog.Create(create);
    translog.add(logEntry);
    dirty = true;
    if (create.refresh()) {
      refresh(new Refresh(false));
    }
  } catch (IOException e) {
    throw new CreateFailedEngineException(shardId, create, e);
  } finally {
    rwl.readLock().unlock();
  }
}
/**
 * Takes a snapshot of the index commit and of the translog under the read lock, then hands both
 * to {@code snapshotHandler} after the lock has been released.
 *
 * <p>Both snapshots are released once the handler returns or throws. The releases are nested in
 * try/finally so that a failure while releasing the index commit snapshot cannot prevent the
 * translog snapshot from being released.
 *
 * @param snapshotHandler receives the two snapshots and produces the result
 * @param <T> the handler's result type
 * @return whatever the handler returns
 * @throws SnapshotFailedEngineException if either snapshot cannot be taken
 */
@Override
public <T> T snapshot(SnapshotHandler<T> snapshotHandler) throws EngineException {
  SnapshotIndexCommit indexCommitSnapshot = null;
  Translog.Snapshot translogSnapshot = null;
  rwl.readLock().lock();
  try {
    indexCommitSnapshot = deletionPolicy.snapshot();
    translogSnapshot = translog.snapshot();
  } catch (Exception e) {
    // The translog snapshot failed after the commit snapshot was taken: give the latter back.
    if (indexCommitSnapshot != null) {
      indexCommitSnapshot.release();
    }
    throw new SnapshotFailedEngineException(shardId, e);
  } finally {
    rwl.readLock().unlock();
  }

  try {
    return snapshotHandler.snapshot(indexCommitSnapshot, translogSnapshot);
  } finally {
    try {
      indexCommitSnapshot.release();
    } finally {
      // Runs even if releasing the index commit snapshot throws.
      translogSnapshot.release();
    }
  }
}
/**
 * Commits {@code writer}, storing the translog generation, the translog UUID and, if given, the
 * sync id as commit user data.
 *
 * <p>The commit data is set on the same writer that is committed. Any throwable raised along the
 * way fails the engine and is then rethrown unchanged.
 *
 * @param writer the index writer to commit
 * @param translog the translog whose current generation is recorded in the commit
 * @param syncId the sync id to store with the commit, or {@code null} to store none
 * @throws IOException declared for the commit; note that every throwable is rethrown as is
 */
private void commitIndexWriter(IndexWriter writer, Translog translog, String syncId)
    throws IOException {
  ensureCanFlush();
  try {
    final Translog.TranslogGeneration translogGeneration = translog.getGeneration();
    logger.trace(
        "committing writer with translog id [{}] and sync id [{}] ",
        translogGeneration.translogFileGeneration,
        syncId);

    final Map<String, String> commitData = new HashMap<>(2);
    commitData.put(
        Translog.TRANSLOG_GENERATION_KEY,
        Long.toString(translogGeneration.translogFileGeneration));
    commitData.put(Translog.TRANSLOG_UUID_KEY, translogGeneration.translogUUID);
    if (syncId != null) {
      commitData.put(Engine.SYNC_COMMIT_ID, syncId);
    }

    // Use the writer passed in (not the field), so the data lands on the writer being committed.
    writer.setCommitData(commitData);
    writer.commit();
  } catch (Throwable ex) {
    failEngine("lucene commit failed", ex);
    throw ex;
  }
}
@Override public void recover(RecoveryHandler recoveryHandler) throws EngineException { // take a write lock here so it won't happen while a flush is in progress // this means that next commits will not be allowed once the lock is released rwl.writeLock().lock(); try { disableFlushCounter++; } finally { rwl.writeLock().unlock(); } SnapshotIndexCommit phase1Snapshot; try { phase1Snapshot = deletionPolicy.snapshot(); } catch (IOException e) { --disableFlushCounter; throw new RecoveryEngineException(shardId, 1, "Snapshot failed", e); } try { recoveryHandler.phase1(phase1Snapshot); } catch (Exception e) { --disableFlushCounter; phase1Snapshot.release(); throw new RecoveryEngineException(shardId, 1, "Execution failed", e); } Translog.Snapshot phase2Snapshot; try { phase2Snapshot = translog.snapshot(); } catch (Exception e) { --disableFlushCounter; phase1Snapshot.release(); throw new RecoveryEngineException(shardId, 2, "Snapshot failed", e); } try { recoveryHandler.phase2(phase2Snapshot); } catch (Exception e) { --disableFlushCounter; phase1Snapshot.release(); phase2Snapshot.release(); throw new RecoveryEngineException(shardId, 2, "Execution failed", e); } rwl.writeLock().lock(); Translog.Snapshot phase3Snapshot; try { phase3Snapshot = translog.snapshot(phase2Snapshot); } catch (Exception e) { --disableFlushCounter; rwl.writeLock().unlock(); phase1Snapshot.release(); phase2Snapshot.release(); throw new RecoveryEngineException(shardId, 3, "Snapshot failed", e); } try { recoveryHandler.phase3(phase3Snapshot); } catch (Exception e) { throw new RecoveryEngineException(shardId, 3, "Execution failed", e); } finally { --disableFlushCounter; rwl.writeLock().unlock(); phase1Snapshot.release(); phase2Snapshot.release(); phase3Snapshot.release(); } }
private void innerDelete(Delete delete) throws IOException { synchronized (dirtyLock(delete.uid())) { lastWriteNanos = delete.startTime(); final long currentVersion; final boolean deleted; VersionValue versionValue = versionMap.getUnderLock(delete.uid().bytes()); if (versionValue == null) { currentVersion = loadCurrentVersionFromIndex(delete.uid()); deleted = currentVersion == Versions.NOT_FOUND; } else { deleted = versionValue.delete(); if (engineConfig.isEnableGcDeletes() && versionValue.delete() && (engineConfig.getThreadPool().estimatedTimeInMillis() - versionValue.time()) > getGcDeletesInMillis()) { currentVersion = Versions.NOT_FOUND; // deleted, and GC } else { currentVersion = versionValue.version(); } } long updatedVersion; long expectedVersion = delete.version(); if (delete .versionType() .isVersionConflictForWrites(currentVersion, expectedVersion, deleted)) { if (delete.origin() == Operation.Origin.RECOVERY) { return; } else { throw new VersionConflictEngineException( shardId, delete.type(), delete.id(), delete .versionType() .explainConflictForWrites(currentVersion, expectedVersion, deleted)); } } updatedVersion = delete.versionType().updateVersion(currentVersion, expectedVersion); final boolean found; if (currentVersion == Versions.NOT_FOUND) { // doc does not exist and no prior deletes found = false; } else if (versionValue != null && versionValue.delete()) { // a "delete on delete", in this case, we still increment the version, log it, and return // that version found = false; } else { // we deleted a currently existing document indexWriter.deleteDocuments(delete.uid()); found = true; } delete.updateVersion(updatedVersion, found); Translog.Location translogLocation = translog.add(new Translog.Delete(delete)); versionMap.putUnderLock( delete.uid().bytes(), new DeleteVersionValue( updatedVersion, engineConfig.getThreadPool().estimatedTimeInMillis(), translogLocation)); delete.setTranslogLocation(translogLocation); } }
@Override public CommitId flush(boolean force, boolean waitIfOngoing) throws EngineException { ensureOpen(); final byte[] newCommitId; /* * Unfortunately the lock order is important here. We have to acquire the readlock first otherwise * if we are flushing at the end of the recovery while holding the write lock we can deadlock if: * Thread 1: flushes via API and gets the flush lock but blocks on the readlock since Thread 2 has the writeLock * Thread 2: flushes at the end of the recovery holding the writeLock and blocks on the flushLock owned by Thread 1 */ try (ReleasableLock lock = readLock.acquire()) { ensureOpen(); if (flushLock.tryLock() == false) { // if we can't get the lock right away we block if needed otherwise barf if (waitIfOngoing) { logger.trace("waiting for in-flight flush to finish"); flushLock.lock(); logger.trace("acquired flush lock after blocking"); } else { throw new FlushNotAllowedEngineException(shardId, "already flushing..."); } } else { logger.trace("acquired flush lock immediately"); } try { if (indexWriter.hasUncommittedChanges() || force) { ensureCanFlush(); try { translog.prepareCommit(); logger.trace("starting commit for flush; commitTranslog=true"); commitIndexWriter(indexWriter, translog, null); logger.trace("finished commit for flush"); // we need to refresh in order to clear older version values refresh("version_table_flush"); // after refresh documents can be retrieved from the index so we can now commit the // translog translog.commit(); } catch (Throwable e) { throw new FlushFailedEngineException(shardId, e); } } /* * we have to inc-ref the store here since if the engine is closed by a tragic event * we don't acquire the write lock and wait until we have exclusive access. This might also * dec the store reference which can essentially close the store and unless we can inc the reference * we can't use it. 
*/ store.incRef(); try { // reread the last committed segment infos lastCommittedSegmentInfos = store.readLastCommittedSegmentsInfo(); } catch (Throwable e) { if (isClosed.get() == false) { logger.warn("failed to read latest segment infos on flush", e); if (Lucene.isCorruptionException(e)) { throw new FlushFailedEngineException(shardId, e); } } } finally { store.decRef(); } newCommitId = lastCommittedSegmentInfos.getId(); } catch (FlushFailedEngineException ex) { maybeFailEngine("flush", ex); throw ex; } finally { flushLock.unlock(); } } // We don't have to do this here; we do it defensively to make sure that even if wall clock time // is misbehaving // (e.g., moves backwards) we will at least still sometimes prune deleted tombstones: if (engineConfig.isEnableGcDeletes()) { pruneDeletedTombstones(); } return new CommitId(newCommitId); }
/**
 * Reads one translog operation from {@code inStream} by delegating to {@code
 * Translog.readOperation}.
 *
 * @param inStream the stream to read from
 * @return the operation that was read
 * @throws IOException if the delegate fails to read from the stream
 */
protected Translog.Operation read(BufferedChecksumStreamInput inStream) throws IOException {
  final Translog.Operation operation = Translog.readOperation(inStream);
  return operation;
}
public InternalEngine(EngineConfig engineConfig) throws EngineException { super(engineConfig); openMode = engineConfig.getOpenMode(); this.versionMap = new LiveVersionMap(); store.incRef(); IndexWriter writer = null; Translog translog = null; SearcherManager manager = null; EngineMergeScheduler scheduler = null; boolean success = false; try { this.lastDeleteVersionPruneTimeMSec = engineConfig.getThreadPool().estimatedTimeInMillis(); mergeScheduler = scheduler = new EngineMergeScheduler(engineConfig.getShardId(), engineConfig.getIndexSettings()); this.dirtyLocks = new Object [Runtime.getRuntime().availableProcessors() * 10]; // we multiply it to have enough... for (int i = 0; i < dirtyLocks.length; i++) { dirtyLocks[i] = new Object(); } throttle = new IndexThrottle(); this.searcherFactory = new SearchFactory(logger, isClosed, engineConfig); try { writer = createWriter(openMode == EngineConfig.OpenMode.CREATE_INDEX_AND_TRANSLOG); indexWriter = writer; translog = openTranslog(engineConfig, writer); assert translog.getGeneration() != null; } catch (IOException | TranslogCorruptedException e) { throw new EngineCreationFailureException(shardId, "failed to create engine", e); } catch (AssertionError e) { // IndexWriter throws AssertionError on init, if asserts are enabled, if any files don't // exist, but tests that // randomly throw FNFE/NSFE can also hit this: if (ExceptionsHelper.stackTrace(e) .contains("org.apache.lucene.index.IndexWriter.filesExist")) { throw new EngineCreationFailureException(shardId, "failed to create engine", e); } else { throw e; } } this.translog = translog; manager = createSearcherManager(); this.searcherManager = manager; this.versionMap.setManager(searcherManager); // don't allow commits until we are done with recovering allowCommits.compareAndSet(true, openMode != EngineConfig.OpenMode.OPEN_INDEX_AND_TRANSLOG); success = true; } finally { if (success == false) { IOUtils.closeWhileHandlingException(writer, translog, manager, scheduler); 
versionMap.clear(); if (isClosed.get() == false) { // failure we need to dec the store reference store.decRef(); } } } logger.trace("created new InternalEngine"); }