public synchronized void startAll() throws IOException { final DiscoveryNode pNode = getDiscoveryNode(primary.routingEntry().currentNodeId()); primary.markAsRecovering( "store", new RecoveryState(primary.shardId(), true, RecoveryState.Type.STORE, pNode, pNode)); primary.recoverFromStore(); primary.updateRoutingEntry(ShardRoutingHelper.moveToStarted(primary.routingEntry())); for (IndexShard replicaShard : replicas) { recoverReplica( replicaShard, (replica, sourceNode) -> new RecoveryTarget(replica, sourceNode, recoveryListener, version -> {})); } }
public void recoverReplica( IndexShard replica, BiFunction<IndexShard, DiscoveryNode, RecoveryTarget> targetSupplier, boolean markAsRecovering) throws IOException { final DiscoveryNode pNode = getPrimaryNode(); final DiscoveryNode rNode = getDiscoveryNode(replica.routingEntry().currentNodeId()); if (markAsRecovering) { replica.markAsRecovering( "remote", new RecoveryState(replica.shardId(), false, RecoveryState.Type.REPLICA, pNode, rNode)); } else { assertEquals(replica.state(), IndexShardState.RECOVERING); } replica.prepareForIndexRecovery(); RecoveryTarget recoveryTarget = targetSupplier.apply(replica, pNode); StartRecoveryRequest request = new StartRecoveryRequest( replica.shardId(), pNode, rNode, getMetadataSnapshotOrEmpty(replica), RecoveryState.Type.REPLICA, 0); RecoverySourceHandler recovery = new RecoverySourceHandler( primary, recoveryTarget, request, () -> 0L, e -> () -> {}, (int) ByteSizeUnit.MB.toKB(1), logger); recovery.recoverToTarget(); recoveryTarget.markAsDone(); replica.updateRoutingEntry(ShardRoutingHelper.moveToStarted(replica.routingEntry())); }
private void applyInitializingShard( final ClusterState state, final IndexMetaData indexMetaData, final ShardRouting shardRouting) { final IndexService indexService = indicesService.indexService(shardRouting.index()); if (indexService == null) { // got deleted on us, ignore return; } final RoutingTable routingTable = state.routingTable(); final DiscoveryNodes nodes = state.getNodes(); final int shardId = shardRouting.id(); if (indexService.hasShard(shardId)) { IndexShard indexShard = indexService.shardSafe(shardId); if (indexShard.state() == IndexShardState.STARTED || indexShard.state() == IndexShardState.POST_RECOVERY) { // the master thinks we are initializing, but we are already started or on POST_RECOVERY and // waiting // for master to confirm a shard started message (either master failover, or a cluster event // before // we managed to tell the master we started), mark us as started if (logger.isTraceEnabled()) { logger.trace( "{} master marked shard as initializing, but shard has state [{}], resending shard started to {}", indexShard.shardId(), indexShard.state(), nodes.masterNode()); } if (nodes.masterNode() != null) { shardStateAction.shardStarted( shardRouting, indexMetaData.getIndexUUID(), "master " + nodes.masterNode() + " marked shard as initializing, but shard state is [" + indexShard.state() + "], mark shard as started", nodes.masterNode()); } return; } else { if (indexShard.ignoreRecoveryAttempt()) { logger.trace( "ignoring recovery instruction for an existing shard {} (shard state: [{}])", indexShard.shardId(), indexShard.state()); return; } } } // if we're in peer recovery, try to find out the source node now so in case it fails, we will // not create the index shard DiscoveryNode sourceNode = null; if (isPeerRecovery(shardRouting)) { sourceNode = findSourceNodeForPeerRecovery(routingTable, nodes, shardRouting); if (sourceNode == null) { logger.trace( "ignoring initializing shard {} - no source node can be found.", shardRouting.shardId()); return; } } // if there is no shard, create it if (!indexService.hasShard(shardId)) { if (failedShards.containsKey(shardRouting.shardId())) { if (nodes.masterNode() != null) { shardStateAction.resendShardFailed( shardRouting, indexMetaData.getIndexUUID(), "master " + nodes.masterNode() + " marked shard as initializing, but shard is marked as failed, resend shard failure", nodes.masterNode()); } return; } try { if (logger.isDebugEnabled()) { logger.debug("[{}][{}] creating shard", shardRouting.index(), shardId); } IndexShard indexShard = indexService.createShard(shardId, shardRouting.primary()); indexShard.updateRoutingEntry( shardRouting, state.blocks().disableStatePersistence() == false); indexShard.addFailedEngineListener(failedEngineHandler); } catch (IndexShardAlreadyExistsException e) { // ignore this, the method call can happen several times } catch (Throwable e) { failAndRemoveShard(shardRouting, indexService, true, "failed to create shard", e); return; } } final IndexShard indexShard = indexService.shardSafe(shardId); if (indexShard.ignoreRecoveryAttempt()) { // we are already recovering (we can get to this state since the cluster event can happen // several // times while we recover) logger.trace( "ignoring recovery instruction for shard {} (shard state: [{}])", indexShard.shardId(), indexShard.state()); return; } if (isPeerRecovery(shardRouting)) { try { assert sourceNode != null : "peer recovery started but sourceNode is null"; // we don't mark this one as relocated at the end. // For primaries: requests in any case are routed to both when its relocating and that way // we handle // the edge case where its mark as relocated, and we might need to roll it back... // For replicas: we are recovering a backup from a primary RecoveryState.Type type = shardRouting.primary() ? RecoveryState.Type.RELOCATION : RecoveryState.Type.REPLICA; recoveryTarget.startRecovery( indexShard, type, sourceNode, new PeerRecoveryListener(shardRouting, indexService, indexMetaData)); } catch (Throwable e) { indexShard.failShard("corrupted preexisting index", e); handleRecoveryFailure(indexService, shardRouting, true, e); } } else { final IndexShardRoutingTable indexShardRouting = routingTable.index(shardRouting.index()).shard(shardRouting.id()); indexService .shard(shardId) .recoverFromStore( indexShardRouting, new StoreRecoveryService.RecoveryListener() { @Override public void onRecoveryDone() { shardStateAction.shardStarted( shardRouting, indexMetaData.getIndexUUID(), "after recovery from store"); } @Override public void onIgnoreRecovery(String reason) {} @Override public void onRecoveryFailed(IndexShardRecoveryException e) { handleRecoveryFailure(indexService, shardRouting, true, e); } }); } }
private void applyNewOrUpdatedShards(final ClusterChangedEvent event) { if (!indicesService.changesAllowed()) { return; } RoutingTable routingTable = event.state().routingTable(); RoutingNodes.RoutingNodeIterator routingNode = event.state().readOnlyRoutingNodes().routingNodeIter(event.state().nodes().localNodeId()); if (routingNode == null) { failedShards.clear(); return; } DiscoveryNodes nodes = event.state().nodes(); for (final ShardRouting shardRouting : routingNode) { final IndexService indexService = indicesService.indexService(shardRouting.index()); if (indexService == null) { // got deleted on us, ignore continue; } final IndexMetaData indexMetaData = event.state().metaData().index(shardRouting.index()); if (indexMetaData == null) { // the index got deleted on the metadata, we will clean it later in the apply deleted method // call continue; } final int shardId = shardRouting.id(); if (!indexService.hasShard(shardId) && shardRouting.started()) { if (failedShards.containsKey(shardRouting.shardId())) { if (nodes.masterNode() != null) { shardStateAction.resendShardFailed( shardRouting, indexMetaData.getIndexUUID(), "master " + nodes.masterNode() + " marked shard as started, but shard has previous failed. resending shard failure.", nodes.masterNode()); } } else { // the master thinks we are started, but we don't have this shard at all, mark it as // failed sendFailShard( shardRouting, indexMetaData.getIndexUUID(), "master [" + nodes.masterNode() + "] marked shard as started, but shard has not been created, mark shard as failed", null); } continue; } IndexShard indexShard = indexService.shard(shardId); if (indexShard != null) { ShardRouting currentRoutingEntry = indexShard.routingEntry(); // if the current and global routing are initializing, but are still not the same, its a // different "shard" being allocated // for example: a shard that recovers from one node and now needs to recover to another // node, // or a replica allocated and then allocating a primary because the primary // failed on another node boolean shardHasBeenRemoved = false; if (currentRoutingEntry.initializing() && shardRouting.initializing() && !currentRoutingEntry.equals(shardRouting)) { logger.debug( "[{}][{}] removing shard (different instance of it allocated on this node, current [{}], global [{}])", shardRouting.index(), shardRouting.id(), currentRoutingEntry, shardRouting); // closing the shard will also cancel any ongoing recovery. indexService.removeShard( shardRouting.id(), "removing shard (different instance of it allocated on this node)"); shardHasBeenRemoved = true; } else if (isPeerRecovery(shardRouting)) { final DiscoveryNode sourceNode = findSourceNodeForPeerRecovery(routingTable, nodes, shardRouting); // check if there is an existing recovery going, and if so, and the source node is not the // same, cancel the recovery to restart it final Predicate<RecoveryStatus> shouldCancel = new Predicate<RecoveryStatus>() { @Override public boolean apply(@Nullable RecoveryStatus status) { return status.sourceNode().equals(sourceNode) == false; } }; if (recoveryTarget.cancelRecoveriesForShard( indexShard.shardId(), "recovery source node changed", shouldCancel)) { logger.debug( "[{}][{}] removing shard (recovery source changed), current [{}], global [{}])", shardRouting.index(), shardRouting.id(), currentRoutingEntry, shardRouting); // closing the shard will also cancel any ongoing recovery. indexService.removeShard( shardRouting.id(), "removing shard (recovery source node changed)"); shardHasBeenRemoved = true; } } if (shardHasBeenRemoved == false && (shardRouting.equals(indexShard.routingEntry()) == false || shardRouting.version() > indexShard.routingEntry().version())) { if (shardRouting.primary() && indexShard.routingEntry().primary() == false && shardRouting.initializing() && indexShard.allowsPrimaryPromotion() == false) { logger.debug("{} reinitialize shard on primary promotion", indexShard.shardId()); indexService.removeShard(shardId, "promoted to primary"); } else { // if we happen to remove the shardRouting by id above we don't need to jump in here! indexShard.updateRoutingEntry( shardRouting, event.state().blocks().disableStatePersistence() == false); } } } if (shardRouting.initializing()) { applyInitializingShard(event.state(), indexMetaData, shardRouting); } } }
public synchronized IndexShard createShard(ShardRouting routing) throws IOException { final boolean primary = routing.primary(); /* * TODO: we execute this in parallel but it's a synced method. Yet, we might * be able to serialize the execution via the cluster state in the future. for now we just * keep it synced. */ if (closed.get()) { throw new IllegalStateException("Can't create shard " + routing.shardId() + ", closed"); } final Settings indexSettings = this.indexSettings.getSettings(); final ShardId shardId = routing.shardId(); boolean success = false; Store store = null; IndexShard indexShard = null; final ShardLock lock = nodeEnv.shardLock(shardId, TimeUnit.SECONDS.toMillis(5)); try { eventListener.beforeIndexShardCreated(shardId, indexSettings); ShardPath path; try { path = ShardPath.loadShardPath(logger, nodeEnv, shardId, this.indexSettings); } catch (IllegalStateException ex) { logger.warn("{} failed to load shard path, trying to remove leftover", shardId); try { ShardPath.deleteLeftoverShardDirectory(logger, nodeEnv, lock, this.indexSettings); path = ShardPath.loadShardPath(logger, nodeEnv, shardId, this.indexSettings); } catch (Throwable t) { t.addSuppressed(ex); throw t; } } if (path == null) { // TODO: we should, instead, hold a "bytes reserved" of how large we anticipate this shard // will be, e.g. for a shard // that's being relocated/replicated we know how large it will become once it's done // copying: // Count up how many shards are currently on each data path: Map<Path, Integer> dataPathToShardCount = new HashMap<>(); for (IndexShard shard : this) { Path dataPath = shard.shardPath().getRootStatePath(); Integer curCount = dataPathToShardCount.get(dataPath); if (curCount == null) { curCount = 0; } dataPathToShardCount.put(dataPath, curCount + 1); } path = ShardPath.selectNewPathForShard( nodeEnv, shardId, this.indexSettings, routing.getExpectedShardSize() == ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE ? getAvgShardSizeInBytes() : routing.getExpectedShardSize(), dataPathToShardCount); logger.debug("{} creating using a new path [{}]", shardId, path); } else { logger.debug("{} creating using an existing path [{}]", shardId, path); } if (shards.containsKey(shardId.id())) { throw new IndexShardAlreadyExistsException(shardId + " already exists"); } logger.debug("creating shard_id {}", shardId); // if we are on a shared FS we only own the shard (ie. we can safely delete it) if we are the // primary. final boolean canDeleteShardContent = IndexMetaData.isOnSharedFilesystem(indexSettings) == false || (primary && IndexMetaData.isOnSharedFilesystem(indexSettings)); store = new Store( shardId, this.indexSettings, indexStore.newDirectoryService(path), lock, new StoreCloseListener( shardId, canDeleteShardContent, () -> nodeServicesProvider.getIndicesQueryCache().onClose(shardId))); if (useShadowEngine(primary, indexSettings)) { indexShard = new ShadowIndexShard( shardId, this.indexSettings, path, store, indexCache, mapperService, similarityService, indexFieldData, engineFactory, eventListener, searcherWrapper, nodeServicesProvider); // no indexing listeners - shadow engines don't index } else { indexShard = new IndexShard( shardId, this.indexSettings, path, store, indexCache, mapperService, similarityService, indexFieldData, engineFactory, eventListener, searcherWrapper, nodeServicesProvider, listeners); } eventListener.indexShardStateChanged(indexShard, null, indexShard.state(), "shard created"); eventListener.afterIndexShardCreated(indexShard); indexShard.updateRoutingEntry(routing, true); shards = newMapBuilder(shards).put(shardId.id(), indexShard).immutableMap(); success = true; return indexShard; } finally { if (success == false) { IOUtils.closeWhileHandlingException(lock); closeShard("initialization failed", shardId, indexShard, store, eventListener); } } }