private void applyCleanedIndices(final ClusterChangedEvent event) {
   // handle closed indices, since they are not allocated on a node once they are closed
   // so applyDeletedIndices might not take them into account
   for (IndexService indexService : indicesService) {
     String index = indexService.index().getName();
     IndexMetaData indexMetaData = event.state().metaData().index(index);
     if (indexMetaData != null && indexMetaData.state() == IndexMetaData.State.CLOSE) {
       for (Integer shardId : indexService.shardIds()) {
         logger.debug("[{}][{}] removing shard (index is closed)", index, shardId);
         try {
           indexService.removeShard(shardId, "removing shard (index is closed)");
         } catch (Throwable e) {
           logger.warn("[{}] failed to remove shard (index is closed)", e, index);
         }
       }
     }
   }
   for (IndexService indexService : indicesService) {
     String index = indexService.index().getName();
     if (indexService.shardIds().isEmpty()) {
       if (logger.isDebugEnabled()) {
         logger.debug("[{}] cleaning index (no shards allocated)", index);
       }
       // clean the index
       removeIndex(index, "removing index (no shards allocated)");
     }
   }
 }
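   // Removes index services whose UUID no longer matches the one in the cluster state (the index
   // was recreated), and deletes indices reported as deleted in this event, notifying the master
   // through nodeIndexDeletedAction.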
  private void applyDeletedIndices(final ClusterChangedEvent event) {
    final ClusterState previousState = event.previousState();
    final String localNodeId = event.state().nodes().localNodeId();
    assert localNodeId != null;

    for (IndexService indexService : indicesService) {
      IndexMetaData indexMetaData = event.state().metaData().index(indexService.index().name());
      if (indexMetaData != null) {
        if (!indexMetaData.isSameUUID(indexService.indexUUID())) {
          logger.debug(
              "[{}] mismatch on index UUIDs between cluster state and local state, cleaning the index so it will be recreated",
              indexMetaData.index());
          deleteIndex(
              indexMetaData.index(),
              "mismatch on index UUIDs between cluster state and local state, cleaning the index so it will be recreated");
        }
      }
    }

    for (String index : event.indicesDeleted()) {
      if (logger.isDebugEnabled()) {
        logger.debug("[{}] cleaning index, no longer part of the metadata", index);
      }
      final Settings indexSettings;
      final IndexService idxService = indicesService.indexService(index);
      if (idxService != null) {
        indexSettings = idxService.getIndexSettings();
        deleteIndex(index, "index no longer part of the metadata");
      } else {
        final IndexMetaData metaData = previousState.metaData().index(index);
        assert metaData != null;
        indexSettings = metaData.settings();
        indicesService.deleteClosedIndex(
            "closed index no longer part of the metadata", metaData, event.state());
      }
      try {
        nodeIndexDeletedAction.nodeIndexDeleted(event.state(), index, indexSettings, localNodeId);
      } catch (Throwable e) {
        logger.debug("failed to send to master index {} deleted event", e, index);
      }
    }
  }
 private void cleanFailedShards(final ClusterChangedEvent event) {
   RoutingTable routingTable = event.state().routingTable();
   RoutingNodes.RoutingNodeIterator routingNode =
       event.state().readOnlyRoutingNodes().routingNodeIter(event.state().nodes().localNodeId());
   if (routingNode == null) {
     failedShards.clear();
     return;
   }
   DiscoveryNodes nodes = event.state().nodes();
   long now = System.currentTimeMillis();
   String localNodeId = nodes.localNodeId();
   Iterator<Map.Entry<ShardId, FailedShard>> iterator = failedShards.entrySet().iterator();
   shards:
   while (iterator.hasNext()) {
     Map.Entry<ShardId, FailedShard> entry = iterator.next();
     FailedShard failedShard = entry.getValue();
     IndexRoutingTable indexRoutingTable = routingTable.index(entry.getKey().getIndex());
     if (indexRoutingTable != null) {
       IndexShardRoutingTable shardRoutingTable = indexRoutingTable.shard(entry.getKey().id());
       if (shardRoutingTable != null) {
         for (ShardRouting shardRouting : shardRoutingTable.assignedShards()) {
           if (localNodeId.equals(shardRouting.currentNodeId())) {
              // we have a timeout here just to make sure we don't have dangling failed shards for
              // some reason; it's just another safety layer
             if (shardRouting.version() == failedShard.version
                 && ((now - failedShard.timestamp) < TimeValue.timeValueMinutes(60).millis())) {
               // It's the same failed shard - keep it if it hasn't timed out
               continue shards;
             } else {
               // Different version or expired, remove it
               break;
             }
           }
         }
       }
     }
     iterator.remove();
   }
 }
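  // A minimal sketch of the FailedShard entry tracked in the failedShards map used above. Only the
  // two fields read by cleanFailedShards (version, timestamp) are grounded in this code; the
  // constructor shape is an assumption.
  private static class FailedShard {
    public final long version;
    public final long timestamp;

    FailedShard(long version) {
      this.version = version;
      this.timestamp = System.currentTimeMillis();
    }
  }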
 private void applyDeletedShards(final ClusterChangedEvent event) {
   RoutingNodes.RoutingNodeIterator routingNode =
       event.state().readOnlyRoutingNodes().routingNodeIter(event.state().nodes().localNodeId());
   if (routingNode == null) {
     return;
   }
   IntHashSet newShardIds = new IntHashSet();
   for (IndexService indexService : indicesService) {
     String index = indexService.index().name();
     IndexMetaData indexMetaData = event.state().metaData().index(index);
     if (indexMetaData == null) {
       continue;
     }
      // now, go over and delete shards that need to be deleted
     newShardIds.clear();
     for (ShardRouting shard : routingNode) {
       if (shard.index().equals(index)) {
         newShardIds.add(shard.id());
       }
     }
     for (Integer existingShardId : indexService.shardIds()) {
       if (!newShardIds.contains(existingShardId)) {
         if (indexMetaData.state() == IndexMetaData.State.CLOSE) {
           if (logger.isDebugEnabled()) {
             logger.debug("[{}][{}] removing shard (index is closed)", index, existingShardId);
           }
           indexService.removeShard(existingShardId, "removing shard (index is closed)");
         } else {
           // we can just remove the shard, without cleaning it locally, since we will clean it
           // when all shards are allocated in the IndicesStore
           if (logger.isDebugEnabled()) {
             logger.debug("[{}][{}] removing shard (not allocated)", index, existingShardId);
           }
           indexService.removeShard(existingShardId, "removing shard (not allocated)");
         }
       }
     }
   }
 }
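   // A plausible definition of the VALID_CLUSTER_STATES constant referenced in clusterChanged
   // below; the exact members are an assumption, inferred from the "fully applied" comment there
   // (java.util.EnumSet assumed to be imported).
   private static final EnumSet<ClusterState.ClusterStateStatus> VALID_CLUSTER_STATES =
       EnumSet.of(ClusterState.ClusterStateStatus.APPLIED);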
  @Override
  public void clusterChanged(ClusterChangedEvent event) {
    if (!VALID_CLUSTER_STATES.contains(event.state().status())) {
      // Only process fully applied cluster states
      return;
    }

    if (eventBus != null) {
      final List<String> indicesDeleted = event.indicesDeleted();
      if (!indicesDeleted.isEmpty()) {
        eventBus.post(IndicesDeletedEvent.create(indicesDeleted));
      }
    }
  }
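   // A minimal sketch of the IndicesDeletedEvent value type posted on the eventBus above. Only the
   // static create(List<String>) factory is grounded in this code; the field and accessor are
   // assumptions.
   public static final class IndicesDeletedEvent {
     private final List<String> indices;

     private IndicesDeletedEvent(List<String> indices) {
       this.indices = indices;
     }

     public static IndicesDeletedEvent create(List<String> indices) {
       return new IndicesDeletedEvent(indices);
     }

     public List<String> indices() {
       return indices;
     }
   }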
 private void applyNewIndices(final ClusterChangedEvent event) {
   // we only create indices for shards that are allocated
   RoutingNodes.RoutingNodeIterator routingNode =
       event.state().readOnlyRoutingNodes().routingNodeIter(event.state().nodes().localNodeId());
   if (routingNode == null) {
     return;
   }
   for (ShardRouting shard : routingNode) {
     if (!indicesService.hasIndex(shard.index())) {
       final IndexMetaData indexMetaData = event.state().metaData().index(shard.index());
       if (logger.isDebugEnabled()) {
         logger.debug("[{}] creating index", indexMetaData.index());
       }
       try {
         indicesService.createIndex(
             indexMetaData.index(),
             indexMetaData.settings(),
             event.state().nodes().localNode().id());
       } catch (Throwable e) {
         sendFailShard(shard, indexMetaData.getIndexUUID(), "failed to create index", e);
       }
     }
   }
 }
  @Override
  public void clusterChanged(ClusterChangedEvent event) {
    if (event.state().blocks().disableStatePersistence()) {
      // reset the current metadata, we need to start fresh...
      this.currentMetaData = null;
      return;
    }

    MetaData newMetaData = event.state().metaData();

    // delete indices that were there before, but are deleted now
    // we need to do it so they won't be detected as dangling
    if (currentMetaData != null) {
      // only delete indices when we already received a state (currentMetaData != null)
      for (IndexMetaData current : currentMetaData) {
        if (!newMetaData.hasIndex(current.index())) {
          logger.debug(
              "[{}] deleting index that is no longer part of the metadata (indices: [{}])",
              current.index(),
              newMetaData.indices().keys());
          if (nodeEnv.hasNodeFile()) {
            FileSystemUtils.deleteRecursively(nodeEnv.indexLocations(new Index(current.index())));
          }
          try {
            nodeIndexDeletedAction.nodeIndexStoreDeleted(
                event.state(), current.index(), event.state().nodes().localNodeId());
          } catch (Exception e) {
            logger.debug(
                "[{}] failed to notify master on local index store deletion", e, current.index());
          }
        }
      }
    }

    currentMetaData = newMetaData;
  }
 private void applyAliases(ClusterChangedEvent event) {
   // check if aliases changed
   if (aliasesChanged(event)) {
     // go over and update aliases
     for (IndexMetaData indexMetaData : event.state().metaData()) {
       String index = indexMetaData.index();
       IndexService indexService = indicesService.indexService(index);
       if (indexService == null) {
         // we only create / update here
         continue;
       }
       IndexAliasesService indexAliasesService = indexService.aliasesService();
       indexAliasesService.setAliases(indexMetaData.getAliases());
     }
   }
 }
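   // Entry point for cluster state changes on this node: under the mutex it first handles the
   // disable-state-persistence block (removing all local shards and indices so they can be
   // recovered later), then applies index/shard deletions, new indices, mappings, aliases, shard
   // updates and settings in order.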
  @Override
  public void clusterChanged(final ClusterChangedEvent event) {
    if (!indicesService.changesAllowed()) {
      return;
    }

    if (!lifecycle.started()) {
      return;
    }

    synchronized (mutex) {
      // we need to clean the shards and indices we have on this node, since we
      // are going to recover them again once state persistence is disabled (no master / not
      // recovered)
      // TODO: this feels a bit hacky here, a block disables state persistence, and then we clean
      // the allocated shards, maybe another flag in blocks?
      if (event.state().blocks().disableStatePersistence()) {
        for (IndexService indexService : indicesService) {
          String index = indexService.index().getName();
          for (Integer shardId : indexService.shardIds()) {
            logger.debug("[{}][{}] removing shard (disabled block persistence)", index, shardId);
            try {
              indexService.removeShard(shardId, "removing shard (disabled block persistence)");
            } catch (Throwable e) {
              logger.warn("[{}] failed to remove shard (disabled block persistence)", e, index);
            }
          }
          removeIndex(index, "cleaning index (disabled block persistence)");
        }
        return;
      }

      cleanFailedShards(event);

      applyDeletedIndices(event);
      applyNewIndices(event);
      applyMappings(event);
      applyAliases(event);
      applyNewOrUpdatedShards(event);
      applyDeletedShards(event);
      applyCleanedIndices(event);
      applySettings(event);
    }
  }
 @Override
 public void clusterChanged(ClusterChangedEvent event) {
   if (event.source().equals(CLUSTER_UPDATE_TASK_SOURCE)) {
     // that's us, ignore this event
     return;
   }
   if (event.state().nodes().localNodeMaster()) {
     // we are master, schedule the routing table updater
     if (scheduledRoutingTableFuture == null) {
       // a new master (us), make sure we reroute shards
       routingTableDirty = true;
       scheduledRoutingTableFuture =
           threadPool.scheduleWithFixedDelay(new RoutingTableUpdater(), schedule);
     }
     if (event.nodesRemoved()) {
       // if nodes were removed, we don't want to wait for the scheduled task
       // since we want to get primary election as fast as possible
       routingTableDirty = true;
       reroute();
        // Commented out since we make sure to reroute whenever shards change state or the metadata
        // changes
       //            } else if (event.routingTableChanged()) {
       //                routingTableDirty = true;
       //                reroute();
     } else {
       if (event.nodesAdded()) {
         for (DiscoveryNode node : event.nodesDelta().addedNodes()) {
           if (node.dataNode()) {
             routingTableDirty = true;
             break;
           }
         }
       }
     }
   } else {
     FutureUtils.cancel(scheduledRoutingTableFuture);
     scheduledRoutingTableFuture = null;
   }
 }
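  // Pushes refreshed index-level settings to the local IndexSettingsService for every index that
  // is present on this node and whose metadata changed in this event.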
 private void applySettings(ClusterChangedEvent event) {
   if (!event.metaDataChanged()) {
     return;
   }
   for (IndexMetaData indexMetaData : event.state().metaData()) {
     if (!indicesService.hasIndex(indexMetaData.index())) {
       // we only create / update here
       continue;
     }
      // if the index meta data didn't change, no need to check for refreshed settings
     if (!event.indexMetaDataChanged(indexMetaData)) {
       continue;
     }
     String index = indexMetaData.index();
     IndexService indexService = indicesService.indexService(index);
     if (indexService == null) {
       // already deleted on us, ignore it
       continue;
     }
     IndexSettingsService indexSettingsService =
         indexService.injector().getInstance(IndexSettingsService.class);
     indexSettingsService.refreshSettings(indexMetaData.settings());
   }
 }
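   // Reconciles the shards routed to this node with the shards actually present locally: sends or
   // resends shard-failed notifications for shards the master thinks are started but we do not
   // have, removes shard instances whose allocation or recovery source changed, updates routing
   // entries, and starts recovery for initializing shards.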
  private void applyNewOrUpdatedShards(final ClusterChangedEvent event) {
    if (!indicesService.changesAllowed()) {
      return;
    }

    RoutingTable routingTable = event.state().routingTable();
    RoutingNodes.RoutingNodeIterator routingNode =
        event.state().readOnlyRoutingNodes().routingNodeIter(event.state().nodes().localNodeId());

    if (routingNode == null) {
      failedShards.clear();
      return;
    }
    DiscoveryNodes nodes = event.state().nodes();

    for (final ShardRouting shardRouting : routingNode) {
      final IndexService indexService = indicesService.indexService(shardRouting.index());
      if (indexService == null) {
        // got deleted on us, ignore
        continue;
      }
      final IndexMetaData indexMetaData = event.state().metaData().index(shardRouting.index());
      if (indexMetaData == null) {
         // the index got deleted from the metadata, we will clean it up later in the
         // applyDeletedIndices call
        continue;
      }

      final int shardId = shardRouting.id();

      if (!indexService.hasShard(shardId) && shardRouting.started()) {
        if (failedShards.containsKey(shardRouting.shardId())) {
          if (nodes.masterNode() != null) {
            shardStateAction.resendShardFailed(
                shardRouting,
                indexMetaData.getIndexUUID(),
                "master "
                    + nodes.masterNode()
                    + " marked shard as started, but shard has previous failed. resending shard failure.",
                nodes.masterNode());
          }
        } else {
          // the master thinks we are started, but we don't have this shard at all, mark it as
          // failed
          sendFailShard(
              shardRouting,
              indexMetaData.getIndexUUID(),
              "master ["
                  + nodes.masterNode()
                  + "] marked shard as started, but shard has not been created, mark shard as failed",
              null);
        }
        continue;
      }

      IndexShard indexShard = indexService.shard(shardId);
      if (indexShard != null) {
        ShardRouting currentRoutingEntry = indexShard.routingEntry();
         // if the current and global routing are initializing, but are still not the same, it's a
         // different "shard" being allocated; for example: a shard that recovers from one node and
         // now needs to recover to another node, or a replica allocated and then allocating a
         // primary because the primary failed on another node
        boolean shardHasBeenRemoved = false;
        if (currentRoutingEntry.initializing()
            && shardRouting.initializing()
            && !currentRoutingEntry.equals(shardRouting)) {
          logger.debug(
              "[{}][{}] removing shard (different instance of it allocated on this node, current [{}], global [{}])",
              shardRouting.index(),
              shardRouting.id(),
              currentRoutingEntry,
              shardRouting);
          // closing the shard will also cancel any ongoing recovery.
          indexService.removeShard(
              shardRouting.id(),
              "removing shard (different instance of it allocated on this node)");
          shardHasBeenRemoved = true;
        } else if (isPeerRecovery(shardRouting)) {
          final DiscoveryNode sourceNode =
              findSourceNodeForPeerRecovery(routingTable, nodes, shardRouting);
          // check if there is an existing recovery going, and if so, and the source node is not the
          // same, cancel the recovery to restart it
          final Predicate<RecoveryStatus> shouldCancel =
              new Predicate<RecoveryStatus>() {
                @Override
                public boolean apply(@Nullable RecoveryStatus status) {
                  return status.sourceNode().equals(sourceNode) == false;
                }
              };
          if (recoveryTarget.cancelRecoveriesForShard(
              indexShard.shardId(), "recovery source node changed", shouldCancel)) {
            logger.debug(
                "[{}][{}] removing shard (recovery source changed), current [{}], global [{}])",
                shardRouting.index(),
                shardRouting.id(),
                currentRoutingEntry,
                shardRouting);
            // closing the shard will also cancel any ongoing recovery.
            indexService.removeShard(
                shardRouting.id(), "removing shard (recovery source node changed)");
            shardHasBeenRemoved = true;
          }
        }
        if (shardHasBeenRemoved == false
            && (shardRouting.equals(indexShard.routingEntry()) == false
                || shardRouting.version() > indexShard.routingEntry().version())) {
          if (shardRouting.primary()
              && indexShard.routingEntry().primary() == false
              && shardRouting.initializing()
              && indexShard.allowsPrimaryPromotion() == false) {
            logger.debug("{} reinitialize shard on primary promotion", indexShard.shardId());
            indexService.removeShard(shardId, "promoted to primary");
          } else {
            // if we happen to remove the shardRouting by id above we don't need to jump in here!
            indexShard.updateRoutingEntry(
                shardRouting, event.state().blocks().disableStatePersistence() == false);
          }
        }
      }

      if (shardRouting.initializing()) {
        applyInitializingShard(event.state(), indexMetaData, shardRouting);
      }
    }
  }
 private boolean aliasesChanged(ClusterChangedEvent event) {
   return !event.state().metaData().aliases().equals(event.previousState().metaData().aliases())
       || !event.state().routingTable().equals(event.previousState().routingTable());
 }
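   // Applies mapping updates from the cluster state to the local MapperService of each index on
   // this node (the _default_ mapping first), asks the master to refresh mappings for types whose
   // local interpretation differs, and fails the index's shards if a mapping cannot be processed.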
  private void applyMappings(ClusterChangedEvent event) {
    // go over and update mappings
    for (IndexMetaData indexMetaData : event.state().metaData()) {
      if (!indicesService.hasIndex(indexMetaData.index())) {
        // we only create / update here
        continue;
      }
      List<String> typesToRefresh = Lists.newArrayList();
      String index = indexMetaData.index();
      IndexService indexService = indicesService.indexService(index);
      if (indexService == null) {
        // got deleted on us, ignore (closing the node)
        return;
      }
      try {
        MapperService mapperService = indexService.mapperService();
        // first, go over and update the _default_ mapping (if exists)
        if (indexMetaData.mappings().containsKey(MapperService.DEFAULT_MAPPING)) {
          boolean requireRefresh =
              processMapping(
                  index,
                  mapperService,
                  MapperService.DEFAULT_MAPPING,
                  indexMetaData.mapping(MapperService.DEFAULT_MAPPING).source());
          if (requireRefresh) {
            typesToRefresh.add(MapperService.DEFAULT_MAPPING);
          }
        }

        // go over and add the relevant mappings (or update them)
        for (ObjectCursor<MappingMetaData> cursor : indexMetaData.mappings().values()) {
          MappingMetaData mappingMd = cursor.value;
          String mappingType = mappingMd.type();
          CompressedXContent mappingSource = mappingMd.source();
          if (mappingType.equals(MapperService.DEFAULT_MAPPING)) { // we processed _default_ first
            continue;
          }
          boolean requireRefresh = processMapping(index, mapperService, mappingType, mappingSource);
          if (requireRefresh) {
            typesToRefresh.add(mappingType);
          }
        }
        if (!typesToRefresh.isEmpty() && sendRefreshMapping) {
          nodeMappingRefreshAction.nodeMappingRefresh(
              event.state(),
              new NodeMappingRefreshAction.NodeMappingRefreshRequest(
                  index,
                  indexMetaData.indexUUID(),
                  typesToRefresh.toArray(new String[typesToRefresh.size()]),
                  event.state().nodes().localNodeId()));
        }
      } catch (Throwable t) {
         // if we failed the mappings anywhere, we need to fail the shards for this index. note, we
         // safeguard by processing the mappings on the master, or on the node the mapping was
         // introduced on, so this failure typically means wrong node-level configuration or
         // something similar
        for (IndexShard indexShard : indexService) {
          ShardRouting shardRouting = indexShard.routingEntry();
          failAndRemoveShard(shardRouting, indexService, true, "failed to update mappings", t);
        }
      }
    }
  }
    private ClusterState applyUpdate(ClusterState currentState, ClusterChangedEvent task) {
      boolean clusterStateChanged = false;
      ClusterState tribeState = task.state();
      DiscoveryNodes.Builder nodes = DiscoveryNodes.builder(currentState.nodes());
      // -- merge nodes
      // go over existing nodes, and see if they need to be removed
      for (DiscoveryNode discoNode : currentState.nodes()) {
        String markedTribeName = discoNode.attributes().get(TRIBE_NAME);
        if (markedTribeName != null && markedTribeName.equals(tribeName)) {
          if (tribeState.nodes().get(discoNode.id()) == null) {
            clusterStateChanged = true;
            logger.info("[{}] removing node [{}]", tribeName, discoNode);
            nodes.remove(discoNode.id());
          }
        }
      }
      // go over tribe nodes, and see if they need to be added
      for (DiscoveryNode tribe : tribeState.nodes()) {
        if (currentState.nodes().get(tribe.id()) == null) {
          // a new node, add it, but also add the tribe name to the attributes
          Map<String, String> tribeAttr = new HashMap<>();
          for (ObjectObjectCursor<String, String> attr : tribe.attributes()) {
            tribeAttr.put(attr.key, attr.value);
          }
          tribeAttr.put(TRIBE_NAME, tribeName);
          DiscoveryNode discoNode =
              new DiscoveryNode(
                  tribe.name(),
                  tribe.id(),
                  tribe.getHostName(),
                  tribe.getHostAddress(),
                  tribe.address(),
                  unmodifiableMap(tribeAttr),
                  tribe.version());
          clusterStateChanged = true;
          logger.info("[{}] adding node [{}]", tribeName, discoNode);
          nodes.put(discoNode);
        }
      }

      // -- merge metadata
      ClusterBlocks.Builder blocks = ClusterBlocks.builder().blocks(currentState.blocks());
      MetaData.Builder metaData = MetaData.builder(currentState.metaData());
      RoutingTable.Builder routingTable = RoutingTable.builder(currentState.routingTable());
      // go over existing indices, and see if they need to be removed
      for (IndexMetaData index : currentState.metaData()) {
        String markedTribeName = index.getSettings().get(TRIBE_NAME);
        if (markedTribeName != null && markedTribeName.equals(tribeName)) {
          IndexMetaData tribeIndex = tribeState.metaData().index(index.getIndex());
          clusterStateChanged = true;
          if (tribeIndex == null || tribeIndex.getState() == IndexMetaData.State.CLOSE) {
            logger.info("[{}] removing index [{}]", tribeName, index.getIndex());
            removeIndex(blocks, metaData, routingTable, index);
          } else {
            // always make sure to update the metadata and routing table, in case
            // there are changes in them (new mapping, shards moving from initializing to started)
            routingTable.add(tribeState.routingTable().index(index.getIndex()));
            Settings tribeSettings =
                Settings.builder().put(tribeIndex.getSettings()).put(TRIBE_NAME, tribeName).build();
            metaData.put(IndexMetaData.builder(tribeIndex).settings(tribeSettings));
          }
        }
      }
      // go over the tribe indices, and see if they need to be added
      for (IndexMetaData tribeIndex : tribeState.metaData()) {
        // if there is no routing table yet, do nothing with it...
        IndexRoutingTable table = tribeState.routingTable().index(tribeIndex.getIndex());
        if (table == null) {
          continue;
        }
        final IndexMetaData indexMetaData = currentState.metaData().index(tribeIndex.getIndex());
        if (indexMetaData == null) {
          if (!droppedIndices.contains(tribeIndex.getIndex())) {
            // a new index, add it, and add the tribe name as a setting
            clusterStateChanged = true;
            logger.info("[{}] adding index [{}]", tribeName, tribeIndex.getIndex());
            addNewIndex(tribeState, blocks, metaData, routingTable, tribeIndex);
          }
        } else {
          String existingFromTribe = indexMetaData.getSettings().get(TRIBE_NAME);
          if (!tribeName.equals(existingFromTribe)) {
            // we have a potential conflict on index names, decide what to do...
            if (ON_CONFLICT_ANY.equals(onConflict)) {
              // we chose any tribe, carry on
            } else if (ON_CONFLICT_DROP.equals(onConflict)) {
              // drop the indices, there is a conflict
              clusterStateChanged = true;
              logger.info(
                  "[{}] dropping index [{}] due to conflict with [{}]",
                  tribeName,
                  tribeIndex.getIndex(),
                  existingFromTribe);
              removeIndex(blocks, metaData, routingTable, tribeIndex);
              droppedIndices.add(tribeIndex.getIndex());
            } else if (onConflict.startsWith(ON_CONFLICT_PREFER)) {
              // on conflict, prefer a tribe...
              String preferredTribeName = onConflict.substring(ON_CONFLICT_PREFER.length());
              if (tribeName.equals(preferredTribeName)) {
                // the new one is the preferred one, replace...
                clusterStateChanged = true;
                logger.info(
                    "[{}] adding index [{}], preferred over [{}]",
                    tribeName,
                    tribeIndex.getIndex(),
                    existingFromTribe);
                removeIndex(blocks, metaData, routingTable, tribeIndex);
                addNewIndex(tribeState, blocks, metaData, routingTable, tribeIndex);
              } // else: either the existing one is the preferred one, or we haven't seen one, carry
                // on
            }
          }
        }
      }

      if (!clusterStateChanged) {
        return currentState;
      } else {
        return ClusterState.builder(currentState)
            .incrementVersion()
            .blocks(blocks)
            .nodes(nodes)
            .metaData(metaData)
            .routingTable(routingTable.build())
            .build();
      }
    }
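  // Drains the pending update tasks for one executor, runs them as a single batch against the
  // current cluster state and, if the state changed, publishes the new state (incrementing
  // versions on the master), applies cluster settings, notifies pre- and post-applied listeners,
  // and handles acking and per-task failure callbacks.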
  <T> void runTasksForExecutor(ClusterStateTaskExecutor<T> executor) {
    final ArrayList<UpdateTask<T>> toExecute = new ArrayList<>();
    final Map<String, ArrayList<T>> processTasksBySource = new HashMap<>();
    synchronized (updateTasksPerExecutor) {
      List<UpdateTask> pending = updateTasksPerExecutor.remove(executor);
      if (pending != null) {
        for (UpdateTask<T> task : pending) {
          if (task.processed.getAndSet(true) == false) {
            logger.trace("will process {}", task.toString(executor));
            toExecute.add(task);
            processTasksBySource
                .computeIfAbsent(task.source, s -> new ArrayList<>())
                .add(task.task);
          } else {
            logger.trace("skipping {}, already processed", task.toString(executor));
          }
        }
      }
    }
    if (toExecute.isEmpty()) {
      return;
    }
    final String tasksSummary =
        processTasksBySource
            .entrySet()
            .stream()
            .map(
                entry -> {
                  String tasks = executor.describeTasks(entry.getValue());
                  return tasks.isEmpty() ? entry.getKey() : entry.getKey() + "[" + tasks + "]";
                })
            .reduce((s1, s2) -> s1 + ", " + s2)
            .orElse("");

    if (!lifecycle.started()) {
      logger.debug("processing [{}]: ignoring, cluster_service not started", tasksSummary);
      return;
    }
    logger.debug("processing [{}]: execute", tasksSummary);
    ClusterState previousClusterState = clusterState;
    if (!previousClusterState.nodes().isLocalNodeElectedMaster() && executor.runOnlyOnMaster()) {
      logger.debug("failing [{}]: local node is no longer master", tasksSummary);
      toExecute.stream().forEach(task -> task.listener.onNoLongerMaster(task.source));
      return;
    }
    ClusterStateTaskExecutor.BatchResult<T> batchResult;
    long startTimeNS = currentTimeInNanos();
    try {
      List<T> inputs =
          toExecute.stream().map(tUpdateTask -> tUpdateTask.task).collect(Collectors.toList());
      batchResult = executor.execute(previousClusterState, inputs);
    } catch (Exception e) {
      TimeValue executionTime =
          TimeValue.timeValueMillis(
              Math.max(0, TimeValue.nsecToMSec(currentTimeInNanos() - startTimeNS)));
      if (logger.isTraceEnabled()) {
        logger.trace(
            (Supplier<?>)
                () ->
                    new ParameterizedMessage(
                        "failed to execute cluster state update in [{}], state:\nversion [{}], source [{}]\n{}{}{}",
                        executionTime,
                        previousClusterState.version(),
                        tasksSummary,
                        previousClusterState.nodes().prettyPrint(),
                        previousClusterState.routingTable().prettyPrint(),
                        previousClusterState.getRoutingNodes().prettyPrint()),
            e);
      }
      warnAboutSlowTaskIfNeeded(executionTime, tasksSummary);
      batchResult =
          ClusterStateTaskExecutor.BatchResult.<T>builder()
              .failures(toExecute.stream().map(updateTask -> updateTask.task)::iterator, e)
              .build(previousClusterState);
    }

    assert batchResult.executionResults != null;
    assert batchResult.executionResults.size() == toExecute.size()
        : String.format(
            Locale.ROOT,
            "expected [%d] task result%s but was [%d]",
            toExecute.size(),
            toExecute.size() == 1 ? "" : "s",
            batchResult.executionResults.size());
    boolean assertsEnabled = false;
    assert (assertsEnabled = true);
    if (assertsEnabled) {
      for (UpdateTask<T> updateTask : toExecute) {
        assert batchResult.executionResults.containsKey(updateTask.task)
            : "missing task result for " + updateTask.toString(executor);
      }
    }

    ClusterState newClusterState = batchResult.resultingState;
    final ArrayList<UpdateTask<T>> proccessedListeners = new ArrayList<>();
    // fail all tasks that have failed and extract those that are waiting for results
    for (UpdateTask<T> updateTask : toExecute) {
      assert batchResult.executionResults.containsKey(updateTask.task)
          : "missing " + updateTask.toString(executor);
      final ClusterStateTaskExecutor.TaskResult executionResult =
          batchResult.executionResults.get(updateTask.task);
      executionResult.handle(
          () -> proccessedListeners.add(updateTask),
          ex -> {
            logger.debug(
                (Supplier<?>)
                    () ->
                        new ParameterizedMessage(
                            "cluster state update task {} failed", updateTask.toString(executor)),
                ex);
            updateTask.listener.onFailure(updateTask.source, ex);
          });
    }

    if (previousClusterState == newClusterState) {
      for (UpdateTask<T> task : proccessedListeners) {
        if (task.listener instanceof AckedClusterStateTaskListener) {
          // no need to wait for ack if nothing changed, the update can be counted as acknowledged
          ((AckedClusterStateTaskListener) task.listener).onAllNodesAcked(null);
        }
        task.listener.clusterStateProcessed(task.source, previousClusterState, newClusterState);
      }
      TimeValue executionTime =
          TimeValue.timeValueMillis(
              Math.max(0, TimeValue.nsecToMSec(currentTimeInNanos() - startTimeNS)));
      logger.debug(
          "processing [{}]: took [{}] no change in cluster_state", tasksSummary, executionTime);
      warnAboutSlowTaskIfNeeded(executionTime, tasksSummary);
      return;
    }

    try {
      ArrayList<Discovery.AckListener> ackListeners = new ArrayList<>();
      if (newClusterState.nodes().isLocalNodeElectedMaster()) {
        // only the master controls the version numbers
        Builder builder = ClusterState.builder(newClusterState).incrementVersion();
        if (previousClusterState.routingTable() != newClusterState.routingTable()) {
          builder.routingTable(
              RoutingTable.builder(newClusterState.routingTable())
                  .version(newClusterState.routingTable().version() + 1)
                  .build());
        }
        if (previousClusterState.metaData() != newClusterState.metaData()) {
          builder.metaData(
              MetaData.builder(newClusterState.metaData())
                  .version(newClusterState.metaData().version() + 1));
        }
        newClusterState = builder.build();
        for (UpdateTask<T> task : proccessedListeners) {
          if (task.listener instanceof AckedClusterStateTaskListener) {
            final AckedClusterStateTaskListener ackedListener =
                (AckedClusterStateTaskListener) task.listener;
            if (ackedListener.ackTimeout() == null || ackedListener.ackTimeout().millis() == 0) {
              ackedListener.onAckTimeout();
            } else {
              try {
                ackListeners.add(
                    new AckCountDownListener(
                        ackedListener,
                        newClusterState.version(),
                        newClusterState.nodes(),
                        threadPool));
              } catch (EsRejectedExecutionException ex) {
                if (logger.isDebugEnabled()) {
                  logger.debug(
                      "Couldn't schedule timeout thread - node might be shutting down", ex);
                }
                // timeout straightaway, otherwise we could wait forever as the timeout thread has
                // not started
                ackedListener.onAckTimeout();
              }
            }
          }
        }
      }
      final Discovery.AckListener ackListener = new DelegetingAckListener(ackListeners);

      newClusterState.status(ClusterState.ClusterStateStatus.BEING_APPLIED);

      if (logger.isTraceEnabled()) {
        logger.trace(
            "cluster state updated, source [{}]\n{}", tasksSummary, newClusterState.prettyPrint());
      } else if (logger.isDebugEnabled()) {
        logger.debug(
            "cluster state updated, version [{}], source [{}]",
            newClusterState.version(),
            tasksSummary);
      }

      ClusterChangedEvent clusterChangedEvent =
          new ClusterChangedEvent(tasksSummary, newClusterState, previousClusterState);
      // new cluster state, notify all listeners
      final DiscoveryNodes.Delta nodesDelta = clusterChangedEvent.nodesDelta();
      if (nodesDelta.hasChanges() && logger.isInfoEnabled()) {
        String summary = nodesDelta.shortSummary();
        if (summary.length() > 0) {
          logger.info("{}, reason: {}", summary, tasksSummary);
        }
      }

      nodeConnectionsService.connectToAddedNodes(clusterChangedEvent);

      // if we are the master, publish the new state to all nodes
      // we publish here before we send a notification to all the listeners, since if it fails
      // we don't want to notify
      if (newClusterState.nodes().isLocalNodeElectedMaster()) {
        logger.debug("publishing cluster state version [{}]", newClusterState.version());
        try {
          clusterStatePublisher.accept(clusterChangedEvent, ackListener);
        } catch (Discovery.FailedToCommitClusterStateException t) {
          final long version = newClusterState.version();
          logger.warn(
              (Supplier<?>)
                  () ->
                      new ParameterizedMessage(
                          "failing [{}]: failed to commit cluster state version [{}]",
                          tasksSummary,
                          version),
              t);
          proccessedListeners.forEach(task -> task.listener.onFailure(task.source, t));
          return;
        }
      }

      // update the current cluster state
      clusterState = newClusterState;
      logger.debug("set local cluster state to version {}", newClusterState.version());
      try {
        // nothing to do until we actually recover from the gateway or any other block indicates we
        // need to disable persistency
        if (clusterChangedEvent.state().blocks().disableStatePersistence() == false
            && clusterChangedEvent.metaDataChanged()) {
          final Settings incomingSettings = clusterChangedEvent.state().metaData().settings();
          clusterSettings.applySettings(incomingSettings);
        }
      } catch (Exception ex) {
        logger.warn("failed to apply cluster settings", ex);
      }
      for (ClusterStateListener listener : preAppliedListeners) {
        try {
          listener.clusterChanged(clusterChangedEvent);
        } catch (Exception ex) {
          logger.warn("failed to notify ClusterStateListener", ex);
        }
      }

      nodeConnectionsService.disconnectFromRemovedNodes(clusterChangedEvent);

      newClusterState.status(ClusterState.ClusterStateStatus.APPLIED);

      for (ClusterStateListener listener : postAppliedListeners) {
        try {
          listener.clusterChanged(clusterChangedEvent);
        } catch (Exception ex) {
          logger.warn("failed to notify ClusterStateListener", ex);
        }
      }

      // manual ack only from the master at the end of the publish
      if (newClusterState.nodes().isLocalNodeElectedMaster()) {
        try {
          ackListener.onNodeAck(newClusterState.nodes().getLocalNode(), null);
        } catch (Exception e) {
          final DiscoveryNode localNode = newClusterState.nodes().getLocalNode();
          logger.debug(
              (Supplier<?>)
                  () ->
                      new ParameterizedMessage(
                          "error while processing ack for master node [{}]", localNode),
              e);
        }
      }

      for (UpdateTask<T> task : proccessedListeners) {
        task.listener.clusterStateProcessed(task.source, previousClusterState, newClusterState);
      }

      try {
        executor.clusterStatePublished(clusterChangedEvent);
      } catch (Exception e) {
        logger.error(
            (Supplier<?>)
                () ->
                    new ParameterizedMessage(
                        "exception thrown while notifying executor of new cluster state publication [{}]",
                        tasksSummary),
            e);
      }

      TimeValue executionTime =
          TimeValue.timeValueMillis(
              Math.max(0, TimeValue.nsecToMSec(currentTimeInNanos() - startTimeNS)));
      logger.debug(
          "processing [{}]: took [{}] done applying updated cluster_state (version: {}, uuid: {})",
          tasksSummary,
          executionTime,
          newClusterState.version(),
          newClusterState.stateUUID());
      warnAboutSlowTaskIfNeeded(executionTime, tasksSummary);
    } catch (Exception e) {
      TimeValue executionTime =
          TimeValue.timeValueMillis(
              Math.max(0, TimeValue.nsecToMSec(currentTimeInNanos() - startTimeNS)));
      final long version = newClusterState.version();
      final String stateUUID = newClusterState.stateUUID();
      final String prettyPrint = newClusterState.prettyPrint();
      logger.warn(
          (Supplier<?>)
              () ->
                  new ParameterizedMessage(
                      "failed to apply updated cluster state in [{}]:\nversion [{}], uuid [{}], source [{}]\n{}",
                      executionTime,
                      version,
                      stateUUID,
                      tasksSummary,
                      prettyPrint),
          e);
      // TODO: do we want to call updateTask.onFailure here?
    }
  }
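   // Triggers state recovery from the gateway on the elected master once the configured
   // recover_after_* node counts are satisfied, enforcing recover_after_time either when no
   // expected_* counts are configured or when they are not yet met.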
  @Override
  public void clusterChanged(final ClusterChangedEvent event) {
    if (lifecycle.stoppedOrClosed()) {
      return;
    }

    final ClusterState state = event.state();

    if (state.nodes().isLocalNodeElectedMaster() == false) {
      // not our job to recover
      return;
    }
    if (state.blocks().hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK) == false) {
      // already recovered
      return;
    }

    DiscoveryNodes nodes = state.nodes();
    if (state.nodes().getMasterNodeId() == null) {
      logger.debug("not recovering from gateway, no master elected yet");
    } else if (recoverAfterNodes != -1
        && (nodes.getMasterAndDataNodes().size()) < recoverAfterNodes) {
      logger.debug(
          "not recovering from gateway, nodes_size (data+master) [{}] < recover_after_nodes [{}]",
          nodes.getMasterAndDataNodes().size(),
          recoverAfterNodes);
    } else if (recoverAfterDataNodes != -1 && nodes.getDataNodes().size() < recoverAfterDataNodes) {
      logger.debug(
          "not recovering from gateway, nodes_size (data) [{}] < recover_after_data_nodes [{}]",
          nodes.getDataNodes().size(),
          recoverAfterDataNodes);
    } else if (recoverAfterMasterNodes != -1
        && nodes.getMasterNodes().size() < recoverAfterMasterNodes) {
      logger.debug(
          "not recovering from gateway, nodes_size (master) [{}] < recover_after_master_nodes [{}]",
          nodes.getMasterNodes().size(),
          recoverAfterMasterNodes);
    } else {
      boolean enforceRecoverAfterTime;
      String reason;
      if (expectedNodes == -1 && expectedMasterNodes == -1 && expectedDataNodes == -1) {
        // no expected node counts are set, honor the recover_after_time setting if it is there
        enforceRecoverAfterTime = true;
        reason = "recover_after_time was set to [" + recoverAfterTime + "]";
      } else {
        // at least one of the expected node counts is set, see if all of them are met, and ignore
        // the timeout in this case
        enforceRecoverAfterTime = false;
        reason = "";
        if (expectedNodes != -1
            && (nodes.getMasterAndDataNodes().size()
                < expectedNodes)) { // does not meet the expected...
          enforceRecoverAfterTime = true;
          reason =
              "expecting ["
                  + expectedNodes
                  + "] nodes, but only have ["
                  + nodes.getMasterAndDataNodes().size()
                  + "]";
        } else if (expectedDataNodes != -1
            && (nodes.getDataNodes().size() < expectedDataNodes)) { // does not meet the expected...
          enforceRecoverAfterTime = true;
          reason =
              "expecting ["
                  + expectedDataNodes
                  + "] data nodes, but only have ["
                  + nodes.getDataNodes().size()
                  + "]";
        } else if (expectedMasterNodes != -1
            && (nodes.getMasterNodes().size()
                < expectedMasterNodes)) { // does not meet the expected...
          enforceRecoverAfterTime = true;
          reason =
              "expecting ["
                  + expectedMasterNodes
                  + "] master nodes, but only have ["
                  + nodes.getMasterNodes().size()
                  + "]";
        }
      }
      performStateRecovery(enforceRecoverAfterTime, reason);
    }
  }
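   // Persists global and per-index metadata on the master when they change, deletes local index
   // directories for indices removed from the metadata, and tracks dangling indices: deleting
   // them, scheduling their deletion, or importing them back into the cluster state depending on
   // the dangling timeout and auto-import settings.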
  @Override
  public void clusterChanged(ClusterChangedEvent event) {
    if (event.state().blocks().disableStatePersistence()) {
      // reset the current metadata, we need to start fresh...
      this.currentMetaData = null;
      return;
    }

    MetaData newMetaData = event.state().metaData();
    // we don't check if metaData changed, since we might be called several times and we need to
    // check dangling...

    boolean success = true;
    // only applied to master node, writing the global and index level states
    if (event.state().nodes().localNode().masterNode()) {
      // check if the global state changed
      if (currentMetaData == null || !MetaData.isGlobalStateEquals(currentMetaData, newMetaData)) {
        try {
          writeGlobalState("changed", newMetaData, currentMetaData);
        } catch (Exception e) {
          success = false;
        }
      }

      // check and write changes in indices
      for (IndexMetaData indexMetaData : newMetaData) {
        String writeReason = null;
        IndexMetaData currentIndexMetaData;
        if (currentMetaData == null) {
          // a new event..., check against the state stored on disk
          currentIndexMetaData = loadIndex(indexMetaData.index());
        } else {
          currentIndexMetaData = currentMetaData.index(indexMetaData.index());
        }
        if (currentIndexMetaData == null) {
          writeReason = "freshly created";
        } else if (currentIndexMetaData.version() != indexMetaData.version()) {
          writeReason =
              "version changed from ["
                  + currentIndexMetaData.version()
                  + "] to ["
                  + indexMetaData.version()
                  + "]";
        }

        // we update the writeReason only if we really need to write it
        if (writeReason == null) {
          continue;
        }

        try {
          writeIndex(writeReason, indexMetaData, currentIndexMetaData);
        } catch (Exception e) {
          success = false;
        }
      }
    }

    // delete indices that were there before, but are deleted now
    // we need to do it so they won't be detected as dangling
    if (nodeEnv.hasNodeFile()) {
      if (currentMetaData != null) {
        // only delete indices when we already received a state (currentMetaData != null)
        // and we had a go at processing dangling indices at least once
        // this will also delete the _state of the index itself
        for (IndexMetaData current : currentMetaData) {
          if (danglingIndices.containsKey(current.index())) {
            continue;
          }
          if (!newMetaData.hasIndex(current.index())) {
            logger.debug(
                "[{}] deleting index that is no longer part of the metadata (indices: [{}])",
                current.index(),
                newMetaData.indices().keySet());
            FileSystemUtils.deleteRecursively(nodeEnv.indexLocations(new Index(current.index())));
          }
        }
      }
    }

    // handle dangling indices, we handle those for all nodes that have a node file (data or master)
    if (nodeEnv.hasNodeFile()) {
      if (danglingTimeout.millis() >= 0) {
        synchronized (danglingMutex) {
          for (String danglingIndex : danglingIndices.keySet()) {
            if (newMetaData.hasIndex(danglingIndex)) {
              logger.debug("[{}] no longer dangling (created), removing", danglingIndex);
              DanglingIndex removed = danglingIndices.remove(danglingIndex);
              removed.future.cancel(false);
            }
          }
          // delete indices that are no longer part of the metadata
          try {
            for (String indexName : nodeEnv.findAllIndices()) {
              // if we have the index on the metadata, don't delete it
              if (newMetaData.hasIndex(indexName)) {
                continue;
              }
              if (danglingIndices.containsKey(indexName)) {
                // already dangling, continue
                continue;
              }
              IndexMetaData indexMetaData = loadIndex(indexName);
              if (indexMetaData != null) {
                if (danglingTimeout.millis() == 0) {
                  logger.info(
                      "[{}] dangling index, exists on local file system, but not in cluster metadata, timeout set to 0, deleting now",
                      indexName);
                  FileSystemUtils.deleteRecursively(nodeEnv.indexLocations(new Index(indexName)));
                } else {
                  logger.info(
                      "[{}] dangling index, exists on local file system, but not in cluster metadata, scheduling to delete in [{}], auto import to cluster state [{}]",
                      indexName,
                      danglingTimeout,
                      autoImportDangled);
                  danglingIndices.put(
                      indexName,
                      new DanglingIndex(
                          indexName,
                          threadPool.schedule(
                              danglingTimeout,
                              ThreadPool.Names.SAME,
                              new RemoveDanglingIndex(indexName))));
                }
              }
            }
          } catch (Exception e) {
            logger.warn("failed to find dangling indices", e);
          }
        }
      }
      if (autoImportDangled.shouldImport() && !danglingIndices.isEmpty()) {
        final List<IndexMetaData> dangled = Lists.newArrayList();
        for (String indexName : danglingIndices.keySet()) {
          IndexMetaData indexMetaData = loadIndex(indexName);
          if (indexMetaData == null) {
            logger.debug("failed to find state for dangling index [{}]", indexName);
            continue;
          }
          // we might have someone copying over an index, renaming the directory, handle that
          if (!indexMetaData.index().equals(indexName)) {
            logger.info(
                "dangled index directory name is [{}], state name is [{}], renaming to directory name",
                indexName,
                indexMetaData.index());
            indexMetaData =
                IndexMetaData.newIndexMetaDataBuilder(indexMetaData).index(indexName).build();
          }
          if (autoImportDangled == AutoImportDangledState.CLOSED) {
            indexMetaData =
                IndexMetaData.newIndexMetaDataBuilder(indexMetaData)
                    .state(IndexMetaData.State.CLOSE)
                    .build();
          }
          if (indexMetaData != null) {
            dangled.add(indexMetaData);
          }
        }
        IndexMetaData[] dangledIndices = dangled.toArray(new IndexMetaData[dangled.size()]);
        try {
          allocateDangledIndices.allocateDangled(
              dangledIndices,
              new LocalAllocateDangledIndices.Listener() {
                @Override
                public void onResponse(
                    LocalAllocateDangledIndices.AllocateDangledResponse response) {
                  logger.trace("allocated dangled");
                }

                @Override
                public void onFailure(Throwable e) {
                  logger.info("failed to send allocated dangled", e);
                }
              });
        } catch (Exception e) {
          logger.warn("failed to send allocate dangled", e);
        }
      }
    }

    if (success) {
      currentMetaData = newMetaData;
    }
  }