/** * Construct a scaling executor builder; the settings will have the specified key prefix. * * @param name the name of the executor * @param core the minimum number of threads in the pool * @param max the maximum number of threads in the pool * @param keepAlive the time that spare threads above {@code core} threads will be kept alive * @param prefix the prefix for the settings keys */ public ScalingExecutorBuilder( final String name, final int core, final int max, final TimeValue keepAlive, final String prefix) { super(name); this.coreSetting = Setting.intSetting(settingsKey(prefix, "core"), core, Setting.Property.NodeScope); this.maxSetting = Setting.intSetting(settingsKey(prefix, "max"), max, Setting.Property.NodeScope); this.keepAliveSetting = Setting.timeSetting( settingsKey(prefix, "keep_alive"), keepAlive, Setting.Property.NodeScope); }
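// Illustrative sketch, not part of the original source: constructing the builder above for a
// hypothetical "my_pool" executor. The pool name, core/max sizes, and keep-alive are assumed
// values; the prefix determines the settings keys ("thread_pool.my_pool.core", ".max",
// ".keep_alive").
class ScalingExecutorBuilderExample {
  static ScalingExecutorBuilder build() {
    return new ScalingExecutorBuilder(
        "my_pool", 1, 4, TimeValue.timeValueMinutes(5), "thread_pool.my_pool");
  }
}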
/** * Similar to the {@link ClusterRebalanceAllocationDecider}, this {@link AllocationDecider} controls * the number of currently in-progress re-balance (relocation) operations and restricts node * allocations if the configured threshold is reached. The default number of concurrent rebalance * operations is <tt>2</tt>. * * <p>Re-balance operations can be controlled in real-time via the cluster update API using * <tt>cluster.routing.allocation.cluster_concurrent_rebalance</tt>. Iff this setting is set to * <tt>-1</tt>, the number of concurrent re-balance operations is unlimited. */ public class ConcurrentRebalanceAllocationDecider extends AllocationDecider { public static final String NAME = "concurrent_rebalance"; public static final Setting<Integer> CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_REBALANCE_SETTING = Setting.intSetting( "cluster.routing.allocation.cluster_concurrent_rebalance", 2, -1, Property.Dynamic, Property.NodeScope); private volatile int clusterConcurrentRebalance; public ConcurrentRebalanceAllocationDecider(Settings settings, ClusterSettings clusterSettings) { super(settings); this.clusterConcurrentRebalance = CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_REBALANCE_SETTING.get(settings); logger.debug("using [cluster_concurrent_rebalance] with [{}]", clusterConcurrentRebalance); clusterSettings.addSettingsUpdateConsumer( CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_REBALANCE_SETTING, this::setClusterConcurrentRebalance); } private void setClusterConcurrentRebalance(int concurrentRebalance) { clusterConcurrentRebalance = concurrentRebalance; } @Override public Decision canRebalance(ShardRouting shardRouting, RoutingAllocation allocation) { if (clusterConcurrentRebalance == -1) { return allocation.decision(Decision.YES, NAME, "unlimited concurrent rebalances are allowed"); } int relocatingShards = allocation.routingNodes().getRelocatingShardCount(); if (relocatingShards >= clusterConcurrentRebalance) { return allocation.decision( Decision.NO, NAME, "too many shards are concurrently rebalancing [%d], limit: [%d]", relocatingShards, clusterConcurrentRebalance); } return allocation.decision( Decision.YES, NAME, "below threshold [%d] for concurrent rebalances, current rebalance shard count [%d]", clusterConcurrentRebalance, relocatingShards); } }
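// Illustrative sketch, not part of the original source: reading the dynamic limit declared
// above from a Settings instance. The value 5 is an arbitrary example; -1 would make the
// number of concurrent rebalances unlimited, as documented in the class Javadoc.
class ConcurrentRebalanceSettingExample {
  static int limit() {
    Settings settings =
        Settings.builder()
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", 5)
            .build();
    return ConcurrentRebalanceAllocationDecider
        .CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_REBALANCE_SETTING
        .get(settings); // 5
  }
}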
public class EsExecutors { /** * Settings key to manually set the number of available processors. This is used to adjust thread * pool sizes, etc., per node. */ public static final Setting<Integer> PROCESSORS_SETTING = Setting.intSetting( "processors", Runtime.getRuntime().availableProcessors(), 1, Property.NodeScope); /** * Returns the number of available processors. Defaults to {@link Runtime#availableProcessors()} * but can be overridden by passing a {@link Settings} instance with the key "processors" set to * the desired value. * * @param settings a {@link Settings} instance from which to derive the available processors * @return the number of available processors */ public static int numberOfProcessors(final Settings settings) { return PROCESSORS_SETTING.get(settings); } public static PrioritizedEsThreadPoolExecutor newSinglePrioritizing( String name, ThreadFactory threadFactory, ThreadContext contextHolder) { return new PrioritizedEsThreadPoolExecutor( name, 1, 1, 0L, TimeUnit.MILLISECONDS, threadFactory, contextHolder); } public static EsThreadPoolExecutor newScaling( String name, int min, int max, long keepAliveTime, TimeUnit unit, ThreadFactory threadFactory, ThreadContext contextHolder) { ExecutorScalingQueue<Runnable> queue = new ExecutorScalingQueue<>(); EsThreadPoolExecutor executor = new EsThreadPoolExecutor( name, min, max, keepAliveTime, unit, queue, threadFactory, new ForceQueuePolicy(), contextHolder); queue.executor = executor; return executor; } public static EsThreadPoolExecutor newFixed( String name, int size, int queueCapacity, ThreadFactory threadFactory, ThreadContext contextHolder) { BlockingQueue<Runnable> queue; if (queueCapacity < 0) { queue = ConcurrentCollections.newBlockingQueue(); } else { queue = new SizeBlockingQueue<>( ConcurrentCollections.<Runnable>newBlockingQueue(), queueCapacity); } return new EsThreadPoolExecutor( name, size, size, 0, TimeUnit.MILLISECONDS, queue, threadFactory, new EsAbortPolicy(), contextHolder); } public static String threadName(Settings settings, String... names) { String namePrefix = Arrays.stream(names) .filter(name -> name != null) .collect(Collectors.joining(".", "[", "]")); return threadName(settings, namePrefix); } public static String threadName(Settings settings, String namePrefix) { if (Node.NODE_NAME_SETTING.exists(settings)) { return threadName(Node.NODE_NAME_SETTING.get(settings), namePrefix); } else { return threadName("", namePrefix); } } public static String threadName(final String nodeName, final String namePrefix) { return "elasticsearch" + (nodeName.isEmpty() ? "" : "[") + nodeName + (nodeName.isEmpty() ? "" : "]") + "[" + namePrefix + "]"; } public static ThreadFactory daemonThreadFactory(Settings settings, String namePrefix) { return daemonThreadFactory(threadName(settings, namePrefix)); } public static ThreadFactory daemonThreadFactory(Settings settings, String... names) { return daemonThreadFactory(threadName(settings, names)); } public static ThreadFactory daemonThreadFactory(String namePrefix) { return new EsThreadFactory(namePrefix); } static class EsThreadFactory implements ThreadFactory { final ThreadGroup group; final AtomicInteger threadNumber = new AtomicInteger(1); final String namePrefix; public EsThreadFactory(String namePrefix) { this.namePrefix = namePrefix; SecurityManager s = System.getSecurityManager(); group = (s != null) ?
s.getThreadGroup() : Thread.currentThread().getThreadGroup(); } @Override public Thread newThread(Runnable r) { Thread t = new Thread(group, r, namePrefix + "[T#" + threadNumber.getAndIncrement() + "]", 0); t.setDaemon(true); return t; } } /** Cannot instantiate. */ private EsExecutors() {} static class ExecutorScalingQueue<E> extends LinkedTransferQueue<E> { ThreadPoolExecutor executor; public ExecutorScalingQueue() {} @Override public boolean offer(E e) { // first try to transfer to a waiting worker thread if (!tryTransfer(e)) { // check if there might be spare capacity in the thread // pool executor int left = executor.getMaximumPoolSize() - executor.getCorePoolSize(); if (left > 0) { // reject queuing the task to force the thread pool // executor to add a worker if it can; combined // with ForceQueuePolicy, this causes the thread // pool to always scale up to max pool size and we // only queue when there is no spare capacity return false; } else { return super.offer(e); } } else { return true; } } } /** * A handler for rejected tasks that adds the specified element to this queue, waiting if * necessary for space to become available. */ static class ForceQueuePolicy implements XRejectedExecutionHandler { @Override public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) { try { executor.getQueue().put(r); } catch (InterruptedException e) { // should never happen since we never wait throw new EsRejectedExecutionException(e); } } @Override public long rejected() { return 0; } } }
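// Illustrative sketch, not part of the original source: wiring the factory methods above
// together. The pool name and bounds are assumed values, and the ThreadContext(Settings)
// constructor is assumed. newScaling pairs ExecutorScalingQueue with ForceQueuePolicy, so the
// pool grows to its max size before tasks are queued, as the comments above explain.
class EsExecutorsExample {
  static EsThreadPoolExecutor scalingPool(Settings settings) {
    ThreadFactory factory = EsExecutors.daemonThreadFactory(settings, "example_scaling");
    ThreadContext context = new ThreadContext(settings); // assumed constructor
    return EsExecutors.newScaling(
        "example_scaling",
        1,
        EsExecutors.numberOfProcessors(settings),
        30,
        TimeUnit.SECONDS,
        factory,
        context);
  }
}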
public class GatewayService extends AbstractLifecycleComponent implements ClusterStateListener { public static final Setting<Integer> EXPECTED_NODES_SETTING = Setting.intSetting("gateway.expected_nodes", -1, -1, Property.NodeScope); public static final Setting<Integer> EXPECTED_DATA_NODES_SETTING = Setting.intSetting("gateway.expected_data_nodes", -1, -1, Property.NodeScope); public static final Setting<Integer> EXPECTED_MASTER_NODES_SETTING = Setting.intSetting("gateway.expected_master_nodes", -1, -1, Property.NodeScope); public static final Setting<TimeValue> RECOVER_AFTER_TIME_SETTING = Setting.positiveTimeSetting( "gateway.recover_after_time", TimeValue.timeValueMillis(0), Property.NodeScope); public static final Setting<Integer> RECOVER_AFTER_NODES_SETTING = Setting.intSetting("gateway.recover_after_nodes", -1, -1, Property.NodeScope); public static final Setting<Integer> RECOVER_AFTER_DATA_NODES_SETTING = Setting.intSetting("gateway.recover_after_data_nodes", -1, -1, Property.NodeScope); public static final Setting<Integer> RECOVER_AFTER_MASTER_NODES_SETTING = Setting.intSetting("gateway.recover_after_master_nodes", 0, 0, Property.NodeScope); public static final ClusterBlock STATE_NOT_RECOVERED_BLOCK = new ClusterBlock( 1, "state not recovered / initialized", true, true, RestStatus.SERVICE_UNAVAILABLE, ClusterBlockLevel.ALL); public static final TimeValue DEFAULT_RECOVER_AFTER_TIME_IF_EXPECTED_NODES_IS_SET = TimeValue.timeValueMinutes(5); private final Gateway gateway; private final ThreadPool threadPool; private final AllocationService allocationService; private final ClusterService clusterService; private final TimeValue recoverAfterTime; private final int recoverAfterNodes; private final int expectedNodes; private final int recoverAfterDataNodes; private final int expectedDataNodes; private final int recoverAfterMasterNodes; private final int expectedMasterNodes; private final AtomicBoolean recovered = new AtomicBoolean(); private final AtomicBoolean scheduledRecovery = new AtomicBoolean(); @Inject public GatewayService( Settings settings, AllocationService allocationService, ClusterService clusterService, ThreadPool threadPool, GatewayMetaState metaState, TransportNodesListGatewayMetaState listGatewayMetaState, Discovery discovery, IndicesService indicesService) { super(settings); this.gateway = new Gateway( settings, clusterService, metaState, listGatewayMetaState, discovery, indicesService); this.allocationService = allocationService; this.clusterService = clusterService; this.threadPool = threadPool; // allows controlling the delay before indices are created this.expectedNodes = EXPECTED_NODES_SETTING.get(this.settings); this.expectedDataNodes = EXPECTED_DATA_NODES_SETTING.get(this.settings); this.expectedMasterNodes = EXPECTED_MASTER_NODES_SETTING.get(this.settings); if (RECOVER_AFTER_TIME_SETTING.exists(this.settings)) { recoverAfterTime = RECOVER_AFTER_TIME_SETTING.get(this.settings); } else if (expectedNodes >= 0 || expectedDataNodes >= 0 || expectedMasterNodes >= 0) { recoverAfterTime = DEFAULT_RECOVER_AFTER_TIME_IF_EXPECTED_NODES_IS_SET; } else { recoverAfterTime = null; } this.recoverAfterNodes = RECOVER_AFTER_NODES_SETTING.get(this.settings); this.recoverAfterDataNodes = RECOVER_AFTER_DATA_NODES_SETTING.get(this.settings); // default the recover after master nodes to the minimum master nodes in the discovery if (RECOVER_AFTER_MASTER_NODES_SETTING.exists(this.settings)) { recoverAfterMasterNodes = RECOVER_AFTER_MASTER_NODES_SETTING.get(this.settings); } else { //
TODO: change me once the minimum_master_nodes is changed too recoverAfterMasterNodes = settings.getAsInt("discovery.zen.minimum_master_nodes", -1); } // Add the not recovered block as the initial state block; we don't allow anything until the state is recovered this.clusterService.addInitialStateBlock(STATE_NOT_RECOVERED_BLOCK); } @Override protected void doStart() { // use post applied so that the state will be visible to the background recovery thread we spawn // in performStateRecovery clusterService.addListener(this); } @Override protected void doStop() { clusterService.removeListener(this); } @Override protected void doClose() {} @Override public void clusterChanged(final ClusterChangedEvent event) { if (lifecycle.stoppedOrClosed()) { return; } final ClusterState state = event.state(); if (state.nodes().isLocalNodeElectedMaster() == false) { // not our job to recover return; } if (state.blocks().hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK) == false) { // already recovered return; } DiscoveryNodes nodes = state.nodes(); if (state.nodes().getMasterNodeId() == null) { logger.debug("not recovering from gateway, no master elected yet"); } else if (recoverAfterNodes != -1 && (nodes.getMasterAndDataNodes().size()) < recoverAfterNodes) { logger.debug( "not recovering from gateway, nodes_size (data+master) [{}] < recover_after_nodes [{}]", nodes.getMasterAndDataNodes().size(), recoverAfterNodes); } else if (recoverAfterDataNodes != -1 && nodes.getDataNodes().size() < recoverAfterDataNodes) { logger.debug( "not recovering from gateway, nodes_size (data) [{}] < recover_after_data_nodes [{}]", nodes.getDataNodes().size(), recoverAfterDataNodes); } else if (recoverAfterMasterNodes != -1 && nodes.getMasterNodes().size() < recoverAfterMasterNodes) { logger.debug( "not recovering from gateway, nodes_size (master) [{}] < recover_after_master_nodes [{}]", nodes.getMasterNodes().size(), recoverAfterMasterNodes); } else { boolean enforceRecoverAfterTime; String reason; if (expectedNodes == -1 && expectedMasterNodes == -1 && expectedDataNodes == -1) { // no expected_* setting is set, honor recover_after_time if it is there enforceRecoverAfterTime = true; reason = "recover_after_time was set to [" + recoverAfterTime + "]"; } else { // at least one expected_* setting is set; see if all of them are met, and ignore the timeout in // this case enforceRecoverAfterTime = false; reason = ""; if (expectedNodes != -1 && (nodes.getMasterAndDataNodes().size() < expectedNodes)) { // does not meet the expected... enforceRecoverAfterTime = true; reason = "expecting [" + expectedNodes + "] nodes, but only have [" + nodes.getMasterAndDataNodes().size() + "]"; } else if (expectedDataNodes != -1 && (nodes.getDataNodes().size() < expectedDataNodes)) { // does not meet the expected... enforceRecoverAfterTime = true; reason = "expecting [" + expectedDataNodes + "] data nodes, but only have [" + nodes.getDataNodes().size() + "]"; } else if (expectedMasterNodes != -1 && (nodes.getMasterNodes().size() < expectedMasterNodes)) { // does not meet the expected...
enforceRecoverAfterTime = true; reason = "expecting [" + expectedMasterNodes + "] master nodes, but only have [" + nodes.getMasterNodes().size() + "]"; } } performStateRecovery(enforceRecoverAfterTime, reason); } } private void performStateRecovery(boolean enforceRecoverAfterTime, String reason) { final Gateway.GatewayStateRecoveredListener recoveryListener = new GatewayRecoveryListener(); if (enforceRecoverAfterTime && recoverAfterTime != null) { if (scheduledRecovery.compareAndSet(false, true)) { logger.info("delaying initial state recovery for [{}]. {}", recoverAfterTime, reason); threadPool.schedule( recoverAfterTime, ThreadPool.Names.GENERIC, () -> { if (recovered.compareAndSet(false, true)) { logger.info( "recover_after_time [{}] elapsed. performing state recovery...", recoverAfterTime); gateway.performStateRecovery(recoveryListener); } }); } } else { if (recovered.compareAndSet(false, true)) { threadPool .generic() .execute( new AbstractRunnable() { @Override public void onFailure(Exception e) { logger.warn("Recovery failed", e); // `recovered` is reset in the listener; don't reset it here, otherwise there // might be a race that resets it to false while a new recovery is already running recoveryListener.onFailure("state recovery failed: " + e.getMessage()); } @Override protected void doRun() throws Exception { gateway.performStateRecovery(recoveryListener); } }); } } } public Gateway getGateway() { return gateway; } class GatewayRecoveryListener implements Gateway.GatewayStateRecoveredListener { @Override public void onSuccess(final ClusterState recoveredState) { logger.trace("successful state recovery, importing cluster state..."); clusterService.submitStateUpdateTask( "local-gateway-elected-state", new ClusterStateUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { assert currentState.metaData().indices().isEmpty(); // remove the block, since we recovered from gateway ClusterBlocks.Builder blocks = ClusterBlocks.builder() .blocks(currentState.blocks()) .blocks(recoveredState.blocks()) .removeGlobalBlock(STATE_NOT_RECOVERED_BLOCK); MetaData.Builder metaDataBuilder = MetaData.builder(recoveredState.metaData()); // automatically generate a UUID for the metadata if we need to metaDataBuilder.generateClusterUuidIfNeeded(); if (MetaData.SETTING_READ_ONLY_SETTING.get(recoveredState.metaData().settings()) || MetaData.SETTING_READ_ONLY_SETTING.get(currentState.metaData().settings())) { blocks.addGlobalBlock(MetaData.CLUSTER_READ_ONLY_BLOCK); } for (IndexMetaData indexMetaData : recoveredState.metaData()) { metaDataBuilder.put(indexMetaData, false); blocks.addBlocks(indexMetaData); } // update the state to reflect the new metadata and routing ClusterState updatedState = ClusterState.builder(currentState) .blocks(blocks) .metaData(metaDataBuilder) .build(); // initialize all index routing tables as empty RoutingTable.Builder routingTableBuilder = RoutingTable.builder(updatedState.routingTable()); for (ObjectCursor<IndexMetaData> cursor : updatedState.metaData().indices().values()) { routingTableBuilder.addAsRecovery(cursor.value); } // start with 0 based versions for routing table routingTableBuilder.version(0); // now, reroute updatedState = ClusterState.builder(updatedState) .routingTable(routingTableBuilder.build()) .build(); return allocationService.reroute(updatedState, "state recovered"); } @Override public void onFailure(String source, Exception e) { logger.error( (Supplier<?>) () -> new ParameterizedMessage("unexpected failure during [{}]", source), e);
GatewayRecoveryListener.this.onFailure("failed to update cluster state"); } @Override public void clusterStateProcessed( String source, ClusterState oldState, ClusterState newState) { logger.info( "recovered [{}] indices into cluster_state", newState.metaData().indices().size()); } }); } @Override public void onFailure(String message) { recovered.set(false); scheduledRecovery.set(false); // don't remove the block here, we don't want to allow anything in such a case logger.info("metadata state not restored, reason: {}", message); } } // used for testing public TimeValue recoverAfterTime() { return recoverAfterTime; } }
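// Illustrative sketch, not part of the original source: the recovery-gate settings the service
// reads at construction time. With gateway.expected_nodes set and gateway.recover_after_time
// absent, the constructor above falls back to the five-minute default.
class GatewayRecoverySettingsExample {
  static void demo() {
    Settings settings = Settings.builder().put("gateway.expected_nodes", 3).build();
    int expectedNodes = GatewayService.EXPECTED_NODES_SETTING.get(settings); // 3
    boolean explicitTime = GatewayService.RECOVER_AFTER_TIME_SETTING.exists(settings); // false
  }
}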
public final class HttpTransportSettings { public static final Setting<Boolean> SETTING_CORS_ENABLED = Setting.boolSetting("http.cors.enabled", false, false, Scope.CLUSTER); public static final Setting<String> SETTING_CORS_ALLOW_ORIGIN = new Setting<String>("http.cors.allow-origin", "", (value) -> value, false, Scope.CLUSTER); public static final Setting<Integer> SETTING_CORS_MAX_AGE = Setting.intSetting("http.cors.max-age", 1728000, false, Scope.CLUSTER); public static final Setting<String> SETTING_CORS_ALLOW_METHODS = new Setting<String>( "http.cors.allow-methods", "OPTIONS, HEAD, GET, POST, PUT, DELETE", (value) -> value, false, Scope.CLUSTER); public static final Setting<String> SETTING_CORS_ALLOW_HEADERS = new Setting<String>( "http.cors.allow-headers", "X-Requested-With, Content-Type, Content-Length", (value) -> value, false, Scope.CLUSTER); public static final Setting<Boolean> SETTING_CORS_ALLOW_CREDENTIALS = Setting.boolSetting("http.cors.allow-credentials", false, false, Scope.CLUSTER); public static final Setting<Boolean> SETTING_PIPELINING = Setting.boolSetting("http.pipelining", true, false, Scope.CLUSTER); public static final Setting<Integer> SETTING_PIPELINING_MAX_EVENTS = Setting.intSetting("http.pipelining.max_events", 10000, false, Scope.CLUSTER); public static final Setting<Boolean> SETTING_HTTP_COMPRESSION = Setting.boolSetting("http.compression", false, false, Scope.CLUSTER); public static final Setting<Integer> SETTING_HTTP_COMPRESSION_LEVEL = Setting.intSetting("http.compression_level", 6, false, Scope.CLUSTER); public static final Setting<List<String>> SETTING_HTTP_HOST = listSetting("http.host", emptyList(), s -> s, false, Scope.CLUSTER); public static final Setting<List<String>> SETTING_HTTP_PUBLISH_HOST = listSetting("http.publish_host", SETTING_HTTP_HOST, s -> s, false, Scope.CLUSTER); public static final Setting<List<String>> SETTING_HTTP_BIND_HOST = listSetting("http.bind_host", SETTING_HTTP_HOST, s -> s, false, Scope.CLUSTER); public static final Setting<PortsRange> SETTING_HTTP_PORT = new Setting<PortsRange>("http.port", "9200-9300", PortsRange::new, false, Scope.CLUSTER); public static final Setting<Integer> SETTING_HTTP_PUBLISH_PORT = Setting.intSetting("http.publish_port", 0, 0, false, Scope.CLUSTER); public static final Setting<Boolean> SETTING_HTTP_DETAILED_ERRORS_ENABLED = Setting.boolSetting("http.detailed_errors.enabled", true, false, Scope.CLUSTER); public static final Setting<ByteSizeValue> SETTING_HTTP_MAX_CONTENT_LENGTH = Setting.byteSizeSetting( "http.max_content_length", new ByteSizeValue(100, ByteSizeUnit.MB), false, Scope.CLUSTER); public static final Setting<ByteSizeValue> SETTING_HTTP_MAX_CHUNK_SIZE = Setting.byteSizeSetting( "http.max_chunk_size", new ByteSizeValue(8, ByteSizeUnit.KB), false, Scope.CLUSTER); public static final Setting<ByteSizeValue> SETTING_HTTP_MAX_HEADER_SIZE = Setting.byteSizeSetting( "http.max_header_size", new ByteSizeValue(8, ByteSizeUnit.KB), false, Scope.CLUSTER); public static final Setting<ByteSizeValue> SETTING_HTTP_MAX_INITIAL_LINE_LENGTH = Setting.byteSizeSetting( "http.max_initial_line_length", new ByteSizeValue(4, ByteSizeUnit.KB), false, Scope.CLUSTER); // don't reset cookies by default, since we don't really need to // note, cookie parsing was fixed in netty 3.5.1 regarding stack allocation, but // currently we still don't need cookies public static final Setting<Boolean> SETTING_HTTP_RESET_COOKIES = Setting.boolSetting("http.reset_cookies", false, false, Scope.CLUSTER); private
HttpTransportSettings() {} }
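// Illustrative sketch, not part of the original source: reading a few of the CORS settings
// declared above. The origin value is an arbitrary example.
class HttpCorsSettingsExample {
  static boolean corsEnabled() {
    Settings settings =
        Settings.builder()
            .put("http.cors.enabled", true)
            .put("http.cors.allow-origin", "https://example.org")
            .build();
    return HttpTransportSettings.SETTING_CORS_ENABLED.get(settings); // true
  }
}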
/** * A base class for {@link org.elasticsearch.discovery.zen.fd.MasterFaultDetection} & {@link * org.elasticsearch.discovery.zen.fd.NodesFaultDetection}, making sure both use the same setting. */ public abstract class FaultDetection extends AbstractComponent { public static final Setting<Boolean> CONNECT_ON_NETWORK_DISCONNECT_SETTING = Setting.boolSetting( "discovery.zen.fd.connect_on_network_disconnect", false, Property.NodeScope); public static final Setting<TimeValue> PING_INTERVAL_SETTING = Setting.positiveTimeSetting( "discovery.zen.fd.ping_interval", timeValueSeconds(1), Property.NodeScope); public static final Setting<TimeValue> PING_TIMEOUT_SETTING = Setting.timeSetting( "discovery.zen.fd.ping_timeout", timeValueSeconds(30), Property.NodeScope); public static final Setting<Integer> PING_RETRIES_SETTING = Setting.intSetting("discovery.zen.fd.ping_retries", 3, Property.NodeScope); public static final Setting<Boolean> REGISTER_CONNECTION_LISTENER_SETTING = Setting.boolSetting( "discovery.zen.fd.register_connection_listener", true, Property.NodeScope); protected final ThreadPool threadPool; protected final ClusterName clusterName; protected final TransportService transportService; // used mainly for testing, should always be true protected final boolean registerConnectionListener; protected final FDConnectionListener connectionListener; protected final boolean connectOnNetworkDisconnect; protected final TimeValue pingInterval; protected final TimeValue pingRetryTimeout; protected final int pingRetryCount; public FaultDetection( Settings settings, ThreadPool threadPool, TransportService transportService, ClusterName clusterName) { super(settings); this.threadPool = threadPool; this.transportService = transportService; this.clusterName = clusterName; this.connectOnNetworkDisconnect = CONNECT_ON_NETWORK_DISCONNECT_SETTING.get(settings); this.pingInterval = PING_INTERVAL_SETTING.get(settings); this.pingRetryTimeout = PING_TIMEOUT_SETTING.get(settings); this.pingRetryCount = PING_RETRIES_SETTING.get(settings); this.registerConnectionListener = REGISTER_CONNECTION_LISTENER_SETTING.get(settings); this.connectionListener = new FDConnectionListener(); if (registerConnectionListener) { transportService.addConnectionListener(connectionListener); } } public void close() { transportService.removeConnectionListener(connectionListener); } /** * This method will be called when the {@link org.elasticsearch.transport.TransportService} raised * a node disconnected event */ abstract void handleTransportDisconnect(DiscoveryNode node); private class FDConnectionListener implements TransportConnectionListener { @Override public void onNodeConnected(DiscoveryNode node) {} @Override public void onNodeDisconnected(DiscoveryNode node) { handleTransportDisconnect(node); } } }
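// Illustrative sketch, not part of the original source: the fault-detection settings consumed
// by the constructor above. The values are arbitrary examples; the declared defaults are a 1s
// interval, 30s timeout, and 3 retries.
class FaultDetectionSettingsExample {
  static void demo() {
    Settings settings =
        Settings.builder()
            .put("discovery.zen.fd.ping_interval", "5s")
            .put("discovery.zen.fd.ping_retries", 5)
            .build();
    TimeValue interval = FaultDetection.PING_INTERVAL_SETTING.get(settings); // 5s
    int retries = FaultDetection.PING_RETRIES_SETTING.get(settings); // 5
  }
}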
public class IndexMetaData implements Diffable<IndexMetaData>, FromXContentBuilder<IndexMetaData>, ToXContent { public interface Custom extends Diffable<Custom>, ToXContent { String type(); Custom fromMap(Map<String, Object> map) throws IOException; Custom fromXContent(XContentParser parser) throws IOException; /** * Merges from this to another, with this being more important, i.e., if something exists in * this and another, this will prevail. */ Custom mergeWith(Custom another); } public static Map<String, Custom> customPrototypes = new HashMap<>(); /** Register a custom index meta data factory. Make sure to call it from a static block. */ public static void registerPrototype(String type, Custom proto) { customPrototypes.put(type, proto); } @Nullable public static <T extends Custom> T lookupPrototype(String type) { //noinspection unchecked return (T) customPrototypes.get(type); } public static <T extends Custom> T lookupPrototypeSafe(String type) { //noinspection unchecked T proto = (T) customPrototypes.get(type); if (proto == null) { throw new IllegalArgumentException( "No custom metadata prototype registered for type [" + type + "]"); } return proto; } public static final ClusterBlock INDEX_READ_ONLY_BLOCK = new ClusterBlock( 5, "index read-only (api)", false, false, RestStatus.FORBIDDEN, EnumSet.of(ClusterBlockLevel.WRITE, ClusterBlockLevel.METADATA_WRITE)); public static final ClusterBlock INDEX_READ_BLOCK = new ClusterBlock( 7, "index read (api)", false, false, RestStatus.FORBIDDEN, EnumSet.of(ClusterBlockLevel.READ)); public static final ClusterBlock INDEX_WRITE_BLOCK = new ClusterBlock( 8, "index write (api)", false, false, RestStatus.FORBIDDEN, EnumSet.of(ClusterBlockLevel.WRITE)); public static final ClusterBlock INDEX_METADATA_BLOCK = new ClusterBlock( 9, "index metadata (api)", false, false, RestStatus.FORBIDDEN, EnumSet.of(ClusterBlockLevel.METADATA_WRITE, ClusterBlockLevel.METADATA_READ)); public static enum State { OPEN((byte) 0), CLOSE((byte) 1); private final byte id; State(byte id) { this.id = id; } public byte id() { return this.id; } public static State fromId(byte id) { if (id == 0) { return OPEN; } else if (id == 1) { return CLOSE; } throw new IllegalStateException("No state match for id [" + id + "]"); } public static State fromString(String state) { if ("open".equals(state)) { return OPEN; } else if ("close".equals(state)) { return CLOSE; } throw new IllegalStateException("No state match for [" + state + "]"); } } public static final String INDEX_SETTING_PREFIX = "index."; public static final String SETTING_NUMBER_OF_SHARDS = "index.number_of_shards"; public static final Setting<Integer> INDEX_NUMBER_OF_SHARDS_SETTING = Setting.intSetting(SETTING_NUMBER_OF_SHARDS, 5, 1, false, Setting.Scope.INDEX); public static final String SETTING_NUMBER_OF_REPLICAS = "index.number_of_replicas"; public static final Setting<Integer> INDEX_NUMBER_OF_REPLICAS_SETTING = Setting.intSetting(SETTING_NUMBER_OF_REPLICAS, 1, 0, true, Setting.Scope.INDEX); public static final String SETTING_SHADOW_REPLICAS = "index.shadow_replicas"; public static final Setting<Boolean> INDEX_SHADOW_REPLICAS_SETTING = Setting.boolSetting(SETTING_SHADOW_REPLICAS, false, false, Setting.Scope.INDEX); public static final String SETTING_SHARED_FILESYSTEM = "index.shared_filesystem"; public static final Setting<Boolean> INDEX_SHARED_FILESYSTEM_SETTING = Setting.boolSetting(SETTING_SHARED_FILESYSTEM, false, false, Setting.Scope.INDEX); public static final String SETTING_AUTO_EXPAND_REPLICAS = 
"index.auto_expand_replicas"; public static final Setting<AutoExpandReplicas> INDEX_AUTO_EXPAND_REPLICAS_SETTING = AutoExpandReplicas.SETTING; public static final String SETTING_READ_ONLY = "index.blocks.read_only"; public static final Setting<Boolean> INDEX_READ_ONLY_SETTING = Setting.boolSetting(SETTING_READ_ONLY, false, true, Setting.Scope.INDEX); public static final String SETTING_BLOCKS_READ = "index.blocks.read"; public static final Setting<Boolean> INDEX_BLOCKS_READ_SETTING = Setting.boolSetting(SETTING_BLOCKS_READ, false, true, Setting.Scope.INDEX); public static final String SETTING_BLOCKS_WRITE = "index.blocks.write"; public static final Setting<Boolean> INDEX_BLOCKS_WRITE_SETTING = Setting.boolSetting(SETTING_BLOCKS_WRITE, false, true, Setting.Scope.INDEX); public static final String SETTING_BLOCKS_METADATA = "index.blocks.metadata"; public static final Setting<Boolean> INDEX_BLOCKS_METADATA_SETTING = Setting.boolSetting(SETTING_BLOCKS_METADATA, false, true, Setting.Scope.INDEX); public static final String SETTING_VERSION_CREATED = "index.version.created"; public static final String SETTING_VERSION_CREATED_STRING = "index.version.created_string"; public static final String SETTING_VERSION_UPGRADED = "index.version.upgraded"; public static final String SETTING_VERSION_UPGRADED_STRING = "index.version.upgraded_string"; public static final String SETTING_VERSION_MINIMUM_COMPATIBLE = "index.version.minimum_compatible"; public static final String SETTING_CREATION_DATE = "index.creation_date"; public static final String SETTING_PRIORITY = "index.priority"; public static final Setting<Integer> INDEX_PRIORITY_SETTING = Setting.intSetting("index.priority", 1, 0, true, Setting.Scope.INDEX); public static final String SETTING_CREATION_DATE_STRING = "index.creation_date_string"; public static final String SETTING_INDEX_UUID = "index.uuid"; public static final String SETTING_DATA_PATH = "index.data_path"; public static final Setting<String> INDEX_DATA_PATH_SETTING = new Setting<>(SETTING_DATA_PATH, "", Function.identity(), false, Setting.Scope.INDEX); public static final String SETTING_SHARED_FS_ALLOW_RECOVERY_ON_ANY_NODE = "index.shared_filesystem.recover_on_any_node"; public static final Setting<Boolean> INDEX_SHARED_FS_ALLOW_RECOVERY_ON_ANY_NODE_SETTING = Setting.boolSetting( SETTING_SHARED_FS_ALLOW_RECOVERY_ON_ANY_NODE, false, true, Setting.Scope.INDEX); public static final String INDEX_UUID_NA_VALUE = "_na_"; public static final Setting<Settings> INDEX_ROUTING_REQUIRE_GROUP_SETTING = Setting.groupSetting("index.routing.allocation.require.", true, Setting.Scope.INDEX); public static final Setting<Settings> INDEX_ROUTING_INCLUDE_GROUP_SETTING = Setting.groupSetting("index.routing.allocation.include.", true, Setting.Scope.INDEX); public static final Setting<Settings> INDEX_ROUTING_EXCLUDE_GROUP_SETTING = Setting.groupSetting("index.routing.allocation.exclude.", true, Setting.Scope.INDEX); public static final IndexMetaData PROTO = IndexMetaData.builder("") .settings(Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)) .numberOfShards(1) .numberOfReplicas(0) .build(); public static final String KEY_ACTIVE_ALLOCATIONS = "active_allocations"; private final int numberOfShards; private final int numberOfReplicas; private final Index index; private final long version; private final State state; private final ImmutableOpenMap<String, AliasMetaData> aliases; private final Settings settings; private final ImmutableOpenMap<String, MappingMetaData> mappings; private final 
ImmutableOpenMap<String, Custom> customs; private final ImmutableOpenIntMap<Set<String>> activeAllocationIds; private final transient int totalNumberOfShards; private final DiscoveryNodeFilters requireFilters; private final DiscoveryNodeFilters includeFilters; private final DiscoveryNodeFilters excludeFilters; private final Version indexCreatedVersion; private final Version indexUpgradedVersion; private final org.apache.lucene.util.Version minimumCompatibleLuceneVersion; private IndexMetaData( Index index, long version, State state, int numberOfShards, int numberOfReplicas, Settings settings, ImmutableOpenMap<String, MappingMetaData> mappings, ImmutableOpenMap<String, AliasMetaData> aliases, ImmutableOpenMap<String, Custom> customs, ImmutableOpenIntMap<Set<String>> activeAllocationIds, DiscoveryNodeFilters requireFilters, DiscoveryNodeFilters includeFilters, DiscoveryNodeFilters excludeFilters, Version indexCreatedVersion, Version indexUpgradedVersion, org.apache.lucene.util.Version minimumCompatibleLuceneVersion) { this.index = index; this.version = version; this.state = state; this.numberOfShards = numberOfShards; this.numberOfReplicas = numberOfReplicas; this.totalNumberOfShards = numberOfShards * (numberOfReplicas + 1); this.settings = settings; this.mappings = mappings; this.customs = customs; this.aliases = aliases; this.activeAllocationIds = activeAllocationIds; this.requireFilters = requireFilters; this.includeFilters = includeFilters; this.excludeFilters = excludeFilters; this.indexCreatedVersion = indexCreatedVersion; this.indexUpgradedVersion = indexUpgradedVersion; this.minimumCompatibleLuceneVersion = minimumCompatibleLuceneVersion; } public Index getIndex() { return index; } public String getIndexUUID() { return index.getUUID(); } /** * Test whether the current index UUID is the same as the given one. Returns true if either is * _na_. */ public boolean isSameUUID(String otherUUID) { assert otherUUID != null; assert getIndexUUID() != null; if (INDEX_UUID_NA_VALUE.equals(otherUUID) || INDEX_UUID_NA_VALUE.equals(getIndexUUID())) { return true; } return otherUUID.equals(getIndexUUID()); } public long getVersion() { return this.version; } /** * Return the {@link Version} on which this index has been created. This information is typically * useful for backward compatibility. */ public Version getCreationVersion() { return indexCreatedVersion; } /** * Return the {@link Version} on which this index has been upgraded. This information is typically * useful for backward compatibility.
*/ public Version getUpgradedVersion() { return indexUpgradedVersion; } /** Return the {@link org.apache.lucene.util.Version} of the oldest lucene segment in the index. */ public org.apache.lucene.util.Version getMinimumCompatibleVersion() { return minimumCompatibleLuceneVersion; } public long getCreationDate() { return settings.getAsLong(SETTING_CREATION_DATE, -1L); } public State getState() { return this.state; } public int getNumberOfShards() { return numberOfShards; } public int getNumberOfReplicas() { return numberOfReplicas; } public int getTotalNumberOfShards() { return totalNumberOfShards; } public Settings getSettings() { return settings; } public ImmutableOpenMap<String, AliasMetaData> getAliases() { return this.aliases; } public ImmutableOpenMap<String, MappingMetaData> getMappings() { return mappings; } @Nullable public MappingMetaData mapping(String mappingType) { return mappings.get(mappingType); } /** * Sometimes, the default mapping exists while an actual mapping has not been introduced yet; in * this case, we want to return the default mapping in case it has some default mapping * definitions. * * <p>Note, once the mapping type is introduced, the default mapping is applied on the actual * typed MappingMetaData, setting its routing, timestamp, and so on if needed. */ @Nullable public MappingMetaData mappingOrDefault(String mappingType) { MappingMetaData mapping = mappings.get(mappingType); if (mapping != null) { return mapping; } return mappings.get(MapperService.DEFAULT_MAPPING); } public ImmutableOpenMap<String, Custom> getCustoms() { return this.customs; } @SuppressWarnings("unchecked") public <T extends Custom> T custom(String type) { return (T) customs.get(type); } public ImmutableOpenIntMap<Set<String>> getActiveAllocationIds() { return activeAllocationIds; } public Set<String> activeAllocationIds(int shardId) { assert shardId >= 0 && shardId < numberOfShards; return activeAllocationIds.get(shardId); } @Nullable public DiscoveryNodeFilters requireFilters() { return requireFilters; } @Nullable public DiscoveryNodeFilters includeFilters() { return includeFilters; } @Nullable public DiscoveryNodeFilters excludeFilters() { return excludeFilters; } @Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } IndexMetaData that = (IndexMetaData) o; if (!aliases.equals(that.aliases)) { return false; } if (!index.equals(that.index)) { return false; } if (!mappings.equals(that.mappings)) { return false; } if (!settings.equals(that.settings)) { return false; } if (state != that.state) { return false; } if (!customs.equals(that.customs)) { return false; } if (!activeAllocationIds.equals(that.activeAllocationIds)) { return false; } return true; } @Override public int hashCode() { int result = index.hashCode(); result = 31 * result + state.hashCode(); result = 31 * result + aliases.hashCode(); result = 31 * result + settings.hashCode(); result = 31 * result + mappings.hashCode(); result = 31 * result + activeAllocationIds.hashCode(); return result; } @Override public Diff<IndexMetaData> diff(IndexMetaData previousState) { return new IndexMetaDataDiff(previousState, this); } @Override public Diff<IndexMetaData> readDiffFrom(StreamInput in) throws IOException { return new IndexMetaDataDiff(in); } @Override public IndexMetaData fromXContent(XContentParser parser, ParseFieldMatcher parseFieldMatcher) throws IOException { return Builder.fromXContent(parser); } @Override public XContentBuilder
toXContent(XContentBuilder builder, Params params) throws IOException { Builder.toXContent(this, builder, params); return builder; } private static class IndexMetaDataDiff implements Diff<IndexMetaData> { private final String index; private final long version; private final State state; private final Settings settings; private final Diff<ImmutableOpenMap<String, MappingMetaData>> mappings; private final Diff<ImmutableOpenMap<String, AliasMetaData>> aliases; private final Diff<ImmutableOpenMap<String, Custom>> customs; private final Diff<ImmutableOpenIntMap<Set<String>>> activeAllocationIds; public IndexMetaDataDiff(IndexMetaData before, IndexMetaData after) { index = after.index.getName(); version = after.version; state = after.state; settings = after.settings; mappings = DiffableUtils.diff( before.mappings, after.mappings, DiffableUtils.getStringKeySerializer()); aliases = DiffableUtils.diff(before.aliases, after.aliases, DiffableUtils.getStringKeySerializer()); customs = DiffableUtils.diff(before.customs, after.customs, DiffableUtils.getStringKeySerializer()); activeAllocationIds = DiffableUtils.diff( before.activeAllocationIds, after.activeAllocationIds, DiffableUtils.getVIntKeySerializer(), DiffableUtils.StringSetValueSerializer.getInstance()); } public IndexMetaDataDiff(StreamInput in) throws IOException { index = in.readString(); version = in.readLong(); state = State.fromId(in.readByte()); settings = Settings.readSettingsFromStream(in); mappings = DiffableUtils.readImmutableOpenMapDiff( in, DiffableUtils.getStringKeySerializer(), MappingMetaData.PROTO); aliases = DiffableUtils.readImmutableOpenMapDiff( in, DiffableUtils.getStringKeySerializer(), AliasMetaData.PROTO); customs = DiffableUtils.readImmutableOpenMapDiff( in, DiffableUtils.getStringKeySerializer(), new DiffableUtils.DiffableValueSerializer<String, Custom>() { @Override public Custom read(StreamInput in, String key) throws IOException { return lookupPrototypeSafe(key).readFrom(in); } @Override public Diff<Custom> readDiff(StreamInput in, String key) throws IOException { return lookupPrototypeSafe(key).readDiffFrom(in); } }); activeAllocationIds = DiffableUtils.readImmutableOpenIntMapDiff( in, DiffableUtils.getVIntKeySerializer(), DiffableUtils.StringSetValueSerializer.getInstance()); } @Override public void writeTo(StreamOutput out) throws IOException { out.writeString(index); out.writeLong(version); out.writeByte(state.id); Settings.writeSettingsToStream(settings, out); mappings.writeTo(out); aliases.writeTo(out); customs.writeTo(out); activeAllocationIds.writeTo(out); } @Override public IndexMetaData apply(IndexMetaData part) { Builder builder = builder(index); builder.version(version); builder.state(state); builder.settings(settings); builder.mappings.putAll(mappings.apply(part.mappings)); builder.aliases.putAll(aliases.apply(part.aliases)); builder.customs.putAll(customs.apply(part.customs)); builder.activeAllocationIds.putAll(activeAllocationIds.apply(part.activeAllocationIds)); return builder.build(); } } @Override public IndexMetaData readFrom(StreamInput in) throws IOException { Builder builder = new Builder(in.readString()); builder.version(in.readLong()); builder.state(State.fromId(in.readByte())); builder.settings(readSettingsFromStream(in)); int mappingsSize = in.readVInt(); for (int i = 0; i < mappingsSize; i++) { MappingMetaData mappingMd = MappingMetaData.PROTO.readFrom(in); builder.putMapping(mappingMd); } int aliasesSize = in.readVInt(); for (int i = 0; i < aliasesSize; i++) { AliasMetaData aliasMd = 
AliasMetaData.Builder.readFrom(in); builder.putAlias(aliasMd); } int customSize = in.readVInt(); for (int i = 0; i < customSize; i++) { String type = in.readString(); Custom customIndexMetaData = lookupPrototypeSafe(type).readFrom(in); builder.putCustom(type, customIndexMetaData); } int activeAllocationIdsSize = in.readVInt(); for (int i = 0; i < activeAllocationIdsSize; i++) { int key = in.readVInt(); Set<String> allocationIds = DiffableUtils.StringSetValueSerializer.getInstance().read(in, key); builder.putActiveAllocationIds(key, allocationIds); } return builder.build(); } @Override public void writeTo(StreamOutput out) throws IOException { out.writeString(index.getName()); // uuid will come as part of settings out.writeLong(version); out.writeByte(state.id()); writeSettingsToStream(settings, out); out.writeVInt(mappings.size()); for (ObjectCursor<MappingMetaData> cursor : mappings.values()) { cursor.value.writeTo(out); } out.writeVInt(aliases.size()); for (ObjectCursor<AliasMetaData> cursor : aliases.values()) { cursor.value.writeTo(out); } out.writeVInt(customs.size()); for (ObjectObjectCursor<String, Custom> cursor : customs) { out.writeString(cursor.key); cursor.value.writeTo(out); } out.writeVInt(activeAllocationIds.size()); for (IntObjectCursor<Set<String>> cursor : activeAllocationIds) { out.writeVInt(cursor.key); DiffableUtils.StringSetValueSerializer.getInstance().write(cursor.value, out); } } public static Builder builder(String index) { return new Builder(index); } public static Builder builder(IndexMetaData indexMetaData) { return new Builder(indexMetaData); } public static class Builder { private String index; private State state = State.OPEN; private long version = 1; private Settings settings = Settings.Builder.EMPTY_SETTINGS; private final ImmutableOpenMap.Builder<String, MappingMetaData> mappings; private final ImmutableOpenMap.Builder<String, AliasMetaData> aliases; private final ImmutableOpenMap.Builder<String, Custom> customs; private final ImmutableOpenIntMap.Builder<Set<String>> activeAllocationIds; public Builder(String index) { this.index = index; this.mappings = ImmutableOpenMap.builder(); this.aliases = ImmutableOpenMap.builder(); this.customs = ImmutableOpenMap.builder(); this.activeAllocationIds = ImmutableOpenIntMap.builder(); } public Builder(IndexMetaData indexMetaData) { this.index = indexMetaData.getIndex().getName(); this.state = indexMetaData.state; this.version = indexMetaData.version; this.settings = indexMetaData.getSettings(); this.mappings = ImmutableOpenMap.builder(indexMetaData.mappings); this.aliases = ImmutableOpenMap.builder(indexMetaData.aliases); this.customs = ImmutableOpenMap.builder(indexMetaData.customs); this.activeAllocationIds = ImmutableOpenIntMap.builder(indexMetaData.activeAllocationIds); } public String index() { return index; } public Builder index(String index) { this.index = index; return this; } public Builder numberOfShards(int numberOfShards) { settings = settingsBuilder().put(settings).put(SETTING_NUMBER_OF_SHARDS, numberOfShards).build(); return this; } public int numberOfShards() { return settings.getAsInt(SETTING_NUMBER_OF_SHARDS, -1); } public Builder numberOfReplicas(int numberOfReplicas) { settings = settingsBuilder().put(settings).put(SETTING_NUMBER_OF_REPLICAS, numberOfReplicas).build(); return this; } public int numberOfReplicas() { return settings.getAsInt(SETTING_NUMBER_OF_REPLICAS, -1); } public Builder creationDate(long creationDate) { settings = settingsBuilder().put(settings).put(SETTING_CREATION_DATE, 
creationDate).build(); return this; } public Builder settings(Settings.Builder settings) { this.settings = settings.build(); return this; } public Builder settings(Settings settings) { this.settings = settings; return this; } public MappingMetaData mapping(String type) { return mappings.get(type); } public Builder putMapping(String type, String source) throws IOException { try (XContentParser parser = XContentFactory.xContent(source).createParser(source)) { putMapping(new MappingMetaData(type, parser.mapOrdered())); } return this; } public Builder putMapping(MappingMetaData mappingMd) { mappings.put(mappingMd.type(), mappingMd); return this; } public Builder state(State state) { this.state = state; return this; } public Builder putAlias(AliasMetaData aliasMetaData) { aliases.put(aliasMetaData.alias(), aliasMetaData); return this; } public Builder putAlias(AliasMetaData.Builder aliasMetaData) { aliases.put(aliasMetaData.alias(), aliasMetaData.build()); return this; } public Builder removeAlias(String alias) { aliases.remove(alias); return this; } public Builder removeAllAliases() { aliases.clear(); return this; } public Builder putCustom(String type, Custom customIndexMetaData) { this.customs.put(type, customIndexMetaData); return this; } public Builder putActiveAllocationIds(int shardId, Set<String> allocationIds) { activeAllocationIds.put(shardId, new HashSet<>(allocationIds)); return this; } public long version() { return this.version; } public Builder version(long version) { this.version = version; return this; } public IndexMetaData build() { ImmutableOpenMap.Builder<String, AliasMetaData> tmpAliases = aliases; Settings tmpSettings = settings; // update default mapping on the MappingMetaData if (mappings.containsKey(MapperService.DEFAULT_MAPPING)) { MappingMetaData defaultMapping = mappings.get(MapperService.DEFAULT_MAPPING); for (ObjectCursor<MappingMetaData> cursor : mappings.values()) { cursor.value.updateDefaultMapping(defaultMapping); } } Integer maybeNumberOfShards = settings.getAsInt(SETTING_NUMBER_OF_SHARDS, null); if (maybeNumberOfShards == null) { throw new IllegalArgumentException("must specify numberOfShards for index [" + index + "]"); } int numberOfShards = maybeNumberOfShards; if (numberOfShards <= 0) { throw new IllegalArgumentException( "must specify positive number of shards for index [" + index + "]"); } Integer maybeNumberOfReplicas = settings.getAsInt(SETTING_NUMBER_OF_REPLICAS, null); if (maybeNumberOfReplicas == null) { throw new IllegalArgumentException( "must specify numberOfReplicas for index [" + index + "]"); } int numberOfReplicas = maybeNumberOfReplicas; if (numberOfReplicas < 0) { throw new IllegalArgumentException( "must specify non-negative number of replicas for index [" + index + "]"); } // fill missing slots in activeAllocationIds with empty set if needed and make all entries // immutable ImmutableOpenIntMap.Builder<Set<String>> filledActiveAllocationIds = ImmutableOpenIntMap.builder(); for (int i = 0; i < numberOfShards; i++) { if (activeAllocationIds.containsKey(i)) { filledActiveAllocationIds.put( i, Collections.unmodifiableSet(new HashSet<>(activeAllocationIds.get(i)))); } else { filledActiveAllocationIds.put(i, Collections.emptySet()); } } final Map<String, String> requireMap = INDEX_ROUTING_REQUIRE_GROUP_SETTING.get(settings).getAsMap(); final DiscoveryNodeFilters requireFilters; if (requireMap.isEmpty()) { requireFilters = null; } else { requireFilters = DiscoveryNodeFilters.buildFromKeyValue(AND, requireMap); } Map<String, String> includeMap =
INDEX_ROUTING_INCLUDE_GROUP_SETTING.get(settings).getAsMap(); final DiscoveryNodeFilters includeFilters; if (includeMap.isEmpty()) { includeFilters = null; } else { includeFilters = DiscoveryNodeFilters.buildFromKeyValue(OR, includeMap); } Map<String, String> excludeMap = INDEX_ROUTING_EXCLUDE_GROUP_SETTING.get(settings).getAsMap(); final DiscoveryNodeFilters excludeFilters; if (excludeMap.isEmpty()) { excludeFilters = null; } else { excludeFilters = DiscoveryNodeFilters.buildFromKeyValue(OR, excludeMap); } Version indexCreatedVersion = Version.indexCreated(settings); Version indexUpgradedVersion = settings.getAsVersion(IndexMetaData.SETTING_VERSION_UPGRADED, indexCreatedVersion); String stringLuceneVersion = settings.get(SETTING_VERSION_MINIMUM_COMPATIBLE); final org.apache.lucene.util.Version minimumCompatibleLuceneVersion; if (stringLuceneVersion != null) { try { minimumCompatibleLuceneVersion = org.apache.lucene.util.Version.parse(stringLuceneVersion); } catch (ParseException ex) { throw new IllegalStateException( "Cannot parse lucene version [" + stringLuceneVersion + "] in the [" + SETTING_VERSION_MINIMUM_COMPATIBLE + "] setting", ex); } } else { minimumCompatibleLuceneVersion = null; } final String uuid = settings.get(SETTING_INDEX_UUID, INDEX_UUID_NA_VALUE); return new IndexMetaData( new Index(index, uuid), version, state, numberOfShards, numberOfReplicas, tmpSettings, mappings.build(), tmpAliases.build(), customs.build(), filledActiveAllocationIds.build(), requireFilters, includeFilters, excludeFilters, indexCreatedVersion, indexUpgradedVersion, minimumCompatibleLuceneVersion); } public static void toXContent( IndexMetaData indexMetaData, XContentBuilder builder, ToXContent.Params params) throws IOException { builder.startObject( indexMetaData.getIndex().getName(), XContentBuilder.FieldCaseConversion.NONE); builder.field("version", indexMetaData.getVersion()); builder.field("state", indexMetaData.getState().toString().toLowerCase(Locale.ENGLISH)); boolean binary = params.paramAsBoolean("binary", false); builder.startObject("settings"); for (Map.Entry<String, String> entry : indexMetaData.getSettings().getAsMap().entrySet()) { builder.field(entry.getKey(), entry.getValue()); } builder.endObject(); builder.startArray("mappings"); for (ObjectObjectCursor<String, MappingMetaData> cursor : indexMetaData.getMappings()) { if (binary) { builder.value(cursor.value.source().compressed()); } else { byte[] data = cursor.value.source().uncompressed(); XContentParser parser = XContentFactory.xContent(data).createParser(data); Map<String, Object> mapping = parser.mapOrdered(); parser.close(); builder.map(mapping); } } builder.endArray(); for (ObjectObjectCursor<String, Custom> cursor : indexMetaData.getCustoms()) { builder.startObject(cursor.key, XContentBuilder.FieldCaseConversion.NONE); cursor.value.toXContent(builder, params); builder.endObject(); } builder.startObject("aliases"); for (ObjectCursor<AliasMetaData> cursor : indexMetaData.getAliases().values()) { AliasMetaData.Builder.toXContent(cursor.value, builder, params); } builder.endObject(); builder.startObject(KEY_ACTIVE_ALLOCATIONS); for (IntObjectCursor<Set<String>> cursor : indexMetaData.activeAllocationIds) { builder.startArray(String.valueOf(cursor.key)); for (String allocationId : cursor.value) { builder.value(allocationId); } builder.endArray(); } builder.endObject(); builder.endObject(); } public static IndexMetaData fromXContent(XContentParser parser) throws IOException { if (parser.currentToken() == null) { // fresh parser? 
move to the first token parser.nextToken(); } if (parser.currentToken() == XContentParser.Token.START_OBJECT) { // on a start object move to next token parser.nextToken(); } if (parser.currentToken() != XContentParser.Token.FIELD_NAME) { throw new IllegalArgumentException( "expected field name but got a " + parser.currentToken()); } Builder builder = new Builder(parser.currentName()); String currentFieldName = null; XContentParser.Token token = parser.nextToken(); if (token != XContentParser.Token.START_OBJECT) { throw new IllegalArgumentException("expected object but got a " + token); } while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); } else if (token == XContentParser.Token.START_OBJECT) { if ("settings".equals(currentFieldName)) { builder.settings( Settings.settingsBuilder() .put(SettingsLoader.Helper.loadNestedFromMap(parser.mapOrdered()))); } else if ("mappings".equals(currentFieldName)) { while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); } else if (token == XContentParser.Token.START_OBJECT) { String mappingType = currentFieldName; Map<String, Object> mappingSource = MapBuilder.<String, Object>newMapBuilder() .put(mappingType, parser.mapOrdered()) .map(); builder.putMapping(new MappingMetaData(mappingType, mappingSource)); } else { throw new IllegalArgumentException("Unexpected token: " + token); } } } else if ("aliases".equals(currentFieldName)) { while (parser.nextToken() != XContentParser.Token.END_OBJECT) { builder.putAlias(AliasMetaData.Builder.fromXContent(parser)); } } else if (KEY_ACTIVE_ALLOCATIONS.equals(currentFieldName)) { while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); } else if (token == XContentParser.Token.START_ARRAY) { String shardId = currentFieldName; Set<String> allocationIds = new HashSet<>(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { if (token == XContentParser.Token.VALUE_STRING) { allocationIds.add(parser.text()); } } builder.putActiveAllocationIds(Integer.valueOf(shardId), allocationIds); } else { throw new IllegalArgumentException("Unexpected token: " + token); } } } else if ("warmers".equals(currentFieldName)) { // TODO: do this in 4.0: // throw new IllegalArgumentException("Warmers are not supported anymore - are you // upgrading from 1.x?"); // ignore: warmers have been removed in 3.0 and are // simply ignored when upgrading from 2.x assert Version.CURRENT.major <= 3; parser.skipChildren(); } else { // check if it's a custom index metadata Custom proto = lookupPrototype(currentFieldName); if (proto == null) { // TODO warn parser.skipChildren(); } else { Custom custom = proto.fromXContent(parser); builder.putCustom(custom.type(), custom); } } } else if (token == XContentParser.Token.START_ARRAY) { if ("mappings".equals(currentFieldName)) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { if (token == XContentParser.Token.VALUE_EMBEDDED_OBJECT) { builder.putMapping( new MappingMetaData(new CompressedXContent(parser.binaryValue()))); } else { Map<String, Object> mapping = parser.mapOrdered(); if (mapping.size() == 1) { String mappingType = mapping.keySet().iterator().next(); builder.putMapping(new MappingMetaData(mappingType, mapping)); } } } } else { throw new
IllegalArgumentException("Unexpected field for an array " + currentFieldName); } } else if (token.isValue()) { if ("state".equals(currentFieldName)) { builder.state(State.fromString(parser.text())); } else if ("version".equals(currentFieldName)) { builder.version(parser.longValue()); } else { throw new IllegalArgumentException("Unexpected field [" + currentFieldName + "]"); } } else { throw new IllegalArgumentException("Unexpected token " + token); } } return builder.build(); } public static IndexMetaData readFrom(StreamInput in) throws IOException { return PROTO.readFrom(in); } } /** * Returns <code>true</code> iff the given settings indicate that the index associated with these * settings allocates its shards on a shared filesystem. Otherwise <code>false</code>. The * default setting for this is the returned value from {@link * #isIndexUsingShadowReplicas(org.elasticsearch.common.settings.Settings)}. */ public static boolean isOnSharedFilesystem(Settings settings) { return settings.getAsBoolean(SETTING_SHARED_FILESYSTEM, isIndexUsingShadowReplicas(settings)); } /** * Returns <code>true</code> iff the given settings indicate that the index associated with these * settings uses shadow replicas. Otherwise <code>false</code>. The default setting for this is * <code>false</code>. */ public static boolean isIndexUsingShadowReplicas(Settings settings) { return settings.getAsBoolean(SETTING_SHADOW_REPLICAS, false); } /** * Adds human readable version and creation date settings. This method is used to display the * settings in a human-readable format in the REST API. */ public static Settings addHumanReadableSettings(Settings settings) { Settings.Builder builder = Settings.builder().put(settings); Version version = settings.getAsVersion(SETTING_VERSION_CREATED, null); if (version != null) { builder.put(SETTING_VERSION_CREATED_STRING, version.toString()); } Version versionUpgraded = settings.getAsVersion(SETTING_VERSION_UPGRADED, null); if (versionUpgraded != null) { builder.put(SETTING_VERSION_UPGRADED_STRING, versionUpgraded.toString()); } Long creationDate = settings.getAsLong(SETTING_CREATION_DATE, null); if (creationDate != null) { DateTime creationDateTime = new DateTime(creationDate, DateTimeZone.UTC); builder.put(SETTING_CREATION_DATE_STRING, creationDateTime.toString()); } return builder.build(); } }
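// Illustrative sketch, not part of the original source: building an IndexMetaData via the
// Builder above, mirroring how PROTO is constructed. The index name and shard/replica counts
// are arbitrary examples; SETTING_VERSION_CREATED is set because build() derives the
// index-created version from the settings.
class IndexMetaDataExample {
  static IndexMetaData build() {
    return IndexMetaData.builder("my_index")
        .settings(Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT))
        .numberOfShards(3)
        .numberOfReplicas(1)
        .build();
  }
}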
/** A component that holds all data paths for a single node. */ public final class NodeEnvironment implements Closeable { private final Logger logger; public static class NodePath { /* ${data.paths}/nodes/{node.id} */ public final Path path; /* ${data.paths}/nodes/{node.id}/indices */ public final Path indicesPath; /** Cached FileStore from path */ public final FileStore fileStore; /** * Cached result of Lucene's {@code IOUtils.spins} on path. This is a trilean value: null means * we could not determine it (we are not running on Linux, or we hit an exception trying), True * means the device possibly spins and False means it does not. */ public final Boolean spins; public final int majorDeviceNumber; public final int minorDeviceNumber; public NodePath(Path path) throws IOException { this.path = path; this.indicesPath = path.resolve(INDICES_FOLDER); this.fileStore = Environment.getFileStore(path); if (fileStore.supportsFileAttributeView("lucene")) { this.spins = (Boolean) fileStore.getAttribute("lucene:spins"); this.majorDeviceNumber = (int) fileStore.getAttribute("lucene:major_device_number"); this.minorDeviceNumber = (int) fileStore.getAttribute("lucene:minor_device_number"); } else { this.spins = null; this.majorDeviceNumber = -1; this.minorDeviceNumber = -1; } } /** * Resolves the given shard's directory against this NodePath * ${data.paths}/nodes/{node.id}/indices/{index.uuid}/{shard.id} */ public Path resolve(ShardId shardId) { return resolve(shardId.getIndex()).resolve(Integer.toString(shardId.id())); } /** * Resolves the index directory against this NodePath * ${data.paths}/nodes/{node.id}/indices/{index.uuid} */ public Path resolve(Index index) { return indicesPath.resolve(index.getUUID()); } @Override public String toString() { return "NodePath{" + "path=" + path + ", spins=" + spins + '}'; } } private final NodePath[] nodePaths; private final Path sharedDataPath; private final Lock[] locks; private final int nodeLockId; private final AtomicBoolean closed = new AtomicBoolean(false); private final Map<ShardId, InternalShardLock> shardLocks = new HashMap<>(); private final NodeMetaData nodeMetaData; /** Maximum number of data nodes that should run in an environment. */ public static final Setting<Integer> MAX_LOCAL_STORAGE_NODES_SETTING = Setting.intSetting("node.max_local_storage_nodes", 1, 1, Property.NodeScope); /** If true, automatically append the node lock id to custom data paths. */ public static final Setting<Boolean> ADD_NODE_LOCK_ID_TO_CUSTOM_PATH = Setting.boolSetting("node.add_lock_id_to_custom_path", true, Property.NodeScope); /** * Seed for determining a persisted unique uuid of this node. If the node already has a persisted * uuid on disk, this seed will be ignored and the uuid from disk will be reused. */ public static final Setting<Long> NODE_ID_SEED_SETTING = Setting.longSetting("node.id.seed", 0L, Long.MIN_VALUE, Property.NodeScope); /** If true, the [verbose] SegmentInfos.infoStream logging is sent to System.out.
*/ public static final Setting<Boolean> ENABLE_LUCENE_SEGMENT_INFOS_TRACE_SETTING = Setting.boolSetting("node.enable_lucene_segment_infos_trace", false, Property.NodeScope); public static final String NODES_FOLDER = "nodes"; public static final String INDICES_FOLDER = "indices"; public static final String NODE_LOCK_FILENAME = "node.lock"; public NodeEnvironment(Settings settings, Environment environment) throws IOException { if (!DiscoveryNode.nodeRequiresLocalStorage(settings)) { nodePaths = null; sharedDataPath = null; locks = null; nodeLockId = -1; nodeMetaData = new NodeMetaData(generateNodeId(settings)); logger = Loggers.getLogger( getClass(), Node.addNodeNameIfNeeded(settings, this.nodeMetaData.nodeId())); return; } final NodePath[] nodePaths = new NodePath[environment.dataWithClusterFiles().length]; final Lock[] locks = new Lock[nodePaths.length]; boolean success = false; // trace logger to debug issues before the default node name is derived from the node id Logger startupTraceLogger = Loggers.getLogger(getClass(), settings); try { sharedDataPath = environment.sharedDataFile(); int nodeLockId = -1; IOException lastException = null; int maxLocalStorageNodes = MAX_LOCAL_STORAGE_NODES_SETTING.get(settings); for (int possibleLockId = 0; possibleLockId < maxLocalStorageNodes; possibleLockId++) { for (int dirIndex = 0; dirIndex < environment.dataFiles().length; dirIndex++) { Path dataDirWithClusterName = environment.dataWithClusterFiles()[dirIndex]; Path dataDir = environment.dataFiles()[dirIndex]; Path dir = dataDir.resolve(NODES_FOLDER).resolve(Integer.toString(possibleLockId)); Files.createDirectories(dir); try (Directory luceneDir = FSDirectory.open(dir, NativeFSLockFactory.INSTANCE)) { startupTraceLogger.trace("obtaining node lock on {} ...", dir.toAbsolutePath()); try { locks[dirIndex] = luceneDir.obtainLock(NODE_LOCK_FILENAME); nodePaths[dirIndex] = new NodePath(dir); nodeLockId = possibleLockId; } catch (LockObtainFailedException ex) { startupTraceLogger.trace("failed to obtain node lock on {}", dir.toAbsolutePath()); // release all the ones that were obtained up until now releaseAndNullLocks(locks); break; } } catch (IOException e) { startupTraceLogger.trace( (Supplier<?>) () -> new ParameterizedMessage( "failed to obtain node lock on {}", dir.toAbsolutePath()), e); lastException = new IOException("failed to obtain lock on " + dir.toAbsolutePath(), e); // release all the ones that were obtained up until now releaseAndNullLocks(locks); break; } } if (locks[0] != null) { // we found a lock, break break; } } if (locks[0] == null) { final String message = String.format( Locale.ROOT, "failed to obtain node locks, tried [%s] with lock id%s;" + " maybe these locations are not writable or multiple nodes were started without increasing [%s] (was [%d])?", Arrays.toString(environment.dataWithClusterFiles()), maxLocalStorageNodes == 1 ? 
" [0]" : "s [0--" + (maxLocalStorageNodes - 1) + "]", MAX_LOCAL_STORAGE_NODES_SETTING.getKey(), maxLocalStorageNodes); throw new IllegalStateException(message, lastException); } this.nodeMetaData = loadOrCreateNodeMetaData(settings, startupTraceLogger, nodePaths); this.logger = Loggers.getLogger( getClass(), Node.addNodeNameIfNeeded(settings, this.nodeMetaData.nodeId())); this.nodeLockId = nodeLockId; this.locks = locks; this.nodePaths = nodePaths; if (logger.isDebugEnabled()) { logger.debug("using node location [{}], local_lock_id [{}]", nodePaths, nodeLockId); } maybeLogPathDetails(); maybeLogHeapDetails(); applySegmentInfosTrace(settings); assertCanWrite(); success = true; } finally { if (success == false) { IOUtils.closeWhileHandlingException(locks); } } } /** Returns true if the directory is empty */ private static boolean dirEmpty(final Path path) throws IOException { try (DirectoryStream<Path> stream = Files.newDirectoryStream(path)) { return stream.iterator().hasNext() == false; } } private static void releaseAndNullLocks(Lock[] locks) { for (int i = 0; i < locks.length; i++) { if (locks[i] != null) { IOUtils.closeWhileHandlingException(locks[i]); } locks[i] = null; } } private void maybeLogPathDetails() throws IOException { // We do some I/O in here, so skip this if DEBUG/INFO are not enabled: if (logger.isDebugEnabled()) { // Log one line per path.data: StringBuilder sb = new StringBuilder(); for (NodePath nodePath : nodePaths) { sb.append('\n').append(" -> ").append(nodePath.path.toAbsolutePath()); String spinsDesc; if (nodePath.spins == null) { spinsDesc = "unknown"; } else if (nodePath.spins) { spinsDesc = "possibly"; } else { spinsDesc = "no"; } FsInfo.Path fsPath = FsProbe.getFSInfo(nodePath); sb.append(", free_space [") .append(fsPath.getFree()) .append("], usable_space [") .append(fsPath.getAvailable()) .append("], total_space [") .append(fsPath.getTotal()) .append("], spins? [") .append(spinsDesc) .append("], mount [") .append(fsPath.getMount()) .append("], type [") .append(fsPath.getType()) .append(']'); } logger.debug("node data locations details:{}", sb); } else if (logger.isInfoEnabled()) { FsInfo.Path totFSPath = new FsInfo.Path(); Set<String> allTypes = new HashSet<>(); Set<String> allSpins = new HashSet<>(); Set<String> allMounts = new HashSet<>(); for (NodePath nodePath : nodePaths) { FsInfo.Path fsPath = FsProbe.getFSInfo(nodePath); String mount = fsPath.getMount(); if (allMounts.contains(mount) == false) { allMounts.add(mount); String type = fsPath.getType(); if (type != null) { allTypes.add(type); } Boolean spins = fsPath.getSpins(); if (spins == null) { allSpins.add("unknown"); } else if (spins.booleanValue()) { allSpins.add("possibly"); } else { allSpins.add("no"); } totFSPath.add(fsPath); } } // Just log a 1-line summary: logger.info( "using [{}] data paths, mounts [{}], net usable_space [{}], net total_space [{}], spins? [{}], types [{}]", nodePaths.length, allMounts, totFSPath.getAvailable(), totFSPath.getTotal(), toString(allSpins), toString(allTypes)); } } private void maybeLogHeapDetails() { JvmInfo jvmInfo = JvmInfo.jvmInfo(); ByteSizeValue maxHeapSize = jvmInfo.getMem().getHeapMax(); String useCompressedOops = jvmInfo.useCompressedOops(); logger.info( "heap size [{}], compressed ordinary object pointers [{}]", maxHeapSize, useCompressedOops); } /** * scans the node paths and loads existing metaData file. 
If none is found, new metadata will be * generated and persisted into the nodePaths */ private static NodeMetaData loadOrCreateNodeMetaData( Settings settings, Logger logger, NodePath... nodePaths) throws IOException { final Path[] paths = Arrays.stream(nodePaths).map(np -> np.path).toArray(Path[]::new); NodeMetaData metaData = NodeMetaData.FORMAT.loadLatestState(logger, paths); if (metaData == null) { metaData = new NodeMetaData(generateNodeId(settings)); } // we write again to make sure all paths have the latest state file NodeMetaData.FORMAT.write(metaData, paths); return metaData; } public static String generateNodeId(Settings settings) { Random random = Randomness.get(settings, NODE_ID_SEED_SETTING); return UUIDs.randomBase64UUID(random); } @SuppressForbidden(reason = "System.out.*") static void applySegmentInfosTrace(Settings settings) { if (ENABLE_LUCENE_SEGMENT_INFOS_TRACE_SETTING.get(settings)) { SegmentInfos.setInfoStream(System.out); } } private static String toString(Collection<String> items) { StringBuilder b = new StringBuilder(); for (String item : items) { if (b.length() > 0) { b.append(", "); } b.append(item); } return b.toString(); } /** * Deletes a shard data directory iff the shard's locks were successfully acquired. * * @param shardId the id of the shard to delete * @throws IOException if an IOException occurs */ public void deleteShardDirectorySafe(ShardId shardId, IndexSettings indexSettings) throws IOException, ShardLockObtainFailedException { final Path[] paths = availableShardPaths(shardId); logger.trace("deleting shard {} directory, paths: [{}]", shardId, paths); try (ShardLock lock = shardLock(shardId)) { deleteShardDirectoryUnderLock(lock, indexSettings); } } /** * Acquires, then releases, all {@code write.lock} files in the given shard paths. The * "write.lock" file is assumed to be under the shard path's "index" directory as used by * Elasticsearch. * * @throws LockObtainFailedException if any of the locks could not be acquired */ public static void acquireFSLockForPaths(IndexSettings indexSettings, Path... shardPaths) throws IOException { Lock[] locks = new Lock[shardPaths.length]; Directory[] dirs = new Directory[shardPaths.length]; try { for (int i = 0; i < shardPaths.length; i++) { // resolve the directory the shard actually lives in Path p = shardPaths[i].resolve("index"); // open a directory (will be immediately closed) on the shard's location dirs[i] = new SimpleFSDirectory( p, indexSettings.getValue(FsDirectoryService.INDEX_LOCK_FACTOR_SETTING)); // create a lock for the "write.lock" file try { locks[i] = dirs[i].obtainLock(IndexWriter.WRITE_LOCK_NAME); } catch (IOException ex) { throw new LockObtainFailedException( "unable to acquire " + IndexWriter.WRITE_LOCK_NAME + " for " + p, ex); } } } finally { IOUtils.closeWhileHandlingException(locks); IOUtils.closeWhileHandlingException(dirs); } } /** * Deletes a shard data directory. Note: this method assumes that the shard lock is acquired.
This * method will also attempt to acquire the write locks for the shard's paths before deleting the * data, but this is best effort, as the lock is released before the deletion happens in order to * allow the folder to be deleted * * @param lock the shard's lock * @throws IOException if an IOException occurs * @throws ElasticsearchException if the write.lock is not acquirable */ public void deleteShardDirectoryUnderLock(ShardLock lock, IndexSettings indexSettings) throws IOException { final ShardId shardId = lock.getShardId(); assert isShardLocked(shardId) : "shard " + shardId + " is not locked"; final Path[] paths = availableShardPaths(shardId); logger.trace("acquiring locks for {}, paths: [{}]", shardId, paths); acquireFSLockForPaths(indexSettings, paths); IOUtils.rm(paths); if (indexSettings.hasCustomDataPath()) { Path customLocation = resolveCustomLocation(indexSettings, shardId); logger.trace("acquiring lock for {}, custom path: [{}]", shardId, customLocation); acquireFSLockForPaths(indexSettings, customLocation); logger.trace("deleting custom shard {} directory [{}]", shardId, customLocation); IOUtils.rm(customLocation); } logger.trace("deleted shard {} directory, paths: [{}]", shardId, paths); assert FileSystemUtils.exists(paths) == false; } private boolean isShardLocked(ShardId id) { try { shardLock(id, 0).close(); return false; } catch (ShardLockObtainFailedException ex) { return true; } } /** * Deletes an index's data directory recursively iff all of the index's shard locks were * successfully acquired. If any of the index's shard directories can't be locked, none of the * shards will be deleted * * @param index the index to delete * @param lockTimeoutMS how long to wait for acquiring the indices shard locks * @param indexSettings settings for the index being deleted * @throws IOException if any of the shard data directories can't be locked or deleted */ public void deleteIndexDirectorySafe(Index index, long lockTimeoutMS, IndexSettings indexSettings) throws IOException, ShardLockObtainFailedException { final List<ShardLock> locks = lockAllForIndex(index, indexSettings, lockTimeoutMS); try { deleteIndexDirectoryUnderLock(index, indexSettings); } finally { IOUtils.closeWhileHandlingException(locks); } } /** * Deletes an index's data directory recursively. Note: this method assumes that the shard lock is * acquired * * @param index the index to delete * @param indexSettings settings for the index being deleted */ public void deleteIndexDirectoryUnderLock(Index index, IndexSettings indexSettings) throws IOException { final Path[] indexPaths = indexPaths(index); logger.trace( "deleting index {} directory, paths({}): [{}]", index, indexPaths.length, indexPaths); IOUtils.rm(indexPaths); if (indexSettings.hasCustomDataPath()) { Path customLocation = resolveIndexCustomLocation(indexSettings); logger.trace("deleting custom index {} directory [{}]", index, customLocation); IOUtils.rm(customLocation); } } /** * Tries to lock all local shards for the given index. If any of the shard locks can't be acquired * a {@link ShardLockObtainFailedException} is thrown and all previously acquired locks are * released. * * @param index the index to lock shards for * @param lockTimeoutMS how long to wait for acquiring the indices shard locks * @return the {@link ShardLock} instances for this index. * @throws IOException if an IOException occurs.
*/ public List<ShardLock> lockAllForIndex(Index index, IndexSettings settings, long lockTimeoutMS) throws IOException, ShardLockObtainFailedException { final int numShards = settings.getNumberOfShards(); if (numShards <= 0) { throw new IllegalArgumentException("settings must contain a non-null > 0 number of shards"); } logger.trace("locking all shards for index {} - [{}]", index, numShards); List<ShardLock> allLocks = new ArrayList<>(numShards); boolean success = false; long startTimeNS = System.nanoTime(); try { for (int i = 0; i < numShards; i++) { long timeoutLeftMS = Math.max(0, lockTimeoutMS - TimeValue.nsecToMSec((System.nanoTime() - startTimeNS))); allLocks.add(shardLock(new ShardId(index, i), timeoutLeftMS)); } success = true; } finally { if (success == false) { logger.trace("unable to lock all shards for index {}", index); IOUtils.closeWhileHandlingException(allLocks); } } return allLocks; } /** * Tries to lock the given shard's ID. A shard lock is required to perform any kind of write * operation on a shard's data directory, like deleting files, creating a new index writer, or * recovering from a different shard instance into it. If the shard lock cannot be acquired, a {@link * ShardLockObtainFailedException} is thrown. * * <p>Note: this method will return immediately if the lock can't be acquired. * * @param id the shard ID to lock * @return the shard lock. Call {@link ShardLock#close()} to release the lock */ public ShardLock shardLock(ShardId id) throws ShardLockObtainFailedException { return shardLock(id, 0); } /** * Tries to lock the given shard's ID. A shard lock is required to perform any kind of write * operation on a shard's data directory, like deleting files, creating a new index writer, or * recovering from a different shard instance into it. If the shard lock cannot be acquired, a {@link * ShardLockObtainFailedException} is thrown. * * @param shardId the shard ID to lock * @param lockTimeoutMS the lock timeout in milliseconds * @return the shard lock. Call {@link ShardLock#close()} to release the lock */ public ShardLock shardLock(final ShardId shardId, long lockTimeoutMS) throws ShardLockObtainFailedException { logger.trace("acquiring node shardlock on [{}], timeout [{}]", shardId, lockTimeoutMS); final InternalShardLock shardLock; final boolean acquired; synchronized (shardLocks) { if (shardLocks.containsKey(shardId)) { shardLock = shardLocks.get(shardId); shardLock.incWaitCount(); acquired = false; } else { shardLock = new InternalShardLock(shardId); shardLocks.put(shardId, shardLock); acquired = true; } } if (acquired == false) { boolean success = false; try { shardLock.acquire(lockTimeoutMS); success = true; } finally { if (success == false) { shardLock.decWaitCount(); } } } logger.trace("successfully acquired shardlock for [{}]", shardId); return new ShardLock(shardId) { // new instance prevents double closing @Override protected void closeInternal() { shardLock.release(); logger.trace("released shard lock for [{}]", shardId); } }; } /** A functional interface that people can use to reference {@link #shardLock(ShardId, long)} */ @FunctionalInterface public interface ShardLocker { ShardLock lock(ShardId shardId, long lockTimeoutMS) throws ShardLockObtainFailedException; } /** * Returns all currently locked shards.
* * <p>Note: the shard ids returned do not contain a valid Index UUID */ public Set<ShardId> lockedShards() { synchronized (shardLocks) { return unmodifiableSet(new HashSet<>(shardLocks.keySet())); } } private final class InternalShardLock { /* * This class holds a mutex for exclusive access and timeout / wait semantics * and a reference count to clean up the shard lock instance from the internal data * structure if nobody is waiting for it. The wait count is guarded by the same lock * that is used to mutate the map holding the shard locks to ensure exclusive access */ private final Semaphore mutex = new Semaphore(1); private int waitCount = 1; // guarded by shardLocks private final ShardId shardId; InternalShardLock(ShardId shardId) { this.shardId = shardId; mutex.acquireUninterruptibly(); } protected void release() { mutex.release(); decWaitCount(); } void incWaitCount() { synchronized (shardLocks) { assert waitCount > 0 : "waitCount is " + waitCount + " but should be > 0"; waitCount++; } } private void decWaitCount() { synchronized (shardLocks) { assert waitCount > 0 : "waitCount is " + waitCount + " but should be > 0"; --waitCount; logger.trace("shard lock wait count for {} is now [{}]", shardId, waitCount); if (waitCount == 0) { logger.trace("last shard lock wait decremented, removing lock for {}", shardId); InternalShardLock remove = shardLocks.remove(shardId); assert remove != null : "Removed lock was null"; } } } void acquire(long timeoutInMillis) throws ShardLockObtainFailedException { try { if (mutex.tryAcquire(timeoutInMillis, TimeUnit.MILLISECONDS) == false) { throw new ShardLockObtainFailedException( shardId, "obtaining shard lock timed out after " + timeoutInMillis + "ms"); } } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new ShardLockObtainFailedException( shardId, "thread interrupted while trying to obtain shard lock", e); } } } public boolean hasNodeFile() { return nodePaths != null && locks != null; } /** * Returns an array of all of the node's data locations. * * @throws IllegalStateException if the node is not configured to store local locations */ public Path[] nodeDataPaths() { assertEnvIsLocked(); Path[] paths = new Path[nodePaths.length]; for (int i = 0; i < paths.length; i++) { paths[i] = nodePaths[i].path; } return paths; } /** * Returns the unique uuid describing this node. The uuid is persisted in the data folder of this * node and remains across restarts. */ public String nodeId() { // we currently only return the ID and hide the underlying nodeMetaData implementation in order // to avoid // confusion with other "metadata" like node settings found in elasticsearch.yml. In future // we can encapsulate both (and more) in one NodeMetaData (or NodeSettings) object ala // IndexSettings return nodeMetaData.nodeId(); } /** Returns an array of all of the {@link NodePath}s. */ public NodePath[] nodePaths() { assertEnvIsLocked(); if (nodePaths == null || locks == null) { throw new IllegalStateException("node is not configured to store local location"); } return nodePaths; } /** Returns all index paths. */ public Path[] indexPaths(Index index) { assertEnvIsLocked(); Path[] indexPaths = new Path[nodePaths.length]; for (int i = 0; i < nodePaths.length; i++) { indexPaths[i] = nodePaths[i].resolve(index); } return indexPaths; } /** * Returns all shard paths excluding the custom shard path. Note: Shards are only allocated on one of * the returned paths. The returned array may contain paths to non-existing directories.
* * @see IndexSettings#hasCustomDataPath() * @see #resolveCustomLocation(IndexSettings, ShardId) */ public Path[] availableShardPaths(ShardId shardId) { assertEnvIsLocked(); final NodePath[] nodePaths = nodePaths(); final Path[] shardLocations = new Path[nodePaths.length]; for (int i = 0; i < nodePaths.length; i++) { shardLocations[i] = nodePaths[i].resolve(shardId); } return shardLocations; } /** Returns all folder names in the ${data.paths}/nodes/{node.id}/indices folder */ public Set<String> availableIndexFolders() throws IOException { if (nodePaths == null || locks == null) { throw new IllegalStateException("node is not configured to store local location"); } assertEnvIsLocked(); Set<String> indexFolders = new HashSet<>(); for (NodePath nodePath : nodePaths) { Path indicesLocation = nodePath.indicesPath; if (Files.isDirectory(indicesLocation)) { try (DirectoryStream<Path> stream = Files.newDirectoryStream(indicesLocation)) { for (Path index : stream) { if (Files.isDirectory(index)) { indexFolders.add(index.getFileName().toString()); } } } } } return indexFolders; } /** * Resolves all existing paths to <code>indexFolderName</code> in * ${data.paths}/nodes/{node.id}/indices */ public Path[] resolveIndexFolder(String indexFolderName) throws IOException { if (nodePaths == null || locks == null) { throw new IllegalStateException("node is not configured to store local location"); } assertEnvIsLocked(); List<Path> paths = new ArrayList<>(nodePaths.length); for (NodePath nodePath : nodePaths) { Path indexFolder = nodePath.indicesPath.resolve(indexFolderName); if (Files.exists(indexFolder)) { paths.add(indexFolder); } } return paths.toArray(new Path[paths.size()]); } /** * Tries to find all allocated shards for the given index on the current node. NOTE: This method * is prone to race conditions on the filesystem layer since it might not see directories created * concurrently or while it's traversing.
* * @param index the index to filter shards * @return a set of shard IDs * @throws IOException if an IOException occurs */ public Set<ShardId> findAllShardIds(final Index index) throws IOException { assert index != null; if (nodePaths == null || locks == null) { throw new IllegalStateException("node is not configured to store local location"); } assertEnvIsLocked(); final Set<ShardId> shardIds = new HashSet<>(); final String indexUniquePathId = index.getUUID(); for (final NodePath nodePath : nodePaths) { Path location = nodePath.indicesPath; if (Files.isDirectory(location)) { try (DirectoryStream<Path> indexStream = Files.newDirectoryStream(location)) { for (Path indexPath : indexStream) { if (indexUniquePathId.equals(indexPath.getFileName().toString())) { shardIds.addAll(findAllShardsForIndex(indexPath, index)); } } } } } return shardIds; } private static Set<ShardId> findAllShardsForIndex(Path indexPath, Index index) throws IOException { assert indexPath.getFileName().toString().equals(index.getUUID()); Set<ShardId> shardIds = new HashSet<>(); if (Files.isDirectory(indexPath)) { try (DirectoryStream<Path> stream = Files.newDirectoryStream(indexPath)) { for (Path shardPath : stream) { String fileName = shardPath.getFileName().toString(); if (Files.isDirectory(shardPath) && fileName.chars().allMatch(Character::isDigit)) { int shardId = Integer.parseInt(fileName); ShardId id = new ShardId(index, shardId); shardIds.add(id); } } } } return shardIds; } @Override public void close() { if (closed.compareAndSet(false, true) && locks != null) { for (Lock lock : locks) { try { logger.trace("releasing lock [{}]", lock); lock.close(); } catch (IOException e) { logger.trace( (Supplier<?>) () -> new ParameterizedMessage("failed to release lock [{}]", lock), e); } } } } private void assertEnvIsLocked() { if (!closed.get() && locks != null) { for (Lock lock : locks) { try { lock.ensureValid(); } catch (IOException e) { logger.warn("lock assertion failed", e); throw new IllegalStateException("environment is not locked", e); } } } } /** * This method tries to write an empty file and move it using an atomic move operation. This * method throws an {@link IllegalStateException} if this operation is not supported by the * filesystem. This test is executed on each of the data directories. This method cleans up all * files even in the case of an error. */ public void ensureAtomicMoveSupported() throws IOException { final NodePath[] nodePaths = nodePaths(); for (NodePath nodePath : nodePaths) { assert Files.isDirectory(nodePath.path) : nodePath.path + " is not a directory"; final Path src = nodePath.path.resolve("__es__.tmp"); final Path target = nodePath.path.resolve("__es__.final"); try { Files.createFile(src); Files.move(src, target, StandardCopyOption.ATOMIC_MOVE); } catch (AtomicMoveNotSupportedException ex) { throw new IllegalStateException( "atomic_move is not supported by the filesystem on path [" + nodePath.path + "]; atomic_move is required for elasticsearch to work correctly.", ex); } finally { try { Files.deleteIfExists(src); } finally { Files.deleteIfExists(target); } } } } /** * Resolve the custom path for an index's shard. Uses the {@code IndexMetaData.SETTING_DATA_PATH} * setting to determine the root path for the index.
* * @param indexSettings settings for the index */ public Path resolveBaseCustomLocation(IndexSettings indexSettings) { String customDataDir = indexSettings.customDataPath(); if (customDataDir != null) { // This assert is because this should be caught by MetaDataCreateIndexService assert sharedDataPath != null; if (ADD_NODE_LOCK_ID_TO_CUSTOM_PATH.get(indexSettings.getNodeSettings())) { return sharedDataPath.resolve(customDataDir).resolve(Integer.toString(this.nodeLockId)); } else { return sharedDataPath.resolve(customDataDir); } } else { throw new IllegalArgumentException( "no custom " + IndexMetaData.SETTING_DATA_PATH + " setting available"); } } /** * Resolve the custom path for an index's shard. Uses the {@code IndexMetaData.SETTING_DATA_PATH} * setting to determine the root path for the index. * * @param indexSettings settings for the index */ private Path resolveIndexCustomLocation(IndexSettings indexSettings) { return resolveBaseCustomLocation(indexSettings).resolve(indexSettings.getUUID()); } /** * Resolve the custom path for an index's shard. Uses the {@code IndexMetaData.SETTING_DATA_PATH} * setting to determine the root path for the index. * * @param indexSettings settings for the index * @param shardId shard to resolve the path to */ public Path resolveCustomLocation(IndexSettings indexSettings, final ShardId shardId) { return resolveIndexCustomLocation(indexSettings).resolve(Integer.toString(shardId.id())); } /** Returns the {@code NodePath.path} for this shard. */ public static Path shardStatePathToDataPath(Path shardPath) { int count = shardPath.getNameCount(); // Sanity check: assert Integer.parseInt(shardPath.getName(count - 1).toString()) >= 0; assert "indices".equals(shardPath.getName(count - 3).toString()); return shardPath.getParent().getParent().getParent(); } /** * This is a best effort to ensure that we actually have write permissions to write in all our * data directories. This prevents disasters if nodes are started under the wrong username etc. */ private void assertCanWrite() throws IOException { for (Path path : nodeDataPaths()) { // check node-paths are writable tryWriteTempFile(path); } for (String indexFolderName : this.availableIndexFolders()) { for (Path indexPath : this.resolveIndexFolder(indexFolderName)) { // check index paths are writable Path indexStatePath = indexPath.resolve(MetaDataStateFormat.STATE_DIR_NAME); tryWriteTempFile(indexStatePath); tryWriteTempFile(indexPath); try (DirectoryStream<Path> stream = Files.newDirectoryStream(indexPath)) { for (Path shardPath : stream) { String fileName = shardPath.getFileName().toString(); if (Files.isDirectory(shardPath) && fileName.chars().allMatch(Character::isDigit)) { Path indexDir = shardPath.resolve(ShardPath.INDEX_FOLDER_NAME); Path statePath = shardPath.resolve(MetaDataStateFormat.STATE_DIR_NAME); Path translogDir = shardPath.resolve(ShardPath.TRANSLOG_FOLDER_NAME); tryWriteTempFile(indexDir); tryWriteTempFile(translogDir); tryWriteTempFile(statePath); tryWriteTempFile(shardPath); } } } } } } private static void tryWriteTempFile(Path path) throws IOException { if (Files.exists(path)) { Path resolve = path.resolve(".es_temp_file"); try { Files.createFile(resolve); Files.deleteIfExists(resolve); } catch (IOException ex) { throw new IOException( "failed to write in data directory [" + path + "]; write permission is required", ex); } } } }
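The shard-lock methods above are the intended entry point for any component that needs exclusive access to a shard's on-disk data. A hedged sketch of the call pattern, assuming settings, environment, and index objects are already in scope (they are not defined here):

// Sketch only: acquire a shard lock, work under it, release it via close().
NodeEnvironment env = new NodeEnvironment(settings, environment);
ShardId shardId = new ShardId(index, 0);
try (ShardLock lock = env.shardLock(shardId, 5000)) { // wait up to 5s for the lock
  // while the lock is held, no other component can delete or open this shard's data
  Path[] paths = env.availableShardPaths(shardId);
  // ... perform write operations against the returned paths ...
} catch (ShardLockObtainFailedException e) {
  // another component holds the lock; back off and retry, or surface the failure
}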
public class ElectMasterService extends AbstractComponent { public static final Setting<Integer> DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING = Setting.intSetting("discovery.zen.minimum_master_nodes", -1, true, Setting.Scope.CLUSTER); // This is the minimum version a master needs to be on, otherwise it gets ignored // This is based on the minimum compatible version of the current version this node is on private final Version minMasterVersion; private final NodeComparator nodeComparator = new NodeComparator(); private volatile int minimumMasterNodes; @Inject public ElectMasterService(Settings settings, Version version) { super(settings); this.minMasterVersion = version.minimumCompatibilityVersion(); this.minimumMasterNodes = DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.get(settings); logger.debug("using minimum_master_nodes [{}]", minimumMasterNodes); } public void minimumMasterNodes(int minimumMasterNodes) { this.minimumMasterNodes = minimumMasterNodes; } public int minimumMasterNodes() { return minimumMasterNodes; } public boolean hasEnoughMasterNodes(Iterable<DiscoveryNode> nodes) { if (minimumMasterNodes < 1) { return true; } int count = 0; for (DiscoveryNode node : nodes) { if (node.masterNode()) { count++; } } return count >= minimumMasterNodes; } /** * Returns the given nodes sorted by likelihood of being elected as master, most likely first. * Non-master nodes are not removed but are rather put at the end */ public List<DiscoveryNode> sortByMasterLikelihood(Iterable<DiscoveryNode> nodes) { ArrayList<DiscoveryNode> sortedNodes = CollectionUtils.iterableAsArrayList(nodes); CollectionUtil.introSort(sortedNodes, nodeComparator); return sortedNodes; } /** Returns a list of the next possible masters. */ public DiscoveryNode[] nextPossibleMasters( ObjectContainer<DiscoveryNode> nodes, int numberOfPossibleMasters) { List<DiscoveryNode> sortedNodes = sortedMasterNodes(Arrays.asList(nodes.toArray(DiscoveryNode.class))); if (sortedNodes == null) { return new DiscoveryNode[0]; } List<DiscoveryNode> nextPossibleMasters = new ArrayList<>(numberOfPossibleMasters); int counter = 0; for (DiscoveryNode nextPossibleMaster : sortedNodes) { if (++counter >= numberOfPossibleMasters) { break; } nextPossibleMasters.add(nextPossibleMaster); } return nextPossibleMasters.toArray(new DiscoveryNode[nextPossibleMasters.size()]); } /** * Elects a new master out of the possible nodes, returning it. Returns <tt>null</tt> if no master * has been elected. */ public DiscoveryNode electMaster(Iterable<DiscoveryNode> nodes) { List<DiscoveryNode> sortedNodes = sortedMasterNodes(nodes); if (sortedNodes == null || sortedNodes.isEmpty()) { return null; } DiscoveryNode masterNode = sortedNodes.get(0); // Sanity check: maybe we don't end up here, because serialization may have failed.
if (masterNode.getVersion().before(minMasterVersion)) { logger.warn( "ignoring master [{}], because the version [{}] is lower than the minimum compatible version [{}]", masterNode, masterNode.getVersion(), minMasterVersion); return null; } else { return masterNode; } } private List<DiscoveryNode> sortedMasterNodes(Iterable<DiscoveryNode> nodes) { List<DiscoveryNode> possibleNodes = CollectionUtils.iterableAsArrayList(nodes); if (possibleNodes.isEmpty()) { return null; } // remove non-master nodes for (Iterator<DiscoveryNode> it = possibleNodes.iterator(); it.hasNext(); ) { DiscoveryNode node = it.next(); if (!node.masterNode()) { it.remove(); } } CollectionUtil.introSort(possibleNodes, nodeComparator); return possibleNodes; } private static class NodeComparator implements Comparator<DiscoveryNode> { @Override public int compare(DiscoveryNode o1, DiscoveryNode o2) { if (o1.masterNode() && !o2.masterNode()) { return -1; } if (!o1.masterNode() && o2.masterNode()) { return 1; } return o1.id().compareTo(o2.id()); } } }
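Putting the pieces together, a typical election round first verifies the quorum and only then asks for a winner. An illustrative sketch (candidates is an assumed Iterable<DiscoveryNode>; the constructor arguments mirror the class above):

// Sketch only: quorum check followed by a master election.
ElectMasterService electMaster = new ElectMasterService(settings, Version.CURRENT);
electMaster.minimumMasterNodes(2); // e.g. pushed in from a dynamic cluster setting

if (electMaster.hasEnoughMasterNodes(candidates)) {
  DiscoveryNode master = electMaster.electMaster(candidates);
  if (master == null) {
    // no candidate was master-eligible and version-compatible; wait and retry
  }
}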
public class IndicesQueryCache extends AbstractComponent implements QueryCache, Closeable { public static final Setting<ByteSizeValue> INDICES_CACHE_QUERY_SIZE_SETTING = Setting.byteSizeSetting("indices.queries.cache.size", "10%", false, Scope.CLUSTER); public static final Setting<Integer> INDICES_CACHE_QUERY_COUNT_SETTING = Setting.intSetting("indices.queries.cache.count", 10000, 1, false, Scope.CLUSTER); private final LRUQueryCache cache; private final ShardCoreKeyMap shardKeyMap = new ShardCoreKeyMap(); private final Map<ShardId, Stats> shardStats = new ConcurrentHashMap<>(); private volatile long sharedRamBytesUsed; // This is a hack for the fact that the close listener for the // ShardCoreKeyMap will be called before onDocIdSetEviction // See onDocIdSetEviction for more info private final Map<Object, StatsAndCount> stats2 = new IdentityHashMap<>(); public IndicesQueryCache(Settings settings) { super(settings); final ByteSizeValue size = INDICES_CACHE_QUERY_SIZE_SETTING.get(settings); final int count = INDICES_CACHE_QUERY_COUNT_SETTING.get(settings); logger.debug("using [node] query cache with size [{}] max filter count [{}]", size, count); cache = new LRUQueryCache(count, size.bytes()) { private Stats getStats(Object coreKey) { final ShardId shardId = shardKeyMap.getShardId(coreKey); if (shardId == null) { return null; } return shardStats.get(shardId); } private Stats getOrCreateStats(Object coreKey) { final ShardId shardId = shardKeyMap.getShardId(coreKey); Stats stats = shardStats.get(shardId); if (stats == null) { stats = new Stats(); shardStats.put(shardId, stats); } return stats; } // It's ok to not protect these callbacks by a lock since it is // done in LRUQueryCache @Override protected void onClear() { assert Thread.holdsLock(this); super.onClear(); for (Stats stats : shardStats.values()) { // don't throw away hit/miss stats.cacheSize = 0; stats.ramBytesUsed = 0; } sharedRamBytesUsed = 0; } @Override protected void onQueryCache(Query filter, long ramBytesUsed) { assert Thread.holdsLock(this); super.onQueryCache(filter, ramBytesUsed); sharedRamBytesUsed += ramBytesUsed; } @Override protected void onQueryEviction(Query filter, long ramBytesUsed) { assert Thread.holdsLock(this); super.onQueryEviction(filter, ramBytesUsed); sharedRamBytesUsed -= ramBytesUsed; } @Override protected void onDocIdSetCache(Object readerCoreKey, long ramBytesUsed) { assert Thread.holdsLock(this); super.onDocIdSetCache(readerCoreKey, ramBytesUsed); final Stats shardStats = getOrCreateStats(readerCoreKey); shardStats.cacheSize += 1; shardStats.cacheCount += 1; shardStats.ramBytesUsed += ramBytesUsed; StatsAndCount statsAndCount = stats2.get(readerCoreKey); if (statsAndCount == null) { statsAndCount = new StatsAndCount(shardStats); stats2.put(readerCoreKey, statsAndCount); } statsAndCount.count += 1; } @Override protected void onDocIdSetEviction( Object readerCoreKey, int numEntries, long sumRamBytesUsed) { assert Thread.holdsLock(this); super.onDocIdSetEviction(readerCoreKey, numEntries, sumRamBytesUsed); // onDocIdSetEviction might sometimes be called with a number // of entries equal to zero if the cache for the given segment // was already empty when the close listener was called if (numEntries > 0) { // We can't use ShardCoreKeyMap here because its core closed // listener is called before the listener of the cache which // triggers this eviction. 
So instead we use stats2, which // we only evict when nothing is cached anymore on the segment // instead of relying on close listeners final StatsAndCount statsAndCount = stats2.get(readerCoreKey); final Stats shardStats = statsAndCount.stats; shardStats.cacheSize -= numEntries; shardStats.ramBytesUsed -= sumRamBytesUsed; statsAndCount.count -= numEntries; if (statsAndCount.count == 0) { stats2.remove(readerCoreKey); } } } @Override protected void onHit(Object readerCoreKey, Query filter) { assert Thread.holdsLock(this); super.onHit(readerCoreKey, filter); final Stats shardStats = getStats(readerCoreKey); shardStats.hitCount += 1; } @Override protected void onMiss(Object readerCoreKey, Query filter) { assert Thread.holdsLock(this); super.onMiss(readerCoreKey, filter); final Stats shardStats = getOrCreateStats(readerCoreKey); shardStats.missCount += 1; } }; sharedRamBytesUsed = 0; } /** Get usage statistics for the given shard. */ public QueryCacheStats getStats(ShardId shard) { final Map<ShardId, QueryCacheStats> stats = new HashMap<>(); for (Map.Entry<ShardId, Stats> entry : shardStats.entrySet()) { stats.put(entry.getKey(), entry.getValue().toQueryCacheStats()); } QueryCacheStats shardStats = new QueryCacheStats(); QueryCacheStats info = stats.get(shard); if (info == null) { info = new QueryCacheStats(); } shardStats.add(info); // We also have some shared ram usage that we try to distribute // proportionally to the number of cache entries of each shard long totalSize = 0; for (QueryCacheStats s : stats.values()) { totalSize += s.getCacheSize(); } final double weight = totalSize == 0 ? 1d / stats.size() : (double) shardStats.getCacheSize() / totalSize; final long additionalRamBytesUsed = Math.round(weight * sharedRamBytesUsed); shardStats.add(new QueryCacheStats(additionalRamBytesUsed, 0, 0, 0, 0)); return shardStats; } @Override public Weight doCache(Weight weight, QueryCachingPolicy policy) { while (weight instanceof CachingWeightWrapper) { weight = ((CachingWeightWrapper) weight).in; } final Weight in = cache.doCache(weight, policy); // We wrap the weight to track the readers it sees and map them with // the shards they belong to return new CachingWeightWrapper(in); } private class CachingWeightWrapper extends Weight { private final Weight in; protected CachingWeightWrapper(Weight in) { super(in.getQuery()); this.in = in; } @Override public void extractTerms(Set<Term> terms) { in.extractTerms(terms); } @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { shardKeyMap.add(context.reader()); return in.explain(context, doc); } @Override public float getValueForNormalization() throws IOException { return in.getValueForNormalization(); } @Override public void normalize(float norm, float topLevelBoost) { in.normalize(norm, topLevelBoost); } @Override public Scorer scorer(LeafReaderContext context) throws IOException { shardKeyMap.add(context.reader()); return in.scorer(context); } @Override public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { shardKeyMap.add(context.reader()); return in.bulkScorer(context); } } /** Clear all entries that belong to the given index. */ public void clearIndex(String index) { final Set<Object> coreCacheKeys = shardKeyMap.getCoreKeysForIndex(index); for (Object coreKey : coreCacheKeys) { cache.clearCoreCacheKey(coreKey); } // This cache stores two things: filters, and doc id sets.
Calling // clear only removes the doc id sets, but if we reach the situation // that the cache does not contain any DocIdSet anymore, then it // probably means that the user wanted to remove everything. if (cache.getCacheSize() == 0) { cache.clear(); } } @Override public void close() { assert shardKeyMap.size() == 0 : shardKeyMap.size(); assert shardStats.isEmpty() : shardStats.keySet(); assert stats2.isEmpty() : stats2; cache.clear(); } private static class Stats implements Cloneable { volatile long ramBytesUsed; volatile long hitCount; volatile long missCount; volatile long cacheCount; volatile long cacheSize; QueryCacheStats toQueryCacheStats() { return new QueryCacheStats(ramBytesUsed, hitCount, missCount, cacheCount, cacheSize); } } private static class StatsAndCount { int count; final Stats stats; StatsAndCount(Stats stats) { this.stats = stats; this.count = 0; } } private boolean empty(Stats stats) { if (stats == null) { return true; } return stats.cacheSize == 0 && stats.ramBytesUsed == 0; } public void onClose(ShardId shardId) { assert empty(shardStats.get(shardId)); shardStats.remove(shardId); } }
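The stats plumbing above surfaces through getStats(ShardId), which folds a proportional share of the shared RAM usage into the per-shard numbers. An illustrative sketch, assuming settings and shardId are in scope:

// Sketch only: read per-shard cache stats, then clear and close the cache.
IndicesQueryCache queryCache = new IndicesQueryCache(settings);
QueryCacheStats stats = queryCache.getStats(shardId); // includes a share of the common RAM
long cachedEntries = stats.getCacheSize();
queryCache.clearIndex("my_index"); // drops the cached doc id sets for that index
queryCache.close(); // asserts that no per-shard state is left behind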