Example #1
 private NodeState buildLocalNodeState(NodeState body) {
   if (body == null) {
     body = new NodeState();
   }
   body.id = getHost().getId();
   body.status = NodeStatus.SYNCHRONIZING;
   body.groupReference = UriUtils.buildPublicUri(getHost(), getSelfLink());
   body.documentSelfLink = UriUtils.buildUriPath(getSelfLink(), body.id);
   body.documentKind = Utils.buildKind(NodeState.class);
   body.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
   return body;
 }
 /**
  * A request sent to an enrolled service at the end of this transaction to clear the service's
  * transaction id.
  */
 public static class ClearTransactionRequest {
   public static final String KIND = Utils.buildKind(ClearTransactionRequest.class);
   public String kind;
   public TransactionOutcome transactionOutcome;
   public boolean isUpdated;
   public long originalVersion;
 }
 /** Request for enrolling a service in this transaction */
 public static class EnrollRequest {
   public static final String KIND = Utils.buildKind(EnrollRequest.class);
   public String kind = KIND;
   public String serviceSelfLink;
   public Action action;
   public long previousVersion;
 }
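A minimal construction sketch for the request above; the helper name, the self link semantics, and the choice of Action.PUT are illustrative, and how the request reaches the transaction coordinator is framework-specific and not shown here.
  // Sketch only: builds an enrollment request for an update to a service instance.
  static EnrollRequest buildEnrollRequest(String serviceSelfLink, long previousVersion) {
    EnrollRequest enroll = new EnrollRequest(); // kind is pre-initialized to KIND
    enroll.serviceSelfLink = serviceSelfLink;   // the service being enrolled in the transaction
    enroll.action = Action.PUT;                 // the update action being enrolled (illustrative)
    enroll.previousVersion = previousVersion;   // document version before the update
    return enroll;
  }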
Example #4
  public static class JoinPeerRequest {
    public static final String KIND = Utils.buildKind(JoinPeerRequest.class);

    public static JoinPeerRequest create(URI peerToJoin, Integer synchQuorum) {
      JoinPeerRequest r = new JoinPeerRequest();
      r.memberGroupReference = peerToJoin;
      r.synchQuorum = synchQuorum;
      r.kind = KIND;
      return r;
    }

    /** Member of the group we wish to join through */
    public URI memberGroupReference;

    /**
     * Optional node join options. If specified, the node state representing the local node will be
     * updated with these options. Further, these options determine join behavior.
     */
    public EnumSet<NodeOption> localNodeOptions;

    /** Minimum number of nodes to enumerate, after join, for synchronization to start */
    public Integer synchQuorum;

    public String kind;
  }
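A minimal join sketch, assuming a started Xenon ServiceHost and the public URI of a peer's default node group; the synchQuorum value of 3 is illustrative (Operation, UriUtils, and ServiceUriPaths are the usual Xenon classes).
  static void joinDefaultNodeGroup(ServiceHost localHost, URI peerNodeGroupUri) {
    // wait for at least three nodes before synchronization starts (illustrative value)
    JoinPeerRequest joinRequest = JoinPeerRequest.create(peerNodeGroupUri, 3);
    Operation join =
        Operation.createPost(UriUtils.buildUri(localHost, ServiceUriPaths.DEFAULT_NODE_GROUP))
            .setBody(joinRequest)
            .setReferer(localHost.getUri());
    localHost.sendRequest(join);
  }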
Example #5
  public static class UpdateQuorumRequest {
    public static final String KIND = Utils.buildKind(UpdateQuorumRequest.class);

    public static UpdateQuorumRequest create(boolean isGroupUpdate) {
      UpdateQuorumRequest r = new UpdateQuorumRequest();
      r.isGroupUpdate = isGroupUpdate;
      r.kind = KIND;
      return r;
    }

    public UpdateQuorumRequest setMembershipQuorum(int count) {
      this.membershipQuorum = count;
      return this;
    }

    public UpdateQuorumRequest setSynchQuorum(int count) {
      this.synchQuorum = count;
      return this;
    }

    public boolean isGroupUpdate;
    public Integer membershipQuorum;
    public Integer synchQuorum;
    public String kind;
  }
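A minimal usage sketch: raising the membership quorum and asking the node group service to propagate the change to all members (assumes a started Xenon ServiceHost).
  static void raiseMembershipQuorum(ServiceHost host, int newQuorum) {
    // isGroupUpdate = true asks the node group service to fan the update out to every member
    UpdateQuorumRequest request = UpdateQuorumRequest.create(true).setMembershipQuorum(newQuorum);
    Operation patch =
        Operation.createPatch(UriUtils.buildUri(host, ServiceUriPaths.DEFAULT_NODE_GROUP))
            .setBody(request)
            .setReferer(host.getUri());
    host.sendRequest(patch);
  }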
 private ResourceAggregateMetric processInput(Operation op) {
   if (!op.hasBody()) {
     throw new IllegalArgumentException("body is required");
   }
   ResourceAggregateMetric state = op.getBody(ResourceAggregateMetric.class);
   Utils.validateState(getStateDescription(), state);
   return state;
 }
  public static Service createFactory() {

    // workaround for GSON issue https://github.com/google/gson/issues/764
    // We serialize the complex type once, on service creation, to avoid possible GSON race
    ResourceGroupState st = new ResourceGroupState();
    st.query = QueryTask.Query.Builder.create().addFieldClause("one", "one").build();
    Utils.toJson(st);
    return FactoryService.createIdempotent(ResourceGroupService.class);
  }
Example #8
  /**
   * The {@link RoleState} represents a role. A role applies to users contained in its user group,
   * to HTTP verbs in the set of applicable verbs, and to resources in its resource group.
   */
  public static class RoleState extends ServiceDocument {
    public static final String KIND = Utils.buildKind(RoleState.class);
    public static final String FIELD_NAME_USER_GROUP_LINK = "userGroupLink";
    public static final String FIELD_NAME_RESOURCE_GROUP_LINK = "resourceGroupLink";

    public String userGroupLink;
    public String resourceGroupLink;
    public Set<Action> verbs;
    public Policy policy;
    public int priority;
  }
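A minimal sketch of populating a RoleState that grants GET and POST on one resource group to one user group; the links passed in are placeholders, and Policy.ALLOW is assumed to be the grant value used by the authorization framework.
  static RoleState buildExampleRole(String userGroupLink, String resourceGroupLink) {
    RoleState role = new RoleState();
    role.userGroupLink = userGroupLink;          // e.g. a link under the user-group factory
    role.resourceGroupLink = resourceGroupLink;  // e.g. a link under the resource-group factory
    role.verbs = EnumSet.of(Action.GET, Action.POST);
    role.policy = Policy.ALLOW;                  // assumed grant policy value
    role.priority = 0;
    return role;
  }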
  /** Request for committing or aborting this transaction */
  public static class EndTransactionRequest {
    public static final String KIND = Utils.buildKind(EndTransactionRequest.class);

    public enum TransactionOutcome {
      COMMIT,
      ABORT
    }

    public String kind = KIND;
    public TransactionOutcome transactionOutcome;
  }
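A minimal commit sketch, assuming the transaction coordinator is a Xenon service reachable at a known self link and that it accepts this request as a PATCH; aborting would set TransactionOutcome.ABORT instead.
  static void commitTransaction(ServiceHost host, String transactionSelfLink) {
    EndTransactionRequest request = new EndTransactionRequest(); // kind is pre-set to KIND
    request.transactionOutcome = EndTransactionRequest.TransactionOutcome.COMMIT;
    Operation patch =
        Operation.createPatch(UriUtils.buildUri(host, transactionSelfLink))
            .setBody(request)
            .setReferer(host.getUri());
    host.sendRequest(patch);
  }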
  /**
   * Build a state object that can be used to submit a stage progress self-patch.
   *
   * @param stage Supplies the stage that the service instance is moving to.
   * @param subStage Supplies the sub-stage that the service instance is moving to.
   * @param e Supplies the exception that the service instance encountered, if any.
   * @return Returns a patch state object for the given stage and sub-stage.
   */
  private State buildPatch(TaskState.TaskStage stage, TaskState.SubStage subStage, Throwable e) {
    State s = new State();
    s.taskInfo = new TaskState();
    s.taskInfo.stage = stage;
    s.taskInfo.subStage = subStage;

    if (e != null) {
      s.taskInfo.failure = Utils.toServiceErrorResponse(e);
    }

    return s;
  }
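As a hedged sketch of how such a patch is typically consumed, a failure path might self-patch the task into the FAILED stage; sendSelfPatch here stands in for whatever self-patch helper the enclosing task service provides (one is visible in handleTriggerCopies further below).
  private void failTask(Throwable e) {
    // move the task to the terminal FAILED stage, recording the error in taskInfo.failure
    sendSelfPatch(buildPatch(TaskState.TaskStage.FAILED, null, e));
  }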
  private DeploymentService.State getState() {
    if (deploymentLink != null) {
      URI serviceUri = UriUtils.buildUri(xenonHost, deploymentLink);

      Operation getOperation =
          Operation.createGet(serviceUri).setReferer(this.xenonHost.getUri());
      OperationLatch operationLatch = new OperationLatch(getOperation);
      xenonHost.sendRequest(getOperation);
      Operation completedOperation = null;
      try {
        completedOperation = operationLatch.awaitOperationCompletion(TimeUnit.SECONDS.toMicros(90));
      } catch (Throwable e) {
        logger.error("SysConfig get failed!! ", e);
        throw new RuntimeException(e);
      }

      return completedOperation.getBody(DeploymentService.State.class);
    } else {
      QueryTask.Query kindClause =
          new QueryTask.Query()
              .setTermPropertyName(ServiceDocument.FIELD_NAME_KIND)
              .setTermMatchValue(Utils.buildKind(DeploymentService.State.class));

      QueryTask.QuerySpecification querySpecification = new QueryTask.QuerySpecification();
      querySpecification.query = kindClause;

      Operation broadcastOp =
          xenonHost
              .getCloudStoreHelper()
              .createBroadcastPost(
                  ServiceUriPaths.CORE_LOCAL_QUERY_TASKS, ServiceUriPaths.DEFAULT_NODE_SELECTOR)
              .setBody(QueryTask.create(querySpecification).setDirect(true));
      OperationLatch operationLatch = new OperationLatch(broadcastOp);
      xenonHost.sendRequest(broadcastOp);
      Operation completedOperation = null;
      try {
        completedOperation = operationLatch.awaitOperationCompletion(TimeUnit.SECONDS.toMicros(90));
      } catch (Throwable e) {
        logger.error("SysConfig broadcastquery failed!! ", e);
        throw new RuntimeException(e);
      }

      Collection<String> documentLinks =
          QueryTaskUtils.getBroadcastQueryDocumentLinks(completedOperation);
      if (documentLinks.size() == 0) {
        return null;
      }
      this.deploymentLink = documentLinks.iterator().next();
      return getState();
    }
  }
  private void assertSwagger(Swagger swagger) {
    assertEquals("/", swagger.getBasePath());

    assertEquals(INFO_DESCRIPTION, swagger.getInfo().getDescription());
    assertEquals(INFO_TERMS_OF_SERVICE, swagger.getInfo().getTermsOfService());

    // excluded prefixes
    assertNull(swagger.getPath(ServiceUriPaths.CORE_AUTHZ_USERS));
    assertNull(swagger.getPath(ServiceUriPaths.CORE_AUTHZ_ROLES));

    assertNotNull(swagger.getPath(ServiceUriPaths.CORE_PROCESSES));
    assertNotNull(swagger.getPath(ServiceUriPaths.CORE_CREDENTIALS));

    Path p = swagger.getPath("/cars");
    assertNotNull(p);
    assertNotNull(p.getPost());
    assertNotNull(p.getGet());

    assertNotNull(swagger.getPath("/cars/template"));
    assertNotNull(swagger.getPath("/cars/available"));
    assertNotNull(swagger.getPath("/cars/config"));
    assertNotNull(swagger.getPath("/cars/stats"));
    assertNotNull(swagger.getPath("/cars/subscriptions"));

    assertNotNull(swagger.getPath("/cars/{id}/template"));
    assertNotNull(swagger.getPath("/cars/{id}/available"));
    assertNotNull(swagger.getPath("/cars/{id}/config"));
    assertNotNull(swagger.getPath("/cars/{id}/stats"));
    assertNotNull(swagger.getPath("/cars/{id}/subscriptions"));

    p = swagger.getPath("/cars/{id}");
    assertNotNull(p);
    assertNull(p.getPost());
    assertNull(p.getPatch());
    assertNotNull(p.getGet());
    assertNotNull(p.getPut());

    p = swagger.getPath("/tokens");
    assertNotNull(p);
    assertNotNull(p.getGet());
    assertNotNull(p.getGet().getResponses());
    assertNotNull(p.getPost());
    assertNotNull(p.getPost().getParameters());
    assertNull(p.getPatch());
    assertNull(p.getDelete());

    Model model = swagger.getDefinitions().get(Utils.buildKind(UserToken.class));
    Map<String, Property> properties = model.getProperties();
    assertNull(properties.get(UserToken.FIELD_NAME_INTERNAL_ID));
  }
Example #13
  public static class CheckConvergenceRequest {
    public static final String KIND = Utils.buildKind(CheckConvergenceRequest.class);

    public long membershipUpdateTimeMicros;

    public static CheckConvergenceRequest create(long membershipUpdateTime) {
      CheckConvergenceRequest r = new CheckConvergenceRequest();
      r.membershipUpdateTimeMicros = membershipUpdateTime;
      r.kind = KIND;
      return r;
    }

    public String kind;
  }
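A minimal sketch of probing a peer for convergence, assuming the peer's node group URI is known; the shape of the peer's response is not shown in these examples, so handling it is omitted.
  static void checkPeerConvergence(ServiceHost host, URI peerNodeGroupUri, long membershipUpdateTimeMicros) {
    CheckConvergenceRequest request = CheckConvergenceRequest.create(membershipUpdateTimeMicros);
    Operation post =
        Operation.createPost(peerNodeGroupUri)
            .setBody(request)
            .setReferer(host.getUri())
            .setCompletion(
                (o, e) -> {
                  if (e != null) {
                    // peer unreachable; a caller would typically retry on the next interval
                    return;
                  }
                  // a healthy peer responds with its convergence status (response type omitted here)
                });
    host.sendRequest(post);
  }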
 @Before
 public void setUp() throws Exception {
   try {
     this.baseAccountId = Utils.getNowMicrosUtc();
     this.host.setTransactionService(null);
     if (this.host.getServiceStage(SimpleTransactionFactoryService.SELF_LINK) == null) {
       this.host.startServiceAndWait(
           SimpleTransactionFactoryService.class, SimpleTransactionFactoryService.SELF_LINK);
       this.host.startServiceAndWait(
           BankAccountFactoryService.class, BankAccountFactoryService.SELF_LINK);
     }
   } catch (Throwable e) {
     throw new RuntimeException(e);
   }
 }
 /**
  * Creates datastore documents in cloudstore.
  *
  * <p>This method creates datastore documents with datastore IDs
  * 00000000-0000-0000-0000-000000000000, 00000000-0000-0000-0000-000000000001, and so on.
  *
  * @param numDatastores The number of datastore documents to create.
  */
 public static void loadDatastores(TestEnvironment cloudstore, int numDatastores)
     throws Throwable {
   for (int i = 0; i < numDatastores; i++) {
     DatastoreService.State datastore = new DatastoreService.State();
     String datastoreId = new UUID(0, i).toString();
     datastore.id = datastoreId;
     datastore.name = datastoreId;
     datastore.documentSelfLink = datastoreId;
     datastore.type = "SHARED_VMFS";
     // TODO(mmutsuzaki) Support datastore tags.
     datastore.tags = new HashSet<>();
     Operation result = cloudstore.sendPostAndWait(DatastoreServiceFactory.SELF_LINK, datastore);
     assertThat(result.getStatusCode(), is(200));
     logger.debug("Created a datastore document: {}", Utils.toJson(datastore));
   }
 }
  /**
   * This method queries for the document link of the cluster configuration for the Kubernetes
   * cluster.
   *
   * @param currentState Supplies the current state object.
   */
  private void queryClusterConfiguration(final KubernetesClusterCreateTask currentState) {
    QueryTask.Query kindClause =
        new QueryTask.Query()
            .setTermPropertyName(ServiceDocument.FIELD_NAME_KIND)
            .setTermMatchValue(Utils.buildKind(ClusterConfigurationService.State.class));

    QueryTask.Query idClause =
        new QueryTask.Query()
            .setTermPropertyName(ClusterConfigurationService.State.FIELD_NAME_SELF_LINK)
            .setTermMatchValue(
                ClusterConfigurationServiceFactory.SELF_LINK
                    + "/"
                    + ClusterType.KUBERNETES.toString().toLowerCase());

    QueryTask.QuerySpecification querySpecification = new QueryTask.QuerySpecification();
    querySpecification.query.addBooleanClause(kindClause);
    querySpecification.query.addBooleanClause(idClause);
    QueryTask queryTask = QueryTask.create(querySpecification).setDirect(true);

    sendRequest(
        HostUtils.getCloudStoreHelper(this)
            .createBroadcastPost(
                ServiceUriPaths.CORE_LOCAL_QUERY_TASKS, ServiceUriPaths.DEFAULT_NODE_SELECTOR)
            .setBody(queryTask)
            .setCompletion(
                (Operation operation, Throwable throwable) -> {
                  if (null != throwable) {
                    failTask(throwable);
                    return;
                  }

                  NodeGroupBroadcastResponse queryResponse =
                      operation.getBody(NodeGroupBroadcastResponse.class);
                  Set<String> documentLinks =
                      QueryTaskUtils.getBroadcastQueryResults(queryResponse);
                  if (documentLinks.isEmpty()) {
                    failTask(
                        new IllegalStateException(
                            String.format(
                                "Cannot find cluster configuration for %s",
                                ClusterType.KUBERNETES.toString())));
                    return;
                  }

                  retrieveClusterConfiguration(currentState, documentLinks.iterator().next());
                }));
  }
 /**
  * Creates host documents in cloudstore.
  *
  * @param cloudstore CloudStore test environment to create documents in.
  * @param numHosts The number of host documents to create.
  * @param hostConfigurations A map from {@link HostConfiguration} to the probability that this
  *     host configuration is used in the deployment. The sum of all the values of this map must be
  *     1.
  * @param numDatastores The number of datastores.
  * @param numDatastoresDistribution Distribution for number of datastores on each host. This
  *     distribution is expected to generate samples in the range [0, numDatastores].
  * @throws Throwable
  */
 public static void loadHosts(
     TestEnvironment cloudstore,
     int numHosts,
     Map<HostConfiguration, Double> hostConfigurations,
     int numDatastores,
     IntegerDistribution numDatastoresDistribution)
     throws Throwable {
   int[] indices = new int[hostConfigurations.size()];
   HostConfiguration[] configs = new HostConfiguration[hostConfigurations.size()];
   double[] probabilities = new double[hostConfigurations.size()];
   int i = 0;
   for (Map.Entry<HostConfiguration, Double> entry : hostConfigurations.entrySet()) {
     indices[i] = i;
     configs[i] = entry.getKey();
     probabilities[i] = entry.getValue();
     i++;
   }
   EnumeratedIntegerDistribution configDistribution =
       new EnumeratedIntegerDistribution(indices, probabilities);
   for (i = 0; i < numHosts; i++) {
     HostService.State host = new HostService.State();
     host.hostAddress = "host" + i;
     host.state = HostState.READY;
     host.userName = "******";
     host.password = "******";
     host.reportedDatastores = new HashSet<>();
     int numDatastoresPerHost = numDatastoresDistribution.sample();
     assertThat(numDatastoresPerHost >= 0, is(true));
     assertThat(numDatastoresPerHost <= numDatastores, is(true));
     while (host.reportedDatastores.size() < numDatastoresPerHost) {
       int randomInt = random.nextInt(numDatastores);
       host.reportedDatastores.add(new UUID(0, randomInt).toString());
     }
     host.reportedNetworks = new HashSet<>();
     host.usageTags = new HashSet<>(Arrays.asList(UsageTag.CLOUD.name()));
     int configIndex = configDistribution.sample();
     host.cpuCount = configs[configIndex].numCpus;
     host.memoryMb = configs[configIndex].memoryMb;
     host.documentSelfLink = new UUID(0, i).toString();
     // TODO(mmutsuzaki) Support availability zones.
     Operation result = cloudstore.sendPostAndWait(HostServiceFactory.SELF_LINK, host);
     assertThat(result.getStatusCode(), is(200));
     logger.debug("Created a host document: {}", Utils.toJson(host));
   }
 }
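A minimal sketch of driving loadHosts with two host configurations split 70/30 and a uniform datastore count per host; the HostConfiguration constructor arguments (cpu count, memory in MB) are assumed for this illustration, and UniformIntegerDistribution is the Apache Commons Math implementation of IntegerDistribution.
  public static void loadSampleHosts(TestEnvironment cloudstore) throws Throwable {
    Map<HostConfiguration, Double> hostConfigurations = new HashMap<>();
    hostConfigurations.put(new HostConfiguration(16, 65536), 0.7);  // assumed (numCpus, memoryMb)
    hostConfigurations.put(new HostConfiguration(32, 131072), 0.3); // probabilities sum to 1
    // every host reports between 0 and 10 of the 10 datastores
    loadHosts(cloudstore, 100, hostConfigurations, 10, new UniformIntegerDistribution(0, 10));
  }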
  /**
   * This method retrieves the container templates of all the containers that are running on this
   * VM.
   *
   * @param currentState Supplies the current state object.
   */
  private void queryContainers(final State currentState) {

    QueryTask.Query kindClause =
        new QueryTask.Query()
            .setTermPropertyName(ServiceDocument.FIELD_NAME_KIND)
            .setTermMatchValue(Utils.buildKind(ContainerService.State.class));

    QueryTask.Query nameClause =
        new QueryTask.Query()
            .setTermPropertyName(ContainerService.State.FIELD_NAME_VM_SERVICE_LINK)
            .setTermMatchValue(currentState.vmServiceLink);

    QueryTask.QuerySpecification querySpecification = new QueryTask.QuerySpecification();
    querySpecification.query.addBooleanClause(kindClause);
    querySpecification.query.addBooleanClause(nameClause);
    QueryTask queryTask = QueryTask.create(querySpecification).setDirect(true);

    sendRequest(
        Operation.createPost(
                UriUtils.buildBroadcastRequestUri(
                    UriUtils.buildUri(getHost(), ServiceUriPaths.CORE_LOCAL_QUERY_TASKS),
                    ServiceUriPaths.DEFAULT_NODE_SELECTOR))
            .setBody(queryTask)
            .setCompletion(
                new Operation.CompletionHandler() {
                  @Override
                  public void handle(Operation operation, Throwable throwable) {
                    if (null != throwable) {
                      failTask(throwable);
                      return;
                    }

                    try {
                      Collection<String> documentLinks =
                          QueryTaskUtils.getBroadcastQueryDocumentLinks(operation);
                      QueryTaskUtils.logQueryResults(
                          CreateManagementVmTaskService.this, documentLinks);
                      checkState(documentLinks.size() > 0);
                      getContainerTemplates(currentState, documentLinks);
                    } catch (Throwable t) {
                      failTask(t);
                    }
                  }
                }));
  }
  /**
   * Convert the data returned as a ServiceDocumentQueryResult from Xenon to a ResourceList, which
   * is used by api-fe.
   *
   * <p>The order of the data is honored.
   *
   * @param documentType Supplies the class of the Xenon documents contained in the query result.
   * @param queryResult Supplies the Xenon query result to convert.
   * @param convert Supplies a function converting a Xenon document to its API representation.
   * @param <T> The API representation type.
   * @param <S> The Xenon document type.
   * @return A ResourceList whose items preserve the order of documentLinks.
   */
  public static <T, S> ResourceList<T> xenonQueryResultToResourceList(
      Class<S> documentType, ServiceDocumentQueryResult queryResult, Function<S, T> convert) {
    // The document links stored in documentLinks are sorted while the documents map is not, so
    // the following loop iterates over documentLinks to preserve that order.
    List<T> documents = new ArrayList<>();
    if (queryResult.documentLinks != null) {
      for (String link : queryResult.documentLinks) {
        documents.add(convert.apply(Utils.fromJson(queryResult.documents.get(link), documentType)));
      }
    }

    ResourceList<T> resourceList = new ResourceList<>();
    resourceList.setItems(documents);
    resourceList.setNextPageLink(queryResult.nextPageLink);
    resourceList.setPreviousPageLink(queryResult.prevPageLink);

    return resourceList;
  }
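A minimal usage sketch, assuming the query result holds DatastoreService.State documents (as created by loadDatastores above) and the API layer only needs their names.
  public static ResourceList<String> toDatastoreNames(ServiceDocumentQueryResult queryResult) {
    return xenonQueryResultToResourceList(
        DatastoreService.State.class, queryResult, state -> state.name);
  }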
  /**
   * This method queries the list of datastores available in this ESX cloud instance and, on query
   * completion, creates a set of ImageCopyService instances and transitions the current service
   * instance to the AWAIT_COMPLETION sub-state.
   *
   * @param current Supplies the current state object.
   */
  protected void handleTriggerCopies(final State current) {
    try {
      Set<Datastore> datastoreSet = getZookeeperHostMonitor().getAllDatastores();
      ServiceUtils.logInfo(this, "All target datastores: %s", Utils.toJson(datastoreSet));
      triggerCopyServices(datastoreSet, current);

      // move to next stage
      if (!current.isSelfProgressionDisabled) {
        State patch =
            ImageReplicatorService.this.buildPatch(
                TaskState.TaskStage.STARTED, TaskState.SubStage.AWAIT_COMPLETION, null);
        patch.dataStoreCount = datastoreSet.size();

        sendSelfPatch(patch);
      }
    } catch (Exception e) {
      failTask(e);
    }
  }
  private void assertDescriptorJson(Operation o, Throwable e) {
    if (e != null) {
      e.printStackTrace();

      if (e.getMessage().contains("Unparseable JSON body")) {
        // Ignore failure
        // Expecting GSON classloading issue to be fixed:
        //  - https://github.com/google/gson/issues/764
        //  - https://www.pivotaltracker.com/story/show/120885303
        Utils.logWarning("GSON initialization failure: %s", e);
        // Stop assertion logic here, test will finish as success
        return;
      } else {
        fail(e.getMessage());
      }
    }

    try {
      Swagger swagger = Json.mapper().readValue(o.getBody(String.class), Swagger.class);
      assertSwagger(swagger);
    } catch (IOException ioe) {
      fail(ioe.getMessage());
    }
  }
  /**
   * Issues updates to peer nodes after a local update has been accepted. If the service supports
   * OWNER_SELECTION, the replication message is the Propose message in the consensus workflow.
   *
   * @param localState Supplies the local view of the node group.
   * @param outboundOp Supplies the client operation being replicated.
   * @param req Supplies the select-and-forward request carrying the linked state.
   * @param rsp Supplies the owner selection response listing the selected nodes.
   */
  void replicateUpdate(
      NodeGroupState localState,
      Operation outboundOp,
      SelectAndForwardRequest req,
      SelectOwnerResponse rsp) {

    int memberCount = localState.nodes.size();
    NodeState selfNode = localState.nodes.get(getHost().getId());
    AtomicInteger successCount = new AtomicInteger(0);

    if (req.serviceOptions.contains(ServiceOption.OWNER_SELECTION)
        && selfNode.membershipQuorum > memberCount) {
      outboundOp.fail(new IllegalStateException("Not enough peers: " + memberCount));
      return;
    }

    if (memberCount == 1) {
      outboundOp.complete();
      return;
    }

    AtomicInteger failureCount = new AtomicInteger();

    // The eligible count can be less than the member count if the parent node selector has
    // a smaller replication factor than group size. We need to use the replication factor
    // as the upper bound for calculating success and failure thresholds
    int eligibleMemberCount = rsp.selectedNodes.size();

    // When quorum is not required, succeed when we replicate to at least one remote node,
    // or, if only the local node is available, succeed immediately.
    int successThreshold = Math.min(2, eligibleMemberCount - 1);
    int failureThreshold = eligibleMemberCount - successThreshold;

    if (req.serviceOptions.contains(ServiceOption.OWNER_SELECTION)) {
      successThreshold = Math.min(eligibleMemberCount, selfNode.membershipQuorum);
      failureThreshold = eligibleMemberCount - successThreshold;

      if (failureThreshold == successThreshold && successThreshold == 1) {
        // degenerate case: the node group has just two members and quorum must be one, which
        // means that even if the single remote peer is down, we should still succeed.
        failureThreshold = 0;
      }
    }

    final int successThresholdFinal = successThreshold;
    final int failureThresholdFinal = failureThreshold;

    CompletionHandler c =
        (o, e) -> {
          if (e == null
              && o != null
              && o.getStatusCode() >= Operation.STATUS_CODE_FAILURE_THRESHOLD) {
            e = new IllegalStateException("Request failed: " + o.toString());
          }
          int sCount = successCount.get();
          int fCount = failureCount.get();
          if (e != null) {
            logInfo("Replication to %s failed: %s", o.getUri(), e.toString());
            fCount = failureCount.incrementAndGet();
          } else {
            sCount = successCount.incrementAndGet();
          }

          if (sCount == successThresholdFinal) {
            outboundOp.complete();
            return;
          }

          if (fCount == 0) {
            return;
          }

          if (fCount >= failureThresholdFinal || ((fCount + sCount) == memberCount)) {
            String error =
                String.format(
                    "%s to %s failed. Success: %d,  Fail: %d, quorum: %d, threshold: %d",
                    outboundOp.getAction(),
                    outboundOp.getUri().getPath(),
                    sCount,
                    fCount,
                    selfNode.membershipQuorum,
                    failureThresholdFinal);
            logWarning("%s", error);
            outboundOp.fail(new IllegalStateException(error));
          }
        };

    String jsonBody = Utils.toJson(req.linkedState);

    Operation update =
        Operation.createPost(null)
            .setAction(outboundOp.getAction())
            .setBodyNoCloning(jsonBody)
            .setCompletion(c)
            .setRetryCount(1)
            .setExpiration(outboundOp.getExpirationMicrosUtc())
            .transferRequestHeadersFrom(outboundOp)
            .removePragmaDirective(Operation.PRAGMA_DIRECTIVE_FORWARDED)
            .addPragmaDirective(Operation.PRAGMA_DIRECTIVE_REPLICATED)
            .setReferer(outboundOp.getReferer());

    if (update.getCookies() != null) {
      update.getCookies().clear();
    }

    ServiceClient cl = getHost().getClient();
    String selfId = getHost().getId();

    // trigger completion once, for the self node, since it's part of our accounting
    c.handle(null, null);

    rsp.selectedNodes.forEach(
        (m) -> {
          if (m.id.equals(selfId)) {
            return;
          }

          if (m.options.contains(NodeOption.OBSERVER)) {
            return;
          }

          try {
            URI remotePeerService =
                new URI(
                    m.groupReference.getScheme(),
                    null,
                    m.groupReference.getHost(),
                    m.groupReference.getPort(),
                    outboundOp.getUri().getPath(),
                    outboundOp.getUri().getQuery(),
                    null);
            update.setUri(remotePeerService);
          } catch (Throwable e1) {
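            // ignore: the URI is rebuilt from an existing, valid group reference, so this is not expected to fail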
          }

          if (NodeState.isUnAvailable(m)) {
            c.handle(update, new IllegalStateException("node is not available"));
            return;
          }

          cl.send(update);
        });
  }
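To make the quorum arithmetic above concrete, a small worked example for an OWNER_SELECTION service replicating across a three-node group with a membership quorum of two (the values follow the formulas in replicateUpdate).
  static void illustrateReplicationThresholds() {
    int eligibleMemberCount = 3;  // nodes returned by owner selection
    int membershipQuorum = 2;     // self node's quorum setting
    int successThreshold = Math.min(eligibleMemberCount, membershipQuorum); // 2
    int failureThreshold = eligibleMemberCount - successThreshold;          // 1
    // per the completion handler above: the operation completes after 2 successful responses
    // (the local node counts as one) and fails as soon as 1 response fails
    System.out.printf("success at %d acks, fail at %d failures%n", successThreshold, failureThreshold);
  }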
 private KubernetesClusterCreateTask buildPatch(
     TaskState.TaskStage stage, TaskState.SubStage subStage, @Nullable Throwable t) {
   return buildPatch(stage, subStage, null == t ? null : Utils.toServiceErrorResponse(t));
 }
 /**
  * This method builds a state object which can be used to submit a stage progress self-patch.
  *
  * @param stage Supplies the stage that the current service instance is moving to.
  * @param e Supplies the exception that the current service instance encountered if any.
  * @return Returns a patch state object that the current service instance is moving to.
  */
 @VisibleForTesting
 protected State buildPatch(TaskState.TaskStage stage, @Nullable Throwable e) {
   return buildPatch(stage, (e != null) ? Utils.toServiceErrorResponse(e) : null);
 }
Example #25
  private void handleUpdateQuorumPatch(Operation patch, NodeGroupState localState) {
    UpdateQuorumRequest bd = patch.getBody(UpdateQuorumRequest.class);
    NodeState self = localState.nodes.get(getHost().getId());
    logInfo("Updating self quorum from %d. Body: %s", self.membershipQuorum, Utils.toJsonHtml(bd));

    if (bd.membershipQuorum != null) {
      self.membershipQuorum = bd.membershipQuorum;
    }
    if (bd.synchQuorum != null) {
      self.synchQuorum = bd.synchQuorum;
    }
    self.documentVersion++;
    self.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
    localState.membershipUpdateTimeMicros = self.documentUpdateTimeMicros;

    if (!bd.isGroupUpdate) {
      patch.setBodyNoCloning(localState).complete();
      return;
    }

    // TODO use a three phase consensus algorithm to update quorum similar
    // to the steady state replication consensus.

    // Issue N requests to update the quorum on all members of the group. If they
    // do not all succeed, the operation fails and some peers will be left with a
    // quorum level different from the others. That is acceptable. The replication
    // logic can reject a peer if its quorum level is not set at the same level as
    // the owner's. The client of this request can also retry...

    bd.isGroupUpdate = false;

    int failureThreshold = (localState.nodes.size() - 1) / 2;
    AtomicInteger pending = new AtomicInteger(localState.nodes.size());
    AtomicInteger failures = new AtomicInteger();
    CompletionHandler c =
        (o, e) -> {
          if (e != null) {
            logWarning("Node %s failed quorum update: %s", o.getUri(), e.toString());
            failures.incrementAndGet();
          }

          int p = pending.decrementAndGet();
          if (p != 0) {
            return;
          }
          if (failures.get() > failureThreshold) {
            patch.fail(new IllegalStateException("Majority of nodes failed request"));
          } else {
            patch.setBodyNoCloning(localState).complete();
          }
        };

    for (NodeState node : localState.nodes.values()) {
      if (!NodeState.isAvailable(node, getHost().getId(), true)) {
        c.handle(null, null);
        continue;
      }
      if (bd.membershipQuorum != null) {
        node.membershipQuorum = bd.membershipQuorum;
      }
      if (bd.synchQuorum != null) {
        node.synchQuorum = bd.synchQuorum;
      }
      node.documentVersion++;
      node.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
      Operation p = Operation.createPatch(node.groupReference).setBody(bd).setCompletion(c);
      sendRequest(p);
    }
  }
Example #26
  /**
   * Merges current node group state with state that came through a PATCH.
   *
   * <p>PATCH requests are sent from:
   *
   * <p>1) the local service to itself, after it has communicated with a peer, during maintenance.
   *
   * <p>2) a remote peer when it is probing this local service, during its maintenance cycle.
   *
   * <p>The key invariants that should not be violated, guaranteeing forward evolution of state even
   * if nodes only talk to a small portion of their peers:
   *
   * <p>- When a status changes, the change is accepted if the remote version is higher
   *
   * <p>- A local node is the only node that can change its own node entry status, for a PATCH that
   * it receives.
   *
   * <p>- A node should never increment the version of a node entry, for other nodes, unless that
   * node entry is marked UNAVAILABLE
   *
   * <p>- When a status changes during gossip, the version must be incremented
   *
   * <p>- Versions always move forward
   */
  private void mergeRemoteAndLocalMembership(
      NodeGroupState localState, NodeGroupState remotePeerState, EnumSet<NodeGroupChange> changes) {
    if (localState == null) {
      return;
    }

    boolean isSelfPatch = remotePeerState.documentOwner.equals(getHost().getId());
    long now = Utils.getNowMicrosUtc();

    NodeState selfEntry = localState.nodes.get(getHost().getId());

    for (NodeState remoteNodeEntry : remotePeerState.nodes.values()) {

      NodeState l = localState.nodes.get(remoteNodeEntry.id);
      boolean isLocalNode = remoteNodeEntry.id.equals(getHost().getId());

      if (!isSelfPatch && isLocalNode) {
        if (remoteNodeEntry.status != l.status) {
          logWarning(
              "Peer %s is reporting us as %s, current status: %s",
              remotePeerState.documentOwner, remoteNodeEntry.status, l.status);
          if (remoteNodeEntry.documentVersion > l.documentVersion) {
            // increment local version to re-enforce we are alive and well
            l.documentVersion = remoteNodeEntry.documentVersion;
            l.documentUpdateTimeMicros = now;
            changes.add(NodeGroupChange.SELF_CHANGE);
          }
        }
        // local instance of node group service is the only one that can update its own
        // status
        continue;
      }

      if (l == null) {
        boolean hasExpired =
            remoteNodeEntry.documentExpirationTimeMicros > 0
                && remoteNodeEntry.documentExpirationTimeMicros < now;
        if (hasExpired || NodeState.isUnAvailable(remoteNodeEntry)) {
          continue;
        }
        if (!isLocalNode) {
          logInfo(
              "Adding new peer %s (%s), status %s",
              remoteNodeEntry.id, remoteNodeEntry.groupReference, remoteNodeEntry.status);
        }
        // we found a new peer, through the gossip PATCH. Add to our state
        localState.nodes.put(remoteNodeEntry.id, remoteNodeEntry);
        changes.add(NodeGroupChange.PEER_ADDED);
        continue;
      }

      boolean needsUpdate = l.status != remoteNodeEntry.status;
      if (needsUpdate) {
        changes.add(NodeGroupChange.PEER_STATUS_CHANGE);
      }

      if (isSelfPatch && isLocalNode && needsUpdate) {
        // we sent a self PATCH to update our status. Move our version forward;
        remoteNodeEntry.documentVersion =
            Math.max(remoteNodeEntry.documentVersion, l.documentVersion) + 1;
      }

      // versions move forward only, ignore stale nodes
      if (remoteNodeEntry.documentVersion < l.documentVersion) {
        logInfo(
            "v:%d - q:%d, v:%d - q:%d , %s - %s (local:%s %d)",
            l.documentVersion,
            l.membershipQuorum,
            remoteNodeEntry.documentVersion,
            remoteNodeEntry.membershipQuorum,
            l.id,
            remotePeerState.documentOwner,
            getHost().getId(),
            selfEntry.documentVersion);
        continue;
      }

      if (remoteNodeEntry.documentVersion == l.documentVersion && needsUpdate) {
        // pick update with most recent time, even if that is prone to drift and jitter
        // between nodes
        if (remoteNodeEntry.documentUpdateTimeMicros < l.documentUpdateTimeMicros) {
          logWarning(
              "Ignoring update for %s from peer %s. Local status: %s, remote status: %s",
              remoteNodeEntry.id, remotePeerState.documentOwner, l.status, remoteNodeEntry.status);
          continue;
        }
      }

      if (remoteNodeEntry.status == NodeStatus.UNAVAILABLE
          && l.documentExpirationTimeMicros == 0
          && remoteNodeEntry.documentExpirationTimeMicros == 0) {
        remoteNodeEntry.documentExpirationTimeMicros =
            Utils.getNowMicrosUtc() + localState.config.nodeRemovalDelayMicros;
        logInfo(
            "Set expiration at %d for unavailable node %s(%s)",
            remoteNodeEntry.documentExpirationTimeMicros,
            remoteNodeEntry.id,
            remoteNodeEntry.groupReference);
        changes.add(NodeGroupChange.PEER_STATUS_CHANGE);
        needsUpdate = true;
      }

      if (remoteNodeEntry.status == NodeStatus.UNAVAILABLE && needsUpdate) {
        // nodes increment their own entry version, except if they are unavailable
        remoteNodeEntry.documentVersion++;
      }

      localState.nodes.put(remoteNodeEntry.id, remoteNodeEntry);
    }

    List<String> missingNodes = new ArrayList<>();
    for (NodeState l : localState.nodes.values()) {
      NodeState r = remotePeerState.nodes.get(l.id);
      if (!NodeState.isUnAvailable(l) || l.id.equals(getHost().getId())) {
        continue;
      }

      long expirationMicros = l.documentExpirationTimeMicros;
      if (r != null) {
        expirationMicros = Math.max(l.documentExpirationTimeMicros, r.documentExpirationTimeMicros);
      }

      if (expirationMicros > 0 && now > expirationMicros) {
        changes.add(NodeGroupChange.PEER_STATUS_CHANGE);
        logInfo("Removing expired unavailable node %s(%s)", l.id, l.groupReference);
        missingNodes.add(l.id);
      }
    }

    for (String id : missingNodes) {
      localState.nodes.remove(id);
    }

    boolean isModified = !changes.isEmpty();
    localState.membershipUpdateTimeMicros =
        Math.max(
            remotePeerState.membershipUpdateTimeMicros,
            isModified ? now : localState.membershipUpdateTimeMicros);
    if (isModified) {
      logInfo(
          "State updated, merge with %s, self %s, %d",
          remotePeerState.documentOwner,
          localState.documentOwner,
          localState.membershipUpdateTimeMicros);
    }
  }
 public static class BankAccountServiceState extends ServiceDocument {
   static final String KIND = Utils.buildKind(BankAccountServiceState.class);
   public double balance;
 }
Example #28
  public void handleGossipPatchCompletion(
      Operation maint,
      Operation patch,
      Throwable e,
      NodeGroupState localState,
      NodeGroupState patchBody,
      AtomicInteger remaining,
      NodeState remotePeer) {

    try {
      if (patch == null) {
        return;
      }

      long updateTime = localState.membershipUpdateTimeMicros;
      if (e != null) {
        updateTime =
            remotePeer.status != NodeStatus.UNAVAILABLE ? Utils.getNowMicrosUtc() : updateTime;

        if (remotePeer.status != NodeStatus.UNAVAILABLE) {
          remotePeer.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
          remotePeer.documentVersion++;
        }
        remotePeer.status = NodeStatus.UNAVAILABLE;
      } else {
        NodeGroupState peerState = getBody(patch);
        if (peerState.documentOwner.equals(remotePeer.id)) {
          NodeState remotePeerStateFromRsp = peerState.nodes.get(remotePeer.id);
          if (remotePeerStateFromRsp.documentVersion > remotePeer.documentVersion) {
            remotePeer = remotePeerStateFromRsp;
          }
        } else if (remotePeer.status != NodeStatus.REPLACED) {
          logWarning(
              "Peer address %s has changed to id %s from %s",
              patch.getUri(), peerState.documentOwner, remotePeer.id);
          remotePeer.status = NodeStatus.REPLACED;
          remotePeer.documentVersion++;
          updateTime = Utils.getNowMicrosUtc();
        }
        updateTime = Math.max(updateTime, peerState.membershipUpdateTimeMicros);
      }

      synchronized (patchBody) {
        patchBody.nodes.put(remotePeer.id, remotePeer);
        patchBody.membershipUpdateTimeMicros =
            Math.max(updateTime, patchBody.membershipUpdateTimeMicros);
      }

    } finally {
      int r = remaining.decrementAndGet();
      if (r != 0) {
        return;
      }

      // to merge updated state, issue a self PATCH. It contains NodeState entries for every
      // peer node we just talked to
      sendRequest(Operation.createPatch(getUri()).setBody(patchBody));

      maint.complete();
    }
  }
Example #29
  private void performGroupMaintenance(Operation maint, Operation get, Throwable getEx) {
    // we ignore any body associated with the PUT

    if (getEx != null) {
      logWarning("Failure getting state: %s", getEx.toString());
      maint.complete();
      return;
    }

    if (!get.hasBody()) {
      maint.complete();
      return;
    }

    NodeGroupState localState = get.getBody(NodeGroupState.class);

    if (localState == null || localState.nodes == null) {
      maint.complete();
      return;
    }

    if (localState.nodes.size() <= 1) {
      maint.complete();
      return;
    }

    if (getHost().isStopping()) {
      maint.complete();
      return;
    }

    // probe a fixed, random selection of our peers, giving them our view of the group and
    // getting back theirs

    // probe log10 of the peer count (excluding self)
    int peersToProbe = (int) Math.log10(localState.nodes.size() - 1);
    // probe at least N peers
    peersToProbe = Math.max(peersToProbe, MIN_PEER_GOSSIP_COUNT);
    // probe at most total number of peers
    peersToProbe = Math.min(localState.nodes.size() - 1, peersToProbe);

    AtomicInteger remaining = new AtomicInteger(peersToProbe);
    NodeState[] randomizedPeers = shuffleGroupMembers(localState);
    NodeState localNode = localState.nodes.get(getHost().getId());
    localNode.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
    localNode.groupReference = UriUtils.buildPublicUri(getHost(), getSelfLink());
    localState.documentOwner = getHost().getId();

    NodeGroupState patchBody = new NodeGroupState();
    patchBody.documentOwner = getHost().getId();
    patchBody.documentUpdateTimeMicros = Utils.getNowMicrosUtc();

    int probeCount = 0;
    for (NodeState peer : randomizedPeers) {
      if (peer == null) {
        continue;
      }

      if (peer.id.equals(getHost().getId())) {
        continue;
      }

      NodeState remotePeer = peer;
      URI peerUri = peer.groupReference;
      // send a gossip PATCH to the peer, with our state

      // perform a health check to N randomly selected peers
      // 1) We issue a PATCH to a peer, with the body set to our view of the group
      // 2a) if the peer is healthy, they will merge our state with theirs and return
      // the merged state in the response. We will then update our state and mark the
      // peer AVAILABLE. We just update the peer node entry, we don't currently merge their state
      // 2b) if the PATCH fails, we mark the peer UNAVAILABLE

      CompletionHandler ch =
          (o, e) ->
              handleGossipPatchCompletion(
                  maint, o, e, localState, patchBody, remaining, remotePeer);
      Operation patch =
          Operation.createPatch(peerUri)
              .setBody(localState)
              .setRetryCount(0)
              .setExpiration(Utils.getNowMicrosUtc() + getHost().getOperationTimeoutMicros() / 2)
              .forceRemote()
              .setCompletion(ch);

      if (peer.groupReference.equals(localNode.groupReference)
          && peer.status != NodeStatus.REPLACED) {
        // If we just detected this is a peer node that used to listen on our address,
        // but it's obviously no longer around, mark it as REPLACED and do not send the PATCH
        peer.status = NodeStatus.REPLACED;
        peer.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
        peer.documentVersion++;
        ch.handle(null, null);
      } else {
        sendRequest(patch);
      }

      // only probe N peers
      if (++probeCount >= peersToProbe) {
        break;
      }
    }

    if (probeCount == 0) {
      maint.complete();
    }
  }
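A self-contained restatement of the probe-count bound used above, with a concrete example; MIN_PEER_GOSSIP_COUNT is passed in as a parameter because its actual value is not shown in these examples.
  static int peersToProbe(int groupSize, int minPeerGossipCount) {
    // probe log10 of the peer count (excluding self) ...
    int count = (int) Math.log10(groupSize - 1);
    // ... but at least minPeerGossipCount ...
    count = Math.max(count, minPeerGossipCount);
    // ... and never more than the number of actual peers.
    // Example: groupSize = 1001, minPeerGossipCount = 10 -> max(3, 10) = 10 peers probed.
    return Math.min(groupSize - 1, count);
  }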
Example #30
  private void handleJoinPost(
      JoinPeerRequest joinBody,
      Operation joinOp,
      NodeGroupState localState,
      NodeGroupState remotePeerState) {

    if (UriUtils.isHostEqual(getHost(), joinBody.memberGroupReference)) {
      logInfo("Skipping self join");
      // we tried to join ourselves, abort
      joinOp.complete();
      return;
    }

    NodeState self = localState.nodes.get(getHost().getId());

    if (joinOp != null) {
      self.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
      self.documentVersion++;

      // at a minimum we need 2 nodes to synch: self plus the node we are joining
      self.synchQuorum = 2;

      if (joinBody.synchQuorum != null) {
        self.synchQuorum = Math.max(self.synchQuorum, joinBody.synchQuorum);
      }

      if (joinBody.localNodeOptions != null) {
        if (!validateNodeOptions(joinOp, joinBody.localNodeOptions)) {
          return;
        }
        self.options = joinBody.localNodeOptions;
      }

      localState.membershipUpdateTimeMicros = self.documentUpdateTimeMicros;

      // complete the join POST, continue with state merge
      joinOp.complete();
    }

    // this method is two pass
    // First pass get the remote peer state
    // Second pass, insert self

    if (remotePeerState == null) {
      // Pass 1, get existing member state
      sendRequest(
          Operation.createGet(joinBody.memberGroupReference)
              .setCompletion(
                  (o, e) -> {
                    if (e != null) {
                      logWarning("Failure getting peer %s state:%s", o.getUri(), e.toString());
                      return;
                    }

                    NodeGroupState remoteState = getBody(o);
                    handleJoinPost(joinBody, null, localState, remoteState);
                  }));
      return;
    }

    // Pass 2, merge remote group state with ours, send self to peer
    sendRequest(Operation.createPatch(getUri()).setBody(remotePeerState));

    logInfo(
        "Synch quorum: %d. Sending POST to insert self (%s) to peer %s",
        self.synchQuorum, self.groupReference, joinBody.memberGroupReference);

    Operation insertSelfToPeer =
        Operation.createPost(joinBody.memberGroupReference)
            .setBody(self)
            .setCompletion(
                (o, e) -> {
                  if (e != null) {
                    logSevere("Insert POST to %s failed", o.getUri());
                    return;
                  }
                  // we will restart services to synchronize with peers on the next
                  // maintenance interval with a stable group membership
                });
    sendRequest(insertSelfToPeer);
  }