예제 #1
0
  /** Used to update the existing deployment of a job. */
  @Override
  public void updateDeployment(final String host, final Deployment deployment)
      throws HostNotFoundException, JobNotDeployedException {
    log.info("updating deployment {}: {}", deployment, host);

    final ZooKeeperClient client = provider.get("updateDeployment");

    final JobId jobId = deployment.getJobId();
    final Job job = getJob(client, jobId);

    if (job == null) {
      throw new JobNotDeployedException(host, jobId);
    }

    assertHostExists(client, host);
    assertTaskExists(client, host, deployment.getJobId());

    final String path = Paths.configHostJob(host, jobId);
    final Task task = new Task(job, deployment.getGoal());
    try {
      client.setData(path, task.toJsonBytes());
    } catch (Exception e) {
      throw new HeliosRuntimeException(
          "updating deployment " + deployment + " on host " + host + " failed", e);
    }
  }
예제 #2
0
  public void assertVolumes(final JobId jobId) throws Exception {
    // Wait for agent to come up
    awaitHostRegistered(client, testHost(), LONG_WAIT_SECONDS, SECONDS);
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

    // Deploy the job on the agent
    final Deployment deployment = Deployment.of(jobId, START);
    final JobDeployResponse deployed = client.deploy(deployment, testHost()).get();
    assertEquals(JobDeployResponse.Status.OK, deployed.getStatus());

    // Wait for the job to run
    final TaskStatus taskStatus =
        awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
    assertJobEquals(job, taskStatus.getJob());

    final Integer barPort = taskStatus.getPorts().get("bar").getExternalPort();
    final Integer hostnamePort = taskStatus.getPorts().get("hostname").getExternalPort();

    assert barPort != null;
    assert hostnamePort != null;

    // Read "foo" from /volume/bar
    final String foo = recvUtf8(barPort, 3);
    assertEquals("foo", foo);

    // Read hostname from /hostname
    final String hostname = getNewDockerClient().info().name();
    final String mountedHostname = recvUtf8(hostnamePort, hostname.length());
    assertEquals(hostname, mountedHostname);
  }
예제 #3
0
  private Map<JobId, Deployment> getTasks(final ZooKeeperClient client, final String host) {
    final Map<JobId, Deployment> jobs = Maps.newHashMap();
    try {
      final String folder = Paths.configHostJobs(host);
      final List<String> jobIds;
      try {
        jobIds = client.getChildren(folder);
      } catch (KeeperException.NoNodeException e) {
        return null;
      }

      for (final String jobIdString : jobIds) {
        final JobId jobId = JobId.fromString(jobIdString);
        final String containerPath = Paths.configHostJob(host, jobId);
        try {
          final byte[] data = client.getData(containerPath);
          final Task task = parse(data, Task.class);
          jobs.put(jobId, Deployment.of(jobId, task.getGoal()));
        } catch (KeeperException.NoNodeException ignored) {
          log.debug("deployment config node disappeared: {}", jobIdString);
        }
      }
    } catch (KeeperException | IOException e) {
      throw new HeliosRuntimeException("getting deployment config failed", e);
    }

    return jobs;
  }
예제 #4
0
 public ListenableFuture<SetGoalResponse> setGoal(
     final Deployment job, final String host, final String token) {
   return transform(
       request(
           uri(path("/hosts/%s/jobs/%s", host, job.getJobId()), ImmutableMap.of("token", token)),
           "PATCH",
           job),
       ConvertResponseToPojo.create(
           SetGoalResponse.class, ImmutableSet.of(HTTP_OK, HTTP_NOT_FOUND, HTTP_FORBIDDEN)));
 }
예제 #5
0
 public ListenableFuture<JobDeployResponse> deploy(
     final Deployment job, final String host, final String token) {
   final Set<Integer> deserializeReturnCodes =
       ImmutableSet.of(HTTP_OK, HTTP_NOT_FOUND, HTTP_BAD_METHOD, HTTP_BAD_REQUEST, HTTP_FORBIDDEN);
   return transform(
       request(
           uri(path("/hosts/%s/jobs/%s", host, job.getJobId()), ImmutableMap.of("token", token)),
           "PUT",
           job),
       ConvertResponseToPojo.create(JobDeployResponse.class, deserializeReturnCodes));
 }
  @Test
  public void test() throws Exception {
    startDefaultMaster();
    startDefaultAgent(testHost());

    final HeliosClient client = defaultClient();

    // Create a job using an image exposing port 11211 but without mapping it
    final Job job1 =
        Job.newBuilder()
            .setName(testTag + "memcached")
            .setVersion("v1")
            .setImage("rohan/memcached-mini")
            .build();
    final JobId jobId1 = job1.getId();
    client.createJob(job1).get();

    // Create a job using an image exposing port 11211 and map it to a specific external port
    final Job job2 =
        Job.newBuilder()
            .setName(testTag + "memcached")
            .setVersion("v2")
            .setImage("rohan/memcached-mini")
            .setPorts(ImmutableMap.of("tcp", PortMapping.of(11211, externalPort)))
            .build();
    final JobId jobId2 = job2.getId();
    client.createJob(job2).get();

    // Wait for agent to come up
    awaitHostRegistered(client, testHost(), LONG_WAIT_MINUTES, MINUTES);
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_MINUTES, MINUTES);

    // Deploy the jobs on the agent
    client.deploy(Deployment.of(jobId1, START), testHost()).get();
    client.deploy(Deployment.of(jobId2, START), testHost()).get();

    // Wait for the jobs to run
    awaitJobState(client, testHost(), jobId1, RUNNING, LONG_WAIT_MINUTES, MINUTES);
    awaitJobState(client, testHost(), jobId2, RUNNING, LONG_WAIT_MINUTES, MINUTES);
  }
예제 #7
0
 /** Returns the current deployment state of {@code jobId} on {@code host}. */
 @Override
 public Deployment getDeployment(final String host, final JobId jobId) {
   final String path = Paths.configHostJob(host, jobId);
   final ZooKeeperClient client = provider.get("getDeployment");
   try {
     final byte[] data = client.getData(path);
     final Task task = parse(data, Task.class);
     return Deployment.of(jobId, task.getGoal());
   } catch (KeeperException.NoNodeException e) {
     return null;
   } catch (KeeperException | IOException e) {
     throw new HeliosRuntimeException("getting deployment failed", e);
   }
 }
예제 #8
0
  // TODO(drewc): this kinda screams "long method"
  private void deployJobRetry(
      final ZooKeeperClient client, final String host, final Deployment deployment, int count)
      throws JobDoesNotExistException, JobAlreadyDeployedException, HostNotFoundException,
          JobPortAllocationConflictException {
    if (count == 3) {
      throw new HeliosRuntimeException(
          "3 failures (possibly concurrent modifications) while " + "deploying. Giving up.");
    }
    log.info("deploying {}: {} (retry={})", deployment, host, count);

    final JobId id = deployment.getJobId();
    final Job job = getJob(id);

    if (job == null) {
      throw new JobDoesNotExistException(id);
    }

    final UUID operationId = UUID.randomUUID();
    final String jobPath = Paths.configJob(id);
    final String taskPath = Paths.configHostJob(host, id);
    final String taskCreationPath = Paths.configHostJobCreation(host, id, operationId);

    final List<Integer> staticPorts = staticPorts(job);
    final Map<String, byte[]> portNodes = Maps.newHashMap();
    final byte[] idJson = id.toJsonBytes();
    for (final int port : staticPorts) {
      final String path = Paths.configHostPort(host, port);
      portNodes.put(path, idJson);
    }

    final Task task = new Task(job, deployment.getGoal());
    final List<ZooKeeperOperation> operations =
        Lists.newArrayList(
            check(jobPath), create(portNodes), create(Paths.configJobHost(id, host)));

    // Attempt to read a task here.  If it's goal is UNDEPLOY, it's as good as not existing
    try {
      final Node existing = client.getNode(taskPath);
      byte[] bytes = existing.getBytes();
      Task readTask = Json.read(bytes, Task.class);
      if (readTask.getGoal() != Goal.UNDEPLOY) {
        throw new JobAlreadyDeployedException(host, id);
      }
      operations.add(check(taskPath, existing.getStat().getVersion()));
      operations.add(set(taskPath, task));
    } catch (NoNodeException e) {
      operations.add(create(taskPath, task));
      operations.add(create(taskCreationPath));
    } catch (IOException | KeeperException e) {
      throw new HeliosRuntimeException("reading existing task description failed", e);
    }

    // TODO (dano): Failure handling is racy wrt agent and job modifications.
    try {
      client.transaction(operations);
      log.info("deployed {}: {} (retry={})", deployment, host, count);
    } catch (NoNodeException e) {
      // Either the job, the host or the task went away
      assertJobExists(client, id);
      assertHostExists(client, host);
      // If the job and host still exists, we likely tried to redeploy a job that had an UNDEPLOY
      // goal and lost the race with the agent removing the task before we could set it. Retry.
      deployJobRetry(client, host, deployment, count + 1);
    } catch (NodeExistsException e) {
      // Check for conflict due to transaction retry
      try {
        if (client.exists(taskCreationPath) != null) {
          // Our creation operation node existed, we're done here
          return;
        }
      } catch (KeeperException ex) {
        throw new HeliosRuntimeException("checking job deployment failed", ex);
      }
      try {
        // Check if the job was already deployed
        if (client.stat(taskPath) != null) {
          throw new JobAlreadyDeployedException(host, id);
        }
      } catch (KeeperException ex) {
        throw new HeliosRuntimeException("checking job deployment failed", e);
      }

      // Check for static port collisions
      for (final int port : staticPorts) {
        final String path = Paths.configHostPort(host, port);
        try {
          if (client.stat(path) == null) {
            continue;
          }
          final byte[] b = client.getData(path);
          final JobId existingJobId = parse(b, JobId.class);
          throw new JobPortAllocationConflictException(id, existingJobId, host, port);
        } catch (KeeperException | IOException ex) {
          throw new HeliosRuntimeException("checking port allocations failed", e);
        }
      }

      // Catch all for logic and ephemeral issues
      throw new HeliosRuntimeException("deploying job failed", e);
    } catch (KeeperException e) {
      throw new HeliosRuntimeException("deploying job failed", e);
    }
  }