@Override
 public void addWorker(String worker) {
   heartbeat.put(worker, System.currentTimeMillis());
   if (!workers.contains(worker)) {
     log.info("Adding worker " + worker);
     workers.add(worker);
     log.info("Number of workers is now " + workers.size());
   }
 }
  @Override
  public boolean addJobToCurrent(Job j) throws Exception {

    IAtomicReference<Job> r = h.getAtomicReference("job-" + j.getWorkerId());

    if (r.get() != null || !r.isNull()) {
      boolean sent = false;
      while (!sent) {
        // always update
        for (String s : workers()) {
          if (jobFor(s) == null) {
            log.info(
                "Redirecting worker "
                    + j.getWorkerId()
                    + " to "
                    + s
                    + " due to work already being allocated");
            r = h.getAtomicReference("job-" + s);
            j.setWorkerId(s);
            sent = true;
          }
        }
      }
    }

    r.set(j);

    // iterate over jobs without the work/data
    j.setWork(null);

    jobs.add(j);

    return true;
  }
 /**
  * Adds an update to the current mini batch
  *
  * @param id the id of the worker who did the update
  * @param update the update to add
  */
 @Override
 public void addUpdate(String id, E update) {
   try {
     updateSaver().save(id, update);
   } catch (Exception e) {
     throw new RuntimeException(e);
   }
   updates.add(id);
 }
  @Override
  public void removeWorker(String worker) {
    workers.remove(worker);
    if (jobFor(worker) != null) {
      try {
        clearJob(worker);

      } catch (Exception e) {
        log.warn("Unable to clear job for worker with id" + worker);
      }
    }
  }
  @Override
  public void clearJob(String id) throws Exception {
    if (id == null) {
      log.warn("No job to clear; was null, returning");
      return;
    }

    IAtomicReference<Job> jRef = h.getAtomicReference("job-" + id);
    if (jRef.isNull()) return;
    jRef.clear();
    log.info("Destroyed job ref " + id);
    Job remove = null;
    for (Job j : jobs) {
      if (j.getWorkerId().equals(id)) {
        remove = j;
        break;
      }
    }

    jobs.remove(remove);
  }
 @Override
 public int numWorkers() {
   int num = workers.size();
   return num;
 }
 @Override
 public void availableForWork(String id) {
   if (!workers.contains(id)) workers.add(id);
 }
 @Override
 public void addTopic(String topic) throws Exception {
   topics.add(topic);
 }
  public BaseHazelCastStateTracker(String connectionString, String type, int stateTrackerPort)
      throws Exception {
    log.info(
        "Setting up hazelcast with type "
            + type
            + " connection string "
            + connectionString
            + " and port "
            + stateTrackerPort);

    if (type.equals("master") && !PortTaken.portTaken(stateTrackerPort)) {
      // sets up a proper connection string for reference wrt external actors needing a reference
      if (connectionString.equals("master")) {
        String host = InetAddress.getLocalHost().getHostName();
        this.connectionString = host + ":" + stateTrackerPort;
      }

      this.hazelCastPort = stateTrackerPort;
      config = hazelcast();

      h = Hazelcast.newHazelcastInstance(config);
      h.getCluster()
          .addMembershipListener(
              new MembershipListener() {

                @Override
                public void memberAdded(MembershipEvent membershipEvent) {
                  log.info("Member added " + membershipEvent.toString());
                }

                @Override
                public void memberRemoved(MembershipEvent membershipEvent) {
                  log.info("Member removed " + membershipEvent.toString());
                }

                @Override
                public void memberAttributeChanged(MemberAttributeEvent memberAttributeEvent) {
                  log.info("Member changed " + memberAttributeEvent.toString());
                }
              });
    } else if (type.equals("master") && PortTaken.portTaken(stateTrackerPort))
      throw new IllegalStateException(
          "Specified type was master and the port specified was taken, please specify a different port");
    else {

      setConnectionString(connectionString);
      log.info("Connecting to hazelcast on " + connectionString);
      ClientConfig client = new ClientConfig();
      client.getNetworkConfig().addAddress(connectionString);
      h = HazelcastClient.newHazelcastClient(client);
    }

    this.type = type;

    jobs = h.getList(JOBS);
    workers = h.getList(WORKERS);

    // we can make the assumption workers isn't empty because
    // the master node by default comes with a applyTransformToDestination of workers
    if (!this.type.equals("master")) {
      while (workers.isEmpty()) {
        log.warn("Waiting for data sync...");
        Thread.sleep(1000);
      }

      log.info("Workers is " + workers.size());
    }

    begunTraining = h.getAtomicReference(BEGUN);
    miniBatchSize = h.getAtomicReference(INPUT_SPLIT);
    workerEnabled = h.getMap(WORKER_ENABLED);
    replicate = h.getList(REPLICATE_WEIGHTS);
    topics = h.getList(TOPICS);
    updates = h.getList(UPDATES);
    heartbeat = h.getMap(HEART_BEAT);
    master = h.getAtomicReference(RESULT);
    isPretrain = h.getAtomicReference(IS_PRETRAIN);
    numTimesPretrain = h.getAtomicReference(NUM_TIMES_RUN_PRETRAIN);
    numTimesPretrainRan = h.getAtomicReference(NUM_TIMES_PRETRAIN_RAN);
    done = h.getAtomicReference(DONE);
    validationEpochs = h.getAtomicReference(VALIDATION_EPOCHS);
    improvementThreshold = h.getAtomicReference(IMPROVEMENT_THRESHOLD);
    bestLoss = h.getAtomicReference(BEST_LOSS);
    earlyStop = h.getAtomicReference(EARLY_STOP);
    patience = h.getAtomicReference(PATIENCE);
    patienceIncrease = h.getAtomicReference(PATIENCE_INCREASE);
    numBatches = h.getAtomicReference(NUM_BATCHES_SO_FAR_RAN);

    // applyTransformToDestination defaults only when master, otherwise, overrides previous values
    if (type.equals("master")) {
      begunTraining.set(false);
      saver = createUpdateSaver();
      numTimesPretrainRan.set(0);
      numTimesPretrain.set(1);
      isPretrain.set(true);
      done.set(false);
      resource = new StateTrackerDropWizardResource(this);
      bestLoss.set(Double.POSITIVE_INFINITY);
      earlyStop.set(true);
      patience.set(40.0);
      patienceIncrease.set(2.0);
      improvementThreshold.set(0.995);
      validationEpochs.set((int) Math.min(10, patience() / 2));
      numBatches.set(0);
    }
  }
 /**
  * Tracks worker ids that need state replication
  *
  * @param workerId the worker id to replicate
  * @return the list of worker ids that need state replication
  */
 @Override
 public boolean needsReplicate(String workerId) {
   return replicate.contains(workerId);
 }
 /**
  * Adds a worker to the list to be replicate d
  *
  * @param workerId the worker id to add
  */
 @Override
 public void addReplicate(String workerId) {
   if (!replicate.contains(workerId)) replicate.add(workerId);
 }
 /**
  * Updates the status of the worker to not needing replication
  *
  * @param workerId the worker id to update
  */
 @Override
 public void doneReplicating(String workerId) {
   replicate.remove(workerId);
 }