/**
  * Switches the tracker from the pretrain phase to the finetune phase.
  *
  * <p>Clears the pretrain flag and restarts the batch counter at zero.
  */
 @Override
 public void moveToFinetune() {
   log.info("Moving to finetune");

   // Leave the pretrain phase.
   isPretrain.set(false);

   // A new phase starts counting batches from scratch.
   numBatches.set(0);
 }
  /**
   * Publishes a job for a worker and records it in the shared job list.
   *
   * <p>If the worker named by the job already has a job reference set, the job is redirected to
   * the first worker for which {@link #jobFor(String)} returns {@code null}. The job (including
   * its work payload) is stored in that worker's atomic reference; afterwards the payload is
   * cleared and the payload-free job is appended to {@code jobs}.
   *
   * @param j the job to add; its worker id is rewritten when the job is redirected
   * @return always {@code true}
   * @throws Exception declared by the interface; propagated from the underlying calls
   */
  @Override
  public boolean addJobToCurrent(Job j) throws Exception {

    IAtomicReference<Job> r = h.getAtomicReference("job-" + j.getWorkerId());

    // isNull() is enough to detect an occupied slot; no need to fetch the whole job as well.
    if (!r.isNull()) {
      boolean sent = false;
      // NOTE(review): this spins without backoff for as long as no worker is free.
      while (!sent) {
        // always re-read the worker list on each pass
        for (String s : workers()) {
          if (jobFor(s) == null) {
            log.info(
                "Redirecting worker "
                    + j.getWorkerId()
                    + " to "
                    + s
                    + " due to work already being allocated");
            r = h.getAtomicReference("job-" + s);
            j.setWorkerId(s);
            sent = true;
            // Stop at the first free worker. Without this the loop kept re-targeting the job
            // to every later free worker and logged a misleading origin each time.
            break;
          }
        }
      }
    }

    // Store the full job (with its work) in the worker's slot first...
    r.set(j);

    // ...then drop the work/data so the job list only carries lightweight entries.
    j.setWork(null);

    jobs.add(j);

    return true;
  }
 /**
  * Checks that each field under test is non-null and, where a name is asserted, that it
  * reports the expected name. {@code executorService} and {@code lock} are only null-checked.
  */
 @Test
 public void testHazelcastInstances() {
   // maps
   assertNotNull(map1);
   assertEquals("map1", map1.getName());
   assertNotNull(map2);
   assertEquals("map2", map2.getName());
   assertNotNull(multiMap);
   assertEquals("testMultimap", multiMap.getName());

   // queue / topic
   assertNotNull(queue);
   assertEquals("testQ", queue.getName());
   assertNotNull(topic);
   assertEquals("testTopic", topic.getName());

   // collections
   assertNotNull(set);
   assertEquals("set", set.getName());
   assertNotNull(list);
   assertEquals("list", list.getName());

   // executor (presence only)
   assertNotNull(executorService);

   // id generation and atomics
   assertNotNull(idGenerator);
   assertEquals("idGenerator", idGenerator.getName());
   assertNotNull(atomicLong);
   assertEquals("atomicLong", atomicLong.getName());
   assertNotNull(atomicReference);
   assertEquals("atomicReference", atomicReference.getName());

   // synchronisation primitives
   assertNotNull(countDownLatch);
   assertEquals("countDownLatch", countDownLatch.getName());
   assertNotNull(semaphore);
   assertEquals("semaphore", semaphore.getName());

   // lock (presence only)
   assertNotNull(lock);
 }
  /**
   * Clears the job reference held for the given worker id and removes the matching entry from
   * the shared job list.
   *
   * <p>Does nothing when {@code id} is {@code null} or when no job reference is currently set
   * for that id. If the reference is set but the job list holds no entry for the id, only the
   * reference is cleared.
   *
   * @param id the worker id whose job should be cleared; may be {@code null}
   * @throws Exception declared by the interface; propagated from the underlying calls
   */
  @Override
  public void clearJob(String id) throws Exception {
    if (id == null) {
      log.warn("No job to clear; was null, returning");
      return;
    }

    IAtomicReference<Job> jRef = h.getAtomicReference("job-" + id);
    if (jRef.isNull()) {
      return;
    }
    jRef.clear();
    log.info("Destroyed job ref " + id);

    Job remove = null;
    for (Job j : jobs) {
      // id is known to be non-null here, so comparing from its side tolerates a job that has
      // no worker id assigned.
      if (id.equals(j.getWorkerId())) {
        remove = j;
        break;
      }
    }

    // The list may hold no entry for this worker; never hand null to the distributed list.
    if (remove != null) {
      jobs.remove(remove);
    }
  }
 /**
  * Reports whether the tracker has been flagged as done.
  *
  * <p>Reading the flag can throw (the existing log message attributes this to Hazelcast having
  * been shut down); in that case the failure is logged and {@code true} is returned instead of
  * propagating the exception.
  *
  * @return the stored done flag, or {@code true} if reading it failed
  */
 @Override
 public boolean isDone() {
   boolean finished;
   try {
     finished = done.get();
   } catch (Exception e) {
     // Reading failed: treat the tracker as done rather than surfacing the error.
     log.warn("Hazelcast already shutdown...returning true on isDone()");
     finished = true;
   }
   return finished;
 }
 /**
  * Flags the tracker as done and runs {@code cleanup()} on the update saver.
  *
  * <p>This is best-effort: any exception from either step is logged (with its cause) and not
  * rethrown, so calling this after the cluster has gone away does not fail the caller.
  */
 @Override
 public void finish() {
   try {
     done.set(true);
     updateSaver().cleanup();
   } catch (Exception e) {
     // Keep the cause: this catch also covers the saver cleanup, not only the flag write.
     log.warn("Hazelcast already shutdown...done() being set is pointless", e);
   }
 }
 /**
  * Builds a training evaluator for the given network.
  *
  * <p>The evaluator is configured from this tracker's best loss, improvement threshold,
  * patience, test set, validation epochs and patience increase.
  *
  * @param network the neural network the evaluator is built for
  * @return an evaluator configured from this tracker's current values and the given network
  */
 @Override
 public TrainingEvaluator create(BaseMultiLayerNetwork network) {
   return new OutputLayerTrainingEvaluator.Builder()
       .bestLoss(bestLoss())
       .improvementThreshold(improvementThreshold())
       .patience(patience())
       .testSet(testSet())
       .withNetwork(network)
       .validationEpochs(validationEpochs())
       .patienceIncrease(patienceIncrease.get())
       .build();
 }
 /**
  * Returns the value currently held in the {@code master} reference.
  *
  * @return the current value, or {@code null} when none is set
  * @throws Exception declared by the interface
  */
 @Override
 public E getCurrent() throws Exception {
   // A null reference value is passed straight through to the caller.
   return (E) master.get();
 }
 /**
  * Tells whether training has been started.
  *
  * @return the current value of the begun-training flag
  */
 @Override
 public boolean hasBegun() {
   final boolean started = begunTraining.get();
   return started;
 }
 /**
  * Adds one to the count of completed pretrain runs.
  *
  * <p>NOTE(review): this is a read followed by a separate write, not a single atomic update.
  */
 @Override
 public void incrementNumTimesPreTrainRan() {
   final int ranSoFar = numTimesPreTrainRun();
   numTimesPretrainRan.set(ranSoFar + 1);
 }
 /**
  * Returns the configured number of pretrain iterations.
  *
  * @return the value stored by {@link #runPreTrainIterations(int)}
  */
 @Override
 public int runPreTrainIterations() {
   final int configured = numTimesPretrain.get();
   return configured;
 }
 /**
  * Looks up the job stored for the given worker id.
  *
  * @param id the worker id
  * @return the stored job, or {@code null} when no job is set for the id or when
  *     {@code isCurrentlyJob(id)} reports {@code true}
  */
 @Override
 public Job jobFor(String id) {
   IAtomicReference<Job> ref = h.getAtomicReference("job-" + id);
   // Same short-circuit order as before: the empty check comes first.
   boolean unavailable = ref.isNull() || isCurrentlyJob(id);
   return unavailable ? null : ref.get();
 }
 /**
  * Number of epochs used for validation.
  *
  * @return the stored validation epoch count
  */
 @Override
 public int validationEpochs() {
   final int epochs = validationEpochs.get();
   return epochs;
 }
 /**
  * Best validation loss recorded so far.
  *
  * @return the stored best loss
  */
 @Override
 public double bestLoss() {
   final double best = bestLoss.get();
   return best;
 }
 /**
  * Improvement threshold used for early stopping.
  *
  * @return the stored improvement threshold
  */
 @Override
 public double improvmentThreshold() {
   final double threshold = improvementThreshold.get();
   return threshold;
 }
 /**
  * Patience value that governs early stopping.
  *
  * @return the stored patience for the trainer
  */
 @Override
 public double patience() {
   final double current = patience.get();
   return current;
 }
 /**
  * Stores a new improvement threshold.
  *
  * @param improvmentThreshold the improvement threshold to set
  */
 @Override
 public void setImprovmentThreshold(double improvmentThreshold) {
   // The parameter name differs from the field only by spelling; the field is the target.
   improvementThreshold.set(improvmentThreshold);
 }
 /**
  * Tells whether the tracker is in the pretrain phase.
  *
  * @return the current value of the pretrain flag
  */
 @Override
 public boolean isPretrain() {
   final boolean pretraining = isPretrain.get();
   return pretraining;
 }
 /**
  * Current mini batch size.
  *
  * @return the stored mini batch size
  */
 @Override
 public int miniBatchSize() {
   final int size = miniBatchSize.get();
   return size;
 }
 /**
  * Whether to validate against a held out test set and test for validation error.
  *
  * @return the current value of the early-stop flag
  */
 @Override
 public boolean isEarlyStopTesting() {
   final boolean enabled = earlyStop.get();
   return enabled;
 }
 /**
  * Stores the number of pretrain iterations to run.
  *
  * @param numTimes the number of pretrain iterations
  */
 @Override
 public void runPreTrainIterations(int numTimes) {
   final int requested = numTimes;
   numTimesPretrain.set(requested);
 }
 /**
  * The input split to use. Each data set that is loaded and trained on will be this batch size
  * or lower per worker.
  *
  * <p>The previous expression, {@code (miniBatchSize * numWorkers) / numWorkers}, equals the
  * mini batch size whenever it is well defined. It threw {@link ArithmeticException} when no
  * workers were registered and returned a wrong value when the product overflowed {@code int}.
  * Returning the mini batch size directly avoids both.
  *
  * @return the input split to use (the current mini batch size)
  */
 @Override
 public int inputSplit() {
   return miniBatchSize.get();
 }
 /**
  * Number of pretrain runs recorded so far.
  *
  * @return the stored pretrain run count
  */
 @Override
 public int numTimesPreTrainRun() {
   final int ran = numTimesPretrainRan.get();
   return ran;
 }
  /**
   * Creates a state tracker that either starts a Hazelcast instance (type {@code "master"}) or
   * connects to an existing one as a client (any other type).
   *
   * <p>After the instance is obtained, the shared lists, maps and atomic references are bound.
   * Non-master trackers block until the shared worker list is non-empty. Only a master seeds
   * the default values of the shared state.
   *
   * @param connectionString for a client, the address to connect to; for a master, the literal
   *     {@code "master"} causes a {@code host:port} string to be derived from the local host
   * @param type {@code "master"} to start a new instance, anything else to connect as a client
   * @param stateTrackerPort the port used by the master instance
   * @throws IllegalStateException if the type is master and the port is reported as taken
   * @throws Exception if resolving the local host, connecting, or waiting for the worker list
   *     fails (including interruption while sleeping)
   */
  public BaseHazelCastStateTracker(String connectionString, String type, int stateTrackerPort)
      throws Exception {
    log.info(
        "Setting up hazelcast with type "
            + type
            + " connection string "
            + connectionString
            + " and port "
            + stateTrackerPort);

    // NOTE(review): the port check is evaluated separately here and again in the else-if below.
    if (type.equals("master") && !PortTaken.portTaken(stateTrackerPort)) {
      // sets up a proper connection string for reference wrt external actors needing a reference
      // NOTE(review): for any other value this branch does not assign this.connectionString
      // -- confirm that is intended.
      if (connectionString.equals("master")) {
        String host = InetAddress.getLocalHost().getHostName();
        this.connectionString = host + ":" + stateTrackerPort;
      }

      this.hazelCastPort = stateTrackerPort;
      config = hazelcast();

      // Start the instance and log cluster membership changes.
      h = Hazelcast.newHazelcastInstance(config);
      h.getCluster()
          .addMembershipListener(
              new MembershipListener() {

                @Override
                public void memberAdded(MembershipEvent membershipEvent) {
                  log.info("Member added " + membershipEvent.toString());
                }

                @Override
                public void memberRemoved(MembershipEvent membershipEvent) {
                  log.info("Member removed " + membershipEvent.toString());
                }

                @Override
                public void memberAttributeChanged(MemberAttributeEvent memberAttributeEvent) {
                  log.info("Member changed " + memberAttributeEvent.toString());
                }
              });
    } else if (type.equals("master") && PortTaken.portTaken(stateTrackerPort))
      throw new IllegalStateException(
          "Specified type was master and the port specified was taken, please specify a different port");
    else {

      // Not a master: connect to the given address as a client.
      setConnectionString(connectionString);
      log.info("Connecting to hazelcast on " + connectionString);
      ClientConfig client = new ClientConfig();
      client.getNetworkConfig().addAddress(connectionString);
      h = HazelcastClient.newHazelcastClient(client);
    }

    this.type = type;

    jobs = h.getList(JOBS);
    workers = h.getList(WORKERS);

    // A non-master waits here until the shared worker list has at least one entry; the
    // original author's assumption is that the master node comes with a set of workers.
    if (!this.type.equals("master")) {
      while (workers.isEmpty()) {
        log.warn("Waiting for data sync...");
        Thread.sleep(1000);
      }

      log.info("Workers is " + workers.size());
    }

    // Bind the remaining shared structures by name.
    begunTraining = h.getAtomicReference(BEGUN);
    miniBatchSize = h.getAtomicReference(INPUT_SPLIT);
    workerEnabled = h.getMap(WORKER_ENABLED);
    replicate = h.getList(REPLICATE_WEIGHTS);
    topics = h.getList(TOPICS);
    updates = h.getList(UPDATES);
    heartbeat = h.getMap(HEART_BEAT);
    master = h.getAtomicReference(RESULT);
    isPretrain = h.getAtomicReference(IS_PRETRAIN);
    numTimesPretrain = h.getAtomicReference(NUM_TIMES_RUN_PRETRAIN);
    numTimesPretrainRan = h.getAtomicReference(NUM_TIMES_PRETRAIN_RAN);
    done = h.getAtomicReference(DONE);
    validationEpochs = h.getAtomicReference(VALIDATION_EPOCHS);
    improvementThreshold = h.getAtomicReference(IMPROVEMENT_THRESHOLD);
    bestLoss = h.getAtomicReference(BEST_LOSS);
    earlyStop = h.getAtomicReference(EARLY_STOP);
    patience = h.getAtomicReference(PATIENCE);
    patienceIncrease = h.getAtomicReference(PATIENCE_INCREASE);
    numBatches = h.getAtomicReference(NUM_BATCHES_SO_FAR_RAN);

    // Set defaults only when master; doing so from a client would override previous values.
    if (type.equals("master")) {
      begunTraining.set(false);
      saver = createUpdateSaver();
      numTimesPretrainRan.set(0);
      numTimesPretrain.set(1);
      isPretrain.set(true);
      done.set(false);
      resource = new StateTrackerDropWizardResource(this);
      bestLoss.set(Double.POSITIVE_INFINITY);
      earlyStop.set(true);
      patience.set(40.0);
      patienceIncrease.set(2.0);
      improvementThreshold.set(0.995);
      // Uses the patience stored just above: min(10, patience / 2), truncated to int.
      validationEpochs.set((int) Math.min(10, patience() / 2));
      numBatches.set(0);
    }
  }
 /**
  * Adds to the count of batches ran. This is purely a count and does not necessarily mean
  * progress.
  *
  * <p>NOTE(review): this is a read followed by a separate write, not a single atomic update.
  *
  * @param numBatchesRan the number of batches to add to the count
  */
 @Override
 public void incrementBatchesRan(int numBatchesRan) {
   final int previous = numBatches.get();
   numBatches.set(numBatchesRan + previous);
 }
 /**
  * Overwrites the job stored for the job's worker id. No existence check is made; the caller
  * is expected to update a job that is already there.
  *
  * @param j the job to store
  */
 @Override
 public void updateJob(Job j) {
   final String refName = "job-" + j.getWorkerId();
   IAtomicReference<Job> workerSlot = h.getAtomicReference(refName);
   workerSlot.set(j);
 }
 /**
  * Number of batches ran so far.
  *
  * @return the stored batch count
  */
 @Override
 public int numBatchesRan() {
   final int count = numBatches.get();
   return count;
 }
 /**
  * Marks training as begun by setting the begun-training flag to {@code true}.
  */
 @Override
 public void beginTraining() {
   final boolean started = true;
   begunTraining.set(started);
 }