コード例 #1
0
 @Override
 public void moveToFinetune() {
   log.info("Moving to finetune");
   isPretrain.set(false);
   // new phase: resets the counter
   numBatches.set(0);
 }
コード例 #2
0
  @Override
  public boolean addJobToCurrent(Job j) throws Exception {

    IAtomicReference<Job> r = h.getAtomicReference("job-" + j.getWorkerId());

    if (r.get() != null || !r.isNull()) {
      boolean sent = false;
      while (!sent) {
        // always update
        for (String s : workers()) {
          if (jobFor(s) == null) {
            log.info(
                "Redirecting worker "
                    + j.getWorkerId()
                    + " to "
                    + s
                    + " due to work already being allocated");
            r = h.getAtomicReference("job-" + s);
            j.setWorkerId(s);
            sent = true;
          }
        }
      }
    }

    r.set(j);

    // iterate over jobs without the work/data
    j.setWork(null);

    jobs.add(j);

    return true;
  }
コード例 #3
0
 @Override
 public void finish() {
   // reason being that isDone() may getFromOrigin called and throw errors
   // this ensures a safe method call happens and just silently
   // returns true in case hazelcast is shutdown
   try {
     done.set(true);
     updateSaver().cleanup();
   } catch (Exception e) {
     log.warn(
         "Hazelcast already shutdown...done() being applyTransformToDestination is pointless");
   }
 }
コード例 #4
0
 @Override
 public void incrementNumTimesPreTrainRan() {
   numTimesPretrainRan.set(numTimesPreTrainRun() + 1);
 }
コード例 #5
0
 @Override
 public void runPreTrainIterations(int numTimes) {
   numTimesPretrain.set(numTimes);
 }
コード例 #6
0
 /**
  * Assuming a job already exists, updates the job
  *
  * @param j the job to update
  */
 @Override
 public void updateJob(Job j) {
   IAtomicReference<Job> jRef = h.getAtomicReference("job-" + j.getWorkerId());
   jRef.set(j);
 }
コード例 #7
0
  public BaseHazelCastStateTracker(String connectionString, String type, int stateTrackerPort)
      throws Exception {
    log.info(
        "Setting up hazelcast with type "
            + type
            + " connection string "
            + connectionString
            + " and port "
            + stateTrackerPort);

    if (type.equals("master") && !PortTaken.portTaken(stateTrackerPort)) {
      // sets up a proper connection string for reference wrt external actors needing a reference
      if (connectionString.equals("master")) {
        String host = InetAddress.getLocalHost().getHostName();
        this.connectionString = host + ":" + stateTrackerPort;
      }

      this.hazelCastPort = stateTrackerPort;
      config = hazelcast();

      h = Hazelcast.newHazelcastInstance(config);
      h.getCluster()
          .addMembershipListener(
              new MembershipListener() {

                @Override
                public void memberAdded(MembershipEvent membershipEvent) {
                  log.info("Member added " + membershipEvent.toString());
                }

                @Override
                public void memberRemoved(MembershipEvent membershipEvent) {
                  log.info("Member removed " + membershipEvent.toString());
                }

                @Override
                public void memberAttributeChanged(MemberAttributeEvent memberAttributeEvent) {
                  log.info("Member changed " + memberAttributeEvent.toString());
                }
              });
    } else if (type.equals("master") && PortTaken.portTaken(stateTrackerPort))
      throw new IllegalStateException(
          "Specified type was master and the port specified was taken, please specify a different port");
    else {

      setConnectionString(connectionString);
      log.info("Connecting to hazelcast on " + connectionString);
      ClientConfig client = new ClientConfig();
      client.getNetworkConfig().addAddress(connectionString);
      h = HazelcastClient.newHazelcastClient(client);
    }

    this.type = type;

    jobs = h.getList(JOBS);
    workers = h.getList(WORKERS);

    // we can make the assumption workers isn't empty because
    // the master node by default comes with a applyTransformToDestination of workers
    if (!this.type.equals("master")) {
      while (workers.isEmpty()) {
        log.warn("Waiting for data sync...");
        Thread.sleep(1000);
      }

      log.info("Workers is " + workers.size());
    }

    begunTraining = h.getAtomicReference(BEGUN);
    miniBatchSize = h.getAtomicReference(INPUT_SPLIT);
    workerEnabled = h.getMap(WORKER_ENABLED);
    replicate = h.getList(REPLICATE_WEIGHTS);
    topics = h.getList(TOPICS);
    updates = h.getList(UPDATES);
    heartbeat = h.getMap(HEART_BEAT);
    master = h.getAtomicReference(RESULT);
    isPretrain = h.getAtomicReference(IS_PRETRAIN);
    numTimesPretrain = h.getAtomicReference(NUM_TIMES_RUN_PRETRAIN);
    numTimesPretrainRan = h.getAtomicReference(NUM_TIMES_PRETRAIN_RAN);
    done = h.getAtomicReference(DONE);
    validationEpochs = h.getAtomicReference(VALIDATION_EPOCHS);
    improvementThreshold = h.getAtomicReference(IMPROVEMENT_THRESHOLD);
    bestLoss = h.getAtomicReference(BEST_LOSS);
    earlyStop = h.getAtomicReference(EARLY_STOP);
    patience = h.getAtomicReference(PATIENCE);
    patienceIncrease = h.getAtomicReference(PATIENCE_INCREASE);
    numBatches = h.getAtomicReference(NUM_BATCHES_SO_FAR_RAN);

    // applyTransformToDestination defaults only when master, otherwise, overrides previous values
    if (type.equals("master")) {
      begunTraining.set(false);
      saver = createUpdateSaver();
      numTimesPretrainRan.set(0);
      numTimesPretrain.set(1);
      isPretrain.set(true);
      done.set(false);
      resource = new StateTrackerDropWizardResource(this);
      bestLoss.set(Double.POSITIVE_INFINITY);
      earlyStop.set(true);
      patience.set(40.0);
      patienceIncrease.set(2.0);
      improvementThreshold.set(0.995);
      validationEpochs.set((int) Math.min(10, patience() / 2));
      numBatches.set(0);
    }
  }
コード例 #8
0
 /**
  * The patience improvement to use
  *
  * @param improvmentThreshold the patience improvement to applyTransformToDestination
  */
 @Override
 public void setImprovmentThreshold(double improvmentThreshold) {
   improvementThreshold.set(improvmentThreshold);
 }
コード例 #9
0
 /** Begin training */
 @Override
 public void beginTraining() {
   begunTraining.set(true);
 }
コード例 #10
0
 /**
  * Increments the number of batches ran. This is purely a count and does not necessarily mean
  * progress.
  *
  * @param numBatchesRan the number of batches ran to increment by
  */
 @Override
 public void incrementBatchesRan(int numBatchesRan) {
   numBatches.set(numBatchesRan + numBatches.get());
 }