Example #1
 /**
  * Retry loop around an I/O operation, with timing reported through
  * {@code TimeLine.record_IOclose}.
  *
  * <p>NOTE(review): the invocation of {@code c} and the exit from the loop are not visible in
  * this excerpt - confirm against the full source.
  *
  * @param c callable supplied by the caller (presumably the I/O action - verify)
  * @param read when true, 1 is passed to the timeline record; otherwise 0
  * @param size forwarded unchanged to the timeline record
  */
 private static void run(Callable c, boolean read, int size) {
   // Count all i/o time from here, including all retry overheads
   long start_io_ms = System.currentTimeMillis();
   while (true) {
     try {
       long start_ns = System.nanoTime(); // Blocking i/o call timing - without counting repeats
       TimeLine.record_IOclose(start_ns, start_io_ms, read ? 1 : 0, size, Value.HDFS);
       // Explicitly ignore the following exceptions but
       // fail on the rest of the IOExceptions
     } catch (EOFException e) {
       ignoreAndWait(e, false);
     } catch (SocketTimeoutException e) {
       ignoreAndWait(e, false);
     } catch (S3Exception e) {
       // Keep the S3Exception clause before the IOException clause.
       // This is tricky code - we are supporting different HDFS versions:
       // new versions declare S3Exception as an IOException,
       // but old versions (0.20.xxx) declare it as a RuntimeException.
       // So we have to catch it before IOException !!!
       ignoreAndWait(e, false);
     } catch (IOException e) {
       // Any other IOException also goes through ignoreAndWait, but with the second
       // argument set to true (presumably a more verbose report - verify in ignoreAndWait).
       ignoreAndWait(e, true);
     } catch (Exception e) {
       // Everything else is converted by Log.errRTExcept and thrown, ending the loop.
       throw Log.errRTExcept(e);
Example #2
    // Stopping criteria.
    // Returns true when the job is no longer running, when the iteration limit has been
    // reached, or when the average per-cluster change of the centers is below TOLERANCE.
    // Returns false when there are no previous centers to compare against.
    boolean isDone(KMeansModel model, double[][] newCenters, double[][] oldCenters) {
      if (!isRunning()) return true; // Stopped/cancelled
      // Stopped for running out of iterations
      if (model._output._iterations >= _parms._max_iterations) return true;

      // Compute average change in standardized cluster centers
      if (oldCenters == null) return false; // No prior iteration, not stopping
      double average_change = 0;
      // Accumulate one contribution per cluster, comparing old and new center of that cluster.
      // NOTE(review): the right-hand side below appears incomplete in this excerpt (the
      // called function is not visible) - confirm against the full source.
      for (int clu = 0; clu < _parms._k; clu++)
        average_change +=
                oldCenters[clu], newCenters[clu], _isCats, null, null);
      average_change /= _parms._k; // Average change per cluster
      // NOTE(review): the two assignments below also appear cut off; only the
      // "length + 1" argument is visible - presumably the arrays are grown by one entry.
      model._output._avg_centroids_chg =
              model._output._avg_centroids_chg.length + 1,
      model._output._training_time_ms =
              model._output._training_time_ms.length + 1,
      return average_change < TOLERANCE;
Example #3
 /**
  * Dispatches a request according to its {@link RequestType} and wraps the outcome into a
  * {@code NanoHTTPD.Response}.
  *
  * @param server the HTTP server, passed through to {@code wrap}
  * @param args request arguments; validated up front by {@code checkArguments}
  * @param type the kind of request being served
  * @return the wrapped response for the given request type
  */
 public NanoHTTPD.Response serve(NanoHTTPD server, Properties args, RequestType type) {
   // Needs to be done also for help to initialize or argument records
   String query = checkArguments(args, type);
   switch (type) {
     case help:
       return wrap(server, build(Response.done(serveHelp())));
     // json and www share one branch; they differ only in how the response is rendered.
     case json:
     case www:
       if (log()) {
         // Build one debug line: the class simple name followed by every non-empty
         // argument rendered as " name=value".
         String log = getClass().getSimpleName();
         for (Object arg : args.keySet()) {
           String value = args.getProperty((String) arg);
           if (value != null && value.length() != 0) log += " " + arg + "=" + value;
         Log.debug(Sys.HTTPD, log);
       // A non-null result from checkArguments is returned (wrapped) instead of serving.
       if (query != null) return wrap(server, query, type);
       // NOTE(review): 'time' is not read anywhere in the visible lines.
       long time = System.currentTimeMillis();
       Response response = serve();
       if (type == RequestType.json) return wrap(server, response.toJson());
       return wrap(server, build(response));
     case debug:
       response = serve_debug();
       return wrap(server, build(response));
     case query:
       return wrap(server, query);
       // NOTE(review): presumably the default branch - its label is not visible in this excerpt.
       throw new RuntimeException("Invalid request type " + type.toString());
Example #4
 /**
  * Retry loop around an I/O operation, with timing reported through
  * {@code TimeLine.record_IOclose}.
  *
  * <p>NOTE(review): the invocation of {@code c} and the exit from the loop are not visible in
  * this excerpt - confirm against the full source.
  *
  * @param c callable supplied by the caller (presumably the I/O action - verify)
  * @param read when true, 1 is passed to the timeline record; otherwise 0
  * @param size forwarded unchanged to the timeline record
  */
 private static void run(Callable c, boolean read, int size) {
   // Count all i/o time from here, including all retry overheads
   long start_io_ms = System.currentTimeMillis();
   while (true) {
     try {
       long start_ns = System.nanoTime(); // Blocking i/o call timing - without counting repeats
       TimeLine.record_IOclose(start_ns, start_io_ms, read ? 1 : 0, size, Value.HDFS);
       // Explicitly ignore the following exceptions but
       // fail on the rest of the IOExceptions
     } catch (EOFException e) {
       ignoreAndWait(e, false);
     } catch (SocketTimeoutException e) {
       ignoreAndWait(e, false);
     } catch (IOException e) {
       // Any other IOException also goes through ignoreAndWait, but with the second
       // argument set to true (presumably a more verbose report - verify in ignoreAndWait).
       ignoreAndWait(e, true);
     } catch (Exception e) {
       // Everything else is converted by Log.errRTExcept and thrown, ending the loop.
       throw Log.errRTExcept(e);
Example #5
  * Start this task based on given top-level fork-join task representing job computation.
  * @param fjtask top-level job computation task.
  * @return this job in {@link JobState#RUNNING} state
  * @see JobState
  * @see H2OCountedCompleter
 /** FIXME: should be final or at least protected */
 Job start(final H2OCountedCompleter fjtask) {
   // Preconditions (checked only when assertions are enabled): the job must still be in
   // the CREATED state and the task must be non-null.
   assert state == JobState.CREATED : "Trying to run job which was already run?";
   assert fjtask != null : "Starting a job with null working task is not permitted! Fix you API";
   _fjtask = fjtask;
   start_time = System.currentTimeMillis();
   state = JobState.RUNNING;
   // Save the full state of the job
   UKV.put(self(), this);
   // Update job list
   new TAtomic<List>() {
     // Appends job_key to the end of the job array, creating the List first if none exists.
     public List atomic(List old) {
       if (old == null) old = new List();
       Key[] jobs = old._jobs;
       // Grow the array by one slot and store this job's key in the new last position.
       old._jobs = Arrays.copyOf(jobs, jobs.length + 1);
       old._jobs[jobs.length] = job_key;
       return old;
   // NOTE(review): the end of the anonymous class and its invocation are not visible here.
   return this;
Example #6
  /**
   * Scores the model on training (and, if configured, validation) data when the throttling
   * condition allows it, refreshes the derived output tables, and updates the stored model
   * if scoring took place.
   *
   * @param finalScoring when true, scoring is performed regardless of the throttle
   * @param oob passed to the training {@code Score} task; when true the training metrics
   *     are labelled as Out-Of-Bag
   * @param build_tree_one_node forwarded to {@code doAll} of the scoring tasks
   * @return {@code training_r2}; NOTE(review): it is initialized to NaN and not reassigned in
   *     the visible lines - confirm against the full source
   */
  protected double doScoringAndSaveModel(
      boolean finalScoring, boolean oob, boolean build_tree_one_node) {
    double training_r2 = Double.NaN; // Training R^2 value, if computed
    long now = System.currentTimeMillis();
    // Remember the time of the very first call; used for the "first 4 secs" rule below.
    if (_firstScore == 0) _firstScore = now;
    long sinceLastScore = now - _timeLastScoreStart;
    boolean updated = false;
    new ProgressUpdate(
            "Built " + _model._output._ntrees + " trees so far (out of " + _parms._ntrees + ").")
    // Now model already contains tid-trees in serialized form
    if (_parms._score_each_iteration
        || finalScoring
        || (now - _firstScore < 4000)
        || // Score every time for 4 secs
        // Throttle scoring to keep the cost sane; limit to a 10% duty cycle & every 4 secs
        (sinceLastScore > 4000
            && // Limit scoring updates to every 4sec
            (double) (_timeLastScoreEnd - _timeLastScoreStart) / sinceLastScore
                < 0.1)) { // 10% duty cycle


      // If validation is specified we use a model for scoring, so we need to
      // update it!  First we save model with trees (i.e., make them available
      // for scoring) and then update it with resulting error
      updated = true;

      Log.info("============================================================== ");
      SharedTreeModel.SharedTreeOutput out = _model._output;
      _timeLastScoreStart = now;
      // Score on training data
      new ProgressUpdate("Scoring the model.").fork(_progressKey);
      Score sc =
          new Score(this, true, oob, _model._output.getModelCategory())
              .doAll(train(), build_tree_one_node);
      ModelMetrics mm = sc.makeModelMetrics(_model, _parms.train());
      out._training_metrics = mm;
      if (oob)
        out._training_metrics._description = "Metrics reported on Out-Of-Bag training samples";
      if (out._ntrees > 0) Log.info("Training " + out._scored_train[out._ntrees].toString());

      // Score again on validation data
      if (_parms._valid != null) {
        Score scv =
            new Score(this, false, false, _model._output.getModelCategory())
                .doAll(valid(), build_tree_one_node);
        ModelMetrics mmv = scv.makeModelMetrics(_model, _parms.valid());
        out._validation_metrics = mmv;
        if (out._ntrees > 0) Log.info("Validation " + out._scored_valid[out._ntrees].toString());

      // Refresh the summary, scoring history and variable importances once at least one
      // tree exists.
      if (out._ntrees > 0) { // Compute variable importances
        out._model_summary = createModelSummaryTable(out);
        out._scoring_history = createScoringHistoryTable(out);
        out._varimp = new hex.VarImp(_improvPerVar, out._names);
        out._variable_importances = hex.ModelMetrics.calcVarImp(out._varimp);
        // For Debugging:
        //        Log.info(out._scoring_history.toString());
        //        Log.info(out._variable_importances.toString());

      // The confusion matrix comes from the training metrics and may be absent (null).
      // NOTE(review): both branches below are incomplete in this excerpt; only the
      // "too large" message text is visible.
      ConfusionMatrix cm = mm.cm();
      if (cm != null) {
        if (cm._cm.length <= _parms._max_confusion_matrix_size) {
        } else {
              "Confusion Matrix is too large (max_confusion_matrix_size="
                  + _parms._max_confusion_matrix_size
                  + "): "
                  + _nclass
                  + " classes.");
      // Record when scoring finished; feeds the duty-cycle computation on the next call.
      _timeLastScoreEnd = System.currentTimeMillis();

    // Double update - after either scoring or variable importance
    if (updated) _model.update(_key);
    return training_r2;
Example #7
  * Returns job execution time in milliseconds. If no end time has been recorded yet, returns
  * the time elapsed since the job started; otherwise returns the total execution time.
 public final long runTimeMs() {
   // An end_time of 0 means no end has been recorded, so measure up to the current time.
   long until = end_time != 0 ? end_time : System.currentTimeMillis();
   return until - start_time;
Example #8
 /** Marks job as finished and records job end time. */
 public void remove() {
   end_time = System.currentTimeMillis();
   // Only a RUNNING job is moved to DONE; any other state is left as it is.
   if (state == JobState.RUNNING) state = JobState.DONE;
   // Overwrite handle - copy end_time, state, msg
   // NOTE(review): the code performing this overwrite is not visible in this excerpt.