Exemple #1
0
 private static void run(Callable c, boolean read, int size) {
   // Count all i/o time from here, including all retry overheads
   long start_io_ms = System.currentTimeMillis();
   while (true) {
     try {
       long start_ns = System.nanoTime(); // Blocking i/o call timing - without counting repeats
       c.call();
       TimeLine.record_IOclose(start_ns, start_io_ms, read ? 1 : 0, size, Value.HDFS);
       break;
       // Explicitly ignore the following exceptions but
       // fail on the rest IOExceptions
     } catch (EOFException e) {
       ignoreAndWait(e, false);
     } catch (SocketTimeoutException e) {
       ignoreAndWait(e, false);
     } catch (S3Exception e) {
       // Preserve S3Exception before IOException
       // Since this is tricky code - we are supporting different HDFS version
       // New version declares S3Exception as IOException
       // But old versions (0.20.xxx) declares it as RuntimeException
       // So we have to catch it before IOException !!!
       ignoreAndWait(e, false);
     } catch (IOException e) {
       ignoreAndWait(e, true);
     } catch (Exception e) {
       throw Log.errRTExcept(e);
     }
   }
 }
    protected void calcCounts(CoxPHModel model, final CoxPHTask coxMR) {
      CoxPHModel.CoxPHParameters p = model._parms;
      CoxPHModel.CoxPHOutput o = model._output;

      o.n_missing = o.n - coxMR.n;
      o.n = coxMR.n;
      for (int j = 0; j < o.x_mean_cat.length; j++)
        o.x_mean_cat[j] = coxMR.sumWeightedCatX[j] / coxMR.sumWeights;
      for (int j = 0; j < o.x_mean_num.length; j++)
        o.x_mean_num[j] = coxMR.dinfo()._normSub[j] + coxMR.sumWeightedNumX[j] / coxMR.sumWeights;
      System.arraycopy(
          coxMR.dinfo()._normSub, o.x_mean_num.length, o.mean_offset, 0, o.mean_offset.length);
      int nz = 0;
      for (int t = 0; t < coxMR.countEvents.length; ++t) {
        o.total_event += coxMR.countEvents[t];
        if (coxMR.sizeEvents[t] > 0 || coxMR.sizeCensored[t] > 0) {
          o.time[nz] = o.min_time + t;
          o.n_risk[nz] = coxMR.sizeRiskSet[t];
          o.n_event[nz] = coxMR.sizeEvents[t];
          o.n_censor[nz] = coxMR.sizeCensored[t];
          nz++;
        }
      }
      if (p.start_column == null)
        for (int t = o.n_risk.length - 2; t >= 0; --t) o.n_risk[t] += o.n_risk[t + 1];
    }
Exemple #3
0
    // Stopping criteria
    boolean isDone(KMeansModel model, double[][] newCenters, double[][] oldCenters) {
      if (!isRunning()) return true; // Stopped/cancelled
      // Stopped for running out iterations
      if (model._output._iterations >= _parms._max_iterations) return true;

      // Compute average change in standardized cluster centers
      if (oldCenters == null) return false; // No prior iteration, not stopping
      double average_change = 0;
      for (int clu = 0; clu < _parms._k; clu++)
        average_change +=
            hex.genmodel.GenModel.KMeans_distance(
                oldCenters[clu], newCenters[clu], _isCats, null, null);
      average_change /= _parms._k; // Average change per cluster
      model._output._avg_centroids_chg =
          ArrayUtils.copyAndFillOf(
              model._output._avg_centroids_chg,
              model._output._avg_centroids_chg.length + 1,
              average_change);
      model._output._training_time_ms =
          ArrayUtils.copyAndFillOf(
              model._output._training_time_ms,
              model._output._training_time_ms.length + 1,
              System.currentTimeMillis());
      return average_change < TOLERANCE;
    }
Exemple #4
0
 public NanoHTTPD.Response serve(NanoHTTPD server, Properties args, RequestType type) {
   // Needs to be done also for help to initialize or argument records
   String query = checkArguments(args, type);
   switch (type) {
     case help:
       return wrap(server, build(Response.done(serveHelp())));
     case json:
     case www:
       if (log()) {
         String log = getClass().getSimpleName();
         for (Object arg : args.keySet()) {
           String value = args.getProperty((String) arg);
           if (value != null && value.length() != 0) log += " " + arg + "=" + value;
         }
         Log.debug(Sys.HTTPD, log);
       }
       if (query != null) return wrap(server, query, type);
       long time = System.currentTimeMillis();
       Response response = serve();
       response.setTimeStart(time);
       if (type == RequestType.json) return wrap(server, response.toJson());
       return wrap(server, build(response));
     case debug:
       response = serve_debug();
       return wrap(server, build(response));
     case query:
       return wrap(server, query);
     default:
       throw new RuntimeException("Invalid request type " + type.toString());
   }
 }
    protected void initStats(final CoxPHModel model, final DataInfo dinfo) {
      CoxPHModel.CoxPHParameters p = model._parms;
      CoxPHModel.CoxPHOutput o = model._output;

      o.n = p.stop_column.length();
      o.data_info = dinfo;
      final int n_offsets = (p.offset_columns == null) ? 0 : p.offset_columns.length;
      final int n_coef = o.data_info.fullN() - n_offsets;
      final String[] coefNames = o.data_info.coefNames();
      o.coef_names = new String[n_coef];
      System.arraycopy(coefNames, 0, o.coef_names, 0, n_coef);
      o.coef = MemoryManager.malloc8d(n_coef);
      o.exp_coef = MemoryManager.malloc8d(n_coef);
      o.exp_neg_coef = MemoryManager.malloc8d(n_coef);
      o.se_coef = MemoryManager.malloc8d(n_coef);
      o.z_coef = MemoryManager.malloc8d(n_coef);
      o.gradient = MemoryManager.malloc8d(n_coef);
      o.hessian = malloc2DArray(n_coef, n_coef);
      o.var_coef = malloc2DArray(n_coef, n_coef);
      o.x_mean_cat = MemoryManager.malloc8d(n_coef - (o.data_info._nums - n_offsets));
      o.x_mean_num = MemoryManager.malloc8d(o.data_info._nums - n_offsets);
      o.mean_offset = MemoryManager.malloc8d(n_offsets);
      o.offset_names = new String[n_offsets];
      System.arraycopy(coefNames, n_coef, o.offset_names, 0, n_offsets);

      final Vec start_column = p.start_column;
      final Vec stop_column = p.stop_column;
      o.min_time =
          p.start_column == null ? (long) stop_column.min() : (long) start_column.min() + 1;
      o.max_time = (long) stop_column.max();

      final int n_time = new Vec.CollectDomain().doAll(stop_column).domain().length;
      o.time = MemoryManager.malloc8(n_time);
      o.n_risk = MemoryManager.malloc8d(n_time);
      o.n_event = MemoryManager.malloc8d(n_time);
      o.n_censor = MemoryManager.malloc8d(n_time);
      o.cumhaz_0 = MemoryManager.malloc8d(n_time);
      o.var_cumhaz_1 = MemoryManager.malloc8d(n_time);
      o.var_cumhaz_2 = malloc2DArray(n_time, n_coef);
    }
Exemple #6
0
 private static void run(Callable c, boolean read, int size) {
   // Count all i/o time from here, including all retry overheads
   long start_io_ms = System.currentTimeMillis();
   while (true) {
     try {
       long start_ns = System.nanoTime(); // Blocking i/o call timing - without counting repeats
       c.call();
       TimeLine.record_IOclose(start_ns, start_io_ms, read ? 1 : 0, size, Value.HDFS);
       break;
       // Explicitly ignore the following exceptions but
       // fail on the rest IOExceptions
     } catch (EOFException e) {
       ignoreAndWait(e, false);
     } catch (SocketTimeoutException e) {
       ignoreAndWait(e, false);
     } catch (IOException e) {
       ignoreAndWait(e, true);
     } catch (Exception e) {
       throw Log.errRTExcept(e);
     }
   }
 }
Exemple #7
0
 private static double[][] destandardize(
     double[][] centers, String[][] isCats, double[] means, double[] mults) {
   int K = centers.length;
   int N = centers[0].length;
   double[][] value = new double[K][N];
   for (int clu = 0; clu < K; clu++) {
     System.arraycopy(centers[clu], 0, value[clu], 0, N);
     if (mults != null) { // Reverse standardization
       for (int col = 0; col < N; col++)
         if (isCats[col] == null) value[clu][col] = value[clu][col] / mults[col] + means[col];
     }
   }
   return value;
 }
Exemple #8
0
 private void updateClusters(
     int[] clusters, int count, long chunk, long numrows, int rpc, long updatedRow) {
   final int offset = (int) (updatedRow - (rpc * chunk));
   final Key chunkKey = ValueArray.getChunkKey(chunk, _job.dest());
   final int[] message;
   if (count == clusters.length) message = clusters;
   else {
     message = new int[count];
     System.arraycopy(clusters, 0, message, 0, message.length);
   }
   final int rows = ValueArray.rpc(chunk, rpc, numrows);
   new Atomic() {
     @Override
     public Value atomic(Value val) {
       assert val == null || val._key.equals(chunkKey);
       AutoBuffer b = new AutoBuffer(rows * ROW_SIZE);
       if (val != null) b._bb.put(val.memOrLoad());
       for (int i = 0; i < message.length; i++) b.put4((offset + i) * 4, message[i]);
       b.position(b.limit());
       return new Value(chunkKey, b.buf());
     }
   }.invoke(chunkKey);
 }
Exemple #9
0
 /**
  * Start this task based on given top-level fork-join task representing job computation.
  *
  * @param fjtask top-level job computation task.
  * @return this job in {@link JobState#RUNNING} state
  * @see JobState
  * @see H2OCountedCompleter
  */
 public
 /** FIXME: should be final or at least protected */
 Job start(final H2OCountedCompleter fjtask) {
   assert state == JobState.CREATED : "Trying to run job which was already run?";
   assert fjtask != null : "Starting a job with null working task is not permitted! Fix you API";
   _fjtask = fjtask;
   start_time = System.currentTimeMillis();
   state = JobState.RUNNING;
   // Save the full state of the job
   UKV.put(self(), this);
   // Update job list
   new TAtomic<List>() {
     @Override
     public List atomic(List old) {
       if (old == null) old = new List();
       Key[] jobs = old._jobs;
       old._jobs = Arrays.copyOf(jobs, jobs.length + 1);
       old._jobs[jobs.length] = job_key;
       return old;
     }
   }.invoke(LIST);
   return this;
 }
  /**
   * Main constructor
   *
   * @param params Model parameters
   * @param dinfo Data Info
   * @param nClasses number of classes (1 for regression, 0 for autoencoder)
   * @param train User-given training data frame, prepared by AdaptTestTrain
   * @param valid User-specified validation data frame, prepared by AdaptTestTrain
   */
  public DeepLearningModelInfo(
      final DeepLearningParameters params,
      final DataInfo dinfo,
      int nClasses,
      Frame train,
      Frame valid) {
    _classification = nClasses > 1;
    _train = train;
    _valid = valid;
    data_info = dinfo;
    parameters =
        (DeepLearningParameters) params.clone(); // make a copy, don't change model's parameters
    DeepLearningParameters.Sanity.modifyParms(
        parameters, parameters, nClasses); // sanitize the model_info's parameters

    final int num_input = dinfo.fullN();
    final int num_output =
        get_params()._autoencoder
            ? num_input
            : (_classification ? train.lastVec().cardinality() : 1);
    if (!get_params()._autoencoder) assert (num_output == nClasses);

    _saw_missing_cats = dinfo._cats > 0 ? new boolean[data_info._cats] : null;
    assert (num_input > 0);
    assert (num_output > 0);
    if (has_momenta() && adaDelta())
      throw new IllegalArgumentException(
          "Cannot have non-zero momentum and adaptive rate at the same time.");
    final int layers = get_params()._hidden.length;
    // units (# neurons for each layer)
    units = new int[layers + 2];
    if (get_params()._max_categorical_features <= Integer.MAX_VALUE - dinfo._nums)
      units[0] = Math.min(dinfo._nums + get_params()._max_categorical_features, num_input);
    else units[0] = num_input;
    System.arraycopy(get_params()._hidden, 0, units, 1, layers);
    units[layers + 1] = num_output;

    boolean printLevels = units[0] > 1000L;
    boolean warn = units[0] > 100000L;
    if (printLevels) {
      final String[][] domains = dinfo._adaptedFrame.domains();
      int[] levels = new int[domains.length];
      for (int i = 0; i < levels.length; ++i) {
        levels[i] = domains[i] != null ? domains[i].length : 0;
      }
      Arrays.sort(levels);
      if (warn) {
        Log.warn(
            "===================================================================================================================================");
        Log.warn(
            num_input
                + " input features"
                + (dinfo._cats > 0 ? " (after categorical one-hot encoding)" : "")
                + ". Can be slow and require a lot of memory.");
      }
      if (levels[levels.length - 1] > 0) {
        int levelcutoff = levels[levels.length - 1 - Math.min(10, levels.length - 1)];
        int count = 0;
        for (int i = 0;
            i < dinfo._adaptedFrame.numCols() - (get_params()._autoencoder ? 0 : 1) && count < 10;
            ++i) {
          if (dinfo._adaptedFrame.domains()[i] != null
              && dinfo._adaptedFrame.domains()[i].length >= levelcutoff) {
            if (warn) {
              Log.warn(
                  "Categorical feature '"
                      + dinfo._adaptedFrame._names[i]
                      + "' has cardinality "
                      + dinfo._adaptedFrame.domains()[i].length
                      + ".");
            } else {
              Log.info(
                  "Categorical feature '"
                      + dinfo._adaptedFrame._names[i]
                      + "' has cardinality "
                      + dinfo._adaptedFrame.domains()[i].length
                      + ".");
            }
          }
          count++;
        }
      }
      if (warn) {
        Log.warn("Suggestions:");
        Log.warn(" *) Limit the size of the first hidden layer");
        if (dinfo._cats > 0) {
          Log.warn(
              " *) Limit the total number of one-hot encoded features with the parameter 'max_categorical_features'");
          Log.warn(
              " *) Run h2o.interaction(...,pairwise=F) on high-cardinality categorical columns to limit the factor count, see http://learn.h2o.ai");
        }
        Log.warn(
            "===================================================================================================================================");
      }
    }

    // weights (to connect layers)
    dense_row_weights = new Storage.DenseRowMatrix[layers + 1];
    dense_col_weights = new Storage.DenseColMatrix[layers + 1];

    // decide format of weight matrices row-major or col-major
    if (get_params()._col_major)
      dense_col_weights[0] = new Storage.DenseColMatrix(units[1], units[0]);
    else dense_row_weights[0] = new Storage.DenseRowMatrix(units[1], units[0]);
    for (int i = 1; i <= layers; ++i)
      dense_row_weights[i] = new Storage.DenseRowMatrix(units[i + 1] /*rows*/, units[i] /*cols*/);

    // biases (only for hidden layers and output layer)
    biases = new Storage.DenseVector[layers + 1];
    for (int i = 0; i <= layers; ++i) biases[i] = new Storage.DenseVector(units[i + 1]);
    // average activation (only for hidden layers)
    if (get_params()._autoencoder && get_params()._sparsity_beta > 0) {
      avg_activations = new Storage.DenseVector[layers];
      mean_a = new float[layers];
      for (int i = 0; i < layers; ++i) avg_activations[i] = new Storage.DenseVector(units[i + 1]);
    }
    allocateHelperArrays();
    // for diagnostics
    mean_rate = new float[units.length];
    rms_rate = new float[units.length];
    mean_bias = new float[units.length];
    rms_bias = new float[units.length];
    mean_weight = new float[units.length];
    rms_weight = new float[units.length];
  }
Exemple #11
0
  protected double doScoringAndSaveModel(
      boolean finalScoring, boolean oob, boolean build_tree_one_node) {
    double training_r2 = Double.NaN; // Training R^2 value, if computed
    long now = System.currentTimeMillis();
    if (_firstScore == 0) _firstScore = now;
    long sinceLastScore = now - _timeLastScoreStart;
    boolean updated = false;
    new ProgressUpdate(
            "Built " + _model._output._ntrees + " trees so far (out of " + _parms._ntrees + ").")
        .fork(_progressKey);
    // Now model already contains tid-trees in serialized form
    if (_parms._score_each_iteration
        || finalScoring
        || (now - _firstScore < 4000)
        || // Score every time for 4 secs
        // Throttle scoring to keep the cost sane; limit to a 10% duty cycle & every 4 secs
        (sinceLastScore > 4000
            && // Limit scoring updates to every 4sec
            (double) (_timeLastScoreEnd - _timeLastScoreStart) / sinceLastScore
                < 0.1)) { // 10% duty cycle

      checkMemoryFootPrint();

      // If validation is specified we use a model for scoring, so we need to
      // update it!  First we save model with trees (i.e., make them available
      // for scoring) and then update it with resulting error
      _model.update(_key);
      updated = true;

      Log.info("============================================================== ");
      SharedTreeModel.SharedTreeOutput out = _model._output;
      _timeLastScoreStart = now;
      // Score on training data
      new ProgressUpdate("Scoring the model.").fork(_progressKey);
      Score sc =
          new Score(this, true, oob, _model._output.getModelCategory())
              .doAll(train(), build_tree_one_node);
      ModelMetrics mm = sc.makeModelMetrics(_model, _parms.train());
      out._training_metrics = mm;
      if (oob)
        out._training_metrics._description = "Metrics reported on Out-Of-Bag training samples";
      out._scored_train[out._ntrees].fillFrom(mm);
      if (out._ntrees > 0) Log.info("Training " + out._scored_train[out._ntrees].toString());

      // Score again on validation data
      if (_parms._valid != null) {
        Score scv =
            new Score(this, false, false, _model._output.getModelCategory())
                .doAll(valid(), build_tree_one_node);
        ModelMetrics mmv = scv.makeModelMetrics(_model, _parms.valid());
        out._validation_metrics = mmv;
        out._scored_valid[out._ntrees].fillFrom(mmv);
        if (out._ntrees > 0) Log.info("Validation " + out._scored_valid[out._ntrees].toString());
      }

      if (out._ntrees > 0) { // Compute variable importances
        out._model_summary = createModelSummaryTable(out);
        out._scoring_history = createScoringHistoryTable(out);
        out._varimp = new hex.VarImp(_improvPerVar, out._names);
        out._variable_importances = hex.ModelMetrics.calcVarImp(out._varimp);
        Log.info(out._model_summary.toString());
        // For Debugging:
        //        Log.info(out._scoring_history.toString());
        //        Log.info(out._variable_importances.toString());
      }

      ConfusionMatrix cm = mm.cm();
      if (cm != null) {
        if (cm._cm.length <= _parms._max_confusion_matrix_size) {
          Log.info(cm.toASCII());
        } else {
          Log.info(
              "Confusion Matrix is too large (max_confusion_matrix_size="
                  + _parms._max_confusion_matrix_size
                  + "): "
                  + _nclass
                  + " classes.");
        }
      }
      _timeLastScoreEnd = System.currentTimeMillis();
    }

    // Double update - after either scoring or variable importance
    if (updated) _model.update(_key);
    return training_r2;
  }
Exemple #12
0
  public static class GLRMParameters extends Model.Parameters {
    public String algoName() {
      return "GLRM";
    }

    public String fullName() {
      return "Generalized Low Rank Modeling";
    }

    public String javaName() {
      return GLRMModel.class.getName();
    }

    public DataInfo.TransformType _transform =
        DataInfo.TransformType.NONE; // Data transformation (demean to compare with PCA)
    public int _k = 1; // Rank of resulting XY matrix
    public GLRM.Initialization _init = GLRM.Initialization.PlusPlus; // Initialization of Y matrix
    public SVDParameters.Method _svd_method =
        SVDParameters.Method.Randomized; // SVD initialization method (for _init = SVD)
    public Key<Frame> _user_y; // User-specified Y matrix (for _init = User)
    public Key<Frame> _user_x; // User-specified X matrix (for _init = User)
    public boolean _expand_user_y =
        true; // Should categorical columns in _user_y be expanded via one-hot encoding? (for _init
              // = User)

    // Loss functions
    public Loss _loss = Loss.Quadratic; // Default loss function for numeric cols
    public Loss _multi_loss = Loss.Categorical; // Default loss function for categorical cols
    public int _period = 1; // Length of the period when _loss = Periodic
    public Loss[] _loss_by_col; // Override default loss function for specific columns
    public int[] _loss_by_col_idx;

    // Regularization functions
    public Regularizer _regularization_x = Regularizer.None; // Regularization function for X matrix
    public Regularizer _regularization_y = Regularizer.None; // Regularization function for Y matrix
    public double _gamma_x = 0; // Regularization weight on X matrix
    public double _gamma_y = 0; // Regularization weight on Y matrix

    // Optional parameters
    public int _max_iterations = 1000; // Max iterations
    public int _max_updates = 2 * _max_iterations; // Max number of updates (X or Y)
    public double _init_step_size = 1.0; // Initial step size (decrease until we hit min_step_size)
    public double _min_step_size = 1e-4; // Min step size
    public long _seed = System.nanoTime(); // RNG seed

    @Override
    protected long nFoldSeed() {
      return _seed;
    }

    // public Key<Frame> _representation_key;     // Key to save X matrix
    public String _representation_name;
    public boolean _recover_svd =
        false; // Recover singular values and eigenvectors of XY at the end?
    public boolean _impute_original =
        false; // Reconstruct original training data by reversing _transform?
    public boolean _verbose = true; // Log when objective increases each iteration?

    // Quadratic -> Gaussian distribution ~ exp(-(a-u)^2)
    // Absolute -> Laplace distribution ~ exp(-|a-u|)
    public enum Loss {
      Quadratic(true),
      Absolute(true),
      Huber(true),
      Poisson(true),
      Periodic(true), // One-dimensional loss (numeric)
      Logistic(true, true),
      Hinge(true, true), // Boolean loss (categorical)
      Categorical(false),
      Ordinal(false); // Multi-dimensional loss (categorical)

      private boolean forNumeric;
      private boolean forBinary;

      Loss(boolean forNumeric) {
        this(forNumeric, false);
      }

      Loss(boolean forNumeric, boolean forBinary) {
        this.forNumeric = forNumeric;
        this.forBinary = forBinary;
      }

      public boolean isForNumeric() {
        return forNumeric;
      }

      public boolean isForCategorical() {
        return !forNumeric;
      }

      public boolean isForBinary() {
        return forBinary;
      }
    }

    // Non-negative matrix factorization (NNMF): r_x = r_y = NonNegative
    // Orthogonal NNMF: r_x = OneSparse, r_y = NonNegative
    // K-means clustering: r_x = UnitOneSparse, r_y = 0 (\gamma_y = 0)
    // Quadratic mixture: r_x = Simplex, r_y = 0 (\gamma_y = 0)
    public enum Regularizer {
      None,
      Quadratic,
      L2,
      L1,
      NonNegative,
      OneSparse,
      UnitOneSparse,
      Simplex
    }

    // Check if all elements of _loss_by_col are equal to a specific loss function
    private final boolean allLossEquals(Loss loss) {
      if (null == _loss_by_col) return false;

      boolean res = true;
      for (int i = 0; i < _loss_by_col.length; i++) {
        if (_loss_by_col[i] != loss) {
          res = false;
          break;
        }
      }
      return res;
    }

    // Closed form solution only if quadratic loss, no regularization or quadratic regularization
    // (same for X and Y), and no missing values
    public final boolean hasClosedForm() {
      long na_cnt = 0;
      Frame train = _train.get();
      for (int i = 0; i < train.numCols(); i++) na_cnt += train.vec(i).naCnt();
      return hasClosedForm(na_cnt);
    }

    public final boolean hasClosedForm(long na_cnt) {
      boolean loss_quad =
          (null == _loss_by_col && _loss == Quadratic)
              || (null != _loss_by_col
                  && allLossEquals(Quadratic)
                  && (_loss_by_col.length == _train.get().numCols() || _loss == Quadratic));

      return na_cnt == 0
          && ((loss_quad
              && (_gamma_x == 0
                  || _regularization_x == Regularizer.None
                  || _regularization_x == GLRMParameters.Regularizer.Quadratic)
              && (_gamma_y == 0
                  || _regularization_y == Regularizer.None
                  || _regularization_y == GLRMParameters.Regularizer.Quadratic)));
    }

    // L(u,a): Loss function
    public final double loss(double u, double a) {
      return loss(u, a, _loss);
    }

    public final double loss(double u, double a, Loss loss) {
      assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
      switch (loss) {
        case Quadratic:
          return (u - a) * (u - a);
        case Absolute:
          return Math.abs(u - a);
        case Huber:
          return Math.abs(u - a) <= 1 ? 0.5 * (u - a) * (u - a) : Math.abs(u - a) - 0.5;
        case Poisson:
          assert a >= 0 : "Poisson loss L(u,a) requires variable a >= 0";
          return Math.exp(u)
              + (a == 0 ? 0 : -a * u + a * Math.log(a) - a); // Since \lim_{a->0} a*log(a) = 0
        case Hinge:
          // return Math.max(1-a*u,0);
          return Math.max(1 - (a == 0 ? -u : u), 0); // Booleans are coded {0,1} instead of {-1,1}
        case Logistic:
          // return Math.log(1 + Math.exp(-a * u));
          return Math.log(
              1 + Math.exp(a == 0 ? u : -u)); // Booleans are coded {0,1} instead of {-1,1}
        case Periodic:
          return 1 - Math.cos((a - u) * (2 * Math.PI) / _period);
        default:
          throw new RuntimeException("Unknown loss function " + loss);
      }
    }

    // \grad_u L(u,a): Gradient of loss function with respect to u
    public final double lgrad(double u, double a) {
      return lgrad(u, a, _loss);
    }

    public final double lgrad(double u, double a, Loss loss) {
      assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
      switch (loss) {
        case Quadratic:
          return 2 * (u - a);
        case Absolute:
          return Math.signum(u - a);
        case Huber:
          return Math.abs(u - a) <= 1 ? u - a : Math.signum(u - a);
        case Poisson:
          assert a >= 0 : "Poisson loss L(u,a) requires variable a >= 0";
          return Math.exp(u) - a;
        case Hinge:
          // return a*u <= 1 ? -a : 0;
          return a == 0
              ? (-u <= 1 ? 1 : 0)
              : (u <= 1 ? -1 : 0); // Booleans are coded as {0,1} instead of {-1,1}
        case Logistic:
          // return -a/(1+Math.exp(a*u));
          return a == 0
              ? 1 / (1 + Math.exp(-u))
              : -1 / (1 + Math.exp(u)); // Booleans are coded as {0,1} instead of {-1,1}
        case Periodic:
          return ((2 * Math.PI) / _period) * Math.sin((a - u) * (2 * Math.PI) / _period);
        default:
          throw new RuntimeException("Unknown loss function " + loss);
      }
    }

    // L(u,a): Multidimensional loss function
    public final double mloss(double[] u, int a) {
      return mloss(u, a, _multi_loss);
    }

    public static double mloss(double[] u, int a, Loss multi_loss) {
      assert multi_loss.isForCategorical()
          : "Loss function " + multi_loss + " not applicable to categoricals";
      if (a < 0 || a > u.length - 1)
        throw new IllegalArgumentException(
            "Index must be between 0 and " + String.valueOf(u.length - 1));

      double sum = 0;
      switch (multi_loss) {
        case Categorical:
          for (int i = 0; i < u.length; i++) sum += Math.max(1 + u[i], 0);
          sum += Math.max(1 - u[a], 0) - Math.max(1 + u[a], 0);
          return sum;
        case Ordinal:
          for (int i = 0; i < u.length - 1; i++) sum += Math.max(a > i ? 1 - u[i] : 1, 0);
          return sum;
        default:
          throw new RuntimeException("Unknown multidimensional loss function " + multi_loss);
      }
    }

    // \grad_u L(u,a): Gradient of multidimensional loss function with respect to u
    public final double[] mlgrad(double[] u, int a) {
      return mlgrad(u, a, _multi_loss);
    }

    public static double[] mlgrad(double[] u, int a, Loss multi_loss) {
      assert multi_loss.isForCategorical()
          : "Loss function " + multi_loss + " not applicable to categoricals";
      if (a < 0 || a > u.length - 1)
        throw new IllegalArgumentException(
            "Index must be between 0 and " + String.valueOf(u.length - 1));

      double[] grad = new double[u.length];
      switch (multi_loss) {
        case Categorical:
          for (int i = 0; i < u.length; i++) grad[i] = (1 + u[i] > 0) ? 1 : 0;
          grad[a] = (1 - u[a] > 0) ? -1 : 0;
          return grad;
        case Ordinal:
          for (int i = 0; i < u.length - 1; i++) grad[i] = (a > i && 1 - u[i] > 0) ? -1 : 0;
          return grad;
        default:
          throw new RuntimeException("Unknown multidimensional loss function " + multi_loss);
      }
    }

    // r_i(x_i), r_j(y_j): Regularization function for single row x_i or column y_j
    public final double regularize_x(double[] u) {
      return regularize(u, _regularization_x);
    }

    public final double regularize_y(double[] u) {
      return regularize(u, _regularization_y);
    }

    public final double regularize(double[] u, Regularizer regularization) {
      if (u == null) return 0;
      double ureg = 0;

      switch (regularization) {
        case None:
          return 0;
        case Quadratic:
          for (int i = 0; i < u.length; i++) ureg += u[i] * u[i];
          return ureg;
        case L2:
          for (int i = 0; i < u.length; i++) ureg += u[i] * u[i];
          return Math.sqrt(ureg);
        case L1:
          for (int i = 0; i < u.length; i++) ureg += Math.abs(u[i]);
          return ureg;
        case NonNegative:
          for (int i = 0; i < u.length; i++) {
            if (u[i] < 0) return Double.POSITIVE_INFINITY;
          }
          return 0;
        case OneSparse:
          int card = 0;
          for (int i = 0; i < u.length; i++) {
            if (u[i] < 0) return Double.POSITIVE_INFINITY;
            else if (u[i] > 0) card++;
          }
          return card == 1 ? 0 : Double.POSITIVE_INFINITY;
        case UnitOneSparse:
          int ones = 0, zeros = 0;
          for (int i = 0; i < u.length; i++) {
            if (u[i] == 1) ones++;
            else if (u[i] == 0) zeros++;
            else return Double.POSITIVE_INFINITY;
          }
          return ones == 1 && zeros == u.length - 1 ? 0 : Double.POSITIVE_INFINITY;
        case Simplex:
          double sum = 0, absum = 0;
          for (int i = 0; i < u.length; i++) {
            if (u[i] < 0) return Double.POSITIVE_INFINITY;
            else {
              sum += u[i];
              absum += Math.abs(u[i]);
            }
          }
          return MathUtils.equalsWithinRecSumErr(sum, 1.0, u.length, absum)
              ? 0
              : Double.POSITIVE_INFINITY;
        default:
          throw new RuntimeException("Unknown regularization function " + regularization);
      }
    }

    // \sum_i r_i(x_i): Sum of regularization function for all entries of X
    public final double regularize_x(double[][] u) {
      return regularize(u, _regularization_x);
    }

    public final double regularize_y(double[][] u) {
      return regularize(u, _regularization_y);
    }

    public final double regularize(double[][] u, Regularizer regularization) {
      if (u == null || regularization == Regularizer.None) return 0;

      double ureg = 0;
      for (int i = 0; i < u.length; i++) {
        ureg += regularize(u[i], regularization);
        if (Double.isInfinite(ureg)) return ureg;
      }
      return ureg;
    }

    // \prox_{\alpha_k*r}(u): Proximal gradient of (step size) * (regularization function) evaluated
    // at vector u
    public final double[] rproxgrad_x(double[] u, double alpha, Random rand) {
      return rproxgrad(u, alpha, _gamma_x, _regularization_x, rand);
    }

    public final double[] rproxgrad_y(double[] u, double alpha, Random rand) {
      return rproxgrad(u, alpha, _gamma_y, _regularization_y, rand);
    }
    // public final double[] rproxgrad_x(double[] u, double alpha) { return rproxgrad(u, alpha,
    // _gamma_x, _regularization_x, RandomUtils.getRNG(_seed)); }
    // public final double[] rproxgrad_y(double[] u, double alpha) { return rproxgrad(u, alpha,
    // _gamma_y, _regularization_y, RandomUtils.getRNG(_seed)); }
    static double[] rproxgrad(
        double[] u, double alpha, double gamma, Regularizer regularization, Random rand) {
      if (u == null || alpha == 0 || gamma == 0) return u;
      double[] v = new double[u.length];

      switch (regularization) {
        case None:
          return u;
        case Quadratic:
          for (int i = 0; i < u.length; i++) v[i] = u[i] / (1 + 2 * alpha * gamma);
          return v;
        case L2:
          // Proof uses Moreau decomposition; see section 6.5.1 of Parikh and Boyd
          // https://web.stanford.edu/~boyd/papers/pdf/prox_algs.pdf
          double weight = 1 - alpha * gamma / ArrayUtils.l2norm(u);
          if (weight < 0) return v; // Zero vector
          for (int i = 0; i < u.length; i++) v[i] = weight * u[i];
          return v;
        case L1:
          for (int i = 0; i < u.length; i++)
            v[i] = Math.max(u[i] - alpha * gamma, 0) + Math.min(u[i] + alpha * gamma, 0);
          return v;
        case NonNegative:
          for (int i = 0; i < u.length; i++) v[i] = Math.max(u[i], 0);
          return v;
        case OneSparse:
          int idx = ArrayUtils.maxIndex(u, rand);
          v[idx] = u[idx] > 0 ? u[idx] : 1e-6;
          return v;
        case UnitOneSparse:
          idx = ArrayUtils.maxIndex(u, rand);
          v[idx] = 1;
          return v;
        case Simplex:
          // Proximal gradient algorithm by Chen and Ye in http://arxiv.org/pdf/1101.6081v2.pdf
          // 1) Sort input vector u in ascending order: u[1] <= ... <= u[n]
          int n = u.length;
          int[] idxs = new int[n];
          for (int i = 0; i < n; i++) idxs[i] = i;
          ArrayUtils.sort(idxs, u);

          // 2) Calculate cumulative sum of u in descending order
          // cumsum(u) = (..., u[n-2]+u[n-1]+u[n], u[n-1]+u[n], u[n])
          double[] ucsum = new double[n];
          ucsum[n - 1] = u[idxs[n - 1]];
          for (int i = n - 2; i >= 0; i--) ucsum[i] = ucsum[i + 1] + u[idxs[i]];

          // 3) Let t_i = (\sum_{j=i+1}^n u[j] - 1)/(n - i)
          // For i = n-1,...,1, set optimal t* to first t_i >= u[i]
          double t = (ucsum[0] - 1) / n; // Default t* = (\sum_{j=1}^n u[j] - 1)/n
          for (int i = n - 1; i >= 1; i--) {
            double tmp = (ucsum[i] - 1) / (n - i);
            if (tmp >= u[idxs[i - 1]]) {
              t = tmp;
              break;
            }
          }

          // 4) Return max(u - t*, 0) as projection of u onto simplex
          double[] x = new double[u.length];
          for (int i = 0; i < u.length; i++) x[i] = Math.max(u[i] - t, 0);
          return x;
        default:
          throw new RuntimeException("Unknown regularization function " + regularization);
      }
    }

    // Project X,Y matrices into appropriate subspace so regularizer is finite. Used during
    // initialization.
    public final double[] project_x(double[] u, Random rand) {
      return project(u, _regularization_x, rand);
    }

    public final double[] project_y(double[] u, Random rand) {
      return project(u, _regularization_y, rand);
    }

    public final double[] project(double[] u, Regularizer regularization, Random rand) {
      if (u == null) return u;

      switch (regularization) {
          // Domain is all real numbers
        case None:
        case Quadratic:
        case L2:
        case L1:
          return u;
          // Proximal operator of indicator function for a set C is (Euclidean) projection onto C
        case NonNegative:
        case OneSparse:
        case UnitOneSparse:
          return rproxgrad(u, 1, 1, regularization, rand);
        case Simplex:
          double reg =
              regularize(
                  u,
                  regularization); // Check if inside simplex before projecting since algo is
                                   // complicated
          if (reg == 0) return u;
          return rproxgrad(u, 1, 1, regularization, rand);
        default:
          throw new RuntimeException("Unknown regularization function " + regularization);
      }
    }

    // \hat A_{i,j} = \argmin_a L_{i,j}(x_iy_j, a): Data imputation for real numeric values
    public final double impute(double u) {
      return impute(u, _loss);
    }

    public static double impute(double u, Loss loss) {
      assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
      switch (loss) {
        case Quadratic:
        case Absolute:
        case Huber:
        case Periodic:
          return u;
        case Poisson:
          return Math.exp(u) - 1;
        case Hinge:
        case Logistic:
          return u > 0 ? 1 : 0; // Booleans are coded as {0,1} instead of {-1,1}
        default:
          throw new RuntimeException("Unknown loss function " + loss);
      }
    }

    // \hat A_{i,j} = \argmin_a L_{i,j}(x_iy_j, a): Data imputation for categorical values
    // {0,1,2,...}
    // TODO: Is there a faster way to find the loss minimizer?
    public final int mimpute(double[] u) {
      return mimpute(u, _multi_loss);
    }

    public static int mimpute(double[] u, Loss multi_loss) {
      assert multi_loss.isForCategorical()
          : "Loss function " + multi_loss + " not applicable to categoricals";
      switch (multi_loss) {
        case Categorical:
        case Ordinal:
          double[] cand = new double[u.length];
          for (int a = 0; a < cand.length; a++) cand[a] = mloss(u, a, multi_loss);
          return ArrayUtils.minIndex(cand);
        default:
          throw new RuntimeException("Unknown multidimensional loss function " + multi_loss);
      }
    }
  }
    @Override
    protected void compute2() {
      CoxPHModel model = null;
      try {
        Scope.enter();
        _parms.read_lock_frames(CoxPH.this);
        init(true);

        applyScoringFrameSideEffects();

        // The model to be built
        model = new CoxPHModel(dest(), _parms, new CoxPHModel.CoxPHOutput(CoxPH.this));
        model.delete_and_lock(_key);

        applyTrainingFrameSideEffects();

        int nResponses = 1;
        boolean useAllFactorLevels = false;
        final DataInfo dinfo =
            new DataInfo(
                Key.make(),
                _modelBuilderTrain,
                null,
                nResponses,
                useAllFactorLevels,
                DataInfo.TransformType.DEMEAN,
                TransformType.NONE,
                true,
                false,
                false,
                false,
                false,
                false);
        initStats(model, dinfo);

        final int n_offsets =
            (model._parms.offset_columns == null) ? 0 : model._parms.offset_columns.length;
        final int n_coef = dinfo.fullN() - n_offsets;
        final double[] step = MemoryManager.malloc8d(n_coef);
        final double[] oldCoef = MemoryManager.malloc8d(n_coef);
        final double[] newCoef = MemoryManager.malloc8d(n_coef);
        Arrays.fill(step, Double.NaN);
        Arrays.fill(oldCoef, Double.NaN);
        for (int j = 0; j < n_coef; ++j) newCoef[j] = model._parms.init;
        double oldLoglik = -Double.MAX_VALUE;
        final int n_time = (int) (model._output.max_time - model._output.min_time + 1);
        final boolean has_start_column = (model._parms.start_column != null);
        final boolean has_weights_column = (model._parms.weights_column != null);
        for (int i = 0; i <= model._parms.iter_max; ++i) {
          model._output.iter = i;

          final CoxPHTask coxMR =
              new CoxPHTask(
                      self(),
                      dinfo,
                      newCoef,
                      model._output.min_time,
                      n_time,
                      n_offsets,
                      has_start_column,
                      has_weights_column)
                  .doAll(dinfo._adaptedFrame);

          final double newLoglik = calcLoglik(model, coxMR);
          if (newLoglik > oldLoglik) {
            if (i == 0) calcCounts(model, coxMR);

            calcModelStats(model, newCoef, newLoglik);
            calcCumhaz_0(model, coxMR);

            if (newLoglik == 0) model._output.lre = -Math.log10(Math.abs(oldLoglik - newLoglik));
            else model._output.lre = -Math.log10(Math.abs((oldLoglik - newLoglik) / newLoglik));
            if (model._output.lre >= model._parms.lre_min) break;

            Arrays.fill(step, 0);
            for (int j = 0; j < n_coef; ++j)
              for (int k = 0; k < n_coef; ++k)
                step[j] -= model._output.var_coef[j][k] * model._output.gradient[k];
            for (int j = 0; j < n_coef; ++j)
              if (Double.isNaN(step[j]) || Double.isInfinite(step[j])) break;

            oldLoglik = newLoglik;
            System.arraycopy(newCoef, 0, oldCoef, 0, oldCoef.length);
          } else {
            for (int j = 0; j < n_coef; ++j) step[j] /= 2;
          }

          for (int j = 0; j < n_coef; ++j) newCoef[j] = oldCoef[j] - step[j];
        }

        model.update(_key);
      } catch (Throwable t) {
        Job thisJob = DKV.getGet(_key);
        if (thisJob._state == JobState.CANCELLED) {
          Log.info("Job cancelled by user.");
        } else {
          t.printStackTrace();
          failed(t);
          throw t;
        }
      } finally {
        updateModelOutput();
        _parms.read_unlock_frames(CoxPH.this);
        Scope.exit();
        done(); // Job done!
      }
      tryComplete();
    }
Exemple #14
0
 /**
  * Returns job execution time in milliseconds. If job is not running then returns job execution
  * time.
  */
 public final long runTimeMs() {
   long until = end_time != 0 ? end_time : System.currentTimeMillis();
   return until - start_time;
 }
Exemple #15
0
 /** Marks job as finished and records job end time. */
 public void remove() {
   end_time = System.currentTimeMillis();
   if (state == JobState.RUNNING) state = JobState.DONE;
   // Overwrite handle - copy end_time, state, msg
   replaceByJobHandle();
 }