/**
   * Compute variable importance, based on Gedeon, "Data Mining of Inputs: Analysing Magnitude
   * and Functional Measures".
   *
   * @return variable importances for input features
   */
  public float[] computeVariableImportances() {
    float[] vi = new float[units[0]];
    Arrays.fill(vi, 0f);

    float[][] Qik = new float[units[0]][units[2]]; // importance of input i on output k
    float[] sum_wj = new float[units[1]]; // sum of incoming weights into first hidden layer
    float[] sum_wk =
        new float[units[2]]; // sum of incoming weights into output layer (or second hidden layer)
    for (float[] Qi : Qik) Arrays.fill(Qi, 0f);
    Arrays.fill(sum_wj, 0f);
    Arrays.fill(sum_wk, 0f);

    // compute sum of absolute incoming weights
    for (int j = 0; j < units[1]; j++) {
      for (int i = 0; i < units[0]; i++) {
        float wij = get_weights(0).get(j, i);
        sum_wj[j] += Math.abs(wij);
      }
    }
    for (int k = 0; k < units[2]; k++) {
      for (int j = 0; j < units[1]; j++) {
        float wjk = get_weights(1).get(k, j);
        sum_wk[k] += Math.abs(wjk);
      }
    }
    // compute importance of input i on output k as product of connecting weights going through j
    for (int i = 0; i < units[0]; i++) {
      for (int k = 0; k < units[2]; k++) {
        for (int j = 0; j < units[1]; j++) {
          float wij = get_weights(0).get(j, i);
          float wjk = get_weights(1).get(k, j);
          // Qik[i][k] += Math.abs(wij)/sum_wj[j] * wjk; //Wong,Gedeon,Taggart '95
          Qik[i][k] += Math.abs(wij) / sum_wj[j] * Math.abs(wjk) / sum_wk[k]; // Gedeon '97
        }
      }
    }
    // normalize Qik over all outputs k
    for (int k = 0; k < units[2]; k++) {
      float sumQk = 0;
      for (int i = 0; i < units[0]; i++) sumQk += Qik[i][k];
      for (int i = 0; i < units[0]; i++) Qik[i][k] /= sumQk;
    }
    // importance for feature i is the sum over k of i->k importances
    for (int i = 0; i < units[0]; i++) vi[i] = ArrayUtils.sum(Qik[i]);

    // normalize importances such that max(vi) = 1
    ArrayUtils.div(vi, ArrayUtils.maxValue(vi));

    // zero out missing categorical variables if they were never seen
    if (_saw_missing_cats != null) {
      for (int i = 0; i < _saw_missing_cats.length; ++i) {
        assert (data_info._catMissing[i] == 1); // have a missing bucket for each categorical
        if (!_saw_missing_cats[i]) vi[data_info._catOffsets[i + 1] - 1] = 0;
      }
    }
    return vi;
  }
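A minimal, self-contained sketch of the same Gedeon '97 ratio, assuming the input-to-hidden weights are available as a plain matrix w1[hidden][input] and the hidden-to-output weights as w2[output][hidden] (hypothetical array arguments, standing in for the get_weights() storage used above):

  // Sketch only: Gedeon '97 importance of input i, given plain weight matrices
  // w1[j][i] (input -> hidden) and w2[k][j] (hidden -> output).
  static float[] gedeonImportance(float[][] w1, float[][] w2) {
    int nIn = w1[0].length, nHid = w1.length, nOut = w2.length;
    float[] sumWj = new float[nHid], sumWk = new float[nOut];
    for (int j = 0; j < nHid; j++)
      for (int i = 0; i < nIn; i++) sumWj[j] += Math.abs(w1[j][i]);
    for (int k = 0; k < nOut; k++)
      for (int j = 0; j < nHid; j++) sumWk[k] += Math.abs(w2[k][j]);
    float[][] q = new float[nIn][nOut]; // importance of input i on output k
    for (int i = 0; i < nIn; i++)
      for (int k = 0; k < nOut; k++)
        for (int j = 0; j < nHid; j++)
          q[i][k] += Math.abs(w1[j][i]) / sumWj[j] * Math.abs(w2[k][j]) / sumWk[k];
    float[] vi = new float[nIn];
    for (int k = 0; k < nOut; k++) { // normalize each output column, then sum over outputs
      float s = 0;
      for (int i = 0; i < nIn; i++) s += q[i][k];
      for (int i = 0; i < nIn; i++) vi[i] += q[i][k] / s;
    }
    float max = 0;
    for (float v : vi) max = Math.max(max, v); // scale so max(vi) == 1
    for (int i = 0; i < nIn; i++) vi[i] /= max;
    return vi;
  }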
Example #2
 public final double loss(double u, double a, Loss loss) {
   assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
   switch (loss) {
     case Quadratic:
       return (u - a) * (u - a);
     case Absolute:
       return Math.abs(u - a);
     case Huber:
       return Math.abs(u - a) <= 1 ? 0.5 * (u - a) * (u - a) : Math.abs(u - a) - 0.5;
     case Poisson:
       assert a >= 0 : "Poisson loss L(u,a) requires variable a >= 0";
       return Math.exp(u)
           + (a == 0 ? 0 : -a * u + a * Math.log(a) - a); // Since \lim_{a->0} a*log(a) = 0
     case Hinge:
       // return Math.max(1-a*u,0);
       return Math.max(1 - (a == 0 ? -u : u), 0); // Booleans are coded {0,1} instead of {-1,1}
     case Logistic:
       // return Math.log(1 + Math.exp(-a * u));
       return Math.log(
           1 + Math.exp(a == 0 ? u : -u)); // Booleans are coded {0,1} instead of {-1,1}
     case Periodic:
       return 1 - Math.cos((a - u) * (2 * Math.PI) / _period);
     default:
       throw new RuntimeException("Unknown loss function " + loss);
   }
 }
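As a quick check of the numeric branches: for u = 2.5 and a = 1 (so u - a = 1.5), Quadratic returns 2.25, Absolute returns 1.5, and Huber returns 1.0, since |u - a| > 1 puts it on the linear branch (1.5 - 0.5).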
Example #3
    public final double regularize(double[] u, Regularizer regularization) {
      if (u == null) return 0;
      double ureg = 0;

      switch (regularization) {
        case None:
          return 0;
        case Quadratic:
          for (int i = 0; i < u.length; i++) ureg += u[i] * u[i];
          return ureg;
        case L2:
          for (int i = 0; i < u.length; i++) ureg += u[i] * u[i];
          return Math.sqrt(ureg);
        case L1:
          for (int i = 0; i < u.length; i++) ureg += Math.abs(u[i]);
          return ureg;
        case NonNegative:
          for (int i = 0; i < u.length; i++) {
            if (u[i] < 0) return Double.POSITIVE_INFINITY;
          }
          return 0;
        case OneSparse:
          int card = 0;
          for (int i = 0; i < u.length; i++) {
            if (u[i] < 0) return Double.POSITIVE_INFINITY;
            else if (u[i] > 0) card++;
          }
          return card == 1 ? 0 : Double.POSITIVE_INFINITY;
        case UnitOneSparse:
          int ones = 0, zeros = 0;
          for (int i = 0; i < u.length; i++) {
            if (u[i] == 1) ones++;
            else if (u[i] == 0) zeros++;
            else return Double.POSITIVE_INFINITY;
          }
          return ones == 1 && zeros == u.length - 1 ? 0 : Double.POSITIVE_INFINITY;
        case Simplex:
          double sum = 0, absum = 0;
          for (int i = 0; i < u.length; i++) {
            if (u[i] < 0) return Double.POSITIVE_INFINITY;
            else {
              sum += u[i];
              absum += Math.abs(u[i]);
            }
          }
          return MathUtils.equalsWithinRecSumErr(sum, 1.0, u.length, absum)
              ? 0
              : Double.POSITIVE_INFINITY;
        default:
          throw new RuntimeException("Unknown regularization function " + regularization);
      }
    }
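As a worked example, for u = {0.2, 0.3, 0.5}: Quadratic returns 0.38, L2 returns sqrt(0.38) ≈ 0.616, L1 returns 1.0, NonNegative returns 0, OneSparse and UnitOneSparse both return positive infinity (three nonzero entries, none of them a single 1 among zeros), and Simplex returns 0 because the entries are nonnegative and sum to 1.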
Example #4
 // Read the 'tree' columns, do model-specific math and put the results in the
 // fs[] array, and return the sum.  Dividing any fs[] element by the sum
 // turns the results into a probability distribution.
 @Override
 protected float score1(Chunk chks[], float fs[ /*nclass*/], int row) {
    if (_nclass == 1) // Classification?
      return (float) chk_tree(chks, 0).at0(row); // Regression.
   if (_nclass == 2) { // The Boolean Optimization
     // This optimization assumes the 2nd tree of a 2-class system is the
     // inverse of the first.  Fill in the missing tree
     fs[1] = (float) Math.exp(chk_tree(chks, 0).at0(row));
      fs[2] = 1.0f / fs[1]; // exp(-d) == 1/exp(d)
     return fs[1] + fs[2];
   }
   float sum = 0;
    for (int k = 0; k < _nclass; k++) // Sum across class likelihoods
      sum += (fs[k + 1] = (float) Math.exp(chk_tree(chks, k).at0(row)));
   return sum;
 }
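In the two-class branch above, fs[1] = exp(d) and fs[2] = exp(-d) = 1/fs[1]; after the caller divides by their sum, fs[1]/(fs[1] + fs[2]) = 1/(1 + exp(-2d)), i.e. the usual logistic transform of twice the tree score.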
Example #5
 public final double lgrad(double u, double a, Loss loss) {
   assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
   switch (loss) {
     case Quadratic:
       return 2 * (u - a);
     case Absolute:
       return Math.signum(u - a);
     case Huber:
       return Math.abs(u - a) <= 1 ? u - a : Math.signum(u - a);
     case Poisson:
       assert a >= 0 : "Poisson loss L(u,a) requires variable a >= 0";
       return Math.exp(u) - a;
     case Hinge:
       // return a*u <= 1 ? -a : 0;
       return a == 0
           ? (-u <= 1 ? 1 : 0)
           : (u <= 1 ? -1 : 0); // Booleans are coded as {0,1} instead of {-1,1}
     case Logistic:
       // return -a/(1+Math.exp(a*u));
       return a == 0
           ? 1 / (1 + Math.exp(-u))
           : -1 / (1 + Math.exp(u)); // Booleans are coded as {0,1} instead of {-1,1}
     case Periodic:
       return ((2 * Math.PI) / _period) * Math.sin((a - u) * (2 * Math.PI) / _period);
     default:
       throw new RuntimeException("Unknown loss function " + loss);
   }
 }
Example #6
    TotSS(double[] means, double[] mults, int[] modes, String[][] isCats, int[] card) {
      _means = means;
      _mults = mults;
      _modes = modes;
      _tss = 0;
      _isCats = isCats;
      _card = card;

      // Mean of numeric col is zero when standardized
      _gc = mults != null ? new double[means.length] : Arrays.copyOf(means, means.length);
      for (int i = 0; i < means.length; i++) {
        if (isCats[i] != null)
          // TODO: Should set to majority class of categorical column
          _gc[i] = Math.min(Math.round(means[i]), _card[i] - 1);
      }
    }
Example #7
 private static void addFolder(FileSystem fs, Path p, JsonArray succeeded, JsonArray failed) {
   try {
     if (fs == null) return;
     for (FileStatus file : fs.listStatus(p)) {
       Path pfs = file.getPath();
       if (file.isDir()) {
         addFolder(fs, pfs, succeeded, failed);
       } else {
         Key k = Key.make(pfs.toString());
         long size = file.getLen();
         Value val = null;
         if (pfs.getName().endsWith(Extensions.JSON)) {
           JsonParser parser = new JsonParser();
           JsonObject json = parser.parse(new InputStreamReader(fs.open(pfs))).getAsJsonObject();
           JsonElement v = json.get(Constants.VERSION);
           if (v == null) throw new RuntimeException("Missing version");
           JsonElement type = json.get(Constants.TYPE);
           if (type == null) throw new RuntimeException("Missing type");
           Class c = Class.forName(type.getAsString());
           OldModel model = (OldModel) c.newInstance();
           model.fromJson(json);
         } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
           FSDataInputStream s = fs.open(pfs);
           int sz = (int) Math.min(1L << 20, size); // Read up to the 1st meg
           byte[] mem = MemoryManager.malloc1(sz);
           s.readFully(mem);
           // Convert to a ValueArray (hope it fits in 1Meg!)
           ValueArray ary = new ValueArray(k, 0).read(new AutoBuffer(mem));
           val = new Value(k, ary, Value.HDFS);
          } else if (size >= 2 * ValueArray.CHUNK_SZ) {
            // ValueArray byte wrapper over a large file
            val = new Value(k, new ValueArray(k, size), Value.HDFS);
         } else {
           val = new Value(k, (int) size, Value.HDFS); // Plain Value
           val.setdsk();
         }
         DKV.put(k, val);
         Log.info("PersistHdfs: DKV.put(" + k + ")");
         JsonObject o = new JsonObject();
         o.addProperty(Constants.KEY, k.toString());
         o.addProperty(Constants.FILE, pfs.toString());
         o.addProperty(Constants.VALUE_SIZE, file.getLen());
         succeeded.add(o);
       }
     }
   } catch (Exception e) {
     Log.err(e);
     JsonObject o = new JsonObject();
     o.addProperty(Constants.FILE, p.toString());
     o.addProperty(Constants.ERROR, e.getMessage());
     failed.add(o);
   }
 }
Example #8
    public static double mloss(double[] u, int a, Loss multi_loss) {
      assert multi_loss.isForCategorical()
          : "Loss function " + multi_loss + " not applicable to categoricals";
      if (a < 0 || a > u.length - 1)
        throw new IllegalArgumentException(
            "Index must be between 0 and " + String.valueOf(u.length - 1));

      double sum = 0;
      switch (multi_loss) {
        case Categorical:
          for (int i = 0; i < u.length; i++) sum += Math.max(1 + u[i], 0);
          sum += Math.max(1 - u[a], 0) - Math.max(1 + u[a], 0);
          return sum;
        case Ordinal:
          for (int i = 0; i < u.length - 1; i++) sum += Math.max(a > i ? 1 - u[i] : 1, 0);
          return sum;
        default:
          throw new RuntimeException("Unknown multidimensional loss function " + multi_loss);
      }
    }
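For example, with Categorical loss, u = {0.5, -1.5, 2.0} and a = 0: the loop accumulates max(1 + u[i], 0) = 1.5 + 0 + 3.0 = 4.5, and the correction term adds max(1 - 0.5, 0) - max(1 + 0.5, 0) = 0.5 - 1.5 = -1.0, so the loss is 3.5; equivalently, it is a hinge penalty on every wrong class plus max(1 - u[a], 0) on the true class.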
    protected void calcModelStats(
        CoxPHModel model, final double[] newCoef, final double newLoglik) {
      CoxPHModel.CoxPHParameters p = model._parms;
      CoxPHModel.CoxPHOutput o = model._output;

      final int n_coef = o.coef.length;
      final Matrix inv_hessian = new Matrix(o.hessian).inverse();
      for (int j = 0; j < n_coef; ++j) {
        for (int k = 0; k <= j; ++k) {
          final double elem = -inv_hessian.get(j, k);
          o.var_coef[j][k] = elem;
          o.var_coef[k][j] = elem;
        }
      }
      for (int j = 0; j < n_coef; ++j) {
        o.coef[j] = newCoef[j];
        o.exp_coef[j] = Math.exp(o.coef[j]);
        o.exp_neg_coef[j] = Math.exp(-o.coef[j]);
        o.se_coef[j] = Math.sqrt(o.var_coef[j][j]);
        o.z_coef[j] = o.coef[j] / o.se_coef[j];
      }
      if (o.iter == 0) {
        o.null_loglik = newLoglik;
        o.maxrsq = 1 - Math.exp(2 * o.null_loglik / o.n);
        o.score_test = 0;
        for (int j = 0; j < n_coef; ++j) {
          double sum = 0;
          for (int k = 0; k < n_coef; ++k) sum += o.var_coef[j][k] * o.gradient[k];
          o.score_test += o.gradient[j] * sum;
        }
      }
      o.loglik = newLoglik;
      o.loglik_test = -2 * (o.null_loglik - o.loglik);
      o.rsq = 1 - Math.exp(-o.loglik_test / o.n);
      o.wald_test = 0;
      for (int j = 0; j < n_coef; ++j) {
        double sum = 0;
        for (int k = 0; k < n_coef; ++k) sum -= o.hessian[j][k] * (o.coef[k] - p.init);
        o.wald_test += (o.coef[j] - p.init) * sum;
      }
    }
Example #10
    private TwoDimTable createModelSummaryTable(KMeansModel.KMeansOutput output) {
      List<String> colHeaders = new ArrayList<>();
      List<String> colTypes = new ArrayList<>();
      List<String> colFormat = new ArrayList<>();
      colHeaders.add("Number of Rows");
      colTypes.add("long");
      colFormat.add("%d");
      colHeaders.add("Number of Clusters");
      colTypes.add("long");
      colFormat.add("%d");
      colHeaders.add("Number of Categorical Columns");
      colTypes.add("long");
      colFormat.add("%d");
      colHeaders.add("Number of Iterations");
      colTypes.add("long");
      colFormat.add("%d");
      colHeaders.add("Within Cluster Sum of Squares");
      colTypes.add("double");
      colFormat.add("%.5f");
      colHeaders.add("Total Sum of Squares");
      colTypes.add("double");
      colFormat.add("%.5f");
      colHeaders.add("Between Cluster Sum of Squares");
      colTypes.add("double");
      colFormat.add("%.5f");

      final int rows = 1;
      TwoDimTable table =
          new TwoDimTable(
              "Model Summary",
              null,
              new String[rows],
              colHeaders.toArray(new String[0]),
              colTypes.toArray(new String[0]),
              colFormat.toArray(new String[0]),
              "");
      int row = 0;
      int col = 0;
      table.set(
          row,
          col++,
          Math.round(_train.numRows() * (hasWeightCol() ? _train.lastVec().mean() : 1)));
      table.set(row, col++, output._centers_raw.length);
      table.set(row, col++, output._categorical_column_count);
      table.set(row, col++, output._iterations);
      table.set(row, col++, output._tot_withinss);
      table.set(row, col++, output._totss);
      table.set(row, col++, output._betweenss);
      return table;
    }
Example #11
 @SuppressWarnings("unused")
 @Override
 protected void init() {
   super.init();
   // Initialize local variables
    // classification: mtry=sqrt(_ncols), regression: mtry=_ncols/3
    _mtry = (mtries == -1)
        ? (classification ? Math.max((int) Math.sqrt(_ncols), 1) : Math.max(_ncols / 3, 1))
        : mtries;
   if (!(1 <= _mtry && _mtry <= _ncols))
     throw new IllegalArgumentException(
         "Computed mtry should be in interval <1,#cols> but it is " + _mtry);
   if (!(0.0 < sample_rate && sample_rate <= 1.0))
     throw new IllegalArgumentException(
         "Sample rate should be interval (0,1> but it is " + sample_rate);
   if (DEBUG_DETERMINISTIC && seed == -1) _seed = 0x1321e74a0192470cL; // fixed version of seed
   else if (seed == -1) _seed = _seedGenerator.nextLong();
   else _seed = seed;
   if (sample_rate == 1f && validation != null)
     Log.warn(
         Sys.DRF__,
         "Sample rate is 100% and no validation dataset is required. There are no OOB data to perform validation!");
 }
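For instance, with _ncols = 100 and mtries left at -1, classification defaults to _mtry = max((int) sqrt(100), 1) = 10 sampled columns per split, while regression defaults to max(100 / 3, 1) = 33.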
Example #12
 @Override
 protected float[] score0(double[] data, float[] preds) {
   float[] p = super.score0(data, preds);
   if (nclasses() > 1) { // classification
     // Because we call Math.exp, we have to be numerically stable or else
     // we get Infinities, and then shortly NaN's.  Rescale the data so the
     // largest value is +/-1 and the other values are smaller.
     // See notes here:  http://www.hongliangjie.com/2011/01/07/logsum/
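      // For instance, a raw score of 100 already overflows: (float) Math.exp(100) is
      // Float.POSITIVE_INFINITY, while exp(100 - maxval) stays finite; the common factor
      // exp(-maxval) cancels when each p[k] is divided by dsum, so the probabilities are unchanged.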
     float maxval = Float.NEGATIVE_INFINITY;
     float dsum = 0;
     if (nclasses() == 2) p[2] = -p[1];
     // Find a max
     for (int k = 1; k < p.length; k++) maxval = Math.max(maxval, p[k]);
     assert !Float.isInfinite(maxval)
         : "Something is wrong with GBM trees since returned prediction is "
             + Arrays.toString(p);
     for (int k = 1; k < p.length; k++) dsum += (p[k] = (float) Math.exp(p[k] - maxval));
     div(p, dsum);
     p[0] = getPrediction(p, data);
   } else { // regression
     // do nothing for regression
   }
   return p;
 }
Example #13
 public static double impute(double u, Loss loss) {
   assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
   switch (loss) {
     case Quadratic:
     case Absolute:
     case Huber:
     case Periodic:
       return u;
     case Poisson:
       return Math.exp(u) - 1;
     case Hinge:
     case Logistic:
       return u > 0 ? 1 : 0; // Booleans are coded as {0,1} instead of {-1,1}
     default:
       throw new RuntimeException("Unknown loss function " + loss);
   }
 }
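For example, under Poisson loss a reconstruction of u = 1.2 is imputed as exp(1.2) - 1 ≈ 2.32, while Hinge and Logistic simply threshold at zero, mapping u = -0.3 to 0 and u = 0.7 to 1.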
 /**
  * Initialization of neural net weights cf.
  * http://machinelearning.wustl.edu/mlpapers/paper_files/AISTATS2010_GlorotB10.pdf
  */
 private void randomizeWeights() {
   for (int w = 0; w < dense_row_weights.length; ++w) {
     final Random rng =
         water.util.RandomUtils.getRNG(
             get_params()._seed + 0xBAD5EED + w + 1); // to match NeuralNet behavior
     final double range = Math.sqrt(6. / (units[w] + units[w + 1]));
     for (int i = 0; i < get_weights(w).rows(); i++) {
       for (int j = 0; j < get_weights(w).cols(); j++) {
         if (get_params()._initial_weight_distribution
             == DeepLearningParameters.InitialWeightDistribution.UniformAdaptive) {
           // cf. http://machinelearning.wustl.edu/mlpapers/paper_files/AISTATS2010_GlorotB10.pdf
            if (w == dense_row_weights.length - 1 && _classification)
              // Softmax might need an extra factor 4, since it's like a sigmoid
              get_weights(w).set(i, j, (float) (4. * uniformDist(rng, -range, range)));
            else get_weights(w).set(i, j, (float) uniformDist(rng, -range, range));
          } else if (get_params()._initial_weight_distribution
              == DeepLearningParameters.InitialWeightDistribution.Uniform) {
            get_weights(w).set(i, j, (float) uniformDist(
                rng, -get_params()._initial_weight_scale, get_params()._initial_weight_scale));
         } else if (get_params()._initial_weight_distribution
             == DeepLearningParameters.InitialWeightDistribution.Normal) {
           get_weights(w)
               .set(i, j, (float) (rng.nextGaussian() * get_params()._initial_weight_scale));
         }
       }
     }
   }
 }
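For a concrete feel for the adaptive (Glorot) range: a layer connecting units[w] = 100 neurons to units[w + 1] = 50 neurons gets range = sqrt(6 / 150) = 0.2, so its weights start uniformly in (-0.2, 0.2); if that layer feeds the softmax output of a classifier, the draw is additionally scaled by 4, as in the code above.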
Example #15
 @Override
 public void map(Chunk[] chks) {
   _gss = new double[_nclass][];
   _rss = new double[_nclass][];
   // For all tree/klasses
   for (int k = 0; k < _nclass; k++) {
     final DTree tree = _trees[k];
     final int leaf = _leafs[k];
     if (tree == null) continue; // Empty class is ignored
     // A leaf-biased array of all active Tree leaves.
     final double gs[] = _gss[k] = new double[tree._len - leaf];
     final double rs[] = _rss[k] = new double[tree._len - leaf];
     final Chunk nids = chk_nids(chks, k); // Node-ids  for this tree/class
     final Chunk ress = chk_work(chks, k); // Residuals for this tree/class
     // If we have all constant responses, then we do not split even the
     // root and the residuals should be zero.
     if (tree.root() instanceof LeafNode) continue;
     for (int row = 0; row < nids._len; row++) { // For all rows
       int nid = (int) nids.at80(row); // Get Node to decide from
       if (nid < 0) continue; // Missing response
        if (tree.node(nid) instanceof UndecidedNode) // If we bottomed out the tree
          nid = tree.node(nid)._pid; // Then take parent's decision
        DecidedNode dn = tree.decided(nid); // Must have a decision point
        if (dn._split._col == -1) // Unable to decide?
          dn = tree.decided(nid = dn._pid); // Then take parent's decision
       int leafnid = dn.ns(chks, row); // Decide down to a leafnode
       assert leaf <= leafnid && leafnid < tree._len;
       assert tree.node(leafnid) instanceof LeafNode;
        // Note: I know which leaf/region I end up in, but I do not care about the prediction
        // presented by the tree.  For GBM, we compute the sum-of-residuals (and sum/abs/mult
        // residuals) for all rows in the leaf, and get our prediction from that.
       nids.set0(row, leafnid);
       assert !ress.isNA0(row);
       double res = ress.at0(row);
       double ares = Math.abs(res);
       gs[leafnid - leaf] += _nclass > 1 ? ares * (1 - ares) : 1;
       rs[leafnid - leaf] += res;
     }
   }
 }
Example #16
 @Override
 public float progress() {
   if (_status == Status.Done) return 1.0f;
   return Math.min(0.99f, (float) ((double) _count / (double) _nchunks));
 }
    @Override
    protected void compute2() {
      CoxPHModel model = null;
      try {
        Scope.enter();
        _parms.read_lock_frames(CoxPH.this);
        init(true);

        applyScoringFrameSideEffects();

        // The model to be built
        model = new CoxPHModel(dest(), _parms, new CoxPHModel.CoxPHOutput(CoxPH.this));
        model.delete_and_lock(_key);

        applyTrainingFrameSideEffects();

        int nResponses = 1;
        boolean useAllFactorLevels = false;
        final DataInfo dinfo =
            new DataInfo(
                Key.make(),
                _modelBuilderTrain,
                null,
                nResponses,
                useAllFactorLevels,
                DataInfo.TransformType.DEMEAN,
                TransformType.NONE,
                true,
                false,
                false,
                false,
                false,
                false);
        initStats(model, dinfo);

        final int n_offsets =
            (model._parms.offset_columns == null) ? 0 : model._parms.offset_columns.length;
        final int n_coef = dinfo.fullN() - n_offsets;
        final double[] step = MemoryManager.malloc8d(n_coef);
        final double[] oldCoef = MemoryManager.malloc8d(n_coef);
        final double[] newCoef = MemoryManager.malloc8d(n_coef);
        Arrays.fill(step, Double.NaN);
        Arrays.fill(oldCoef, Double.NaN);
        for (int j = 0; j < n_coef; ++j) newCoef[j] = model._parms.init;
        double oldLoglik = -Double.MAX_VALUE;
        final int n_time = (int) (model._output.max_time - model._output.min_time + 1);
        final boolean has_start_column = (model._parms.start_column != null);
        final boolean has_weights_column = (model._parms.weights_column != null);
        for (int i = 0; i <= model._parms.iter_max; ++i) {
          model._output.iter = i;

          final CoxPHTask coxMR =
              new CoxPHTask(
                      self(),
                      dinfo,
                      newCoef,
                      model._output.min_time,
                      n_time,
                      n_offsets,
                      has_start_column,
                      has_weights_column)
                  .doAll(dinfo._adaptedFrame);

          final double newLoglik = calcLoglik(model, coxMR);
          if (newLoglik > oldLoglik) {
            if (i == 0) calcCounts(model, coxMR);

            calcModelStats(model, newCoef, newLoglik);
            calcCumhaz_0(model, coxMR);

            if (newLoglik == 0) model._output.lre = -Math.log10(Math.abs(oldLoglik - newLoglik));
            else model._output.lre = -Math.log10(Math.abs((oldLoglik - newLoglik) / newLoglik));
            if (model._output.lre >= model._parms.lre_min) break;

            Arrays.fill(step, 0);
            for (int j = 0; j < n_coef; ++j)
              for (int k = 0; k < n_coef; ++k)
                step[j] -= model._output.var_coef[j][k] * model._output.gradient[k];
            for (int j = 0; j < n_coef; ++j)
              if (Double.isNaN(step[j]) || Double.isInfinite(step[j])) break;

            oldLoglik = newLoglik;
            System.arraycopy(newCoef, 0, oldCoef, 0, oldCoef.length);
          } else {
            for (int j = 0; j < n_coef; ++j) step[j] /= 2;
          }

          for (int j = 0; j < n_coef; ++j) newCoef[j] = oldCoef[j] - step[j];
        }

        model.update(_key);
      } catch (Throwable t) {
        Job thisJob = DKV.getGet(_key);
        if (thisJob._state == JobState.CANCELLED) {
          Log.info("Job cancelled by user.");
        } else {
          t.printStackTrace();
          failed(t);
          throw t;
        }
      } finally {
        updateModelOutput();
        _parms.read_unlock_frames(CoxPH.this);
        Scope.exit();
        done(); // Job done!
      }
      tryComplete();
    }
    @Override
    protected void processRow(long gid, Row row) {
      n++;
      double[] response = row.response;
      int ncats = row.nBins;
      int[] cats = row.numIds;
      double[] nums = row.numVals;
      final double weight = _has_weights_column ? response[0] : 1.0;
      if (weight <= 0) throw new IllegalArgumentException("weights must be positive values");
      final long event = (long) response[response.length - 1];
      final int t1 =
          _has_start_column ? (int) (((long) response[response.length - 3] + 1) - _min_time) : -1;
      final int t2 = (int) (((long) response[response.length - 2]) - _min_time);
      if (t1 > t2)
        throw new IllegalArgumentException("start times must be strictly less than stop times");
      final int numStart = _dinfo.numStart();
      sumWeights += weight;
      for (int j = 0; j < ncats; ++j) sumWeightedCatX[cats[j]] += weight;
      for (int j = 0; j < nums.length; ++j) sumWeightedNumX[j] += weight * nums[j];
      double logRisk = 0;
      for (int j = 0; j < ncats; ++j) logRisk += _beta[cats[j]];
      for (int j = 0; j < nums.length - _n_offsets; ++j) logRisk += nums[j] * _beta[numStart + j];
      for (int j = nums.length - _n_offsets; j < nums.length; ++j) logRisk += nums[j];
      final double risk = weight * Math.exp(logRisk);
      logRisk *= weight;
      if (event > 0) {
        countEvents[t2]++;
        sizeEvents[t2] += weight;
        sumLogRiskEvents[t2] += logRisk;
        sumRiskEvents[t2] += risk;
      } else sizeCensored[t2] += weight;
      if (_has_start_column) {
        for (int t = t1; t <= t2; ++t) sizeRiskSet[t] += weight;
        for (int t = t1; t <= t2; ++t) rcumsumRisk[t] += risk;
      } else {
        sizeRiskSet[t2] += weight;
        rcumsumRisk[t2] += risk;
      }

      final int ntotal = ncats + (nums.length - _n_offsets);
      final int numStartIter = numStart - ncats;
      for (int jit = 0; jit < ntotal; ++jit) {
        final boolean jIsCat = jit < ncats;
        final int j = jIsCat ? cats[jit] : numStartIter + jit;
        final double x1 = jIsCat ? 1.0 : nums[jit - ncats];
        final double xRisk = x1 * risk;
        if (event > 0) {
          sumXEvents[t2][j] += weight * x1;
          sumXRiskEvents[t2][j] += xRisk;
        }
        if (_has_start_column) {
          for (int t = t1; t <= t2; ++t) rcumsumXRisk[t][j] += xRisk;
        } else {
          rcumsumXRisk[t2][j] += xRisk;
        }
        for (int kit = 0; kit < ntotal; ++kit) {
          final boolean kIsCat = kit < ncats;
          final int k = kIsCat ? cats[kit] : numStartIter + kit;
          final double x2 = kIsCat ? 1.0 : nums[kit - ncats];
          final double xxRisk = x2 * xRisk;
          if (event > 0) sumXXRiskEvents[t2][j][k] += xxRisk;
          if (_has_start_column) {
            for (int t = t1; t <= t2; ++t) rcumsumXXRisk[t][j][k] += xxRisk;
          } else {
            rcumsumXXRisk[t2][j][k] += xxRisk;
          }
        }
      }
    }
  /**
   * Main constructor
   *
   * @param params Model parameters
   * @param dinfo Data Info
   * @param nClasses number of classes (1 for regression, 0 for autoencoder)
   * @param train User-given training data frame, prepared by AdaptTestTrain
   * @param valid User-specified validation data frame, prepared by AdaptTestTrain
   */
  public DeepLearningModelInfo(
      final DeepLearningParameters params,
      final DataInfo dinfo,
      int nClasses,
      Frame train,
      Frame valid) {
    _classification = nClasses > 1;
    _train = train;
    _valid = valid;
    data_info = dinfo;
    parameters =
        (DeepLearningParameters) params.clone(); // make a copy, don't change model's parameters
    DeepLearningParameters.Sanity.modifyParms(
        parameters, parameters, nClasses); // sanitize the model_info's parameters

    final int num_input = dinfo.fullN();
    final int num_output =
        get_params()._autoencoder
            ? num_input
            : (_classification ? train.lastVec().cardinality() : 1);
    if (!get_params()._autoencoder) assert (num_output == nClasses);

    _saw_missing_cats = dinfo._cats > 0 ? new boolean[data_info._cats] : null;
    assert (num_input > 0);
    assert (num_output > 0);
    if (has_momenta() && adaDelta())
      throw new IllegalArgumentException(
          "Cannot have non-zero momentum and adaptive rate at the same time.");
    final int layers = get_params()._hidden.length;
    // units (# neurons for each layer)
    units = new int[layers + 2];
    if (get_params()._max_categorical_features <= Integer.MAX_VALUE - dinfo._nums)
      units[0] = Math.min(dinfo._nums + get_params()._max_categorical_features, num_input);
    else units[0] = num_input;
    System.arraycopy(get_params()._hidden, 0, units, 1, layers);
    units[layers + 1] = num_output;

    boolean printLevels = units[0] > 1000L;
    boolean warn = units[0] > 100000L;
    if (printLevels) {
      final String[][] domains = dinfo._adaptedFrame.domains();
      int[] levels = new int[domains.length];
      for (int i = 0; i < levels.length; ++i) {
        levels[i] = domains[i] != null ? domains[i].length : 0;
      }
      Arrays.sort(levels);
      if (warn) {
        Log.warn(
            "===================================================================================================================================");
        Log.warn(
            num_input
                + " input features"
                + (dinfo._cats > 0 ? " (after categorical one-hot encoding)" : "")
                + ". Can be slow and require a lot of memory.");
      }
      if (levels[levels.length - 1] > 0) {
        int levelcutoff = levels[levels.length - 1 - Math.min(10, levels.length - 1)];
        int count = 0;
        for (int i = 0;
            i < dinfo._adaptedFrame.numCols() - (get_params()._autoencoder ? 0 : 1) && count < 10;
            ++i) {
          if (dinfo._adaptedFrame.domains()[i] != null
              && dinfo._adaptedFrame.domains()[i].length >= levelcutoff) {
            if (warn) {
              Log.warn(
                  "Categorical feature '"
                      + dinfo._adaptedFrame._names[i]
                      + "' has cardinality "
                      + dinfo._adaptedFrame.domains()[i].length
                      + ".");
            } else {
              Log.info(
                  "Categorical feature '"
                      + dinfo._adaptedFrame._names[i]
                      + "' has cardinality "
                      + dinfo._adaptedFrame.domains()[i].length
                      + ".");
            }
          }
          count++;
        }
      }
      if (warn) {
        Log.warn("Suggestions:");
        Log.warn(" *) Limit the size of the first hidden layer");
        if (dinfo._cats > 0) {
          Log.warn(
              " *) Limit the total number of one-hot encoded features with the parameter 'max_categorical_features'");
          Log.warn(
              " *) Run h2o.interaction(...,pairwise=F) on high-cardinality categorical columns to limit the factor count, see http://learn.h2o.ai");
        }
        Log.warn(
            "===================================================================================================================================");
      }
    }

    // weights (to connect layers)
    dense_row_weights = new Storage.DenseRowMatrix[layers + 1];
    dense_col_weights = new Storage.DenseColMatrix[layers + 1];

    // decide format of weight matrices row-major or col-major
    if (get_params()._col_major)
      dense_col_weights[0] = new Storage.DenseColMatrix(units[1], units[0]);
    else dense_row_weights[0] = new Storage.DenseRowMatrix(units[1], units[0]);
    for (int i = 1; i <= layers; ++i)
      dense_row_weights[i] = new Storage.DenseRowMatrix(units[i + 1] /*rows*/, units[i] /*cols*/);

    // biases (only for hidden layers and output layer)
    biases = new Storage.DenseVector[layers + 1];
    for (int i = 0; i <= layers; ++i) biases[i] = new Storage.DenseVector(units[i + 1]);
    // average activation (only for hidden layers)
    if (get_params()._autoencoder && get_params()._sparsity_beta > 0) {
      avg_activations = new Storage.DenseVector[layers];
      mean_a = new float[layers];
      for (int i = 0; i < layers; ++i) avg_activations[i] = new Storage.DenseVector(units[i + 1]);
    }
    allocateHelperArrays();
    // for diagnostics
    mean_rate = new float[units.length];
    rms_rate = new float[units.length];
    mean_bias = new float[units.length];
    rms_bias = new float[units.length];
    mean_weight = new float[units.length];
    rms_weight = new float[units.length];
  }
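As an illustration (assuming _max_categorical_features is left large enough not to cap the input width): with 50 input features after one-hot encoding, _hidden = {200, 200} and a 3-class response, units becomes {50, 200, 200, 3}, so the weight matrices allocated above have shapes 200x50, 200x200 and 3x200, with one bias vector each of length 200, 200 and 3.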
Example #20
    // public final double[] rproxgrad_x(double[] u, double alpha) { return rproxgrad(u, alpha,
    // _gamma_x, _regularization_x, RandomUtils.getRNG(_seed)); }
    // public final double[] rproxgrad_y(double[] u, double alpha) { return rproxgrad(u, alpha,
    // _gamma_y, _regularization_y, RandomUtils.getRNG(_seed)); }
    static double[] rproxgrad(
        double[] u, double alpha, double gamma, Regularizer regularization, Random rand) {
      if (u == null || alpha == 0 || gamma == 0) return u;
      double[] v = new double[u.length];

      switch (regularization) {
        case None:
          return u;
        case Quadratic:
          for (int i = 0; i < u.length; i++) v[i] = u[i] / (1 + 2 * alpha * gamma);
          return v;
        case L2:
          // Proof uses Moreau decomposition; see section 6.5.1 of Parikh and Boyd
          // https://web.stanford.edu/~boyd/papers/pdf/prox_algs.pdf
          double weight = 1 - alpha * gamma / ArrayUtils.l2norm(u);
          if (weight < 0) return v; // Zero vector
          for (int i = 0; i < u.length; i++) v[i] = weight * u[i];
          return v;
        case L1:
          for (int i = 0; i < u.length; i++)
            v[i] = Math.max(u[i] - alpha * gamma, 0) + Math.min(u[i] + alpha * gamma, 0);
          return v;
        case NonNegative:
          for (int i = 0; i < u.length; i++) v[i] = Math.max(u[i], 0);
          return v;
        case OneSparse:
          int idx = ArrayUtils.maxIndex(u, rand);
          v[idx] = u[idx] > 0 ? u[idx] : 1e-6;
          return v;
        case UnitOneSparse:
          idx = ArrayUtils.maxIndex(u, rand);
          v[idx] = 1;
          return v;
        case Simplex:
          // Proximal gradient algorithm by Chen and Ye in http://arxiv.org/pdf/1101.6081v2.pdf
          // 1) Sort input vector u in ascending order: u[1] <= ... <= u[n]
          int n = u.length;
          int[] idxs = new int[n];
          for (int i = 0; i < n; i++) idxs[i] = i;
          ArrayUtils.sort(idxs, u);

          // 2) Calculate cumulative sum of u in descending order
          // cumsum(u) = (..., u[n-2]+u[n-1]+u[n], u[n-1]+u[n], u[n])
          double[] ucsum = new double[n];
          ucsum[n - 1] = u[idxs[n - 1]];
          for (int i = n - 2; i >= 0; i--) ucsum[i] = ucsum[i + 1] + u[idxs[i]];

          // 3) Let t_i = (\sum_{j=i+1}^n u[j] - 1)/(n - i)
          // For i = n-1,...,1, set optimal t* to first t_i >= u[i]
          double t = (ucsum[0] - 1) / n; // Default t* = (\sum_{j=1}^n u[j] - 1)/n
          for (int i = n - 1; i >= 1; i--) {
            double tmp = (ucsum[i] - 1) / (n - i);
            if (tmp >= u[idxs[i - 1]]) {
              t = tmp;
              break;
            }
          }

          // 4) Return max(u - t*, 0) as projection of u onto simplex
          double[] x = new double[u.length];
          for (int i = 0; i < u.length; i++) x[i] = Math.max(u[i] - t, 0);
          return x;
        default:
          throw new RuntimeException("Unknown regularization function " + regularization);
      }
    }
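As a worked example of the Simplex branch: for u = {0.4, 0.3, 0.1}, the descending cumulative sums are {0.8, 0.7, 0.4}, no candidate threshold satisfies the t_i >= u[i] test, so t* = (0.8 - 1)/3 ≈ -0.0667 and the projection is approximately {0.467, 0.367, 0.167}, which is nonnegative and sums to 1.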
Example #21
 private void randomRow(
     Vec[] vecs, Random rand, double[] center, double[] means, double[] mults, int[] modes) {
   long row = Math.max(0, (long) (rand.nextDouble() * vecs[0].length()) - 1);
   data(center, vecs, row, means, mults, modes);
 }
    protected double calcLoglik(CoxPHModel model, final CoxPHTask coxMR) {
      CoxPHModel.CoxPHParameters p = model._parms;
      CoxPHModel.CoxPHOutput o = model._output;

      final int n_coef = o.coef.length;
      final int n_time = coxMR.sizeEvents.length;
      double newLoglik = 0;
      for (int j = 0; j < n_coef; ++j) o.gradient[j] = 0;
      for (int j = 0; j < n_coef; ++j) for (int k = 0; k < n_coef; ++k) o.hessian[j][k] = 0;

      switch (p.ties) {
        case efron:
          final double[] newLoglik_t = MemoryManager.malloc8d(n_time);
          final double[][] gradient_t = malloc2DArray(n_time, n_coef);
          final double[][][] hessian_t = malloc3DArray(n_time, n_coef, n_coef);
          ForkJoinTask[] fjts = new ForkJoinTask[n_time];
          for (int t = n_time - 1; t >= 0; --t) {
            final int _t = t;
            fjts[t] =
                new RecursiveAction() {
                  @Override
                  protected void compute() {
                    final double sizeEvents_t = coxMR.sizeEvents[_t];
                    if (sizeEvents_t > 0) {
                      final long countEvents_t = coxMR.countEvents[_t];
                      final double sumLogRiskEvents_t = coxMR.sumLogRiskEvents[_t];
                      final double sumRiskEvents_t = coxMR.sumRiskEvents[_t];
                      final double rcumsumRisk_t = coxMR.rcumsumRisk[_t];
                      final double avgSize = sizeEvents_t / countEvents_t;
                      newLoglik_t[_t] = sumLogRiskEvents_t;
                      System.arraycopy(coxMR.sumXEvents[_t], 0, gradient_t[_t], 0, n_coef);
                      for (long e = 0; e < countEvents_t; ++e) {
                        final double frac = ((double) e) / ((double) countEvents_t);
                        final double term = rcumsumRisk_t - frac * sumRiskEvents_t;
                        newLoglik_t[_t] -= avgSize * Math.log(term);
                        for (int j = 0; j < n_coef; ++j) {
                          final double djTerm =
                              coxMR.rcumsumXRisk[_t][j] - frac * coxMR.sumXRiskEvents[_t][j];
                          final double djLogTerm = djTerm / term;
                          gradient_t[_t][j] -= avgSize * djLogTerm;
                          for (int k = 0; k < n_coef; ++k) {
                            final double dkTerm =
                                coxMR.rcumsumXRisk[_t][k] - frac * coxMR.sumXRiskEvents[_t][k];
                            final double djkTerm =
                                coxMR.rcumsumXXRisk[_t][j][k]
                                    - frac * coxMR.sumXXRiskEvents[_t][j][k];
                            hessian_t[_t][j][k] -=
                                avgSize * (djkTerm / term - (djLogTerm * (dkTerm / term)));
                          }
                        }
                      }
                    }
                  }
                };
          }
          ForkJoinTask.invokeAll(fjts);

          for (int t = 0; t < n_time; ++t) newLoglik += newLoglik_t[t];

          for (int t = 0; t < n_time; ++t)
            for (int j = 0; j < n_coef; ++j) o.gradient[j] += gradient_t[t][j];

          for (int t = 0; t < n_time; ++t)
            for (int j = 0; j < n_coef; ++j)
              for (int k = 0; k < n_coef; ++k) o.hessian[j][k] += hessian_t[t][j][k];
          break;
        case breslow:
          for (int t = n_time - 1; t >= 0; --t) {
            final double sizeEvents_t = coxMR.sizeEvents[t];
            if (sizeEvents_t > 0) {
              final double sumLogRiskEvents_t = coxMR.sumLogRiskEvents[t];
              final double rcumsumRisk_t = coxMR.rcumsumRisk[t];
              newLoglik += sumLogRiskEvents_t;
              newLoglik -= sizeEvents_t * Math.log(rcumsumRisk_t);
              for (int j = 0; j < n_coef; ++j) {
                final double dlogTerm = coxMR.rcumsumXRisk[t][j] / rcumsumRisk_t;
                o.gradient[j] += coxMR.sumXEvents[t][j];
                o.gradient[j] -= sizeEvents_t * dlogTerm;
                for (int k = 0; k < n_coef; ++k)
                  o.hessian[j][k] -=
                      sizeEvents_t
                          * (((coxMR.rcumsumXXRisk[t][j][k] / rcumsumRisk_t)
                              - (dlogTerm * (coxMR.rcumsumXRisk[t][k] / rcumsumRisk_t))));
              }
            }
          }
          break;
        default:
          throw new IllegalArgumentException("ties method must be either efron or breslow");
      }
      return newLoglik;
    }
Example #23
 // Progress reporting for the job/progress page
 @Override
 public float progress() {
   return Math.min(1f, _iteration / (float) _maxIter);
 }