Esempio n. 1
0
 /**
  * Returns the beta vector of the submodel at {@code idx}, split into one row per class.
  *
  * <p>When the submodel carries an index array ({@code idxs}), its beta is first passed through
  * {@code ArrayUtils.expandAndScatter} with a target length of {@code nclasses() * (fullN + 1)}
  * (presumably restoring the dense layout -- confirm against ArrayUtils). The vector is then cut
  * into consecutive slices of {@code _dinfo.fullN() + 1} entries, one slice per class.
  *
  * @param idx position of the submodel in {@code _submodels}
  * @return an array with {@code nclasses()} rows, each of length {@code _dinfo.fullN() + 1}
  */
 public double[][] getNormBetaMultinomial(int idx) {
   final double[][] perClass = new double[nclasses()][];
   final Submodel submodel = _submodels[idx];
   // Width of one class' slice: fullN() entries plus one more (presumably the intercept).
   final int stride = _dinfo.fullN() + 1;
   double[] coefs = submodel.beta;
   if (submodel.idxs != null) {
     coefs = ArrayUtils.expandAndScatter(coefs, nclasses() * stride, submodel.idxs);
   }
   int from = 0;
   for (int c = 0; c < perClass.length; ++c) {
     perClass[c] = Arrays.copyOfRange(coefs, from, from + stride);
     from += stride;
   }
   return perClass;
 }
Esempio n. 2
0
 /**
  * Returns the beta of the submodel selected by {@code _best_lambda_idx}.
  *
  * <p>A new double array of length {@code _dinfo.fullN() + 1} is obtained from
  * {@code MemoryManager.malloc8d} and handed to the submodel's {@code getBeta}; whatever that call
  * returns is passed straight back to the caller.
  *
  * @return the result of {@code getBeta} on the selected submodel
  */
 public double[] getNormBeta() {
   final double[] buffer = MemoryManager.malloc8d(_dinfo.fullN() + 1);
   return _submodels[_best_lambda_idx].getBeta(buffer);
 }
  /**
   * Main constructor
   *
   * @param params Model parameters
   * @param dinfo Data Info
   * @param nClasses number of classes (1 for regression, 0 for autoencoder)
   * @param train User-given training data frame, prepared by AdaptTestTrain
   * @param valid User-specified validation data frame, prepared by AdaptTestTrain
   */
  public DeepLearningModelInfo(
      final DeepLearningParameters params,
      final DataInfo dinfo,
      int nClasses,
      Frame train,
      Frame valid) {
    _classification = nClasses > 1;
    _train = train;
    _valid = valid;
    data_info = dinfo;
    parameters =
        (DeepLearningParameters) params.clone(); // make a copy, don't change model's parameters
    DeepLearningParameters.Sanity.modifyParms(
        parameters, parameters, nClasses); // sanitize the model_info's parameters

    final int num_input = dinfo.fullN();
    final int num_output =
        get_params()._autoencoder
            ? num_input
            : (_classification ? train.lastVec().cardinality() : 1);
    if (!get_params()._autoencoder) assert (num_output == nClasses);

    _saw_missing_cats = dinfo._cats > 0 ? new boolean[data_info._cats] : null;
    assert (num_input > 0);
    assert (num_output > 0);
    if (has_momenta() && adaDelta())
      throw new IllegalArgumentException(
          "Cannot have non-zero momentum and adaptive rate at the same time.");
    final int layers = get_params()._hidden.length;
    // units (# neurons for each layer)
    units = new int[layers + 2];
    if (get_params()._max_categorical_features <= Integer.MAX_VALUE - dinfo._nums)
      units[0] = Math.min(dinfo._nums + get_params()._max_categorical_features, num_input);
    else units[0] = num_input;
    System.arraycopy(get_params()._hidden, 0, units, 1, layers);
    units[layers + 1] = num_output;

    boolean printLevels = units[0] > 1000L;
    boolean warn = units[0] > 100000L;
    if (printLevels) {
      final String[][] domains = dinfo._adaptedFrame.domains();
      int[] levels = new int[domains.length];
      for (int i = 0; i < levels.length; ++i) {
        levels[i] = domains[i] != null ? domains[i].length : 0;
      }
      Arrays.sort(levels);
      if (warn) {
        Log.warn(
            "===================================================================================================================================");
        Log.warn(
            num_input
                + " input features"
                + (dinfo._cats > 0 ? " (after categorical one-hot encoding)" : "")
                + ". Can be slow and require a lot of memory.");
      }
      if (levels[levels.length - 1] > 0) {
        int levelcutoff = levels[levels.length - 1 - Math.min(10, levels.length - 1)];
        int count = 0;
        for (int i = 0;
            i < dinfo._adaptedFrame.numCols() - (get_params()._autoencoder ? 0 : 1) && count < 10;
            ++i) {
          if (dinfo._adaptedFrame.domains()[i] != null
              && dinfo._adaptedFrame.domains()[i].length >= levelcutoff) {
            if (warn) {
              Log.warn(
                  "Categorical feature '"
                      + dinfo._adaptedFrame._names[i]
                      + "' has cardinality "
                      + dinfo._adaptedFrame.domains()[i].length
                      + ".");
            } else {
              Log.info(
                  "Categorical feature '"
                      + dinfo._adaptedFrame._names[i]
                      + "' has cardinality "
                      + dinfo._adaptedFrame.domains()[i].length
                      + ".");
            }
          }
          count++;
        }
      }
      if (warn) {
        Log.warn("Suggestions:");
        Log.warn(" *) Limit the size of the first hidden layer");
        if (dinfo._cats > 0) {
          Log.warn(
              " *) Limit the total number of one-hot encoded features with the parameter 'max_categorical_features'");
          Log.warn(
              " *) Run h2o.interaction(...,pairwise=F) on high-cardinality categorical columns to limit the factor count, see http://learn.h2o.ai");
        }
        Log.warn(
            "===================================================================================================================================");
      }
    }

    // weights (to connect layers)
    dense_row_weights = new Storage.DenseRowMatrix[layers + 1];
    dense_col_weights = new Storage.DenseColMatrix[layers + 1];

    // decide format of weight matrices row-major or col-major
    if (get_params()._col_major)
      dense_col_weights[0] = new Storage.DenseColMatrix(units[1], units[0]);
    else dense_row_weights[0] = new Storage.DenseRowMatrix(units[1], units[0]);
    for (int i = 1; i <= layers; ++i)
      dense_row_weights[i] = new Storage.DenseRowMatrix(units[i + 1] /*rows*/, units[i] /*cols*/);

    // biases (only for hidden layers and output layer)
    biases = new Storage.DenseVector[layers + 1];
    for (int i = 0; i <= layers; ++i) biases[i] = new Storage.DenseVector(units[i + 1]);
    // average activation (only for hidden layers)
    if (get_params()._autoencoder && get_params()._sparsity_beta > 0) {
      avg_activations = new Storage.DenseVector[layers];
      mean_a = new float[layers];
      for (int i = 0; i < layers; ++i) avg_activations[i] = new Storage.DenseVector(units[i + 1]);
    }
    allocateHelperArrays();
    // for diagnostics
    mean_rate = new float[units.length];
    rms_rate = new float[units.length];
    mean_bias = new float[units.length];
    rms_bias = new float[units.length];
    mean_weight = new float[units.length];
    rms_weight = new float[units.length];
  }
Esempio n. 4
0
    /**
     * Trains a Deep Learning model; assumes that all members are populated.
     *
     * <p>If {@code _parms._checkpoint} is {@code null}, a new model is created and its members are
     * initialized. Otherwise the checkpointed model is fetched, the restart parameters and the
     * (adapted) training/validation frames are checked for consistency with it, and a model that
     * continues from the checkpoint is created. After training, the scope is exited; the keys of
     * the exported weight and bias frames (and of their vecs) are passed to {@code Scope.exit} if
     * the user asked for the export.
     *
     * @throws IllegalArgumentException if the checkpoint key does not resolve to a model
     * @throws H2OIllegalArgumentException if the restart is inconsistent with the checkpoint
     */
    public final void buildModel() {
      DeepLearningModel model = null;
      if (_parms._checkpoint != null) {
        final DeepLearningModel checkpointed = DKV.getGet(_parms._checkpoint);
        if (checkpointed == null) {
          throw new IllegalArgumentException("Checkpoint not found.");
        }
        Log.info("Resuming from checkpoint.");
        _job.update(0, "Resuming from checkpoint");

        if (isClassifier() != checkpointed._output.isClassifier()) {
          throw new H2OIllegalArgumentException(
              "Response type must be the same as for the checkpointed model.");
        }
        if (isSupervised() != checkpointed._output.isSupervised()) {
          throw new H2OIllegalArgumentException(
              "Model type must be the same as for the checkpointed model.");
        }

        // check the user-given arguments for consistency
        final DeepLearningParameters checkpointParms =
            checkpointed._parms; // sanitized parameters for checkpointed model
        final DeepLearningParameters restartParms = _parms; // user-given parameters for restart

        final DeepLearningParameters checkpointParmsCopy =
            (DeepLearningParameters) checkpointParms.clone();
        final DeepLearningParameters restartParmsCopy =
            (DeepLearningParameters) restartParms.clone();
        // sanitize (a copy of) the checkpointed model's parameters
        DeepLearningParameters.Sanity.modifyParms(
            checkpointParms, checkpointParmsCopy, nclasses());
        // sanitize (a copy of) the user-given parameters
        DeepLearningParameters.Sanity.modifyParms(restartParms, restartParmsCopy, nclasses());
        DeepLearningParameters.Sanity.checkpoint(checkpointParmsCopy, restartParmsCopy);

        try {
          // PUBDEV-2513: Adapt _train and _valid (in-place) to match the frames that were used for
          // the previous model. This can add or remove dummy columns (can happen if the dataset
          // is sparse and datasets have different non-const columns).
          for (String warning : checkpointed.adaptTestForTrain(_train, true, false)) {
            Log.warn(warning);
          }
          for (String warning : checkpointed.adaptTestForTrain(_valid, true, false)) {
            Log.warn(warning);
          }
          final DataInfo dinfo = makeDataInfo(_train, _valid, _parms, nclasses());
          DKV.put(dinfo);
          model = new DeepLearningModel(dest(), _parms, checkpointed, false, dinfo);
          model.write_lock(_job);

          if (!Arrays.equals(model._output._names, checkpointed._output._names)) {
            throw new H2OIllegalArgumentException(
                "The columns of the training data must be the same as for the checkpointed model. Check ignored columns (or disable ignore_const_cols).");
          }
          if (!Arrays.deepEquals(model._output._domains, checkpointed._output._domains)) {
            throw new H2OIllegalArgumentException(
                "Categorical factor levels of the training data must be the same as for the checkpointed model.");
          }
          if (dinfo.fullN() != checkpointed.model_info().data_info().fullN()) {
            throw new H2OIllegalArgumentException(
                "Total number of predictors is different than for the checkpointed model.");
          }
          if (_parms._epochs <= checkpointed.epoch_counter) {
            throw new H2OIllegalArgumentException(
                "Total number of epochs must be larger than the number of epochs already trained for the checkpointed model ("
                    + checkpointed.epoch_counter
                    + ").");
          }

          // these are the mutable parameters that are to be used by the model (stored in
          // model_info._parms): actually used parameters for model building (defaults filled in,
          // etc.)
          final DeepLearningParameters effectiveParms = model.model_info().get_params();
          assert (effectiveParms != checkpointed.model_info().get_params());
          assert (effectiveParms != restartParms);
          assert (effectiveParms != checkpointParms);
          DeepLearningParameters.Sanity.update(effectiveParms, restartParms, nclasses());

          Log.info(
              "Continuing training after "
                  + String.format("%.3f", checkpointed.epoch_counter)
                  + " epochs from the checkpointed model.");
          model.update(_job);
        } catch (H2OIllegalArgumentException ex) {
          // A rejected restart must not leave the half-initialized model behind.
          if (model != null) {
            model.unlock(_job);
            model.delete();
            model = null;
          }
          throw ex;
        } finally {
          if (model != null) model.unlock(_job);
        }
      } else {
        model =
            new DeepLearningModel(
                dest(),
                _parms,
                new DeepLearningModel.DeepLearningModelOutput(DeepLearning.this),
                _train,
                _valid,
                nclasses());
        model.model_info().initializeMembers();
      }
      trainModel(model);

      // clean up, but don't delete weights and biases if user asked for export
      final List<Key> retained = new ArrayList<>();
      try {
        final boolean exportRequested =
            _parms._export_weights_and_biases
                && model._output.weights != null
                && model._output.biases != null;
        if (exportRequested) {
          // Weights first, then biases: each frame key followed by the keys of its vecs.
          final List<Key> exportedFrames = new ArrayList<>();
          exportedFrames.addAll(Arrays.asList(model._output.weights));
          exportedFrames.addAll(Arrays.asList(model._output.biases));
          for (Key frameKey : exportedFrames) {
            retained.add(frameKey);
            for (Vec vec : ((Frame) DKV.getGet(frameKey)).vecs()) {
              retained.add(vec._key);
            }
          }
        }
      } finally {
        Scope.exit(retained.toArray(new Key[retained.size()]));
      }
    }
    /**
     * Fits the Cox proportional hazards model and stores the result under the builder's key.
     *
     * <p>After locking the frames and initializing, a {@code CoxPHModel} and a {@code DataInfo}
     * are created and the coefficients are refined iteratively (at most {@code iter_max + 1}
     * passes). Each pass runs a {@code CoxPHTask} over the adapted frame and computes the
     * log-likelihood:
     *
     * <ul>
     *   <li>If it improved, the model statistics are refreshed and the loop stops once
     *       {@code lre >= lre_min}. Otherwise a new step is computed from {@code var_coef} and
     *       {@code gradient}; the loop also stops if that step contains a NaN or infinite entry.
     *   <li>If it did not improve, the previous step is halved and retried from the last accepted
     *       coefficients.
     * </ul>
     *
     * <p>Any throwable is rethrown after marking the job failed, unless the job was cancelled.
     * Frames are unlocked, the scope is exited and the job is marked done in all cases.
     */
    @Override
    protected void compute2() {
      CoxPHModel model = null;
      try {
        Scope.enter();
        _parms.read_lock_frames(CoxPH.this);
        init(true);

        applyScoringFrameSideEffects();

        // The model to be built
        model = new CoxPHModel(dest(), _parms, new CoxPHModel.CoxPHOutput(CoxPH.this));
        model.delete_and_lock(_key);

        applyTrainingFrameSideEffects();

        int nResponses = 1;
        boolean useAllFactorLevels = false;
        final DataInfo dinfo =
            new DataInfo(
                Key.make(),
                _modelBuilderTrain,
                null,
                nResponses,
                useAllFactorLevels,
                DataInfo.TransformType.DEMEAN,
                TransformType.NONE,
                true,
                false,
                false,
                false,
                false,
                false);
        initStats(model, dinfo);

        final int n_offsets =
            (model._parms.offset_columns == null) ? 0 : model._parms.offset_columns.length;
        // Offset columns are part of the data info but get no coefficient of their own.
        final int n_coef = dinfo.fullN() - n_offsets;
        final double[] step = MemoryManager.malloc8d(n_coef);
        final double[] oldCoef = MemoryManager.malloc8d(n_coef);
        final double[] newCoef = MemoryManager.malloc8d(n_coef);
        Arrays.fill(step, Double.NaN);
        Arrays.fill(oldCoef, Double.NaN);
        for (int j = 0; j < n_coef; ++j) newCoef[j] = model._parms.init;
        // Smallest possible starting value, so the first finite log-likelihood is an improvement.
        double oldLoglik = -Double.MAX_VALUE;
        final int n_time = (int) (model._output.max_time - model._output.min_time + 1);
        final boolean has_start_column = (model._parms.start_column != null);
        final boolean has_weights_column = (model._parms.weights_column != null);
        for (int i = 0; i <= model._parms.iter_max; ++i) {
          model._output.iter = i;

          final CoxPHTask coxMR =
              new CoxPHTask(
                      self(),
                      dinfo,
                      newCoef,
                      model._output.min_time,
                      n_time,
                      n_offsets,
                      has_start_column,
                      has_weights_column)
                  .doAll(dinfo._adaptedFrame);

          final double newLoglik = calcLoglik(model, coxMR);
          if (newLoglik > oldLoglik) {
            if (i == 0) calcCounts(model, coxMR);

            calcModelStats(model, newCoef, newLoglik);
            calcCumhaz_0(model, coxMR);

            // Convergence measure: -log10 of the (relative, unless newLoglik is 0) change.
            if (newLoglik == 0) model._output.lre = -Math.log10(Math.abs(oldLoglik - newLoglik));
            else model._output.lre = -Math.log10(Math.abs((oldLoglik - newLoglik) / newLoglik));
            if (model._output.lre >= model._parms.lre_min) break;

            // step = -(var_coef * gradient)
            Arrays.fill(step, 0);
            for (int j = 0; j < n_coef; ++j)
              for (int k = 0; k < n_coef; ++k)
                step[j] -= model._output.var_coef[j][k] * model._output.gradient[k];

            // A NaN or infinite step cannot be applied: stop iterating and keep the statistics
            // just computed for the current coefficients. (A bare 'break' inside the scan over j
            // would only leave that scan, not the iteration loop.)
            boolean stepIsFinite = true;
            for (int j = 0; j < n_coef; ++j) {
              if (Double.isNaN(step[j]) || Double.isInfinite(step[j])) {
                stepIsFinite = false;
                break;
              }
            }
            if (!stepIsFinite) break;

            oldLoglik = newLoglik;
            System.arraycopy(newCoef, 0, oldCoef, 0, oldCoef.length);
          } else {
            // No improvement: retry from the last accepted coefficients with half the step.
            for (int j = 0; j < n_coef; ++j) step[j] /= 2;
          }

          for (int j = 0; j < n_coef; ++j) newCoef[j] = oldCoef[j] - step[j];
        }

        model.update(_key);
      } catch (Throwable t) {
        Job thisJob = DKV.getGet(_key);
        // A missing job must not mask the original failure with a NullPointerException.
        if (thisJob != null && thisJob._state == JobState.CANCELLED) {
          Log.info("Job cancelled by user.");
        } else {
          t.printStackTrace();
          failed(t);
          throw t;
        }
      } finally {
        updateModelOutput();
        _parms.read_unlock_frames(CoxPH.this);
        Scope.exit();
        done(); // Job done!
      }
      tryComplete();
    }