public final void set_params(DeepLearningParameters p) {
   parameters = (DeepLearningParameters) p.clone();
 }
  /**
   * Main constructor
   *
   * @param params Model parameters
   * @param dinfo Data Info
   * @param nClasses number of classes (1 for regression, 0 for autoencoder)
   * @param train User-given training data frame, prepared by AdaptTestTrain
   * @param valid User-specified validation data frame, prepared by AdaptTestTrain
   */
  public DeepLearningModelInfo(
      final DeepLearningParameters params,
      final DataInfo dinfo,
      int nClasses,
      Frame train,
      Frame valid) {
    _classification = nClasses > 1;
    _train = train;
    _valid = valid;
    data_info = dinfo;
    parameters =
        (DeepLearningParameters) params.clone(); // make a copy, don't change model's parameters
    DeepLearningParameters.Sanity.modifyParms(
        parameters, parameters, nClasses); // sanitize the model_info's parameters

    final int num_input = dinfo.fullN();
    final int num_output =
        get_params()._autoencoder
            ? num_input
            : (_classification ? train.lastVec().cardinality() : 1);
    if (!get_params()._autoencoder) assert (num_output == nClasses);

    _saw_missing_cats = dinfo._cats > 0 ? new boolean[data_info._cats] : null;
    assert (num_input > 0);
    assert (num_output > 0);
    if (has_momenta() && adaDelta())
      throw new IllegalArgumentException(
          "Cannot have non-zero momentum and adaptive rate at the same time.");
    final int layers = get_params()._hidden.length;
    // units (# neurons for each layer)
    units = new int[layers + 2];
    if (get_params()._max_categorical_features <= Integer.MAX_VALUE - dinfo._nums)
      units[0] = Math.min(dinfo._nums + get_params()._max_categorical_features, num_input);
    else units[0] = num_input;
    System.arraycopy(get_params()._hidden, 0, units, 1, layers);
    units[layers + 1] = num_output;

    boolean printLevels = units[0] > 1000L;
    boolean warn = units[0] > 100000L;
    if (printLevels) {
      final String[][] domains = dinfo._adaptedFrame.domains();
      int[] levels = new int[domains.length];
      for (int i = 0; i < levels.length; ++i) {
        levels[i] = domains[i] != null ? domains[i].length : 0;
      }
      Arrays.sort(levels);
      if (warn) {
        Log.warn(
            "===================================================================================================================================");
        Log.warn(
            num_input
                + " input features"
                + (dinfo._cats > 0 ? " (after categorical one-hot encoding)" : "")
                + ". Can be slow and require a lot of memory.");
      }
      if (levels[levels.length - 1] > 0) {
        int levelcutoff = levels[levels.length - 1 - Math.min(10, levels.length - 1)];
        int count = 0;
        for (int i = 0;
            i < dinfo._adaptedFrame.numCols() - (get_params()._autoencoder ? 0 : 1) && count < 10;
            ++i) {
          if (dinfo._adaptedFrame.domains()[i] != null
              && dinfo._adaptedFrame.domains()[i].length >= levelcutoff) {
            if (warn) {
              Log.warn(
                  "Categorical feature '"
                      + dinfo._adaptedFrame._names[i]
                      + "' has cardinality "
                      + dinfo._adaptedFrame.domains()[i].length
                      + ".");
            } else {
              Log.info(
                  "Categorical feature '"
                      + dinfo._adaptedFrame._names[i]
                      + "' has cardinality "
                      + dinfo._adaptedFrame.domains()[i].length
                      + ".");
            }
          }
          count++;
        }
      }
      if (warn) {
        Log.warn("Suggestions:");
        Log.warn(" *) Limit the size of the first hidden layer");
        if (dinfo._cats > 0) {
          Log.warn(
              " *) Limit the total number of one-hot encoded features with the parameter 'max_categorical_features'");
          Log.warn(
              " *) Run h2o.interaction(...,pairwise=F) on high-cardinality categorical columns to limit the factor count, see http://learn.h2o.ai");
        }
        Log.warn(
            "===================================================================================================================================");
      }
    }

    // weights (to connect layers)
    dense_row_weights = new Storage.DenseRowMatrix[layers + 1];
    dense_col_weights = new Storage.DenseColMatrix[layers + 1];

    // decide format of weight matrices row-major or col-major
    if (get_params()._col_major)
      dense_col_weights[0] = new Storage.DenseColMatrix(units[1], units[0]);
    else dense_row_weights[0] = new Storage.DenseRowMatrix(units[1], units[0]);
    for (int i = 1; i <= layers; ++i)
      dense_row_weights[i] = new Storage.DenseRowMatrix(units[i + 1] /*rows*/, units[i] /*cols*/);

    // biases (only for hidden layers and output layer)
    biases = new Storage.DenseVector[layers + 1];
    for (int i = 0; i <= layers; ++i) biases[i] = new Storage.DenseVector(units[i + 1]);
    // average activation (only for hidden layers)
    if (get_params()._autoencoder && get_params()._sparsity_beta > 0) {
      avg_activations = new Storage.DenseVector[layers];
      mean_a = new float[layers];
      for (int i = 0; i < layers; ++i) avg_activations[i] = new Storage.DenseVector(units[i + 1]);
    }
    allocateHelperArrays();
    // for diagnostics
    mean_rate = new float[units.length];
    rms_rate = new float[units.length];
    mean_bias = new float[units.length];
    rms_bias = new float[units.length];
    mean_weight = new float[units.length];
    rms_weight = new float[units.length];
  }
예제 #3
0
  @Test
  @Ignore
  public void run() {
    Scope.enter();
    try {
      File file = find_test_file("bigdata/laptop/mnist/train.csv.gz");
      File valid = find_test_file("bigdata/laptop/mnist/test.csv.gz");
      if (file != null) {
        NFSFileVec trainfv = NFSFileVec.make(file);
        Frame frame = ParseDataset.parse(Key.make(), trainfv._key);
        NFSFileVec validfv = NFSFileVec.make(valid);
        Frame vframe = ParseDataset.parse(Key.make(), validfv._key);
        DeepLearningParameters p = new DeepLearningParameters();

        // populate model parameters
        p._model_id = Key.make("dl_mnist_model");
        p._train = frame._key;
        //        p._valid = vframe._key;
        p._response_column = "C785"; // last column is the response
        p._activation = DeepLearningParameters.Activation.RectifierWithDropout;
        //        p._activation = DeepLearningParameters.Activation.MaxoutWithDropout;
        p._hidden = new int[] {800, 800};
        p._input_dropout_ratio = 0.2;
        p._mini_batch_size = 1;
        p._train_samples_per_iteration = 50000;
        p._score_duty_cycle = 0;
        //        p._shuffle_training_data = true;
        //        p._l1= 1e-5;
        //        p._max_w2= 10;
        p._epochs = 10 * 5. / 6;

        // Convert response 'C785' to categorical (digits 1 to 10)
        int ci = frame.find("C785");
        Scope.track(frame.replace(ci, frame.vecs()[ci].toEnum())._key);
        Scope.track(vframe.replace(ci, vframe.vecs()[ci].toEnum())._key);
        DKV.put(frame);
        DKV.put(vframe);

        // speed up training
        p._adaptive_rate =
            true; // disable adaptive per-weight learning rate -> default settings for learning rate
                  // and momentum are probably not ideal (slow convergence)
        p._replicate_training_data =
            true; // avoid extra communication cost upfront, got enough data on each node for load
                  // balancing
        p._overwrite_with_best_model = true; // no need to keep the best model around
        p._classification_stop = -1;
        p._score_interval = 60; // score and print progress report (only) every 20 seconds
        p._score_training_samples =
            10000; // only score on a small sample of the training set -> don't want to spend too
                   // much time scoring (note: there will be at least 1 row per chunk)

        DeepLearning dl = new DeepLearning(p);
        DeepLearningModel model = null;
        try {
          model = dl.trainModel().get();
        } catch (Throwable t) {
          t.printStackTrace();
          throw new RuntimeException(t);
        } finally {
          dl.remove();
          if (model != null) {
            model.delete();
          }
        }
      } else {
        Log.info("Please run ./gradlew syncBigDataLaptop in the top-level directory of h2o-3.");
      }
    } catch (Throwable t) {
      t.printStackTrace();
      throw new RuntimeException(t);
    } finally {
      Scope.exit();
    }
  }