public final void set_params(DeepLearningParameters p) { parameters = (DeepLearningParameters) p.clone(); }
/**
 * Main constructor
 *
 * @param params Model parameters
 * @param dinfo Data Info
 * @param nClasses number of classes (1 for regression, 0 for autoencoder)
 * @param train User-given training data frame, prepared by AdaptTestTrain
 * @param valid User-specified validation data frame, prepared by AdaptTestTrain
 */
public DeepLearningModelInfo(
    final DeepLearningParameters params,
    final DataInfo dinfo,
    int nClasses,
    Frame train,
    Frame valid) {
  _classification = nClasses > 1;
  _train = train;
  _valid = valid;
  data_info = dinfo;
  parameters = (DeepLearningParameters) params.clone(); // make a copy, don't change model's parameters
  DeepLearningParameters.Sanity.modifyParms(
      parameters, parameters, nClasses); // sanitize the model_info's parameters

  final int num_input = dinfo.fullN();
  final int num_output =
      get_params()._autoencoder ? num_input : (_classification ? train.lastVec().cardinality() : 1);
  if (!get_params()._autoencoder) assert (num_output == nClasses);

  _saw_missing_cats = dinfo._cats > 0 ? new boolean[data_info._cats] : null;
  assert (num_input > 0);
  assert (num_output > 0);
  if (has_momenta() && adaDelta())
    throw new IllegalArgumentException(
        "Cannot have non-zero momentum and adaptive rate at the same time.");

  final int layers = get_params()._hidden.length;
  // units (# neurons for each layer)
  units = new int[layers + 2];
  if (get_params()._max_categorical_features <= Integer.MAX_VALUE - dinfo._nums)
    units[0] = Math.min(dinfo._nums + get_params()._max_categorical_features, num_input);
  else units[0] = num_input;
  System.arraycopy(get_params()._hidden, 0, units, 1, layers);
  units[layers + 1] = num_output;

  boolean printLevels = units[0] > 1000L;
  boolean warn = units[0] > 100000L;
  if (printLevels) {
    final String[][] domains = dinfo._adaptedFrame.domains();
    int[] levels = new int[domains.length];
    for (int i = 0; i < levels.length; ++i) {
      levels[i] = domains[i] != null ? domains[i].length : 0;
    }
    Arrays.sort(levels);
    if (warn) {
      Log.warn(
          "===================================================================================================================================");
      Log.warn(
          num_input
              + " input features"
              + (dinfo._cats > 0 ? " (after categorical one-hot encoding)" : "")
              + ". Can be slow and require a lot of memory.");
    }
    if (levels[levels.length - 1] > 0) {
      int levelcutoff = levels[levels.length - 1 - Math.min(10, levels.length - 1)];
      int count = 0;
      for (int i = 0;
          i < dinfo._adaptedFrame.numCols() - (get_params()._autoencoder ? 0 : 1) && count < 10;
          ++i) {
        if (dinfo._adaptedFrame.domains()[i] != null
            && dinfo._adaptedFrame.domains()[i].length >= levelcutoff) {
          if (warn) {
            Log.warn(
                "Categorical feature '"
                    + dinfo._adaptedFrame._names[i]
                    + "' has cardinality "
                    + dinfo._adaptedFrame.domains()[i].length
                    + ".");
          } else {
            Log.info(
                "Categorical feature '"
                    + dinfo._adaptedFrame._names[i]
                    + "' has cardinality "
                    + dinfo._adaptedFrame.domains()[i].length
                    + ".");
          }
        }
        count++;
      }
    }
    if (warn) {
      Log.warn("Suggestions:");
      Log.warn(" *) Limit the size of the first hidden layer");
      if (dinfo._cats > 0) {
        Log.warn(
            " *) Limit the total number of one-hot encoded features with the parameter 'max_categorical_features'");
        Log.warn(
            " *) Run h2o.interaction(...,pairwise=F) on high-cardinality categorical columns to limit the factor count, see http://learn.h2o.ai");
      }
      Log.warn(
          "===================================================================================================================================");
    }
  }

  // weights (to connect layers)
  dense_row_weights = new Storage.DenseRowMatrix[layers + 1];
  dense_col_weights = new Storage.DenseColMatrix[layers + 1];

  // decide format of weight matrices row-major or col-major
  if (get_params()._col_major)
    dense_col_weights[0] = new Storage.DenseColMatrix(units[1], units[0]);
  else dense_row_weights[0] = new Storage.DenseRowMatrix(units[1], units[0]);
  for (int i = 1; i <= layers; ++i)
    dense_row_weights[i] = new Storage.DenseRowMatrix(units[i + 1] /*rows*/, units[i] /*cols*/);

  // biases (only for hidden layers and output layer)
  biases = new Storage.DenseVector[layers + 1];
  for (int i = 0; i <= layers; ++i) biases[i] = new Storage.DenseVector(units[i + 1]);

  // average activation (only for hidden layers)
  if (get_params()._autoencoder && get_params()._sparsity_beta > 0) {
    avg_activations = new Storage.DenseVector[layers];
    mean_a = new float[layers];
    for (int i = 0; i < layers; ++i) avg_activations[i] = new Storage.DenseVector(units[i + 1]);
  }
  allocateHelperArrays();

  // for diagnostics
  mean_rate = new float[units.length];
  rms_rate = new float[units.length];
  mean_bias = new float[units.length];
  rms_bias = new float[units.length];
  mean_weight = new float[units.length];
  rms_weight = new float[units.length];
}
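// Illustrative sketch (not part of the original source; the numbers are hypothetical): for
// _hidden = {200, 100}, 50 input features after one-hot encoding, and 3 output classes, the
// layout built by the constructor above would be
//   units                = {50, 200, 100, 3}   // input, hidden layers..., output
//   dense_row_weights[0] = 200 x 50            // units[1] x units[0]
//   dense_row_weights[1] = 100 x 200           // units[2] x units[1]
//   dense_row_weights[2] =   3 x 100           // units[3] x units[2]
//   biases[i]            = units[i + 1] entries for i = 0..2 (hidden layers and output layer)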
@Test
@Ignore
public void run() {
  Scope.enter();
  try {
    File file = find_test_file("bigdata/laptop/mnist/train.csv.gz");
    File valid = find_test_file("bigdata/laptop/mnist/test.csv.gz");
    if (file != null) {
      NFSFileVec trainfv = NFSFileVec.make(file);
      Frame frame = ParseDataset.parse(Key.make(), trainfv._key);
      NFSFileVec validfv = NFSFileVec.make(valid);
      Frame vframe = ParseDataset.parse(Key.make(), validfv._key);

      DeepLearningParameters p = new DeepLearningParameters();

      // populate model parameters
      p._model_id = Key.make("dl_mnist_model");
      p._train = frame._key;
      // p._valid = vframe._key;
      p._response_column = "C785"; // last column is the response
      p._activation = DeepLearningParameters.Activation.RectifierWithDropout;
      // p._activation = DeepLearningParameters.Activation.MaxoutWithDropout;
      p._hidden = new int[] {800, 800};
      p._input_dropout_ratio = 0.2;
      p._mini_batch_size = 1;
      p._train_samples_per_iteration = 50000;
      p._score_duty_cycle = 0;
      // p._shuffle_training_data = true;
      // p._l1 = 1e-5;
      // p._max_w2 = 10;
      p._epochs = 10 * 5. / 6;

      // Convert response 'C785' to categorical (digits 1 to 10)
      int ci = frame.find("C785");
      Scope.track(frame.replace(ci, frame.vecs()[ci].toEnum())._key);
      Scope.track(vframe.replace(ci, vframe.vecs()[ci].toEnum())._key);
      DKV.put(frame);
      DKV.put(vframe);

      // speed up training
      p._adaptive_rate = true; // adaptive per-weight learning rate -> no manual tuning of
                               // learning rate and momentum needed
      p._replicate_training_data = true; // avoid extra communication cost upfront, got enough
                                         // data on each node for load balancing
      p._overwrite_with_best_model = true; // return the best model found during training
      p._classification_stop = -1; // don't stop early based on classification error
      p._score_interval = 60; // score and print progress report (only) every 60 seconds
      p._score_training_samples = 10000; // only score on a small sample of the training set ->
                                         // don't want to spend too much time scoring (note:
                                         // there will be at least 1 row per chunk)

      DeepLearning dl = new DeepLearning(p);
      DeepLearningModel model = null;
      try {
        model = dl.trainModel().get();
      } catch (Throwable t) {
        t.printStackTrace();
        throw new RuntimeException(t);
      } finally {
        dl.remove();
        if (model != null) {
          model.delete();
        }
      }
    } else {
      Log.info("Please run ./gradlew syncBigDataLaptop in the top-level directory of h2o-3.");
    }
  } catch (Throwable t) {
    t.printStackTrace();
    throw new RuntimeException(t);
  } finally {
    Scope.exit();
  }
}
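// Hedged usage sketch (not part of the original test): the parsed validation frame 'vframe' is
// tracked and published to the DKV above but never used while 'p._valid' stays commented out.
// Assuming the standard hex.Model scoring API, a validation check could be added either by
// uncommenting 'p._valid = vframe._key', or by scoring explicitly before cleanup:
//   if (model != null) {
//     Frame preds = model.score(vframe); // per-row predictions on the held-out MNIST test set
//     preds.delete();
//   }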