/** * This method trains a stacked autoencoder * * @param trainData Training dataset as a JavaRDD * @param batchSize Size of a training mini-batch * @param layerSizes Number of neurons for each layer * @param epochs Number of epochs to train * @param responseColumn Name of the response column * @param modelName Name of the model * @return DeepLearningModel */ public DeepLearningModel train( JavaRDD<LabeledPoint> trainData, int batchSize, int[] layerSizes, String activationType, int epochs, String responseColumn, String modelName, MLModel mlModel, long modelID) { // build stacked autoencoder by training the model with training data double trainingFraction = 1; try { Scope.enter(); if (trainData != null) { int numberOfFeatures = mlModel.getFeatures().size(); List<Feature> features = mlModel.getFeatures(); String[] names = new String[numberOfFeatures + 1]; for (int i = 0; i < numberOfFeatures; i++) { names[i] = features.get(i).getName(); } names[numberOfFeatures] = mlModel.getResponseVariable(); Frame frame = DeeplearningModelUtils.javaRDDToFrame(names, trainData); // H2O uses default C<x> for column header // String classifColName = "C" + frame.numCols(); String classifColName = mlModel.getResponseVariable(); // Convert response to categorical (digits 1 to <num of columns>) int ci = frame.find(classifColName); Scope.track(frame.replace(ci, frame.vecs()[ci].toEnum())._key); // Splitting train file to train, validation and test // Using FrameSplitter (instead of SuffleSplitFrame) gives a weird exception // barrier onExCompletion for hex.deeplearning.DeepLearning$DeepLearningDriver@78ec854 double[] ratios = new double[] {trainingFraction, 1 - trainingFraction}; @SuppressWarnings("unchecked") Frame[] splits = ShuffleSplitFrame.shuffleSplitFrame( frame, generateNumKeys(frame._key, ratios.length), ratios, 123456789); Frame trainFrame = splits[0]; Frame vframe = splits[1]; if (log.isDebugEnabled()) { log.debug("Creating Deeplearning parameters"); } DeepLearningParameters deeplearningParameters = new DeepLearningParameters(); // convert model name String dlModelName = modelName.replace('.', '_').replace('-', '_'); // populate model parameters deeplearningParameters._model_id = Key.make(dlModelName + "_dl"); deeplearningParameters._train = trainFrame._key; deeplearningParameters._valid = vframe._key; deeplearningParameters._response_column = classifColName; // last column is the response // This is causin all the predictions to be 0.0 // p._autoencoder = true; deeplearningParameters._activation = getActivationType(activationType); deeplearningParameters._hidden = layerSizes; deeplearningParameters._train_samples_per_iteration = batchSize; deeplearningParameters._input_dropout_ratio = 0.2; deeplearningParameters._l1 = 1e-5; deeplearningParameters._max_w2 = 10; deeplearningParameters._epochs = epochs; // speed up training deeplearningParameters._adaptive_rate = true; // disable adaptive per-weight learning rate -> default // settings for learning rate and momentum are probably // not ideal (slow convergence) deeplearningParameters._replicate_training_data = true; // avoid extra communication cost upfront, got // enough data on each node for load balancing deeplearningParameters._overwrite_with_best_model = true; // no need to keep the best model around deeplearningParameters._diagnostics = false; // no need to compute statistics during training deeplearningParameters._classification_stop = -1; deeplearningParameters._score_interval = 60; // score and print progress report (only) every 20 seconds deeplearningParameters._score_training_samples = batchSize / 10; // only score on a small sample of the // training set -> don't want to spend // too much time scoring (note: there // will be at least 1 row per chunk) DKV.put(trainFrame); DKV.put(vframe); deeplearning = new DeepLearning(deeplearningParameters); if (log.isDebugEnabled()) { log.debug("Start training deeplearning model ...."); } try { dlModel = deeplearning.trainModel().get(); if (log.isDebugEnabled()) { log.debug("Successfully finished Training deeplearning model."); } } catch (RuntimeException ex) { log.error("Error in training Stacked Autoencoder classifier model", ex); } } else { log.error("Train file not found!"); } } catch (RuntimeException ex) { log.error("Failed to train the deeplearning model [id] " + modelID + ". " + ex.getMessage()); } finally { Scope.exit(); } return dlModel; }