/**
 * Splits the flat coefficient vector of the submodel at {@code idx} into one row per class.
 *
 * <p>Each row holds {@code _dinfo.fullN() + 1} consecutive entries of the flat vector. If the
 * submodel stores only a subset of coefficients ({@code idxs != null}), the vector is first
 * expanded to its full length via {@code ArrayUtils.expandAndScatter}.
 *
 * @param idx index into {@code _submodels}
 * @return an array with {@code nclasses()} rows, each an independent copy of that class's slice
 */
public double[][] getNormBetaMultinomial(int idx) {
  final double[][] perClass = new double[nclasses()][];
  final Submodel submodel = _submodels[idx];
  final int stride = _dinfo.fullN() + 1; // number of coefficients stored per class
  double[] flat = submodel.beta;
  if (submodel.idxs != null) {
    flat = ArrayUtils.expandAndScatter(flat, nclasses() * stride, submodel.idxs);
  }
  int from = 0;
  for (int c = 0; c < perClass.length; ++c) {
    perClass[c] = Arrays.copyOfRange(flat, from, from + stride);
    from += stride;
  }
  return perClass;
}
/**
 * Returns the coefficients of the submodel selected by {@code _best_lambda_idx}.
 *
 * <p>A buffer of length {@code _dinfo.fullN() + 1} is allocated and handed to the submodel's
 * {@code getBeta}; whatever that call returns is returned unchanged.
 *
 * @return the result of {@code getBeta} on the selected submodel
 */
public double[] getNormBeta() {
  final Submodel selected = _submodels[_best_lambda_idx];
  final double[] buffer = MemoryManager.malloc8d(_dinfo.fullN() + 1);
  return selected.getBeta(buffer);
}
/** * Main constructor * * @param params Model parameters * @param dinfo Data Info * @param nClasses number of classes (1 for regression, 0 for autoencoder) * @param train User-given training data frame, prepared by AdaptTestTrain * @param valid User-specified validation data frame, prepared by AdaptTestTrain */ public DeepLearningModelInfo( final DeepLearningParameters params, final DataInfo dinfo, int nClasses, Frame train, Frame valid) { _classification = nClasses > 1; _train = train; _valid = valid; data_info = dinfo; parameters = (DeepLearningParameters) params.clone(); // make a copy, don't change model's parameters DeepLearningParameters.Sanity.modifyParms( parameters, parameters, nClasses); // sanitize the model_info's parameters final int num_input = dinfo.fullN(); final int num_output = get_params()._autoencoder ? num_input : (_classification ? train.lastVec().cardinality() : 1); if (!get_params()._autoencoder) assert (num_output == nClasses); _saw_missing_cats = dinfo._cats > 0 ? new boolean[data_info._cats] : null; assert (num_input > 0); assert (num_output > 0); if (has_momenta() && adaDelta()) throw new IllegalArgumentException( "Cannot have non-zero momentum and adaptive rate at the same time."); final int layers = get_params()._hidden.length; // units (# neurons for each layer) units = new int[layers + 2]; if (get_params()._max_categorical_features <= Integer.MAX_VALUE - dinfo._nums) units[0] = Math.min(dinfo._nums + get_params()._max_categorical_features, num_input); else units[0] = num_input; System.arraycopy(get_params()._hidden, 0, units, 1, layers); units[layers + 1] = num_output; boolean printLevels = units[0] > 1000L; boolean warn = units[0] > 100000L; if (printLevels) { final String[][] domains = dinfo._adaptedFrame.domains(); int[] levels = new int[domains.length]; for (int i = 0; i < levels.length; ++i) { levels[i] = domains[i] != null ? 
domains[i].length : 0; } Arrays.sort(levels); if (warn) { Log.warn( "==================================================================================================================================="); Log.warn( num_input + " input features" + (dinfo._cats > 0 ? " (after categorical one-hot encoding)" : "") + ". Can be slow and require a lot of memory."); } if (levels[levels.length - 1] > 0) { int levelcutoff = levels[levels.length - 1 - Math.min(10, levels.length - 1)]; int count = 0; for (int i = 0; i < dinfo._adaptedFrame.numCols() - (get_params()._autoencoder ? 0 : 1) && count < 10; ++i) { if (dinfo._adaptedFrame.domains()[i] != null && dinfo._adaptedFrame.domains()[i].length >= levelcutoff) { if (warn) { Log.warn( "Categorical feature '" + dinfo._adaptedFrame._names[i] + "' has cardinality " + dinfo._adaptedFrame.domains()[i].length + "."); } else { Log.info( "Categorical feature '" + dinfo._adaptedFrame._names[i] + "' has cardinality " + dinfo._adaptedFrame.domains()[i].length + "."); } } count++; } } if (warn) { Log.warn("Suggestions:"); Log.warn(" *) Limit the size of the first hidden layer"); if (dinfo._cats > 0) { Log.warn( " *) Limit the total number of one-hot encoded features with the parameter 'max_categorical_features'"); Log.warn( " *) Run h2o.interaction(...,pairwise=F) on high-cardinality categorical columns to limit the factor count, see http://learn.h2o.ai"); } Log.warn( "==================================================================================================================================="); } } // weights (to connect layers) dense_row_weights = new Storage.DenseRowMatrix[layers + 1]; dense_col_weights = new Storage.DenseColMatrix[layers + 1]; // decide format of weight matrices row-major or col-major if (get_params()._col_major) dense_col_weights[0] = new Storage.DenseColMatrix(units[1], units[0]); else dense_row_weights[0] = new Storage.DenseRowMatrix(units[1], units[0]); for (int i = 1; i <= layers; ++i) dense_row_weights[i] = 
new Storage.DenseRowMatrix(units[i + 1] /*rows*/, units[i] /*cols*/); // biases (only for hidden layers and output layer) biases = new Storage.DenseVector[layers + 1]; for (int i = 0; i <= layers; ++i) biases[i] = new Storage.DenseVector(units[i + 1]); // average activation (only for hidden layers) if (get_params()._autoencoder && get_params()._sparsity_beta > 0) { avg_activations = new Storage.DenseVector[layers]; mean_a = new float[layers]; for (int i = 0; i < layers; ++i) avg_activations[i] = new Storage.DenseVector(units[i + 1]); } allocateHelperArrays(); // for diagnostics mean_rate = new float[units.length]; rms_rate = new float[units.length]; mean_bias = new float[units.length]; rms_bias = new float[units.length]; mean_weight = new float[units.length]; rms_weight = new float[units.length]; }
/** * Train a Deep Learning model, assumes that all members are populated If checkpoint == null, * then start training a new model, otherwise continue from a checkpoint */ public final void buildModel() { DeepLearningModel cp = null; if (_parms._checkpoint == null) { cp = new DeepLearningModel( dest(), _parms, new DeepLearningModel.DeepLearningModelOutput(DeepLearning.this), _train, _valid, nclasses()); cp.model_info().initializeMembers(); } else { final DeepLearningModel previous = DKV.getGet(_parms._checkpoint); if (previous == null) throw new IllegalArgumentException("Checkpoint not found."); Log.info("Resuming from checkpoint."); _job.update(0, "Resuming from checkpoint"); if (isClassifier() != previous._output.isClassifier()) throw new H2OIllegalArgumentException( "Response type must be the same as for the checkpointed model."); if (isSupervised() != previous._output.isSupervised()) throw new H2OIllegalArgumentException( "Model type must be the same as for the checkpointed model."); // check the user-given arguments for consistency DeepLearningParameters oldP = previous._parms; // sanitized parameters for checkpointed model DeepLearningParameters newP = _parms; // user-given parameters for restart DeepLearningParameters oldP2 = (DeepLearningParameters) oldP.clone(); DeepLearningParameters newP2 = (DeepLearningParameters) newP.clone(); DeepLearningParameters.Sanity.modifyParms( oldP, oldP2, nclasses()); // sanitize the user-given parameters DeepLearningParameters.Sanity.modifyParms( newP, newP2, nclasses()); // sanitize the user-given parameters DeepLearningParameters.Sanity.checkpoint(oldP2, newP2); DataInfo dinfo; try { // PUBDEV-2513: Adapt _train and _valid (in-place) to match the frames that were used for // the previous model // This can add or remove dummy columns (can happen if the dataset is sparse and datasets // have different non-const columns) for (String st : previous.adaptTestForTrain(_train, true, false)) Log.warn(st); for (String st : 
previous.adaptTestForTrain(_valid, true, false)) Log.warn(st); dinfo = makeDataInfo(_train, _valid, _parms, nclasses()); DKV.put(dinfo); cp = new DeepLearningModel(dest(), _parms, previous, false, dinfo); cp.write_lock(_job); if (!Arrays.equals(cp._output._names, previous._output._names)) { throw new H2OIllegalArgumentException( "The columns of the training data must be the same as for the checkpointed model. Check ignored columns (or disable ignore_const_cols)."); } if (!Arrays.deepEquals(cp._output._domains, previous._output._domains)) { throw new H2OIllegalArgumentException( "Categorical factor levels of the training data must be the same as for the checkpointed model."); } if (dinfo.fullN() != previous.model_info().data_info().fullN()) { throw new H2OIllegalArgumentException( "Total number of predictors is different than for the checkpointed model."); } if (_parms._epochs <= previous.epoch_counter) { throw new H2OIllegalArgumentException( "Total number of epochs must be larger than the number of epochs already trained for the checkpointed model (" + previous.epoch_counter + ")."); } // these are the mutable parameters that are to be used by the model (stored in // model_info._parms) final DeepLearningParameters actualNewP = cp.model_info() .get_params(); // actually used parameters for model building (defaults filled in, // etc.) 
assert (actualNewP != previous.model_info().get_params()); assert (actualNewP != newP); assert (actualNewP != oldP); DeepLearningParameters.Sanity.update(actualNewP, newP, nclasses()); Log.info( "Continuing training after " + String.format("%.3f", previous.epoch_counter) + " epochs from the checkpointed model."); cp.update(_job); } catch (H2OIllegalArgumentException ex) { if (cp != null) { cp.unlock(_job); cp.delete(); cp = null; } throw ex; } finally { if (cp != null) cp.unlock(_job); } } trainModel(cp); // clean up, but don't delete weights and biases if user asked for export List<Key> keep = new ArrayList<>(); try { if (_parms._export_weights_and_biases && cp._output.weights != null && cp._output.biases != null) { for (Key k : Arrays.asList(cp._output.weights)) { keep.add(k); for (Vec vk : ((Frame) DKV.getGet(k)).vecs()) { keep.add(vk._key); } } for (Key k : Arrays.asList(cp._output.biases)) { keep.add(k); for (Vec vk : ((Frame) DKV.getGet(k)).vecs()) { keep.add(vk._key); } } } } finally { Scope.exit(keep.toArray(new Key[keep.size()])); } }
@Override protected void compute2() { CoxPHModel model = null; try { Scope.enter(); _parms.read_lock_frames(CoxPH.this); init(true); applyScoringFrameSideEffects(); // The model to be built model = new CoxPHModel(dest(), _parms, new CoxPHModel.CoxPHOutput(CoxPH.this)); model.delete_and_lock(_key); applyTrainingFrameSideEffects(); int nResponses = 1; boolean useAllFactorLevels = false; final DataInfo dinfo = new DataInfo( Key.make(), _modelBuilderTrain, null, nResponses, useAllFactorLevels, DataInfo.TransformType.DEMEAN, TransformType.NONE, true, false, false, false, false, false); initStats(model, dinfo); final int n_offsets = (model._parms.offset_columns == null) ? 0 : model._parms.offset_columns.length; final int n_coef = dinfo.fullN() - n_offsets; final double[] step = MemoryManager.malloc8d(n_coef); final double[] oldCoef = MemoryManager.malloc8d(n_coef); final double[] newCoef = MemoryManager.malloc8d(n_coef); Arrays.fill(step, Double.NaN); Arrays.fill(oldCoef, Double.NaN); for (int j = 0; j < n_coef; ++j) newCoef[j] = model._parms.init; double oldLoglik = -Double.MAX_VALUE; final int n_time = (int) (model._output.max_time - model._output.min_time + 1); final boolean has_start_column = (model._parms.start_column != null); final boolean has_weights_column = (model._parms.weights_column != null); for (int i = 0; i <= model._parms.iter_max; ++i) { model._output.iter = i; final CoxPHTask coxMR = new CoxPHTask( self(), dinfo, newCoef, model._output.min_time, n_time, n_offsets, has_start_column, has_weights_column) .doAll(dinfo._adaptedFrame); final double newLoglik = calcLoglik(model, coxMR); if (newLoglik > oldLoglik) { if (i == 0) calcCounts(model, coxMR); calcModelStats(model, newCoef, newLoglik); calcCumhaz_0(model, coxMR); if (newLoglik == 0) model._output.lre = -Math.log10(Math.abs(oldLoglik - newLoglik)); else model._output.lre = -Math.log10(Math.abs((oldLoglik - newLoglik) / newLoglik)); if (model._output.lre >= model._parms.lre_min) break; 
Arrays.fill(step, 0); for (int j = 0; j < n_coef; ++j) for (int k = 0; k < n_coef; ++k) step[j] -= model._output.var_coef[j][k] * model._output.gradient[k]; for (int j = 0; j < n_coef; ++j) if (Double.isNaN(step[j]) || Double.isInfinite(step[j])) break; oldLoglik = newLoglik; System.arraycopy(newCoef, 0, oldCoef, 0, oldCoef.length); } else { for (int j = 0; j < n_coef; ++j) step[j] /= 2; } for (int j = 0; j < n_coef; ++j) newCoef[j] = oldCoef[j] - step[j]; } model.update(_key); } catch (Throwable t) { Job thisJob = DKV.getGet(_key); if (thisJob._state == JobState.CANCELLED) { Log.info("Job cancelled by user."); } else { t.printStackTrace(); failed(t); throw t; } } finally { updateModelOutput(); _parms.read_unlock_frames(CoxPH.this); Scope.exit(); done(); // Job done! } tryComplete(); }