@Override protected Frame predictScoreImpl(Frame orig, Frame adaptedFr, String destination_key) { return reconstruct( orig, adaptedFr, (null == destination_key ? Key.make() : Key.make(destination_key)), true, _parms._impute_original); }
/** * Project each archetype into original feature space * * @param frame Original training data with m rows and n columns * @param destination_key Frame Id for output * @return Frame containing k rows and n columns, where each row corresponds to an archetype */ public Frame scoreArchetypes(Frame frame, Key destination_key, boolean reverse_transform) { final int ncols = _output._names.length; Frame adaptedFr = new Frame(frame); adaptTestForTrain(adaptedFr, true, false); assert ncols == adaptedFr.numCols(); String[][] adaptedDomme = adaptedFr.domains(); double[][] proj = new double[_parms._k][_output._nnums + _output._ncats]; // Categorical columns for (int d = 0; d < _output._ncats; d++) { double[][] block = _output._archetypes_raw.getCatBlock(d); for (int k = 0; k < _parms._k; k++) proj[k][_output._permutation[d]] = _parms.mimpute(block[k], _output._lossFunc[d]); } // Numeric columns for (int d = _output._ncats; d < (_output._ncats + _output._nnums); d++) { int ds = d - _output._ncats; for (int k = 0; k < _parms._k; k++) { double num = _output._archetypes_raw.getNum(ds, k); proj[k][_output._permutation[d]] = _parms.impute(num, _output._lossFunc[d]); if (reverse_transform) proj[k][_output._permutation[d]] = proj[k][_output._permutation[d]] / _output._normMul[ds] + _output._normSub[ds]; } } // Convert projection of archetypes into a frame with correct domains Frame f = ArrayUtils.frame( (null == destination_key ? Key.make() : destination_key), adaptedFr.names(), proj); for (int i = 0; i < ncols; i++) f.vec(i).setDomain(adaptedDomme[i]); return f; }
@Override protected Frame rebalance(final Frame original_fr, boolean local, final String name) { if (original_fr == null) return null; if (_parms._force_load_balance) { int original_chunks = original_fr.anyVec().nChunks(); _job.update(0, "Load balancing " + name.substring(name.length() - 5) + " data..."); int chunks = desiredChunks(original_fr, local); if (!_parms._reproducible) { if (original_chunks >= chunks) { if (!_parms._quiet_mode) Log.info( "Dataset already contains " + original_chunks + " chunks. No need to rebalance."); return original_fr; } } else { // reproducible, set chunks to 1 assert chunks == 1; if (!_parms._quiet_mode) Log.warn("Reproducibility enforced - using only 1 thread - can be slow."); if (original_chunks == 1) return original_fr; } if (!_parms._quiet_mode) Log.info( "Rebalancing " + name.substring(name.length() - 5) + " dataset into " + chunks + " chunks."); Key newKey = Key.make(name + ".chks" + chunks); RebalanceDataSet rb = new RebalanceDataSet(original_fr, newKey, chunks); H2O.submitTask(rb).join(); Frame rebalanced_fr = DKV.get(newKey).get(); Scope.track(rebalanced_fr); return rebalanced_fr; } return original_fr; }
// GLRM scoring is data imputation based on feature domains using reconstructed XY (see Udell // (2015), Section 5.3) private Frame reconstruct( Frame orig, Frame adaptedFr, Key destination_key, boolean save_imputed, boolean reverse_transform) { final int ncols = _output._names.length; assert ncols == adaptedFr.numCols(); String prefix = "reconstr_"; // Need [A,X,P] where A = adaptedFr, X = loading frame, P = imputed frame // Note: A is adapted to original training frame, P has columns shuffled so cats come before // nums! Frame fullFrm = new Frame(adaptedFr); Frame loadingFrm = DKV.get(_output._representation_key).get(); fullFrm.add(loadingFrm); String[][] adaptedDomme = adaptedFr.domains(); for (int i = 0; i < ncols; i++) { Vec v = fullFrm.anyVec().makeZero(); v.setDomain(adaptedDomme[i]); fullFrm.add(prefix + _output._names[i], v); } GLRMScore gs = new GLRMScore(ncols, _parms._k, save_imputed, reverse_transform).doAll(fullFrm); // Return the imputed training frame int x = ncols + _parms._k, y = fullFrm.numCols(); Frame f = fullFrm.extractFrame( x, y); // this will call vec_impl() and we cannot call the delete() below just yet f = new Frame((null == destination_key ? Key.make() : destination_key), f.names(), f.vecs()); DKV.put(f); gs._mb.makeModelMetrics( GLRMModel.this, orig, null, null); // save error metrics based on imputed data return f; }
/** * Train a Deep Learning neural net model * * @param model Input model (e.g., from initModel(), or from a previous training run) * @return Trained model */ public final DeepLearningModel trainModel(DeepLearningModel model) { Frame validScoreFrame = null; Frame train, trainScoreFrame; try { // if (checkpoint == null && !quiet_mode) logStart(); //if checkpoint is given, some // Job's params might be uninitialized (but the restarted model's parameters are correct) if (model == null) { model = DKV.get(dest()).get(); } Log.info( "Model category: " + (_parms._autoencoder ? "Auto-Encoder" : isClassifier() ? "Classification" : "Regression")); final long model_size = model.model_info().size(); Log.info( "Number of model parameters (weights/biases): " + String.format("%,d", model_size)); model.write_lock(_job); _job.update(0, "Setting up training data..."); final DeepLearningParameters mp = model.model_info().get_params(); // temporary frames of the same "name" as the orig _train/_valid (asking the parameter's // Key, not the actual frame) // Note: don't put into DKV or they would overwrite the _train/_valid frames! Frame tra_fr = new Frame(mp._train, _train.names(), _train.vecs()); Frame val_fr = _valid != null ? new Frame(mp._valid, _valid.names(), _valid.vecs()) : null; train = tra_fr; if (model._output.isClassifier() && mp._balance_classes) { _job.update(0, "Balancing class distribution of training data..."); float[] trainSamplingFactors = new float [train .lastVec() .domain() .length]; // leave initialized to 0 -> will be filled up below if (mp._class_sampling_factors != null) { if (mp._class_sampling_factors.length != train.lastVec().domain().length) throw new IllegalArgumentException( "class_sampling_factors must have " + train.lastVec().domain().length + " elements"); trainSamplingFactors = mp._class_sampling_factors.clone(); // clone: don't modify the original } train = sampleFrameStratified( train, train.lastVec(), train.vec(model._output.weightsName()), trainSamplingFactors, (long) (mp._max_after_balance_size * train.numRows()), mp._seed, true, false); Vec l = train.lastVec(); Vec w = train.vec(model._output.weightsName()); MRUtils.ClassDist cd = new MRUtils.ClassDist(l); model._output._modelClassDist = _weights != null ? cd.doAll(l, w).rel_dist() : cd.doAll(l).rel_dist(); } model.training_rows = train.numRows(); if (_weights != null && _weights.min() == 0 && _weights.max() == 1 && _weights.isInt()) { model.training_rows = Math.round(train.numRows() * _weights.mean()); Log.warn( "Not counting " + (train.numRows() - model.training_rows) + " rows with weight=0 towards an epoch."); } Log.info("One epoch corresponds to " + model.training_rows + " training data rows."); trainScoreFrame = sampleFrame( train, mp._score_training_samples, mp._seed); // training scoring dataset is always sampled uniformly from the training // dataset if (trainScoreFrame != train) Scope.track(trainScoreFrame); if (!_parms._quiet_mode) Log.info("Number of chunks of the training data: " + train.anyVec().nChunks()); if (val_fr != null) { model.validation_rows = val_fr.numRows(); // validation scoring dataset can be sampled in multiple ways from the given validation // dataset if (model._output.isClassifier() && mp._balance_classes && mp._score_validation_sampling == DeepLearningParameters.ClassSamplingMethod.Stratified) { _job.update(0, "Sampling validation data (stratified)..."); validScoreFrame = sampleFrameStratified( val_fr, val_fr.lastVec(), val_fr.vec(model._output.weightsName()), null, mp._score_validation_samples > 0 ? mp._score_validation_samples : val_fr.numRows(), mp._seed + 1, false /* no oversampling */, false); } else { _job.update(0, "Sampling validation data..."); validScoreFrame = sampleFrame(val_fr, mp._score_validation_samples, mp._seed + 1); if (validScoreFrame != val_fr) Scope.track(validScoreFrame); } if (!_parms._quiet_mode) Log.info( "Number of chunks of the validation data: " + validScoreFrame.anyVec().nChunks()); } // Set train_samples_per_iteration size (cannot be done earlier since this depends on // whether stratified sampling is done) model.actual_train_samples_per_iteration = computeTrainSamplesPerIteration(mp, model.training_rows, model); // Determine whether shuffling is enforced if (mp._replicate_training_data && (model.actual_train_samples_per_iteration == model.training_rows * (mp._single_node_mode ? 1 : H2O.CLOUD.size())) && !mp._shuffle_training_data && H2O.CLOUD.size() > 1 && !mp._reproducible) { if (!mp._quiet_mode) Log.info( "Enabling training data shuffling, because all nodes train on the full dataset (replicated training data)."); mp._shuffle_training_data = true; } if (!mp._shuffle_training_data && model.actual_train_samples_per_iteration == model.training_rows && train.anyVec().nChunks() == 1) { if (!mp._quiet_mode) Log.info( "Enabling training data shuffling to avoid training rows in the same order over and over (no Hogwild since there's only 1 chunk)."); mp._shuffle_training_data = true; } // if (!mp._quiet_mode) Log.info("Initial model:\n" + model.model_info()); long now = System.currentTimeMillis(); model._timeLastIterationEnter = now; if (_parms._autoencoder) { _job.update(0, "Scoring null model of autoencoder..."); if (!mp._quiet_mode) Log.info("Scoring the null model of the autoencoder."); model.doScoring( trainScoreFrame, validScoreFrame, _job._key, 0, false); // get the null model reconstruction error } // put the initial version of the model into DKV model.update(_job); model.total_setup_time_ms += now - _job.start_time(); Log.info("Total setup time: " + PrettyPrint.msecs(model.total_setup_time_ms, true)); Log.info("Starting to train the Deep Learning model."); _job.update(0, "Training..."); // main loop for (; ; ) { model.iterations++; model.set_model_info( mp._epochs == 0 ? model.model_info() : H2O.CLOUD.size() > 1 && mp._replicate_training_data ? (mp._single_node_mode ? new DeepLearningTask2( _job._key, train, model.model_info(), rowFraction(train, mp, model), model.iterations) .doAll(Key.make(H2O.SELF)) .model_info() : // replicated data + single node mode new DeepLearningTask2( _job._key, train, model.model_info(), rowFraction(train, mp, model), model.iterations) .doAllNodes() .model_info()) : // replicated data + multi-node mode new DeepLearningTask( _job._key, model.model_info(), rowFraction(train, mp, model), model.iterations) .doAll(train) .model_info()); // distributed data (always in multi-node mode) if (stop_requested() && !timeout()) break; // cancellation if (!model.doScoring( trainScoreFrame, validScoreFrame, _job._key, model.iterations, false)) break; // finished training (or early stopping or convergence) if (timeout()) break; // stop after scoring } // replace the model with the best model so far (if it's better) if (!stop_requested() && _parms._overwrite_with_best_model && model.actual_best_model_key != null && _parms._nfolds == 0) { DeepLearningModel best_model = DKV.getGet(model.actual_best_model_key); if (best_model != null && best_model.loss() < model.loss() && Arrays.equals(best_model.model_info().units, model.model_info().units)) { if (!_parms._quiet_mode) Log.info("Setting the model to be the best model so far (based on scoring history)."); DeepLearningModelInfo mi = best_model.model_info().deep_clone(); // Don't cheat - count full amount of training samples, since that's the amount of // training it took to train (without finding anything better) mi.set_processed_global(model.model_info().get_processed_global()); mi.set_processed_local(model.model_info().get_processed_local()); model.set_model_info(mi); model.update(_job); model.doScoring(trainScoreFrame, validScoreFrame, _job._key, model.iterations, true); assert (best_model.loss() == model.loss()); } } // store coefficient names for future use // possibly change model.model_info().data_info().coefNames(); if (!_parms._quiet_mode) { Log.info( "=============================================================================================================================================================================="); if (stop_requested()) { Log.info("Deep Learning model training was interrupted."); } else { Log.info("Finished training the Deep Learning model."); Log.info(model); } Log.info( "=============================================================================================================================================================================="); } } finally { if (model != null) { model.deleteElasticAverageModels(); model.unlock(_job); if (model.actual_best_model_key != null) { assert (model.actual_best_model_key != model._key); DKV.remove(model.actual_best_model_key); } } } return model; }
@Override protected void compute2() { CoxPHModel model = null; try { Scope.enter(); _parms.read_lock_frames(CoxPH.this); init(true); applyScoringFrameSideEffects(); // The model to be built model = new CoxPHModel(dest(), _parms, new CoxPHModel.CoxPHOutput(CoxPH.this)); model.delete_and_lock(_key); applyTrainingFrameSideEffects(); int nResponses = 1; boolean useAllFactorLevels = false; final DataInfo dinfo = new DataInfo( Key.make(), _modelBuilderTrain, null, nResponses, useAllFactorLevels, DataInfo.TransformType.DEMEAN, TransformType.NONE, true, false, false, false, false, false); initStats(model, dinfo); final int n_offsets = (model._parms.offset_columns == null) ? 0 : model._parms.offset_columns.length; final int n_coef = dinfo.fullN() - n_offsets; final double[] step = MemoryManager.malloc8d(n_coef); final double[] oldCoef = MemoryManager.malloc8d(n_coef); final double[] newCoef = MemoryManager.malloc8d(n_coef); Arrays.fill(step, Double.NaN); Arrays.fill(oldCoef, Double.NaN); for (int j = 0; j < n_coef; ++j) newCoef[j] = model._parms.init; double oldLoglik = -Double.MAX_VALUE; final int n_time = (int) (model._output.max_time - model._output.min_time + 1); final boolean has_start_column = (model._parms.start_column != null); final boolean has_weights_column = (model._parms.weights_column != null); for (int i = 0; i <= model._parms.iter_max; ++i) { model._output.iter = i; final CoxPHTask coxMR = new CoxPHTask( self(), dinfo, newCoef, model._output.min_time, n_time, n_offsets, has_start_column, has_weights_column) .doAll(dinfo._adaptedFrame); final double newLoglik = calcLoglik(model, coxMR); if (newLoglik > oldLoglik) { if (i == 0) calcCounts(model, coxMR); calcModelStats(model, newCoef, newLoglik); calcCumhaz_0(model, coxMR); if (newLoglik == 0) model._output.lre = -Math.log10(Math.abs(oldLoglik - newLoglik)); else model._output.lre = -Math.log10(Math.abs((oldLoglik - newLoglik) / newLoglik)); if (model._output.lre >= model._parms.lre_min) break; Arrays.fill(step, 0); for (int j = 0; j < n_coef; ++j) for (int k = 0; k < n_coef; ++k) step[j] -= model._output.var_coef[j][k] * model._output.gradient[k]; for (int j = 0; j < n_coef; ++j) if (Double.isNaN(step[j]) || Double.isInfinite(step[j])) break; oldLoglik = newLoglik; System.arraycopy(newCoef, 0, oldCoef, 0, oldCoef.length); } else { for (int j = 0; j < n_coef; ++j) step[j] /= 2; } for (int j = 0; j < n_coef; ++j) newCoef[j] = oldCoef[j] - step[j]; } model.update(_key); } catch (Throwable t) { Job thisJob = DKV.getGet(_key); if (thisJob._state == JobState.CANCELLED) { Log.info("Job cancelled by user."); } else { t.printStackTrace(); failed(t); throw t; } } finally { updateModelOutput(); _parms.read_unlock_frames(CoxPH.this); Scope.exit(); done(); // Job done! } tryComplete(); }