private TwoDimTable createScoringHistoryTable(KMeansModel.KMeansOutput output) {
  // Build the per-iteration scoring history table (one row per K-Means iteration).
  List<String> colHeaders = new ArrayList<>();
  List<String> colTypes = new ArrayList<>();
  List<String> colFormat = new ArrayList<>();
  colHeaders.add("Timestamp"); colTypes.add("string"); colFormat.add("%s");
  colHeaders.add("Duration"); colTypes.add("string"); colFormat.add("%s");
  colHeaders.add("Iteration"); colTypes.add("long"); colFormat.add("%d");
  colHeaders.add("Avg. Change of Std. Centroids"); colTypes.add("double"); colFormat.add("%.5f");
  colHeaders.add("Within Cluster Sum Of Squares"); colTypes.add("double"); colFormat.add("%.5f");

  final int rows = output._avg_centroids_chg.length;
  TwoDimTable table = new TwoDimTable(
      "Scoring History", null,
      new String[rows],
      colHeaders.toArray(new String[0]),
      colTypes.toArray(new String[0]),
      colFormat.toArray(new String[0]),
      "");
  DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
  int row = 0;
  for (int i = 0; i < rows; i++) {
    int col = 0;
    assert (row < table.getRowDim());
    assert (col < table.getColDim());
    table.set(row, col++, fmt.print(output._training_time_ms[i]));
    table.set(row, col++, PrettyPrint.msecs(output._training_time_ms[i] - _start_time, true));
    table.set(row, col++, i);
    table.set(row, col++, output._avg_centroids_chg[i]);
    table.set(row, col++, output._history_withinss[i]);
    row++;
  }
  return table;
}
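// Not part of the original source: a minimal sketch of how the repeated
// header/type/format triplets above could be collapsed into a single helper.
// The name addColumn() is hypothetical; it only assumes the three parallel
// lists already used by createScoringHistoryTable().
private static void addColumn(List<String> colHeaders, List<String> colTypes, List<String> colFormat,
                              String header, String type, String format) {
  colHeaders.add(header);
  colTypes.add(type);
  colFormat.add(format);
}
// Hypothetical usage inside the method above:
//   addColumn(colHeaders, colTypes, colFormat, "Iteration", "long", "%d");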
@Ignore
@Test
public void matrixVecTest() {
  // Micro-benchmark: compare gemv implementations across dense/sparse matrix and vector storage.
  int rows = 2048;
  int cols = 8192;
  int loops = 5;
  int warmup_loops = 5;
  long seed = 0x533D;
  float nnz_ratio_vec = 0.01f; // fraction of non-zeroes for vector
  float nnz_ratio_mat = 0.1f;  // fraction of non-zeroes for matrix

  float[] a = new float[rows * cols];
  float[] x = new float[cols];
  float[] y = new float[rows];
  float[] res = new float[rows];
  byte[] bits = new byte[rows];
  for (int row = 0; row < rows; ++row) {
    y[row] = 0;
    res[row] = 0;
    bits[row] = (byte) ("abcdefghijklmnopqrstuvwxyz".toCharArray()[row % 26]);
  }
  Random rng = new Random(seed);
  for (int col = 0; col < cols; ++col)
    if (rng.nextFloat() < nnz_ratio_vec) x[col] = ((float) col) / cols;
  for (int row = 0; row < rows; ++row) {
    int off = row * cols;
    for (int col = 0; col < cols; ++col) {
      if (rng.nextFloat() < nnz_ratio_mat) a[off + col] = ((float) (row + col)) / cols;
    }
  }

  Storage.DenseRowMatrix dra = new Storage.DenseRowMatrix(a, rows, cols);
  Storage.DenseColMatrix dca = new Storage.DenseColMatrix(dra, rows, cols);
  Storage.SparseRowMatrix sra = new Storage.SparseRowMatrix(dra, rows, cols);
  Storage.SparseColMatrix sca = new Storage.SparseColMatrix(dca, rows, cols);
  Storage.DenseVector dx = new Storage.DenseVector(x);
  Storage.DenseVector dy = new Storage.DenseVector(y);
  Storage.DenseVector dres = new Storage.DenseVector(res);
  Storage.SparseVector sx = new Storage.SparseVector(x);

  // warm-up: run every variant a few times so the JIT compiles the hot paths
  System.out.println("warming up.");
  float sum = 0;
  for (int l = 0; l < warmup_loops; ++l) { gemv_naive(res, a, x, y, bits); sum += res[rows / 2]; }
  for (int l = 0; l < warmup_loops; ++l) { gemv_naive(dres, dra, dx, dy, bits); sum += res[rows / 2]; }
  for (int l = 0; l < warmup_loops; ++l) { gemv_row_optimized(res, a, x, y, bits); sum += res[rows / 2]; }
  for (int l = 0; l < warmup_loops; ++l) { gemv(dres, dca, dx, dy, bits); sum += res[rows / 2]; }
  for (int l = 0; l < warmup_loops; ++l) { gemv(dres, dra, sx, dy, bits); sum += res[rows / 2]; }
  for (int l = 0; l < warmup_loops; ++l) { gemv(dres, dca, sx, dy, bits); sum += res[rows / 2]; }
  for (int l = 0; l < warmup_loops; ++l) { gemv(dres, sra, sx, dy, bits); sum += res[rows / 2]; }
  for (int l = 0; l < warmup_loops; ++l) { gemv(dres, sca, sx, dy, bits); sum += res[rows / 2]; }

  // naive version
  System.out.println("\nstarting naive.");
  sum = 0;
  long start = System.currentTimeMillis();
  for (int l = 0; l < loops; ++l) {
    gemv_naive(res, a, x, y, bits);
    sum += res[rows / 2]; // do something useful
  }
  System.out.println("result: " + sum + " and " + ArrayUtils.sum(res));
  System.out.println("naive time: " + PrettyPrint.msecs(System.currentTimeMillis() - start, true));

  System.out.println("\nstarting dense row * dense.");
  sum = 0;
  start = System.currentTimeMillis();
  for (int l = 0; l < loops; ++l) {
    gemv_naive(dres, dra, dx, dy, bits);
    sum += res[rows / 2]; // do something useful
  }
  System.out.println("result: " + sum + " and " + ArrayUtils.sum(res));
  System.out.println("dense row * dense time: " + PrettyPrint.msecs(System.currentTimeMillis() - start, true));

  System.out.println("\nstarting optimized dense row * dense.");
  sum = 0;
  start = System.currentTimeMillis();
  for (int l = 0; l < loops; ++l) {
    gemv_row_optimized(res, a, x, y, bits);
    sum += res[rows / 2]; // do something useful
  }
  System.out.println("result: " + sum + " and " + ArrayUtils.sum(res));
  System.out.println("optimized dense row * dense time: " + PrettyPrint.msecs(System.currentTimeMillis() - start, true));

  System.out.println("\nstarting dense col * dense.");
  sum = 0;
  start = System.currentTimeMillis();
  for (int l = 0; l < loops; ++l) {
    gemv(dres, dca, dx, dy, bits);
    sum += res[rows / 2]; // do something useful
  }
  System.out.println("result: " + sum + " and " + ArrayUtils.sum(res));
  System.out.println("dense col * dense time: " + PrettyPrint.msecs(System.currentTimeMillis() - start, true));

  System.out.println("\nstarting dense row * sparse.");
  sum = 0;
  start = System.currentTimeMillis();
  for (int l = 0; l < loops; ++l) {
    gemv(dres, dra, sx, dy, bits);
    sum += res[rows / 2]; // do something useful
  }
  System.out.println("result: " + sum + " and " + ArrayUtils.sum(res));
  System.out.println("dense row * sparse time: " + PrettyPrint.msecs(System.currentTimeMillis() - start, true));

  System.out.println("\nstarting dense col * sparse.");
  sum = 0;
  start = System.currentTimeMillis();
  for (int l = 0; l < loops; ++l) {
    gemv(dres, dca, sx, dy, bits);
    sum += res[rows / 2]; // do something useful
  }
  System.out.println("result: " + sum + " and " + ArrayUtils.sum(res));
  System.out.println("dense col * sparse time: " + PrettyPrint.msecs(System.currentTimeMillis() - start, true));

  System.out.println("\nstarting sparse row * sparse.");
  sum = 0;
  start = System.currentTimeMillis();
  for (int l = 0; l < loops; ++l) {
    gemv(dres, sra, sx, dy, bits);
    sum += res[rows / 2]; // do something useful
  }
  System.out.println("result: " + sum + " and " + ArrayUtils.sum(res));
  System.out.println("sparse row * sparse time: " + PrettyPrint.msecs(System.currentTimeMillis() - start, true));

  System.out.println("\nstarting sparse col * sparse.");
  sum = 0;
  start = System.currentTimeMillis();
  for (int l = 0; l < loops; ++l) {
    gemv(dres, sca, sx, dy, bits);
    sum += res[rows / 2]; // do something useful
  }
  System.out.println("result: " + sum + " and " + ArrayUtils.sum(res));
  System.out.println("sparse col * sparse time: " + PrettyPrint.msecs(System.currentTimeMillis() - start, true));
}
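// Not part of the original test: a minimal sketch of how the repeated
// "print banner / reset sum / time the loop / print result" blocks above could be
// factored out. Only System.currentTimeMillis(), PrettyPrint.msecs() and
// ArrayUtils.sum() from the benchmark above are assumed; timeGemv() is hypothetical.
private static float timeGemv(String name, int loops, float[] res, int rows, Runnable kernel) {
  System.out.println("\nstarting " + name + ".");
  float sum = 0;
  long start = System.currentTimeMillis();
  for (int l = 0; l < loops; ++l) {
    kernel.run();
    sum += res[rows / 2]; // keep a data dependency so the JIT cannot elide the loop
  }
  System.out.println("result: " + sum + " and " + ArrayUtils.sum(res));
  System.out.println(name + " time: " + PrettyPrint.msecs(System.currentTimeMillis() - start, true));
  return sum;
}
// Hypothetical usage (Java 8 lambda):
//   timeGemv("dense col * sparse", loops, res, rows, () -> gemv(dres, dca, sx, dy, bits));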
/**
 * Train a Deep Learning neural net model
 *
 * @param model Input model (e.g., from initModel(), or from a previous training run)
 * @return Trained model
 */
public final DeepLearningModel trainModel(DeepLearningModel model) {
  Frame validScoreFrame = null;
  Frame train, trainScoreFrame;
  try {
    // if (checkpoint == null && !quiet_mode) logStart(); //if checkpoint is given, some Job's params
    // might be uninitialized (but the restarted model's parameters are correct)
    if (model == null) {
      model = DKV.get(dest()).get();
    }
    Log.info("Model category: " + (_parms._autoencoder ? "Auto-Encoder" : isClassifier() ? "Classification" : "Regression"));
    final long model_size = model.model_info().size();
    Log.info("Number of model parameters (weights/biases): " + String.format("%,d", model_size));
    model.write_lock(_job);
    _job.update(0, "Setting up training data...");
    final DeepLearningParameters mp = model.model_info().get_params();

    // temporary frames of the same "name" as the orig _train/_valid (asking the parameter's Key, not the actual frame)
    // Note: don't put into DKV or they would overwrite the _train/_valid frames!
    Frame tra_fr = new Frame(mp._train, _train.names(), _train.vecs());
    Frame val_fr = _valid != null ? new Frame(mp._valid, _valid.names(), _valid.vecs()) : null;

    train = tra_fr;
    if (model._output.isClassifier() && mp._balance_classes) {
      _job.update(0, "Balancing class distribution of training data...");
      float[] trainSamplingFactors = new float[train.lastVec().domain().length]; // leave initialized to 0 -> will be filled up below
      if (mp._class_sampling_factors != null) {
        if (mp._class_sampling_factors.length != train.lastVec().domain().length)
          throw new IllegalArgumentException("class_sampling_factors must have " + train.lastVec().domain().length + " elements");
        trainSamplingFactors = mp._class_sampling_factors.clone(); // clone: don't modify the original
      }
      train = sampleFrameStratified(train, train.lastVec(), train.vec(model._output.weightsName()), trainSamplingFactors,
          (long) (mp._max_after_balance_size * train.numRows()), mp._seed, true, false);
      Vec l = train.lastVec();
      Vec w = train.vec(model._output.weightsName());
      MRUtils.ClassDist cd = new MRUtils.ClassDist(l);
      model._output._modelClassDist = _weights != null ? cd.doAll(l, w).rel_dist() : cd.doAll(l).rel_dist();
    }
    model.training_rows = train.numRows();
    if (_weights != null && _weights.min() == 0 && _weights.max() == 1 && _weights.isInt()) {
      model.training_rows = Math.round(train.numRows() * _weights.mean());
      Log.warn("Not counting " + (train.numRows() - model.training_rows) + " rows with weight=0 towards an epoch.");
    }
    Log.info("One epoch corresponds to " + model.training_rows + " training data rows.");
    trainScoreFrame = sampleFrame(train, mp._score_training_samples, mp._seed); // training scoring dataset is always sampled uniformly from the training dataset
    if (trainScoreFrame != train) Scope.track(trainScoreFrame);
    if (!_parms._quiet_mode)
      Log.info("Number of chunks of the training data: " + train.anyVec().nChunks());
    if (val_fr != null) {
      model.validation_rows = val_fr.numRows();
      // validation scoring dataset can be sampled in multiple ways from the given validation dataset
      if (model._output.isClassifier() && mp._balance_classes
          && mp._score_validation_sampling == DeepLearningParameters.ClassSamplingMethod.Stratified) {
        _job.update(0, "Sampling validation data (stratified)...");
        validScoreFrame = sampleFrameStratified(val_fr, val_fr.lastVec(), val_fr.vec(model._output.weightsName()), null,
            mp._score_validation_samples > 0 ? mp._score_validation_samples : val_fr.numRows(),
            mp._seed + 1, false /* no oversampling */, false);
      } else {
        _job.update(0, "Sampling validation data...");
        validScoreFrame = sampleFrame(val_fr, mp._score_validation_samples, mp._seed + 1);
        if (validScoreFrame != val_fr) Scope.track(validScoreFrame);
      }
      if (!_parms._quiet_mode)
        Log.info("Number of chunks of the validation data: " + validScoreFrame.anyVec().nChunks());
    }

    // Set train_samples_per_iteration size (cannot be done earlier since this depends on whether stratified sampling is done)
    model.actual_train_samples_per_iteration = computeTrainSamplesPerIteration(mp, model.training_rows, model);
    // Determine whether shuffling is enforced
    if (mp._replicate_training_data
        && (model.actual_train_samples_per_iteration == model.training_rows * (mp._single_node_mode ? 1 : H2O.CLOUD.size()))
        && !mp._shuffle_training_data && H2O.CLOUD.size() > 1 && !mp._reproducible) {
      if (!mp._quiet_mode)
        Log.info("Enabling training data shuffling, because all nodes train on the full dataset (replicated training data).");
      mp._shuffle_training_data = true;
    }
    if (!mp._shuffle_training_data && model.actual_train_samples_per_iteration == model.training_rows
        && train.anyVec().nChunks() == 1) {
      if (!mp._quiet_mode)
        Log.info("Enabling training data shuffling to avoid training rows in the same order over and over (no Hogwild since there's only 1 chunk).");
      mp._shuffle_training_data = true;
    }

    // if (!mp._quiet_mode) Log.info("Initial model:\n" + model.model_info());
    long now = System.currentTimeMillis();
    model._timeLastIterationEnter = now;
    if (_parms._autoencoder) {
      _job.update(0, "Scoring null model of autoencoder...");
      if (!mp._quiet_mode) Log.info("Scoring the null model of the autoencoder.");
      model.doScoring(trainScoreFrame, validScoreFrame, _job._key, 0, false); // get the null model reconstruction error
    }
    // put the initial version of the model into DKV
    model.update(_job);
    model.total_setup_time_ms += now - _job.start_time();
    Log.info("Total setup time: " + PrettyPrint.msecs(model.total_setup_time_ms, true));
    Log.info("Starting to train the Deep Learning model.");
    _job.update(0, "Training...");

    // main loop
    for (;;) {
      model.iterations++;
      model.set_model_info(mp._epochs == 0
          ? model.model_info()
          : H2O.CLOUD.size() > 1 && mp._replicate_training_data
              ? (mp._single_node_mode
                  ? new DeepLearningTask2(_job._key, train, model.model_info(), rowFraction(train, mp, model), model.iterations)
                      .doAll(Key.make(H2O.SELF)).model_info()   // replicated data + single node mode
                  : new DeepLearningTask2(_job._key, train, model.model_info(), rowFraction(train, mp, model), model.iterations)
                      .doAllNodes().model_info())               // replicated data + multi-node mode
              : new DeepLearningTask(_job._key, model.model_info(), rowFraction(train, mp, model), model.iterations)
                  .doAll(train).model_info());                  // distributed data (always in multi-node mode)
      if (stop_requested() && !timeout()) break; // cancellation
      if (!model.doScoring(trainScoreFrame, validScoreFrame, _job._key, model.iterations, false))
        break; // finished training (or early stopping or convergence)
      if (timeout()) break; // stop after scoring
    }

    // replace the model with the best model so far (if it's better)
    if (!stop_requested() && _parms._overwrite_with_best_model && model.actual_best_model_key != null
        && _parms._nfolds == 0) {
      DeepLearningModel best_model = DKV.getGet(model.actual_best_model_key);
      if (best_model != null && best_model.loss() < model.loss()
          && Arrays.equals(best_model.model_info().units, model.model_info().units)) {
        if (!_parms._quiet_mode)
          Log.info("Setting the model to be the best model so far (based on scoring history).");
        DeepLearningModelInfo mi = best_model.model_info().deep_clone();
        // Don't cheat - count full amount of training samples, since that's the amount of training
        // it took to train (without finding anything better)
        mi.set_processed_global(model.model_info().get_processed_global());
        mi.set_processed_local(model.model_info().get_processed_local());
        model.set_model_info(mi);
        model.update(_job);
        model.doScoring(trainScoreFrame, validScoreFrame, _job._key, model.iterations, true);
        assert (best_model.loss() == model.loss());
      }
    }
    // store coefficient names for future use
    // possibly change model.model_info().data_info().coefNames();

    if (!_parms._quiet_mode) {
      Log.info("==============================================================================================================================================================================");
      if (stop_requested()) {
        Log.info("Deep Learning model training was interrupted.");
      } else {
        Log.info("Finished training the Deep Learning model.");
        Log.info(model);
      }
      Log.info("==============================================================================================================================================================================");
    }
  } finally {
    if (model != null) {
      model.deleteElasticAverageModels();
      model.unlock(_job);
      if (model.actual_best_model_key != null) {
        assert (model.actual_best_model_key != model._key);
        DKV.remove(model.actual_best_model_key);
      }
    }
  }
  return model;
}
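// Not part of the original source: a minimal sketch that only names the three exit
// conditions of the main training loop above, using the same stop_requested(),
// timeout() and doScoring(...) calls. keepTraining() is a hypothetical helper and
// is not invoked anywhere in the real code.
private boolean keepTraining(DeepLearningModel model, Frame trainScoreFrame, Frame validScoreFrame) {
  if (stop_requested() && !timeout()) return false;  // user cancellation
  if (!model.doScoring(trainScoreFrame, validScoreFrame, _job._key, model.iterations, false))
    return false;                                    // finished training, early stopping, or convergence
  return !timeout();                                 // time budget exhausted: stop, but only after scoring
}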
private TwoDimTable createScoringHistoryTable(SharedTreeModel.SharedTreeOutput _output) {
  // Build the per-tree scoring history; the metric columns depend on the model category
  // and on whether a validation frame is present.
  List<String> colHeaders = new ArrayList<>();
  List<String> colTypes = new ArrayList<>();
  List<String> colFormat = new ArrayList<>();
  colHeaders.add("Timestamp"); colTypes.add("string"); colFormat.add("%s");
  colHeaders.add("Duration"); colTypes.add("string"); colFormat.add("%s");
  colHeaders.add("Number of Trees"); colTypes.add("long"); colFormat.add("%d");
  colHeaders.add("Training MSE"); colTypes.add("double"); colFormat.add("%.5f");
  if (_output.isClassifier()) {
    colHeaders.add("Training LogLoss"); colTypes.add("double"); colFormat.add("%.5f");
  }
  if (_output.getModelCategory() == ModelCategory.Binomial) {
    colHeaders.add("Training AUC"); colTypes.add("double"); colFormat.add("%.5f");
  }
  if (_output.getModelCategory() == ModelCategory.Binomial
      || _output.getModelCategory() == ModelCategory.Multinomial) {
    colHeaders.add("Training Classification Error"); colTypes.add("double"); colFormat.add("%.5f");
  }
  if (valid() != null) {
    colHeaders.add("Validation MSE"); colTypes.add("double"); colFormat.add("%.5f");
    if (_output.isClassifier()) {
      colHeaders.add("Validation LogLoss"); colTypes.add("double"); colFormat.add("%.5f");
    }
    if (_output.getModelCategory() == ModelCategory.Binomial) {
      colHeaders.add("Validation AUC"); colTypes.add("double"); colFormat.add("%.5f");
    }
    if (_output.isClassifier()) {
      colHeaders.add("Validation Classification Error"); colTypes.add("double"); colFormat.add("%.5f");
    }
  }

  // Count only the scoring events that actually produced metrics.
  int rows = 0;
  for (int i = 1; i < _output._scored_train.length; i++) {
    if (!Double.isNaN(_output._scored_train[i]._mse)) ++rows;
  }
  TwoDimTable table = new TwoDimTable(
      "Scoring History", null,
      new String[rows],
      colHeaders.toArray(new String[0]),
      colTypes.toArray(new String[0]),
      colFormat.toArray(new String[0]),
      "");
  DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
  int row = 0;
  for (int i = 1; i < _output._scored_train.length; i++) {
    if (Double.isNaN(_output._scored_train[i]._mse)) continue;
    int col = 0;
    assert (row < table.getRowDim());
    assert (col < table.getColDim());
    table.set(row, col++, fmt.print(_output._training_time_ms[i]));
    table.set(row, col++, PrettyPrint.msecs(_output._training_time_ms[i] - _start_time, true));
    table.set(row, col++, i);
    ScoreKeeper st = _output._scored_train[i];
    table.set(row, col++, st._mse);
    if (_output.isClassifier()) table.set(row, col++, st._logloss);
    if (_output.getModelCategory() == ModelCategory.Binomial) table.set(row, col++, st._AUC);
    if (_output.isClassifier()) table.set(row, col++, st._classError);
    if (_valid != null) {
      st = _output._scored_valid[i];
      table.set(row, col++, st._mse);
      if (_output.isClassifier()) table.set(row, col++, st._logloss);
      if (_output.getModelCategory() == ModelCategory.Binomial) table.set(row, col++, st._AUC);
      if (_output.isClassifier()) table.set(row, col++, st._classError);
    }
    row++;
  }
  return table;
}