/**
 * Builds the single-row "Model Summary" table for a tree ensemble.
 *
 * <p>Columns, in order: number of trees, model size in bytes, minimum / maximum / mean depth and
 * minimum / maximum / mean leaf count. Every cell is read from {@code _output._treeStats}.
 *
 * @param _output model output whose {@code _treeStats} supplies the cell values
 * @return the populated one-row table
 */
private TwoDimTable createModelSummaryTable(SharedTreeModel.SharedTreeOutput _output) {
  // The column set is fixed, so the three parallel descriptors are plain array literals.
  final String[] headers = {
    "Number of Trees",
    "Model Size in Bytes",
    "Min. Depth",
    "Max. Depth",
    "Mean Depth",
    "Min. Leaves",
    "Max. Leaves",
    "Mean Leaves"
  };
  final String[] types = {"long", "long", "long", "long", "double", "long", "long", "double"};
  final String[] formats = {"%d", "%d", "%d", "%d", "%.5f", "%d", "%d", "%.5f"};

  TwoDimTable summary =
      new TwoDimTable("Model Summary", null, new String[1], headers, types, formats, "");

  // Single row; the column index matches the position in the header array above.
  summary.set(0, 0, _output._treeStats._num_trees);
  summary.set(0, 1, _output._treeStats._byte_size);
  summary.set(0, 2, _output._treeStats._min_depth);
  summary.set(0, 3, _output._treeStats._max_depth);
  summary.set(0, 4, _output._treeStats._mean_depth);
  summary.set(0, 5, _output._treeStats._min_leaves);
  summary.set(0, 6, _output._treeStats._max_leaves);
  summary.set(0, 7, _output._treeStats._mean_leaves);
  return summary;
}
/**
 * Renders the cluster centers of a k-means output as a table with one row per cluster and one
 * column per entry of {@code output._domains}.
 *
 * <p>If any of {@code _size}, {@code _names}, {@code _domains} or {@code _centers_raw} is null
 * (or {@code _centers_std_raw} is null while {@code standardized} is requested), a 1x1
 * placeholder table holding {@code NaN} is returned instead.
 *
 * <p>Columns with a non-null domain are shown as strings: the raw center value is cast to
 * {@code int} and used as an index into that domain, regardless of {@code standardized}. All
 * other columns show the standardized or raw center value as a double.
 *
 * @param output k-means output providing sizes, names, domains and centers
 * @param standardized whether numeric columns should show {@code _centers_std_raw} instead of
 *     {@code _centers_raw}
 * @return the center table, or the NaN placeholder when required data is missing
 */
public static TwoDimTable createCenterTable(
    KMeansModel.KMeansOutput output, boolean standardized) {
  final String name = standardized ? "Standardized Cluster Means" : "Cluster Means";

  final boolean incomplete =
      output._size == null
          || output._names == null
          || output._domains == null
          || output._centers_raw == null
          || (standardized && output._centers_std_raw == null);
  if (incomplete) {
    TwoDimTable placeholder =
        new TwoDimTable(
            name,
            null,
            new String[] {"1"},
            new String[] {"C1"},
            new String[] {"double"},
            new String[] {"%f"},
            "Centroid");
    placeholder.set(0, 0, Double.NaN);
    return placeholder;
  }

  // Row headers are the 1-based cluster numbers.
  String[] rowHeaders = new String[output._size.length];
  for (int k = 0; k < rowHeaders.length; k++) {
    rowHeaders[k] = String.valueOf(k + 1);
  }

  // Descriptor arrays are sized by the column names but populated per domain entry.
  String[] colTypes = new String[output._names.length];
  String[] colFormats = new String[output._names.length];
  for (int c = 0; c < output._domains.length; ++c) {
    if (output._domains[c] == null) {
      colTypes[c] = "double";
      colFormats[c] = "%f";
    } else {
      colTypes[c] = "String";
      colFormats[c] = "%s";
    }
  }

  TwoDimTable centers =
      new TwoDimTable(name, null, rowHeaders, output._names, colTypes, colFormats, "Centroid");

  // Fill column by column.
  for (int c = 0; c < output._domains.length; ++c) {
    if (output._domains[c] != null) {
      // Categorical column: translate the stored level index into its label.
      for (int k = 0; k < output._centers_raw.length; ++k) {
        centers.set(k, c, output._domains[c][(int) output._centers_raw[k][c]]);
      }
      continue;
    }
    for (int k = 0; k < output._centers_raw.length; ++k) {
      centers.set(k, c, standardized ? output._centers_std_raw[k][c] : output._centers_raw[k][c]);
    }
  }
  return centers;
}
/**
 * Builds the "Scoring History" table for a k-means run, one row per entry of {@code
 * output._avg_centroids_chg}.
 *
 * <p>Columns, in order: formatted timestamp, duration relative to {@code _start_time}, iteration
 * index, average change of the standardized centroids and within-cluster sum of squares.
 *
 * @param output k-means output providing {@code _training_time_ms}, {@code _avg_centroids_chg}
 *     and {@code _history_withinss}, all indexed by iteration
 * @return the populated scoring history table
 */
private TwoDimTable createScoringHistoryTable(KMeansModel.KMeansOutput output) {
  final String[] headers = {
    "Timestamp",
    "Duration",
    "Iteration",
    "Avg. Change of Std. Centroids",
    "Within Cluster Sum Of Squares"
  };
  final String[] types = {"string", "string", "long", "double", "double"};
  final String[] formats = {"%s", "%s", "%d", "%.5f", "%.5f"};

  final int rows = output._avg_centroids_chg.length;
  TwoDimTable table =
      new TwoDimTable("Scoring History", null, new String[rows], headers, types, formats, "");

  // The formatter does not depend on the row, so it is created once rather than per iteration.
  final DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");

  // Table row and iteration index coincide, so a separate row counter is not needed.
  for (int i = 0; i < rows; i++) {
    int col = 0;
    assert i < table.getRowDim();
    assert col < table.getColDim();
    table.set(i, col++, fmt.print(output._training_time_ms[i]));
    table.set(i, col++, PrettyPrint.msecs(output._training_time_ms[i] - _start_time, true));
    table.set(i, col++, i);
    table.set(i, col++, output._avg_centroids_chg[i]);
    table.set(i, col++, output._history_withinss[i]);
  }
  return table;
}
/**
 * Builds the single-row "Model Summary" table for a k-means model.
 *
 * <p>Columns, in order: number of rows, number of clusters, number of categorical columns,
 * number of iterations, within-cluster / total / between-cluster sum of squares.
 *
 * <p>The row count is {@code _train.numRows()}, multiplied by the mean of the training frame's
 * last vec when {@code hasWeightCol()} is true, and then rounded.
 *
 * @param output k-means output supplying the centers and the summary statistics
 * @return the populated one-row table
 */
private TwoDimTable createModelSummaryTable(KMeansModel.KMeansOutput output) {
  final String[] headers = {
    "Number of Rows",
    "Number of Clusters",
    "Number of Categorical Columns",
    "Number of Iterations",
    "Within Cluster Sum of Squares",
    "Total Sum of Squares",
    "Between Cluster Sum of Squares"
  };
  final String[] types = {"long", "long", "long", "long", "double", "double", "double"};
  final String[] formats = {"%d", "%d", "%d", "%d", "%.5f", "%.5f", "%.5f"};

  TwoDimTable summary =
      new TwoDimTable("Model Summary", null, new String[1], headers, types, formats, "");

  // Single row; the column index matches the position in the header array above.
  summary.set(
      0, 0, Math.round(_train.numRows() * (hasWeightCol() ? _train.lastVec().mean() : 1)));
  summary.set(0, 1, output._centers_raw.length);
  summary.set(0, 2, output._categorical_column_count);
  summary.set(0, 3, output._iterations);
  summary.set(0, 4, output._tot_withinss);
  summary.set(0, 5, output._totss);
  summary.set(0, 6, output._betweenss);
  return summary;
}
/**
 * Builds the "Status of Neuron Layers" table, stores it in {@code summaryTable} and returns it.
 *
 * <p>The table description is a comma-separated sentence assembled from the model parameters
 * (diagnostics flag, predicted column, problem kind, distribution, loss), the number of
 * weights/biases, the serialized size of this object, the number of processed training samples
 * and the mini-batch size.
 *
 * <p>There is one row per neuron layer. The first layer only gets its index, unit count, type
 * and input dropout; the remaining columns are filled for every later layer. Hidden dropout is
 * written for all layers except the first and the last.
 *
 * @return the newly built table, which is also assigned to {@code summaryTable}
 */
TwoDimTable createSummaryTable() {
  Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(this);
  long byte_size = new AutoBuffer().put(this).buf().length;

  // Assemble the description piece by piece, in the same order as it is printed.
  String diagnosticsNote = get_params()._diagnostics ? "" : "diagnostics disabled, ";
  String targetNote =
      !get_params()._autoencoder ? ("predicting " + _train.lastVecName() + ", ") : "";
  String problemKind;
  if (get_params()._autoencoder) {
    problemKind = "auto-encoder";
  } else if (_classification) {
    problemKind = units[units.length - 1] + "-class classification";
  } else {
    problemKind = "regression";
  }
  String description =
      diagnosticsNote
          + targetNote
          + problemKind
          + ", "
          + get_params()._distribution
          + " distribution, "
          + get_params()._loss
          + " loss, "
          + String.format("%,d", size())
          + " weights/biases, "
          + PrettyPrint.bytes(byte_size)
          + ", "
          + String.format("%,d", get_processed_global())
          + " training samples, "
          + "mini-batch size "
          + String.format("%,d", get_params()._mini_batch_size);

  final String[] headers = {
    "Layer",
    "Units",
    "Type",
    "Dropout",
    "L1",
    "L2",
    "Mean Rate",
    "Rate RMS",
    "Momentum",
    "Mean Weight",
    "Weight RMS",
    "Mean Bias",
    "Bias RMS"
  };
  final String[] types = {
    "int", "int", "string", "double", "double", "double", "double", "double", "double", "double",
    "double", "double", "double"
  };
  final String[] formats = {
    "%d", "%d", "%s", "%2.2f %%", "%5f", "%5f", "%5f", "%5f", "%5f", "%5f", "%5f", "%5f", "%5f"
  };

  TwoDimTable table =
      new TwoDimTable(
          "Status of Neuron Layers",
          description,
          new String[neurons.length],
          headers,
          types,
          formats,
          "");

  for (int layer = 0; layer < neurons.length; ++layer) {
    final Neurons current = neurons[layer];
    table.set(layer, 0, layer + 1);
    table.set(layer, 1, current.units);
    table.set(layer, 2, current.getClass().getSimpleName());

    if (layer == 0) {
      // First layer: only the input dropout is reported, the remaining cells stay unset.
      table.set(layer, 3, current.params._input_dropout_ratio * 100);
      continue;
    }

    if (layer < neurons.length - 1) {
      // Layers between the first and the last: hidden dropout, indexed from the first hidden layer.
      if (current.params._hidden_dropout_ratios == null) {
        table.set(layer, 3, 0);
      } else {
        table.set(layer, 3, current.params._hidden_dropout_ratios[layer - 1] * 100);
      }
    }

    table.set(layer, 4, current.params._l1);
    table.set(layer, 5, current.params._l2);
    // With adaptive rate the per-layer rate statistics are shown and momentum is reported as 0;
    // otherwise the rate and momentum come from the layer itself.
    table.set(
        layer,
        6,
        (get_params()._adaptive_rate ? mean_rate[layer] : current.rate(get_processed_total())));
    table.set(layer, 7, (get_params()._adaptive_rate ? rms_rate[layer] : 0));
    table.set(
        layer, 8, get_params()._adaptive_rate ? 0 : current.momentum(get_processed_total()));
    table.set(layer, 9, mean_weight[layer]);
    table.set(layer, 10, rms_weight[layer]);
    table.set(layer, 11, mean_bias[layer]);
    table.set(layer, 12, rms_bias[layer]);
  }

  summaryTable = table;
  return summaryTable;
}
/**
 * Builds the "Scoring History" table for a tree model.
 *
 * <p>The fixed columns are timestamp, duration (relative to {@code _start_time}), number of
 * trees and training MSE. LogLoss, AUC and classification error columns are added depending on
 * the model category, and the same set of metrics is repeated for validation when {@code
 * valid()} is non-null.
 *
 * <p>Entries of {@code _output._scored_train} are scanned starting at index 1, and entries whose
 * MSE is NaN are skipped, so the table has one row per scored entry.
 *
 * @param _output tree model output providing the score keepers and training timestamps
 * @return the populated scoring history table
 */
private TwoDimTable createScoringHistoryTable(SharedTreeModel.SharedTreeOutput _output) {
  // Each optional column is controlled by exactly one flag that is used both when declaring the
  // header and when writing the cell, so headers and cells can never get out of step.
  final boolean classifier = _output.isClassifier();
  final boolean binomial = _output.getModelCategory() == ModelCategory.Binomial;
  final boolean trainClassError =
      binomial || _output.getModelCategory() == ModelCategory.Multinomial;
  final boolean hasValidation = valid() != null;

  List<String> colHeaders = new ArrayList<>();
  colHeaders.add("Timestamp");
  colHeaders.add("Duration");
  colHeaders.add("Number of Trees");
  colHeaders.add("Training MSE");
  if (classifier) {
    colHeaders.add("Training LogLoss");
  }
  if (binomial) {
    colHeaders.add("Training AUC");
  }
  if (trainClassError) {
    colHeaders.add("Training Classification Error");
  }
  if (hasValidation) {
    colHeaders.add("Validation MSE");
    if (classifier) {
      colHeaders.add("Validation LogLoss");
    }
    if (binomial) {
      colHeaders.add("Validation AUC");
    }
    if (classifier) {
      colHeaders.add("Validation Classification Error");
    }
  }

  // Timestamp and Duration are strings, the tree count is a long, every metric is a double.
  final int cols = colHeaders.size();
  String[] colTypes = new String[cols];
  String[] colFormats = new String[cols];
  for (int c = 0; c < cols; c++) {
    if (c < 2) {
      colTypes[c] = "string";
      colFormats[c] = "%s";
    } else if (c == 2) {
      colTypes[c] = "long";
      colFormats[c] = "%d";
    } else {
      colTypes[c] = "double";
      colFormats[c] = "%.5f";
    }
  }

  // Count the rows up front: index 0 is never reported and NaN entries are skipped.
  int rows = 0;
  for (int i = 1; i < _output._scored_train.length; i++) {
    if (!Double.isNaN(_output._scored_train[i]._mse)) {
      ++rows;
    }
  }

  TwoDimTable table =
      new TwoDimTable(
          "Scoring History",
          null,
          new String[rows],
          colHeaders.toArray(new String[0]),
          colTypes,
          colFormats,
          "");

  // The formatter does not depend on the row, so it is created once rather than per iteration.
  final DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");

  int row = 0;
  for (int i = 1; i < _output._scored_train.length; i++) {
    ScoreKeeper st = _output._scored_train[i];
    if (Double.isNaN(st._mse)) {
      continue;
    }
    int col = 0;
    assert row < table.getRowDim();
    assert col < table.getColDim();
    table.set(row, col++, fmt.print(_output._training_time_ms[i]));
    table.set(row, col++, PrettyPrint.msecs(_output._training_time_ms[i] - _start_time, true));
    table.set(row, col++, i);

    table.set(row, col++, st._mse);
    if (classifier) {
      table.set(row, col++, st._logloss);
    }
    if (binomial) {
      table.set(row, col++, st._AUC);
    }
    if (trainClassError) {
      table.set(row, col++, st._classError);
    }

    if (hasValidation) {
      st = _output._scored_valid[i];
      table.set(row, col++, st._mse);
      if (classifier) {
        table.set(row, col++, st._logloss);
      }
      if (binomial) {
        table.set(row, col++, st._AUC);
      }
      if (classifier) {
        table.set(row, col++, st._classError);
      }
    }
    row++;
  }
  return table;
}
private GLMModelOutputV3 fillMultinomial(GLMOutput impl) { if (impl.get_global_beta_multinomial() == null) return this; // no coefificients yet String[] names = impl.coefficientNames().clone(); // put intercept as the first String[] ns = ArrayUtils.append(new String[] {"Intercept"}, Arrays.copyOf(names, names.length - 1)); coefficients_table = new TwoDimTableV3(); if (impl.isStandardized()) { int n = impl.nclasses(); String[] cols = new String[n * 2]; for (int i = 0; i < n; ++i) { cols[i] = "Coefs_class_" + i; cols[n + i] = "Std_Coefs_class_" + i; } String[] colTypes = new String[cols.length]; Arrays.fill(colTypes, "double"); String[] colFormats = new String[cols.length]; Arrays.fill(colFormats, "%5f"); double[][] betaNorm = impl.getNormBetaMultinomial(); if (betaNorm != null) { TwoDimTable tdt = new TwoDimTable( "Coefficients", "glm multinomial coefficients", ns, cols, colTypes, colFormats, "names"); for (int c = 0; c < n; ++c) { double[] beta = impl.get_global_beta_multinomial()[c]; tdt.set(0, c, beta[beta.length - 1]); tdt.set(0, n + c, betaNorm[c][beta.length - 1]); for (int i = 0; i < beta.length - 1; ++i) { tdt.set(i + 1, c, beta[i]); tdt.set(i + 1, n + c, betaNorm[c][i]); } } coefficients_table.fillFromImpl(tdt); final double[] magnitudes = new double[betaNorm[0].length]; for (int i = 0; i < betaNorm.length; ++i) { for (int j = 0; j < betaNorm[i].length; ++j) { double d = betaNorm[i][j]; magnitudes[j] += d < 0 ? 
-d : d; } } Integer[] indices = new Integer[magnitudes.length - 1]; for (int i = 0; i < indices.length; ++i) indices[i] = i; Arrays.sort( indices, new Comparator<Integer>() { @Override public int compare(Integer o1, Integer o2) { if (magnitudes[o1] < magnitudes[o2]) return +1; if (magnitudes[o1] > magnitudes[o2]) return -1; return 0; } }); String[] names2 = new String[names.length]; for (int i = 0; i < names2.length - 1; ++i) names2[i] = names[indices[i]]; tdt = new TwoDimTable( "Standardized Coefficient Magnitudes", "standardized coefficient magnitudes", names2, new String[] {"Coefficients", "Sign"}, new String[] {"double", "string"}, new String[] {"%5f", "%s"}, "names"); for (int i = 0; i < magnitudes.length - 1; ++i) { tdt.set(i, 0, magnitudes[indices[i]]); tdt.set(i, 1, "POS"); } standardized_coefficient_magnitudes = new TwoDimTableV3(); standardized_coefficient_magnitudes.fillFromImpl(tdt); } } else { int n = impl.nclasses(); String[] cols = new String[n]; for (int i = 0; i < n; ++i) { cols[i] = "Coefs_class_" + i; } String[] colTypes = new String[cols.length]; Arrays.fill(colTypes, "double"); String[] colFormats = new String[cols.length]; Arrays.fill(colFormats, "%5f"); TwoDimTable tdt = new TwoDimTable( "Coefficients", "glm multinomial coefficients", ns, cols, colTypes, colFormats, "names"); for (int c = 0; c < n; ++c) { double[] beta = impl.get_global_beta_multinomial()[c]; tdt.set(0, c, beta[beta.length - 1]); for (int i = 0; i < beta.length - 1; ++i) tdt.set(i + 1, c, beta[i]); } coefficients_table.fillFromImpl(tdt); } return this; }
@Override public GLMModelOutputV3 fillFromImpl(GLMModel.GLMOutput impl) { super.fillFromImpl(impl); lambda_1se = impl.lambda_1se(); lambda_best = impl.lambda_best(); if (impl._multinomial) return fillMultinomial(impl); String[] names = impl.coefficientNames().clone(); // put intercept as the first String[] ns = ArrayUtils.append(new String[] {"Intercept"}, Arrays.copyOf(names, names.length - 1)); coefficients_table = new TwoDimTableV3(); final double[] magnitudes; double[] beta = impl.beta(); if (beta == null) beta = MemoryManager.malloc8d(names.length); String[] colTypes = new String[] {"double"}; String[] colFormats = new String[] {"%5f"}; String[] colnames = new String[] {"Coefficients"}; if (impl.hasPValues()) { colTypes = new String[] {"double", "double", "double", "double"}; colFormats = new String[] {"%5f", "%5f", "%5f", "%5f"}; colnames = new String[] {"Coefficients", "Std. Error", "z value", "p value"}; } int stdOff = colnames.length; if (impl.isStandardized()) { colTypes = ArrayUtils.append(colTypes, "double"); colFormats = ArrayUtils.append(colFormats, "%5f"); colnames = ArrayUtils.append(colnames, "Standardized Coefficients"); } TwoDimTable tdt = new TwoDimTable( "Coefficients", "glm coefficients", ns, colnames, colTypes, colFormats, "names"); // fill in coefficients tdt.set(0, 0, beta[beta.length - 1]); for (int i = 0; i < beta.length - 1; ++i) { tdt.set(i + 1, 0, beta[i]); } double[] norm_beta = null; if (impl.isStandardized() && impl.beta() != null) { norm_beta = impl.getNormBeta(); tdt.set(0, stdOff, norm_beta[norm_beta.length - 1]); for (int i = 0; i < norm_beta.length - 1; ++i) tdt.set(i + 1, stdOff, norm_beta[i]); } if (impl.hasPValues()) { // fill in p values double[] stdErr = impl.stdErr(); double[] zVals = impl.zValues(); double[] pVals = impl.pValues(); tdt.set(0, 1, stdErr[stdErr.length - 1]); tdt.set(0, 2, zVals[zVals.length - 1]); tdt.set(0, 3, pVals[pVals.length - 1]); for (int i = 0; i < stdErr.length - 1; ++i) { tdt.set(i + 1, 1, 
stdErr[i]); tdt.set(i + 1, 2, zVals[i]); tdt.set(i + 1, 3, pVals[i]); } } coefficients_table.fillFromImpl(tdt); if (impl.isStandardized() && impl.beta() != null) { magnitudes = norm_beta.clone(); for (int i = 0; i < magnitudes.length; ++i) if (magnitudes[i] < 0) magnitudes[i] *= -1; Integer[] indices = new Integer[magnitudes.length - 1]; for (int i = 0; i < indices.length; ++i) indices[i] = i; Arrays.sort( indices, new Comparator<Integer>() { @Override public int compare(Integer o1, Integer o2) { if (magnitudes[o1] < magnitudes[o2]) return +1; if (magnitudes[o1] > magnitudes[o2]) return -1; return 0; } }); String[] names2 = new String[names.length]; for (int i = 0; i < names2.length - 1; ++i) names2[i] = names[indices[i]]; tdt = new TwoDimTable( "Standardized Coefficient Magnitudes", "standardized coefficient magnitudes", names2, new String[] {"Coefficients", "Sign"}, new String[] {"double", "string"}, new String[] {"%5f", "%s"}, "names"); for (int i = 0; i < beta.length - 1; ++i) { tdt.set(i, 0, magnitudes[indices[i]]); tdt.set(i, 1, beta[indices[i]] < 0 ? "NEG" : "POS"); } standardized_coefficient_magnitudes = new TwoDimTableV3(); standardized_coefficient_magnitudes.fillFromImpl(tdt); } return this; }