/**
 * Builds the table of cluster centers for a k-means output.
 *
 * <p>The table is titled "Standardized Cluster Means" when {@code standardized} is true and
 * "Cluster Means" otherwise. Rows are labelled "1".."N" where N is {@code output._size.length};
 * columns are named after {@code output._names}.
 *
 * <p>If any of {@code _size}, {@code _names}, {@code _domains} or {@code _centers_raw} is null
 * (or {@code _centers_std_raw} is null while {@code standardized} is requested), a 1x1
 * placeholder table holding {@code NaN} is returned instead.
 *
 * @param output k-means output holding the centers, column names and column domains
 * @param standardized whether numeric cells should come from {@code _centers_std_raw} rather
 *     than {@code _centers_raw}
 * @return the populated center table, or the 1x1 NaN placeholder described above
 */
public static TwoDimTable createCenterTable(
    KMeansModel.KMeansOutput output, boolean standardized) {
  final String title = standardized ? "Standardized Cluster Means" : "Cluster Means";

  final boolean incomplete =
      output._size == null
          || output._names == null
          || output._domains == null
          || output._centers_raw == null
          || (standardized && output._centers_std_raw == null);
  if (incomplete) {
    TwoDimTable placeholder =
        new TwoDimTable(
            title,
            null,
            new String[] {"1"},
            new String[] {"C1"},
            new String[] {"double"},
            new String[] {"%f"},
            "Centroid");
    placeholder.set(0, 0, Double.NaN);
    return placeholder;
  }

  // One row per cluster, labelled with its 1-based index.
  final String[] clusterLabels = new String[output._size.length];
  for (int k = 0; k < clusterLabels.length; k++) {
    clusterLabels[k] = String.valueOf(k + 1);
  }

  // A column with a domain is rendered as text, a column without one as a number.
  final String[] types = new String[output._names.length];
  final String[] formats = new String[output._names.length];
  for (int c = 0; c < output._domains.length; c++) {
    if (output._domains[c] == null) {
      types[c] = "double";
      formats[c] = "%f";
    } else {
      types[c] = "String";
      formats[c] = "%s";
    }
  }

  final TwoDimTable centers =
      new TwoDimTable(title, null, clusterLabels, output._names, types, formats, "Centroid");

  for (int c = 0; c < output._domains.length; c++) {
    final boolean categorical = output._domains[c] != null;
    for (int k = 0; k < output._centers_raw.length; k++) {
      if (categorical) {
        // The raw center value is used as an index into the column's domain, regardless of
        // the standardized flag.
        centers.set(k, c, output._domains[c][(int) output._centers_raw[k][c]]);
      } else {
        centers.set(
            k, c, standardized ? output._centers_std_raw[k][c] : output._centers_raw[k][c]);
      }
    }
  }
  return centers;
}
/**
 * Assembles the single-row "Model Summary" table for a k-means model.
 *
 * <p>Columns, in order: number of rows, number of clusters, number of categorical columns,
 * number of iterations, within-cluster sum of squares, total sum of squares and
 * between-cluster sum of squares.
 *
 * <p>The row count is {@code _train.numRows()}, multiplied by the mean of the last vec of
 * {@code _train} when {@code hasWeightCol()} is true (presumably the weights column), then
 * rounded.
 *
 * @param output k-means output supplying the cluster and sum-of-squares statistics
 * @return a one-row summary table
 */
private TwoDimTable createModelSummaryTable(KMeansModel.KMeansOutput output) {
  final String[] headers = {
    "Number of Rows",
    "Number of Clusters",
    "Number of Categorical Columns",
    "Number of Iterations",
    "Within Cluster Sum of Squares",
    "Total Sum of Squares",
    "Between Cluster Sum of Squares"
  };
  final String[] types = {"long", "long", "long", "long", "double", "double", "double"};
  final String[] formats = {"%d", "%d", "%d", "%d", "%.5f", "%.5f", "%.5f"};

  final TwoDimTable summary =
      new TwoDimTable("Model Summary", null, new String[1], headers, types, formats, "");

  summary.set(
      0, 0, Math.round(_train.numRows() * (hasWeightCol() ? _train.lastVec().mean() : 1)));
  summary.set(0, 1, output._centers_raw.length);
  summary.set(0, 2, output._categorical_column_count);
  summary.set(0, 3, output._iterations);
  summary.set(0, 4, output._tot_withinss);
  summary.set(0, 5, output._totss);
  summary.set(0, 6, output._betweenss);
  return summary;
}
/**
 * Builds the "Scoring History" table for a k-means model, one row per entry of
 * {@code output._avg_centroids_chg}.
 *
 * <p>Columns, in order: timestamp ({@code output._training_time_ms[i]} formatted as
 * {@code yyyy-MM-dd HH:mm:ss}), duration (that timestamp minus {@code _start_time}, rendered by
 * {@code PrettyPrint.msecs}), the iteration index, the average change of the standardized
 * centroids, and the within-cluster sum of squares.
 *
 * @param output k-means output holding the per-iteration history arrays
 * @return the scoring history table; it has zero rows when the history is empty
 */
private TwoDimTable createScoringHistoryTable(KMeansModel.KMeansOutput output) {
  final String[] colHeaders = {
    "Timestamp",
    "Duration",
    "Iteration",
    "Avg. Change of Std. Centroids",
    "Within Cluster Sum Of Squares"
  };
  final String[] colTypes = {"string", "string", "long", "double", "double"};
  final String[] colFormat = {"%s", "%s", "%d", "%.5f", "%.5f"};

  final int rows = output._avg_centroids_chg.length;
  TwoDimTable table =
      new TwoDimTable("Scoring History", null, new String[rows], colHeaders, colTypes, colFormat, "");

  // The formatter does not depend on the row, so build it once instead of once per iteration.
  final DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");

  // Every history entry produces exactly one row, so the loop index doubles as the row index.
  for (int i = 0; i < rows; i++) {
    int col = 0;
    assert (i < table.getRowDim());
    assert (col < table.getColDim());
    table.set(i, col++, fmt.print(output._training_time_ms[i]));
    table.set(i, col++, PrettyPrint.msecs(output._training_time_ms[i] - _start_time, true));
    table.set(i, col++, i);
    table.set(i, col++, output._avg_centroids_chg[i]);
    table.set(i, col++, output._history_withinss[i]);
  }
  return table;
}
/**
 * Assembles the single-row "Model Summary" table for a tree-based model from
 * {@code _output._treeStats}.
 *
 * <p>Columns, in order: number of trees, model size in bytes, minimum / maximum / mean tree
 * depth, and minimum / maximum / mean leaf count.
 *
 * @param _output shared-tree output whose {@code _treeStats} supplies every cell
 * @return a one-row summary table
 */
private TwoDimTable createModelSummaryTable(SharedTreeModel.SharedTreeOutput _output) {
  final String[] headers = {
    "Number of Trees",
    "Model Size in Bytes",
    "Min. Depth",
    "Max. Depth",
    "Mean Depth",
    "Min. Leaves",
    "Max. Leaves",
    "Mean Leaves"
  };
  final String[] types = {"long", "long", "long", "long", "double", "long", "long", "double"};
  final String[] formats = {"%d", "%d", "%d", "%d", "%.5f", "%d", "%d", "%.5f"};

  final TwoDimTable summary =
      new TwoDimTable("Model Summary", null, new String[1], headers, types, formats, "");

  summary.set(0, 0, _output._treeStats._num_trees);
  summary.set(0, 1, _output._treeStats._byte_size);
  summary.set(0, 2, _output._treeStats._min_depth);
  summary.set(0, 3, _output._treeStats._max_depth);
  summary.set(0, 4, _output._treeStats._mean_depth);
  summary.set(0, 5, _output._treeStats._min_leaves);
  summary.set(0, 6, _output._treeStats._max_leaves);
  summary.set(0, 7, _output._treeStats._mean_leaves);
  return summary;
}
/**
 * Builds the "Scoring History" table for a tree-based model.
 *
 * <p>Entries of {@code _output._scored_train} are scanned starting at index 1; an entry
 * contributes a row only when its {@code _mse} is not NaN. Each row holds the timestamp
 * ({@code _output._training_time_ms[i]} formatted as {@code yyyy-MM-dd HH:mm:ss}), the duration
 * since {@code _start_time}, the entry index (shown as "Number of Trees") and the training MSE.
 *
 * <p>Optional columns follow: training log loss (classifiers), training AUC (binomial) and
 * training classification error (binomial or multinomial). When {@code valid()} is non-null the
 * validation MSE and the matching validation metrics from {@code _output._scored_valid} are
 * appended.
 *
 * <p>Which optional columns exist is decided once and the same flags drive both the header
 * list and the cell writes, so headers and values cannot get out of step.
 *
 * @param _output shared-tree output holding the scoring history
 * @return the scoring history table
 */
private TwoDimTable createScoringHistoryTable(SharedTreeModel.SharedTreeOutput _output) {
  // Column-presence flags, evaluated once and reused for headers and for row filling.
  final boolean classifier = _output.isClassifier();
  final boolean binomial = _output.getModelCategory() == ModelCategory.Binomial;
  final boolean trainClassError =
      binomial || _output.getModelCategory() == ModelCategory.Multinomial;
  final boolean hasValidation = valid() != null;

  List<String> colHeaders = new ArrayList<>();
  List<String> colTypes = new ArrayList<>();
  List<String> colFormat = new ArrayList<>();
  colHeaders.add("Timestamp");
  colTypes.add("string");
  colFormat.add("%s");
  colHeaders.add("Duration");
  colTypes.add("string");
  colFormat.add("%s");
  colHeaders.add("Number of Trees");
  colTypes.add("long");
  colFormat.add("%d");
  colHeaders.add("Training MSE");
  colTypes.add("double");
  colFormat.add("%.5f");
  if (classifier) {
    colHeaders.add("Training LogLoss");
    colTypes.add("double");
    colFormat.add("%.5f");
  }
  if (binomial) {
    colHeaders.add("Training AUC");
    colTypes.add("double");
    colFormat.add("%.5f");
  }
  if (trainClassError) {
    colHeaders.add("Training Classification Error");
    colTypes.add("double");
    colFormat.add("%.5f");
  }
  if (hasValidation) {
    colHeaders.add("Validation MSE");
    colTypes.add("double");
    colFormat.add("%.5f");
    if (classifier) {
      colHeaders.add("Validation LogLoss");
      colTypes.add("double");
      colFormat.add("%.5f");
    }
    if (binomial) {
      colHeaders.add("Validation AUC");
      colTypes.add("double");
      colFormat.add("%.5f");
    }
    if (classifier) {
      colHeaders.add("Validation Classification Error");
      colTypes.add("double");
      colFormat.add("%.5f");
    }
  }

  // Count the rows first: only entries from index 1 onward with a non-NaN MSE are reported.
  int rows = 0;
  for (int i = 1; i < _output._scored_train.length; i++) {
    if (!Double.isNaN(_output._scored_train[i]._mse)) {
      ++rows;
    }
  }

  TwoDimTable table =
      new TwoDimTable(
          "Scoring History",
          null,
          new String[rows],
          colHeaders.toArray(new String[0]),
          colTypes.toArray(new String[0]),
          colFormat.toArray(new String[0]),
          "");

  // The formatter does not depend on the row, so build it once instead of once per iteration.
  final DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");

  int row = 0;
  for (int i = 1; i < _output._scored_train.length; i++) {
    if (Double.isNaN(_output._scored_train[i]._mse)) {
      continue;
    }
    int col = 0;
    assert (row < table.getRowDim());
    assert (col < table.getColDim());
    table.set(row, col++, fmt.print(_output._training_time_ms[i]));
    table.set(row, col++, PrettyPrint.msecs(_output._training_time_ms[i] - _start_time, true));
    table.set(row, col++, i);

    ScoreKeeper st = _output._scored_train[i];
    table.set(row, col++, st._mse);
    if (classifier) {
      table.set(row, col++, st._logloss);
    }
    if (binomial) {
      table.set(row, col++, st._AUC);
    }
    if (trainClassError) {
      table.set(row, col++, st._classError);
    }

    if (hasValidation) {
      st = _output._scored_valid[i];
      table.set(row, col++, st._mse);
      if (classifier) {
        table.set(row, col++, st._logloss);
      }
      if (binomial) {
        table.set(row, col++, st._AUC);
      }
      if (classifier) {
        table.set(row, col++, st._classError);
      }
    }
    row++;
  }
  return table;
}