示例#1
0
  public static TwoDimTable createCenterTable(
      KMeansModel.KMeansOutput output, boolean standardized) {
    String name = standardized ? "Standardized Cluster Means" : "Cluster Means";
    if (output._size == null
        || output._names == null
        || output._domains == null
        || output._centers_raw == null
        || (standardized && output._centers_std_raw == null)) {
      TwoDimTable table =
          new TwoDimTable(
              name,
              null,
              new String[] {"1"},
              new String[] {"C1"},
              new String[] {"double"},
              new String[] {"%f"},
              "Centroid");
      table.set(0, 0, Double.NaN);
      return table;
    }

    String[] rowHeaders = new String[output._size.length];
    for (int i = 0; i < rowHeaders.length; i++) rowHeaders[i] = String.valueOf(i + 1);
    String[] colTypes = new String[output._names.length];
    String[] colFormats = new String[output._names.length];
    for (int i = 0; i < output._domains.length; ++i) {
      colTypes[i] = output._domains[i] == null ? "double" : "String";
      colFormats[i] = output._domains[i] == null ? "%f" : "%s";
    }
    TwoDimTable table =
        new TwoDimTable(name, null, rowHeaders, output._names, colTypes, colFormats, "Centroid");

    for (int j = 0; j < output._domains.length; ++j) {
      boolean string = output._domains[j] != null;
      if (string) {
        for (int i = 0; i < output._centers_raw.length; ++i) {
          table.set(i, j, output._domains[j][(int) output._centers_raw[i][j]]);
        }
      } else {
        for (int i = 0; i < output._centers_raw.length; ++i) {
          table.set(i, j, standardized ? output._centers_std_raw[i][j] : output._centers_raw[i][j]);
        }
      }
    }
    return table;
  }
示例#2
0
    private TwoDimTable createModelSummaryTable(KMeansModel.KMeansOutput output) {
      List<String> colHeaders = new ArrayList<>();
      List<String> colTypes = new ArrayList<>();
      List<String> colFormat = new ArrayList<>();
      colHeaders.add("Number of Rows");
      colTypes.add("long");
      colFormat.add("%d");
      colHeaders.add("Number of Clusters");
      colTypes.add("long");
      colFormat.add("%d");
      colHeaders.add("Number of Categorical Columns");
      colTypes.add("long");
      colFormat.add("%d");
      colHeaders.add("Number of Iterations");
      colTypes.add("long");
      colFormat.add("%d");
      colHeaders.add("Within Cluster Sum of Squares");
      colTypes.add("double");
      colFormat.add("%.5f");
      colHeaders.add("Total Sum of Squares");
      colTypes.add("double");
      colFormat.add("%.5f");
      colHeaders.add("Between Cluster Sum of Squares");
      colTypes.add("double");
      colFormat.add("%.5f");

      final int rows = 1;
      TwoDimTable table =
          new TwoDimTable(
              "Model Summary",
              null,
              new String[rows],
              colHeaders.toArray(new String[0]),
              colTypes.toArray(new String[0]),
              colFormat.toArray(new String[0]),
              "");
      int row = 0;
      int col = 0;
      table.set(
          row,
          col++,
          Math.round(_train.numRows() * (hasWeightCol() ? _train.lastVec().mean() : 1)));
      table.set(row, col++, output._centers_raw.length);
      table.set(row, col++, output._categorical_column_count);
      table.set(row, col++, output._iterations);
      table.set(row, col++, output._tot_withinss);
      table.set(row, col++, output._totss);
      table.set(row, col++, output._betweenss);
      return table;
    }
示例#3
0
    private TwoDimTable createScoringHistoryTable(KMeansModel.KMeansOutput output) {
      List<String> colHeaders = new ArrayList<>();
      List<String> colTypes = new ArrayList<>();
      List<String> colFormat = new ArrayList<>();
      colHeaders.add("Timestamp");
      colTypes.add("string");
      colFormat.add("%s");
      colHeaders.add("Duration");
      colTypes.add("string");
      colFormat.add("%s");
      colHeaders.add("Iteration");
      colTypes.add("long");
      colFormat.add("%d");
      colHeaders.add("Avg. Change of Std. Centroids");
      colTypes.add("double");
      colFormat.add("%.5f");
      colHeaders.add("Within Cluster Sum Of Squares");
      colTypes.add("double");
      colFormat.add("%.5f");

      final int rows = output._avg_centroids_chg.length;
      TwoDimTable table =
          new TwoDimTable(
              "Scoring History",
              null,
              new String[rows],
              colHeaders.toArray(new String[0]),
              colTypes.toArray(new String[0]),
              colFormat.toArray(new String[0]),
              "");
      int row = 0;
      for (int i = 0; i < rows; i++) {
        int col = 0;
        assert (row < table.getRowDim());
        assert (col < table.getColDim());
        DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
        table.set(row, col++, fmt.print(output._training_time_ms[i]));
        table.set(row, col++, PrettyPrint.msecs(output._training_time_ms[i] - _start_time, true));
        table.set(row, col++, i);
        table.set(row, col++, output._avg_centroids_chg[i]);
        table.set(row, col++, output._history_withinss[i]);
        row++;
      }
      return table;
    }
示例#4
0
  private TwoDimTable createModelSummaryTable(SharedTreeModel.SharedTreeOutput _output) {
    List<String> colHeaders = new ArrayList<>();
    List<String> colTypes = new ArrayList<>();
    List<String> colFormat = new ArrayList<>();

    colHeaders.add("Number of Trees");
    colTypes.add("long");
    colFormat.add("%d");
    colHeaders.add("Model Size in Bytes");
    colTypes.add("long");
    colFormat.add("%d");

    colHeaders.add("Min. Depth");
    colTypes.add("long");
    colFormat.add("%d");
    colHeaders.add("Max. Depth");
    colTypes.add("long");
    colFormat.add("%d");
    colHeaders.add("Mean Depth");
    colTypes.add("double");
    colFormat.add("%.5f");

    colHeaders.add("Min. Leaves");
    colTypes.add("long");
    colFormat.add("%d");
    colHeaders.add("Max. Leaves");
    colTypes.add("long");
    colFormat.add("%d");
    colHeaders.add("Mean Leaves");
    colTypes.add("double");
    colFormat.add("%.5f");

    final int rows = 1;
    TwoDimTable table =
        new TwoDimTable(
            "Model Summary",
            null,
            new String[rows],
            colHeaders.toArray(new String[0]),
            colTypes.toArray(new String[0]),
            colFormat.toArray(new String[0]),
            "");
    int row = 0;
    int col = 0;
    table.set(row, col++, _output._treeStats._num_trees);
    table.set(row, col++, _output._treeStats._byte_size);
    table.set(row, col++, _output._treeStats._min_depth);
    table.set(row, col++, _output._treeStats._max_depth);
    table.set(row, col++, _output._treeStats._mean_depth);
    table.set(row, col++, _output._treeStats._min_leaves);
    table.set(row, col++, _output._treeStats._max_leaves);
    table.set(row, col++, _output._treeStats._mean_leaves);
    return table;
  }
示例#5
0
  private TwoDimTable createScoringHistoryTable(SharedTreeModel.SharedTreeOutput _output) {
    List<String> colHeaders = new ArrayList<>();
    List<String> colTypes = new ArrayList<>();
    List<String> colFormat = new ArrayList<>();
    colHeaders.add("Timestamp");
    colTypes.add("string");
    colFormat.add("%s");
    colHeaders.add("Duration");
    colTypes.add("string");
    colFormat.add("%s");
    colHeaders.add("Number of Trees");
    colTypes.add("long");
    colFormat.add("%d");
    colHeaders.add("Training MSE");
    colTypes.add("double");
    colFormat.add("%.5f");
    if (_output.isClassifier()) {
      colHeaders.add("Training LogLoss");
      colTypes.add("double");
      colFormat.add("%.5f");
    }
    if (_output.getModelCategory() == ModelCategory.Binomial) {
      colHeaders.add("Training AUC");
      colTypes.add("double");
      colFormat.add("%.5f");
    }
    if (_output.getModelCategory() == ModelCategory.Binomial
        || _output.getModelCategory() == ModelCategory.Multinomial) {
      colHeaders.add("Training Classification Error");
      colTypes.add("double");
      colFormat.add("%.5f");
    }

    if (valid() != null) {
      colHeaders.add("Validation MSE");
      colTypes.add("double");
      colFormat.add("%.5f");
      if (_output.isClassifier()) {
        colHeaders.add("Validation LogLoss");
        colTypes.add("double");
        colFormat.add("%.5f");
      }
      if (_output.getModelCategory() == ModelCategory.Binomial) {
        colHeaders.add("Validation AUC");
        colTypes.add("double");
        colFormat.add("%.5f");
      }
      if (_output.isClassifier()) {
        colHeaders.add("Validation Classification Error");
        colTypes.add("double");
        colFormat.add("%.5f");
      }
    }

    int rows = 0;
    for (int i = 1; i < _output._scored_train.length; i++) {
      if (!Double.isNaN(_output._scored_train[i]._mse)) ++rows;
    }
    TwoDimTable table =
        new TwoDimTable(
            "Scoring History",
            null,
            new String[rows],
            colHeaders.toArray(new String[0]),
            colTypes.toArray(new String[0]),
            colFormat.toArray(new String[0]),
            "");
    int row = 0;
    for (int i = 1; i < _output._scored_train.length; i++) {
      if (Double.isNaN(_output._scored_train[i]._mse)) continue;
      int col = 0;
      assert (row < table.getRowDim());
      assert (col < table.getColDim());
      DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
      table.set(row, col++, fmt.print(_output._training_time_ms[i]));
      table.set(row, col++, PrettyPrint.msecs(_output._training_time_ms[i] - _start_time, true));
      table.set(row, col++, i);
      ScoreKeeper st = _output._scored_train[i];
      table.set(row, col++, st._mse);
      if (_output.isClassifier()) table.set(row, col++, st._logloss);
      if (_output.getModelCategory() == ModelCategory.Binomial) table.set(row, col++, st._AUC);
      if (_output.isClassifier()) table.set(row, col++, st._classError);

      if (_valid != null) {
        st = _output._scored_valid[i];
        table.set(row, col++, st._mse);
        if (_output.isClassifier()) table.set(row, col++, st._logloss);
        if (_output.getModelCategory() == ModelCategory.Binomial) table.set(row, col++, st._AUC);
        if (_output.isClassifier()) table.set(row, col++, st._classError);
      }
      row++;
    }
    return table;
  }