예제 #1
0
    /**
     * Builds the single-row "Model Summary" table for a K-Means output.
     *
     * <p>The table has seven columns: four integer counts (rows, clusters, categorical columns,
     * iterations) followed by three sum-of-squares metrics (within-cluster, total,
     * between-cluster).
     *
     * @param output the K-Means output whose centers, counts and sum-of-squares fields are reported
     * @return a populated one-row {@code TwoDimTable} titled "Model Summary"
     */
    private TwoDimTable createModelSummaryTable(KMeansModel.KMeansOutput output) {
      // Column schema: the three arrays below are index-aligned (header / type / format).
      final String[] headers = {
        "Number of Rows",
        "Number of Clusters",
        "Number of Categorical Columns",
        "Number of Iterations",
        "Within Cluster Sum of Squares",
        "Total Sum of Squares",
        "Between Cluster Sum of Squares"
      };
      final String[] types = {"long", "long", "long", "long", "double", "double", "double"};
      final String[] formats = {"%d", "%d", "%d", "%d", "%.5f", "%.5f", "%.5f"};

      // One data row; its row header is left unset (null).
      final String[] rowHeaders = new String[1];
      final TwoDimTable summary =
          new TwoDimTable("Model Summary", null, rowHeaders, headers, types, formats, "");

      final int onlyRow = 0;
      int column = 0;

      // Row count is scaled by the mean of the last vector when a weight column is in use
      // (presumably the weight column is the last vector of _train -- confirm against caller).
      summary.set(
          onlyRow,
          column++,
          Math.round(_train.numRows() * (hasWeightCol() ? _train.lastVec().mean() : 1)));

      // Remaining integer counts.
      summary.set(onlyRow, column++, output._centers_raw.length);
      summary.set(onlyRow, column++, output._categorical_column_count);
      summary.set(onlyRow, column++, output._iterations);

      // Sum-of-squares metrics.
      summary.set(onlyRow, column++, output._tot_withinss);
      summary.set(onlyRow, column++, output._totss);
      summary.set(onlyRow, column++, output._betweenss);

      return summary;
    }
예제 #2
0
    TotSS(double[] means, double[] mults, int[] modes, String[][] isCats, int[] card) {
      _means = means;
      _mults = mults;
      _modes = modes;
      _tss = 0;
      _isCats = isCats;
      _card = card;

      // Mean of numeric col is zero when standardized
      _gc = mults != null ? new double[means.length] : Arrays.copyOf(means, means.length);
      for (int i = 0; i < means.length; i++) {
        if (isCats[i] != null)
          _gc[i] =
              Math.min(
                  Math.round(means[i]),
                  _card[i] - 1); // TODO: Should set to majority class of categorical column
      }
    }