예제 #1
0
  /**
   * Builds the table of cluster centers for a k-means output.
   *
   * <p>When any of {@code _size}, {@code _names}, {@code _domains} or {@code _centers_raw} is
   * {@code null} (or standardized centers are requested but {@code _centers_std_raw} is {@code
   * null}), a 1x1 placeholder table containing {@code NaN} is returned instead.
   *
   * <p>Otherwise the table has one row per entry of {@code _size} (labelled 1..n) and uses {@code
   * _names} as column headers. A column whose domain is non-null shows the domain entry indexed by
   * the raw center value cast to {@code int}, regardless of {@code standardized}; every other
   * column shows the raw or the standardized center value.
   *
   * @param output k-means output the centers are read from
   * @param standardized {@code true} to show standardized centers in non-domain columns
   * @return the populated center table, or the NaN placeholder described above
   */
  public static TwoDimTable createCenterTable(
      KMeansModel.KMeansOutput output, boolean standardized) {
    final String title = standardized ? "Standardized Cluster Means" : "Cluster Means";

    final boolean incomplete =
        output._size == null
            || output._names == null
            || output._domains == null
            || output._centers_raw == null
            || (standardized && output._centers_std_raw == null);
    if (incomplete) {
      TwoDimTable placeholder =
          new TwoDimTable(
              title,
              null,
              new String[] {"1"},
              new String[] {"C1"},
              new String[] {"double"},
              new String[] {"%f"},
              "Centroid");
      placeholder.set(0, 0, Double.NaN);
      return placeholder;
    }

    // Row labels are the 1-based cluster indices.
    String[] rowLabels = new String[output._size.length];
    for (int r = 0; r < rowLabels.length; r++) {
      rowLabels[r] = String.valueOf(r + 1);
    }

    // Column type/format arrays are sized by the names but filled by walking the domains.
    String[] types = new String[output._names.length];
    String[] formats = new String[output._names.length];
    for (int c = 0; c < output._domains.length; ++c) {
      if (output._domains[c] == null) {
        types[c] = "double";
        formats[c] = "%f";
      } else {
        types[c] = "String";
        formats[c] = "%s";
      }
    }

    TwoDimTable centers =
        new TwoDimTable(title, null, rowLabels, output._names, types, formats, "Centroid");

    for (int c = 0; c < output._domains.length; ++c) {
      final boolean hasDomain = output._domains[c] != null;
      for (int r = 0; r < output._centers_raw.length; ++r) {
        if (hasDomain) {
          // Domain columns always use the raw center as an index into the domain labels.
          centers.set(r, c, output._domains[c][(int) output._centers_raw[r][c]]);
        } else {
          centers.set(
              r, c, standardized ? output._centers_std_raw[r][c] : output._centers_raw[r][c]);
        }
      }
    }
    return centers;
  }
예제 #2
0
    /**
     * Builds the single-row "Model Summary" table for a k-means output.
     *
     * <p>Columns, in order: number of rows (the training frame's row count, multiplied by the mean
     * of the frame's last vec when {@code hasWeightCol()} is true, then rounded), number of
     * clusters ({@code _centers_raw.length}), categorical column count, iterations, within-cluster
     * sum of squares, total sum of squares and between-cluster sum of squares.
     *
     * @param output k-means output supplying the summary values
     * @return the populated one-row summary table
     */
    private TwoDimTable createModelSummaryTable(KMeansModel.KMeansOutput output) {
      final String[] headers = {
        "Number of Rows",
        "Number of Clusters",
        "Number of Categorical Columns",
        "Number of Iterations",
        "Within Cluster Sum of Squares",
        "Total Sum of Squares",
        "Between Cluster Sum of Squares"
      };
      final String[] types = {"long", "long", "long", "long", "double", "double", "double"};
      final String[] formats = {"%d", "%d", "%d", "%d", "%.5f", "%.5f", "%.5f"};

      TwoDimTable summary =
          new TwoDimTable("Model Summary", null, new String[1], headers, types, formats, "");

      final int onlyRow = 0;
      summary.set(
          onlyRow,
          0,
          Math.round(_train.numRows() * (hasWeightCol() ? _train.lastVec().mean() : 1)));
      summary.set(onlyRow, 1, output._centers_raw.length);
      summary.set(onlyRow, 2, output._categorical_column_count);
      summary.set(onlyRow, 3, output._iterations);
      summary.set(onlyRow, 4, output._tot_withinss);
      summary.set(onlyRow, 5, output._totss);
      summary.set(onlyRow, 6, output._betweenss);
      return summary;
    }
예제 #3
0
    /**
     * Builds the "Scoring History" table for a k-means output, one row per recorded iteration.
     *
     * <p>The row count is {@code output._avg_centroids_chg.length}. Each row holds the formatted
     * timestamp, the duration relative to {@code _start_time}, the iteration index, the average
     * change of the standardized centroids and the within-cluster sum of squares.
     *
     * <p>NOTE(review): {@code _training_time_ms} and {@code _history_withinss} are indexed with
     * the same counter and are presumably at least as long as {@code _avg_centroids_chg} — confirm
     * against the code that fills them.
     *
     * @param output k-means output supplying the per-iteration history arrays
     * @return the populated scoring-history table
     */
    private TwoDimTable createScoringHistoryTable(KMeansModel.KMeansOutput output) {
      List<String> colHeaders = new ArrayList<>();
      List<String> colTypes = new ArrayList<>();
      List<String> colFormat = new ArrayList<>();
      colHeaders.add("Timestamp");
      colTypes.add("string");
      colFormat.add("%s");
      colHeaders.add("Duration");
      colTypes.add("string");
      colFormat.add("%s");
      colHeaders.add("Iteration");
      colTypes.add("long");
      colFormat.add("%d");
      colHeaders.add("Avg. Change of Std. Centroids");
      colTypes.add("double");
      colFormat.add("%.5f");
      colHeaders.add("Within Cluster Sum Of Squares");
      colTypes.add("double");
      colFormat.add("%.5f");

      final int rows = output._avg_centroids_chg.length;
      TwoDimTable table =
          new TwoDimTable(
              "Scoring History",
              null,
              new String[rows],
              colHeaders.toArray(new String[0]),
              colTypes.toArray(new String[0]),
              colFormat.toArray(new String[0]),
              "");

      // The formatter does not depend on the row, so it is built once instead of per iteration.
      final DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");

      // Every history entry becomes a row, so the loop index doubles as the table row.
      for (int i = 0; i < rows; i++) {
        int col = 0;
        assert (i < table.getRowDim());
        assert (col < table.getColDim());
        table.set(i, col++, fmt.print(output._training_time_ms[i]));
        table.set(i, col++, PrettyPrint.msecs(output._training_time_ms[i] - _start_time, true));
        table.set(i, col++, i);
        table.set(i, col++, output._avg_centroids_chg[i]);
        table.set(i, col++, output._history_withinss[i]);
      }
      return table;
    }
예제 #4
0
  /**
   * Builds the single-row "Model Summary" table from the tree statistics of a shared-tree output.
   *
   * <p>Columns, in order: number of trees, model size in bytes, min/max/mean depth and
   * min/max/mean leaves, all read from {@code _output._treeStats}.
   *
   * @param _output shared-tree output whose {@code _treeStats} supplies the values
   * @return the populated one-row summary table
   */
  private TwoDimTable createModelSummaryTable(SharedTreeModel.SharedTreeOutput _output) {
    final String[] headers = {
      "Number of Trees",
      "Model Size in Bytes",
      "Min. Depth",
      "Max. Depth",
      "Mean Depth",
      "Min. Leaves",
      "Max. Leaves",
      "Mean Leaves"
    };
    final String[] types = {"long", "long", "long", "long", "double", "long", "long", "double"};
    final String[] formats = {"%d", "%d", "%d", "%d", "%.5f", "%d", "%d", "%.5f"};

    TwoDimTable summary =
        new TwoDimTable("Model Summary", null, new String[1], headers, types, formats, "");

    final int onlyRow = 0;
    summary.set(onlyRow, 0, _output._treeStats._num_trees);
    summary.set(onlyRow, 1, _output._treeStats._byte_size);
    summary.set(onlyRow, 2, _output._treeStats._min_depth);
    summary.set(onlyRow, 3, _output._treeStats._max_depth);
    summary.set(onlyRow, 4, _output._treeStats._mean_depth);
    summary.set(onlyRow, 5, _output._treeStats._min_leaves);
    summary.set(onlyRow, 6, _output._treeStats._max_leaves);
    summary.set(onlyRow, 7, _output._treeStats._mean_leaves);
    return summary;
  }
 /**
  * Renders the model status as text.
  *
  * <p>Output is produced only when diagnostics are enabled and quiet mode is off; otherwise the
  * result is empty. When {@code _sparsity_beta} is positive, one line per hidden layer reports its
  * average activation. The summary table is then rebuilt via {@link #createSummaryTable()} and
  * appended.
  *
  * @return the textual status, or an empty string when nothing is to be reported
  */
 @Override
 public String toString() {
   final StringBuilder out = new StringBuilder();

   // Nothing to report unless diagnostics are on and quiet mode is off.
   if (!get_params()._diagnostics || get_params()._quiet_mode) {
     return out.toString();
   }

   if (get_params()._sparsity_beta > 0) {
     for (int layer = 0; layer < get_params()._hidden.length; layer++) {
       out.append("Average activation in hidden layer ");
       out.append(layer);
       out.append(" is  ");
       out.append(mean_a[layer]);
       out.append(" \n");
     }
   }

   // Refreshes the summaryTable field before it is rendered.
   createSummaryTable();
   out.append(summaryTable.toString(1));
   return out.toString();
 }
  /**
   * Builds the "Status of Neuron Layers" table and stores it in {@code summaryTable}.
   *
   * <p>The table has one row per element returned by {@code
   * DeepLearningTask.makeNeuronsForTesting(this)}. Every row gets the 1-based layer number, the
   * unit count and the neuron class name. Row 0 additionally gets only the input dropout ratio (as
   * a percentage). Later rows get L1/L2, rate, momentum and weight/bias statistics; all of them
   * except the last row also get a hidden dropout percentage (0 when no hidden dropout ratios are
   * configured).
   *
   * @return the newly built table, which is also assigned to {@code summaryTable}
   */
  TwoDimTable createSummaryTable() {
    final Neurons[] layers = DeepLearningTask.makeNeuronsForTesting(this);
    final long serializedBytes = new AutoBuffer().put(this).buf().length;

    // Assemble the table description from its individual fragments, left to right.
    final String diagnosticsNote = get_params()._diagnostics ? "" : "diagnostics disabled, ";
    final String targetNote =
        !get_params()._autoencoder ? ("predicting " + _train.lastVecName() + ", ") : "";
    final String taskNote;
    if (get_params()._autoencoder) {
      taskNote = "auto-encoder";
    } else if (_classification) {
      taskNote = units[units.length - 1] + "-class classification";
    } else {
      taskNote = "regression";
    }
    final String description =
        diagnosticsNote
            + targetNote
            + taskNote
            + ", "
            + get_params()._distribution
            + " distribution, "
            + get_params()._loss
            + " loss, "
            + String.format("%,d", size())
            + " weights/biases, "
            + PrettyPrint.bytes(serializedBytes)
            + ", "
            + String.format("%,d", get_processed_global())
            + " training samples, "
            + "mini-batch size "
            + String.format("%,d", get_params()._mini_batch_size);

    final String[] headers = {
      "Layer",
      "Units",
      "Type",
      "Dropout",
      "L1",
      "L2",
      "Mean Rate",
      "Rate RMS",
      "Momentum",
      "Mean Weight",
      "Weight RMS",
      "Mean Bias",
      "Bias RMS"
    };
    final String[] types = {
      "int", "int", "string", "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double"
    };
    final String[] formats = {
      "%d", "%d", "%s", "%2.2f %%", "%5f", "%5f", "%5f", "%5f", "%5f", "%5f", "%5f", "%5f", "%5f"
    };

    TwoDimTable table =
        new TwoDimTable(
            "Status of Neuron Layers",
            description,
            new String[layers.length],
            headers,
            types,
            formats,
            "");

    final int lastLayer = layers.length - 1;
    for (int layer = 0; layer < layers.length; ++layer) {
      table.set(layer, 0, layer + 1);
      table.set(layer, 1, layers[layer].units);
      table.set(layer, 2, layers[layer].getClass().getSimpleName());

      // Row 0 only reports the input dropout; the remaining columns stay unset.
      if (layer == 0) {
        table.set(layer, 3, layers[layer].params._input_dropout_ratio * 100);
        continue;
      }

      // The last row gets no dropout entry at all.
      if (layer < lastLayer) {
        if (layers[layer].params._hidden_dropout_ratios != null) {
          table.set(layer, 3, layers[layer].params._hidden_dropout_ratios[layer - 1] * 100);
        } else {
          table.set(layer, 3, 0);
        }
      }

      final boolean adaptive = get_params()._adaptive_rate;
      table.set(layer, 4, layers[layer].params._l1);
      table.set(layer, 5, layers[layer].params._l2);
      // The ternaries are kept as single expressions so the numeric result types are unchanged.
      table.set(
          layer, 6, (adaptive ? mean_rate[layer] : layers[layer].rate(get_processed_total())));
      table.set(layer, 7, (adaptive ? rms_rate[layer] : 0));
      table.set(layer, 8, adaptive ? 0 : layers[layer].momentum(get_processed_total()));
      table.set(layer, 9, mean_weight[layer]);
      table.set(layer, 10, rms_weight[layer]);
      table.set(layer, 11, mean_bias[layer]);
      table.set(layer, 12, rms_bias[layer]);
    }

    summaryTable = table;
    return summaryTable;
  }
예제 #7
0
  /**
   * Builds the "Scoring History" table for a shared-tree output.
   *
   * <p>Entry 0 of {@code _scored_train} is skipped, and only entries whose training MSE is not
   * {@code NaN} produce a row. Each row holds the formatted timestamp, the duration relative to
   * {@code _start_time}, the entry index (shown as "Number of Trees") and the training metrics.
   * LogLoss and classification error are included for classifiers, AUC for binomial models. The
   * same set of validation metrics follows when validation data is present.
   *
   * <p>The classifier/binomial flags are evaluated once and shared by the header setup and the
   * cell writes, so a metric column can never be declared without being filled (or vice versa).
   *
   * @param _output shared-tree output supplying the score keepers and timing arrays
   * @return the populated scoring-history table
   */
  private TwoDimTable createScoringHistoryTable(SharedTreeModel.SharedTreeOutput _output) {
    // Evaluated once so headers and values are always gated by the very same conditions.
    final boolean classifier = _output.isClassifier();
    final boolean binomial = _output.getModelCategory() == ModelCategory.Binomial;

    List<String> colHeaders = new ArrayList<>();
    List<String> colTypes = new ArrayList<>();
    List<String> colFormat = new ArrayList<>();
    colHeaders.add("Timestamp");
    colTypes.add("string");
    colFormat.add("%s");
    colHeaders.add("Duration");
    colTypes.add("string");
    colFormat.add("%s");
    colHeaders.add("Number of Trees");
    colTypes.add("long");
    colFormat.add("%d");
    colHeaders.add("Training MSE");
    colTypes.add("double");
    colFormat.add("%.5f");
    if (classifier) {
      colHeaders.add("Training LogLoss");
      colTypes.add("double");
      colFormat.add("%.5f");
    }
    if (binomial) {
      colHeaders.add("Training AUC");
      colTypes.add("double");
      colFormat.add("%.5f");
    }
    if (classifier) {
      colHeaders.add("Training Classification Error");
      colTypes.add("double");
      colFormat.add("%.5f");
    }

    // NOTE(review): headers are gated on valid() while the values below are gated on _valid;
    // presumably both refer to the same validation frame — confirm, since valid() is not
    // visible here.
    if (valid() != null) {
      colHeaders.add("Validation MSE");
      colTypes.add("double");
      colFormat.add("%.5f");
      if (classifier) {
        colHeaders.add("Validation LogLoss");
        colTypes.add("double");
        colFormat.add("%.5f");
      }
      if (binomial) {
        colHeaders.add("Validation AUC");
        colTypes.add("double");
        colFormat.add("%.5f");
      }
      if (classifier) {
        colHeaders.add("Validation Classification Error");
        colTypes.add("double");
        colFormat.add("%.5f");
      }
    }

    // Count the rows up front: entries with a NaN training MSE are not reported.
    int rows = 0;
    for (int i = 1; i < _output._scored_train.length; i++) {
      if (!Double.isNaN(_output._scored_train[i]._mse)) ++rows;
    }
    TwoDimTable table =
        new TwoDimTable(
            "Scoring History",
            null,
            new String[rows],
            colHeaders.toArray(new String[0]),
            colTypes.toArray(new String[0]),
            colFormat.toArray(new String[0]),
            "");

    // The formatter does not depend on the row, so it is built once instead of per iteration.
    final DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");

    int row = 0;
    for (int i = 1; i < _output._scored_train.length; i++) {
      ScoreKeeper st = _output._scored_train[i];
      if (Double.isNaN(st._mse)) continue;
      int col = 0;
      assert (row < table.getRowDim());
      assert (col < table.getColDim());
      table.set(row, col++, fmt.print(_output._training_time_ms[i]));
      table.set(row, col++, PrettyPrint.msecs(_output._training_time_ms[i] - _start_time, true));
      table.set(row, col++, i);
      table.set(row, col++, st._mse);
      if (classifier) table.set(row, col++, st._logloss);
      if (binomial) table.set(row, col++, st._AUC);
      if (classifier) table.set(row, col++, st._classError);

      if (_valid != null) {
        st = _output._scored_valid[i];
        table.set(row, col++, st._mse);
        if (classifier) table.set(row, col++, st._logloss);
        if (binomial) table.set(row, col++, st._AUC);
        if (classifier) table.set(row, col++, st._classError);
      }
      row++;
    }
    return table;
  }