Esempio n. 1
0
  /**
   * Builds the single-row "Model Summary" table for a tree model.
   *
   * <p>The eight cells are read from {@code _output._treeStats}: number of trees, byte size, and
   * the min/max/mean of tree depth and of leaf count.
   *
   * @param _output model output whose {@code _treeStats} supplies every cell value
   * @return a table with one row and eight columns
   */
  private TwoDimTable createModelSummaryTable(SharedTreeModel.SharedTreeOutput _output) {
    // Column metadata; the three arrays are index-aligned.
    final String[] headers = {
      "Number of Trees",
      "Model Size in Bytes",
      "Min. Depth",
      "Max. Depth",
      "Mean Depth",
      "Min. Leaves",
      "Max. Leaves",
      "Mean Leaves"
    };
    final String[] types = {"long", "long", "long", "long", "double", "long", "long", "double"};
    final String[] formats = {"%d", "%d", "%d", "%d", "%.5f", "%d", "%d", "%.5f"};

    TwoDimTable summary =
        new TwoDimTable("Model Summary", null, new String[1], headers, types, formats, "");

    // Fill the only row, in the same column order as the headers above.
    summary.set(0, 0, _output._treeStats._num_trees);
    summary.set(0, 1, _output._treeStats._byte_size);
    summary.set(0, 2, _output._treeStats._min_depth);
    summary.set(0, 3, _output._treeStats._max_depth);
    summary.set(0, 4, _output._treeStats._mean_depth);
    summary.set(0, 5, _output._treeStats._min_leaves);
    summary.set(0, 6, _output._treeStats._max_leaves);
    summary.set(0, 7, _output._treeStats._mean_leaves);
    return summary;
  }
Esempio n. 2
0
  /**
   * Builds the table of cluster centers, one row per cluster and one column per entry of {@code
   * output._names}.
   *
   * <p>If any of {@code _size}, {@code _names}, {@code _domains}, {@code _centers_raw} (or {@code
   * _centers_std_raw} when {@code standardized} is set) is {@code null}, a 1x1 placeholder table
   * holding {@code NaN} is returned instead.
   *
   * <p>Columns with a non-null domain are rendered as the domain entry indexed by the raw center
   * value, regardless of {@code standardized}; all other columns show the standardized or raw
   * center value.
   *
   * @param output k-means output to read centers, names and domains from
   * @param standardized whether numeric columns should show {@code _centers_std_raw} rather than
   *     {@code _centers_raw}
   * @return the populated center table, or the NaN placeholder when data is missing
   */
  public static TwoDimTable createCenterTable(
      KMeansModel.KMeansOutput output, boolean standardized) {
    final String title = standardized ? "Standardized Cluster Means" : "Cluster Means";

    final boolean incomplete =
        output._size == null
            || output._names == null
            || output._domains == null
            || output._centers_raw == null
            || (standardized && output._centers_std_raw == null);
    if (incomplete) {
      TwoDimTable placeholder =
          new TwoDimTable(
              title,
              null,
              new String[] {"1"},
              new String[] {"C1"},
              new String[] {"double"},
              new String[] {"%f"},
              "Centroid");
      placeholder.set(0, 0, Double.NaN);
      return placeholder;
    }

    // Row labels are 1-based cluster numbers.
    String[] rowLabels = new String[output._size.length];
    for (int r = 0; r < rowLabels.length; r++) {
      rowLabels[r] = String.valueOf(r + 1);
    }

    // Column type/format depends on whether the column has a domain.
    String[] types = new String[output._names.length];
    String[] formats = new String[output._names.length];
    for (int c = 0; c < output._domains.length; ++c) {
      if (output._domains[c] == null) {
        types[c] = "double";
        formats[c] = "%f";
      } else {
        types[c] = "String";
        formats[c] = "%s";
      }
    }

    TwoDimTable centers =
        new TwoDimTable(title, null, rowLabels, output._names, types, formats, "Centroid");

    for (int c = 0; c < output._domains.length; ++c) {
      final boolean numeric = output._domains[c] == null;
      for (int r = 0; r < output._centers_raw.length; ++r) {
        if (numeric) {
          centers.set(
              r, c, standardized ? output._centers_std_raw[r][c] : output._centers_raw[r][c]);
        } else {
          // The raw center value is used as an index into the column's domain.
          centers.set(r, c, output._domains[c][(int) output._centers_raw[r][c]]);
        }
      }
    }
    return centers;
  }
Esempio n. 3
0
    /**
     * Builds the "Scoring History" table with one row per entry of {@code
     * output._avg_centroids_chg}.
     *
     * <p>Each row holds the timestamp from {@code output._training_time_ms[i]} formatted as {@code
     * yyyy-MM-dd HH:mm:ss}, the duration relative to {@code _start_time}, the iteration index
     * {@code i}, the average centroid change and the within-cluster sum of squares.
     *
     * @param output k-means output providing the per-iteration history arrays
     * @return the populated scoring-history table (zero rows if no history has been recorded)
     */
    private TwoDimTable createScoringHistoryTable(KMeansModel.KMeansOutput output) {
      // Column metadata; the three arrays are index-aligned.
      final String[] colHeaders = {
        "Timestamp",
        "Duration",
        "Iteration",
        "Avg. Change of Std. Centroids",
        "Within Cluster Sum Of Squares"
      };
      final String[] colTypes = {"string", "string", "long", "double", "double"};
      final String[] colFormat = {"%s", "%s", "%d", "%.5f", "%.5f"};

      final int rows = output._avg_centroids_chg.length;
      TwoDimTable table =
          new TwoDimTable(
              "Scoring History", null, new String[rows], colHeaders, colTypes, colFormat, "");

      // The formatter does not depend on the row, so it is built once rather than per iteration.
      final DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
      for (int i = 0; i < rows; i++) {
        int col = 0;
        table.set(i, col++, fmt.print(output._training_time_ms[i]));
        table.set(i, col++, PrettyPrint.msecs(output._training_time_ms[i] - _start_time, true));
        table.set(i, col++, i);
        table.set(i, col++, output._avg_centroids_chg[i]);
        table.set(i, col++, output._history_withinss[i]);
      }
      return table;
    }
Esempio n. 4
0
    /**
     * Builds the single-row "Model Summary" table for a k-means model.
     *
     * <p>The first cell is the rounded row count of {@code _train}, multiplied by the mean of the
     * last vector of {@code _train} when {@code hasWeightCol()} is true. The remaining cells come
     * from {@code output}: number of centers, categorical column count, iterations, and the
     * within / total / between sums of squares.
     *
     * @param output k-means output supplying the cluster statistics
     * @return a table with one row and seven columns
     */
    private TwoDimTable createModelSummaryTable(KMeansModel.KMeansOutput output) {
      // Column metadata; the three arrays are index-aligned.
      final String[] headers = {
        "Number of Rows",
        "Number of Clusters",
        "Number of Categorical Columns",
        "Number of Iterations",
        "Within Cluster Sum of Squares",
        "Total Sum of Squares",
        "Between Cluster Sum of Squares"
      };
      final String[] types = {"long", "long", "long", "long", "double", "double", "double"};
      final String[] formats = {"%d", "%d", "%d", "%d", "%.5f", "%.5f", "%.5f"};

      TwoDimTable summary =
          new TwoDimTable("Model Summary", null, new String[1], headers, types, formats, "");

      // Fill the only row, in the same column order as the headers above.
      summary.set(
          0, 0, Math.round(_train.numRows() * (hasWeightCol() ? _train.lastVec().mean() : 1)));
      summary.set(0, 1, output._centers_raw.length);
      summary.set(0, 2, output._categorical_column_count);
      summary.set(0, 3, output._iterations);
      summary.set(0, 4, output._tot_withinss);
      summary.set(0, 5, output._totss);
      summary.set(0, 6, output._betweenss);
      return summary;
    }
  /**
   * Builds the "Status of Neuron Layers" table, one row per element of the array returned by
   * {@code DeepLearningTask.makeNeuronsForTesting(this)}.
   *
   * <p>The table description is a one-line text summary assembled from the model parameters
   * (diagnostics flag, prediction target or auto-encoder mode, distribution, loss, number of
   * weights/biases, serialized byte size, processed samples and mini-batch size).
   *
   * <p>The first row (layer index 0) only receives layer number, units, type and input dropout;
   * every later row also receives regularization, rate, momentum and weight/bias statistics. The
   * dropout cell of the last row is left unset.
   *
   * @return the newly built table, which is also stored in {@code summaryTable}
   */
  TwoDimTable createSummaryTable() {
    Neurons[] layers = DeepLearningTask.makeNeuronsForTesting(this);
    long byte_size = new AutoBuffer().put(this).buf().length;

    // Assemble the description piece by piece, in the same order the text is emitted.
    final String diagnosticsPart = get_params()._diagnostics ? "" : "diagnostics disabled, ";
    final String targetPart =
        get_params()._autoencoder ? "" : ("predicting " + _train.lastVecName() + ", ");
    final String taskPart;
    if (get_params()._autoencoder) {
      taskPart = "auto-encoder";
    } else if (_classification) {
      taskPart = units[units.length - 1] + "-class classification";
    } else {
      taskPart = "regression";
    }
    final String description =
        diagnosticsPart
            + targetPart
            + taskPart
            + ", "
            + get_params()._distribution
            + " distribution, "
            + get_params()._loss
            + " loss, "
            + String.format("%,d", size())
            + " weights/biases, "
            + PrettyPrint.bytes(byte_size)
            + ", "
            + String.format("%,d", get_processed_global())
            + " training samples, "
            + "mini-batch size "
            + String.format("%,d", get_params()._mini_batch_size);

    final String[] headers = {
      "Layer",
      "Units",
      "Type",
      "Dropout",
      "L1",
      "L2",
      "Mean Rate",
      "Rate RMS",
      "Momentum",
      "Mean Weight",
      "Weight RMS",
      "Mean Bias",
      "Bias RMS"
    };
    final String[] types = {
      "int", "int", "string", "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double"
    };
    final String[] formats = {
      "%d", "%d", "%s", "%2.2f %%", "%5f", "%5f", "%5f", "%5f", "%5f", "%5f", "%5f", "%5f", "%5f"
    };

    TwoDimTable table =
        new TwoDimTable(
            "Status of Neuron Layers",
            description,
            new String[layers.length],
            headers,
            types,
            formats,
            "");

    for (int layer = 0; layer < layers.length; ++layer) {
      table.set(layer, 0, layer + 1);
      table.set(layer, 1, layers[layer].units);
      table.set(layer, 2, layers[layer].getClass().getSimpleName());

      if (layer == 0) {
        // First row: only the input dropout (as a percentage) is reported.
        table.set(layer, 3, layers[layer].params._input_dropout_ratio * 100);
        continue;
      }

      // Rows strictly between the first and the last get a hidden dropout percentage.
      if (layer < layers.length - 1) {
        if (layers[layer].params._hidden_dropout_ratios != null) {
          table.set(layer, 3, layers[layer].params._hidden_dropout_ratios[layer - 1] * 100);
        } else {
          table.set(layer, 3, 0);
        }
      }

      table.set(layer, 4, layers[layer].params._l1);
      table.set(layer, 5, layers[layer].params._l2);
      // With adaptive rate the stored rate statistics are shown and momentum is reported as 0;
      // otherwise rate and momentum are computed from the processed-sample count.
      table.set(
          layer,
          6,
          (get_params()._adaptive_rate
              ? mean_rate[layer]
              : layers[layer].rate(get_processed_total())));
      table.set(layer, 7, (get_params()._adaptive_rate ? rms_rate[layer] : 0));
      table.set(
          layer,
          8,
          get_params()._adaptive_rate ? 0 : layers[layer].momentum(get_processed_total()));
      table.set(layer, 9, mean_weight[layer]);
      table.set(layer, 10, rms_weight[layer]);
      table.set(layer, 11, mean_bias[layer]);
      table.set(layer, 12, rms_bias[layer]);
    }

    summaryTable = table;
    return summaryTable;
  }
Esempio n. 6
0
  /**
   * Builds the "Scoring History" table for a tree model.
   *
   * <p>One row is emitted for every index {@code i >= 1} of {@code _output._scored_train} whose
   * training MSE is not NaN; index 0 is never reported. Each row holds the timestamp, the duration
   * relative to {@code _start_time}, the index {@code i} (shown as "Number of Trees") and the
   * training metrics, followed by the same metrics on validation data when {@code valid()} is
   * non-null.
   *
   * <p>Per data set the metric columns are: MSE (always), LogLoss and Classification Error (when
   * {@code _output.isClassifier()}), and AUC (when the model category is Binomial).
   *
   * @param _output tree model output providing the scoring history arrays
   * @return the populated scoring-history table
   */
  private TwoDimTable createScoringHistoryTable(SharedTreeModel.SharedTreeOutput _output) {
    // Evaluate every column predicate exactly once and use the same value for both the header
    // and the cell fill, so the two can never disagree about which columns exist.
    final boolean classifier = _output.isClassifier();
    final boolean binomial = _output.getModelCategory() == ModelCategory.Binomial;
    final boolean hasValidation = valid() != null;

    List<String> colHeaders = new ArrayList<>();
    List<String> colTypes = new ArrayList<>();
    List<String> colFormat = new ArrayList<>();
    colHeaders.add("Timestamp");
    colTypes.add("string");
    colFormat.add("%s");
    colHeaders.add("Duration");
    colTypes.add("string");
    colFormat.add("%s");
    colHeaders.add("Number of Trees");
    colTypes.add("long");
    colFormat.add("%d");

    // Metric columns, in fill order; the same set is repeated for training and validation.
    List<String> metrics = new ArrayList<>();
    metrics.add("MSE");
    if (classifier) metrics.add("LogLoss");
    if (binomial) metrics.add("AUC");
    if (classifier) metrics.add("Classification Error");

    final String[] prefixes =
        hasValidation ? new String[] {"Training", "Validation"} : new String[] {"Training"};
    for (String prefix : prefixes) {
      for (String metric : metrics) {
        colHeaders.add(prefix + " " + metric);
        colTypes.add("double");
        colFormat.add("%.5f");
      }
    }

    // Count the rows that will actually be emitted (entries with a real training MSE).
    int rows = 0;
    for (int i = 1; i < _output._scored_train.length; i++) {
      if (!Double.isNaN(_output._scored_train[i]._mse)) ++rows;
    }
    TwoDimTable table =
        new TwoDimTable(
            "Scoring History",
            null,
            new String[rows],
            colHeaders.toArray(new String[0]),
            colTypes.toArray(new String[0]),
            colFormat.toArray(new String[0]),
            "");

    // The formatter does not depend on the row, so it is built once rather than per iteration.
    final DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
    int row = 0;
    for (int i = 1; i < _output._scored_train.length; i++) {
      if (Double.isNaN(_output._scored_train[i]._mse)) continue;
      int col = 0;
      assert (row < table.getRowDim());
      assert (col < table.getColDim());
      table.set(row, col++, fmt.print(_output._training_time_ms[i]));
      table.set(row, col++, PrettyPrint.msecs(_output._training_time_ms[i] - _start_time, true));
      table.set(row, col++, i);

      ScoreKeeper st = _output._scored_train[i];
      table.set(row, col++, st._mse);
      if (classifier) table.set(row, col++, st._logloss);
      if (binomial) table.set(row, col++, st._AUC);
      if (classifier) table.set(row, col++, st._classError);

      if (hasValidation) {
        st = _output._scored_valid[i];
        table.set(row, col++, st._mse);
        if (classifier) table.set(row, col++, st._logloss);
        if (binomial) table.set(row, col++, st._AUC);
        if (classifier) table.set(row, col++, st._classError);
      }
      row++;
    }
    return table;
  }
Esempio n. 7
0
    /**
     * Fills the coefficient tables of this schema for a multinomial GLM.
     *
     * <p>Does nothing if {@code impl.get_global_beta_multinomial()} is {@code null}. Otherwise
     * {@code coefficients_table} is replaced by a fresh table with "Intercept" as its first row
     * and one "Coefs_class_k" column per class. When the model is standardized, matching
     * "Std_Coefs_class_k" columns are added and {@code standardized_coefficient_magnitudes} is
     * filled with the per-coefficient sum of absolute standardized values over all classes,
     * sorted in descending order; both are only populated when {@code
     * impl.getNormBetaMultinomial()} is non-null.
     *
     * @param impl GLM output to read coefficients from
     * @return {@code this}
     */
    private GLMModelOutputV3 fillMultinomial(GLMOutput impl) {
      if (impl.get_global_beta_multinomial() == null) return this; // no coefficients yet
      String[] names = impl.coefficientNames().clone();
      // Row labels: "Intercept" first, then every coefficient name except the last one.
      String[] ns =
          ArrayUtils.append(new String[] {"Intercept"}, Arrays.copyOf(names, names.length - 1));

      coefficients_table = new TwoDimTableV3();

      if (!impl.isStandardized()) {
        // Plain coefficients only: one column per class.
        final int classCount = impl.nclasses();
        String[] headers = new String[classCount];
        for (int k = 0; k < classCount; ++k) {
          headers[k] = "Coefs_class_" + k;
        }
        String[] types = new String[headers.length];
        Arrays.fill(types, "double");
        String[] formats = new String[headers.length];
        Arrays.fill(formats, "%5f");

        TwoDimTable coefTable =
            new TwoDimTable(
                "Coefficients",
                "glm multinomial coefficients",
                ns,
                headers,
                types,
                formats,
                "names");

        for (int k = 0; k < classCount; ++k) {
          double[] beta = impl.get_global_beta_multinomial()[k];
          final int last = beta.length - 1;
          // The last entry of each class vector is written to row 0 (the "Intercept" row).
          coefTable.set(0, k, beta[last]);
          for (int p = 0; p < last; ++p) {
            coefTable.set(p + 1, k, beta[p]);
          }
        }
        coefficients_table.fillFromImpl(coefTable);
        return this;
      }

      // Standardized model: raw columns first, then the standardized columns.
      final int classCount = impl.nclasses();
      String[] headers = new String[classCount * 2];
      for (int k = 0; k < classCount; ++k) {
        headers[k] = "Coefs_class_" + k;
        headers[classCount + k] = "Std_Coefs_class_" + k;
      }
      String[] types = new String[headers.length];
      Arrays.fill(types, "double");
      String[] formats = new String[headers.length];
      Arrays.fill(formats, "%5f");

      double[][] betaNorm = impl.getNormBetaMultinomial();
      if (betaNorm == null) return this;

      TwoDimTable coefTable =
          new TwoDimTable(
              "Coefficients",
              "glm multinomial coefficients",
              ns,
              headers,
              types,
              formats,
              "names");
      for (int k = 0; k < classCount; ++k) {
        double[] beta = impl.get_global_beta_multinomial()[k];
        final int last = beta.length - 1;
        coefTable.set(0, k, beta[last]);
        coefTable.set(0, classCount + k, betaNorm[k][last]);
        for (int p = 0; p < last; ++p) {
          coefTable.set(p + 1, k, beta[p]);
          coefTable.set(p + 1, classCount + k, betaNorm[k][p]);
        }
      }
      coefficients_table.fillFromImpl(coefTable);

      // Magnitude of a coefficient = sum over classes of |standardized coefficient|.
      final double[] magnitudes = new double[betaNorm[0].length];
      for (double[] classBeta : betaNorm) {
        for (int p = 0; p < classBeta.length; ++p) {
          magnitudes[p] += Math.abs(classBeta[p]);
        }
      }

      // Sort every index except the last one by descending magnitude.
      Integer[] order = new Integer[magnitudes.length - 1];
      for (int p = 0; p < order.length; ++p) {
        order[p] = p;
      }
      Arrays.sort(
          order,
          new Comparator<Integer>() {
            @Override
            public int compare(Integer o1, Integer o2) {
              final double m1 = magnitudes[o1];
              final double m2 = magnitudes[o2];
              if (m1 < m2) return +1;
              if (m1 > m2) return -1;
              return 0;
            }
          });

      // Note: the label array has names.length entries but only the first names.length - 1 are
      // assigned, so the final row label stays null.
      String[] sortedNames = new String[names.length];
      for (int p = 0; p < sortedNames.length - 1; ++p) {
        sortedNames[p] = names[order[p]];
      }
      TwoDimTable magTable =
          new TwoDimTable(
              "Standardized Coefficient Magnitudes",
              "standardized coefficient magnitudes",
              sortedNames,
              new String[] {"Coefficients", "Sign"},
              new String[] {"double", "string"},
              new String[] {"%5f", "%s"},
              "names");
      for (int p = 0; p < magnitudes.length - 1; ++p) {
        magTable.set(p, 0, magnitudes[order[p]]);
        magTable.set(p, 1, "POS");
      }
      standardized_coefficient_magnitudes = new TwoDimTableV3();
      standardized_coefficient_magnitudes.fillFromImpl(magTable);
      return this;
    }
Esempio n. 8
0
    /**
     * Fills this schema from a GLM output.
     *
     * <p>After delegating to the superclass and copying {@code lambda_1se} / {@code lambda_best},
     * multinomial models are handed to {@code fillMultinomial}. For all other models {@code
     * coefficients_table} is rebuilt with "Intercept" as the first row and a "Coefficients"
     * column, plus "Std. Error" / "z value" / "p value" columns when {@code impl.hasPValues()}
     * and a "Standardized Coefficients" column when {@code impl.isStandardized()}. If the model
     * is standardized and {@code impl.beta()} is non-null, {@code
     * standardized_coefficient_magnitudes} is also filled, sorted by descending absolute
     * standardized coefficient, with a NEG/POS sign column.
     *
     * @param impl GLM output to read from
     * @return {@code this}
     */
    @Override
    public GLMModelOutputV3 fillFromImpl(GLMModel.GLMOutput impl) {
      super.fillFromImpl(impl);
      lambda_1se = impl.lambda_1se();
      lambda_best = impl.lambda_best();
      if (impl._multinomial) return fillMultinomial(impl);

      String[] names = impl.coefficientNames().clone();
      // Row labels: "Intercept" first, then every coefficient name except the last one.
      String[] ns =
          ArrayUtils.append(new String[] {"Intercept"}, Arrays.copyOf(names, names.length - 1));
      coefficients_table = new TwoDimTableV3();

      // Fall back to an array allocated by MemoryManager when no coefficients exist yet.
      double[] beta = impl.beta();
      if (beta == null) beta = MemoryManager.malloc8d(names.length);

      // Choose the column layout.
      String[] colTypes;
      String[] colFormats;
      String[] colnames;
      if (impl.hasPValues()) {
        colTypes = new String[] {"double", "double", "double", "double"};
        colFormats = new String[] {"%5f", "%5f", "%5f", "%5f"};
        colnames = new String[] {"Coefficients", "Std. Error", "z value", "p value"};
      } else {
        colTypes = new String[] {"double"};
        colFormats = new String[] {"%5f"};
        colnames = new String[] {"Coefficients"};
      }
      // Index of the standardized-coefficient column, if it gets appended below.
      final int stdOff = colnames.length;
      if (impl.isStandardized()) {
        colTypes = ArrayUtils.append(colTypes, "double");
        colFormats = ArrayUtils.append(colFormats, "%5f");
        colnames = ArrayUtils.append(colnames, "Standardized Coefficients");
      }

      TwoDimTable coefTable =
          new TwoDimTable(
              "Coefficients", "glm coefficients", ns, colnames, colTypes, colFormats, "names");

      // Coefficients: the last entry of beta is written to row 0 (the "Intercept" row).
      final int lastBeta = beta.length - 1;
      coefTable.set(0, 0, beta[lastBeta]);
      for (int p = 0; p < lastBeta; ++p) {
        coefTable.set(p + 1, 0, beta[p]);
      }

      // Standardized coefficients, same row layout.
      double[] norm_beta = null;
      if (impl.isStandardized() && impl.beta() != null) {
        norm_beta = impl.getNormBeta();
        final int lastNorm = norm_beta.length - 1;
        coefTable.set(0, stdOff, norm_beta[lastNorm]);
        for (int p = 0; p < lastNorm; ++p) {
          coefTable.set(p + 1, stdOff, norm_beta[p]);
        }
      }

      // Standard errors, z values and p values occupy columns 1..3.
      if (impl.hasPValues()) {
        double[] stdErr = impl.stdErr();
        double[] zVals = impl.zValues();
        double[] pVals = impl.pValues();
        coefTable.set(0, 1, stdErr[stdErr.length - 1]);
        coefTable.set(0, 2, zVals[zVals.length - 1]);
        coefTable.set(0, 3, pVals[pVals.length - 1]);
        for (int p = 0; p < stdErr.length - 1; ++p) {
          coefTable.set(p + 1, 1, stdErr[p]);
          coefTable.set(p + 1, 2, zVals[p]);
          coefTable.set(p + 1, 3, pVals[p]);
        }
      }
      coefficients_table.fillFromImpl(coefTable);

      // norm_beta is only set when the model is standardized and has coefficients.
      if (norm_beta == null) return this;

      final double[] magnitudes = norm_beta.clone();
      for (int p = 0; p < magnitudes.length; ++p) {
        if (magnitudes[p] < 0) magnitudes[p] = -magnitudes[p];
      }

      // Sort every index except the last one by descending magnitude.
      Integer[] order = new Integer[magnitudes.length - 1];
      for (int p = 0; p < order.length; ++p) {
        order[p] = p;
      }
      Arrays.sort(
          order,
          new Comparator<Integer>() {
            @Override
            public int compare(Integer o1, Integer o2) {
              final double m1 = magnitudes[o1];
              final double m2 = magnitudes[o2];
              if (m1 < m2) return +1;
              if (m1 > m2) return -1;
              return 0;
            }
          });

      // Note: the label array has names.length entries but only the first names.length - 1 are
      // assigned, so the final row label stays null.
      String[] sortedNames = new String[names.length];
      for (int p = 0; p < sortedNames.length - 1; ++p) {
        sortedNames[p] = names[order[p]];
      }
      TwoDimTable magTable =
          new TwoDimTable(
              "Standardized Coefficient Magnitudes",
              "standardized coefficient magnitudes",
              sortedNames,
              new String[] {"Coefficients", "Sign"},
              new String[] {"double", "string"},
              new String[] {"%5f", "%s"},
              "names");
      for (int p = 0; p < beta.length - 1; ++p) {
        magTable.set(p, 0, magnitudes[order[p]]);
        // The sign is taken from the raw (non-standardized) coefficient.
        magTable.set(p, 1, beta[order[p]] < 0 ? "NEG" : "POS");
      }
      standardized_coefficient_magnitudes = new TwoDimTableV3();
      standardized_coefficient_magnitudes.fillFromImpl(magTable);
      return this;
    }