예제 #1
0
파일: GLMModel.java 프로젝트: vsynych/h2o-3
 /**
  * Appends a newly created submodel to the submodels stored in the output.
  *
  * @param beta coefficient vector handed to the new {@code Submodel}
  * @param lambda lambda value handed to the new {@code Submodel}
  * @param iter iteration count handed to the new {@code Submodel}
  */
 public void addSubmodel(double[] beta, double lambda, int iter) {
   // The meaning of the two trailing -1 arguments is defined by Submodel's constructor,
   // which is not visible from here.
   Submodel added = new Submodel(lambda, beta, iter, -1, -1);
   _output._submodels = ArrayUtils.append(_output._submodels, added);
 }
예제 #2
0
 /**
  * Merges another sampler's result into this one by appending its sampled entries to ours.
  *
  * @param other the sampler whose {@code _sampled} entries are appended to this instance
  */
 @Override
 public void reduce(Sampler other) {
   this._sampled = ArrayUtils.append(this._sampled, other._sampled);
 }
예제 #3
0
    /**
     * Populates the coefficient tables of this schema from a multinomial GLM output.
     *
     * <p>Returns immediately, without touching any table, when the output has no multinomial
     * coefficients yet. Otherwise {@code coefficients_table} is replaced and filled with one
     * {@code Coefs_class_<i>} column per class; the row labelled "Intercept" comes first and
     * receives the last element of each class's coefficient vector.
     *
     * <p>For standardized output a {@code Std_Coefs_class_<i>} column per class is added, and
     * {@code standardized_coefficient_magnitudes} is filled with, per coefficient, the sum over
     * all classes of the absolute standardized values, sorted in descending order. If the
     * standardized coefficients are {@code null}, {@code coefficients_table} is left as a freshly
     * created, unfilled {@code TwoDimTableV3}.
     *
     * @param impl the GLM output the coefficients are read from
     * @return this schema instance
     */
    private GLMModelOutputV3 fillMultinomial(GLMOutput impl) {
      if (impl.get_global_beta_multinomial() == null) {
        return this; // no coefficients yet
      }

      final String[] coefNames = impl.coefficientNames().clone();
      // Row labels: "Intercept" first, followed by every name except the last one.
      final String[] rowNames =
          ArrayUtils.append(
              new String[] {"Intercept"}, Arrays.copyOf(coefNames, coefNames.length - 1));
      coefficients_table = new TwoDimTableV3();

      final boolean standardized = impl.isStandardized();
      final int nclasses = impl.nclasses();

      // Raw coefficient columns come first; standardized ones (if any) follow at offset nclasses.
      final String[] colNames = new String[standardized ? nclasses * 2 : nclasses];
      for (int k = 0; k < nclasses; k++) {
        colNames[k] = "Coefs_class_" + k;
        if (standardized) {
          colNames[nclasses + k] = "Std_Coefs_class_" + k;
        }
      }
      final String[] colTypes = new String[colNames.length];
      Arrays.fill(colTypes, "double");
      final String[] colFormats = new String[colNames.length];
      Arrays.fill(colFormats, "%5f");

      if (!standardized) {
        TwoDimTable coefTable =
            new TwoDimTable(
                "Coefficients",
                "glm multinomial coefficients",
                rowNames,
                colNames,
                colTypes,
                colFormats,
                "names");
        for (int k = 0; k < nclasses; k++) {
          final double[] classBeta = impl.get_global_beta_multinomial()[k];
          final int last = classBeta.length - 1;
          coefTable.set(0, k, classBeta[last]); // last element goes to the "Intercept" row
          for (int j = 0; j < last; j++) {
            coefTable.set(j + 1, k, classBeta[j]);
          }
        }
        coefficients_table.fillFromImpl(coefTable);
        return this;
      }

      final double[][] normBetas = impl.getNormBetaMultinomial();
      if (normBetas == null) {
        return this;
      }

      TwoDimTable coefTable =
          new TwoDimTable(
              "Coefficients",
              "glm multinomial coefficients",
              rowNames,
              colNames,
              colTypes,
              colFormats,
              "names");
      for (int k = 0; k < nclasses; k++) {
        final double[] classBeta = impl.get_global_beta_multinomial()[k];
        final int last = classBeta.length - 1;
        // The standardized vector is indexed with the raw vector's length.
        coefTable.set(0, k, classBeta[last]);
        coefTable.set(0, nclasses + k, normBetas[k][last]);
        for (int j = 0; j < last; j++) {
          coefTable.set(j + 1, k, classBeta[j]);
          coefTable.set(j + 1, nclasses + k, normBetas[k][j]);
        }
      }
      coefficients_table.fillFromImpl(coefTable);

      // Per-coefficient magnitude: sum of |standardized coefficient| across all classes.
      final double[] magnitudes = new double[normBetas[0].length];
      for (int k = 0; k < normBetas.length; k++) {
        for (int j = 0; j < normBetas[k].length; j++) {
          double value = normBetas[k][j];
          if (value < 0) {
            value = -value;
          }
          magnitudes[j] += value;
        }
      }

      // Order every position except the last one by descending magnitude.
      final Integer[] order = new Integer[magnitudes.length - 1];
      for (int j = 0; j < order.length; j++) {
        order[j] = j;
      }
      Arrays.sort(
          order,
          new Comparator<Integer>() {
            @Override
            public int compare(Integer left, Integer right) {
              final double leftMag = magnitudes[left];
              final double rightMag = magnitudes[right];
              return leftMag < rightMag ? +1 : (leftMag > rightMag ? -1 : 0);
            }
          });

      // NOTE(review): the array has coefNames.length slots but only the first length - 1 are
      // filled, so the final row header stays null - confirm how TwoDimTable treats it.
      final String[] sortedNames = new String[coefNames.length];
      final int namedRows = sortedNames.length - 1;
      for (int j = 0; j < namedRows; j++) {
        sortedNames[j] = coefNames[order[j]];
      }

      TwoDimTable magnitudeTable =
          new TwoDimTable(
              "Standardized Coefficient Magnitudes",
              "standardized coefficient magnitudes",
              sortedNames,
              new String[] {"Coefficients", "Sign"},
              new String[] {"double", "string"},
              new String[] {"%5f", "%s"},
              "names");
      final int magnitudeRows = magnitudes.length - 1;
      for (int j = 0; j < magnitudeRows; j++) {
        magnitudeTable.set(j, 0, magnitudes[order[j]]);
        magnitudeTable.set(j, 1, "POS"); // sums of absolute values, so always reported as "POS"
      }
      standardized_coefficient_magnitudes = new TwoDimTableV3();
      standardized_coefficient_magnitudes.fillFromImpl(magnitudeTable);
      return this;
    }
예제 #4
0
    /**
     * Initialize cluster centers.
     *
     * <p>Three paths are visible in this method:
     *
     * <ul>
     *   <li>User-specified points ({@code _parms._user_points} is non-null): every value is read
     *       from the user frame and passed through {@code data(...)}.
     *   <li>{@code Initialization.Random}: {@code _parms._k} centers, each filled by {@code
     *       randomRow(...)}.
     *   <li>Any other initialization: one random center, then five rounds of {@code SumSqr} plus
     *       {@code Sampler} whose samples are appended to the centers, followed by {@code
     *       recluster(...)} with {@code _parms._k}.
     * </ul>
     *
     * <p>Side effects: sets {@code _isCats} and {@code
     * model._output._categorical_column_count}; on the sampling path it also updates {@code
     * _centers_raw}, {@code _tot_withinss} and {@code _iterations} on the model output and calls
     * {@code model.update(_key)} once per round.
     *
     * @param model model whose output is updated as described above
     * @param vecs columns the centers are drawn from
     * @param means passed through to {@code data}, {@code randomRow}, {@code SumSqr}, {@code
     *     Sampler} and {@code destandardize}
     * @param mults passed through to the same helpers as {@code means}
     * @param modes passed through to {@code data}, {@code randomRow}, {@code SumSqr} and {@code
     *     Sampler}
     * @return the initial centers, or {@code null} if {@code isRunning()} is false after a sampling
     *     round
     */
    double[][] initial_centers(
        KMeansModel model,
        final Vec[] vecs,
        final double[] means,
        final double[] mults,
        final int[] modes) {

      // Categoricals use a different distance metric than numeric columns.
      // _isCats[v] is a non-null (empty) array for a categorical column and null otherwise; the
      // number of categorical columns is counted on the model output.
      model._output._categorical_column_count = 0;
      _isCats = new String[vecs.length][];
      for (int v = 0; v < vecs.length; v++) {
        _isCats[v] = vecs[v].isCategorical() ? new String[0] : null;
        if (_isCats[v] != null) model._output._categorical_column_count++;
      }

      // This RNG is seeded with _parms._seed - 1; the Sampler below is given _parms._seed itself.
      Random rand = water.util.RandomUtils.getRNG(_parms._seed - 1);
      double centers[][]; // Cluster centers
      if (null != _parms._user_points) { // User-specified starting points
        Frame user_points = _parms._user_points.get();
        // One center per row of the user frame (row count narrowed to int).
        int numCenters = (int) user_points.numRows();
        int numCols = model._output.nfeatures();
        centers = new double[numCenters][numCols];
        Vec[] centersVecs = user_points.vecs();
        // Get the centers and standardize them if requested
        // (the transformation itself is done by data(...), which is not visible here).
        for (int r = 0; r < numCenters; r++) {
          for (int c = 0; c < numCols; c++) {
            centers[r][c] = centersVecs[c].at(r);
            centers[r][c] = data(centers[r][c], c, means, mults, modes);
          }
        }
      } else { // Random, Furthest, or PlusPlus initialization
        if (_parms._init == Initialization.Random) {
          // Initialize all cluster centers to random rows
          centers = new double[_parms._k][model._output.nfeatures()];
          for (double[] center : centers) randomRow(vecs, rand, center, means, mults, modes);
        } else {
          centers = new double[1][model._output.nfeatures()];
          // Initialize first cluster center to random row
          randomRow(vecs, rand, centers[0], means, mults, modes);

          // _iterations doubles as the loop counter for the five sampling rounds below and is
          // reset to 0 once they are done.
          model._output._iterations = 0;
          while (model._output._iterations < 5) {
            // Sum squares distances to cluster center
            SumSqr sqr = new SumSqr(centers, means, mults, modes, _isCats).doAll(vecs);

            // Sample with probability inverse to square distance
            // NOTE(review): the sampling rule lives in Sampler, which is not visible here; the
            // _parms._k * 3 argument is presumably a per-round sample target - confirm.
            Sampler sampler =
                new Sampler(
                        centers,
                        means,
                        mults,
                        modes,
                        _isCats,
                        sqr._sqr,
                        _parms._k * 3,
                        _parms._seed,
                        hasWeightCol())
                    .doAll(vecs);
            // The candidate set only grows here; it is reduced by recluster(...) after the loop.
            centers = ArrayUtils.append(centers, sampler._sampled);

            // Fill in sample centers into the model
            if (!isRunning()) return null; // Stopped/cancelled
            model._output._centers_raw = destandardize(centers, _isCats, means, mults);
            // Summed squared distance divided by the number of training rows.
            model._output._tot_withinss = sqr._sqr / _train.numRows();

            model._output._iterations++; // One iteration done

            model.update(
                _key); // Make early version of model visible, but don't update progress using
            // update(1)
          }
          // Recluster down to k cluster centers
          centers = recluster(centers, rand, _parms._k, _parms._init, _isCats);
          model._output._iterations = 0; // Reset iteration count
        }
      }
      return centers;
    }
예제 #5
0
    /**
     * Fills this schema from a GLM output.
     *
     * <p>After delegating to the superclass and copying {@code lambda_1se} / {@code lambda_best},
     * multinomial outputs are handed to {@code fillMultinomial}. For all other outputs {@code
     * coefficients_table} is built with the "Intercept" row first, which receives the last element
     * of the coefficient vector. Columns for standard error, z value and p value are added when
     * the output has p-values, and a "Standardized Coefficients" column when it is standardized.
     * When the output is standardized and has coefficients, {@code
     * standardized_coefficient_magnitudes} is also filled with the absolute standardized
     * coefficients in descending order, each tagged "NEG" or "POS" by the sign of the raw
     * coefficient.
     *
     * @param impl the GLM output to read from
     * @return this schema instance
     */
    @Override
    public GLMModelOutputV3 fillFromImpl(GLMModel.GLMOutput impl) {
      super.fillFromImpl(impl);
      lambda_1se = impl.lambda_1se();
      lambda_best = impl.lambda_best();
      if (impl._multinomial) {
        return fillMultinomial(impl);
      }

      final String[] coefNames = impl.coefficientNames().clone();
      // Row labels: "Intercept" first, followed by every name except the last one.
      final String[] rowNames =
          ArrayUtils.append(
              new String[] {"Intercept"}, Arrays.copyOf(coefNames, coefNames.length - 1));
      coefficients_table = new TwoDimTableV3();

      double[] coefs = impl.beta();
      if (coefs == null) {
        // No coefficients available: use an array from malloc8d sized to the names instead.
        coefs = MemoryManager.malloc8d(coefNames.length);
      }

      // Column descriptors: four statistic columns with p-values, otherwise coefficients only.
      String[] headers;
      String[] types;
      String[] formats;
      if (impl.hasPValues()) {
        headers = new String[] {"Coefficients", "Std. Error", "z value", "p value"};
        types = new String[] {"double", "double", "double", "double"};
        formats = new String[] {"%5f", "%5f", "%5f", "%5f"};
      } else {
        headers = new String[] {"Coefficients"};
        types = new String[] {"double"};
        formats = new String[] {"%5f"};
      }
      // Index the standardized column will occupy if it gets appended below.
      final int stdColumn = headers.length;
      if (impl.isStandardized()) {
        headers = ArrayUtils.append(headers, "Standardized Coefficients");
        types = ArrayUtils.append(types, "double");
        formats = ArrayUtils.append(formats, "%5f");
      }

      TwoDimTable table =
          new TwoDimTable(
              "Coefficients", "glm coefficients", rowNames, headers, types, formats, "names");

      // Raw coefficients: the last element goes to row 0, the rest shift down by one row.
      final int coefRows = coefs.length - 1;
      table.set(0, 0, coefs[coefRows]);
      for (int row = 1; row <= coefRows; row++) {
        table.set(row, 0, coefs[row - 1]);
      }

      double[] stdCoefs = null;
      if (impl.isStandardized() && impl.beta() != null) {
        stdCoefs = impl.getNormBeta();
        final int stdRows = stdCoefs.length - 1;
        table.set(0, stdColumn, stdCoefs[stdRows]);
        for (int row = 1; row <= stdRows; row++) {
          table.set(row, stdColumn, stdCoefs[row - 1]);
        }
      }

      if (impl.hasPValues()) {
        // Columns 1..3 hold standard errors, z values and p values, in that order.
        final double[][] stats = {impl.stdErr(), impl.zValues(), impl.pValues()};
        // The number of non-intercept rows is taken from the standard errors for all three.
        final int statRows = stats[0].length - 1;
        for (int s = 0; s < stats.length; s++) {
          final double[] stat = stats[s];
          final int column = s + 1;
          table.set(0, column, stat[stat.length - 1]);
          for (int i = 0; i < statRows; i++) {
            table.set(i + 1, column, stat[i]);
          }
        }
      }
      coefficients_table.fillFromImpl(table);

      if (impl.isStandardized() && impl.beta() != null) {
        // Absolute values of the standardized coefficients.
        final double[] magnitudes = stdCoefs.clone();
        for (int i = 0; i < magnitudes.length; i++) {
          if (magnitudes[i] < 0) {
            magnitudes[i] = -magnitudes[i];
          }
        }

        // Order every position except the last one by descending magnitude.
        final Integer[] order = new Integer[magnitudes.length - 1];
        for (int i = 0; i < order.length; i++) {
          order[i] = i;
        }
        Arrays.sort(
            order,
            new Comparator<Integer>() {
              @Override
              public int compare(Integer left, Integer right) {
                final double leftMag = magnitudes[left];
                final double rightMag = magnitudes[right];
                return leftMag < rightMag ? +1 : (leftMag > rightMag ? -1 : 0);
              }
            });

        // NOTE(review): the array has coefNames.length slots but only the first length - 1 are
        // filled, so the final row header stays null - confirm how TwoDimTable treats it.
        final String[] sortedNames = new String[coefNames.length];
        final int namedRows = sortedNames.length - 1;
        for (int i = 0; i < namedRows; i++) {
          sortedNames[i] = coefNames[order[i]];
        }

        TwoDimTable magnitudeTable =
            new TwoDimTable(
                "Standardized Coefficient Magnitudes",
                "standardized coefficient magnitudes",
                sortedNames,
                new String[] {"Coefficients", "Sign"},
                new String[] {"double", "string"},
                new String[] {"%5f", "%s"},
                "names");
        // The row count follows the raw coefficient vector; the sign is taken from it as well.
        final int signRows = coefs.length - 1;
        for (int i = 0; i < signRows; i++) {
          final int source = order[i];
          magnitudeTable.set(i, 0, magnitudes[source]);
          magnitudeTable.set(i, 1, coefs[source] < 0 ? "NEG" : "POS");
        }
        standardized_coefficient_magnitudes = new TwoDimTableV3();
        standardized_coefficient_magnitudes.fillFromImpl(magnitudeTable);
      }
      return this;
    }