Пример #1
0
    // Termination test for the iteration loop.
    // Returns true when isRunning() reports false, when the model's iteration
    // count has reached _parms._max_iterations, or when the mean per-cluster
    // distance between the previous and the new centers is below TOLERANCE.
    // Returns false when there are no previous centers to compare against.
    // Side effect: whenever the comparison is actually made, the mean change and
    // the current wall-clock time are appended to the model's output arrays.
    boolean isDone(KMeansModel model, double[][] newCenters, double[][] oldCenters) {
      // Stopped/cancelled, or the iteration budget is used up
      if (!isRunning() || model._output._iterations >= _parms._max_iterations) {
        return true;
      }
      // No prior iteration to compare with, so keep iterating
      if (oldCenters == null) {
        return false;
      }

      // Sum the distance between each old center and its replacement
      double movedTotal = 0;
      for (int c = 0; c < _parms._k; c++) {
        movedTotal +=
            hex.genmodel.GenModel.KMeans_distance(
                oldCenters[c], newCenters[c], _isCats, null, null);
      }
      // Mean movement per cluster
      final double meanShift = movedTotal / _parms._k;

      // Grow the centroid-change history by one slot holding this iteration's value
      final int chgLen = model._output._avg_centroids_chg.length + 1;
      model._output._avg_centroids_chg =
          ArrayUtils.copyAndFillOf(model._output._avg_centroids_chg, chgLen, meanShift);

      // Grow the timestamp history the same way, stamped with the current time
      final int timeLen = model._output._training_time_ms.length + 1;
      model._output._training_time_ms =
          ArrayUtils.copyAndFillOf(
              model._output._training_time_ms, timeLen, System.currentTimeMillis());

      return meanShift < TOLERANCE;
    }
Пример #2
0
    // Copy the results of a Lloyds pass into the model's output: centers, cluster
    // sizes, within-cluster / total / between-cluster sums of squares, the
    // within-SS history, the summary and scoring-history tables, and the training
    // metrics. Also increments the model's iteration counter.
    // Returns task._cMeans, i.e. the centers produced by the pass.
    double[][] computeStatsFillModel(
        Lloyds task,
        KMeansModel model,
        final Vec[] vecs,
        final double[] means,
        final double[] mults,
        final int[] modes) {
      // Keep the task's centers as-is when standardizing; always store the
      // destandardized version as well
      if (model._parms._standardize) {
        model._output._centers_std_raw = task._cMeans;
      }
      model._output._centers_raw = destandardize(task._cMeans, _isCats, means, mults);
      model._output._size = task._size;
      model._output._withinss = task._cSqr;

      // Total within-cluster sum of squares: add up the per-cluster values
      double withinTotal = 0;
      int cluster = 0;
      while (cluster < _parms._k) {
        withinTotal += model._output._withinss[cluster];
        cluster++;
      }
      model._output._tot_withinss = withinTotal;

      // Total sum of squares: with more than one cluster it comes from a TotSS
      // pass over the vecs; with a single cluster it equals the within total
      if (_parms._k != 1) {
        TotSS grandMeanTask =
            new TotSS(means, mults, modes, _parms.train().domains(), _parms.train().cardinality())
                .doAll(vecs);
        model._output._totss = grandMeanTask._tss;
      } else {
        model._output._totss = model._output._tot_withinss;
      }

      // Between-cluster sum of squares is whatever the within total does not explain
      model._output._betweenss = model._output._totss - model._output._tot_withinss;

      // One more iteration completed; this happens before the tables below are built
      model._output._iterations++;

      // Append this iteration's total within-SS to the scoring history array
      final int historyLen = model._output._history_withinss.length + 1;
      model._output._history_withinss =
          ArrayUtils.copyAndFillOf(
              model._output._history_withinss, historyLen, model._output._tot_withinss);

      // Rebuild the two small summary tables from the updated output
      model._output._model_summary = createModelSummaryTable(model._output);
      model._output._scoring_history = createScoringHistoryTable(model._output);

      // Assemble the cluster stats now held by the model into a metrics object
      model._output._training_metrics = makeTrainingMetrics(model);

      // New centers
      return task._cMeans;
    }