// Main worker thread @Override protected void compute2() { KMeansModel model = null; try { init(true); // Do lock even before checking the errors, since this block is finalized by unlock // (not the best solution, but the code is more readable) _parms.read_lock_frames(KMeans.this); // Fetch & read-lock input frames // Something goes wrong if (error_count() > 0) throw H2OModelBuilderIllegalArgumentException.makeFromBuilder(KMeans.this); // The model to be built model = new KMeansModel(dest(), _parms, new KMeansModel.KMeansOutput(KMeans.this)); model.delete_and_lock(_key); // final Vec vecs[] = _train.vecs(); // mults & means for standardization final double[] means = _train.means(); // means are used to impute NAs final double[] mults = _parms._standardize ? _train.mults() : null; final int[] impute_cat = new int[vecs.length]; for (int i = 0; i < vecs.length; i++) impute_cat[i] = vecs[i].isNumeric() ? -1 : DataInfo.imputeCat(vecs[i]); model._output._normSub = means; model._output._normMul = mults; // Initialize cluster centers and standardize if requested double[][] centers = initial_centers(model, vecs, means, mults, impute_cat); if (centers == null) return; // Stopped/cancelled during center-finding double[][] oldCenters = null; // --- // Run the main KMeans Clustering loop // Stop after enough iterations or average_change < TOLERANCE model._output._iterations = 0; // Loop ends only when iterations > max_iterations with strict inequality while (!isDone(model, centers, oldCenters)) { Lloyds task = new Lloyds(centers, means, mults, impute_cat, _isCats, _parms._k, hasWeightCol()) .doAll(vecs); // Pick the max categorical level for cluster center max_cats(task._cMeans, task._cats, _isCats); // Handle the case where some centers go dry. Rescue only 1 cluster // per iteration ('cause we only tracked the 1 worst row) if (cleanupBadClusters(task, vecs, centers, means, mults, impute_cat)) continue; // Compute model stats; update standardized cluster centers oldCenters = centers; centers = computeStatsFillModel(task, model, vecs, means, mults, impute_cat); model.update(_key); // Update model in K/V store update(1); // One unit of work if (model._parms._score_each_iteration) Log.info(model._output._model_summary); } Log.info(model._output._model_summary); // Log.info(model._output._scoring_history); // // Log.info(((ModelMetricsClustering)model._output._training_metrics).createCentroidStatsTable().toString()); // At the end: validation scoring (no need to gather scoring history) if (_valid != null) { model.score(_parms.valid()).delete(); // this appends a ModelMetrics on the validation set model._output._validation_metrics = ModelMetrics.getFromDKV(model, _parms.valid()); model.update(_key); // Update model in K/V store } done(); // Job done! } catch (Throwable t) { Job thisJob = DKV.getGet(_key); if (thisJob._state == JobState.CANCELLED) { Log.info("Job cancelled by user."); } else { t.printStackTrace(); failed(t); throw t; } } finally { updateModelOutput(); if (model != null) model.unlock(_key); _parms.read_unlock_frames(KMeans.this); } tryComplete(); }