/**
 * Runs k-means scoring of a dataset against a model.
 *
 * <p>A fresh {@code KMeansScore} task is populated with the dataset key, the model's column
 * mapping for the dataset's column names, and the model's {@code _clusters} and
 * {@code _normalized} fields; it is then invoked on the dataset key.
 *
 * @param model the k-means model that supplies the column mapping, clusters and normalization
 *     setting
 * @param ary the dataset to score
 * @return the populated task, after {@code invoke} has been called on it
 */
public static KMeansScore score(KMeansModel model, ValueArray ary) {
  final KMeansScore task = new KMeansScore();

  // Configure the task from the dataset and the model before launching it.
  task._arykey = ary._key;
  task._cols = model.columnMapping(ary.colNames());
  task._clusters = model._clusters;
  task._normalized = model._normalized;

  task.invoke(ary._key);
  return task;
}
/**
 * Adapts this model so it can be applied to the given dataset.
 *
 * <p>The default behavior maps the dataset's columns onto the model's columns and, for every
 * enum column whose domain differs from the model's, builds a per-level translation table via
 * {@code find}. The documented contract is that categorical values not seen when the model was
 * built are translated as NaN; that translation is not performed in this method itself
 * (presumably it happens in {@code find} / {@code ModelDataAdaptor} - verify there).
 *
 * <p>Override this to get custom adapt behavior (e.g. to handle unseen categoricals
 * differently).
 *
 * @param ary the test dataset the model should be applied to
 * @return a {@code ModelDataAdaptor} wrapping this model with the column map and categorical map
 * @throws IllegalArgumentException if {@code isCompatible} rejects the column mapping
 */
public OldModel adapt(ValueArray ary) {
  final int[] mapping = columnMapping(ary.colNames());
  if (!isCompatible(mapping)) {
    throw new IllegalArgumentException("This model uses different columns than those provided");
  }

  // One (possibly null) translation table per mapped column; the last entry of the mapping is
  // handed to the adaptor separately and never gets a table.
  int[][] levelMaps = new int[mapping.length][];
  boolean domainsIdentical = true;

  for (int col = 0; col < mapping.length - 1; ++col) {
    final Column dataCol = ary._cols[mapping[col]];
    if (!dataCol.isEnum()) {
      continue;
    }
    if (Arrays.deepEquals(_va._cols[col]._domain, dataCol._domain)) {
      continue;
    }

    // Domains differ: translate every dataset level to the model's level index.
    domainsIdentical = false;
    final int[] translation = new int[dataCol._domain.length];
    for (int level = 0; level < translation.length; ++level) {
      translation[level] = find(dataCol._domain[level], _va._cols[col]._domain);
    }
    levelMaps[col] = translation;
  }

  // No categorical translation is needed when all domains match and the columns map 1:1.
  final int[][] effectiveLevelMaps =
      (domainsIdentical && identityMap(mapping)) ? null : levelMaps;

  return new ModelDataAdaptor(
      this,
      mapping[mapping.length - 1],
      Arrays.copyOf(mapping, mapping.length - 1),
      effectiveLevelMaps);
}
// Bridge from new Model scoring to old Model scoring public Frame score(Frame data) { final double threshold = getThreshold(); String[][] ds = _va.domains(); if (ds[ds.length - 1] == null && !Double.isNaN(threshold)) { // This is a binomial classifier ds[ds.length - 1] = new String[] {"F", "T"}; } Model m = new Model(null, null, _va.colNames(), ds) { @Override protected float[] score0(double data[ /*ncols*/], float preds[ /*nclasses*/]) { float s = (float) OldModel.this.score0(data); if (preds.length == 1) preds[0] = s; else { assert preds.length == 2; preds[0] = 1 - s; preds[1] = s; } return preds; } }; return m.score(data); }
/** * Simple GLM wrapper to enable launching GLM from command line. * * <p>Example input: java -jar target/h2o.jar -name=test -runMethod water.util.GLMRunner * -file=smalldata/logreg/prostate.csv -y=CAPSULE -family=binomial * * @param args * @throws InterruptedException */ public static void main(String[] args) throws InterruptedException { try { GLMArgs ARGS = new GLMArgs(); new Arguments(args).extract(ARGS); System.out.println("==================<GLMRunner START>==================="); ValueArray ary = Utils.loadAndParseKey(ARGS.file); int ycol; try { ycol = Integer.parseInt(ARGS.y); } catch (NumberFormatException e) { ycol = ary.getColumnIds(new String[] {ARGS.y})[0]; } int ncols = ary.numCols(); if (ycol < 0 || ycol >= ary.numCols()) { System.err.println("invalid y column: " + ycol); H2O.exit(-1); } int[] xcols; if (ARGS.xs.equalsIgnoreCase("all")) { xcols = new int[ncols - 1]; for (int i = 0; i < ycol; ++i) xcols[i] = i; for (int i = ycol; i < ncols - 1; ++i) xcols[i] = i + 1; } else { System.out.println("xs = " + ARGS.xs); String[] names = ARGS.xs.split(","); xcols = new int[names.length]; try { for (int i = 0; i < names.length; ++i) xcols[i] = Integer.valueOf(names[i]); } catch (NumberFormatException e) { xcols = ary.getColumnIds(ARGS.xs.split(",")); } } for (int x : xcols) if (x < 0) { System.err.println("Invalid predictor specification " + ARGS.xs); H2O.exit(-1); } GLMJob j = DGLM.startGLMJob( DGLM.getData(ary, xcols, ycol, null, true), new ADMMSolver(ARGS.lambda, ARGS._alpha), new GLMParams(Family.valueOf(ARGS.family)), null, ARGS.xval, true); System.out.print("[GLM] computing model..."); int progress = 0; while (!j.isDone()) { int p = (int) (100 * j.progress()); int dots = p - progress; progress = p; for (int i = 0; i < dots; ++i) System.out.print('.'); Thread.sleep(250); } Log.debug(Sys.GENLM, "DONE."); GLMModel m = j.get(); String[] colnames = ary.colNames(); System.out.println("Intercept" + " = " + m._beta[ncols - 1]); for (int i = 0; i < xcols.length; 
++i) { System.out.println(colnames[i] + " = " + m._beta[i]); } } catch (Throwable t) { Log.err(t); } finally { // we're done. shutdown the cloud Log.debug(Sys.GENLM, "==================<GLMRunner DONE>==================="); UDPRebooted.suicide(UDPRebooted.T.shutdown, H2O.SELF); } }