@Override public void reduce(DRemoteTask rt) { KMeansScore kms = (KMeansScore) rt; if (_rows == null) { _rows = kms._rows; _dist = kms._dist; } else { Utils.add(_rows, kms._rows); Utils.add(_dist, kms._dist); } }
public static byte [] unzipBytes(byte [] bs, Compression cmp) { InputStream is = null; int off = 0; try { switch(cmp) { case NONE: // No compression return bs; case ZIP: { ZipInputStream zis = new ZipInputStream(new ByteArrayInputStream(bs)); ZipEntry ze = zis.getNextEntry(); // Get the *FIRST* entry // There is at least one entry in zip file and it is not a directory. if( ze != null && !ze.isDirectory() ) { is = zis; break; } zis.close(); return bs; // Don't crash, ignore file if cannot unzip } case GZIP: is = new GZIPInputStream(new ByteArrayInputStream(bs)); break; default: assert false:"cmp = " + cmp; } // If reading from a compressed stream, estimate we can read 2x uncompressed assert( is != null ):"is is NULL, cmp = " + cmp; bs = new byte[bs.length * 2]; // Now read from the (possibly compressed) stream while( off < bs.length ) { int len = is.read(bs, off, bs.length - off); if( len < 0 ) break; off += len; if( off == bs.length ) { // Dataset is uncompressing alot! Need more space... if( bs.length >= ValueArray.CHUNK_SZ ) break; // Already got enough bs = Arrays.copyOf(bs, bs.length * 2); } } } catch( IOException ioe ) { // Stop at any io error Log.err(ioe); } finally { Utils.close(is); } return bs; }
/** * Simple GLM wrapper to enable launching GLM from command line. * * <p>Example input: java -jar target/h2o.jar -name=test -runMethod water.util.GLMRunner * -file=smalldata/logreg/prostate.csv -y=CAPSULE -family=binomial * * @param args * @throws InterruptedException */ public static void main(String[] args) throws InterruptedException { try { GLMArgs ARGS = new GLMArgs(); new Arguments(args).extract(ARGS); System.out.println("==================<GLMRunner START>==================="); ValueArray ary = Utils.loadAndParseKey(ARGS.file); int ycol; try { ycol = Integer.parseInt(ARGS.y); } catch (NumberFormatException e) { ycol = ary.getColumnIds(new String[] {ARGS.y})[0]; } int ncols = ary.numCols(); if (ycol < 0 || ycol >= ary.numCols()) { System.err.println("invalid y column: " + ycol); H2O.exit(-1); } int[] xcols; if (ARGS.xs.equalsIgnoreCase("all")) { xcols = new int[ncols - 1]; for (int i = 0; i < ycol; ++i) xcols[i] = i; for (int i = ycol; i < ncols - 1; ++i) xcols[i] = i + 1; } else { System.out.println("xs = " + ARGS.xs); String[] names = ARGS.xs.split(","); xcols = new int[names.length]; try { for (int i = 0; i < names.length; ++i) xcols[i] = Integer.valueOf(names[i]); } catch (NumberFormatException e) { xcols = ary.getColumnIds(ARGS.xs.split(",")); } } for (int x : xcols) if (x < 0) { System.err.println("Invalid predictor specification " + ARGS.xs); H2O.exit(-1); } GLMJob j = DGLM.startGLMJob( DGLM.getData(ary, xcols, ycol, null, true), new ADMMSolver(ARGS.lambda, ARGS._alpha), new GLMParams(Family.valueOf(ARGS.family)), null, ARGS.xval, true); System.out.print("[GLM] computing model..."); int progress = 0; while (!j.isDone()) { int p = (int) (100 * j.progress()); int dots = p - progress; progress = p; for (int i = 0; i < dots; ++i) System.out.print('.'); Thread.sleep(250); } Log.debug(Sys.GENLM, "DONE."); GLMModel m = j.get(); String[] colnames = ary.colNames(); System.out.println("Intercept" + " = " + m._beta[ncols - 1]); for (int i = 0; i < xcols.length; ++i) { System.out.println(colnames[i] + " = " + m._beta[i]); } } catch (Throwable t) { Log.err(t); } finally { // we're done. shutdown the cloud Log.debug(Sys.GENLM, "==================<GLMRunner DONE>==================="); UDPRebooted.suicide(UDPRebooted.T.shutdown, H2O.SELF); } }