Beispiel #1
0
 /**
  * Merges the partial result carried by another {@code KMeansScore} task into this one.
  *
  * <p>When this task has no {@code _rows} yet, it adopts the other task's {@code _rows} and
  * {@code _dist} arrays by reference; otherwise both are combined into this task's arrays
  * through {@code Utils.add}.
  *
  * @param rt the task to merge; must be a {@code KMeansScore}
  */
 @Override
 public void reduce(DRemoteTask rt) {
   final KMeansScore other = (KMeansScore) rt;
   if (_rows != null) {
     // Already holding a partial result: accumulate the other task's values into ours.
     Utils.add(_rows, other._rows);
     Utils.add(_dist, other._dist);
     return;
   }
   // Nothing accumulated locally yet: take over the other task's arrays as-is.
   _rows = other._rows;
   _dist = other._dist;
 }
Beispiel #2
0
 /**
  * Decompresses the leading part of {@code bs} according to {@code cmp}.
  *
  * <ul>
  *   <li>{@code NONE}: the input array is returned as is.
  *   <li>{@code ZIP}: only the first entry is read. If there is no first entry, or it is a
  *       directory, the input array is returned unchanged.
  *   <li>{@code GZIP}: the whole array is treated as one gzip stream.
  * </ul>
  *
  * <p>The output buffer starts at twice the input length and is doubled each time it fills up,
  * until it is at least {@code ValueArray.CHUNK_SZ} bytes long, at which point reading stops.
  * The returned array is that buffer itself: it is NOT trimmed to the number of bytes actually
  * decompressed, so bytes past the decompressed data are left at zero.
  *
  * <p>An {@link IOException} is logged, not rethrown. If it happens before the output buffer is
  * allocated the original input is returned; afterwards the partially filled buffer is returned.
  *
  * @param bs compressed (or plain) input bytes
  * @param cmp compression scheme of {@code bs}
  * @return the input array itself, or a new buffer holding the decompressed prefix
  */
 public static byte [] unzipBytes(byte [] bs, Compression cmp) {
   InputStream is = null;
   int off = 0;
   try {
     switch(cmp) {
     case NONE: // No compression
       return bs;
     case ZIP: {
       ZipInputStream zis = new ZipInputStream(new ByteArrayInputStream(bs));
       // Register the stream for cleanup BEFORE using it, so that the finally block
       // releases it even when getNextEntry() throws on a malformed archive.
       is = zis;
       ZipEntry ze = zis.getNextEntry(); // Get the *FIRST* entry
       // Usable only if there is at least one entry and it is not a directory.
       if( ze == null || ze.isDirectory() )
         return bs; // Don't crash, ignore file if cannot unzip
       break;
     }
     case GZIP:
       is = new GZIPInputStream(new ByteArrayInputStream(bs));
       break;
     default:
       assert false:"cmp = " + cmp;
     }
     // If reading from a compressed stream, estimate we can read 2x uncompressed
     assert( is != null ):"is is NULL, cmp = " + cmp;
     bs = new byte[bs.length * 2];
     // Now read from the (possibly compressed) stream
     while( off < bs.length ) {
       int len = is.read(bs, off, bs.length - off);
       if( len < 0 ) // End of stream
         break;
       off += len;
       if( off == bs.length ) { // Dataset is uncompressing a lot! Need more space...
         if( bs.length >= ValueArray.CHUNK_SZ )
           break; // Already got enough
         bs = Arrays.copyOf(bs, bs.length * 2);
       }
     }
   } catch( IOException ioe ) { // Stop at any io error
     Log.err(ioe);
   } finally {
     Utils.close(is);
   }
   return bs;
 }
Beispiel #3
0
 /**
  * Simple GLM wrapper to enable launching GLM from command line.
  *
  * <p>Example input: java -jar target/h2o.jar -name=test -runMethod water.util.GLMRunner
  * -file=smalldata/logreg/prostate.csv -y=CAPSULE -family=binomial
  *
  * <p>The response ({@code y}) and the predictors ({@code xs}) may each be given either as
  * column indices or as column names; {@code xs=all} selects every column except the response.
  * If any element of {@code xs} is not an integer, the whole list is resolved as column names.
  * After the model is computed, the intercept and one coefficient per predictor are printed to
  * standard output, and the cloud is shut down regardless of the outcome.
  *
  * @param args command-line arguments, extracted into a {@code GLMArgs} instance
  * @throws InterruptedException kept in the signature for callers; an interrupt raised while
  *     polling for job completion is caught by the catch-all handler and logged instead
  */
 public static void main(String[] args) throws InterruptedException {
   try {
     GLMArgs ARGS = new GLMArgs();
     new Arguments(args).extract(ARGS);
     System.out.println("==================<GLMRunner START>===================");
     ValueArray ary = Utils.loadAndParseKey(ARGS.file);

     // Response column: try a numeric index first, fall back to a lookup by name.
     int ycol;
     try {
       ycol = Integer.parseInt(ARGS.y);
     } catch (NumberFormatException e) {
       ycol = ary.getColumnIds(new String[] {ARGS.y})[0];
     }
     int ncols = ary.numCols();
     if (ycol < 0 || ycol >= ncols) {
       System.err.println("invalid y column: " + ycol);
       H2O.exit(-1);
     }

     // Predictor columns.
     int[] xcols;
     if (ARGS.xs.equalsIgnoreCase("all")) {
       // Every column except the response: indices below ycol map to themselves,
       // indices from ycol on are shifted by one to skip over it.
       xcols = new int[ncols - 1];
       for (int i = 0; i < ycol; ++i) {
         xcols[i] = i;
       }
       for (int i = ycol; i < ncols - 1; ++i) {
         xcols[i] = i + 1;
       }
     } else {
       System.out.println("xs = " + ARGS.xs);
       String[] names = ARGS.xs.split(",");
       xcols = new int[names.length];
       try {
         for (int i = 0; i < names.length; ++i) {
           xcols[i] = Integer.parseInt(names[i]);
         }
       } catch (NumberFormatException e) {
         // At least one element is not an index: resolve the whole list by name.
         xcols = ary.getColumnIds(names);
       }
     }
     // Reject indices outside the dataset (negative ids presumably mark names that
     // getColumnIds could not resolve -- confirm against ValueArray).
     for (int x : xcols) {
       if (x < 0 || x >= ncols) {
         System.err.println("Invalid predictor specification " + ARGS.xs);
         H2O.exit(-1);
       }
     }

     GLMJob j =
         DGLM.startGLMJob(
             DGLM.getData(ary, xcols, ycol, null, true),
             new ADMMSolver(ARGS.lambda, ARGS._alpha),
             new GLMParams(Family.valueOf(ARGS.family)),
             null,
             ARGS.xval,
             true);

     // Poll the job, printing one dot per percentage point of new progress.
     System.out.print("[GLM] computing model...");
     int progress = 0;
     while (!j.isDone()) {
       int p = (int) (100 * j.progress());
       int dots = p - progress;
       progress = p;
       for (int i = 0; i < dots; ++i) {
         System.out.print('.');
       }
       Thread.sleep(250);
     }
     Log.debug(Sys.GENLM, "DONE.");

     GLMModel m = j.get();
     String[] colnames = ary.colNames();
     // NOTE(review): assumes the intercept is stored as the last element of _beta. The
     // previous index (ncols - 1) only coincides with that when every non-response column
     // is used as a predictor -- confirm against GLMModel.
     System.out.println("Intercept" + " = " + m._beta[m._beta.length - 1]);
     for (int i = 0; i < xcols.length; ++i) {
       // Label each coefficient with the name of the predictor column it belongs to,
       // not with the i-th column of the dataset.
       // NOTE(review): assumes _beta[i] corresponds to xcols[i] -- confirm against GLMModel.
       System.out.println(colnames[xcols[i]] + " = " + m._beta[i]);
     }
   } catch (Throwable t) {
     Log.err(t);
   } finally { // we're done. shutdown the cloud
     Log.debug(Sys.GENLM, "==================<GLMRunner DONE>===================");
     UDPRebooted.suicide(UDPRebooted.T.shutdown, H2O.SELF);
   }
 }