public PersistManager(URI iceRoot) {
    I = new Persist[MAX_BACKENDS];
    stats = new PersistStatsEntry[MAX_BACKENDS];
    for (int i = 0; i < stats.length; i++) {
      stats[i] = new PersistStatsEntry();
    }

    if (iceRoot == null) {
      Log.err("ice_root must be specified.  Exiting.");
      H2O.exit(1);
    }

    Persist ice = null;
    boolean windowsPath = iceRoot.toString().matches("^[a-zA-Z]:.*");

    if (windowsPath) {
      ice = new PersistFS(new File(iceRoot.toString()));
    } else if ((iceRoot.getScheme() == null) || Schemes.FILE.equals(iceRoot.getScheme())) {
      ice = new PersistFS(new File(iceRoot.getPath()));
    } else if (Schemes.HDFS.equals(iceRoot.getScheme())) {
      Log.err("HDFS ice_root not yet supported.  Exiting.");
      H2O.exit(1);

      // I am not sure anyone actually ever does this.
      // H2O on Hadoop launches use local disk for ice root.
      // This has a chance to work, but turn if off until it gets tested.
      //
      //      try {
      //        Class klass = Class.forName("water.persist.PersistHdfs");
      //        java.lang.reflect.Constructor constructor = klass.getConstructor(new
      // Class[]{URI.class});
      //        ice = (Persist) constructor.newInstance(iceRoot);
      //      } catch (Exception e) {
      //        Log.err("Could not initialize HDFS");
      //        throw new RuntimeException(e);
      //      }
    }

    I[Value.ICE] = ice;
    I[Value.NFS] = new PersistNFS();

    try {
      Class klass = Class.forName("water.persist.PersistHdfs");
      java.lang.reflect.Constructor constructor = klass.getConstructor();
      I[Value.HDFS] = (Persist) constructor.newInstance();
      Log.info("HDFS subsystem successfully initialized");
    } catch (Throwable ignore) {
      Log.info("HDFS subsystem not available");
    }

    try {
      Class klass = Class.forName("water.persist.PersistS3");
      java.lang.reflect.Constructor constructor = klass.getConstructor();
      I[Value.S3] = (Persist) constructor.newInstance();
      Log.info("S3 subsystem successfully initialized");
    } catch (Throwable ignore) {
      Log.info("S3 subsystem not available");
    }
  }
Exemple #2
0
 /**
  * Simple GLM wrapper to enable launching GLM from command line.
  *
  * <p>Example input: java -jar target/h2o.jar -name=test -runMethod water.util.GLMRunner
  * -file=smalldata/logreg/prostate.csv -y=CAPSULE -family=binomial
  *
  * @param args
  * @throws InterruptedException
  */
 public static void main(String[] args) throws InterruptedException {
   try {
     GLMArgs ARGS = new GLMArgs();
     new Arguments(args).extract(ARGS);
     System.out.println("==================<GLMRunner START>===================");
     ValueArray ary = Utils.loadAndParseKey(ARGS.file);
     int ycol;
     try {
       ycol = Integer.parseInt(ARGS.y);
     } catch (NumberFormatException e) {
       ycol = ary.getColumnIds(new String[] {ARGS.y})[0];
     }
     int ncols = ary.numCols();
     if (ycol < 0 || ycol >= ary.numCols()) {
       System.err.println("invalid y column: " + ycol);
       H2O.exit(-1);
     }
     int[] xcols;
     if (ARGS.xs.equalsIgnoreCase("all")) {
       xcols = new int[ncols - 1];
       for (int i = 0; i < ycol; ++i) xcols[i] = i;
       for (int i = ycol; i < ncols - 1; ++i) xcols[i] = i + 1;
     } else {
       System.out.println("xs = " + ARGS.xs);
       String[] names = ARGS.xs.split(",");
       xcols = new int[names.length];
       try {
         for (int i = 0; i < names.length; ++i) xcols[i] = Integer.valueOf(names[i]);
       } catch (NumberFormatException e) {
         xcols = ary.getColumnIds(ARGS.xs.split(","));
       }
     }
     for (int x : xcols)
       if (x < 0) {
         System.err.println("Invalid predictor specification " + ARGS.xs);
         H2O.exit(-1);
       }
     GLMJob j =
         DGLM.startGLMJob(
             DGLM.getData(ary, xcols, ycol, null, true),
             new ADMMSolver(ARGS.lambda, ARGS._alpha),
             new GLMParams(Family.valueOf(ARGS.family)),
             null,
             ARGS.xval,
             true);
     System.out.print("[GLM] computing model...");
     int progress = 0;
     while (!j.isDone()) {
       int p = (int) (100 * j.progress());
       int dots = p - progress;
       progress = p;
       for (int i = 0; i < dots; ++i) System.out.print('.');
       Thread.sleep(250);
     }
     Log.debug(Sys.GENLM, "DONE.");
     GLMModel m = j.get();
     String[] colnames = ary.colNames();
     System.out.println("Intercept" + " = " + m._beta[ncols - 1]);
     for (int i = 0; i < xcols.length; ++i) {
       System.out.println(colnames[i] + " = " + m._beta[i]);
     }
   } catch (Throwable t) {
     Log.err(t);
   } finally { // we're done. shutdown the cloud
     Log.debug(Sys.GENLM, "==================<GLMRunner DONE>===================");
     UDPRebooted.suicide(UDPRebooted.T.shutdown, H2O.SELF);
   }
 }