Example #1
0
  @SuppressWarnings("unused") // called through reflection by RequestServer
  public RemoveAllV3 remove(int version, RemoveAllV3 u) {
    Log.info("Removing all objects");
    Futures fs = new Futures();
    for (Job j : Job.jobs()) {
      j.cancel();
      j.remove(fs);
    }
    fs.blockForPending();
    // Bulk brainless key removal.  Completely wipes all Keys without regard.
    new MRTask() {
      @Override
      public byte priority() {
        return H2O.GUI_PRIORITY;
      }

      @Override
      public void setupLocal() {
        H2O.raw_clear();
        water.fvec.Vec.ESPC.clear();
      }
    }.doAllNodes();
    Log.info("Finished removing objects");
    return u;
  }
Example #2
0
 @Override
 public void callback(final GLMTask.GLMLineSearchTask glmt) {
   double step = 0.5;
   for (int i = 0; i < glmt._objvals.length; ++i) {
     if (!needLineSearch(glmt._betas[i], glmt._objvals[i], step)) {
       Log.info("GLM2 (iteration=" + _iter + ") line search: found admissible step=" + step);
        // Set _lastResult to null so that the Iteration will not attempt to
        // verify whether or not it should do the line search.
        _lastResult = null;
       new GLMIterationTask(
               GLM2.this,
               _activeData,
               _glm,
               true,
               true,
               true,
               glmt._betas[i],
               _ymu,
               _reg,
               new Iteration())
           .asyncExec(_activeData._adaptedFrame);
       return;
     }
     step *= 0.5;
   } // no line step worked, forcibly converge
   Log.info(
       "GLM2 (iteration="
           + _iter
           + ") line search failed to find feasible step. Forcibly converged.");
   nextLambda(
       _lastResult._glmt.clone(),
       resizeVec(_lastResult._glmt._beta, _activeCols, _lastResult._activeCols));
 }
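// A minimal standalone sketch of the halving line search above: try steps
// 0.5, 0.25, ... and take the first admissible one. The admissibility test
// here is a standard Armijo sufficient-decrease condition on f(x) = x^2 and
// is a stand-in assumption -- GLM2's needLineSearch() is not reproduced.
public class BacktrackDemo {
  public static void main(String[] args) {
    double x = 1.0, grad = 2 * x; // f(x) = x^2, so f'(1) = 2
    double f = x * x, step = 0.5;
    for (int i = 0; i < 10; i++) {
      double cand = x - step * grad;
      if (cand * cand <= f - 1e-4 * step * grad * grad) { // sufficient decrease
        System.out.println("found admissible step=" + step + ", x=" + cand);
        return;
      }
      step *= 0.5; // step rejected: halve and retry
    }
    System.out.println("line search failed to find feasible step");
  }
}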
Example #3
0
  @Test
  public void testExpandCatsIris() throws InterruptedException, ExecutionException {
    double[][] iris =
        ard(
            ard(6.3, 2.5, 4.9, 1.5, 1),
            ard(5.7, 2.8, 4.5, 1.3, 1),
            ard(5.6, 2.8, 4.9, 2.0, 2),
            ard(5.0, 3.4, 1.6, 0.4, 0),
            ard(6.0, 2.2, 5.0, 1.5, 2));
    double[][] iris_expandR =
        ard(
            ard(0, 1, 0, 6.3, 2.5, 4.9, 1.5),
            ard(0, 1, 0, 5.7, 2.8, 4.5, 1.3),
            ard(0, 0, 1, 5.6, 2.8, 4.9, 2.0),
            ard(1, 0, 0, 5.0, 3.4, 1.6, 0.4),
            ard(0, 0, 1, 6.0, 2.2, 5.0, 1.5));
    String[] iris_cols = new String[] {"sepal_len", "sepal_wid", "petal_len", "petal_wid", "class"};
    String[][] iris_domains =
        new String[][] {null, null, null, null, new String[] {"setosa", "versicolor", "virginica"}};

    Frame fr = null;
    try {
      fr = parse_test_file(Key.make("iris.hex"), "smalldata/iris/iris_wheader.csv");
      DataInfo dinfo =
          new DataInfo(
              Key.make(),
              fr,
              null,
              0,
              true,
              DataInfo.TransformType.NONE,
              DataInfo.TransformType.NONE,
              false,
              false,
              false, /* weights */
              false, /* offset */
              false, /* fold */
              false);

      Log.info("Original matrix:\n" + colFormat(iris_cols, "%8.7s") + ArrayUtils.pprint(iris));
      double[][] iris_perm = ArrayUtils.permuteCols(iris, dinfo._permutation);
      Log.info(
          "Permuted matrix:\n"
              + colFormat(iris_cols, "%8.7s", dinfo._permutation)
              + ArrayUtils.pprint(iris_perm));

      double[][] iris_exp = GLRM.expandCats(iris_perm, dinfo);
      Log.info(
          "Expanded matrix:\n"
              + colExpFormat(iris_cols, iris_domains, "%8.7s", dinfo._permutation)
              + ArrayUtils.pprint(iris_exp));
      Assert.assertArrayEquals(iris_expandR, iris_exp);
    } catch (Throwable t) {
      t.printStackTrace();
      throw new RuntimeException(t);
    } finally {
      if (fr != null) fr.delete();
    }
  }
Example #4
0
  public PersistManager(URI iceRoot) {
    I = new Persist[MAX_BACKENDS];
    stats = new PersistStatsEntry[MAX_BACKENDS];
    for (int i = 0; i < stats.length; i++) {
      stats[i] = new PersistStatsEntry();
    }

    if (iceRoot == null) {
      Log.err("ice_root must be specified.  Exiting.");
      H2O.exit(1);
    }

    Persist ice = null;
    boolean windowsPath = iceRoot.toString().matches("^[a-zA-Z]:.*");

    if (windowsPath) {
      ice = new PersistFS(new File(iceRoot.toString()));
    } else if ((iceRoot.getScheme() == null) || Schemes.FILE.equals(iceRoot.getScheme())) {
      ice = new PersistFS(new File(iceRoot.getPath()));
    } else if (Schemes.HDFS.equals(iceRoot.getScheme())) {
      Log.err("HDFS ice_root not yet supported.  Exiting.");
      H2O.exit(1);

      // I am not sure anyone actually ever does this.
      // H2O on Hadoop launches use local disk for ice root.
      // This has a chance to work, but turn it off until it gets tested.
      //
      //      try {
      //        Class klass = Class.forName("water.persist.PersistHdfs");
      //        java.lang.reflect.Constructor constructor = klass.getConstructor(new Class[]{URI.class});
      //        ice = (Persist) constructor.newInstance(iceRoot);
      //      } catch (Exception e) {
      //        Log.err("Could not initialize HDFS");
      //        throw new RuntimeException(e);
      //      }
    }

    I[Value.ICE] = ice;
    I[Value.NFS] = new PersistNFS();

    try {
      Class klass = Class.forName("water.persist.PersistHdfs");
      java.lang.reflect.Constructor constructor = klass.getConstructor();
      I[Value.HDFS] = (Persist) constructor.newInstance();
      Log.info("HDFS subsystem successfully initialized");
    } catch (Throwable ignore) {
      Log.info("HDFS subsystem not available");
    }

    try {
      Class klass = Class.forName("water.persist.PersistS3");
      java.lang.reflect.Constructor constructor = klass.getConstructor();
      I[Value.S3] = (Persist) constructor.newInstance();
      Log.info("S3 subsystem successfully initialized");
    } catch (Throwable ignore) {
      Log.info("S3 subsystem not available");
    }
  }
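// The optional-backend pattern above, in isolation: reflectively load a class
// and degrade gracefully when it is absent. Catching Throwable (not just
// Exception) matters because a missing jar surfaces as a NoClassDefFoundError,
// which is not an Exception. Hypothetical standalone sketch, not H2O code.
public class OptionalBackendDemo {
  public static void main(String[] args) {
    try {
      Class<?> klass = Class.forName("water.persist.PersistS3"); // may be off the classpath
      Object backend = klass.getConstructor().newInstance();
      System.out.println("S3 subsystem successfully initialized: " + backend);
    } catch (Throwable ignore) { // linkage errors count as "not available" too
      System.out.println("S3 subsystem not available");
    }
  }
}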
Example #5
0
    private void sendAck() {
      // Send results back
      DTask dt, origDt = _dt; // _dt can go null the instant it is sent over the wire
      assert origDt != null; // Freed after completion
      while ((dt = _dt) != null) { // Retry loop for broken TCP sends
        AutoBuffer ab = null;
        try {
          // Start the ACK with results back to client.  If the client is
          // asking for a class/id mapping (or any job running at FETCH_ACK
          // priority) then return a udp.fetchack byte instead of a udp.ack.
          // The receiver thread then knows to handle the mapping at the higher
          // priority.

          UDP.udp udp = dt.priority() == H2O.FETCH_ACK_PRIORITY ? UDP.udp.fetchack : UDP.udp.ack;
          ab = new AutoBuffer(_client, udp._prior).putTask(udp, _tsknum).put1(SERVER_UDP_SEND);
          assert ab.position() == 1 + 2 + 4 + 1;
          dt.write(ab); // Write the DTask - could be very large write
          dt._repliedTcp = ab.hasTCP(); // Resends do not need to repeat TCP result
          ab.close(); // Then close; send final byte
          _computedAndReplied = true; // After the final handshake, set computed+replied bit
          break; // Break out of retry loop
        } catch (AutoBuffer.AutoBufferException e) {
          if (!_client._heartbeat._client) // Report on servers only; clients allowed to be flaky
            Log.info(
                "IOException during ACK, "
                    + e._ioe.getMessage()
                    + ", t#"
                    + _tsknum
                    + " AB="
                    + ab
                    + ", waiting and retrying...");
          if (ab != null) ab.drainClose(); // ab may be null if the constructor itself threw
          if (_client._heartbeat._client) // Dead client will not accept a TCP ACK response?
            this.CAS_DT(dt, null); // cancel the ACK
          try {
            Thread.sleep(100);
          } catch (InterruptedException ignore) {
          }
        } catch (Exception e) { // Custom serializer just barfed?
          Log.err(e); // Log custom serializer exception
          if (ab != null) ab.drainClose();
        }
      } // end of retry loop
      if (dt == null)
        Log.info(
            "Cancelled remote task#"
                + _tsknum
                + " "
                + origDt.getClass()
                + " to "
                + _client
                + " has been cancelled by remote");
      else {
        if (dt instanceof MRTask && dt.logVerbose())
          Log.debug("Done remote task#" + _tsknum + " " + dt.getClass() + " to " + _client);
        _client.record_task_answer(this); // Setup for retrying Ack & AckAck, if not canceled
      }
    }
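// Distilled retry skeleton of sendAck() above: keep resending while the task
// is still wanted (non-null) and back off briefly on transient IO failures.
// Everything here is a stand-in sketch; the AutoBuffer/CAS machinery is omitted.
import java.io.IOException;
import java.util.concurrent.atomic.AtomicReference;

public class AckRetryDemo {
  static final AtomicReference<String> _dt = new AtomicReference<>("result");

  static void send(String payload) throws IOException {
    if (Math.random() < 0.5) throw new IOException("broken TCP send"); // flaky wire
    System.out.println("sent: " + payload);
  }

  public static void main(String[] args) throws InterruptedException {
    String dt;
    while ((dt = _dt.get()) != null) { // null would mean the task was cancelled remotely
      try {
        send(dt);
        break; // success: leave the retry loop
      } catch (IOException e) {
        System.out.println("IOException during ACK, waiting and retrying...");
        Thread.sleep(100);
      }
    }
  }
}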
Example #6
0
  @Test
  public void testCategoricalProstate() throws InterruptedException, ExecutionException {
    GLRM job = null;
    GLRMModel model = null;
    Frame train = null;
    final int[] cats = new int[] {1, 3, 4, 5}; // Categoricals: CAPSULE, RACE, DPROS, DCAPS

    try {
      Scope.enter();
      train = parse_test_file(Key.make("prostate.hex"), "smalldata/logreg/prostate.csv");
      for (int i = 0; i < cats.length; i++)
        Scope.track(train.replace(cats[i], train.vec(cats[i]).toCategoricalVec())._key);
      train.remove("ID").remove();
      DKV.put(train._key, train);

      GLRMParameters parms = new GLRMParameters();
      parms._train = train._key;
      parms._k = 8;
      parms._gamma_x = parms._gamma_y = 0.1;
      parms._regularization_x = GLRMModel.GLRMParameters.Regularizer.Quadratic;
      parms._regularization_y = GLRMModel.GLRMParameters.Regularizer.Quadratic;
      parms._init = GLRM.Initialization.PlusPlus;
      parms._transform = DataInfo.TransformType.STANDARDIZE;
      parms._recover_svd = false;
      parms._max_iterations = 200;

      try {
        job = new GLRM(parms);
        model = job.trainModel().get();
        Log.info(
            "Iteration "
                + model._output._iterations
                + ": Objective value = "
                + model._output._objective);
        model.score(train).delete();
        ModelMetricsGLRM mm = (ModelMetricsGLRM) ModelMetrics.getFromDKV(model, train);
        Log.info(
            "Numeric Sum of Squared Error = "
                + mm._numerr
                + "\tCategorical Misclassification Error = "
                + mm._caterr);
      } catch (Throwable t) {
        t.printStackTrace();
        throw new RuntimeException(t);
      } finally {
        if (job != null) job.remove(); // job may be null if construction threw
      }
    } catch (Throwable t) {
      t.printStackTrace();
      throw new RuntimeException(t);
    } finally {
      if (train != null) train.delete();
      if (model != null) model.delete();
      Scope.exit();
    }
  }
Example #7
0
  @Test
  public void testChunks() {
    Frame frame = parse_test_file("smalldata/covtype/covtype.20k.data");

    AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
    parms._train = frame._key;
    parms._radius_scale = 3.0;
    long start = System.currentTimeMillis();
    AggregatorModel agg = new Aggregator(parms).trainModel().get(); // 0.418
    System.out.println(
        "AggregatorModel finished in: "
            + (System.currentTimeMillis() - start) / 1000.
            + " seconds");
    agg.checkConsistency();
    Frame output = agg._output._output_frame.get();
    Log.info("Number of exemplars: " + agg._exemplars.length);
    //    Assert.assertTrue(agg._exemplars.length==1993);
    output.remove();
    agg.remove();

    for (int i : new int[] {1, 2, 5, 10, 50, 100}) {
      Key key = Key.make();
      RebalanceDataSet rb = new RebalanceDataSet(frame, key, i);
      H2O.submitTask(rb);
      rb.join();
      Frame rebalanced = DKV.get(key).get();

      parms = new AggregatorModel.AggregatorParameters();
      parms._train = frame._key;
      parms._radius_scale = 3.0;
      start = System.currentTimeMillis();
      AggregatorModel agg2 =
          new Aggregator(parms).trainModel().get(); // 0.373 0.504 0.357 0.454 0.368 0.355
      System.out.println(
          "AggregatorModel finished in: "
              + (System.currentTimeMillis() - start) / 1000.
              + " seconds");
      agg2.checkConsistency();
      Log.info("Number of exemplars for " + i + " chunks: " + agg2._exemplars.length);
      rebalanced.delete();
      Assert.assertEquals(agg._exemplars.length, agg2._exemplars.length);
      output = agg2._output._output_frame.get();
      output.remove();
      agg2.remove();
    }
    frame.delete();
  }
Example #8
0
 public static void main(String[] args) throws Exception {
   Log._dontDie = true; // Ignore fatal class load error
   ArrayList<String> list = new ArrayList<String>();
   for (String name : Boot.getClasses()) {
     if (!name.equals("water.api.RequestServer")
         && !name.equals("water.External")
         && !name.startsWith("water.r.")) {
       Class c = Class.forName(name);
       if (Freezable.class.isAssignableFrom(c)) list.add(c.getName());
     }
   }
   Collections.sort(list);
   String s =
       "package water;\n"
           + "\n"
           + "// Do not edit - generated\n"
           + "public class TypeMapGen {\n"
           + "  static final String[] CLAZZES = {\n"
           + "    \" BAD\",                     // 0: BAD\n"
           + "    \"[B\",                       // 1: Array of Bytes\n";
   for (String c : list) s += "    \"" + c + "\",\n";
   s += "  };\n";
   s += "}";
   Utils.writeFile(new File("src/main/java/water/TypeMapGen.java"), s);
   Log.info("Generated TypeMap");
 }
Example #9
0
  @Test
  public void testAggregatorBinary() {
    CreateFrame cf = new CreateFrame();
    cf.rows = 1000;
    cf.cols = 10;
    cf.categorical_fraction = 0.6;
    cf.integer_fraction = 0.0;
    cf.binary_fraction = 0.0;
    cf.real_range = 100;
    cf.integer_range = 100;
    cf.missing_fraction = 0.1;
    cf.factors = 5;
    cf.seed = 1234;
    Frame frame = cf.execImpl().get();

    AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
    parms._train = frame._key;
    parms._radius_scale = 1.0;
    parms._transform = DataInfo.TransformType.NORMALIZE;
    parms._categorical_encoding = Model.Parameters.CategoricalEncodingScheme.Binary;
    long start = System.currentTimeMillis();
    AggregatorModel agg = new Aggregator(parms).trainModel().get(); // 0.905
    System.out.println(
        "AggregatorModel finished in: "
            + (System.currentTimeMillis() - start) / 1000.
            + " seconds");
    agg.checkConsistency();
    Frame output = agg._output._output_frame.get();
    System.out.println(output.toTwoDimTable(0, 10));
    Log.info("Number of exemplars: " + agg._exemplars.length);
    //    Assert.assertTrue(agg._exemplars.length==649);
    output.remove();
    frame.remove();
    agg.remove();
  }
Example #10
0
  @Override
  protected Response serve() {
    try {
      // pull everything local
      Log.info("ExportFiles processing (" + path + ")");
      if (DKV.get(src_key) == null)
        throw new IllegalArgumentException(src_key.toString() + " not found.");
      Object value = DKV.get(src_key).get();
      // create a stream to read the entire VA or Frame
      if (!(value instanceof ValueArray) && !(value instanceof Frame))
        throw new UnsupportedOperationException("Can only export Frames or ValueArrays.");
      InputStream csv =
          value instanceof ValueArray
              ? new ValueArray.CsvVAStream((ValueArray) value, null)
              : ((Frame) value).toCSV(true);
      String p2 = path.toLowerCase();
      if (p2.startsWith("hdfs://")) serveHdfs(csv);
      else if (p2.startsWith("s3n://")) serveHdfs(csv);
      else serveLocalDisk(csv);

      return RequestBuilders.Response.done(this);
    } catch (Throwable t) {
      return RequestBuilders.Response.error(t);
    }
  }
Example #11
0
 /**
  * Compute the correct final quantile from these values. If the lo and hi elements are equal,
  * use them. However, if they differ, then there is no single value which exactly matches the
  * desired quantile. There are several well-accepted definitions in this case, including picking
  * either the lo or the hi, averaging them, or doing a linear interpolation.
  *
  * @param lo the highest element less than or equal to the desired quantile
  * @param hi the lowest element greater than or equal to the desired quantile
  * @param row row number (zero based) of the lo element; the hi element is at row + 1
  * @param nrows total number of rows
  * @param prob the desired quantile, in [0, 1]
  * @param method how to combine lo and hi when they differ
  * @return the desired quantile.
  */
 static double computeQuantile(
     double lo,
     double hi,
     double row,
     double nrows,
     double prob,
     QuantileModel.CombineMethod method) {
   if (lo == hi) return lo; // Equal; pick either
   if (method == null) method = QuantileModel.CombineMethod.INTERPOLATE;
   switch (method) {
     case INTERPOLATE:
       return linearInterpolate(lo, hi, row, nrows, prob);
     case AVERAGE:
       return 0.5 * (hi + lo);
     case LOW:
       return lo;
     case HIGH:
       return hi;
     default:
       Log.info(
           "Unknown even sample size quantile combination type: "
               + method
               + ". Doing linear interpolation.");
       return linearInterpolate(lo, hi, row, nrows, prob);
   }
 }
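// A concrete feel for the combine methods above, as a standalone sketch. The
// INTERPOLATE branch uses the standard "type 7" weight prob * (n - 1); that
// exact formula is an assumption here, not necessarily H2O's linearInterpolate().
public class QuantileCombineDemo {
  enum Combine { INTERPOLATE, AVERAGE, LOW, HIGH }

  static double quantile(double[] sorted, double prob, Combine method) {
    double h = prob * (sorted.length - 1); // fractional row index
    int row = (int) Math.floor(h);
    double lo = sorted[row];
    double hi = sorted[Math.min(row + 1, sorted.length - 1)];
    if (lo == hi) return lo; // equal; pick either
    switch (method) {
      case AVERAGE: return 0.5 * (hi + lo);
      case LOW: return lo;
      case HIGH: return hi;
      default: return lo + (h - row) * (hi - lo); // linear interpolation
    }
  }

  public static void main(String[] args) {
    double[] data = {1, 2, 3, 4};
    for (Combine m : Combine.values())
      System.out.println(m + ": median = " + quantile(data, 0.5, m));
    // INTERPOLATE: 2.5, AVERAGE: 2.5, LOW: 2.0, HIGH: 3.0
  }
}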
Example #12
0
  private static void addFolder2(
      FileSystem fs, Path p, ArrayList<String> keys, ArrayList<String> failed) {
    try {
      if (fs == null) return;

      Futures futures = new Futures();
      for (FileStatus file : fs.listStatus(p)) {
        Path pfs = file.getPath();
        if (file.isDir()) {
          addFolder2(fs, pfs, keys, failed);
        } else {
          long size = file.getLen();
          Key res;
          if (pfs.getName().endsWith(Extensions.JSON)) {
            throw H2O.unimpl();
          } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
            throw H2O.unimpl();
          } else {
            Key k = HdfsFileVec.make(file, futures);
            keys.add(k.toString());
            Log.info("PersistHdfs: DKV.put(" + k + ")");
          }
        }
      }
    } catch (Exception e) {
      Log.err(e);
      failed.add(p.toString());
    }
  }
Example #13
0
 // filter the current active columns using the strong rules
 // note: strong rules are updated so that they keep all previous coefficients in, to prevent
 // issues with line search
 private int[] activeCols(final double l1, final double l2, final double[] grad) {
   final double rhs = alpha[0] * (2 * l1 - l2);
   int[] cols = MemoryManager.malloc4(_dinfo.fullN());
   int selected = 0;
   int j = 0;
   if (_activeCols == null) _activeCols = new int[] {-1};
   for (int i = 0; i < _dinfo.fullN(); ++i)
     if ((j < _activeCols.length && i == _activeCols[j]) || grad[i] > rhs || grad[i] < -rhs) {
       cols[selected++] = i;
       if (j < _activeCols.length && i == _activeCols[j]) ++j;
     }
   if (!strong_rules_enabled || selected == _dinfo.fullN()) {
     _activeCols = null;
     _activeData._adaptedFrame = _dinfo._adaptedFrame;
     _activeData = _dinfo;
   } else {
     _activeCols = Arrays.copyOf(cols, selected);
     _activeData = _dinfo.filterExpandedColumns(_activeCols);
   }
   Log.info(
       "GLM2 strong rule at lambda="
           + l1
           + ", got "
           + selected
           + " active cols out of "
           + _dinfo.fullN()
           + " total.");
   return _activeCols;
 }
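// Toy illustration of the strong-rule filter above: keep column i when
// |grad[i]| > rhs, plus every previously active column. All values are made up.
public class StrongRuleDemo {
  public static void main(String[] args) {
    double alpha = 1.0, l1 = 0.10, l2 = 0.05;
    double rhs = alpha * (2 * l1 - l2); // 0.15, mirroring activeCols()
    double[] grad = {0.02, -0.30, 0.12, 0.40, -0.05};
    int[] previouslyActive = {2}; // always kept, regardless of the rule
    java.util.List<Integer> active = new java.util.ArrayList<>();
    int j = 0;
    for (int i = 0; i < grad.length; i++) {
      boolean wasActive = j < previouslyActive.length && i == previouslyActive[j];
      if (wasActive || grad[i] > rhs || grad[i] < -rhs) active.add(i);
      if (wasActive) j++;
    }
    System.out.println("Active columns: " + active); // [1, 2, 3]
  }
}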
Example #14
0
  @Test
  public void sparseTester() {
    Storage.DenseVector dv = new Storage.DenseVector(20);
    dv.set(3, 0.21f);
    dv.set(7, 0.13f);
    dv.set(18, 0.14f);
    Storage.SparseVector sv = new Storage.SparseVector(dv);
    assert (sv.size() == 20);
    assert (sv.nnz() == 3);

    // dense treatment
    for (int i = 0; i < sv.size(); ++i) Log.info("sparse [" + i + "] = " + sv.get(i));

    // sparse treatment
    for (Storage.SparseVector.Iterator it = sv.begin(); !it.equals(sv.end()); it.next()) {
      //      Log.info(it.toString());
      Log.info(it.index() + " -> " + it.value());
    }

    Storage.DenseColMatrix dcm = new Storage.DenseColMatrix(3, 5);
    dcm.set(2, 1, 3.2f);
    dcm.set(1, 3, -1.2f);
    assert (dcm.get(2, 1) == 3.2f);
    assert (dcm.get(1, 3) == -1.2f);
    assert (dcm.get(0, 0) == 0f);

    Storage.DenseRowMatrix drm = new Storage.DenseRowMatrix(3, 5);
    drm.set(2, 1, 3.2f);
    drm.set(1, 3, -1.2f);
    assert (drm.get(2, 1) == 3.2f);
    assert (drm.get(1, 3) == -1.2f);
    assert (drm.get(0, 0) == 0f);

    Storage.SparseColMatrix scm = new Storage.SparseColMatrix(3, 5);
    scm.set(2, 1, 3.2f);
    scm.set(1, 3, -1.2f);
    assert (scm.get(2, 1) == 3.2f);
    assert (scm.get(1, 3) == -1.2f);
    assert (scm.get(0, 0) == 0f);

    Storage.SparseRowMatrix srm = new Storage.SparseRowMatrix(3, 5);
    srm.set(2, 1, 3.2f);
    srm.set(1, 3, -1.2f);
    assert (srm.get(2, 1) == 3.2f);
    assert (srm.get(1, 3) == -1.2f);
    assert (srm.get(0, 0) == 0f);
  }
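// A minimal sparse-vector sketch mirroring the two traversals in the test
// above; a TreeMap stands in for Storage.SparseVector (hypothetical, not H2O).
public class SparseVecDemo {
  public static void main(String[] args) {
    int size = 20;
    java.util.TreeMap<Integer, Float> sv = new java.util.TreeMap<>();
    sv.put(3, 0.21f);
    sv.put(7, 0.13f);
    sv.put(18, 0.14f);

    // dense treatment: every index, zeros included -- O(size) work
    for (int i = 0; i < size; i++)
      System.out.println("sparse [" + i + "] = " + sv.getOrDefault(i, 0f));

    // sparse treatment: non-zeros only -- O(nnz) work
    sv.forEach((i, v) -> System.out.println(i + " -> " + v));
  }
}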
Example #15
0
 public static void setMemLow() {
   if (!CAN_ALLOC) return;
   synchronized (_lock) {
     CAN_ALLOC = false;
   }
   // NO LOGGING UNDER LOCK!
   Log.info(Sys.CLEAN, "Pausing to swap to disk; more memory may help");
 }
Example #16
0
  @Test
  public void testCategoricalIris() throws InterruptedException, ExecutionException {
    GLRM job = null;
    GLRMModel model = null;
    Frame train = null;

    try {
      train = parse_test_file(Key.make("iris.hex"), "smalldata/iris/iris_wheader.csv");
      GLRMParameters parms = new GLRMParameters();
      parms._train = train._key;
      parms._k = 4;
      parms._loss = GLRMParameters.Loss.Absolute;
      parms._init = GLRM.Initialization.SVD;
      parms._transform = DataInfo.TransformType.NONE;
      parms._recover_svd = true;
      parms._max_iterations = 1000;

      try {
        job = new GLRM(parms);
        model = job.trainModel().get();
        Log.info(
            "Iteration "
                + model._output._iterations
                + ": Objective value = "
                + model._output._objective);
        model.score(train).delete();
        ModelMetricsGLRM mm = (ModelMetricsGLRM) ModelMetrics.getFromDKV(model, train);
        Log.info(
            "Numeric Sum of Squared Error = "
                + mm._numerr
                + "\tCategorical Misclassification Error = "
                + mm._caterr);
      } catch (Throwable t) {
        t.printStackTrace();
        throw new RuntimeException(t);
      } finally {
        if (job != null) job.remove(); // job may be null if construction threw
      }
    } catch (Throwable t) {
      t.printStackTrace();
      throw new RuntimeException(t);
    } finally {
      if (train != null) train.delete();
      if (model != null) model.delete();
    }
  }
Example #17
0
 public static void setMemGood() {
   if (CAN_ALLOC) return;
   synchronized (_lock) {
     CAN_ALLOC = true;
     _lock.notifyAll();
   }
   // NO LOGGING UNDER LOCK!
   Log.info(Sys.CLEAN, "Continuing after swapping");
 }
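// The "NO LOGGING UNDER LOCK!" discipline above, distilled: mutate shared
// state inside the smallest possible critical section, then log only after
// the monitor is released, so a slow or blocked logger can never stall (or
// deadlock with) threads waiting on the lock. Stand-in names, not H2O code.
public class LogOutsideLockDemo {
  private static final Object _lock = new Object();
  private static boolean CAN_ALLOC = true;

  static void setMemLow() {
    synchronized (_lock) {
      if (!CAN_ALLOC) return;
      CAN_ALLOC = false;
    } // lock released here...
    System.out.println("Pausing to swap to disk"); // ...before any logging
  }

  public static void main(String[] args) {
    setMemLow();
  }
}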
Example #18
0
 private static void addFolder(FileSystem fs, Path p, JsonArray succeeded, JsonArray failed) {
   try {
     if (fs == null) return;
     for (FileStatus file : fs.listStatus(p)) {
       Path pfs = file.getPath();
       if (file.isDir()) {
         addFolder(fs, pfs, succeeded, failed);
       } else {
         Key k = Key.make(pfs.toString());
         long size = file.getLen();
         Value val = null;
         if (pfs.getName().endsWith(Extensions.JSON)) {
           JsonParser parser = new JsonParser();
           JsonObject json = parser.parse(new InputStreamReader(fs.open(pfs))).getAsJsonObject();
           JsonElement v = json.get(Constants.VERSION);
           if (v == null) throw new RuntimeException("Missing version");
           JsonElement type = json.get(Constants.TYPE);
           if (type == null) throw new RuntimeException("Missing type");
           Class c = Class.forName(type.getAsString());
           OldModel model = (OldModel) c.newInstance();
           model.fromJson(json);
         } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
           FSDataInputStream s = fs.open(pfs);
           int sz = (int) Math.min(1L << 20, size); // Read up to the 1st meg
           byte[] mem = MemoryManager.malloc1(sz);
           s.readFully(mem);
           // Convert to a ValueArray (hope it fits in 1Meg!)
           ValueArray ary = new ValueArray(k, 0).read(new AutoBuffer(mem));
           val = new Value(k, ary, Value.HDFS);
         } else if (size >= 2 * ValueArray.CHUNK_SZ) {
           val =
               new Value(
                   k,
                   new ValueArray(k, size),
                   Value.HDFS); // ValueArray byte wrapper over a large file
         } else {
           val = new Value(k, (int) size, Value.HDFS); // Plain Value
           val.setdsk();
         }
         DKV.put(k, val);
         Log.info("PersistHdfs: DKV.put(" + k + ")");
         JsonObject o = new JsonObject();
         o.addProperty(Constants.KEY, k.toString());
         o.addProperty(Constants.FILE, pfs.toString());
         o.addProperty(Constants.VALUE_SIZE, file.getLen());
         succeeded.add(o);
       }
     }
   } catch (Exception e) {
     Log.err(e);
     JsonObject o = new JsonObject();
     o.addProperty(Constants.FILE, p.toString());
     o.addProperty(Constants.ERROR, e.getMessage());
     failed.add(o);
   }
 }
Example #19
0
 protected double checkGradient(final double[] newBeta, final double[] grad) {
   // check the gradient
   ADMMSolver.subgrad(alpha[0], lambda[_lambdaIdx], newBeta, grad);
   double err = 0;
   for (double d : grad)
     if (d > err) err = d;
     else if (d < -err) err = -d;
   Log.info("GLM converged with max |subgradient| = " + err);
   return err;
 }
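// Quick standalone check that the loop in checkGradient() computes the
// infinity norm max_i |grad[i]| (toy values; ADMMSolver.subgrad itself is
// not reproduced here).
public class SubgradNormDemo {
  public static void main(String[] args) {
    double[] grad = {0.01, -0.20, 0.05};
    double err = 0;
    for (double d : grad)
      if (d > err) err = d;
      else if (d < -err) err = -d;
    System.out.println("max |subgradient| = " + err); // 0.2
  }
}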
Example #20
0
 @Override
 protected Response serve() {
   try {
     Log.info("Unlocking all locked keys on the cluster.");
     UnlockTask cleanup = new UnlockTask();
     cleanup.invokeOnAllNodes();
   } catch (Throwable e) {
     return Response.error(e);
   }
   return Response.done(this);
 }
Example #21
0
 GridSearch start() {
   Log.info("Starting gridsearch: _total_models=" + _total_models);
   start(
       new H2OCountedCompleter() {
         @Override
         public void compute2() {
           gridSearch(_params);
           tryComplete();
         }
       },
       _total_models);
   return this;
 }
Example #22
0
  @Test
  public void testCovtype() {
    Frame frame = parse_test_file("smalldata/covtype/covtype.20k.data");

    AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
    parms._train = frame._key;
    parms._radius_scale = 5.0;
    long start = System.currentTimeMillis();
    AggregatorModel agg = new Aggregator(parms).trainModel().get(); // 0.179
    System.out.println(
        "AggregatorModel finished in: "
            + (System.currentTimeMillis() - start) / 1000.
            + " seconds");
    agg.checkConsistency();
    frame.delete();
    Frame output = agg._output._output_frame.get();
    Log.info("Exemplars: " + output.toString());
    output.remove();
    Log.info("Number of exemplars: " + agg._exemplars.length);
    //    Assert.assertTrue(agg._exemplars.length==615);
    agg.remove();
  }
Example #23
0
 protected void nextLambda(final GLMIterationTask glmt, GLMValidation val) {
   currentLambdaIter = 0;
   boolean improved = _model.setAndTestValidation(_lambdaIdx, val);
   _model.clone().update(self());
    boolean done = false; // _iter < max_iter && (improved || _runAllLambdas) && _lambdaIdx < (lambda.length-1);
   if (_iter == max_iter) {
     Log.info("GLM2 reached max #iterations.");
     done = true;
   } else if (!improved && !_runAllLambdas) {
     Log.info("GLM2 converged as solution stopped improving with decreasing lambda.");
     done = true;
   } else if (_lambdaIdx == lambda.length - 1) {
     Log.info("GLM2 done with all given lambdas.");
     done = true;
   } else if (_activeCols != null && _activeCols.length + 1 >= MAX_PREDICTORS) {
     Log.info(
         "GLM2 reached maximum allowed number of predictors at lambda = " + lambda[_lambdaIdx]);
     done = true;
   }
   if (!done) { // continue with next lambda value?
     ++_lambdaIdx;
     glmt._val = null;
      if (glmt._gram == null) { // assume we had lambda search with strong rules
        // we use strong rules, so we can't really use this gram for the next lambda computation
        // (different sets of coefficients)
        // I expect that:
        //  1) beta has been expanded to match the current set of active cols
        //  2) it is a new GLMIterationTask ready to be launched
        // caller (nextLambda(glmt,beta)) is expected to ensure this...
        assert _activeCols == null || (glmt._beta.length == _activeCols.length + 1);
        assert !glmt.isDone();
        glmt.asyncExec(_activeData._adaptedFrame);
      } else { // we have the right gram, just solve with the next lambda
        new Iteration().callback(glmt);
      }
    } else { // nope, we're done
      GLM2.this.complete(); // signal we're done to anyone waiting for the job
    }
 }
Example #24
0
 public void cancel(final String msg) {
   state = msg == null ? JobState.CANCELLED : JobState.CRASHED;
   if (state == JobState.CANCELLED)
     Log.info("Job " + self() + "(" + description + ") was cancelled.");
   exception = msg;
   // replace finished job by a job handle
   replaceByJobHandle();
   DKV.write_barrier();
   final Job job = this;
   H2O.submitTask(
       new H2OCountedCompleter() {
         @Override
         public void compute2() {
           job.onCancelled();
         }
       });
 }
Example #25
0
  @Ignore
  @Test
  public void testCovtypeMemberIndices() {
    Frame frame = parse_test_file("smalldata/covtype/covtype.20k.data");

    AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
    parms._train = frame._key;
    parms._radius_scale = 5.0;
    long start = System.currentTimeMillis();
    AggregatorModel agg = new Aggregator(parms).trainModel().get(); // 1.489
    System.out.println(
        "AggregatorModel finished in: "
            + (System.currentTimeMillis() - start) / 1000.
            + " seconds");
    agg.checkConsistency();

    //    Frame assignment = new Frame(new Vec[]{(Vec)agg._exemplar_assignment_vec_key.get()});
    //    Frame.export(assignment, "/tmp/assignment", "yada", true);
    //    Log.info("Exemplars: " + new Frame(new
    // Vec[]{(Vec)agg._exemplar_assignment_vec_key.get()}).toString(0,20000));
    Log.info("Number of exemplars: " + agg._exemplars.length);

    Key<Frame> memberKey = Key.make();
    for (int i = 0; i < agg._exemplars.length; ++i) {
      Frame members = agg.scoreExemplarMembers(memberKey, i);
      assert (members.numRows() == agg._counts[i]);
      //    Log.info(members);
      members.delete();
    }

    Frame output = agg._output._output_frame.get();
    output.remove();
    Log.info("Number of exemplars: " + agg._exemplars.length);
    //    Assert.assertTrue(agg._exemplars.length==615);
    frame.delete();
    agg.remove();
  }
Example #26
0
 @Override
 protected Frame rebalance(final Frame original_fr, boolean local, final String name) {
   if (original_fr == null) return null;
   if (_parms._force_load_balance) {
     int original_chunks = original_fr.anyVec().nChunks();
     _job.update(0, "Load balancing " + name.substring(name.length() - 5) + " data...");
     int chunks = desiredChunks(original_fr, local);
     if (!_parms._reproducible) {
       if (original_chunks >= chunks) {
         if (!_parms._quiet_mode)
           Log.info(
               "Dataset already contains " + original_chunks + " chunks. No need to rebalance.");
         return original_fr;
       }
     } else { // reproducible, set chunks to 1
       assert chunks == 1;
       if (!_parms._quiet_mode)
         Log.warn("Reproducibility enforced - using only 1 thread - can be slow.");
       if (original_chunks == 1) return original_fr;
     }
     if (!_parms._quiet_mode)
       Log.info(
           "Rebalancing "
               + name.substring(name.length() - 5)
               + " dataset into "
               + chunks
               + " chunks.");
     Key newKey = Key.make(name + ".chks" + chunks);
     RebalanceDataSet rb = new RebalanceDataSet(original_fr, newKey, chunks);
     H2O.submitTask(rb).join();
     Frame rebalanced_fr = DKV.get(newKey).get();
     Scope.track(rebalanced_fr);
     return rebalanced_fr;
   }
   return original_fr;
 }
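// Distilled decision logic of rebalance() above, with toy types: reproducible
// runs force a single chunk, otherwise data is only ever split into more
// chunks, never coalesced into fewer.
public class RebalanceDecisionDemo {
  static int targetChunks(int originalChunks, int desiredChunks, boolean reproducible) {
    if (reproducible) return 1; // single-threaded, deterministic
    if (originalChunks >= desiredChunks) return originalChunks; // already parallel enough
    return desiredChunks; // rebalance up to the desired count
  }

  public static void main(String[] args) {
    System.out.println(targetChunks(4, 16, false)); // 16 -> rebalance
    System.out.println(targetChunks(32, 16, false)); // 32 -> keep as-is
    System.out.println(targetChunks(32, 16, true)); // 1  -> reproducible mode
  }
}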
Example #27
0
    @Override
    public void lcompute() {
      // Optional: cancel all jobs
      //      for (Job job : Job.all()) {
      //        job.cancel();
      //        Job.waitUntilJobEnded(job.self());
      //      }

      final Set<Key> keySet = H2O.globalKeySet(null);
      for (Key key : keySet) {
        if (!key.home()) continue; // only unlock local keys
        final Value val = DKV.get(key);
        if (val == null) continue;
        if (val.rawPOJO() == null) continue; // need to have a POJO to be locked
        if (!val.isLockable()) continue;
        final Object obj = val.rawPOJO();
        assert (obj instanceof Lockable<?>);
        final Lockable<?> lockable = (Lockable<?>) (obj);
        final Key[] lockers = ((Lockable) obj)._lockers;
        if (lockers != null) {
          // check that none of the locking jobs is still running
          for (Key locker : lockers) {
            if (locker != null && locker.type() == Key.JOB) {
              final Job job = UKV.get(locker);
              if (job != null && job.isRunning())
                throw new UnsupportedOperationException(
                    "Cannot unlock all keys since locking jobs are still running.");
            }
          }
          lockable.unlock_all();
          Log.info("Unlocked key '" + key + "' from " + lockers.length + " lockers.");
        }
      }
      Log.info("All keys are now unlocked.");
      tryComplete();
    }
Example #28
0
  // Start by splitting all the data according to some criteria (minimize
  // variance at the leaves).  Record on each row which split it goes to, and
  // assign a split number to it (for next pass).  On *this* pass, use the
  // split-number to build a per-split histogram, with a per-histogram-bucket
  // variance.
  @Override
  protected GBMModel buildModel(
      GBMModel model,
      final Frame fr,
      String names[],
      String domains[][],
      String[] cmDomain,
      Timer t_build) {

    // Tag out rows missing the response column
    new ExcludeNAResponse().doAll(fr);

    // Build trees until we hit the limit
    int tid;
    DTree[] ktrees = null; // Trees
    TreeStats tstats = new TreeStats(); // Tree stats
    for (tid = 0; tid < ntrees; tid++) {
      // During the first iteration the model contains 0 trees, then 1 tree, ...
      // BUT if validation is not specified, the model does not participate in voting;
      // on-the-fly computed data are used instead
      model = doScoring(model, fr, ktrees, tid, cmDomain, tstats, false, false, false);
      // ESL2, page 387
      // Step 2a: Compute prediction (prob distribution) from prior tree results:
      //   Work <== f(Tree)
      new ComputeProb().doAll(fr);

      // ESL2, page 387
      // Step 2b i: Compute residuals from the prediction (probability distribution)
      //   Work <== f(Work)
      new ComputeRes().doAll(fr);

      // ESL2, page 387, Step 2b ii, iii, iv
      Timer kb_timer = new Timer();
      ktrees = buildNextKTrees(fr);
      Log.info(Sys.GBM__, (tid + 1) + ". tree was built in " + kb_timer.toString());
      if (!Job.isRunning(self())) break; // If canceled during building, do not bulkscore

      // Check latest predictions
      tstats.updateBy(ktrees);
    }
    // Final scoring
    model = doScoring(model, fr, ktrees, tid, cmDomain, tstats, true, false, false);

    return model;
  }
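// Toy illustration of the ESL2 p.387 steps referenced above (2a: compute
// predictions from the prior ensemble; 2b: residuals as the negative gradient).
// Squared error stands in here for the multinomial loss used in the
// classification path above; this is an assumption for illustration only.
public class GbmStepDemo {
  public static void main(String[] args) {
    double[] y = {1.0, 0.0, 1.0, 1.0};
    double f0 = 0.75; // initial constant model: the mean of y
    double[] work = new double[y.length];
    for (int i = 0; i < y.length; i++)
      work[i] = y[i] - f0; // Step 2b: residual = -dL/dF for L = (y - F)^2 / 2
    System.out.println(java.util.Arrays.toString(work)); // [0.25, -0.75, 0.25, 0.25]
  }
}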
Example #29
0
  @Override
  protected DRFModel buildModel(
      DRFModel model, final Frame fr, String names[], String domains[][], final Timer t_build) {
    // Append number of trees participating in on-the-fly scoring
    fr.add("OUT_BAG_TREES", response.makeZero());

    // The RNG used to pick split columns
    Random rand = createRNG(_seed);

    // Prepare working columns
    new SetWrkTask().doAll(fr);

    int tid;
    DTree[] ktrees = null;
    // Prepare tree statistics
    TreeStats tstats = new TreeStats();
    // Build trees until we hit the limit
    for (tid = 0; tid < ntrees; tid++) { // Building tid-tree
      model =
          doScoring(
              model, fr, ktrees, tid, tstats, tid == 0, !hasValidation(), build_tree_one_node);
      // At each iteration build K trees (K = nclass = response column domain size)

      // TODO: parallelize more? build more than k trees at each time, we need to care about
      // temporary data
      // Idea: launch more DRF at once.
      Timer kb_timer = new Timer();
      ktrees = buildNextKTrees(fr, _mtry, sample_rate, rand, tid);
      Log.info(Sys.DRF__, (tid + 1) + ". tree was built in " + kb_timer.toString());
      if (!Job.isRunning(self())) break; // If canceled during building, do not bulkscore

      // Check latest predictions
      tstats.updateBy(ktrees);
    }

    model = doScoring(model, fr, ktrees, tid, tstats, true, !hasValidation(), build_tree_one_node);
    // Make sure that we did not miss any votes
    assert !importance
            || _treeMeasuresOnOOB.npredictors() == _treeMeasuresOnSOOB[0 /*variable*/].npredictors()
        : "Missing some tree votes in variable importance voting?!";

    return model;
  }
Example #30
0
 public ValidationMessage(
     ModelBuilder.ValidationMessage.MessageType message_type,
     String field_name,
     String message) {
   this.message_type = message_type;
   this.field_name = field_name;
   this.message = message;
   switch (message_type) {
     case INFO:
       Log.info(field_name + ": " + message);
       break;
     case WARN:
       Log.warn(field_name + ": " + message);
       break;
     case ERROR:
       Log.err(field_name + ": " + message);
       break;
   }
 }