예제 #1
  /**
   * Recursively walks {@code p} and registers every plain file found under it.
   *
   * <p>Sub-directories are descended into. For every other entry a key is created through
   * {@code HdfsFileVec.make} and its string form is appended to {@code keys}. Files whose name
   * ends with {@code Extensions.JSON} or {@code Extensions.HEX} are rejected with
   * {@code H2O.unimpl()}.
   *
   * @param fs file system to list; when {@code null} the method returns without doing anything
   * @param p directory to list
   * @param keys receives the string form of every key created
   * @param failed not referenced in the visible part of this method; presumably filled by the
   *     exception handler, whose body is not shown in this excerpt
   */
  private static void addFolder2(
      FileSystem fs, Path p, ArrayList<String> keys, ArrayList<String> failed) {
    try {
      if (fs == null) return;

      // Passed to every HdfsFileVec.make call below.
      Futures futures = new Futures();
      for (FileStatus file : fs.listStatus(p)) {
        Path pfs = file.getPath();
        if (file.isDir()) {
          // Directory: descend, sharing the same output lists.
          addFolder2(fs, pfs, keys, failed);
        } else {
          // NOTE(review): `size` and `res` are never read in the visible part of this method.
          long size = file.getLen();
          Key res;
          if (pfs.getName().endsWith(Extensions.JSON)) {
            throw H2O.unimpl();
          } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
            throw H2O.unimpl();
          } else {
            Key k = null;
            // Create the key for this file and remember its string form for the caller.
            keys.add((k = HdfsFileVec.make(file, futures)).toString());
            Log.info("PersistHdfs: DKV.put(" + k + ")");
    } catch (Exception e) {
예제 #2
파일: Frame.java 프로젝트: vmlaker/h2o
 /**
  * Copies the selected rows of the selected columns from {@code chks} into {@code nchks}.
  *
  * <p>Rows are processed as half-open ranges {@code [rlo, rhi)} of chunk-local indices. With no
  * row selector ({@code _rows == null}) a single range covering the whole chunk is copied.
  * Otherwise each pass of the outer loop reads the next entry of {@code _rows}: after subtracting
  * one, a negative value is handled as an exclusion and a non-negative value as a row to include.
  *
  * @param chks input chunks; {@code chks[0]} supplies the start row and the row count
  * @param nchks output chunks, indexed in step with {@code _cols}
  */
 public void map(Chunk chks[], NewChunk nchks[]) {
   long rstart = chks[0]._start;
   int rlen = chks[0]._len; // Total row count
   int rx = 0; // Which row to in/ex-clude
   int rlo = 0; // Lo/Hi for this block of rows
   int rhi = rlen;
   while (true) { // Still got rows to include?
     if (_rows != null) { // Got a row selector?
       if (rx >= _rows.length) break; // All done with row selections
       long r = _rows[rx++] - 1; // Next row selector
       if (r < 0) { // Row exclusion
         // NOTE(review): rx was just incremented, so `_rows[rx]` is the NEXT selector and is read
         // here without an `rx < _rows.length` check — confirm this cannot run past the array.
         if (rx > 0 && _rows[rx - 1] < _rows[rx]) throw H2O.unimpl();
         // For a selector -k, r is -k - 1, so er is k - 1.
         long er = Math.abs(r) - 2;
         if (er < rstart) continue;
         // scoop up all of the rows before the first exclusion
         if (rx == 1 && ((int) (er + 1 - rstart)) > 0 && _ex) {
           rlo = (int) rstart;
           rhi = (int) (er - rstart);
           _ex = false;
         } else {
           rlo = (int) (er + 1 - rstart);
           // TODO: handle jumbled row indices ( e.g. -c(1,5,3) )
           while (rx < _rows.length && (_rows[rx] + 1 == _rows[rx - 1] && rlo < rlen)) {
             if (rx < _rows.length - 1 && _rows[rx] < _rows[rx + 1]) throw H2O.unimpl();
             rlo++; // Exclude consecutive rows
           // The range ends at the next exclusion, or at the end of the chunk when none is left.
           rhi = rx >= _rows.length ? rlen : (int) Math.abs(_rows[rx] - 1) - 2;
           if (rx < _rows.length - 1 && _rows[rx] < _rows[rx + 1]) throw H2O.unimpl();
       } else { // Positive row list?
         if (r < rstart) continue;
         rlo = (int) (r - rstart);
         rhi = rlo + 1; // Stop at the next row
         while (rx < _rows.length && (_rows[rx] - 1 - rstart) == rhi && rhi < rlen) {
           rhi++; // Grab sequential rows
     // Process this next set of rows
     // For all cols in the new set
     for (int i = 0; i < _cols.length; i++) {
       Chunk oc = chks[_cols[i]];
       NewChunk nc = nchks[i];
       if (oc._vec.isInt()) { // Slice on integer columns
         // Integer columns are copied as longs, with NAs carried over explicitly.
         for (int j = rlo; j < rhi; j++)
           if (oc.isNA0(j)) nc.addNA();
           else nc.addNum(oc.at80(j), 0);
       } else { // Slice on double columns
         for (int j = rlo; j < rhi; j++) nc.addNum(oc.at0(j));
     rlo = rhi;
     // Without a row selector there is exactly one range (the whole chunk), so stop here.
     if (_rows == null) break;
예제 #3
  /**
   * Sets up the table of persistence back ends.
   *
   * <p>The ice back end is chosen from {@code iceRoot}: a Windows drive-letter path, or a URI with
   * no scheme or the file scheme, maps to a {@code PersistFS}; an HDFS scheme is only reported as
   * unsupported. NFS is always installed. The HDFS and S3 back ends are instantiated reflectively
   * by class name, and any failure to do so is logged as "not available".
   *
   * @param iceRoot location of the ice root
   */
  public PersistManager(URI iceRoot) {
    I = new Persist[MAX_BACKENDS];
    stats = new PersistStatsEntry[MAX_BACKENDS];
    for (int i = 0; i < stats.length; i++) {
      stats[i] = new PersistStatsEntry();

    if (iceRoot == null) {
      Log.err("ice_root must be specified.  Exiting.");
      // NOTE(review): no exit call is visible after this log line in the excerpt; if execution
      // continued, `iceRoot.toString()` below would dereference null — confirm against upstream.

    Persist ice = null;
    // A leading drive letter followed by ':' (e.g. "C:...") is handled as a plain Windows path.
    boolean windowsPath = iceRoot.toString().matches("^[a-zA-Z]:.*");

    if (windowsPath) {
      ice = new PersistFS(new File(iceRoot.toString()));
    } else if ((iceRoot.getScheme() == null) || Schemes.FILE.equals(iceRoot.getScheme())) {
      ice = new PersistFS(new File(iceRoot.getPath()));
    } else if (Schemes.HDFS.equals(iceRoot.getScheme())) {
      Log.err("HDFS ice_root not yet supported.  Exiting.");

      // I am not sure anyone actually ever does this.
      // H2O on Hadoop launches use local disk for ice root.
      // This has a chance to work, but turn it off until it gets tested.
      //      try {
      //        Class klass = Class.forName("water.persist.PersistHdfs");
      //        java.lang.reflect.Constructor constructor = klass.getConstructor(new
      // Class[]{URI.class});
      //        ice = (Persist) constructor.newInstance(iceRoot);
      //      } catch (Exception e) {
      //        Log.err("Could not initialize HDFS");
      //        throw new RuntimeException(e);
      //      }

    I[Value.ICE] = ice;
    I[Value.NFS] = new PersistNFS();

    // The optional back ends are looked up by name so this class does not need them at compile
    // time; any Throwable raised while loading or constructing one is reported as "not available".
    try {
      Class klass = Class.forName("water.persist.PersistHdfs");
      java.lang.reflect.Constructor constructor = klass.getConstructor();
      I[Value.HDFS] = (Persist) constructor.newInstance();
      Log.info("HDFS subsystem successfully initialized");
    } catch (Throwable ignore) {
      Log.info("HDFS subsystem not available");

    try {
      Class klass = Class.forName("water.persist.PersistS3");
      java.lang.reflect.Constructor constructor = klass.getConstructor();
      I[Value.S3] = (Persist) constructor.newInstance();
      Log.info("S3 subsystem successfully initialized");
    } catch (Throwable ignore) {
      Log.info("S3 subsystem not available");
예제 #4
파일: NewChunk.java 프로젝트: hihihippp/h2o
 // Set & At on NewChunks are weird: only used after inflating some other
 // chunk.  At this point the NewChunk is full size, no more appends allowed,
 // and the xs exponent array should be only full of zeros.  Accesses must be
 // in-range and refer to the inflated values of the original Chunk.
 //
 // Stores the long value l at row i with a zero exponent and always returns
 // true.  Throws H2O.unimpl() when _ds is non-null or when _len2 != _len.
 boolean set_impl(int i, long l) {
   if (_ds != null) throw H2O.unimpl();
   if (_len2 != _len) throw H2O.unimpl();
   _ls[i] = l;
   _xs[i] = 0;
   return true;
예제 #5
 /**
  * Waits for the cloud to reach {@code x} members and then records the current store size.
  *
  * <p>The {@code _stall_called_before} flag is set on the first call. The statements that were
  * guarded by that check appear to be missing from this excerpt.
  *
  * @param args not referenced in the visible part of this method
  * @param x cloud size passed to {@code H2O.waitForCloudSize}
  */
 public static void stall_till_cloudsize(String[] args, int x) {
   if (!_stall_called_before) {
     _stall_called_before = true;
   // 30000 is the second argument of waitForCloudSize — presumably a timeout in ms; confirm.
   H2O.waitForCloudSize(x, 30000);
   // Remember how many keys the store holds once the cloud is up.
   _initial_keycnt = H2O.store_size();
예제 #6
  /**
   * Evaluates the arguments {@code asts[1..]} and binds them column-wise into a new frame.
   *
   * <p>The first pass executes every argument and checks that all non-empty frames agree on their
   * row count; a mismatch raises {@code IllegalArgumentException}. When no argument supplies a
   * Vec, a temporary one-row Vec is created and removed again before returning. The second pass
   * fills the result frame according to each value's type.
   *
   * @param env execution environment handed to each {@code exec} call
   * @param stk helper used to track the evaluated values
   * @param asts argument list; index 0 is skipped by both loops
   * @return the new frame wrapped in a {@code ValFrame}
   */
  Val apply(Env env, Env.StackHelp stk, AST asts[]) {

    // Compute the variable args.  Find the common row count
    Val vals[] = new Val[asts.length];
    Vec vec = null;
    for (int i = 1; i < asts.length; i++) {
      vals[i] = stk.track(asts[i].exec(env));
      if (vals[i].isFrame()) {
        Vec anyvec = vals[i].getFrame().anyVec();
        if (anyvec == null) continue; // Ignore the empty frame
        // The first non-empty frame fixes the row count; later ones must match it.
        if (vec == null) vec = anyvec;
        else if (vec.length() != anyvec.length())
          throw new IllegalArgumentException(
              "cbind frames must have all the same rows, found "
                  + vec.length()
                  + " and "
                  + anyvec.length()
                  + " rows.");
    // `clean` records that `vec` is a temporary created here and must be removed before returning.
    boolean clean = false;
    if (vec == null) {
      vec = Vec.makeZero(1);
      clean = true;
    } // Default to length 1

    // Populate the new Frame
    Frame fr = new Frame();
    for (int i = 1; i < asts.length; i++) {
      // NOTE(review): the statements for the FRM case and any `break`/`default` lines are not
      // visible in this excerpt, so as shown every case ends in a throw — presumably lost in
      // extraction; confirm against the upstream file.
      switch (vals[i].type()) {
        case Val.FRM:
        case Val.FUN:
          throw H2O.unimpl();
        case Val.STR:
          throw H2O.unimpl();
        case Val.NUM:
          // Auto-expand scalars to fill every row
          double d = vals[i].getNum();
          fr.add(Double.toString(d), vec.makeCon(d));
          throw H2O.unimpl();
    if (clean) vec.remove();

    return new ValFrame(fr);
예제 #7
파일: Frame.java 프로젝트: vmlaker/h2o
  /**
   * Returns the Vecs of this frame, loading them from the DKV on first use.
   *
   * <p>When {@code _vecs} is already set it is returned directly. Otherwise one task is created
   * per key in {@code _keys}; each task fetches its Vec with {@code DKV.get} and stores it in the
   * matching slot of a fresh array, which is then cached in {@code _vecs} and returned.
   *
   * @return the cached or freshly loaded Vec array
   */
  public final Vec[] vecs() {
    if (_vecs != null) return _vecs;
    // Load all Vec headers; load them all in parallel by spawning F/J tasks.
    final Vec[] vecs = new Vec[_keys.length];
    // NOTE(review): the lines that submit each task and wait on `fs` are not visible in this
    // excerpt — confirm against the upstream file.
    Futures fs = new Futures();
    for (int i = 0; i < _keys.length; i++) {
      // Final copies so the anonymous task below can capture them.
      final int ii = i;
      final Key k = _keys[i];
      H2OCountedCompleter t =
          new H2OCountedCompleter() {
            // We need higher priority here as there is a danger of deadlock in
            // case of many calls from MRTask2 at once (e.g. frame with many
            // vectors invokes rollup tasks for all vectors in parallel).  Should
            // probably be done in CPS style in the future
            public byte priority() {
              return H2O.MIN_HI_PRIORITY;

            public void compute2() {
              vecs[ii] = DKV.get(k).get();
    return _vecs = vecs;
예제 #8
 // Compute a compressed integer buffer
 //
 // Allocates (_len << log) + off bytes and writes one (1 << log)-byte value per
 // row starting at byte offset `off`.  A row's value starts as -bias; when the
 // row is stored (no _id array, an empty _id array, or _id[j] == i) it becomes
 // either the NA marker NAS[log] or the mantissa _ls[j] rescaled by
 // 10^(_xs[j] - scale) with -bias added.
 //
 // NOTE(review): no `j++` is visible in this excerpt although the final assert
 // compares j with sparseLen(), and no `break`/`default` is visible in the
 // switch — presumably lost in extraction; confirm against the upstream file.
 private byte[] bufX(long bias, int scale, int off, int log) {
   byte[] bs = new byte[(_len << log) + off];
   int j = 0;
   for (int i = 0; i < _len; i++) {
     long le = -bias;
     if (_id == null || _id.length == 0 || (j < _id.length && _id[j] == i)) {
       if (isNA2(j)) {
         le = NAS[log];
       } else {
         // An exponent of Integer.MIN_VALUE + 1 is treated as exponent 0 here.
         int x = (_xs[j] == Integer.MIN_VALUE + 1 ? 0 : _xs[j]) - scale;
         le += x >= 0 ? _ls[j] * PrettyPrint.pow10i(x) : _ls[j] / PrettyPrint.pow10i(-x);
     // `log` selects the element width: 1, 2, 4 or 8 bytes.
     switch (log) {
       case 0:
         bs[i + off] = (byte) le;
       case 1:
         UnsafeUtils.set2(bs, (i << 1) + off, (short) le);
       case 2:
         UnsafeUtils.set4(bs, (i << 2) + off, (int) le);
       case 3:
         UnsafeUtils.set8(bs, (i << 3) + off, le);
         throw H2O.fail();
   // NOTE(review): the message reads `_id[j]`, but the loop above allows `_id == null`; building
   // the message on a failed assert could then itself throw — confirm.
   assert j == sparseLen()
       : "j = " + j + ", len = " + sparseLen() + ", len2 = " + _len + ", id[j] = " + _id[j];
   return bs;
예제 #9
 // Read up to 'len' bytes of Value. Value should already be persisted to
 // disk.  A racing delete can trigger a failure where we get a null return,
 // but no crash (although one could argue that a racing load&delete is a bug
 // no matter what).
 //
 // Opens the file for the Value's key, reads v._max bytes through an
 // AutoBuffer over the file channel and returns them.  Returns null when an
 // IOException is raised.  The stream is closed in the finally block.
 public byte[] load(Value v) {
   long skip = 0;
   Key k = v._key;
   // Convert an arraylet chunk into a long-offset from the base file.
   if (k._kb[0] == Key.ARRAYLET_CHUNK) {
     skip = ValueArray.getChunkOffset(k); // The offset
     k = ValueArray.getArrayKey(k); // From the base file key
   if (k._kb[0] == Key.DVEC) {
     skip = water.fvec.NFSFileVec.chunkOffset(k); // The offset
   // NOTE(review): `skip` is computed above but never used in the visible part of this method —
   // the lines that apply the offset may be missing from the excerpt; confirm.
   try {
     FileInputStream s = null;
     try {
       s = new FileInputStream(getFileForKey(k));
       FileChannel fc = s.getChannel();
       AutoBuffer ab = new AutoBuffer(fc, true, Value.NFS);
       byte[] b = ab.getA1(v._max);
       assert v.isPersisted();
       return b;
     } finally {
       // Close the stream on every path, including when the read throws.
       if (s != null) s.close();
   } catch (IOException e) { // Broken disk / short-file???
     return null;
예제 #10
 // Compute a sparse float buffer
 //
 // Writes one element per stored row after a header of CXDChunk._OFF bytes.
 // Each element is the row id (2 bytes, or 4 bytes when _len >= 65535)
 // followed by the value in `valsz` bytes.  `valsz` must be a power of two
 // (asserted); the switch handles 4 (float) and 8 (double).
 //
 // NOTE(review): no `break`/`default` is visible in the switch, so as shown
 // both cases run on to the throw — presumably lost in extraction; confirm.
 private byte[] bufD(final int valsz) {
   // log = log2(valsz); only used by the power-of-two assert below.
   int log = 0;
   while ((1 << log) < valsz) ++log;
   assert (1 << log) == valsz;
   final int ridsz = _len >= 65535 ? 4 : 2;
   final int elmsz = ridsz + valsz;
   int off = CXDChunk._OFF;
   byte[] buf = MemoryManager.malloc1(off + sparseLen() * elmsz, true);
   for (int i = 0; i < sparseLen(); i++, off += elmsz) {
     if (ridsz == 2) UnsafeUtils.set2(buf, off, (short) _id[i]);
     else UnsafeUtils.set4(buf, off, _id[i]);
     // Use _ds when present; otherwise rebuild the value from mantissa and exponent, with NaN
     // standing in for NA.
     final double dval =
         _ds == null ? isNA2(i) ? Double.NaN : _ls[i] * PrettyPrint.pow10(_xs[i]) : _ds[i];
     switch (valsz) {
       case 4:
         UnsafeUtils.set4f(buf, off + ridsz, (float) dval);
       case 8:
         UnsafeUtils.set8d(buf, off + ridsz, dval);
         throw H2O.fail();
   // Every element must have been written, ending exactly at the end of the buffer.
   assert off == buf.length;
   return buf;
예제 #11
파일: DataInfo.java 프로젝트: liaochy/h2o-3
 /**
  * Fills {@code normMul} and {@code normSub} for {@code n} consecutive vecs of the adapted frame.
  *
  * <p>Values written per transform type:
  *
  * <ul>
  *   <li>STANDARDIZE: multiplier {@code 1/sigma} (1 when sigma is 0), subtrahend mean
  *   <li>NORMALIZE: multiplier {@code 1/(max - min)} (1 when the range is not positive),
  *       subtrahend mean
  *   <li>DEMEAN: multiplier 1, subtrahend mean
  *   <li>DESCALE: multiplier {@code 1/sigma} (1 when sigma is 0), subtrahend 0
  * </ul>
  *
  * @param t transform type to apply
  * @param normMul output multipliers, written at index {@code i}
  * @param normSub output subtrahends, written at index {@code i}
  * @param vecStart index of the first vec in {@code _adaptedFrame}
  * @param n number of vecs to process
  */
 private void setTransform(
     TransformType t, double[] normMul, double[] normSub, int vecStart, int n) {
   for (int i = 0; i < n; ++i) {
     Vec v = _adaptedFrame.vec(vecStart + i);
     // NOTE(review): no `break`/`default` is visible between the cases in this excerpt, so as
     // shown every case falls through to the final throw — presumably lost in extraction; confirm
     // against the upstream file.
     switch (t) {
       case STANDARDIZE:
         normMul[i] = (v.sigma() != 0) ? 1.0 / v.sigma() : 1.0;
         normSub[i] = v.mean();
       case NORMALIZE:
         normMul[i] = (v.max() - v.min() > 0) ? 1.0 / (v.max() - v.min()) : 1.0;
         normSub[i] = v.mean();
       case DEMEAN:
         normMul[i] = 1;
         normSub[i] = v.mean();
       case DESCALE:
         normMul[i] = (v.sigma() != 0) ? 1.0 / v.sigma() : 1.0;
         normSub[i] = 0;
         throw H2O.unimpl();
     // The guards above avoid dividing by zero; NaN can still come from the vec statistics.
     assert !Double.isNaN(normMul[i]);
     assert !Double.isNaN(normSub[i]);
예제 #12
 /**
  * Fills {@code normMul} and {@code normSub} for {@code n} consecutive vecs of the adapted frame,
  * allowing a vec to occupy more than one output slot.
  *
  * <p>The output index {@code idx} advances by one for an ordinary vec and by
  * {@code nextNumericIdx(i)} for a vec reported by {@code isInteractionVec}. For such a vec the
  * multipliers of the additional slots are set to 1. The values written at {@code idx} per
  * transform type are: STANDARDIZE {@code 1/sigma} and mean; NORMALIZE {@code 1/(max - min)} and
  * mean; DEMEAN 1 and mean; DESCALE {@code 1/sigma} and 0. A zero sigma or non-positive range
  * gives a multiplier of 1.
  *
  * @param t transform type to apply
  * @param normMul output multipliers
  * @param normSub output subtrahends
  * @param vecStart index of the first vec in {@code _adaptedFrame}
  * @param n number of vecs to process
  */
 private void setTransform(
     TransformType t, double[] normMul, double[] normSub, int vecStart, int n) {
   int idx = 0; // idx!=i when interactions are in play, otherwise, it's just 'i'
   for (int i = 0; i < n; ++i) {
     Vec v = _adaptedFrame.vec(vecStart + i);
     boolean isIWV = isInteractionVec(vecStart + i);
     // NOTE(review): no `break`/`default` is visible between the cases in this excerpt, so as
     // shown every case falls through to the final throw — presumably lost in extraction; confirm
     // against the upstream file.
     switch (t) {
       case STANDARDIZE:
         normMul[idx] = (v.sigma() != 0) ? 1.0 / v.sigma() : 1.0;
         if (isIWV) for (int j = idx + 1; j < nextNumericIdx(i) + idx; j++) normMul[j] = 1;
         normSub[idx] = v.mean();
       case NORMALIZE:
         normMul[idx] = (v.max() - v.min() > 0) ? 1.0 / (v.max() - v.min()) : 1.0;
         if (isIWV) for (int j = idx + 1; j < nextNumericIdx(i) + idx; j++) normMul[j] = 1;
         normSub[idx] = v.mean();
       case DEMEAN:
         normMul[idx] = 1;
         if (isIWV) for (int j = idx + 1; j < nextNumericIdx(i) + idx; j++) normMul[j] = 1;
         normSub[idx] = v.mean();
       case DESCALE:
         normMul[idx] = (v.sigma() != 0) ? 1.0 / v.sigma() : 1.0;
         if (isIWV) for (int j = idx + 1; j < nextNumericIdx(i) + idx; j++) normMul[j] = 1;
         normSub[idx] = 0;
         throw H2O.unimpl();
     assert !Double.isNaN(normMul[idx]);
     assert !Double.isNaN(normSub[idx]);
     // Skip past every slot this vec occupied.
     idx = isIWV ? (idx + nextNumericIdx(i)) : (idx + 1);
예제 #13
 // ------------------------------------------------------------------------
 // Zipped file; no parallel decompression; decompress into local chunks,
 // parse local chunks; distribute chunks later.
 //
 // Parses `is` buffer by buffer into `dout`.  `bvs` is polled with a
 // zero-length read to learn the current chunk index of the underlying stream;
 // whenever that index advances, the current output writer is closed and
 // replaced by its nextChunk().  Any writer other than `dout` that is still
 // current at the end is reduced into `dout`, which is returned.  Throws
 // H2O.unimpl() when the parse type does not support parallel parsing.
 ParseWriter streamParseZip(final InputStream is, final StreamParseWriter dout, InputStream bvs)
     throws IOException {
   // All output into a fresh pile of NewChunks, one per column
   if (!_setup._parse_type._parallelParseSupported) throw H2O.unimpl();
   StreamData din = new StreamData(is);
   int cidx = 0;
   StreamParseWriter nextChunk = dout;
   int zidx = bvs.read(null, 0, 0); // Back-channel read of chunk index
   assert zidx == 1;
   while (is.available() > 0) {
     int xidx = bvs.read(null, 0, 0); // Back-channel read of chunk index
     if (xidx > zidx) { // Advanced chunk index of underlying ByteVec stream?
       zidx = xidx; // Record advancing of chunk
       nextChunk.close(); // Match output chunks to input zipfile chunks
       if (dout != nextChunk) {
         // Stop early when the owning job has been cancelled or has crashed.
         if (_jobKey != null && ((Job) DKV.getGet(_jobKey)).isCancelledOrCrashed()) break;
       nextChunk = nextChunk.nextChunk();
     parseChunk(cidx++, din, nextChunk);
   parseChunk(cidx, din, nextChunk); // Parse the remaining partial 32K buffer
   if (dout != nextChunk) dout.reduce(nextChunk);
   return dout;
예제 #14
파일: NewChunk.java 프로젝트: hihihippp/h2o
 /**
  * Returns the value at row {@code i} as a double.
  *
  * <p>When {@code _ds} is null the value comes from {@code at8_impl(i)}; otherwise it is read
  * from {@code _ds}. Throws {@code H2O.unimpl()} when {@code _len2} differs from {@code _len}.
  *
  * @param i row index
  * @return the value at row {@code i}
  */
 public double atd_impl(int i) {
   if (_len2 != _len) throw H2O.unimpl();
   if (_ds == null) return at8_impl(i);
   // When values are held in _ds, the exponent array is expected to be absent.
   assert _xs == null;
   return _ds[i];
예제 #15
파일: GLMModel.java 프로젝트: vsynych/h2o-3
    /**
     * Evaluates a per-link expression of {@code x}, selected by {@code _link}.
     *
     * <p>Values returned: logit {@code 1/(x(1-x))}, capped at 1e6 when the denominator is below
     * 1e-6; identity 1; log {@code 1/x}; inverse {@code -1/x^2}; tweedie
     * {@code 1/max(2e-16, x)} when {@code _tweedie_link_power} is 0, otherwise
     * {@code p * x^(p-1)} with {@code p = _tweedie_link_power}.
     *
     * @param x point at which to evaluate
     * @return the value for the current link
     */
    public final double linkDeriv(double x) { // note: compute an inverse of what R does
      switch (_link) {
        case logit:
          //        case multinomial:
          double div = (x * (1 - x));
          if (div < 1e-6) return 1e6; // avoid numerical instability
          return 1.0 / div;
        case identity:
          return 1;
        case log:
          return 1.0 / x;
        case inverse:
          return -1.0 / (x * x);
        case tweedie:
          //          double res = _tweedie_link_power == 0
          //            ?Math.max(2e-16,Math.exp(x))
          //            // (1/lambda) * eta^(1/lambda - 1)
          //            :(1.0/_tweedie_link_power) * Math.pow(link(x), 1.0/_tweedie_link_power -
          // 1.0);

          return _tweedie_link_power == 0
              ? 1.0 / Math.max(2e-16, x)
              : _tweedie_link_power * Math.pow(x, _tweedie_link_power - 1);
          // NOTE(review): a `default:` label before this throw is presumably missing from the
          // excerpt; as shown the statement follows a return — confirm against upstream.
          throw H2O.unimpl();
예제 #16
 /**
  * Executes {@code str} and, for a frame result, publishes it in the DKV under the key {@code id}.
  *
  * <p>For a frame result, whatever is currently stored under the key is deleted first (via
  * {@code delete()} for a {@code Lockable}, {@code remove()} for a {@code Keyed}); any other
  * non-null occupant raises {@code IllegalArgumentException}. Number and string results are
  * printed to standard output and {@code null} is returned.
  *
  * @param str expression to execute
  * @param id name of the key for a frame result; asserted to be null for number/string results
  * @return the newly stored frame, or {@code null} for number and string results
  */
 static Frame exec_str(String str, String id) {
   Val val = Exec.exec(str);
   switch (val.type()) {
     case Val.FRM:
       Frame fr = val.getFrame();
       Key k = Key.make(id);
       // Smart delete any prior top-level result
       Iced i = DKV.getGet(k);
       if (i instanceof Lockable) ((Lockable) i).delete();
       else if (i instanceof Keyed) ((Keyed) i).remove();
       else if (i != null)
         // NOTE(review): "overright" in the message below looks like a typo for "overwrite".
         throw new IllegalArgumentException("Attempting to overright an unexpected key");
       // Re-wrap the result's names and vecs in a frame that carries the requested key.
       DKV.put(fr = new Frame(k, fr._names, fr.vecs()));
       return fr;
     case Val.NUM:
       System.out.println("num= " + val.getNum());
       assert id == null;
       return null;
     case Val.STR:
       System.out.println("str= " + val.getStr());
       assert id == null;
       return null;
       // NOTE(review): a `default:` label before this throw is presumably missing from the
       // excerpt — confirm against upstream.
       throw water.H2O.fail();
예제 #17
 /**
  * Reports {@code e} through {@code H2O.ignore} with an "HDFS reset ... retrying" message.
  *
  * <p>NOTE(review): the body of the {@code try} block and of the {@code InterruptedException}
  * handler are not visible in this excerpt; presumably the method pauses before the caller
  * retries — confirm against the upstream file.
  *
  * @param e exception being ignored
  * @param printException forwarded to {@code H2O.ignore}
  */
 private static void ignoreAndWait(final Exception e, boolean printException) {
   H2O.ignore(e, "Hit HDFS reset problem, retrying...", printException);
   try {
   } catch (InterruptedException ie) {
예제 #18
 /**
  * Checks that executing {@code expr} fails and leaves no extra keys in the store.
  *
  * <p>The expression is executed under a fresh result name "resultN". Reaching the statement
  * after {@code Exec.exec} fails the test. A {@code ParserException} is accepted as is; for an
  * {@code EvaluationException} the reported position is compared with {@code errorPos} unless
  * that is -1. Finally the store size must equal its value before the call.
  *
  * @param expr expression expected to fail
  * @param errorPos expected error position, or -1 to skip the position check
  */
 protected void testExecFail(String expr, int errorPos) {
   // Snapshot of the key count, compared again at the end.
   int keys = H2O.store_size();
   try {
     int i = UNIQUE.getAndIncrement();
     System.err.println("result" + (new Integer(i).toString()) + ": " + expr);
     Key key = Exec.exec(expr, "result" + (new Integer(i).toString()));
     // Only reached when no exception was thrown, which is itself the failure.
     assertTrue("An exception should have been thrown.", false);
   } catch (ParserException e) {
   } catch (EvaluationException e) {
     if (errorPos != -1) assertEquals(errorPos, e._pos);
   assertEquals("Keys were not properly deleted for expression " + expr, keys, H2O.store_size());
예제 #19
 /**
  * Returns a new schema object for the requested version.
  *
  * @param version schema version; 3 is the only value with a visible case
  * @return a new {@code ModelMetricsListSchemaV3} for version 3
  */
 protected ModelMetricsListSchemaV3 schema(int version) {
   switch (version) {
     case 3:
       return new ModelMetricsListSchemaV3();
       // NOTE(review): a `default:` label before this throw is presumably missing from the
       // excerpt — confirm against upstream.
       throw H2O.fail("Bad version for ModelMetrics schema: " + version);
예제 #20
파일: DRF.java 프로젝트: rohit2412/h2o
 /**
  * On-the-fly version for varimp. After generating a new tree, its tree votes are collected on
  * shuffled OOB rows and variable importance is recomputed.
  * <p>The <a
  * href="http://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm#varimp">page</a> says:
  * <cite> "In every tree grown in the forest, put down the oob cases and count the number of votes
  * cast for the correct class. Now randomly permute the values of variable m in the oob cases and
  * put these cases down the tree. Subtract the number of votes for the correct class in the
  * variable-m-permuted oob data from the number of votes for the correct class in the untouched
  * oob data. The average of this number over all trees in the forest is the raw importance score
  * for variable m." </cite>
  */
 protected VarImp doVarImpCalc(
     final DRFModel model, DTree[] ktrees, final int tid, final Frame fTrain, boolean scale) {
   // Computes variable importance for tree `tid`: one task per column is created and submitted,
   // then the per-column measures in _treeMeasuresOnSOOB are compared with _treeMeasuresOnOOB to
   // produce an importance value and its SD for every column.
   // NOTE(review): several expression heads (the ternary conditions and the right-hand sides that
   // build `cd`) are not visible in this excerpt — confirm details against the upstream file.
   // Check if we have already serialized 'ktrees'-trees in the model
   assert model.ntrees() - 1 == tid
       : "Cannot compute DRF varimp since 'ktrees' are not serialized in the model! tid=" + tid;
   assert _treeMeasuresOnOOB.npredictors() - 1 == tid
       : "Tree votes over OOB rows for this tree (var ktrees) were not found!";
   // Compute tree votes over shuffled data
   final CompressedTree[ /*nclass*/] theTree =
       model.ctree(tid); // get the last tree FIXME we should pass only keys
   final int nclasses = model.nclasses();
   Futures fs = new Futures();
   for (int var = 0; var < _ncols; var++) {
     // Final copy so the anonymous tasks below can capture the column index.
     final int variable = var;
     H2OCountedCompleter task4var =
             ? new H2OCountedCompleter() {
               public void compute2() {
                 // Compute this tree votes over all data over given variable
                 TreeVotes cd =
                         theTree, nclasses, fTrain, _ncols, sample_rate, variable);
                 assert cd.npredictors() == 1;
             : /* regression */ new H2OCountedCompleter() {
               public void compute2() {
                 // Compute this tree votes over all data over given variable
                 TreeSSE cd =
                         theTree, nclasses, fTrain, _ncols, sample_rate, variable);
                 assert cd.npredictors() == 1;
     H2O.submitTask(task4var); // Fork computation
   fs.blockForPending(); // Wait for results
   // Compute varimp for individual features (_ncols)
   final float[] varimp = new float[_ncols]; // output variable importance
   final float[] varimpSD = new float[_ncols]; // output variable importance sd
   for (int var = 0; var < _ncols; var++) {
     // imp[0] is stored as the importance and imp[1] as its SD.
     double[ /*2*/] imp =
             ? asVotes(_treeMeasuresOnSOOB[var]).imp(asVotes(_treeMeasuresOnOOB))
             : asSSE(_treeMeasuresOnSOOB[var]).imp(asSSE(_treeMeasuresOnOOB));
     varimp[var] = (float) imp[0];
     varimpSD[var] = (float) imp[1];
   return new VarImp.VarImpMDA(varimp, varimpSD, model.ntrees());
예제 #21
파일: NewChunk.java 프로젝트: hihihippp/h2o
 // Compute a compressed integer buffer
 //
 // Cancels sparse mode first when _len2 != _len, then allocates
 // (_len2 << log) + off bytes and writes one (1 << log)-byte value per row
 // starting at byte offset `off`.  An NA row gets the NA marker of the chunk
 // type matching the width (C1/C2/C4/C8Chunk._NA); any other row gets its
 // mantissa rescaled by 10^(_xs[i] - scale) minus `bias`.
 //
 // NOTE(review): no `break` is visible between the cases of either switch in
 // this excerpt — presumably lost in extraction; confirm against upstream.
 private byte[] bufX(long bias, int scale, int off, int log) {
   if (_len2 != _len) cancel_sparse();
   byte[] bs = new byte[(_len2 << log) + off];
   for (int i = 0; i < _len; i++) {
     if (isNA(i)) {
       switch (log) {
         case 0:
           bs[i + off] = (byte) (C1Chunk._NA);
         case 1:
           UDP.set2(bs, (i << 1) + off, (short) C2Chunk._NA);
         case 2:
           UDP.set4(bs, (i << 2) + off, (int) C4Chunk._NA);
         case 3:
           UDP.set8(bs, (i << 3) + off, C8Chunk._NA);
     } else {
       // An exponent of Integer.MIN_VALUE + 1 is treated as exponent 0 here.
       int x = (_xs[i] == Integer.MIN_VALUE + 1 ? 0 : _xs[i]) - scale;
       long le = x >= 0 ? _ls[i] * DParseTask.pow10i(x) : _ls[i] / DParseTask.pow10i(-x);
       le -= bias;
       switch (log) {
         case 0:
           bs[i + off] = (byte) le;
         case 1:
           UDP.set2(bs, (i << 1) + off, (short) le);
         case 2:
           UDP.set4(bs, (i << 2) + off, (int) le);
         case 3:
           UDP.set8(bs, (i << 3) + off, le);
   return bs;
예제 #22
 /**
  * Marks the H2O server as stopped.
  *
  * <p>When the started flag is set it is cleared and an info message is logged; otherwise a
  * warning is logged. NOTE(review): no actual shutdown call is visible in this excerpt — it may
  * have been lost in extraction; confirm against the upstream file.
  */
 public static void stopH2O() {
   if (isH2OServerStarted) {
     isH2OServerStarted = false;
     log.info("H2O Server has shutdown.");
   } else {
     log.warn("H2O server is not started, hence cannot be stopped");
예제 #23
  /**
   * Splits {@code dataset} into {@code ratios.length + 1} frames.
   *
   * <p>One template Vec array is created per split and wrapped in a Frame keyed by
   * {@code destKeys[s]}. A nested completer then starts one {@code FrameSplitTask} per split.
   * Exceptions raised by those tasks are appended to {@code workersExceptions} under a lock and
   * the task is still completed normally.
   *
   * <p>NOTE(review): several lines (task arguments, fork calls, the lock target before
   * {@code .this}) are not visible in this excerpt — confirm against the upstream file.
   */
  public void compute2() {
    // Lock all possible data
    // Create a template vector for each segment
    final Vec[][] templates = makeTemplates(dataset, ratios);
    final int nsplits = templates.length;
    assert nsplits == ratios.length + 1 : "Unexpected number of split templates!";
    // Launch number of distributed FJ for each split part
    final Vec[] datasetVecs = dataset.vecs();
    splits = new Frame[nsplits];
    for (int s = 0; s < nsplits; s++) {
      Frame split = new Frame(destKeys[s], dataset.names(), templates[s]);
      splits[s] = split;
        new H2OCountedCompleter(FrameSplitter.this) {
          public void compute2() {
            for (int s = 0; s < nsplits; s++) {
              new FrameSplitTask(
                      new H2OCountedCompleter(this) { // Completer for this task
                        public void compute2() {}

                        public boolean onExceptionalCompletion(
                            Throwable ex, CountedCompleter caller) {
                          synchronized (
                                  .this) { // synchronized on this since can be accessed from
                            // different workers
                            // Grow the exception array by one slot (or create it) and append ex.
                            workersExceptions =
                                workersExceptions != null
                                    ? Arrays.copyOf(workersExceptions, workersExceptions.length + 1)
                                    : new Throwable[1];
                            workersExceptions[workersExceptions.length - 1] = ex;
                          tryComplete(); // we handle the exception so wait perform normal
                          // completion
                          return false;
            tryComplete(); // complete the computation of nsplits-tasks
    tryComplete(); // complete the computation of thrown tasks
예제 #24
  // --------------------------------------------------------------------------
  // Build an entire layer of all K trees
  //
  // For every class k with a non-null tree, a per-tree Frame is assembled from
  // the first _ncols + 1 columns of `fr` plus that tree's tree/work/nids
  // columns (and the weight column when idx_weight() >= 0), and a
  // ScoreBuildOneTree is created for it.  Returns `hcs` when at least one tree
  // reports _did_split, otherwise null.
  //
  // NOTE(review): the ScoreBuildOneTree constructor arguments, the fork call
  // and the blocking call are not visible in this excerpt — confirm against
  // the upstream file.
  protected DHistogram[][][] buildLayer(
      final Frame fr,
      final int nbins,
      int nbins_cats,
      final DTree ktrees[],
      final int leafs[],
      final DHistogram hcs[][][],
      boolean subset,
      boolean build_tree_one_node) {
    // Build K trees, one per class.

    // Build up the next-generation tree splits from the current histograms.
    // Nearly all leaves will split one more level.  This loop nest is
    //           O( #active_splits * #bins * #ncols )
    // but is NOT over all the data.
    ScoreBuildOneTree sb1ts[] = new ScoreBuildOneTree[_nclass];
    Vec vecs[] = fr.vecs();
    for (int k = 0; k < _nclass; k++) {
      final DTree tree = ktrees[k]; // Tree for class K
      if (tree == null) continue;
      // Build a frame with just a single tree (& work & nid) columns, so the
      // nested MRTask ScoreBuildHistogram in ScoreBuildOneTree does not try
      // to close other tree's Vecs when run in parallel.
      Frame fr2 = new Frame(Arrays.copyOf(fr._names, _ncols + 1), Arrays.copyOf(vecs, _ncols + 1));
      fr2.add(fr._names[idx_tree(k)], vecs[idx_tree(k)]);
      fr2.add(fr._names[idx_work(k)], vecs[idx_work(k)]);
      fr2.add(fr._names[idx_nids(k)], vecs[idx_nids(k)]);
      if (idx_weight() >= 0) fr2.add(fr._names[idx_weight()], vecs[idx_weight()]);
      // Start building one of the K trees in parallel
          sb1ts[k] =
              new ScoreBuildOneTree(
    // Block for all K trees to complete.
    boolean did_split = false;
    for (int k = 0; k < _nclass; k++) {
      final DTree tree = ktrees[k]; // Tree for class K
      if (tree == null) continue;
      // One splitting tree is enough for the layer to count as having split.
      if (sb1ts[k]._did_split) did_split = true;
    // The layer is done.
    return did_split ? hcs : null;
예제 #25
 // check if v is in this list of numbers
 // NB: all contiguous ranges have already been checked to have stride 1
 //
 // Looks v up in _bases with a binary search.  An exact hit returns true.
 // Otherwise the closest base below v is located and v is tested against the
 // end of that range, _bases[idx] + _cnts[idx] * _strides[idx] (exclusive).
 // Throws H2O.unimpl() for negative v.
 boolean has(long v) {
   assert _isSort; // Only called when already sorted
   // do something special for negative indexing... that does not involve
   // allocating arrays, once per list element!
   if (v < 0) throw H2O.unimpl();
   int idx = Arrays.binarySearch(_bases, v);
   if (idx >= 0) return true;
   idx = -idx - 2; // See Arrays.binarySearch; returns (-idx-1), we want +idx-1
   // NOTE(review): when v is smaller than every base, idx is -1 here and the array read below
   // throws — confirm callers never pass such a value.
   assert _bases[idx] < v; // Sanity check binary search, AND idx >= 0
   return v < _bases[idx] + _cnts[idx] * _strides[idx];
예제 #26
 // TODO: Constant response shouldn't be regression. Need to override getModelCategory()
 //
 // Returns the metric builder matching the model category of _output: a
 // binomial builder for Binomial, and a multinomial builder (sized by
 // domain.length) for Multinomial.
 public ModelMetrics.MetricBuilder makeMetricBuilder(String[] domain) {
   switch (_output.getModelCategory()) {
     case Binomial:
       return new ModelMetricsBinomial.MetricBuilderBinomial(domain);
     case Multinomial:
       return new ModelMetricsMultinomial.MetricBuilderMultinomial(domain.length, domain);
       // NOTE(review): further cases and/or a `default:` label before this throw are presumably
       // missing from the excerpt — confirm against upstream.
       throw H2O.unimpl();
예제 #27
 /**
  * Parses the stream {@code is} chunk by chunk into {@code dout}.
  *
  * <p>Chunks are parsed while the stream reports available bytes and the job identified by
  * {@code _jobKey}, when set, is neither cancelled nor crashed. One more {@code parseChunk} call
  * follows the loop for the remaining partial buffer. Throws {@code H2O.unimpl()} when the parse
  * type does not support parallel parsing.
  *
  * @param is input stream to parse
  * @param dout writer receiving the parsed data
  * @return {@code dout}
  * @throws IOException declared by the method; presumably propagated from the stream calls
  */
 ParseWriter streamParse(final InputStream is, final ParseWriter dout) throws IOException {
   if (!_setup._parse_type._parallelParseSupported) throw H2O.unimpl();
   StreamData din = new StreamData(is);
   int cidx = 0;
   // FIXME leaving _jobKey == null until sampling is done, this mean entire zip files
   // FIXME are parsed for parseSetup
   while (is.available() > 0
       && (_jobKey == null || !((Job) DKV.getGet(_jobKey)).isCancelledOrCrashed()))
     parseChunk(cidx++, din, dout);
   parseChunk(cidx, din, dout); // Parse the remaining partial 32K buffer
   return dout;
예제 #28
 /**
  * Starts the H2O server in local mode, unless it is already marked as started.
  *
  * <p>Builds a {@code -port <port>} argument array, waits for a cloud of size 1, then sets the
  * started flag. NOTE(review): the call that consumes {@code args} and launches the server is
  * not visible in this excerpt — confirm against the upstream file.
  *
  * @param port port value placed after the {@code -port} flag
  */
 public static void startH2O(String port) {
   if (!isH2OServerStarted) {
     String[] args = new String[2];
     args[0] = "-port";
     args[1] = port;
     H2O.waitForCloudSize(1, 10 * 1000 /* ms */);
     isH2OServerStarted = true;
     log.info("H2o Server has started.");
   } else {
     log.warn("H2O Server is already Running.");
예제 #29
파일: NewChunk.java 프로젝트: hihihippp/h2o
 /**
  * Marks row {@code i} as NA and returns {@code true}.
  *
  * <p>A row that is already NA is left untouched. Otherwise, when {@code _ls} is in use the
  * mantissa is zeroed and the exponent set to {@code Integer.MIN_VALUE}; when {@code _ds} is in
  * use the value is set to NaN. Throws {@code H2O.unimpl()} when {@code _len2} differs from
  * {@code _len}.
  *
  * @param i row index
  * @return always {@code true}
  */
 boolean setNA_impl(int i) {
   if (isNA(i)) return true;
   if (_len2 != _len) throw H2O.unimpl();
   if (_ls != null) {
     _ls[i] = 0;
     _xs[i] = Integer.MIN_VALUE;
   if (_ds != null) {
     _ds[i] = Double.NaN;
   return true;
예제 #30
파일: DataInfo.java 프로젝트: liaochy/h2o-3
 /**
  * Classifies this constant: {@code true} for STANDARDIZE and DESCALE, {@code false} for NONE,
  * DEMEAN and NORMALIZE.
  *
  * @return whether this constant is one of the two sigma-scaled kinds listed above
  */
 public boolean isSigmaScaled() {
   switch (this) {
     case NONE:
     case DEMEAN:
     case NORMALIZE:
       return false;
     case STANDARDIZE:
     case DESCALE:
       return true;
       // NOTE(review): a `default:` label before this throw is presumably missing from the
       // excerpt — confirm against upstream.
       throw H2O.unimpl();