private static void addFolder2(
    FileSystem fs, Path p, ArrayList<String> keys, ArrayList<String> failed) {
  try {
    if (fs == null) return;
    Futures futures = new Futures();
    for (FileStatus file : fs.listStatus(p)) {
      Path pfs = file.getPath();
      if (file.isDir()) {
        addFolder2(fs, pfs, keys, failed);
      } else {
        long size = file.getLen();
        Key res;
        if (pfs.getName().endsWith(Extensions.JSON)) {
          throw H2O.unimpl();
        } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
          throw H2O.unimpl();
        } else {
          Key k = null;
          keys.add((k = HdfsFileVec.make(file, futures)).toString());
          Log.info("PersistHdfs: DKV.put(" + k + ")");
        }
      }
    }
  } catch (Exception e) {
    Log.err(e);
    failed.add(p.toString());
  }
}
@Override
public void map(Chunk chks[], NewChunk nchks[]) {
  long rstart = chks[0]._start;
  int rlen = chks[0]._len; // Total row count
  int rx = 0; // Which row to in/ex-clude
  int rlo = 0; // Lo/Hi for this block of rows
  int rhi = rlen;
  while (true) { // Still got rows to include?
    if (_rows != null) { // Got a row selector?
      if (rx >= _rows.length) break; // All done with row selections
      long r = _rows[rx++] - 1; // Next row selector
      if (r < 0) { // Row exclusion
        if (rx > 0 && _rows[rx - 1] < _rows[rx]) throw H2O.unimpl();
        long er = Math.abs(r) - 2;
        if (er < rstart) continue;
        // scoop up all of the rows before the first exclusion
        if (rx == 1 && ((int) (er + 1 - rstart)) > 0 && _ex) {
          rlo = (int) rstart;
          rhi = (int) (er - rstart);
          _ex = false;
          rx--;
        } else {
          rlo = (int) (er + 1 - rstart);
          // TODO: handle jumbled row indices ( e.g. -c(1,5,3) )
          while (rx < _rows.length && (_rows[rx] + 1 == _rows[rx - 1] && rlo < rlen)) {
            if (rx < _rows.length - 1 && _rows[rx] < _rows[rx + 1]) throw H2O.unimpl();
            rx++;
            rlo++; // Exclude consecutive rows
          }
          rhi = rx >= _rows.length ? rlen : (int) Math.abs(_rows[rx] - 1) - 2;
          if (rx < _rows.length - 1 && _rows[rx] < _rows[rx + 1]) throw H2O.unimpl();
        }
      } else { // Positive row list?
        if (r < rstart) continue;
        rlo = (int) (r - rstart);
        rhi = rlo + 1; // Stop at the next row
        while (rx < _rows.length && (_rows[rx] - 1 - rstart) == rhi && rhi < rlen) {
          rx++;
          rhi++; // Grab sequential rows
        }
      }
    }
    // Process this next set of rows
    // For all cols in the new set
    for (int i = 0; i < _cols.length; i++) {
      Chunk oc = chks[_cols[i]];
      NewChunk nc = nchks[i];
      if (oc._vec.isInt()) { // Slice on integer columns
        for (int j = rlo; j < rhi; j++)
          if (oc.isNA0(j)) nc.addNA();
          else nc.addNum(oc.at80(j), 0);
      } else { // Slice on double columns
        for (int j = rlo; j < rhi; j++) nc.addNum(oc.at0(j));
      }
    }
    rlo = rhi;
    if (_rows == null) break;
  }
}
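// Illustrative sketch, not the original H2O code: _rows holds 1-based row selectors
// (negative values mark exclusions), and the loop above turns them into chunk-local
// [rlo, rhi) spans. The helper below shows just the positive-selector grouping for a
// sorted selector array; its name and signature are hypothetical.
private static java.util.List<int[]> selectorsToSpans(long[] rows, long rstart, int rlen) {
  java.util.List<int[]> spans = new java.util.ArrayList<>();
  int rx = 0;
  while (rx < rows.length) {
    long r = rows[rx++] - 1; // 1-based selector -> 0-based absolute row
    if (r < rstart || r >= rstart + rlen) continue; // outside this chunk
    int rlo = (int) (r - rstart);
    int rhi = rlo + 1;
    // Grab sequential rows into one span, mirroring the positive-selector branch above
    while (rx < rows.length && (rows[rx] - 1 - rstart) == rhi && rhi < rlen) {
      rx++;
      rhi++;
    }
    spans.add(new int[] {rlo, rhi}); // e.g. rows {3,4,5,9}, rstart 0 -> spans [2,5) and [8,9)
  }
  return spans;
}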
public PersistManager(URI iceRoot) {
  I = new Persist[MAX_BACKENDS];
  stats = new PersistStatsEntry[MAX_BACKENDS];
  for (int i = 0; i < stats.length; i++) {
    stats[i] = new PersistStatsEntry();
  }

  if (iceRoot == null) {
    Log.err("ice_root must be specified. Exiting.");
    H2O.exit(1);
  }

  Persist ice = null;
  boolean windowsPath = iceRoot.toString().matches("^[a-zA-Z]:.*");

  if (windowsPath) {
    ice = new PersistFS(new File(iceRoot.toString()));
  } else if ((iceRoot.getScheme() == null) || Schemes.FILE.equals(iceRoot.getScheme())) {
    ice = new PersistFS(new File(iceRoot.getPath()));
  } else if (Schemes.HDFS.equals(iceRoot.getScheme())) {
    Log.err("HDFS ice_root not yet supported. Exiting.");
    H2O.exit(1);
    // I am not sure anyone actually ever does this.
    // H2O on Hadoop launches use local disk for ice root.
    // This has a chance to work, but turn it off until it gets tested.
    //
    // try {
    //   Class klass = Class.forName("water.persist.PersistHdfs");
    //   java.lang.reflect.Constructor constructor = klass.getConstructor(new Class[]{URI.class});
    //   ice = (Persist) constructor.newInstance(iceRoot);
    // } catch (Exception e) {
    //   Log.err("Could not initialize HDFS");
    //   throw new RuntimeException(e);
    // }
  }

  I[Value.ICE] = ice;
  I[Value.NFS] = new PersistNFS();

  try {
    Class klass = Class.forName("water.persist.PersistHdfs");
    java.lang.reflect.Constructor constructor = klass.getConstructor();
    I[Value.HDFS] = (Persist) constructor.newInstance();
    Log.info("HDFS subsystem successfully initialized");
  } catch (Throwable ignore) {
    Log.info("HDFS subsystem not available");
  }

  try {
    Class klass = Class.forName("water.persist.PersistS3");
    java.lang.reflect.Constructor constructor = klass.getConstructor();
    I[Value.S3] = (Persist) constructor.newInstance();
    Log.info("S3 subsystem successfully initialized");
  } catch (Throwable ignore) {
    Log.info("S3 subsystem not available");
  }
}
// Set & At on NewChunks are weird: only used after inflating some other
// chunk. At this point the NewChunk is full size, no more appends allowed,
// and the xs exponent array should be only full of zeros. Accesses must be
// in-range and refer to the inflated values of the original Chunk.
@Override
boolean set_impl(int i, long l) {
  if (_ds != null) throw H2O.unimpl();
  if (_len2 != _len) throw H2O.unimpl();
  _ls[i] = l;
  _xs[i] = 0;
  return true;
}
public static void stall_till_cloudsize(String[] args, int x) {
  if (!_stall_called_before) {
    H2O.main(args);
    H2O.registerRestApis(System.getProperty("user.dir"));
    _stall_called_before = true;
  }
  H2O.waitForCloudSize(x, 30000);
  _initial_keycnt = H2O.store_size();
}
@Override
Val apply(Env env, Env.StackHelp stk, AST asts[]) {
  // Compute the variable args. Find the common row count
  Val vals[] = new Val[asts.length];
  Vec vec = null;
  for (int i = 1; i < asts.length; i++) {
    vals[i] = stk.track(asts[i].exec(env));
    if (vals[i].isFrame()) {
      Vec anyvec = vals[i].getFrame().anyVec();
      if (anyvec == null) continue; // Ignore the empty frame
      if (vec == null) vec = anyvec;
      else if (vec.length() != anyvec.length())
        throw new IllegalArgumentException(
            "cbind frames must have all the same rows, found "
                + vec.length()
                + " and "
                + anyvec.length()
                + " rows.");
    }
  }
  boolean clean = false;
  if (vec == null) {
    vec = Vec.makeZero(1);
    clean = true;
  } // Default to length 1

  // Populate the new Frame
  Frame fr = new Frame();
  for (int i = 1; i < asts.length; i++) {
    switch (vals[i].type()) {
      case Val.FRM:
        fr.add(fr.makeCompatible(vals[i].getFrame()));
        break;
      case Val.FUN:
        throw H2O.unimpl();
      case Val.STR:
        throw H2O.unimpl();
      case Val.NUM:
        // Auto-expand scalars to fill every row
        double d = vals[i].getNum();
        fr.add(Double.toString(d), vec.makeCon(d));
        break;
      default:
        throw H2O.unimpl();
    }
  }
  if (clean) vec.remove();
  return new ValFrame(fr);
}
public final Vec[] vecs() {
  if (_vecs != null) return _vecs;
  // Load all Vec headers; load them all in parallel by spawning F/J tasks.
  final Vec[] vecs = new Vec[_keys.length];
  Futures fs = new Futures();
  for (int i = 0; i < _keys.length; i++) {
    final int ii = i;
    final Key k = _keys[i];
    H2OCountedCompleter t =
        new H2OCountedCompleter() {
          // We need higher priority here as there is a danger of deadlock in
          // case of many calls from MRTask2 at once (e.g. frame with many
          // vectors invokes rollup tasks for all vectors in parallel). Should
          // probably be done in CPS style in the future
          @Override
          public byte priority() {
            return H2O.MIN_HI_PRIORITY;
          }

          @Override
          public void compute2() {
            vecs[ii] = DKV.get(k).get();
            tryComplete();
          }
        };
    H2O.submitTask(t);
    fs.add(t);
  }
  fs.blockForPending();
  return _vecs = vecs;
}
// Compute a compressed integer buffer
private byte[] bufX(long bias, int scale, int off, int log) {
  byte[] bs = new byte[(_len << log) + off];
  int j = 0;
  for (int i = 0; i < _len; i++) {
    long le = -bias;
    if (_id == null || _id.length == 0 || (j < _id.length && _id[j] == i)) {
      if (isNA2(j)) {
        le = NAS[log];
      } else {
        int x = (_xs[j] == Integer.MIN_VALUE + 1 ? 0 : _xs[j]) - scale;
        le += x >= 0 ? _ls[j] * PrettyPrint.pow10i(x) : _ls[j] / PrettyPrint.pow10i(-x);
      }
      ++j;
    }
    switch (log) {
      case 0:
        bs[i + off] = (byte) le;
        break;
      case 1:
        UnsafeUtils.set2(bs, (i << 1) + off, (short) le);
        break;
      case 2:
        UnsafeUtils.set4(bs, (i << 2) + off, (int) le);
        break;
      case 3:
        UnsafeUtils.set8(bs, (i << 3) + off, le);
        break;
      default:
        throw H2O.fail();
    }
  }
  assert j == sparseLen()
      : "j = " + j + ", len = " + sparseLen() + ", len2 = " + _len + ", id[j] = " + _id[j];
  return bs;
}
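// Worked example, not part of the H2O API: the loop above encodes each value as
//   le = mantissa * 10^(_xs - scale) - bias
// so a reader recovers it as (le + bias) * 10^scale. A tiny self-contained round-trip
// check of that relation, with made-up numbers:
static void bufXRoundTripExample() {
  long mantissa = 123; // together with exponent -2 this represents 1.23
  int xs = -2;
  int scale = -2; // chosen column-wide exponent
  long bias = 100; // chosen column-wide bias
  long le = mantissa * (long) Math.pow(10, xs - scale) - bias; // stored value: 23
  double decoded = (le + bias) * Math.pow(10, scale); // 1.23
  assert Math.abs(decoded - 1.23) < 1e-12;
}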
// Read up to 'len' bytes of Value. Value should already be persisted to
// disk. A racing delete can trigger a failure where we get a null return,
// but no crash (although one could argue that a racing load&delete is a bug
// no matter what).
@Override
public byte[] load(Value v) {
  long skip = 0;
  Key k = v._key;
  // Convert an arraylet chunk into a long-offset from the base file.
  if (k._kb[0] == Key.ARRAYLET_CHUNK) {
    skip = ValueArray.getChunkOffset(k); // The offset
    k = ValueArray.getArrayKey(k); // From the base file key
  }
  if (k._kb[0] == Key.DVEC) {
    skip = water.fvec.NFSFileVec.chunkOffset(k); // The offset
  }
  try {
    FileInputStream s = null;
    try {
      s = new FileInputStream(getFileForKey(k));
      FileChannel fc = s.getChannel();
      fc.position(skip);
      AutoBuffer ab = new AutoBuffer(fc, true, Value.NFS);
      byte[] b = ab.getA1(v._max);
      ab.close();
      assert v.isPersisted();
      return b;
    } finally {
      if (s != null) s.close();
    }
  } catch (IOException e) { // Broken disk / short-file???
    H2O.ignore(e);
    return null;
  }
}
// Compute a sparse float buffer
private byte[] bufD(final int valsz) {
  int log = 0;
  while ((1 << log) < valsz) ++log;
  assert (1 << log) == valsz;
  final int ridsz = _len >= 65535 ? 4 : 2;
  final int elmsz = ridsz + valsz;
  int off = CXDChunk._OFF;
  byte[] buf = MemoryManager.malloc1(off + sparseLen() * elmsz, true);
  for (int i = 0; i < sparseLen(); i++, off += elmsz) {
    if (ridsz == 2) UnsafeUtils.set2(buf, off, (short) _id[i]);
    else UnsafeUtils.set4(buf, off, _id[i]);
    final double dval =
        _ds == null ? isNA2(i) ? Double.NaN : _ls[i] * PrettyPrint.pow10(_xs[i]) : _ds[i];
    switch (valsz) {
      case 4:
        UnsafeUtils.set4f(buf, off + ridsz, (float) dval);
        break;
      case 8:
        UnsafeUtils.set8d(buf, off + ridsz, dval);
        break;
      default:
        throw H2O.fail();
    }
  }
  assert off == buf.length;
  return buf;
}
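// Illustrative layout note (an interpretation, not taken from the original source): the
// buffer built above is a fixed header of CXDChunk._OFF bytes followed by one
// (row-id, value) pair per stored element, i.e. elmsz = ridsz + valsz bytes each.
// For example, with sparseLen() = 3, 2-byte row ids and 8-byte doubles, the buffer is
// _OFF + 3 * 10 bytes long, and the final assert checks that the writes consumed it exactly.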
private void setTransform(
    TransformType t, double[] normMul, double[] normSub, int vecStart, int n) {
  for (int i = 0; i < n; ++i) {
    Vec v = _adaptedFrame.vec(vecStart + i);
    switch (t) {
      case STANDARDIZE:
        normMul[i] = (v.sigma() != 0) ? 1.0 / v.sigma() : 1.0;
        normSub[i] = v.mean();
        break;
      case NORMALIZE:
        normMul[i] = (v.max() - v.min() > 0) ? 1.0 / (v.max() - v.min()) : 1.0;
        normSub[i] = v.mean();
        break;
      case DEMEAN:
        normMul[i] = 1;
        normSub[i] = v.mean();
        break;
      case DESCALE:
        normMul[i] = (v.sigma() != 0) ? 1.0 / v.sigma() : 1.0;
        normSub[i] = 0;
        break;
      default:
        throw H2O.unimpl();
    }
    assert !Double.isNaN(normMul[i]);
    assert !Double.isNaN(normSub[i]);
  }
}
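// Illustrative note, assuming the usual convention that the two arrays are applied
// downstream as transformed = (x - normSub[i]) * normMul[i]. Under that assumption the
// cases above amount to:
//   STANDARDIZE: (x - mean) / sigma        NORMALIZE: (x - mean) / (max - min)
//   DEMEAN:      x - mean                  DESCALE:   x / sigma
// with the multiplier falling back to 1 when sigma (or the value range) is zero.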
private void setTransform(
    TransformType t, double[] normMul, double[] normSub, int vecStart, int n) {
  int idx = 0; // idx != i when interactions are in play, otherwise it's just 'i'
  for (int i = 0; i < n; ++i) {
    Vec v = _adaptedFrame.vec(vecStart + i);
    boolean isIWV = isInteractionVec(vecStart + i);
    switch (t) {
      case STANDARDIZE:
        normMul[idx] = (v.sigma() != 0) ? 1.0 / v.sigma() : 1.0;
        if (isIWV) for (int j = idx + 1; j < nextNumericIdx(i) + idx; j++) normMul[j] = 1;
        normSub[idx] = v.mean();
        break;
      case NORMALIZE:
        normMul[idx] = (v.max() - v.min() > 0) ? 1.0 / (v.max() - v.min()) : 1.0;
        if (isIWV) for (int j = idx + 1; j < nextNumericIdx(i) + idx; j++) normMul[j] = 1;
        normSub[idx] = v.mean();
        break;
      case DEMEAN:
        normMul[idx] = 1;
        if (isIWV) for (int j = idx + 1; j < nextNumericIdx(i) + idx; j++) normMul[j] = 1;
        normSub[idx] = v.mean();
        break;
      case DESCALE:
        normMul[idx] = (v.sigma() != 0) ? 1.0 / v.sigma() : 1.0;
        if (isIWV) for (int j = idx + 1; j < nextNumericIdx(i) + idx; j++) normMul[j] = 1;
        normSub[idx] = 0;
        break;
      default:
        throw H2O.unimpl();
    }
    assert !Double.isNaN(normMul[idx]);
    assert !Double.isNaN(normSub[idx]);
    idx = isIWV ? (idx + nextNumericIdx(i)) : (idx + 1);
  }
}
// ------------------------------------------------------------------------
// Zipped file; no parallel decompression; decompress into local chunks,
// parse local chunks; distribute chunks later.
ParseWriter streamParseZip(final InputStream is, final StreamParseWriter dout, InputStream bvs)
    throws IOException {
  // All output into a fresh pile of NewChunks, one per column
  if (!_setup._parse_type._parallelParseSupported) throw H2O.unimpl();
  StreamData din = new StreamData(is);
  int cidx = 0;
  StreamParseWriter nextChunk = dout;
  int zidx = bvs.read(null, 0, 0); // Back-channel read of chunk index
  assert zidx == 1;
  while (is.available() > 0) {
    int xidx = bvs.read(null, 0, 0); // Back-channel read of chunk index
    if (xidx > zidx) { // Advanced chunk index of underlying ByteVec stream?
      zidx = xidx; // Record advancing of chunk
      nextChunk.close(); // Match output chunks to input zipfile chunks
      if (dout != nextChunk) {
        dout.reduce(nextChunk);
        if (_jobKey != null && ((Job) DKV.getGet(_jobKey)).isCancelledOrCrashed()) break;
      }
      nextChunk = nextChunk.nextChunk();
    }
    parseChunk(cidx++, din, nextChunk);
  }
  parseChunk(cidx, din, nextChunk); // Parse the remaining partial 32K buffer
  nextChunk.close();
  if (dout != nextChunk) dout.reduce(nextChunk);
  return dout;
}
@Override
public double atd_impl(int i) {
  if (_len2 != _len) throw H2O.unimpl();
  if (_ds == null) return at8_impl(i);
  assert _xs == null;
  return _ds[i];
}
public final double linkDeriv(double x) { // note: compute an inverse of what R does
  switch (_link) {
    case logit:
      // case multinomial:
      double div = (x * (1 - x));
      if (div < 1e-6) return 1e6; // avoid numerical instability
      return 1.0 / div;
    case identity:
      return 1;
    case log:
      return 1.0 / x;
    case inverse:
      return -1.0 / (x * x);
    case tweedie:
      // double res = _tweedie_link_power == 0
      //     ? Math.max(2e-16, Math.exp(x))
      //     // (1/lambda) * eta^(1/lambda - 1)
      //     : (1.0 / _tweedie_link_power) * Math.pow(link(x), 1.0 / _tweedie_link_power - 1.0);
      return _tweedie_link_power == 0
          ? 1.0 / Math.max(2e-16, x)
          : _tweedie_link_power * Math.pow(x, _tweedie_link_power - 1);
    default:
      throw H2O.unimpl();
  }
}
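// Worked derivation for the cases above (standard calculus, not taken from the source):
// for the logit link g(mu) = log(mu / (1 - mu)) the chain rule gives
//   g'(mu) = 1/mu + 1/(1 - mu) = 1 / (mu * (1 - mu)),
// which is what the logit branch returns; the 1e-6 floor on the denominator simply caps the
// derivative at 1e6 as mu approaches 0 or 1. Likewise g(mu) = log(mu) gives g'(mu) = 1/mu
// and g(mu) = 1/mu gives g'(mu) = -1/mu^2, matching the log and inverse branches.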
static Frame exec_str(String str, String id) {
  Val val = Exec.exec(str);
  switch (val.type()) {
    case Val.FRM:
      Frame fr = val.getFrame();
      Key k = Key.make(id);
      // Smart delete any prior top-level result
      Iced i = DKV.getGet(k);
      if (i instanceof Lockable) ((Lockable) i).delete();
      else if (i instanceof Keyed) ((Keyed) i).remove();
      else if (i != null)
        throw new IllegalArgumentException("Attempting to overwrite an unexpected key");
      DKV.put(fr = new Frame(k, fr._names, fr.vecs()));
      System.out.println(fr);
      checkSaneFrame();
      return fr;
    case Val.NUM:
      System.out.println("num= " + val.getNum());
      assert id == null;
      checkSaneFrame();
      return null;
    case Val.STR:
      System.out.println("str= " + val.getStr());
      assert id == null;
      checkSaneFrame();
      return null;
    default:
      throw water.H2O.fail();
  }
}
private static void ignoreAndWait(final Exception e, boolean printException) {
  H2O.ignore(e, "Hit HDFS reset problem, retrying...", printException);
  try {
    Thread.sleep(500);
  } catch (InterruptedException ie) {
    // Interrupted while backing off; just retry sooner.
  }
}
protected void testExecFail(String expr, int errorPos) {
  DKV.write_barrier();
  int keys = H2O.store_size();
  try {
    int i = UNIQUE.getAndIncrement();
    System.err.println("result" + Integer.toString(i) + ": " + expr);
    Key key = Exec.exec(expr, "result" + Integer.toString(i));
    UKV.remove(key);
    assertTrue("An exception should have been thrown.", false);
  } catch (ParserException e) {
    assertTrue(false);
  } catch (EvaluationException e) {
    if (errorPos != -1) assertEquals(errorPos, e._pos);
  }
  DKV.write_barrier();
  assertEquals("Keys were not properly deleted for expression " + expr, keys, H2O.store_size());
}
protected ModelMetricsListSchemaV3 schema(int version) {
  switch (version) {
    case 3:
      return new ModelMetricsListSchemaV3();
    default:
      throw H2O.fail("Bad version for ModelMetrics schema: " + version);
  }
}
/**
 * On-the-fly version for varimp. After generating a new tree, its tree votes are collected on
 * shuffled OOB rows and variable importance is recomputed.
 *
 * <p>The <a
 * href="http://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm#varimp">page</a> says:
 * <cite>"In every tree grown in the forest, put down the oob cases and count the number of votes
 * cast for the correct class. Now randomly permute the values of variable m in the oob cases and
 * put these cases down the tree. Subtract the number of votes for the correct class in the
 * variable-m-permuted oob data from the number of votes for the correct class in the untouched
 * oob data. The average of this number over all trees in the forest is the raw importance score
 * for variable m."</cite>
 */
@Override
protected VarImp doVarImpCalc(
    final DRFModel model, DTree[] ktrees, final int tid, final Frame fTrain, boolean scale) {
  // Check if we have already serialized 'ktrees'-trees in the model
  assert model.ntrees() - 1 == tid
      : "Cannot compute DRF varimp since 'ktrees' are not serialized in the model! tid=" + tid;
  assert _treeMeasuresOnOOB.npredictors() - 1 == tid
      : "Tree votes over OOB rows for this tree (var ktrees) were not found!";
  // Compute tree votes over shuffled data
  final CompressedTree[/*nclass*/] theTree =
      model.ctree(tid); // get the last tree FIXME we should pass only keys
  final int nclasses = model.nclasses();
  Futures fs = new Futures();
  for (int var = 0; var < _ncols; var++) {
    final int variable = var;
    H2OCountedCompleter task4var =
        classification
            ? new H2OCountedCompleter() {
              @Override
              public void compute2() {
                // Compute this tree votes over all data over given variable
                TreeVotes cd =
                    TreeMeasuresCollector.collectVotes(
                        theTree, nclasses, fTrain, _ncols, sample_rate, variable);
                assert cd.npredictors() == 1;
                asVotes(_treeMeasuresOnSOOB[variable]).append(cd);
                tryComplete();
              }
            }
            : /* regression */ new H2OCountedCompleter() {
              @Override
              public void compute2() {
                // Compute this tree votes over all data over given variable
                TreeSSE cd =
                    TreeMeasuresCollector.collectSSE(
                        theTree, nclasses, fTrain, _ncols, sample_rate, variable);
                assert cd.npredictors() == 1;
                asSSE(_treeMeasuresOnSOOB[variable]).append(cd);
                tryComplete();
              }
            };
    H2O.submitTask(task4var); // Fork computation
    fs.add(task4var);
  }
  fs.blockForPending(); // Wait for results
  // Compute varimp for individual features (_ncols)
  final float[] varimp = new float[_ncols]; // output variable importance
  final float[] varimpSD = new float[_ncols]; // output variable importance sd
  for (int var = 0; var < _ncols; var++) {
    double[/*2*/] imp =
        classification
            ? asVotes(_treeMeasuresOnSOOB[var]).imp(asVotes(_treeMeasuresOnOOB))
            : asSSE(_treeMeasuresOnSOOB[var]).imp(asSSE(_treeMeasuresOnOOB));
    varimp[var] = (float) imp[0];
    varimpSD[var] = (float) imp[1];
  }
  return new VarImp.VarImpMDA(varimp, varimpSD, model.ntrees());
}
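// Illustrative sketch, not the H2O implementation: per the Breiman description quoted in the
// Javadoc above, the raw importance of variable m is the average over trees of
// (correct OOB votes) minus (correct votes after permuting m), with an SD taken over the same
// per-tree differences. A self-contained example of that aggregation, one entry per tree:
static double[] permutationImportanceExample(int[] correctVotesOOB, int[] correctVotesPermuted) {
  int ntrees = correctVotesOOB.length;
  double mean = 0;
  for (int t = 0; t < ntrees; t++) mean += correctVotesOOB[t] - correctVotesPermuted[t];
  mean /= ntrees;
  double m2 = 0;
  for (int t = 0; t < ntrees; t++) {
    double d = (correctVotesOOB[t] - correctVotesPermuted[t]) - mean;
    m2 += d * d;
  }
  return new double[] {mean, Math.sqrt(m2 / ntrees)}; // {raw importance, sd}
}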
// Compute a compressed integer buffer
private byte[] bufX(long bias, int scale, int off, int log) {
  if (_len2 != _len) cancel_sparse();
  byte[] bs = new byte[(_len2 << log) + off];
  for (int i = 0; i < _len; i++) {
    if (isNA(i)) {
      switch (log) {
        case 0:
          bs[i + off] = (byte) (C1Chunk._NA);
          break;
        case 1:
          UDP.set2(bs, (i << 1) + off, (short) C2Chunk._NA);
          break;
        case 2:
          UDP.set4(bs, (i << 2) + off, (int) C4Chunk._NA);
          break;
        case 3:
          UDP.set8(bs, (i << 3) + off, C8Chunk._NA);
          break;
        default:
          H2O.fail();
      }
    } else {
      int x = (_xs[i] == Integer.MIN_VALUE + 1 ? 0 : _xs[i]) - scale;
      long le = x >= 0 ? _ls[i] * DParseTask.pow10i(x) : _ls[i] / DParseTask.pow10i(-x);
      le -= bias;
      switch (log) {
        case 0:
          bs[i + off] = (byte) le;
          break;
        case 1:
          UDP.set2(bs, (i << 1) + off, (short) le);
          break;
        case 2:
          UDP.set4(bs, (i << 2) + off, (int) le);
          break;
        case 3:
          UDP.set8(bs, (i << 3) + off, le);
          break;
        default:
          H2O.fail();
      }
    }
  }
  return bs;
}
/** Stop H2O */
public static void stopH2O() {
  if (isH2OServerStarted) {
    H2O.shutdown(0);
    isH2OServerStarted = false;
    log.info("H2O Server has shut down.");
  } else {
    log.warn("H2O server is not started, hence it cannot be stopped");
  }
}
@Override
public void compute2() {
  // Lock all possible data
  dataset.read_lock(jobKey);
  // Create a template vector for each segment
  final Vec[][] templates = makeTemplates(dataset, ratios);
  final int nsplits = templates.length;
  assert nsplits == ratios.length + 1 : "Unexpected number of split templates!";
  // Launch number of distributed FJ for each split part
  final Vec[] datasetVecs = dataset.vecs();
  splits = new Frame[nsplits];
  for (int s = 0; s < nsplits; s++) {
    Frame split = new Frame(destKeys[s], dataset.names(), templates[s]);
    split.delete_and_lock(jobKey);
    splits[s] = split;
  }
  setPendingCount(1);
  H2O.submitTask(
      new H2OCountedCompleter(FrameSplitter.this) {
        @Override
        public void compute2() {
          setPendingCount(nsplits);
          for (int s = 0; s < nsplits; s++) {
            new FrameSplitTask(
                    new H2OCountedCompleter(this) { // Completer for this task
                      @Override
                      public void compute2() {}

                      @Override
                      public boolean onExceptionalCompletion(
                          Throwable ex, CountedCompleter caller) {
                        // synchronized on this since it can be accessed from different workers
                        synchronized (FrameSplitter.this) {
                          workersExceptions =
                              workersExceptions != null
                                  ? Arrays.copyOf(workersExceptions, workersExceptions.length + 1)
                                  : new Throwable[1];
                          workersExceptions[workersExceptions.length - 1] = ex;
                        }
                        tryComplete(); // we handle the exception, so perform normal completion
                        return false;
                      }
                    },
                    datasetVecs,
                    ratios,
                    s)
                .asyncExec(splits[s]);
          }
          tryComplete(); // complete the computation of nsplits-tasks
        }
      });
  tryComplete(); // complete the computation of thrown tasks
}
// --------------------------------------------------------------------------
// Build an entire layer of all K trees
protected DHistogram[][][] buildLayer(
    final Frame fr,
    final int nbins,
    int nbins_cats,
    final DTree ktrees[],
    final int leafs[],
    final DHistogram hcs[][][],
    boolean subset,
    boolean build_tree_one_node) {
  // Build K trees, one per class.
  // Build up the next-generation tree splits from the current histograms.
  // Nearly all leaves will split one more level. This loop nest is
  //   O( #active_splits * #bins * #ncols )
  // but is NOT over all the data.
  ScoreBuildOneTree sb1ts[] = new ScoreBuildOneTree[_nclass];
  Vec vecs[] = fr.vecs();
  for (int k = 0; k < _nclass; k++) {
    final DTree tree = ktrees[k]; // Tree for class K
    if (tree == null) continue;
    // Build a frame with just a single tree (& work & nid) columns, so the
    // nested MRTask ScoreBuildHistogram in ScoreBuildOneTree does not try
    // to close other trees' Vecs when run in parallel.
    Frame fr2 = new Frame(Arrays.copyOf(fr._names, _ncols + 1), Arrays.copyOf(vecs, _ncols + 1));
    fr2.add(fr._names[idx_tree(k)], vecs[idx_tree(k)]);
    fr2.add(fr._names[idx_work(k)], vecs[idx_work(k)]);
    fr2.add(fr._names[idx_nids(k)], vecs[idx_nids(k)]);
    if (idx_weight() >= 0) fr2.add(fr._names[idx_weight()], vecs[idx_weight()]);
    // Start building one of the K trees in parallel
    H2O.submitTask(
        sb1ts[k] =
            new ScoreBuildOneTree(
                this,
                k,
                nbins,
                nbins_cats,
                tree,
                leafs,
                hcs,
                fr2,
                subset,
                build_tree_one_node,
                _improvPerVar,
                _model._parms._distribution));
  }
  // Block for all K trees to complete.
  boolean did_split = false;
  for (int k = 0; k < _nclass; k++) {
    final DTree tree = ktrees[k]; // Tree for class K
    if (tree == null) continue;
    sb1ts[k].join();
    if (sb1ts[k]._did_split) did_split = true;
  }
  // The layer is done.
  return did_split ? hcs : null;
}
// check if v is in this list of numbers
// NB: all contiguous ranges have already been checked to have stride 1
boolean has(long v) {
  assert _isSort; // Only called when already sorted
  // do something special for negative indexing... that does not involve
  // allocating arrays, once per list element!
  if (v < 0) throw H2O.unimpl();
  int idx = Arrays.binarySearch(_bases, v);
  if (idx >= 0) return true;
  idx = -idx - 2; // See Arrays.binarySearch; returns (-idx-1), we want +idx-1
  assert _bases[idx] < v; // Sanity check binary search, AND idx >= 0
  return v < _bases[idx] + _cnts[idx] * _strides[idx];
}
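// Worked example (made-up values, not from the source): with _bases = {2, 10},
// _cnts = {3, 4} and _strides = {1, 1} the list covers rows 2,3,4 and 10,11,12,13.
// has(11) binary-searches to idx = 1 (base 10 <= 11) and returns 11 < 10 + 4*1, i.e. true;
// has(7) lands on idx = 0 and returns 7 < 2 + 3*1, i.e. false.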
// TODO: Constant response shouldn't be regression. Need to override getModelCategory()
@Override
public ModelMetrics.MetricBuilder makeMetricBuilder(String[] domain) {
  switch (_output.getModelCategory()) {
    case Binomial:
      return new ModelMetricsBinomial.MetricBuilderBinomial(domain);
    case Multinomial:
      return new ModelMetricsMultinomial.MetricBuilderMultinomial(domain.length, domain);
    default:
      throw H2O.unimpl();
  }
}
ParseWriter streamParse(final InputStream is, final ParseWriter dout) throws IOException {
  if (!_setup._parse_type._parallelParseSupported) throw H2O.unimpl();
  StreamData din = new StreamData(is);
  int cidx = 0;
  // FIXME leaving _jobKey == null until sampling is done; this means entire zip files
  // FIXME are parsed for parseSetup
  while (is.available() > 0
      && (_jobKey == null || !((Job) DKV.getGet(_jobKey)).isCancelledOrCrashed()))
    parseChunk(cidx++, din, dout);
  parseChunk(cidx, din, dout); // Parse the remaining partial 32K buffer
  return dout;
}
/** Starts H2O server in local mode */
public static void startH2O(String port) {
  if (!isH2OServerStarted) {
    String[] args = new String[2];
    args[0] = "-port";
    args[1] = port;
    H2OApp.main(args);
    H2O.waitForCloudSize(1, 10 * 1000 /* ms */);
    isH2OServerStarted = true;
    log.info("H2O Server has started.");
  } else {
    log.warn("H2O Server is already running.");
  }
}
@Override
boolean setNA_impl(int i) {
  if (isNA(i)) return true;
  if (_len2 != _len) throw H2O.unimpl();
  if (_ls != null) {
    _ls[i] = 0;
    _xs[i] = Integer.MIN_VALUE;
  }
  if (_ds != null) {
    _ds[i] = Double.NaN;
  }
  return true;
}
public boolean isSigmaScaled() {
  switch (this) {
    case NONE:
    case DEMEAN:
    case NORMALIZE:
      return false;
    case STANDARDIZE:
    case DESCALE:
      return true;
    default:
      throw H2O.unimpl();
  }
}