public static void stall_till_cloudsize(String[] args, int x) { if (!_stall_called_before) { H2O.main(args); H2O.registerRestApis(System.getProperty("user.dir")); _stall_called_before = true; } H2O.waitForCloudSize(x, 30000); _initial_keycnt = H2O.store_size(); }
@Override Val apply(Env env, Env.StackHelp stk, AST asts[]) { // Compute the variable args. Find the common row count Val vals[] = new Val[asts.length]; Vec vec = null; for (int i = 1; i < asts.length; i++) { vals[i] = stk.track(asts[i].exec(env)); if (vals[i].isFrame()) { Vec anyvec = vals[i].getFrame().anyVec(); if (anyvec == null) continue; // Ignore the empty frame if (vec == null) vec = anyvec; else if (vec.length() != anyvec.length()) throw new IllegalArgumentException( "cbind frames must have all the same rows, found " + vec.length() + " and " + anyvec.length() + " rows."); } } boolean clean = false; if (vec == null) { vec = Vec.makeZero(1); clean = true; } // Default to length 1 // Populate the new Frame Frame fr = new Frame(); for (int i = 1; i < asts.length; i++) { switch (vals[i].type()) { case Val.FRM: fr.add(fr.makeCompatible(vals[i].getFrame())); break; case Val.FUN: throw H2O.unimpl(); case Val.STR: throw H2O.unimpl(); case Val.NUM: // Auto-expand scalars to fill every row double d = vals[i].getNum(); fr.add(Double.toString(d), vec.makeCon(d)); break; default: throw H2O.unimpl(); } } if (clean) vec.remove(); return new ValFrame(fr); }
private void setTransform( TransformType t, double[] normMul, double[] normSub, int vecStart, int n) { int idx = 0; // idx!=i when interactions are in play, otherwise, it's just 'i' for (int i = 0; i < n; ++i) { Vec v = _adaptedFrame.vec(vecStart + i); boolean isIWV = isInteractionVec(vecStart + i); switch (t) { case STANDARDIZE: normMul[idx] = (v.sigma() != 0) ? 1.0 / v.sigma() : 1.0; if (isIWV) for (int j = idx + 1; j < nextNumericIdx(i) + idx; j++) normMul[j] = 1; normSub[idx] = v.mean(); break; case NORMALIZE: normMul[idx] = (v.max() - v.min() > 0) ? 1.0 / (v.max() - v.min()) : 1.0; if (isIWV) for (int j = idx + 1; j < nextNumericIdx(i) + idx; j++) normMul[j] = 1; normSub[idx] = v.mean(); break; case DEMEAN: normMul[idx] = 1; if (isIWV) for (int j = idx + 1; j < nextNumericIdx(i) + idx; j++) normMul[j] = 1; normSub[idx] = v.mean(); break; case DESCALE: normMul[idx] = (v.sigma() != 0) ? 1.0 / v.sigma() : 1.0; if (isIWV) for (int j = idx + 1; j < nextNumericIdx(i) + idx; j++) normMul[j] = 1; normSub[idx] = 0; break; default: throw H2O.unimpl(); } assert !Double.isNaN(normMul[idx]); assert !Double.isNaN(normSub[idx]); idx = isIWV ? (idx + nextNumericIdx(i)) : (idx + 1); } }
@Override public void compute2() { // Lock all possible data dataset.read_lock(jobKey); // Create a template vector for each segment final Vec[][] templates = makeTemplates(dataset, ratios); final int nsplits = templates.length; assert nsplits == ratios.length + 1 : "Unexpected number of split templates!"; // Launch number of distributed FJ for each split part final Vec[] datasetVecs = dataset.vecs(); splits = new Frame[nsplits]; for (int s = 0; s < nsplits; s++) { Frame split = new Frame(destKeys[s], dataset.names(), templates[s]); split.delete_and_lock(jobKey); splits[s] = split; } setPendingCount(1); H2O.submitTask( new H2OCountedCompleter(FrameSplitter.this) { @Override public void compute2() { setPendingCount(nsplits); for (int s = 0; s < nsplits; s++) { new FrameSplitTask( new H2OCountedCompleter(this) { // Completer for this task @Override public void compute2() {} @Override public boolean onExceptionalCompletion( Throwable ex, CountedCompleter caller) { synchronized ( FrameSplitter .this) { // synchronized on this since can be accessed from // different workers workersExceptions = workersExceptions != null ? Arrays.copyOf(workersExceptions, workersExceptions.length + 1) : new Throwable[1]; workersExceptions[workersExceptions.length - 1] = ex; } tryComplete(); // we handle the exception so wait perform normal // completion return false; } }, datasetVecs, ratios, s) .asyncExec(splits[s]); } tryComplete(); // complete the computation of nsplits-tasks } }); tryComplete(); // complete the computation of thrown tasks }
public boolean isSigmaScaled() { switch (this) { case NONE: case DEMEAN: case NORMALIZE: return false; case STANDARDIZE: case DESCALE: return true; default: throw H2O.unimpl(); } }
public double[] expandCats() { if (isSparse() || _responses > 0) throw H2O.unimpl(); int N = fullN(); int numStart = numStart(); double[] res = new double[N + (_intercept ? 1 : 0)]; for (int i = 0; i < nBins; ++i) res[binIds[i]] = 1; if (numIds == null) { System.arraycopy(numVals, 0, res, numStart, numVals.length); } else { for (int i = 0; i < nNums; ++i) res[numIds[i]] = numVals[i]; } if (_intercept) res[res.length - 1] = 1; return res; }
@Override Val apply(Env env, Env.StackHelp stk, AST asts[]) { // Execute all args. Find a canonical frame; all Frames must look like this one. // Each argument turns into either a Frame (whose rows are entirely // inlined) or a scalar (which is replicated across as a single row). Frame fr = null; // Canonical Frame; all frames have the same column count, types and names int nchks = 0; // Total chunks Val vals[] = new Val[asts.length]; // Computed AST results for (int i = 1; i < asts.length; i++) { vals[i] = stk.track(asts[i].exec(env)); if (vals[i].isFrame()) { fr = vals[i].getFrame(); nchks += fr.anyVec().nChunks(); // Total chunks } else nchks++; // One chunk per scalar } // No Frame, just a pile-o-scalars? Vec zz = null; // The zero-length vec for the zero-frame frame if (fr == null) { // Zero-length, 1-column, default name fr = new Frame(new String[] {Frame.defaultColName(0)}, new Vec[] {zz = Vec.makeZero(0)}); if (asts.length == 1) return new ValFrame(fr); } // Verify all Frames are the same columns, names, and types. Domains can vary, and will be the // union final Frame frs[] = new Frame[asts.length]; // Input frame final byte[] types = fr.types(); // Column types final int ncols = fr.numCols(); final long[] espc = new long[nchks + 1]; // Compute a new layout! int coffset = 0; for (int i = 1; i < asts.length; i++) { Val val = vals[i]; // Save values computed for pass 2 Frame fr0 = val.isFrame() ? val.getFrame() // Scalar: auto-expand into a 1-row frame : stk.track(new Frame(fr._names, Vec.makeCons(val.getNum(), 1L, fr.numCols()))); // Check that all frames are compatible if (fr.numCols() != fr0.numCols()) throw new IllegalArgumentException( "rbind frames must have all the same columns, found " + fr.numCols() + " and " + fr0.numCols() + " columns."); if (!Arrays.deepEquals(fr._names, fr0._names)) throw new IllegalArgumentException( "rbind frames must have all the same column names, found " + Arrays.toString(fr._names) + " and " + Arrays.toString(fr0._names)); if (!Arrays.equals(types, fr0.types())) throw new IllegalArgumentException( "rbind frames must have all the same column types, found " + Arrays.toString(types) + " and " + Arrays.toString(fr0.types())); frs[i] = fr0; // Save frame // Roll up the ESPC row counts long roffset = espc[coffset]; long[] espc2 = fr0.anyVec().espc(); for (int j = 1; j < espc2.length; j++) // Roll up the row counts espc[coffset + j] = (roffset + espc2[j]); coffset += espc2.length - 1; // Chunk offset } if (zz != null) zz.remove(); // build up the new domains for each vec HashMap<String, Integer>[] dmap = new HashMap[types.length]; String[][] domains = new String[types.length][]; int[][][] cmaps = new int[types.length][][]; for (int k = 0; k < types.length; ++k) { dmap[k] = new HashMap<>(); int c = 0; byte t = types[k]; if (t == Vec.T_CAT) { int[][] maps = new int[frs.length][]; for (int i = 1; i < frs.length; i++) { maps[i] = new int[frs[i].vec(k).domain().length]; for (int j = 0; j < maps[i].length; j++) { String s = frs[i].vec(k).domain()[j]; if (!dmap[k].containsKey(s)) dmap[k].put(s, maps[i][j] = c++); else maps[i][j] = dmap[k].get(s); } } cmaps[k] = maps; } else { cmaps[k] = new int[frs.length][]; } domains[k] = c == 0 ? null : new String[c]; for (Map.Entry<String, Integer> e : dmap[k].entrySet()) domains[k][e.getValue()] = e.getKey(); } // Now make Keys for the new Vecs Key<Vec>[] keys = fr.anyVec().group().addVecs(fr.numCols()); Vec[] vecs = new Vec[fr.numCols()]; int rowLayout = Vec.ESPC.rowLayout(keys[0], espc); for (int i = 0; i < vecs.length; i++) vecs[i] = new Vec(keys[i], rowLayout, domains[i], types[i]); // Do the row-binds column-by-column. // Switch to F/J thread for continuations ParallelRbinds t; H2O.submitTask(t = new ParallelRbinds(frs, espc, vecs, cmaps)).join(); return new ValFrame(new Frame(fr.names(), t._vecs)); }
@Override protected long checksum_impl() { throw H2O.unimpl(); } // don't really need checksum