@Override public void compute2() { _in.read_lock(_jobKey); // simply create a bogus new vector (don't even put it into KV) with appropriate number of lines // per chunk and then use it as a source to do multiple makeZero calls // to create empty vecs and than call RebalanceTask on each one of them. // RebalanceTask will fetch the appropriate src chunks and fetch the data from them. int rpc = (int) (_in.numRows() / _nchunks); int rem = (int) (_in.numRows() % _nchunks); long[] espc = new long[_nchunks + 1]; Arrays.fill(espc, rpc); for (int i = 0; i < rem; ++i) ++espc[i]; long sum = 0; for (int i = 0; i < espc.length; ++i) { long s = espc[i]; espc[i] = sum; sum += s; } assert espc[espc.length - 1] == _in.numRows() : "unexpected number of rows, expected " + _in.numRows() + ", got " + espc[espc.length - 1]; final Vec[] srcVecs = _in.vecs(); _out = new Frame( _okey, _in.names(), new Vec(Vec.newKey(), espc).makeZeros(srcVecs.length, _in.domains())); _out.delete_and_lock(_jobKey); new RebalanceTask(this, srcVecs).asyncExec(_out); }
@Override public void compute2() { // Lock all possible data dataset.read_lock(jobKey); // Create a template vector for each segment final Vec[][] templates = makeTemplates(dataset, ratios); final int nsplits = templates.length; assert nsplits == ratios.length + 1 : "Unexpected number of split templates!"; // Launch number of distributed FJ for each split part final Vec[] datasetVecs = dataset.vecs(); splits = new Frame[nsplits]; for (int s = 0; s < nsplits; s++) { Frame split = new Frame(destKeys[s], dataset.names(), templates[s]); split.delete_and_lock(jobKey); splits[s] = split; } setPendingCount(1); H2O.submitTask( new H2OCountedCompleter(FrameSplitter.this) { @Override public void compute2() { setPendingCount(nsplits); for (int s = 0; s < nsplits; s++) { new FrameSplitTask( new H2OCountedCompleter(this) { // Completer for this task @Override public void compute2() {} @Override public boolean onExceptionalCompletion( Throwable ex, CountedCompleter caller) { synchronized ( FrameSplitter .this) { // synchronized on this since can be accessed from // different workers workersExceptions = workersExceptions != null ? Arrays.copyOf(workersExceptions, workersExceptions.length + 1) : new Throwable[1]; workersExceptions[workersExceptions.length - 1] = ex; } tryComplete(); // we handle the exception so wait perform normal // completion return false; } }, datasetVecs, ratios, s) .asyncExec(splits[s]); } tryComplete(); // complete the computation of nsplits-tasks } }); tryComplete(); // complete the computation of thrown tasks }