@Override public void compute2() { _in.read_lock(_jobKey); // simply create a bogus new vector (don't even put it into KV) with appropriate number of lines // per chunk and then use it as a source to do multiple makeZero calls // to create empty vecs and than call RebalanceTask on each one of them. // RebalanceTask will fetch the appropriate src chunks and fetch the data from them. int rpc = (int) (_in.numRows() / _nchunks); int rem = (int) (_in.numRows() % _nchunks); long[] espc = new long[_nchunks + 1]; Arrays.fill(espc, rpc); for (int i = 0; i < rem; ++i) ++espc[i]; long sum = 0; for (int i = 0; i < espc.length; ++i) { long s = espc[i]; espc[i] = sum; sum += s; } assert espc[espc.length - 1] == _in.numRows() : "unexpected number of rows, expected " + _in.numRows() + ", got " + espc[espc.length - 1]; final Vec[] srcVecs = _in.vecs(); _out = new Frame( _okey, _in.names(), new Vec(Vec.newKey(), espc).makeZeros(srcVecs.length, _in.domains())); _out.delete_and_lock(_jobKey); new RebalanceTask(this, srcVecs).asyncExec(_out); }
@Test public void test_setNA() { // Create a vec with one chunk with 15 elements, and set its numbers Vec vec = new Vec(Vec.newKey(), new long[] {0, 15}).makeZeros(1, null, null, null, null)[0]; int[] vals = new int[] {0, 3, 0, 6, 0, 0, 0, -32769, 0, 12, 234, 32765, 0, 0, 19}; Vec.Writer w = vec.open(); for (int i = 0; i < vals.length; ++i) w.set(i, vals[i]); w.close(); Chunk cc = vec.chunkForChunkIdx(0); assert cc instanceof C2SChunk; Futures fs = new Futures(); fs.blockForPending(); for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at80(i)); for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at8(i)); int[] NAs = new int[] {1, 5, 2}; int[] notNAs = new int[] {0, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14}; for (int na : NAs) cc.setNA(na); for (int na : NAs) Assert.assertTrue(cc.isNA0(na)); for (int na : NAs) Assert.assertTrue(cc.isNA(na)); for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna)); for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna)); NewChunk nc = new NewChunk(null, 0); cc.inflate_impl(nc); nc.values(0, nc.len()); Assert.assertEquals(vals.length, nc.sparseLen()); Assert.assertEquals(vals.length, nc.len()); Iterator<NewChunk.Value> it = nc.values(0, vals.length); for (int i = 0; i < vals.length; ++i) Assert.assertTrue(it.next().rowId0() == i); Assert.assertTrue(!it.hasNext()); for (int na : NAs) Assert.assertTrue(cc.isNA0(na)); for (int na : NAs) Assert.assertTrue(cc.isNA(na)); for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna)); for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna)); Chunk cc2 = nc.compress(); Assert.assertEquals(vals.length, cc.len()); Assert.assertTrue(cc2 instanceof C2SChunk); for (int na : NAs) Assert.assertTrue(cc.isNA0(na)); for (int na : NAs) Assert.assertTrue(cc.isNA(na)); for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna)); for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna)); Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem)); vec.remove(); }
// Make vector templates for all output frame vectors private Vec[][] makeTemplates(Frame dataset, float[] ratios) { Vec anyVec = dataset.anyVec(); final long[][] espcPerSplit = computeEspcPerSplit(anyVec._espc, anyVec.length(), ratios); final int num = dataset.numCols(); // number of columns in input frame final int nsplits = espcPerSplit.length; // number of splits final String[][] domains = dataset.domains(); // domains Vec[][] t = new Vec[nsplits][ /*num*/]; // resulting vectors for all for (int i = 0; i < nsplits; i++) { // vectors for j-th split t[i] = new Vec(Vec.newKey(), espcPerSplit[i /*-th split*/]).makeZeros(num, domains); } return t; }
@Test public void test() { Frame frame = null; try { Futures fs = new Futures(); Random random = new Random(); Vec[] vecs = new Vec[1]; AppendableVec vec = new AppendableVec(Vec.newKey(), Vec.T_NUM); for (int i = 0; i < 2; i++) { NewChunk chunk = new NewChunk(vec, i); for (int r = 0; r < 1000; r++) chunk.addNum(random.nextInt(1000)); chunk.close(i, fs); } vecs[0] = vec.layout_and_close(fs); fs.blockForPending(); frame = new Frame(Key.<Frame>make(), null, vecs); // Make sure we test the multi-chunk case vecs = frame.vecs(); assert vecs[0].nChunks() > 1; long rows = frame.numRows(); Vec v = vecs[0]; double min = Double.POSITIVE_INFINITY, max = Double.NEGATIVE_INFINITY, mean = 0, sigma = 0; for (int r = 0; r < rows; r++) { double d = v.at(r); if (d < min) min = d; if (d > max) max = d; mean += d; } mean /= rows; for (int r = 0; r < rows; r++) { double d = v.at(r); sigma += (d - mean) * (d - mean); } sigma = Math.sqrt(sigma / (rows - 1)); double epsilon = 1e-9; assertEquals(max, v.max(), epsilon); assertEquals(min, v.min(), epsilon); assertEquals(mean, v.mean(), epsilon); assertEquals(sigma, v.sigma(), epsilon); } finally { if (frame != null) frame.delete(); } }
/**
 * Builds a single-column frame backed by a {@code ByteVec} whose chunks hold the bytes of the
 * given strings, one chunk per string, and stores it under key {@code k}.
 *
 * <p>Chunk boundaries are derived from the encoded byte length of each string, not from
 * {@code String.length()}, so the layout matches the stored payload for non-ASCII input too.
 *
 * @param k key under which the resulting frame is stored
 * @param data chunk contents; each string becomes one chunk, encoded with the platform default
 *     charset (as before)
 * @return {@code k}
 */
public static Key makeByteVec(Key k, String... data) {
  final byte[][] chunks = new byte[data.length][];
  final long[] espc = new long[data.length + 1];
  for (int i = 0; i < chunks.length; ++i) {
    chunks[i] = data[i].getBytes();
    // Use the byte count: String.length() counts UTF-16 code units and can differ from the
    // number of bytes actually stored in the chunk.
    espc[i + 1] = espc[i] + chunks[i].length;
  }
  Futures fs = new Futures();
  Key key = Vec.newKey();
  ByteVec bv = new ByteVec(key, Vec.ESPC.rowLayout(key, espc));
  // Store each chunk's raw bytes under the vector's chunk key.
  for (int i = 0; i < chunks.length; ++i) {
    Key chunkKey = bv.chunkKey(i);
    DKV.put(
        chunkKey,
        new Value(chunkKey, chunks[i].length, chunks[i], TypeMap.C1NCHUNK, Value.ICE),
        fs);
  }
  DKV.put(bv._key, bv, fs);
  Frame fr = new Frame(k, new String[] {"makeByteVec"}, new Vec[] {bv});
  DKV.put(k, fr, fs);
  // Wait for every put issued above to complete before handing the key back.
  fs.blockForPending();
  return k;
}