@Override public void map(Chunk chks[], NewChunk nchks[]) {
  long rstart = chks[0]._start;
  int rlen = chks[0]._len;      // Total row count
  int rx = 0;                   // Which row to in/ex-clude
  int rlo = 0;                  // Lo/Hi for this block of rows
  int rhi = rlen;
  while (true) {                // Still got rows to include?
    if (_rows != null) {        // Got a row selector?
      if (rx >= _rows.length) break;    // All done with row selections
      long r = _rows[rx++] - 1;         // Next row selector
      if (r < 0) {                      // Row exclusion
        if (rx > 0 && _rows[rx - 1] < _rows[rx]) throw H2O.unimpl();
        long er = Math.abs(r) - 2;
        if (er < rstart) continue;
        // Scoop up all of the rows before the first exclusion
        if (rx == 1 && ((int) (er + 1 - rstart)) > 0 && _ex) {
          rlo = (int) rstart;
          rhi = (int) (er - rstart);
          _ex = false;
          rx--;
        } else {
          rlo = (int) (er + 1 - rstart);
          // TODO: handle jumbled row indices (e.g. -c(1,5,3))
          while (rx < _rows.length && (_rows[rx] + 1 == _rows[rx - 1] && rlo < rlen)) {
            if (rx < _rows.length - 1 && _rows[rx] < _rows[rx + 1]) throw H2O.unimpl();
            rx++; rlo++;        // Exclude consecutive rows
          }
          rhi = rx >= _rows.length ? rlen : (int) Math.abs(_rows[rx] - 1) - 2;
          if (rx < _rows.length - 1 && _rows[rx] < _rows[rx + 1]) throw H2O.unimpl();
        }
      } else {                          // Positive row list?
        if (r < rstart) continue;
        rlo = (int) (r - rstart);
        rhi = rlo + 1;                  // Stop at the next row
        while (rx < _rows.length && (_rows[rx] - 1 - rstart) == rhi && rhi < rlen) {
          rx++; rhi++;          // Grab sequential rows
        }
      }
    }
    // Process this next set of rows: for all cols in the new set
    for (int i = 0; i < _cols.length; i++) {
      Chunk oc = chks[_cols[i]];
      NewChunk nc = nchks[i];
      if (oc._vec.isInt()) {            // Slice on integer columns
        for (int j = rlo; j < rhi; j++)
          if (oc.isNA0(j)) nc.addNA();
          else nc.addNum(oc.at80(j), 0);
      } else {                          // Slice on double columns
        for (int j = rlo; j < rhi; j++)
          nc.addNum(oc.at0(j));
      }
    }
    rlo = rhi;
    if (_rows == null) break;
  }
}
@Override NewChunk inflate_impl(NewChunk nc) {
  double dx = Math.log10(_scale);
  assert DParseTask.fitsIntoInt(dx);
  Arrays.fill(nc._xs = MemoryManager.malloc4(_len), (int) dx);
  nc._ls = MemoryManager.malloc8(_len);
  for (int i = 0; i < _len; i++) {
    int res = UDP.get2(_mem, (i << 1) + OFF);
    if (res == C2Chunk._NA) nc.setNA_impl2(i);
    else nc._ls[i] = res + _bias;
  }
  return nc;
}
public NewChunk convertEnum2Str(ValueString[] emap) {
  NewChunk strChunk = new NewChunk(_vec, _cidx);
  int j = 0, l = _len;
  for (int i = 0; i < l; ++i) {
    if (_id != null && _id.length > 0 && (j < _id.length && _id[j] == i))  // Sparse storage
      // Adjust for enum ids using 1-based indexing
      strChunk.addStr(emap[(int) _ls[j++] - 1]);
    else if (_xs[i] != Integer.MIN_VALUE)  // Categorical value isn't NA
      strChunk.addStr(emap[(int) _ls[i] - 1]);
    else
      strChunk.addNA();
  }
  if (_id != null)
    assert j == sparseLen() : "j = " + j + ", sparseLen = " + sparseLen();
  return strChunk;
}
@Override public NewChunk inflate_impl(NewChunk nc) {
  double dx = Math.log10(_scale);
  assert water.util.PrettyPrint.fitsIntoInt(dx);
  nc.set_sparseLen(0);
  nc.set_len(0);
  final int len = _len;
  for (int i = 0; i < len; i++) {
    int res = 0xFF & _mem[i + _OFF];
    if (res == C1Chunk._NA) nc.addNA();
    else nc.addNum((res + _bias), (int) dx);
  }
  return nc;
}
@Test public void test() {
  Frame frame = null;
  try {
    Futures fs = new Futures();
    Random random = new Random();
    Vec[] vecs = new Vec[1];
    AppendableVec vec = new AppendableVec(Vec.newKey(), Vec.T_NUM);
    for (int i = 0; i < 2; i++) {
      NewChunk chunk = new NewChunk(vec, i);
      for (int r = 0; r < 1000; r++)
        chunk.addNum(random.nextInt(1000));
      chunk.close(i, fs);
    }
    vecs[0] = vec.layout_and_close(fs);
    fs.blockForPending();
    frame = new Frame(Key.<Frame>make(), null, vecs);

    // Make sure we test the multi-chunk case
    vecs = frame.vecs();
    assert vecs[0].nChunks() > 1;
    long rows = frame.numRows();

    Vec v = vecs[0];
    double min = Double.POSITIVE_INFINITY, max = Double.NEGATIVE_INFINITY, mean = 0, sigma = 0;
    for (int r = 0; r < rows; r++) {
      double d = v.at(r);
      if (d < min) min = d;
      if (d > max) max = d;
      mean += d;
    }
    mean /= rows;
    for (int r = 0; r < rows; r++) {
      double d = v.at(r);
      sigma += (d - mean) * (d - mean);
    }
    sigma = Math.sqrt(sigma / (rows - 1));

    double epsilon = 1e-9;
    assertEquals(max, v.max(), epsilon);
    assertEquals(min, v.min(), epsilon);
    assertEquals(mean, v.mean(), epsilon);
    assertEquals(sigma, v.sigma(), epsilon);
  } finally {
    if (frame != null) frame.delete();
  }
}
@Override public void map(Chunk cs) {
  int idx = _chunkOffset + cs.cidx();
  Key ckey = Vec.chunkKey(_v._key, idx);
  if (_cmap != null) {
    assert !cs.hasFloat() : "Input chunk (" + cs.getClass() + ") has float, but is expected to be categorical";
    NewChunk nc = new NewChunk(_v, idx);
    // Loop over rows and update ints for the new domain mapping according to vecs[c].domain()
    for (int r = 0; r < cs._len; ++r) {
      if (cs.isNA(r)) nc.addNA();
      else nc.addNum(_cmap[(int) cs.at8(r)], 0);
    }
    nc.close(_fs);
  } else {
    DKV.put(ckey, cs.deepCopy(), _fs, true);
  }
}
public void add2Chunk(NewChunk c) {
  if (_ds == null && _ss == null) {
    c.addNum(_ls[_lId], _xs[_lId]);
  } else {
    if (_ls != null) {
      c.addUUID(_ls[_lId], Double.doubleToRawLongBits(_ds[_lId]));
    } else if (_ss != null) {
      int sidx = _is[_lId];
      int nextNotNAIdx = _lId + 1;
      // Find the next non-NA value (_is[idx] != -1)
      while (nextNotNAIdx < _is.length && _is[nextNotNAIdx] == -1)
        nextNotNAIdx++;
      int slen = nextNotNAIdx < _is.length ? _is[nextNotNAIdx] - sidx : _sslen - sidx;
      // A null BufferedString represents an NA value
      BufferedString bStr = sidx == -1 ? null : new BufferedString().set(_ss, sidx, slen);
      c.addStr(bStr);
    } else {
      c.addNum(_ds[_lId]);
    }
  }
}
@Test public void test_setNA() {
  // Create a vec with one chunk of 15 elements, and set its numbers
  Vec vec = new Vec(Vec.newKey(), new long[]{0, 15}).makeZeros(1, null, null, null, null)[0];
  int[] vals = new int[]{0, 3, 0, 6, 0, 0, 0, -32769, 0, 12, 234, 32765, 0, 0, 19};
  Vec.Writer w = vec.open();
  for (int i = 0; i < vals.length; ++i) w.set(i, vals[i]);
  w.close();

  Chunk cc = vec.chunkForChunkIdx(0);
  assert cc instanceof C2SChunk;
  Futures fs = new Futures();
  fs.blockForPending();
  for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at80(i));
  for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at8(i));

  int[] NAs = new int[]{1, 5, 2};
  int[] notNAs = new int[]{0, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14};
  for (int na : NAs) cc.setNA(na);
  for (int na : NAs) Assert.assertTrue(cc.isNA0(na));
  for (int na : NAs) Assert.assertTrue(cc.isNA(na));
  for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna));
  for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna));

  NewChunk nc = new NewChunk(null, 0);
  cc.inflate_impl(nc);
  nc.values(0, nc.len());
  Assert.assertEquals(vals.length, nc.sparseLen());
  Assert.assertEquals(vals.length, nc.len());
  Iterator<NewChunk.Value> it = nc.values(0, vals.length);
  for (int i = 0; i < vals.length; ++i) Assert.assertTrue(it.next().rowId0() == i);
  Assert.assertTrue(!it.hasNext());
  for (int na : NAs) Assert.assertTrue(cc.isNA0(na));
  for (int na : NAs) Assert.assertTrue(cc.isNA(na));
  for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna));
  for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna));

  Chunk cc2 = nc.compress();
  Assert.assertEquals(vals.length, cc.len());
  Assert.assertTrue(cc2 instanceof C2SChunk);
  for (int na : NAs) Assert.assertTrue(cc.isNA0(na));
  for (int na : NAs) Assert.assertTrue(cc.isNA(na));
  for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna));
  for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna));
  Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem));

  vec.remove();
}
// PREpend all of 'nc' onto the current NewChunk.  Kill nc.
public void addr(NewChunk nc) {
  // Swap every buffer with 'nc', then append the old contents of this chunk back via add().
  long[] tmpl = _ls;  _ls = nc._ls;  nc._ls = tmpl;
  int[] tmpi = _xs;  _xs = nc._xs;  nc._xs = tmpi;
  tmpi = _id;  _id = nc._id;  nc._id = tmpi;
  double[] tmpd = _ds;  _ds = nc._ds;  nc._ds = tmpd;
  int tmp = _sparseLen;  _sparseLen = nc._sparseLen;  nc._sparseLen = tmp;
  tmp = _len;  _len = nc._len;  nc._len = tmp;
  add(nc);
}
@Test public void test_inflate_impl2() {
  for (int l = 0; l < 2; ++l) {
    NewChunk nc = new NewChunk(null, 0);
    long[] man = new long[]{-12767, 34, 0, 52767};
    int[] exp = new int[]{-3, -2, 1, -3};
    if (l == 1) nc.addNA();  // -32768
    for (int i = 0; i < man.length; ++i) nc.addNum(man[i], exp[i]);
    nc.addNA();

    Chunk cc = nc.compress();
    Assert.assertEquals(man.length + 1 + l, cc.len());
    Assert.assertTrue(cc instanceof C2SChunk);
    if (l == 1) {
      Assert.assertTrue(cc.isNA0(0));
      Assert.assertTrue(cc.isNA(0));
    }
    for (int i = 0; i < man.length; ++i) {
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc.at0(l + i), 0);
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc.at(l + i), 0);
    }
    Assert.assertTrue(cc.isNA0(man.length + l));

    nc = cc.inflate_impl(new NewChunk(null, 0));
    nc.values(0, nc.len());
    Assert.assertEquals(man.length + 1 + l, nc.len());
    Assert.assertEquals(man.length + 1 + l, nc.sparseLen());
    if (l == 1) {
      Assert.assertTrue(nc.isNA0(0));
      Assert.assertTrue(nc.isNA(0));
    }
    for (int i = 0; i < man.length; ++i) {
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) nc.at0(l + i), 0);
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) nc.at(l + i), 0);
    }
    Assert.assertTrue(nc.isNA0(man.length + l));
    Assert.assertTrue(nc.isNA(man.length + l));

    Chunk cc2 = nc.compress();
    Assert.assertEquals(man.length + 1 + l, cc.len());
    if (l == 1) {
      Assert.assertTrue(cc2.isNA0(0));
      Assert.assertTrue(cc2.isNA(0));
    }
    for (int i = 0; i < man.length; ++i) {
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc2.at0(l + i), 0);
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc2.at(l + i), 0);
    }
    Assert.assertTrue(cc2.isNA0(man.length + l));
    Assert.assertTrue(cc2.isNA(man.length + l));
    Assert.assertTrue(cc2 instanceof C2SChunk);
    Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem));
  }
}
// Append all of 'nc' onto the current NewChunk.  Kill nc.
public void add(NewChunk nc) {
  assert _cidx >= 0;
  assert sparseLen() <= _len;
  assert nc.sparseLen() <= nc._len : "_len = " + nc.sparseLen() + ", _len2 = " + nc._len;
  if (nc._len == 0) return;
  if (_len == 0) {
    // This chunk is empty: just steal nc's buffers
    _ls = nc._ls;  nc._ls = null;
    _xs = nc._xs;  nc._xs = null;
    _id = nc._id;  nc._id = null;
    _ds = nc._ds;  nc._ds = null;
    _is = nc._is;  nc._is = null;
    _ss = nc._ss;  nc._ss = null;
    set_sparseLen(nc.sparseLen());
    set_len(nc._len);
    return;
  }
  if (nc.sparse() != sparse()) {  // For now, just make it dense
    cancel_sparse();
    nc.cancel_sparse();
  }
  if (_ds != null) throw H2O.fail();
  while (sparseLen() + nc.sparseLen() >= _xs.length)
    _xs = MemoryManager.arrayCopyOf(_xs, _xs.length << 1);
  _ls = MemoryManager.arrayCopyOf(_ls, _xs.length);
  System.arraycopy(nc._ls, 0, _ls, sparseLen(), nc.sparseLen());
  System.arraycopy(nc._xs, 0, _xs, sparseLen(), nc.sparseLen());
  if (_id != null) {
    assert nc._id != null;
    _id = MemoryManager.arrayCopyOf(_id, _xs.length);
    System.arraycopy(nc._id, 0, _id, sparseLen(), nc.sparseLen());
    for (int i = sparseLen(); i < sparseLen() + nc.sparseLen(); ++i)
      _id[i] += _len;
  } else assert nc._id == null;
  set_sparseLen(sparseLen() + nc.sparseLen());
  set_len(_len + nc._len);
  nc._ls = null;  nc._xs = null;  nc._id = null;
  nc.set_sparseLen(nc.set_len(0));
  assert sparseLen() <= _len;
}
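// A minimal usage sketch of the append semantics above (not from the source).
// `av` is a hypothetical AppendableVec, constructed as in the tests in this file;
// addNum(long, int) and sparseLen() are the accessors already used in these snippets.
static void appendSketch(AppendableVec av) {
  NewChunk head = new NewChunk(av, 0);
  for (int i = 0; i < 3; i++) head.addNum(i, 0);   // head holds {0, 1, 2}
  NewChunk tail = new NewChunk(av, 0);
  for (int i = 3; i < 6; i++) tail.addNum(i, 0);   // tail holds {3, 4, 5}
  head.add(tail);                                  // head now holds {0..5}; tail's buffers are stolen and nulled
  assert head.sparseLen() == 6 && head._len == 6;
}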
@Override public void map(Chunk c, NewChunk nc) {
  double acc = _init;
  for (int i = 0; i < c._len; ++i)
    nc.addNum(acc = op(acc, c.atd(i)));
  _chkCumu[c.cidx()] = acc;
}
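// The map above runs the scan operation `op` row-by-row within each chunk and records the
// chunk's final accumulator in _chkCumu[cidx]. A follow-up pass normally folds those
// per-chunk results into running offsets so each chunk can be shifted by the cumulative
// result of all preceding chunks. Hedged sketch of that idea below (plain arrays, op
// assumed to be addition; not the actual H2O follow-up task):
static double[] prefixOffsets(double[] chkCumu) {
  double[] offset = new double[chkCumu.length];   // amount to add to every row of chunk i
  for (int i = 1; i < chkCumu.length; i++)
    offset[i] = offset[i - 1] + chkCumu[i - 1];
  return offset;   // a second per-chunk pass would then add offset[cidx] to each value written above
}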
public Frame deepSlice(Object orows, Object ocols) {
  // ocols is either a long[] or a Frame-of-1-Vec
  long[] cols;
  if (ocols == null) {
    cols = (long[]) ocols;
    assert cols == null;
  } else if (ocols instanceof long[]) {
    cols = (long[]) ocols;
  } else if (ocols instanceof Frame) {
    Frame fr = (Frame) ocols;
    if (fr.numCols() != 1)
      throw new IllegalArgumentException("Columns Frame must have only one column (actually has " + fr.numCols() + " columns)");
    long n = fr.anyVec().length();
    if (n > MAX_EQ2_COLS)
      throw new IllegalArgumentException("Too many requested columns (requested " + n + ", max " + MAX_EQ2_COLS + ")");
    cols = new long[(int) n];
    Vec v = fr._vecs[0];
    for (long i = 0; i < v.length(); i++)
      cols[(int) i] = v.at8(i);
  } else {
    throw new IllegalArgumentException("Columns is specified by an unsupported data type (" + ocols.getClass().getName() + ")");
  }

  // Since cols is probably short, convert to a positive list.
  int c2[] = null;
  if (cols == null) {
    c2 = new int[numCols()];
    for (int i = 0; i < c2.length; i++) c2[i] = i;
  } else if (cols.length == 0) {
    c2 = new int[0];
  } else if (cols[0] > 0) {
    c2 = new int[cols.length];
    for (int i = 0; i < cols.length; i++)
      c2[i] = (int) cols[i] - 1;        // Convert 1-based cols to zero-based
  } else {
    c2 = new int[numCols() - cols.length];
    int j = 0;
    for (int i = 0; i < numCols(); i++) {
      if (j >= cols.length || i < (-cols[j] - 1)) c2[i - j] = i;
      else j++;
    }
  }
  for (int i = 0; i < c2.length; i++)
    if (c2[i] >= numCols())
      throw new IllegalArgumentException("Trying to select column " + c2[i] + " but only " + numCols() + " present.");
  if (c2.length == 0)
    throw new IllegalArgumentException("No columns selected (did you try to select column 0 instead of column 1?)");

  // Do Da Slice
  // orows is either a long[] or a Vec
  if (orows == null)
    return new DeepSlice((long[]) orows, c2).doAll(c2.length, this).outputFrame(names(c2), domains(c2));
  else if (orows instanceof long[]) {
    final long CHK_ROWS = 1000000;
    long[] rows = (long[]) orows;
    if (rows.length == 0)
      return new DeepSlice(rows, c2).doAll(c2.length, this).outputFrame(names(c2), domains(c2));
    if (rows[0] < 0)
      return new DeepSlice(rows, c2).doAll(c2.length, this).outputFrame(names(c2), domains(c2));
    // Vec'ize the index array
    AppendableVec av = new AppendableVec("rownames");
    int r = 0;
    int c = 0;
    while (r < rows.length) {
      NewChunk nc = new NewChunk(av, c);
      long end = Math.min(r + CHK_ROWS, rows.length);
      for (; r < end; r++) nc.addNum(rows[r]);
      nc.close(c++, null);
    }
    Vec c0 = av.close(null);    // c0 is the row index vec
    Frame fr2 = new Slice(c2, this)
        .doAll(c2.length, new Frame(new String[]{"rownames"}, new Vec[]{c0}))
        .outputFrame(names(c2), domains(c2));
    UKV.remove(c0._key);        // Remove hidden vector
    return fr2;
  }
  Frame frows = (Frame) orows;
  Vec vrows = frows.anyVec();
  // It's a compatible Vec; use it as a boolean selector.
  // Build column names for the result.
  Vec[] vecs = new Vec[c2.length + 1];
  String[] names = new String[c2.length + 1];
  for (int i = 0; i < c2.length; ++i) {
    vecs[i] = _vecs[c2[i]];
    names[i] = _names[c2[i]];
  }
  vecs[c2.length] = vrows;
  names[c2.length] = "predicate";
  return new DeepSelect().doAll(c2.length, new Frame(names, vecs)).outputFrame(names(c2), domains(c2));
}
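// For orientation, the selection conventions implemented in deepSlice above (1-based row and
// column indices, negative indices for exclusion, null for "all") translate into calls like
// the following. Hedged usage sketch: `fr` is a hypothetical, already-built Frame.
static void deepSliceSketch(Frame fr) {
  Frame firstThreeRowsOfCol1 = fr.deepSlice(new long[]{1, 2, 3}, new long[]{1});
  Frame allRowsWithoutCol1   = fr.deepSlice(null, new long[]{-1});   // negative index = exclude column 1
  Frame everything           = fr.deepSlice(null, null);
  // Remember to delete() the returned Frames when done, as the tests in this file do.
  firstThreeRowsOfCol1.delete();  allRowsWithoutCol1.delete();  everything.delete();
}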
@Test public void test_inflate_impl() {
  final int K = 1 << 16;
  for (Double d : new Double[]{3.14159265358, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, Double.MAX_VALUE, Double.NaN}) {
    NewChunk nc = new NewChunk(null, 0);
    for (int i = 0; i < K; ++i) nc.addNum(d);
    Assert.assertEquals(K, nc.len());
    Assert.assertEquals(K, nc.sparseLen());

    Chunk cc = nc.compress();
    Assert.assertEquals(K, cc.len());
    Assert.assertTrue(cc instanceof C0DChunk);
    for (int i = 0; i < K; ++i) Assert.assertEquals(d, cc.at0(i), Math.ulp(d));
    for (int i = 0; i < K; ++i) Assert.assertEquals(d, cc.at(i), Math.ulp(d));

    nc = cc.inflate_impl(new NewChunk(null, 0));
    nc.values(0, nc.len());
    Assert.assertEquals(K, nc.len());
    Assert.assertEquals(K, nc.sparseLen());
    for (int i = 0; i < K; ++i) Assert.assertEquals(d, nc.at0(i), Math.ulp(d));
    for (int i = 0; i < K; ++i) Assert.assertEquals(d, nc.at(i), Math.ulp(d));

    Chunk cc2 = nc.compress();
    Assert.assertEquals(K, cc2.len());
    Assert.assertTrue(cc2 instanceof C0DChunk);
    for (int i = 0; i < K; ++i) Assert.assertEquals(d, cc2.at0(i), Math.ulp(d));
    for (int i = 0; i < K; ++i) Assert.assertEquals(d, cc2.at(i), Math.ulp(d));
    Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem));
  }
}
private void rebalanceChunk(Vec srcVec, Chunk chk) {
  NewChunk dst = new NewChunk(chk);
  dst._len = dst._len2 = 0;
  int rem = chk._len;
  while (rem > 0 && dst._len2 < chk._len) {
    Chunk srcRaw = srcVec.chunkForRow(chk._start + dst._len2);
    NewChunk src = new NewChunk(srcRaw);
    src = srcRaw.inflate_impl(src);
    assert src._len2 == srcRaw._len;
    int srcFrom = (int) (chk._start + dst._len2 - src._start);
    // Check if the result is sparse (not exact since we only take a subset of src in general)
    if ((src.sparse() && dst.sparse()) || (src._len + dst._len < NewChunk.MIN_SPARSE_RATIO * (src._len2 + dst._len2))) {
      src.set_sparse(src._len);
      dst.set_sparse(dst._len);
    }
    final int srcTo = srcFrom + rem;
    int off = srcFrom - 1;
    Iterator<NewChunk.Value> it = src.values(Math.max(0, srcFrom), srcTo);
    while (it.hasNext()) {
      NewChunk.Value v = it.next();
      final int rid = v.rowId0();
      assert rid < srcTo;
      int add = rid - off;
      off = rid;
      dst.addZeros(add - 1);
      v.add2Chunk(dst);
      rem -= add;
      assert rem >= 0;
    }
    int trailingZeros = Math.min(rem, src._len2 - off - 1);
    dst.addZeros(trailingZeros);
    rem -= trailingZeros;
  }
  assert rem == 0 : "rem = " + rem;
  assert dst._len2 == chk._len : "len2 = " + dst._len2 + ", _len = " + chk._len;
  dst.close(dst.cidx(), _fs);
}