@Test public void test_inflate_impl() {
  for (int l = 0; l < 2; ++l) {
    NewChunk nc = new NewChunk(null, 0);
    // Row layout: -32.767, 0.34, 0, 32.767, NA            for l==0
    //         NA, -32.767, 0.34, 0, 32.767, NA            for l==1
    long[] man = new long[] {-32767, 34, 0, 32767}; // mantissas
    int[] exp = new int[] {-3, -2, 1, -3};          // decimal exponents
    if (l == 1) nc.addNA(); // leading NA for the second pass
    for (int i = 0; i < man.length; ++i) nc.addNum(man[i], exp[i]);
    nc.addNA(); // trailing NA in both passes

    // 1) Compress and verify the scaled-short chunk round-trips every value.
    Chunk cc = nc.compress();
    Assert.assertEquals(man.length + 1 + l, cc.len());
    Assert.assertTrue(cc instanceof C2SChunk);
    if (l == 1) {
      Assert.assertTrue(cc.isNA0(0));
      Assert.assertTrue(cc.isNA(0));
    }
    for (int i = 0; i < man.length; ++i) {
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc.at0(l + i), 0);
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc.at(l + i), 0);
    }
    Assert.assertTrue(cc.isNA0(man.length + l));
    Assert.assertTrue(cc.isNA(man.length + l));

    // 2) Inflate back to a NewChunk and verify values and NAs survive.
    nc = cc.inflate_impl(new NewChunk(null, 0));
    nc.values(0, nc.len());
    Assert.assertEquals(man.length + 1 + l, nc.len());
    Assert.assertEquals(man.length + 1 + l, nc.sparseLen());
    if (l == 1) {
      Assert.assertTrue(nc.isNA0(0));
      Assert.assertTrue(nc.isNA(0));
    }
    for (int i = 0; i < man.length; ++i) {
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) nc.at0(l + i), 0);
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) nc.at(l + i), 0);
    }
    Assert.assertTrue(nc.isNA0(man.length + l));
    Assert.assertTrue(nc.isNA(man.length + l));

    // 3) Re-compress; the second compression must reproduce the first exactly.
    Chunk cc2 = nc.compress();
    // BUGFIX: previously asserted cc.len() (always true from step 1); the
    // intent is to check the length of the re-compressed chunk cc2.
    Assert.assertEquals(man.length + 1 + l, cc2.len());
    if (l == 1) {
      Assert.assertTrue(cc2.isNA0(0));
      Assert.assertTrue(cc2.isNA(0));
    }
    for (int i = 0; i < man.length; ++i) {
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc2.at0(l + i), 0);
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc2.at(l + i), 0);
    }
    Assert.assertTrue(cc2.isNA0(man.length + l));
    Assert.assertTrue(cc2.isNA(man.length + l));
    Assert.assertTrue(cc2 instanceof C2SChunk);
    Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem));
  }
}
/**
 * Appends the string at {@code row} of chunk {@code c}, or an NA if that row
 * is missing. Also folds the source chunk's ASCII-only flag into this one.
 * NOTE(review): assumes a non-NA source chunk is a CStrChunk — the cast is
 * unconditional; confirm callers only pass string chunks here.
 */
public void addStr(Chunk c, int row) {
  if (c.isNA(row)) {
    addNA();
    return;
  }
  addStr(c.atStr(new BufferedString(), row));
  _isAllASCII &= ((CStrChunk) c)._isAllASCII;
}
/**
 * Slices the selected rows (_rows, 1-based; negative = exclusion) and the
 * selected columns (_cols) out of the input chunks into the NewChunks.
 * Works in [rlo, rhi) windows of local (chunk-relative) row indices.
 * NOTE(review): logic is order-dependent (mutates rx/rlo/rhi/_ex in place);
 * documented only, not restructured.
 */
@Override public void map(Chunk chks[], NewChunk nchks[]) {
  long rstart = chks[0]._start; // global index of this chunk's first row
  int rlen = chks[0]._len; // Total row count
  int rx = 0; // Which row to in/ex-clude
  int rlo = 0; // Lo/Hi for this block of rows
  int rhi = rlen;
  while (true) { // Still got rows to include?
    if (_rows != null) { // Got a row selector?
      if (rx >= _rows.length) break; // All done with row selections
      long r = _rows[rx++] - 1; // Next row selector
      if (r < 0) { // Row exclusion
        // NOTE(review): _rows[rx] is read after the increment above —
        // presumably guarded by ordering of the selector list; confirm it
        // cannot index past the end when rx == _rows.length.
        if (rx > 0 && _rows[rx - 1] < _rows[rx]) throw H2O.unimpl();
        long er = Math.abs(r) - 2; // last row (global) before the exclusion
        if (er < rstart) continue;
        // scoop up all of the rows before the first exclusion
        if (rx == 1 && ((int) (er + 1 - rstart)) > 0 && _ex) {
          rlo = (int) rstart;
          // NOTE(review): asymmetric with the else-branch formula
          // (er + 1 - rstart) — looks like a possible off-by-one; confirm.
          rhi = (int) (er - rstart);
          _ex = false;
          rx--; // re-visit this selector on the next pass
        } else {
          rlo = (int) (er + 1 - rstart);
          // TODO: handle jumbled row indices ( e.g. -c(1,5,3) )
          while (rx < _rows.length && (_rows[rx] + 1 == _rows[rx - 1] && rlo < rlen)) {
            if (rx < _rows.length - 1 && _rows[rx] < _rows[rx + 1]) throw H2O.unimpl();
            rx++;
            rlo++; // Exclude consecutive rows
          }
          rhi = rx >= _rows.length ? rlen : (int) Math.abs(_rows[rx] - 1) - 2;
          if (rx < _rows.length - 1 && _rows[rx] < _rows[rx + 1]) throw H2O.unimpl();
        }
      } else { // Positive row list?
        if (r < rstart) continue; // row belongs to an earlier chunk
        rlo = (int) (r - rstart);
        rhi = rlo + 1; // Stop at the next row
        while (rx < _rows.length && (_rows[rx] - 1 - rstart) == rhi && rhi < rlen) {
          rx++;
          rhi++; // Grab sequential rows
        }
      }
    }
    // Process this next set of rows
    // For all cols in the new set
    for (int i = 0; i < _cols.length; i++) {
      Chunk oc = chks[_cols[i]];
      NewChunk nc = nchks[i];
      if (oc._vec.isInt()) { // Slice on integer columns
        for (int j = rlo; j < rhi; j++)
          if (oc.isNA0(j)) nc.addNA();
          else nc.addNum(oc.at80(j), 0);
      } else { // Slice on double columns
        for (int j = rlo; j < rhi; j++) nc.addNum(oc.at0(j));
      }
    }
    rlo = rhi;
    if (_rows == null) break; // no selector: single full-range pass
  }
}
public NewChunk convertEnum2Str(ValueString[] emap) { NewChunk strChunk = new NewChunk(_vec, _cidx); int j = 0, l = _len; for (int i = 0; i < l; ++i) { if (_id != null && _id.length > 0 && (j < _id.length && _id[j] == i)) // Sparse storage // adjust for enum ids using 1-based indexing strChunk.addStr(emap[(int) _ls[j++] - 1]); else if (_xs[i] != Integer.MIN_VALUE) // Categorical value isn't NA strChunk.addStr(emap[(int) _ls[i] - 1]); else strChunk.addNA(); } if (_id != null) assert j == sparseLen() : "j = " + j + ", sparseLen = " + sparseLen(); return strChunk; }
/**
 * Decompresses this chunk into {@code nc}: each raw byte is either the NA
 * sentinel (C1Chunk._NA) or an unsigned value that is re-biased and emitted
 * with the chunk's decimal scale as its exponent.
 */
@Override public NewChunk inflate_impl(NewChunk nc) {
  final double dx = Math.log10(_scale);
  assert water.util.PrettyPrint.fitsIntoInt(dx); // scale must be a power of ten
  nc.set_sparseLen(0);
  nc.set_len(0);
  final int exponent = (int) dx;
  final int n = _len;
  for (int row = 0; row < n; row++) {
    final int raw = 0xFF & _mem[row + _OFF]; // unsigned byte read
    if (raw == C1Chunk._NA) {
      nc.addNA();
    } else {
      nc.addNum(raw + _bias, exponent);
    }
  }
  return nc;
}
public void addNum(long val, int exp) { if (isUUID() || isString()) addNA(); else if (_ds != null) { assert _ls == null; addNum(val * PrettyPrint.pow10(exp)); } else { if (val == 0) exp = 0; // Canonicalize zero long t; // Remove extra scaling while (exp < 0 && exp > -9999999 && (t = val / 10) * 10 == val) { val = t; exp++; } append2(val, exp); } }
@Override public void map(Chunk cs) { int idx = _chunkOffset + cs.cidx(); Key ckey = Vec.chunkKey(_v._key, idx); if (_cmap != null) { assert !cs.hasFloat() : "Input chunk (" + cs.getClass() + ") has float, but is expected to be categorical"; NewChunk nc = new NewChunk(_v, idx); // loop over rows and update ints for new domain mapping according to vecs[c].domain() for (int r = 0; r < cs._len; ++r) { if (cs.isNA(r)) nc.addNA(); else nc.addNum(_cmap[(int) cs.at8(r)], 0); } nc.close(_fs); } else { DKV.put(ckey, cs.deepCopy(), _fs, true); } }
// Fast-path append double data public void addNum(double d) { if (isUUID() || isString()) { addNA(); return; } if (_id == null || d != 0) { if (_ls != null) switch_to_doubles(); if (_ds == null || sparseLen() >= _ds.length) { append2slowd(); // call addNum again since append2slow might have flipped to sparse addNum(d); assert sparseLen() <= _len; return; } if (_id != null) _id[sparseLen()] = _len; _ds[sparseLen()] = d; set_sparseLen(sparseLen() + 1); } set_len(_len + 1); assert sparseLen() <= _len; }
/** Appends the string at local {@code row} of chunk {@code c}, or NA if missing. */
public void addStr(Chunk c, int row) {
  if (c.isNA(row)) {
    addNA();
    return;
  }
  addStr(c.atStr(new ValueString(), row));
}
/** Appends the string at absolute {@code row} of chunk {@code c}, or NA if missing. */
public void addStr(Chunk c, long row) {
  if (c.isNA_abs(row)) {
    addNA();
    return;
  }
  addStr(c.atStr_abs(new ValueString(), row));
}