/**
 * Round-trips a scaled-decimal chunk (C2SChunk) through compress -> inflate_impl -> compress
 * and verifies that values, NAs, and lengths survive each step. Runs twice: once with the
 * data as-is (l==0) and once with a leading NA (l==1).
 */
@Test
public void test_inflate_impl() {
  for (int l = 0; l < 2; ++l) {
    NewChunk nc = new NewChunk(null, 0);
    // Expected logical values:
    //   l==0: -32.767, 0.34, 0, 32.767, NA
    //   l==1: NA, -32.767, 0.34, 0, 32.767, NA
    long[] man = new long[] {-32767, 34, 0, 32767};
    int[] exp = new int[] {-3, -2, 1, -3};
    if (l == 1) nc.addNA(); // -32768
    for (int i = 0; i < man.length; ++i) nc.addNum(man[i], exp[i]);
    nc.addNA();

    // First compression: expect the scaled-short chunk representation.
    Chunk cc = nc.compress();
    Assert.assertEquals(man.length + 1 + l, cc.len());
    Assert.assertTrue(cc instanceof C2SChunk);
    if (l == 1) {
      Assert.assertTrue(cc.isNA0(0));
      Assert.assertTrue(cc.isNA(0));
    }
    for (int i = 0; i < man.length; ++i) {
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc.at0(l + i), 0);
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc.at(l + i), 0);
    }
    Assert.assertTrue(cc.isNA0(man.length + l));
    Assert.assertTrue(cc.isNA(man.length + l));

    // Inflate back into a NewChunk; values and NAs must be preserved.
    nc = cc.inflate_impl(new NewChunk(null, 0));
    nc.values(0, nc.len());
    Assert.assertEquals(man.length + 1 + l, nc.len());
    Assert.assertEquals(man.length + 1 + l, nc.sparseLen());
    if (l == 1) {
      Assert.assertTrue(nc.isNA0(0));
      Assert.assertTrue(nc.isNA(0));
    }
    for (int i = 0; i < man.length; ++i) {
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) nc.at0(l + i), 0);
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) nc.at(l + i), 0);
    }
    Assert.assertTrue(nc.isNA0(man.length + l));
    Assert.assertTrue(nc.isNA(man.length + l));

    // Second compression must reproduce the same chunk.
    Chunk cc2 = nc.compress();
    // BUG FIX: originally asserted cc.len() (already checked above), leaving cc2's
    // length unverified. Check the newly compressed chunk instead.
    Assert.assertEquals(man.length + 1 + l, cc2.len());
    if (l == 1) {
      Assert.assertTrue(cc2.isNA0(0));
      Assert.assertTrue(cc2.isNA(0));
    }
    for (int i = 0; i < man.length; ++i) {
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc2.at0(l + i), 0);
      Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc2.at(l + i), 0);
    }
    Assert.assertTrue(cc2.isNA0(man.length + l));
    Assert.assertTrue(cc2.isNA(man.length + l));
    Assert.assertTrue(cc2 instanceof C2SChunk);
    // Byte-identical backing memory proves the round trip is lossless.
    Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem));
  }
}
/**
 * Appends the string at {@code row} of chunk {@code c}, or an NA if the cell is missing.
 * Also folds the source chunk's all-ASCII flag into this chunk's flag.
 */
public void addStr(Chunk c, int row) {
  if (c.isNA(row)) {
    addNA();
    return;
  }
  addStr(c.atStr(new BufferedString(), row));
  // A single non-ASCII source chunk makes the whole result non-ASCII.
  _isAllASCII &= ((CStrChunk) c)._isAllASCII;
}
@Test public void test_setNA() { // Create a vec with one chunk with 15 elements, and set its numbers Vec vec = new Vec(Vec.newKey(), new long[] {0, 15}).makeZeros(1, null, null, null, null)[0]; int[] vals = new int[] {0, 3, 0, 6, 0, 0, 0, -32769, 0, 12, 234, 32765, 0, 0, 19}; Vec.Writer w = vec.open(); for (int i = 0; i < vals.length; ++i) w.set(i, vals[i]); w.close(); Chunk cc = vec.chunkForChunkIdx(0); assert cc instanceof C2SChunk; Futures fs = new Futures(); fs.blockForPending(); for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at80(i)); for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at8(i)); int[] NAs = new int[] {1, 5, 2}; int[] notNAs = new int[] {0, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14}; for (int na : NAs) cc.setNA(na); for (int na : NAs) Assert.assertTrue(cc.isNA0(na)); for (int na : NAs) Assert.assertTrue(cc.isNA(na)); for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna)); for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna)); NewChunk nc = new NewChunk(null, 0); cc.inflate_impl(nc); nc.values(0, nc.len()); Assert.assertEquals(vals.length, nc.sparseLen()); Assert.assertEquals(vals.length, nc.len()); Iterator<NewChunk.Value> it = nc.values(0, vals.length); for (int i = 0; i < vals.length; ++i) Assert.assertTrue(it.next().rowId0() == i); Assert.assertTrue(!it.hasNext()); for (int na : NAs) Assert.assertTrue(cc.isNA0(na)); for (int na : NAs) Assert.assertTrue(cc.isNA(na)); for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna)); for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna)); Chunk cc2 = nc.compress(); Assert.assertEquals(vals.length, cc.len()); Assert.assertTrue(cc2 instanceof C2SChunk); for (int na : NAs) Assert.assertTrue(cc.isNA0(na)); for (int na : NAs) Assert.assertTrue(cc.isNA(na)); for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna)); for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna)); Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem)); vec.remove(); }
@Override public void map(Chunk cs) { int idx = _chunkOffset + cs.cidx(); Key ckey = Vec.chunkKey(_v._key, idx); if (_cmap != null) { assert !cs.hasFloat() : "Input chunk (" + cs.getClass() + ") has float, but is expected to be categorical"; NewChunk nc = new NewChunk(_v, idx); // loop over rows and update ints for new domain mapping according to vecs[c].domain() for (int r = 0; r < cs._len; ++r) { if (cs.isNA(r)) nc.addNA(); else nc.addNum(_cmap[(int) cs.at8(r)], 0); } nc.close(_fs); } else { DKV.put(ckey, cs.deepCopy(), _fs, true); } }
/**
 * Appends the UUID at {@code row} of chunk {@code c}; a missing cell is appended as the
 * C16Chunk NA sentinel pair.
 */
public void addUUID(Chunk c, int row) {
  if (c.isNA(row)) {
    addUUID(C16Chunk._LO_NA, C16Chunk._HI_NA);
    return;
  }
  addUUID(c.at16l(row), c.at16h(row));
}
/**
 * Appends the string at {@code row} of chunk {@code c}, or an NA if the cell is missing.
 */
public void addStr(Chunk c, int row) {
  if (c.isNA(row)) {
    addNA();
    return;
  }
  addStr(c.atStr(new ValueString(), row));
}
/**
 * Extract (sparse) rows from given chunks. Note: 0 remains 0 - _normSub of DataInfo isn't used
 * (mean shift during standarization is not reverted) - UNLESS offset is specified (for GLM only)
 * Essentially turns the dataset 90 degrees.
 *
 * @param chunks - chunk of dataset
 * @return array of sparse rows
 */
public final Row[] extractSparseRows(Chunk[] chunks) {
  // One sparse Row per input row of this chunk block.
  Row[] rows = new Row[chunks[0]._len];
  long startOff = chunks[0].start(); // global index of this chunk's first row
  // Pass 1: allocate rows, pull per-row offset/weight, and mark "bad" rows up front.
  for (int i = 0; i < rows.length; ++i) {
    rows[i] =
        new Row(
            true,
            // NOTE(review): 16 looks like an initial nonzero-capacity hint — confirm
            // Row can grow past it for rows with more than 16 nonzeros.
            Math.min(_nums, 16),
            _cats,
            _responses,
            i,
            startOff); // if sparse, _nums is the correct number of nonzero values! i.e., do not
    // use numNums()
    rows[i].rid = chunks[0].start() + i; // absolute row id (same value as startOff + i)
    if (_offset) {
      rows[i].offset = chunks[offsetChunkId()].atd(i);
      if (Double.isNaN(rows[i].offset)) rows[i].bad = true; // NaN offset invalidates the row
    }
    if (_weights) {
      rows[i].weight = chunks[weightChunkId()].atd(i);
      if (Double.isNaN(rows[i].weight)) rows[i].bad = true; // NaN weight invalidates the row
    }
    if (_skipMissing) {
      // With skipMissing, any NA in a predictor column marks the whole row bad.
      int N = _cats + _nums;
      for (int c = 0; c < N; ++c) if (chunks[c].isNA(i)) rows[i].bad = true;
    }
  }
  // categoricals: NA levels are imputed with the per-column mode (_catModes).
  for (int i = 0; i < _cats; ++i) {
    for (int r = 0; r < chunks[0]._len; ++r) {
      Row row = rows[r];
      if (row.bad) continue;
      int cid = getCategoricalId(i, chunks[i].isNA(r) ? _catModes[i] : (int) chunks[i].at8(r));
      // cid < 0 means the level is not represented (dropped) in the expanded space.
      if (cid >= 0) row.binIds[row.nBins++] = cid;
    }
  }
  // generic numbers + interactions
  int interactionOffset = 0;
  for (int cid = 0; cid < _nums; ++cid) {
    Chunk c = chunks[_cats + cid];
    int oldRow = -1;
    if (c
        instanceof
        InteractionWrappedVec
            .InteractionWrappedChunk) { // for each row, only 1 value in an interaction is 'hot'
      // all other values are off (i.e., are 0)
      for (int r = 0; r < c._len; ++r) {
        // the vec is "vertically" dense and "horizontally" sparse (i.e., every row has
        // one, and only one, value)
        Row row = rows[r];
        if (row.bad) continue;
        // Only fatal when _skipMissing; otherwise the NA value flows through below.
        if (c.isNA(r)) row.bad = _skipMissing;
        int cidVirtualOffset =
            getInteractionOffset(
                chunks, _cats + cid, r); // the "virtual" offset into the hot-expanded interaction
        row.addNum(
            _numOffsets[cid] + cidVirtualOffset,
            c.atd(r)); // FIXME: if this produces a "true" NA then should sub with mean? with?
      }
      interactionOffset += nextNumericIdx(cid);
    } else {
      // Plain numeric column: walk only the nonzero entries via nextNZ.
      for (int r = c.nextNZ(-1); r < c._len; r = c.nextNZ(r)) {
        if (c.atd(r) == 0) continue; // skip explicitly-stored zeros as well
        assert r > oldRow; // nextNZ must be strictly increasing
        oldRow = r;
        Row row = rows[r];
        if (row.bad) continue;
        if (c.isNA(r)) row.bad = _skipMissing;
        double d = c.atd(r);
        if (Double.isNaN(d)) d = _numMeans[cid]; // mean-impute NAs when not skipping them
        if (_normMul != null) d *= _normMul[interactionOffset];
        row.addNum(_numOffsets[cid], d);
      }
      interactionOffset++;
    }
  }
  // response(s)
  for (int i = 1; i <= _responses; ++i) {
    int rid = responseChunkId(i - 1);
    Chunk rChunk = chunks[rid];
    for (int r = 0; r < chunks[0]._len; ++r) {
      Row row = rows[r];
      if (row.bad) continue;
      row.response[i - 1] = rChunk.atd(r);
      if (_normRespMul != null) {
        // Standardize the response with the stored sub/mul factors.
        row.response[i - 1] = (row.response[i - 1] - _normRespSub[i - 1]) * _normRespMul[i - 1];
      }
      // NOTE(review): the write above targets index i-1 but the NaN check reads index
      // response.length - i; these differ unless responses are stored in reverse order —
      // confirm this asymmetry is intended.
      if (Double.isNaN(row.response[row.response.length - i])) row.bad = true;
    }
  }
  return rows;
}