// Compute a compressed integer buffer
private byte[] bufX(long bias, int scale, int off, int log) {
  byte[] bs = new byte[(_len << log) + off];
  int j = 0;
  for (int i = 0; i < _len; i++) {
    long le = -bias;
    if (_id == null || _id.length == 0 || (j < _id.length && _id[j] == i)) {
      if (isNA2(j)) {
        le = NAS[log];
      } else {
        int x = (_xs[j] == Integer.MIN_VALUE + 1 ? 0 : _xs[j]) - scale;
        le += x >= 0 ? _ls[j] * PrettyPrint.pow10i(x) : _ls[j] / PrettyPrint.pow10i(-x);
      }
      ++j;
    }
    switch (log) {
      case 0: bs[i + off] = (byte) le;                          break;
      case 1: UnsafeUtils.set2(bs, (i << 1) + off, (short) le); break;
      case 2: UnsafeUtils.set4(bs, (i << 2) + off, (int) le);   break;
      case 3: UnsafeUtils.set8(bs, (i << 3) + off, le);         break;
      default: throw H2O.fail();
    }
  }
  assert j == sparseLen()
      : "j = " + j + ", len = " + sparseLen() + ", len2 = " + _len + ", id[j] = " + _id[j];
  return bs;
}
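// Illustration only (not part of the original source): the arithmetic bufX performs for a
// single non-NA element, and the inverse a reader would apply. The helper names are
// hypothetical; the real decode lives in the C*SChunk classes. Assumes water.util.PrettyPrint
// is on the classpath, as in the method above.
private static long encodeScaled(long ls, int xs, long bias, int scale) {
  int x = xs - scale; // re-express the mantissa at the chunk-wide scale
  long v = x >= 0 ? ls * PrettyPrint.pow10i(x) : ls / PrettyPrint.pow10i(-x);
  return v - bias;    // bias-shift so the result fits the small integer width chosen by log
}
private static double decodeScaled(long stored, long bias, int scale) {
  return (stored + bias) * PrettyPrint.pow10(scale); // recovers ls * 10^xs
}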
// Compute a sparse float buffer
private byte[] bufD(final int valsz) {
  int log = 0;
  while ((1 << log) < valsz) ++log;
  assert (1 << log) == valsz;
  final int ridsz = _len >= 65535 ? 4 : 2;
  final int elmsz = ridsz + valsz;
  int off = CXDChunk._OFF;
  byte[] buf = MemoryManager.malloc1(off + sparseLen() * elmsz, true);
  for (int i = 0; i < sparseLen(); i++, off += elmsz) {
    if (ridsz == 2) UnsafeUtils.set2(buf, off, (short) _id[i]);
    else UnsafeUtils.set4(buf, off, _id[i]);
    final double dval =
        _ds == null ? isNA2(i) ? Double.NaN : _ls[i] * PrettyPrint.pow10(_xs[i]) : _ds[i];
    switch (valsz) {
      case 4: UnsafeUtils.set4f(buf, off + ridsz, (float) dval); break;
      case 8: UnsafeUtils.set8d(buf, off + ridsz, dval);         break;
      default: throw H2O.fail();
    }
  }
  assert off == buf.length;
  return buf;
}
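// Sketch (an assumption, not H2O's reader): walk the fixed-size (row-id, value) records laid
// out by bufD(8) when ridsz == 2, i.e. fewer than 65535 rows. The real parsing is done by
// CXDChunk; this also assumes UnsafeUtils offers get2/get8d counterparts to the setters used
// above.
private static void dumpSparseDoubles(byte[] buf, int off /* CXDChunk._OFF */) {
  final int elmsz = 2 + 8;                          // 2-byte row id + 8-byte double value
  for (; off < buf.length; off += elmsz) {
    int row = 0xFFFF & UnsafeUtils.get2(buf, off);  // row id is written as an unsigned short
    double val = UnsafeUtils.get8d(buf, off + 2);
    System.out.println(row + " -> " + val);
  }
}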
@Override
public final void initFromBytes() {
  _start = -1;
  _cidx = -1;
  set_len(_mem.length - _OFF);
  _scale = UnsafeUtils.get8d(_mem, 0);
  _bias = UnsafeUtils.get8(_mem, 8);
}
C1SChunk(byte[] bs, long bias, double scale) {
  _mem = bs;
  _start = -1;
  set_len(_mem.length - _OFF);
  _bias = bias;
  _scale = scale;
  UnsafeUtils.set8d(_mem, 0, scale);
  UnsafeUtils.set8(_mem, 8, bias);
}
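// Sketch (an assumption): how a single row of a buffer built by bufX(bias, scale, _OFF, 0) and
// wrapped by this constructor would be decoded, given that initFromBytes above reads the scale
// from bytes 0-7 and the bias from bytes 8-15. The real accessor lives in C1SChunk and also
// handles the NA sentinel, which is skipped here for brevity.
static double decodeRow(byte[] mem, int row) {
  double scale = UnsafeUtils.get8d(mem, 0); // 8-byte header field: per-chunk scale
  long bias = UnsafeUtils.get8(mem, 8);     // next 8 bytes: per-chunk bias
  int raw = 0xFF & mem[16 + row];           // one unsigned byte per row after the 16-byte header
  return (raw + bias) * scale;              // undo the bias shift, then rescale
}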
// Compute a compressed double buffer
private Chunk chunkD() {
  HashMap<Long, Byte> hs = new HashMap<>(CUDChunk.MAX_UNIQUES);
  Byte dummy = 0;
  final byte[] bs = MemoryManager.malloc1(_len * 8, true);
  int j = 0;
  boolean fitsInUnique = true;
  for (int i = 0; i < _len; ++i) {
    double d = 0;
    if (_id == null || _id.length == 0 || (j < _id.length && _id[j] == i)) {
      d = _ds != null
          ? _ds[j]
          : (isNA2(j) || isCategorical(j)) ? Double.NaN : _ls[j] * PrettyPrint.pow10(_xs[j]);
      ++j;
    }
    if (fitsInUnique) {
      if (hs.size() < CUDChunk.MAX_UNIQUES) // still got space
        // store doubles as longs to avoid NaN comparison issues during extraction
        hs.put(Double.doubleToLongBits(d), dummy);
      else
        // full, but might not need more space because of repeats
        fitsInUnique = (hs.size() == CUDChunk.MAX_UNIQUES)
            && hs.containsKey(Double.doubleToLongBits(d));
    }
    UnsafeUtils.set8d(bs, 8 * i, d);
  }
  assert j == sparseLen() : "j = " + j + ", _len = " + sparseLen();
  if (fitsInUnique && CUDChunk.computeByteSize(hs.size(), len()) < 0.8 * bs.length)
    return new CUDChunk(bs, hs, len());
  else
    return new C8DChunk(bs);
}
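// Standalone illustration (not H2O code) of why chunkD keys its uniques map on
// Double.doubleToLongBits(d) rather than on the double value itself: NaN (the NA encoding used
// above) never compares equal to NaN, but doubleToLongBits maps every such NaN to one canonical
// bit pattern, so NA rows count as a single unique instead of defeating the uniques count.
static boolean nanAwareEquals(double a, double b) {
  return Double.doubleToLongBits(a) == Double.doubleToLongBits(b); // true for NaN vs. NaN
}
// nanAwareEquals(Double.NaN, Double.NaN) == true, while Double.NaN == Double.NaN is false.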
// Compute a compressed UUID buffer
private Chunk chunkUUID() {
  final byte[] bs = MemoryManager.malloc1(_len * 16, true);
  int j = 0;
  for (int i = 0; i < _len; ++i) {
    long lo = 0, hi = 0;
    if (_id == null || _id.length == 0 || (j < _id.length && _id[j] == i)) {
      lo = _ls[j];
      hi = Double.doubleToRawLongBits(_ds[j]);
      // NA check must look at this row's exponent, so test _xs[j] before advancing j
      if (_xs != null && _xs[j] == Integer.MAX_VALUE) { // NA?
        lo = Long.MIN_VALUE; hi = 0; // Canonical NA value
      }
      ++j;
    }
    UnsafeUtils.set8(bs, 16 * i, lo);
    UnsafeUtils.set8(bs, 16 * i + 8, hi);
  }
  assert j == sparseLen() : "j = " + j + ", _len = " + sparseLen();
  return new C16Chunk(bs);
}
// Compute a sparse integer buffer
private byte[] bufS(final int valsz) {
  int log = 0;
  while ((1 << log) < valsz) ++log;
  assert valsz == 0 || (1 << log) == valsz;
  final int ridsz = _len >= 65535 ? 4 : 2;
  final int elmsz = ridsz + valsz;
  int off = CXIChunk._OFF;
  byte[] buf = MemoryManager.malloc1(off + sparseLen() * elmsz, true);
  for (int i = 0; i < sparseLen(); i++, off += elmsz) {
    if (ridsz == 2) UnsafeUtils.set2(buf, off, (short) _id[i]);
    else UnsafeUtils.set4(buf, off, _id[i]);
    if (valsz == 0) {
      assert _xs[i] == 0 && _ls[i] == 1;
      continue;
    }
    // assert we have int or NA
    assert _xs[i] == Integer.MIN_VALUE || _xs[i] >= 0 : "unexpected exponent " + _xs[i];
    final long lval =
        _xs[i] == Integer.MIN_VALUE ? NAS[log] : _ls[i] * PrettyPrint.pow10i(_xs[i]);
    switch (valsz) {
      case 1: buf[off + ridsz] = (byte) lval;                      break;
      case 2: UnsafeUtils.set2(buf, off + ridsz, (short) lval);    break;
      case 4: UnsafeUtils.set4(buf, off + ridsz, (int) lval);      break;
      case 8: UnsafeUtils.set8(buf, off + ridsz, lval);            break;
      default: throw H2O.fail();
    }
  }
  assert off == buf.length;
  return buf;
}
// *Desired* distribution function on keys & replication factor.  Replica #0
// is the master; replicas #1, 2, 3, etc. represent additional desired
// replication nodes.  Note that this function is just the distribution
// function - it does not DO any replication, nor does it dictate any policy
// on how fast replication occurs.  Returns -1 if the desired replica
// is nonsense, e.g. asking for replica #3 in a 2-node system.
int D(int repl) {
  int hsz = H2O.CLOUD.size();

  // See if this is a specifically homed Key
  if (!user_allowed() && repl < _kb[1]) { // Asking for a replica# from the homed list?
    assert _kb[0] != Key.DVEC;
    H2ONode h2o = H2ONode.intern(_kb, 2 + repl * (4 + 2 /*serialized bytesize of H2OKey*/));
    // Reverse the home to the index
    int idx = h2o.index();
    if (idx >= 0) return idx;
    // Else homed to a node which is no longer in the cloud!
    // Fall back to the normal home mode
  }

  // Distribution of Fluid Vectors is a special case.
  // Fluid Vectors are grouped into vector groups, each of which must have
  // the same distribution of chunks so that an MRTask2 run over a group of
  // vectors keeps data-locality.  The fluid vecs from the same group share
  // the same key pattern, plus each has 4 bytes identifying the particular
  // vector in the group.  Since the same chunks must end up on the same node
  // across the group, we skip the 4 bytes containing the vec# when hashing.
  // Apart from that, we keep the previous mode of operation, so that a
  // ByteVec has its first 64MB distributed around the cloud randomly and then
  // goes round-robin in 64MB chunks.
  if (_kb[0] == DVEC) { // Homed Chunk?
    if (_kb[1] != -1) throw H2O.unimpl();
    // For round-robin on Chunks in the following pattern:
    // 1 Chunk-per-node, until all nodes have 1 chunk (max parallelism).
    // Then 2 chunks-per-node, once around, then 4, then 8, then 16.
    // Getting several chunks-in-a-row on a single Node means that stencil
    // calculations that step off the end of one chunk into the next won't
    // force a chunk-local replication of the data.  If all chunks round-robin
    // exactly, then any stencil calc will double the cached volume of data
    // (every node will have its own chunk, plus a cached next-chunk).
    // Above 16-chunks-in-a-row we hit diminishing returns.
    int cidx = UnsafeUtils.get4(_kb, 1 + 1 + 4); // Chunk index
    int x = cidx / hsz; // Multiples of cluster size
    // 0       -> 1st trip around the cluster;        nidx = (cidx -  0*hsz) >> 0
    // 1,2     -> 2nd & 3rd trip; allocate in pairs:  nidx = (cidx -  1*hsz) >> 1
    // 3,4,5,6 -> next 4 rounds; allocate in quads:   nidx = (cidx -  3*hsz) >> 2
    // 7-14    -> next 8 rounds in octets:            nidx = (cidx -  7*hsz) >> 3
    // 15+     -> remaining rounds in groups of 16:   nidx = (cidx - 15*hsz) >> 4
    int z = x == 0 ? 0 : (x <= 2 ? 1 : (x <= 6 ? 2 : (x <= 14 ? 3 : 4)));
    int nidx = (cidx - ((1 << z) - 1) * hsz) >> z;
    return ((nidx + repl) & 0x7FFFFFFF) % hsz;
  }

  // Easy Cheesy Stupid:
  return ((_hash + repl) & 0x7FFFFFFF) % hsz;
}
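// Worked example (illustration only) of the round-robin arithmetic above, extracted into a
// hypothetical helper. On an assumed 4-node cloud (hsz = 4), chunk cidx = 10 has completed
// x = 10/4 = 2 trips, so z = 1 (pairs), nidx = (10 - 1*4) >> 1 = 3, and replica 0 lands on
// node 3; chunks 0..3 of the first trip land on nodes 0..3, one per node.
static int chunkHome(int cidx, int repl, int hsz) {
  int x = cidx / hsz;                            // completed trips around the cloud
  int z = x == 0 ? 0 : (x <= 2 ? 1 : (x <= 6 ? 2 : (x <= 14 ? 3 : 4)));
  int nidx = (cidx - ((1 << z) - 1) * hsz) >> z; // index within runs of length 2^z
  return ((nidx + repl) & 0x7FFFFFFF) % hsz;
}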