// Compute a compressed double buffer
private Chunk chunkD() {
  HashMap<Long, Byte> hs = new HashMap<>(CUDChunk.MAX_UNIQUES);
  Byte dummy = 0;
  final byte[] bs = MemoryManager.malloc1(_len * 8, true);
  int j = 0;
  boolean fitsInUnique = true;
  for (int i = 0; i < _len; ++i) {
    double d = 0;
    if (_id == null || _id.length == 0 || (j < _id.length && _id[j] == i)) {
      d = _ds != null
          ? _ds[j]
          : (isNA2(j) || isCategorical(j)) ? Double.NaN : _ls[j] * PrettyPrint.pow10(_xs[j]);
      ++j;
    }
    if (fitsInUnique) {
      if (hs.size() < CUDChunk.MAX_UNIQUES) // still got space
        hs.put(Double.doubleToLongBits(d), dummy); // store doubles as longs to avoid NaN comparison issues during extraction
      else
        fitsInUnique = (hs.size() == CUDChunk.MAX_UNIQUES) // full, but might not need more space because of repeats
            && hs.containsKey(Double.doubleToLongBits(d));
    }
    UnsafeUtils.set8d(bs, 8 * i, d);
  }
  assert j == sparseLen() : "j = " + j + ", _len = " + sparseLen();
  if (fitsInUnique && CUDChunk.computeByteSize(hs.size(), len()) < 0.8 * bs.length)
    return new CUDChunk(bs, hs, len());
  else
    return new C8DChunk(bs);
}
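// A minimal, self-contained sketch (not H2O code) of the trick noted above: keying a
// HashMap on Double.doubleToLongBits(d) instead of on the double itself, so that every
// NaN collapses to one canonical bit pattern and counts as a single unique value.
// Class and method names below are illustrative assumptions.
import java.util.HashMap;

class UniqueDoubleCounter {
  // Returns the number of distinct values (NaN-safe), or -1 once maxUniques is exceeded.
  static int countUniques(double[] vals, int maxUniques) {
    HashMap<Long, Byte> seen = new HashMap<>(maxUniques);
    for (double d : vals) {
      seen.put(Double.doubleToLongBits(d), (byte) 0); // all NaNs map to the same long
      if (seen.size() > maxUniques) return -1;        // too many distinct values to compress
    }
    return seen.size();
  }

  public static void main(String[] args) {
    double[] vals = {1.0, 1.0, Double.NaN, Double.NaN, 2.5};
    System.out.println(countUniques(vals, 16)); // prints 3: {1.0, NaN, 2.5}
  }
}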
// Compute a sparse float buffer
private byte[] bufD(final int valsz) {
  int log = 0;
  while ((1 << log) < valsz) ++log;
  assert (1 << log) == valsz;
  final int ridsz = _len >= 65535 ? 4 : 2;
  final int elmsz = ridsz + valsz;
  int off = CXDChunk._OFF;
  byte[] buf = MemoryManager.malloc1(off + sparseLen() * elmsz, true);
  for (int i = 0; i < sparseLen(); i++, off += elmsz) {
    if (ridsz == 2) UnsafeUtils.set2(buf, off, (short) _id[i]);
    else UnsafeUtils.set4(buf, off, _id[i]);
    final double dval =
        _ds == null ? isNA2(i) ? Double.NaN : _ls[i] * PrettyPrint.pow10(_xs[i]) : _ds[i];
    switch (valsz) {
      case 4:
        UnsafeUtils.set4f(buf, off + ridsz, (float) dval);
        break;
      case 8:
        UnsafeUtils.set8d(buf, off + ridsz, dval);
        break;
      default:
        throw H2O.fail();
    }
  }
  assert off == buf.length;
  return buf;
}
// Column backed by one raw byte per row
Col(String s, int rows, boolean isClass) {
  _name = s;
  _isClass = isClass;
  _rawB = MemoryManager.malloc1(rows);
  _isFloat = false;
  _isByte = true;
  _colBinLimit = 0;
}
// Merge: grow the target buffer if needed, then splice this value's bytes in at _dst_off
@Override
public byte[] atomic(byte[] bits1) {
  byte[] mem = DKV.get(_key).get();
  int len = Math.max(_dst_off + mem.length, bits1 == null ? 0 : bits1.length);
  byte[] bits2 = MemoryManager.malloc1(len);
  if (bits1 != null) System.arraycopy(bits1, 0, bits2, 0, bits1.length);
  System.arraycopy(mem, 0, bits2, _dst_off, mem.length);
  return bits2;
}
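// Hedged sketch of the buffer splice performed by atomic() above: grow the byte[] so it
// can hold both the old bits and the new bytes, then overwrite the region at dstOff.
// splice() is a hypothetical helper, not part of the original source.
import java.util.Arrays;

class ByteSplice {
  static byte[] splice(byte[] oldBits, byte[] mem, int dstOff) {
    int len = Math.max(dstOff + mem.length, oldBits == null ? 0 : oldBits.length);
    byte[] out = new byte[len];
    if (oldBits != null) System.arraycopy(oldBits, 0, out, 0, oldBits.length);
    System.arraycopy(mem, 0, out, dstOff, mem.length);
    return out;
  }

  public static void main(String[] args) {
    // Old buffer {1,2,3}, new bytes {9,9} written at offset 2 -> {1,2,9,9}
    System.out.println(Arrays.toString(splice(new byte[] {1, 2, 3}, new byte[] {9, 9}, 2)));
  }
}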
// Recursively walk the HDFS folder, registering each file as a Key/Value in the DKV and
// recording per-file successes and failures as JSON.
private static void addFolder(FileSystem fs, Path p, JsonArray succeeded, JsonArray failed) {
  try {
    if (fs == null) return;
    for (FileStatus file : fs.listStatus(p)) {
      Path pfs = file.getPath();
      if (file.isDir()) {
        addFolder(fs, pfs, succeeded, failed);
      } else {
        Key k = Key.make(pfs.toString());
        long size = file.getLen();
        Value val = null;
        if (pfs.getName().endsWith(Extensions.JSON)) {
          JsonParser parser = new JsonParser();
          JsonObject json = parser.parse(new InputStreamReader(fs.open(pfs))).getAsJsonObject();
          JsonElement v = json.get(Constants.VERSION);
          if (v == null) throw new RuntimeException("Missing version");
          JsonElement type = json.get(Constants.TYPE);
          if (type == null) throw new RuntimeException("Missing type");
          Class c = Class.forName(type.getAsString());
          OldModel model = (OldModel) c.newInstance();
          model.fromJson(json);
        } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
          FSDataInputStream s = fs.open(pfs);
          int sz = (int) Math.min(1L << 20, size); // Read up to the 1st meg
          byte[] mem = MemoryManager.malloc1(sz);
          s.readFully(mem);
          // Convert to a ValueArray (hope it fits in 1Meg!)
          ValueArray ary = new ValueArray(k, 0).read(new AutoBuffer(mem));
          val = new Value(k, ary, Value.HDFS);
        } else if (size >= 2 * ValueArray.CHUNK_SZ) {
          val = new Value(k, new ValueArray(k, size), Value.HDFS); // ValueArray byte wrapper over a large file
        } else {
          val = new Value(k, (int) size, Value.HDFS); // Plain Value
          val.setdsk();
        }
        DKV.put(k, val);
        Log.info("PersistHdfs: DKV.put(" + k + ")");
        JsonObject o = new JsonObject();
        o.addProperty(Constants.KEY, k.toString());
        o.addProperty(Constants.FILE, pfs.toString());
        o.addProperty(Constants.VALUE_SIZE, file.getLen());
        succeeded.add(o);
      }
    }
  } catch (Exception e) {
    Log.err(e);
    JsonObject o = new JsonObject();
    o.addProperty(Constants.FILE, p.toString());
    o.addProperty(Constants.ERROR, e.getMessage());
    failed.add(o);
  }
}
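// Hedged sketch of the traversal pattern in addFolder() above: recurse into directories,
// classify each regular file by extension and size, and record successes and failures
// separately. Uses java.nio.file instead of the Hadoop FileSystem API; the extension
// checks, size threshold, and labels are illustrative assumptions.
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

class FolderWalkSketch {
  static void addFolder(Path p, List<String> succeeded, List<String> failed) {
    try (DirectoryStream<Path> dir = Files.newDirectoryStream(p)) {
      for (Path f : dir) {
        if (Files.isDirectory(f)) { addFolder(f, succeeded, failed); continue; }
        long size = Files.size(f);
        if (f.toString().endsWith(".json"))  succeeded.add(f + " (model metadata)");
        else if (size >= 2L * (1 << 20))     succeeded.add(f + " (large file, chunked wrapper)");
        else                                 succeeded.add(f + " (plain value)");
      }
    } catch (IOException e) {
      failed.add(p + ": " + e.getMessage()); // mirror the original's per-folder failure record
    }
  }
}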
// Append str to the growing _ss byte buffer, terminated by a 0 byte
private void append_ss(String str) {
  if (_ss == null) {
    _ss = MemoryManager.malloc1((str.length() + 1) * 4);
  }
  while (_ss.length < (_sslen + str.length() + 1)) {
    _ss = MemoryManager.arrayCopyOf(_ss, _ss.length << 1);
  }
  for (byte b : str.getBytes()) _ss[_sslen++] = b;
  _ss[_sslen++] = (byte) 0; // for trailing 0;
}
// Compute a compressed double buffer
private Chunk chunkD() {
  assert _len2 == _len;
  final byte[] bs = MemoryManager.malloc1(_len * 8);
  for (int i = 0; i < _len; ++i)
    UDP.set8d(bs, 8 * i,
        _ds != null ? _ds[i] : (isNA(i) || isEnum(i)) ? Double.NaN : _ls[i] * DParseTask.pow10(_xs[i]));
  return new C8DChunk(bs);
}
private void append_ss(String str) {
  byte[] bytes = str.getBytes(Charsets.UTF_8);
  // Allocate memory if necessary
  if (_ss == null) _ss = MemoryManager.malloc1((bytes.length + 1) * 4);
  while (_ss.length < (_sslen + bytes.length + 1))
    _ss = MemoryManager.arrayCopyOf(_ss, _ss.length << 1);
  // Copy bytes to _ss
  for (byte b : bytes) _ss[_sslen++] = b;
  _ss[_sslen++] = (byte) 0; // for trailing 0;
}
private void append_ss(BufferedString str) {
  int strlen = str.length();
  int off = str.getOffset();
  byte b[] = str.getBuffer();
  if (_ss == null) {
    _ss = MemoryManager.malloc1((strlen + 1) * 4);
  }
  while (_ss.length < (_sslen + strlen + 1)) {
    _ss = MemoryManager.arrayCopyOf(_ss, _ss.length << 1);
  }
  for (int i = off; i < off + strlen; i++) _ss[_sslen++] = b[i];
  _ss[_sslen++] = (byte) 0; // for trailing 0;
}
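// The three append_ss variants above share one growth strategy: lazily allocate a byte
// buffer, then double it until the next string plus a trailing 0 byte fits. A standalone
// sketch of that strategy, with java.util.Arrays.copyOf standing in for
// MemoryManager.arrayCopyOf (class and field names here are illustrative assumptions):
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

class StringByteBuffer {
  private byte[] _ss;
  private int _sslen;

  void append(String str) {
    byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
    if (_ss == null) _ss = new byte[(bytes.length + 1) * 4];
    while (_ss.length < _sslen + bytes.length + 1)
      _ss = Arrays.copyOf(_ss, _ss.length << 1); // double the capacity until it fits
    for (byte b : bytes) _ss[_sslen++] = b;
    _ss[_sslen++] = 0;                           // trailing 0 separates strings
  }
}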
@Override
public byte[] load(final Value v) {
  final byte[] b = MemoryManager.malloc1(v._max);
  long skip = 0;
  Key k = v._key;
  final Path p;
  if (_iceRoot != null) {
    p = new Path(_iceRoot, getIceName(v));
  } else {
    // Convert an arraylet chunk into a long-offset from the base file.
    if (k._kb[0] == Key.ARRAYLET_CHUNK) {
      skip = ValueArray.getChunkOffset(k); // The offset
      k = ValueArray.getArrayKey(k);       // From the base file key
      if (k.toString().endsWith(Extensions.HEX)) { // Hex file?
        int value_len = DKV.get(k).memOrLoad().length; // How long is the ValueArray header?
        skip += value_len;
      }
    }
    p = new Path(k.toString());
  }
  final long skip_ = skip;
  run(
      new Callable() {
        @Override
        public Object call() throws Exception {
          FileSystem fs = FileSystem.get(p.toUri(), CONF);
          FSDataInputStream s = null;
          try {
            s = fs.open(p);
            // NOTE:
            // The following line degrades performance of HDFS load from S3 API:
            //   s.readFully(skip,b,0,b.length);
            // Google API's simple seek has better performance.
            // Load of 300MB file via Google API ~ 14sec, via s.readFully ~ 5min (under the same condition)
            ByteStreams.skipFully(s, skip_);
            ByteStreams.readFully(s, b);
            assert v.isPersisted();
          } finally {
            Utils.close(s);
          }
          return null;
        }
      },
      true, v._max);
  return b;
}
// Compute a compressed UUID buffer
private Chunk chunkUUID() {
  final byte[] bs = MemoryManager.malloc1(_len * 16, true);
  int j = 0;
  for (int i = 0; i < _len; ++i) {
    long lo = 0, hi = 0;
    if (_id == null || _id.length == 0 || (j < _id.length && _id[j] == i)) {
      lo = _ls[j];
      hi = Double.doubleToRawLongBits(_ds[j++]);
      if (_xs != null && _xs[j] == Integer.MAX_VALUE) { // NA?
        lo = Long.MIN_VALUE;
        hi = 0; // Canonical NA value
      }
    }
    UnsafeUtils.set8(bs, 16 * i, lo);
    UnsafeUtils.set8(bs, 16 * i + 8, hi);
  }
  assert j == sparseLen() : "j = " + j + ", _len = " + sparseLen();
  return new C16Chunk(bs);
}
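// Sketch of the 16-bytes-per-row packing done by chunkUUID() above: the low 64-bit half
// is written first, then the high half, with (Long.MIN_VALUE, 0) as the NA sentinel.
// java.nio.ByteBuffer stands in for UnsafeUtils; class and method names are illustrative.
import java.nio.ByteBuffer;
import java.util.UUID;

class UuidPackSketch {
  static byte[] pack(UUID[] ids) {
    ByteBuffer buf = ByteBuffer.allocate(ids.length * 16);
    for (UUID id : ids) {
      long lo = id == null ? Long.MIN_VALUE : id.getLeastSignificantBits();
      long hi = id == null ? 0L             : id.getMostSignificantBits();
      buf.putLong(lo).putLong(hi); // low half at offset 16*i, high half at 16*i+8
    }
    return buf.array();
  }
}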
// Compute a sparse integer buffer
private byte[] bufS(final int valsz) {
  int log = 0;
  while ((1 << log) < valsz) ++log;
  assert valsz == 0 || (1 << log) == valsz;
  final int ridsz = _len >= 65535 ? 4 : 2;
  final int elmsz = ridsz + valsz;
  int off = CXIChunk._OFF;
  byte[] buf = MemoryManager.malloc1(off + sparseLen() * elmsz, true);
  for (int i = 0; i < sparseLen(); i++, off += elmsz) {
    if (ridsz == 2) UnsafeUtils.set2(buf, off, (short) _id[i]);
    else UnsafeUtils.set4(buf, off, _id[i]);
    if (valsz == 0) {
      assert _xs[i] == 0 && _ls[i] == 1;
      continue;
    }
    // assert we have int or NA
    assert _xs[i] == Integer.MIN_VALUE || _xs[i] >= 0 : "unexpected exponent " + _xs[i];
    final long lval = _xs[i] == Integer.MIN_VALUE ? NAS[log] : _ls[i] * PrettyPrint.pow10i(_xs[i]);
    switch (valsz) {
      case 1:
        buf[off + ridsz] = (byte) lval;
        break;
      case 2:
        short sval = (short) lval;
        UnsafeUtils.set2(buf, off + ridsz, sval);
        break;
      case 4:
        int ival = (int) lval;
        UnsafeUtils.set4(buf, off + ridsz, ival);
        break;
      case 8:
        UnsafeUtils.set8(buf, off + ridsz, lval);
        break;
      default:
        throw H2O.fail();
    }
  }
  assert off == buf.length;
  return buf;
}
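// Hedged sketch of the sparse record layout built by bufD() and bufS() above: each
// stored element is a fixed-size [row id][value] pair, where the row id takes 2 bytes
// when the chunk has fewer than 65535 rows and 4 bytes otherwise. ByteBuffer packing
// replaces the original UnsafeUtils calls; names and the example are illustrative.
import java.nio.ByteBuffer;

class SparseLayoutSketch {
  static byte[] pack(int[] rowIds, long[] vals, int chunkLen, int valsz) {
    int ridsz = chunkLen >= 65535 ? 4 : 2;          // row-id width, as in bufS()/bufD()
    ByteBuffer buf = ByteBuffer.allocate(rowIds.length * (ridsz + valsz));
    for (int i = 0; i < rowIds.length; i++) {
      if (ridsz == 2) buf.putShort((short) rowIds[i]); else buf.putInt(rowIds[i]);
      switch (valsz) {                              // value width: 1, 2, 4 or 8 bytes
        case 1: buf.put((byte) vals[i]);        break;
        case 2: buf.putShort((short) vals[i]);  break;
        case 4: buf.putInt((int) vals[i]);      break;
        case 8: buf.putLong(vals[i]);           break;
        default: throw new IllegalArgumentException("valsz=" + valsz);
      }
    }
    return buf.array();
  }
}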
@Override
public byte[] load(final Value v) {
  final byte[] b = MemoryManager.malloc1(v._max);
  long skip = 0;
  Key k = v._key;
  if (k._kb[0] == Key.ARRAYLET_CHUNK) {
    skip = ValueArray.getChunkOffset(k); // The offset
    k = ValueArray.getArrayKey(k);       // From the base file key
  } else if (k._kb[0] == Key.DVEC) {
    skip = water.fvec.NFSFileVec.chunkOffset(k); // The offset
  }
  final Path p = _iceRoot == null ? new Path(getPathForKey(k)) : new Path(_iceRoot, getIceName(v));
  final long skip_ = skip;
  run(
      new Callable() {
        @Override
        public Object call() throws Exception {
          FileSystem fs = FileSystem.get(p.toUri(), CONF);
          FSDataInputStream s = null;
          try {
            s = fs.open(p);
            // NOTE:
            // The following line degrades performance of HDFS load from S3 API:
            //   s.readFully(skip,b,0,b.length);
            // Google API's simple seek has better performance.
            // Load of 300MB file via Google API ~ 14sec, via s.readFully ~ 5min (under the same condition)
            ByteStreams.skipFully(s, skip_);
            ByteStreams.readFully(s, b);
            assert v.isPersisted();
          } finally {
            Utils.close(s);
          }
          return null;
        }
      },
      true, v._max);
  return b;
}
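// Both load() overrides above avoid the positioned read s.readFully(skip, b, 0, b.length)
// (slow over the S3-backed HDFS API, per the note) and instead skip forward and read
// sequentially. A minimal sketch of that skip-then-read pattern against a local file,
// using the same Guava ByteStreams helpers; the path, offset, and length are illustrative.
import com.google.common.io.ByteStreams;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

class SkipThenRead {
  static byte[] readRange(String path, long offset, int len) throws IOException {
    byte[] b = new byte[len];
    try (InputStream s = new FileInputStream(path)) {
      ByteStreams.skipFully(s, offset); // advance to the chunk's offset without a positioned read
      ByteStreams.readFully(s, b);      // fill the buffer completely or throw EOFException
    }
    return b;
  }
}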