@Override public Value lazyArrayChunk(final Key key) {
  final Key arykey = ValueArray.getArrayKey(key); // From the base file key
  final long off = (_iceRoot != null) ? 0 : ValueArray.getChunkOffset(key); // The offset
  final Path p = (_iceRoot != null)
      ? new Path(_iceRoot, getIceName(key, (byte) 'V'))
      : new Path(arykey.toString());
  final Size sz = new Size();
  run(new Callable() {
    @Override public Object call() throws Exception {
      FileSystem fs = FileSystem.get(p.toUri(), CONF);
      long rem = fs.getFileStatus(p).getLen() - off;
      sz._value = (rem > ValueArray.CHUNK_SZ * 2) ? (int) ValueArray.CHUNK_SZ : (int) rem;
      return null;
    }
  }, true, 0);
  Value val = new Value(key, sz._value, Value.HDFS);
  val.setdsk(); // But it's already on disk.
  return val;
}
private static void addFolder(FileSystem fs, Path p, JsonArray succeeded, JsonArray failed) {
  try {
    if (fs == null) return;
    for (FileStatus file : fs.listStatus(p)) {
      Path pfs = file.getPath();
      if (file.isDir()) {
        addFolder(fs, pfs, succeeded, failed);
      } else {
        Key k = Key.make(pfs.toString());
        long size = file.getLen();
        Value val = null;
        if (pfs.getName().endsWith(Extensions.JSON)) {
          // JSON file: an old-format model description; instantiate its class
          // and rebuild the model from the parsed JSON.
          JsonParser parser = new JsonParser();
          JsonObject json = parser.parse(new InputStreamReader(fs.open(pfs))).getAsJsonObject();
          JsonElement v = json.get(Constants.VERSION);
          if (v == null) throw new RuntimeException("Missing version");
          JsonElement type = json.get(Constants.TYPE);
          if (type == null) throw new RuntimeException("Missing type");
          Class c = Class.forName(type.getAsString());
          OldModel model = (OldModel) c.newInstance();
          model.fromJson(json);
        } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
          FSDataInputStream s = fs.open(pfs);
          int sz = (int) Math.min(1L << 20, size); // Read up to the 1st meg
          byte[] mem = MemoryManager.malloc1(sz);
          s.readFully(mem);
          // Convert to a ValueArray (hope it fits in 1Meg!)
          ValueArray ary = new ValueArray(k, 0).read(new AutoBuffer(mem));
          val = new Value(k, ary, Value.HDFS);
        } else if (size >= 2 * ValueArray.CHUNK_SZ) {
          val = new Value(k, new ValueArray(k, size), Value.HDFS); // ValueArray byte wrapper over a large file
        } else {
          val = new Value(k, (int) size, Value.HDFS); // Plain Value
          val.setdsk();
        }
        DKV.put(k, val);
        Log.info("PersistHdfs: DKV.put(" + k + ")");
        JsonObject o = new JsonObject();
        o.addProperty(Constants.KEY, k.toString());
        o.addProperty(Constants.FILE, pfs.toString());
        o.addProperty(Constants.VALUE_SIZE, size);
        succeeded.add(o);
      }
    }
  } catch (Exception e) {
    Log.err(e);
    JsonObject o = new JsonObject();
    o.addProperty(Constants.FILE, p.toString());
    o.addProperty(Constants.ERROR, e.getMessage());
    failed.add(o);
  }
}
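For context, a hedged sketch of how addFolder might be driven from an import entry point; the importFolder helper, its uri argument, and the returned array are illustrative and not part of the source:

// Hypothetical driver: recursively import an HDFS folder and report per-file results.
static JsonArray[] importFolder(String uri, Configuration conf) throws IOException {
  JsonArray succeeded = new JsonArray();
  JsonArray failed = new JsonArray();
  Path root = new Path(uri);                          // e.g. "hdfs://namenode:8020/datasets" (placeholder)
  FileSystem fs = FileSystem.get(root.toUri(), conf); // same lookup as in lazyArrayChunk above
  addFolder(fs, root, succeeded, failed);
  return new JsonArray[] { succeeded, failed };
}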
@Override public void store(Value v) {
  // Should be used only if ice goes to HDFS
  assert this == getIce();
  assert !v.isPersisted();
  byte[] m = v.memOrLoad();
  assert (m == null || m.length == v._max); // Assert not saving partial files
  store(new Path(_iceRoot, getIceName(v)), m);
  v.setdsk(); // Set as write-complete to disk
}
@Override public Value lazyArrayChunk(Key key) {
  Key arykey = ValueArray.getArrayKey(key); // From the base file key
  long off = ValueArray.getChunkOffset(key); // The offset
  long size = getFileForKey(arykey).length();
  long rem = size - off;
  // The last chunk can be fat, so it got packed into the earlier chunk.
  if (rem < ValueArray.CHUNK_SZ && off > 0) return null;
  int sz = (rem >= ValueArray.CHUNK_SZ * 2) ? (int) ValueArray.CHUNK_SZ : (int) rem;
  Value val = new Value(key, sz, Value.NFS);
  val.setdsk(); // But it's already on disk.
  return val;
}
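Both lazyArrayChunk implementations apply the same sizing rule: every chunk is a full CHUNK_SZ except the last, which absorbs any trailing remainder smaller than a full chunk and can therefore grow to just under 2*CHUNK_SZ. A minimal, self-contained sketch of that rule; chunkLen is a hypothetical helper, not part of the source:

// Hypothetical helper mirroring the sizing rule above: chunks are chunkSz
// bytes, but a short tail (< chunkSz) is folded into the previous chunk,
// so the final chunk may be up to 2*chunkSz - 1 bytes long.
static int chunkLen(long fileSize, long off, long chunkSz) {
  long rem = fileSize - off;
  return (rem >= chunkSz * 2) ? (int) chunkSz : (int) rem;
}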
// Convert a chunk# into a chunk - does lazy-chunk creation. As chunks are
// asked-for the first time, we make the Key and an empty backing DVec.
// Touching the DVec will force the file load.
@Override public Value chunkIdx(int cidx) {
  final long nchk = nChunks();
  assert 0 <= cidx && cidx < nchk;
  Key dkey = chunkKey(cidx);
  Value val1 = DKV.get(dkey); // Check for an existing one... will fetch data as needed
  if (val1 != null) return val1; // Found an existing one?
  // Lazily create a DVec for this chunk
  int len = (int) (cidx < nchk - 1 ? ValueArray.CHUNK_SZ : (_len - chunk2StartElem(cidx)));
  // DVec is just the raw file data with a null-compression scheme
  Value val2 = new Value(dkey, len, null, TypeMap.C1CHUNK, Value.NFS);
  val2.setdsk(); // It is already on disk.
  // Atomically insert: fails on a race, but then return the old version
  Value val3 = DKV.DputIfMatch(dkey, val2, null, null);
  return val3 == null ? val2 : val3;
}
public Key importFile(int i, Futures fs) {
  if (_ok[i] < H2O.CLOUD.size()) return null;
  File f = new File(_files[i]);
  Key k;
  if (_newApi) {
    // New API: wrap the file in an NFSFileVec and publish it as a one-column Frame
    k = PersistNFS.decodeFile(f);
    NFSFileVec nfs = DKV.get(NFSFileVec.make(f, fs)).get();
    UKV.put(k, new Frame(new String[] {"0"}, new Vec[] {nfs}), fs);
  } else {
    // Old API: small files become a plain Value, large files get a ValueArray wrapper
    k = PersistNFS.decodeFile(f);
    long size = f.length();
    Value val = (size < 2 * ValueArray.CHUNK_SZ)
        ? new Value(k, (int) size, Value.NFS)
        : new Value(k, new ValueArray(k, size), Value.NFS);
    val.setdsk();
    UKV.put(k, val, fs);
  }
  return k;
}
// Convert a chunk# into a chunk - does lazy-chunk creation. As chunks are
// asked-for the first time, we make the Key and an empty backing DVec.
// Touching the DVec will force the file load.
@Override public Value chunkIdx(int cidx) {
  final long nchk = nChunks();
  assert 0 <= cidx && cidx < nchk;
  Key dkey = chunkKey(cidx);
  Value val1 = DKV.get(dkey); // Check for an existing one... will fetch data as needed
  if (val1 != null) return val1; // Found an existing one?
  // Lazily create a DVec for this chunk
  int len = (int) (cidx < nchk - 1 ? CHUNK_SZ : (_len - chunk2StartElem(cidx)));
  // DVec is just the raw file data with a null-compression scheme
  Value val2 = new Value(dkey, len, null, TypeMap.C1NCHUNK, _be);
  val2.setdsk(); // It is already on disk.
  // If not-home, then block till the Key is everywhere. Most calls here are
  // from the parser loading a text file, and the parser splits the work such
  // that most puts here are on home - so this is a simple speed optimization:
  // do not make a Futures nor block on it on home.
  Futures fs = dkey.home() ? null : new Futures();
  // Atomically insert: fails on a race, but then return the old version
  Value val3 = DKV.DputIfMatch(dkey, val2, null, fs);
  if (!dkey.home() && fs != null) fs.blockForPending();
  return val3 == null ? val2 : val3;
}
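Both chunkIdx variants end with the same insert-or-adopt idiom: publish the freshly built lazy Value only if nobody else has raced an insert in first, and otherwise adopt the Value that won. A condensed sketch of that idiom, reusing only the DKV and Futures calls shown above; buildLazyValue is a hypothetical stand-in for the Value construction:

// Hypothetical condensation of the insert-or-adopt pattern used above.
Value cached = DKV.get(dkey);                   // reuse an existing chunk if present
if (cached != null) return cached;
Value fresh = buildLazyValue(dkey);             // hypothetical: build the on-disk-backed Value
Futures pending = dkey.home() ? null : new Futures();
Value raced = DKV.DputIfMatch(dkey, fresh, null, pending); // atomic insert, expecting no prior Value
if (pending != null) pending.blockForPending();            // off-home: block till the Key is everywhere
return raced == null ? fresh : raced;           // null means our insert won; else adopt the racer's Value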
// Store Value v to disk.
@Override public void store(Value v) {
  // Only the home node does persistence on NFS
  if (!v._key.home()) return;
  // A perhaps useless cutout: the upper layers should test this first.
  if (v.isPersisted()) return;
  // Never store arraylets on NFS, instead we'll store the entire array.
  assert !v.isArray();
  try {
    File f = getFileForKey(v._key);
    f.getParentFile().mkdirs(); // Ensure the parent directories exist before opening the file
    FileOutputStream s = new FileOutputStream(f);
    try {
      byte[] m = v.memOrLoad();
      assert (m == null || m.length == v._max); // Assert not saving partial files
      if (m != null)
        new AutoBuffer(s.getChannel(), false, Value.NFS).putA1(m, m.length).close();
      v.setdsk(); // Set as write-complete to disk
    } finally {
      s.close();
    }
  } catch (IOException e) {
    H2O.ignore(e);
  }
}