@Override
public Value lazyArrayChunk(final Key key) {
  final Key arykey = ValueArray.getArrayKey(key); // From the base file key
  final long off = (_iceRoot != null) ? 0 : ValueArray.getChunkOffset(key); // The offset
  final Path p =
      (_iceRoot != null)
          ? new Path(_iceRoot, getIceName(key, (byte) 'V'))
          : new Path(arykey.toString());
  final Size sz = new Size();
  run(
      new Callable() {
        @Override
        public Object call() throws Exception {
          FileSystem fs = FileSystem.get(p.toUri(), CONF);
          long rem = fs.getFileStatus(p).getLen() - off;
          sz._value = (rem > ValueArray.CHUNK_SZ * 2) ? (int) ValueArray.CHUNK_SZ : (int) rem;
          return null;
        }
      },
      true,
      0);
  Value val = new Value(key, sz._value, Value.HDFS);
  val.setdsk(); // But it's already on disk.
  return val;
}
@Override
public NanoHTTPD.Response serve(NanoHTTPD server, Properties args, RequestType type) {
  if (type == RequestType.json) {
    JsonObject resp = new JsonObject();
    resp.addProperty(ERROR, "This request is only provided for browser connections");
    return wrap(server, resp);
  } else if (type != RequestType.www) {
    return super.serve(server, args, type);
  }
  String query = checkArguments(args, type);
  if (query != null) return wrap(server, query, type);
  try {
    Value val = _key.value();
    Key key = val._key;
    if (!key.user_allowed()) return wrap(server, build(Response.error("Not a user key: " + key)));
    // HTML file save of Value
    NanoHTTPD.Response res =
        server.new Response(NanoHTTPD.HTTP_OK, NanoHTTPD.MIME_DEFAULT_BINARY, val.openStream());
    res.addHeader("Content-Length", Long.toString(val.length()));
    res.addHeader("Content-Disposition", "attachment; filename=" + key.toString());
    return res;
  } catch (Throwable e) {
    return wrap(server, build(Response.error(e)));
  }
}
private static void addFolder(FileSystem fs, Path p, JsonArray succeeded, JsonArray failed) {
  try {
    if (fs == null) return;
    for (FileStatus file : fs.listStatus(p)) {
      Path pfs = file.getPath();
      if (file.isDir()) {
        addFolder(fs, pfs, succeeded, failed);
      } else {
        Key k = Key.make(pfs.toString());
        long size = file.getLen();
        Value val = null;
        if (pfs.getName().endsWith(Extensions.JSON)) {
          JsonParser parser = new JsonParser();
          JsonObject json = parser.parse(new InputStreamReader(fs.open(pfs))).getAsJsonObject();
          JsonElement v = json.get(Constants.VERSION);
          if (v == null) throw new RuntimeException("Missing version");
          JsonElement type = json.get(Constants.TYPE);
          if (type == null) throw new RuntimeException("Missing type");
          Class c = Class.forName(type.getAsString());
          OldModel model = (OldModel) c.newInstance();
          model.fromJson(json);
        } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
          FSDataInputStream s = fs.open(pfs);
          int sz = (int) Math.min(1L << 20, size); // Read up to the 1st meg
          byte[] mem = MemoryManager.malloc1(sz);
          s.readFully(mem);
          // Convert to a ValueArray (hope it fits in 1Meg!)
          ValueArray ary = new ValueArray(k, 0).read(new AutoBuffer(mem));
          val = new Value(k, ary, Value.HDFS);
        } else if (size >= 2 * ValueArray.CHUNK_SZ) {
          val = new Value(k, new ValueArray(k, size), Value.HDFS); // ValueArray byte wrapper over a large file
        } else {
          val = new Value(k, (int) size, Value.HDFS); // Plain Value
          val.setdsk();
        }
        DKV.put(k, val);
        Log.info("PersistHdfs: DKV.put(" + k + ")");
        JsonObject o = new JsonObject();
        o.addProperty(Constants.KEY, k.toString());
        o.addProperty(Constants.FILE, pfs.toString());
        o.addProperty(Constants.VALUE_SIZE, file.getLen());
        succeeded.add(o);
      }
    }
  } catch (Exception e) {
    Log.err(e);
    JsonObject o = new JsonObject();
    o.addProperty(Constants.FILE, p.toString());
    o.addProperty(Constants.ERROR, e.getMessage());
    failed.add(o);
  }
}
@Override
public void store(Value v) {
  // Should be used only if ice goes to HDFS
  assert this == getIce();
  assert !v.isPersisted();
  byte[] m = v.memOrLoad();
  assert (m == null || m.length == v._max); // Assert not saving partial files
  store(new Path(_iceRoot, getIceName(v)), m);
  v.setdsk(); // Set as write-complete to disk
}
/**
 * Initialize the ModelBuilder, validating all arguments and preparing the training frame. This
 * call is expected to be overridden in the subclasses and each subclass will start with
 * "super.init();". This call is made by the front-end whenever the GUI is clicked, and needs to
 * be fast; heavy-weight prep needs to wait for the trainModel() call.
 *
 * <p>Validate the requested ntrees; precompute actual ntrees. Validate the number of classes to
 * predict on; validate a checkpoint.
 */
@Override
public void init(boolean expensive) {
  super.init(expensive);
  if (H2O.ARGS.client && _parms._build_tree_one_node)
    error("_build_tree_one_node", "Cannot run on a single node in client mode");
  if (_vresponse != null) _vresponse_key = _vresponse._key;
  if (_response != null) _response_key = _response._key;

  if (_parms._min_rows < 0) error("_min_rows", "Requested min_rows must be greater than 0");

  if (_parms._ntrees < 0 || _parms._ntrees > MAX_NTREES)
    error("_ntrees", "Requested ntrees must be between 1 and " + MAX_NTREES);
  _ntrees = _parms._ntrees; // Total trees in final model
  if (_parms.hasCheckpoint()) { // Asking to continue from checkpoint?
    Value cv = DKV.get(_parms._checkpoint);
    if (cv != null) { // Look for prior model
      M checkpointModel = cv.get();
      try {
        _parms.validateWithCheckpoint(checkpointModel._parms);
      } catch (H2OIllegalArgumentException e) {
        error(e.values.get("argument").toString(), e.values.get("value").toString());
      }
      if (_parms._ntrees < checkpointModel._output._ntrees + 1)
        error(
            "_ntrees",
            "If checkpoint is specified then requested ntrees must be higher than "
                + (checkpointModel._output._ntrees + 1));
      // Compute number of trees to build for this checkpoint
      _ntrees = _parms._ntrees - checkpointModel._output._ntrees; // Needed trees
    }
  }
  if (_parms._nbins <= 1) error("_nbins", "_nbins must be > 1.");
  if (_parms._nbins >= 1 << 16) error("_nbins", "_nbins must be < " + (1 << 16));
  if (_parms._nbins_cats <= 1) error("_nbins_cats", "_nbins_cats must be > 1.");
  if (_parms._nbins_cats >= 1 << 16) error("_nbins_cats", "_nbins_cats must be < " + (1 << 16));
  if (_parms._max_depth <= 0) error("_max_depth", "_max_depth must be > 0.");
  if (_parms._min_rows <= 0) error("_min_rows", "_min_rows must be > 0.");
  if (_train != null) {
    double sumWeights =
        _train.numRows() * (hasWeightCol() ? _train.vec(_parms._weights_column).mean() : 1);
    if (sumWeights < 2 * _parms._min_rows) // Need at least 2*min_rows weighted rows to split even once
      error(
          "_min_rows",
          "The dataset size is too small to split for min_rows="
              + _parms._min_rows
              + ": must have at least "
              + 2 * _parms._min_rows
              + " (weighted) rows, but have only "
              + sumWeights
              + ".");
  }
  if (_train != null) _ncols = _train.numCols() - 1 - numSpecialCols();
}
/**
 * Initialize the ModelBuilder, validating all arguments and preparing the training frame. This
 * call is expected to be overridden in the subclasses and each subclass will start with
 * "super.init();". This call is made by the front-end whenever the GUI is clicked, and needs to
 * be fast; heavy-weight prep needs to wait for the trainModel() call.
 *
 * <p>Validate the requested ntrees; precompute actual ntrees. Validate the number of classes to
 * predict on; validate a checkpoint.
 */
@Override
public void init(boolean expensive) {
  super.init(expensive);
  if (H2O.ARGS.client && _parms._build_tree_one_node)
    error("_build_tree_one_node", "Cannot run on a single node in client mode");
  if (_vresponse != null) _vresponse_key = _vresponse._key;
  if (_response != null) _response_key = _response._key;
  if (_nclass > SharedTreeModel.SharedTreeParameters.MAX_SUPPORTED_LEVELS)
    error("_nclass", "Too many levels in response column!");

  if (_parms._min_rows < 0) error("_min_rows", "Requested min_rows must be greater than 0");

  if (_parms._ntrees < 0 || _parms._ntrees > 100000)
    error("_ntrees", "Requested ntrees must be between 1 and 100000");
  _ntrees = _parms._ntrees; // Total trees in final model
  if (_parms._checkpoint) { // Asking to continue from checkpoint?
    Value cv = DKV.get(_parms._model_id);
    if (cv != null) { // Look for prior model
      M checkpointModel = cv.get();
      if (_parms._ntrees < checkpointModel._output._ntrees + 1)
        error(
            "_ntrees",
            "Requested ntrees must be between "
                + (checkpointModel._output._ntrees + 1)
                + " and 100000");
      _ntrees = _parms._ntrees - checkpointModel._output._ntrees; // Needed trees
    }
  }
  if (_parms._nbins <= 1) error("_nbins", "_nbins must be > 1.");
  if (_parms._nbins >= 1 << 16) error("_nbins", "_nbins must be < " + (1 << 16));
  if (_parms._nbins_cats <= 1) error("_nbins_cats", "_nbins_cats must be > 1.");
  if (_parms._nbins_cats >= 1 << 16) error("_nbins_cats", "_nbins_cats must be < " + (1 << 16));
  if (_parms._max_depth <= 0) error("_max_depth", "_max_depth must be > 0.");
  if (_parms._min_rows <= 0) error("_min_rows", "_min_rows must be > 0.");
  if (_parms._distribution == Distributions.Family.tweedie) {
    _parms._distribution.tweedie.p = _parms._tweedie_power;
  }
  if (_train != null) {
    double sumWeights =
        _train.numRows() * (hasWeightCol() ? _train.vec(_parms._weights_column).mean() : 1);
    if (sumWeights < 2 * _parms._min_rows) // Need at least 2*min_rows weighted rows to split even once
      error(
          "_min_rows",
          "The dataset size is too small to split for min_rows="
              + _parms._min_rows
              + ": must have at least "
              + 2 * _parms._min_rows
              + " (weighted) rows, but have only "
              + sumWeights
              + ".");
  }
  if (_train != null) _ncols = _train.numCols() - 1 - numSpecialCols();
}
// TODO: almost identical to ModelsHandler; refactor
public static ModelMetrics getFromDKV(Key key) {
  if (null == key) throw new IllegalArgumentException("Got null key.");

  Value v = DKV.get(key);
  if (null == v) throw new IllegalArgumentException("Did not find key: " + key.toString());

  Iced ice = v.get();
  if (!(ice instanceof ModelMetrics))
    throw new IllegalArgumentException(
        "Expected a ModelMetrics for key: " + key.toString() + "; got a: " + ice.getClass());

  return (ModelMetrics) ice;
}
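The TODO above notes that this fetch-and-type-check is duplicated in ModelsHandler. A minimal sketch of one way such a refactor could look, assuming a shared generic helper; the method below is hypothetical and not an existing H2O API, only the DKV/Value calls mirror the code above.

// Hypothetical shared helper sketched from the TODO above; not part of the existing codebase.
// Fetches a key from the DKV and verifies the stored object is of the expected type.
public static <T extends Iced> T getFromDKV(Key key, Class<T> clazz) {
  if (null == key) throw new IllegalArgumentException("Got null key.");
  Value v = DKV.get(key);
  if (null == v) throw new IllegalArgumentException("Did not find key: " + key.toString());
  Iced ice = v.get();
  if (!clazz.isInstance(ice))
    throw new IllegalArgumentException(
        "Expected a " + clazz.getSimpleName() + " for key: " + key.toString()
            + "; got a: " + ice.getClass());
  return clazz.cast(ice);
}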
@Override
public Value lazyArrayChunk(Key key) {
  Key arykey = ValueArray.getArrayKey(key); // From the base file key
  long off = ValueArray.getChunkOffset(key); // The offset
  long size = getFileForKey(arykey).length();
  long rem = size - off;

  // The last chunk can be fat, so it got packed into the earlier chunk.
  if (rem < ValueArray.CHUNK_SZ && off > 0) return null;
  int sz = (rem >= ValueArray.CHUNK_SZ * 2) ? (int) ValueArray.CHUNK_SZ : (int) rem;
  Value val = new Value(key, sz, Value.NFS);
  val.setdsk(); // But it's already on disk.
  return val;
}
// Convert a chunk# into a chunk - does lazy-chunk creation. As chunks are
// asked-for the first time, we make the Key and an empty backing DVec.
// Touching the DVec will force the file load.
@Override
public Value chunkIdx(int cidx) {
  final long nchk = nChunks();
  assert 0 <= cidx && cidx < nchk;
  Key dkey = chunkKey(cidx);
  Value val1 = DKV.get(dkey); // Check for an existing one... will fetch data as needed
  if (val1 != null) return val1; // Found an existing one?
  // Lazily create a DVec for this chunk
  int len = (int) (cidx < nchk - 1 ? ValueArray.CHUNK_SZ : (_len - chunk2StartElem(cidx)));
  // DVec is just the raw file data with a null-compression scheme
  Value val2 = new Value(dkey, len, null, TypeMap.C1CHUNK, Value.NFS);
  val2.setdsk(); // It is already on disk.
  // Atomically insert: fails on a race, but then return the old version
  Value val3 = DKV.DputIfMatch(dkey, val2, null, null);
  return val3 == null ? val2 : val3;
}
// Read up to 'len' bytes of Value. Value should already be persisted to
// disk. A racing delete can trigger a failure where we get a null return,
// but no crash (although one could argue that a racing load&delete is a bug
// no matter what).
@Override
public byte[] load(Value v) {
  long skip = 0;
  Key k = v._key;
  // Convert an arraylet chunk into a long-offset from the base file.
  if (k._kb[0] == Key.ARRAYLET_CHUNK) {
    skip = ValueArray.getChunkOffset(k); // The offset
    k = ValueArray.getArrayKey(k); // From the base file key
  }
  if (k._kb[0] == Key.DVEC) {
    skip = water.fvec.NFSFileVec.chunkOffset(k); // The offset
  }
  try {
    FileInputStream s = null;
    try {
      s = new FileInputStream(getFileForKey(k));
      FileChannel fc = s.getChannel();
      fc.position(skip);
      AutoBuffer ab = new AutoBuffer(fc, true, Value.NFS);
      byte[] b = ab.getA1(v._max);
      ab.close();
      assert v.isPersisted();
      return b;
    } finally {
      if (s != null) s.close();
    }
  } catch (IOException e) { // Broken disk / short-file???
    H2O.ignore(e);
    return null;
  }
}
public static String store2Hdfs(Key srcKey) {
  assert srcKey._kb[0] != Key.ARRAYLET_CHUNK;
  assert PersistHdfs.getPathForKey(srcKey) != null; // Validate key name
  Value v = DKV.get(srcKey);
  if (v == null) return "Key " + srcKey + " not found";
  if (v._isArray == 0) { // Simple chunk?
    v.setHdfs(); // Set to HDFS and be done
    return null; // Success
  }

  // For ValueArrays, make the .hex header
  ValueArray ary = ValueArray.value(v);
  String err = PersistHdfs.freeze(srcKey, ary);
  if (err != null) return err;

  // The task managing which chunks to write next,
  // store in a known key
  TaskStore2HDFS ts = new TaskStore2HDFS(srcKey);
  Key selfKey = ts.selfKey();
  UKV.put(selfKey, ts);

  // Then start writing chunks in-order with the zero chunk
  H2ONode chk0_home = ValueArray.getChunkKey(0, srcKey).home_node();
  RPC.call(ts.chunkHome(), ts);

  // Watch the progress key until it gets removed or an error appears
  long idx = 0;
  while (UKV.get(selfKey, ts) != null) {
    if (ts._indexFrom != idx) {
      System.out.print(" " + idx + "/" + ary.chunks());
      idx = ts._indexFrom;
    }
    if (ts._err != null) { // Found an error?
      UKV.remove(selfKey); // Cleanup & report
      return ts._err;
    }
    try {
      Thread.sleep(100);
    } catch (InterruptedException e) {
    }
  }
  System.out.println(" " + ary.chunks() + "/" + ary.chunks());
  // PersistHdfs.refreshHDFSKeys();
  return null;
}
public Key importFile(int i, Futures fs) {
  if (_ok[i] < H2O.CLOUD.size()) return null;
  File f = new File(_files[i]);
  Key k;
  if (_newApi) {
    k = PersistNFS.decodeFile(f);
    NFSFileVec nfs = DKV.get(NFSFileVec.make(f, fs)).get();
    UKV.put(k, new Frame(new String[] {"0"}, new Vec[] {nfs}), fs);
  } else {
    k = PersistNFS.decodeFile(f);
    long size = f.length();
    Value val =
        (size < 2 * ValueArray.CHUNK_SZ)
            ? new Value(k, (int) size, Value.NFS)
            : new Value(k, new ValueArray(k, size), Value.NFS);
    val.setdsk();
    UKV.put(k, val, fs);
  }
  return k;
}
@SuppressWarnings("unused") // called through reflection by RequestServer public JobsV3 fetch(int version, JobsV3 s) { Key key = s.job_id.key(); Value val = DKV.get(key); if (null == val) throw new IllegalArgumentException("Job is missing"); Iced ice = val.get(); if (!(ice instanceof Job)) throw new IllegalArgumentException("Must be a Job not a " + ice.getClass()); Job j = (Job) ice; s.jobs = new JobV3[1]; // s.fillFromImpl(jobs); try { s.jobs[0] = (JobV3) Schema.schema(version, j).fillFromImpl(j); } // no special schema for this job subclass, so fall back to JobV3 catch (H2ONotFoundArgumentException e) { s.jobs[0] = new JobV3().fillFromImpl(j); } return s; }
/**
 * Executes phase one of the parser.
 *
 * <p>The first phase detects the encoding and basic statistics of the parsed dataset.
 *
 * <p>For CSV parsers it detects the parser setup and then launches the distributed computation on
 * a per-chunk basis.
 *
 * <p>For XLS and XLSX parsers, which do not work in a distributed way, it parses the whole
 * dataset.
 *
 * @throws Exception
 */
public void passOne(CsvParser.Setup setup) throws Exception {
  switch (_parserType) {
    case CSV:
      // precompute the parser setup, column setup and other settings
      byte[] bits = _sourceDataset.getFirstBytes(); // Can limit to eg 256*1024
      if (setup == null) setup = CsvParser.guessCsvSetup(bits);
      if (setup._data == null) {
        _error = "Unable to determine the separator or number of columns on the dataset";
        return;
      }
      _colNames = setup._data[0];
      setColumnNames(_colNames);
      _skipFirstLine = setup._header;
      // set the separator
      this._sep = setup._separator;
      // if parsing value array, initialize the nrows array
      if (_sourceDataset._isArray != 0) {
        ValueArray ary = ValueArray.value(_sourceDataset);
        _nrows = new int[(int) ary.chunks()];
      }
      // launch the distributed parser on its chunks.
      this.invoke(_sourceDataset._key);
      break;
    case XLS:
      // XLS parsing is not distributed, just obtain the value stream and
      // run the parser
      CustomParser p = new XlsParser(this);
      p.parse(_sourceDataset._key);
      --_myrows; // do not count the header
      break;
    case XLSX:
      // XLSX parsing is not distributed, just obtain the value stream and
      // run the parser
      CustomParser px = new XlsxParser(this);
      px.parse(_sourceDataset._key);
      break;
    default:
      throw new Error("NOT IMPLEMENTED");
  }
  // calculate proper numbers of rows for the chunks
  if (_nrows != null) {
    _numRows = 0;
    for (int i = 0; i < _nrows.length; ++i) {
      _numRows += _nrows[i];
      _nrows[i] = _numRows;
    }
  } else {
    _numRows = _myrows;
  }
  // normalize mean
  for (int i = 0; i < _ncolumns; ++i) _mean[i] = _mean[i] / (_numRows - _invalidValues[i]);
}
// Convert a chunk# into a chunk - does lazy-chunk creation. As chunks are
// asked-for the first time, we make the Key and an empty backing DVec.
// Touching the DVec will force the file load.
@Override
public Value chunkIdx(int cidx) {
  final long nchk = nChunks();
  assert 0 <= cidx && cidx < nchk;
  Key dkey = chunkKey(cidx);
  Value val1 = DKV.get(dkey); // Check for an existing one... will fetch data as needed
  if (val1 != null) return val1; // Found an existing one?
  // Lazily create a DVec for this chunk
  int len = (int) (cidx < nchk - 1 ? CHUNK_SZ : (_len - chunk2StartElem(cidx)));
  // DVec is just the raw file data with a null-compression scheme
  Value val2 = new Value(dkey, len, null, TypeMap.C1NCHUNK, _be);
  val2.setdsk(); // It is already on disk.
  // If not-home, then block till the Key is everywhere. Most calls here are
  // from the parser loading a text file, and the parser splits the work such
  // that most puts here are on home - so this is a simple speed optimization:
  // do not make a Futures nor block on it on home.
  Futures fs = dkey.home() ? null : new Futures();
  // Atomically insert: fails on a race, but then return the old version
  Value val3 = DKV.DputIfMatch(dkey, val2, null, fs);
  if (!dkey.home() && fs != null) fs.blockForPending();
  return val3 == null ? val2 : val3;
}
static boolean checkSaneFrame_impl() {
  for (Key k : H2O.localKeySet()) {
    Value val = H2O.raw_get(k);
    if (val.isFrame()) {
      Frame fr = val.get();
      Vec[] vecs = fr.vecs();
      for (int i = 0; i < vecs.length; i++) {
        Vec v = vecs[i];
        if (DKV.get(v._key) == null) {
          System.err.println(
              "Frame " + fr._key + " in the DKV, is missing Vec " + v._key
                  + ", name=" + fr._names[i]);
          return false;
        }
      }
    }
  }
  return true;
}
@Override
public void lcompute() {
  // Optional: cancel all jobs
  // for (Job job : Job.all()) {
  //   job.cancel();
  //   Job.waitUntilJobEnded(job.self());
  // }

  final Set<Key> keySet = H2O.globalKeySet(null);
  for (Key key : keySet) {
    if (!key.home()) continue; // only unlock local keys
    final Value val = DKV.get(key);
    if (val == null) continue;
    if (val.rawPOJO() == null) continue; // need to have a POJO to be locked
    if (!val.isLockable()) continue;
    final Object obj = val.rawPOJO();
    assert (obj instanceof Lockable<?>);
    final Lockable<?> lockable = (Lockable<?>) (obj);
    final Key[] lockers = ((Lockable) obj)._lockers;
    if (lockers != null) {
      // check that none of the locking jobs is still running
      for (Key locker : lockers) {
        if (locker != null && locker.type() == Key.JOB) {
          final Job job = UKV.get(locker);
          if (job != null && job.isRunning())
            throw new UnsupportedOperationException(
                "Cannot unlock all keys since locking jobs are still running.");
        }
      }
      lockable.unlock_all();
      Log.info("Unlocked key '" + key + "' from " + lockers.length + " lockers.");
    }
  }
  Log.info("All keys are now unlocked.");
  tryComplete();
}
@Override
protected JsonArray serve(String filter, int limit) {
  JsonArray array = new JsonArray();
  Key[] keys = new Key[limit];
  int len = 0;
  // Gather some keys that pass all filters
  for (Key key : H2O.keySet()) {
    if (filter != null && // Have a filter?
        key.toString().indexOf(filter) == -1) continue; // Ignore this filtered-out key
    if (!key.user_allowed()) continue; // Also filter out non-user keys
    Value val = DKV.get(key);
    if (val == null) continue; // Deleted key?
    if (_typeid != 0 && val.type() != _typeid) continue; // Wrong type?
    if (!shouldIncludeKey(key)) continue; // Generic override
    keys[len++] = key; // Capture the key
    if (len == keys.length) break;
  }
  // sort the keys, for pretty display & reliable ordering
  Arrays.sort(keys, 0, len);
  for (int i = 0; i < len; ++i) array.add(new JsonPrimitive(keys[i].toString()));
  return array;
}
// Store Value v to disk.
@Override
public void store(Value v) {
  // Only the home node does persistence on NFS
  if (!v._key.home()) return;
  // A perhaps useless cutout: the upper layers should test this first.
  if (v.isPersisted()) return;
  // Never store arraylets on NFS, instead we'll store the entire array.
  assert !v.isArray();
  try {
    File f = getFileForKey(v._key);
    f.mkdirs();
    FileOutputStream s = new FileOutputStream(f);
    try {
      byte[] m = v.memOrLoad();
      assert (m == null || m.length == v._max); // Assert not saving partial files
      if (m != null) new AutoBuffer(s.getChannel(), false, Value.NFS).putA1(m, m.length).close();
      v.setdsk(); // Set as write-complete to disk
    } finally {
      s.close();
    }
  } catch (IOException e) {
    H2O.ignore(e);
  }
}
@Override
public void delete(final Value v) {
  assert this == getIce();
  assert !v.isPersisted(); // Upper layers already cleared out
  run(
      new Callable() {
        @Override
        public Object call() throws Exception {
          Path p = new Path(_iceRoot, getIceName(v));
          FileSystem fs = FileSystem.get(p.toUri(), CONF);
          fs.delete(p, true);
          if (v.isArray()) { // Also nuke directory if the top-level ValueArray dies
            p = new Path(_iceRoot, getIceDirectory(v._key));
            fs = FileSystem.get(p.toUri(), CONF);
            fs.delete(p, true);
          }
          return null;
        }
      },
      false,
      0);
}
/** * Copy properties "of the same name" from one POJO to the other. If the fields are named * consistently (both sides have fields named "_foo" and/or "bar") this acts like Apache Commons * PojoUtils.copyProperties(). If one side has leading underscores and the other does not then the * names are conformed according to the field_naming parameter. * * @param dest Destination POJO * @param origin Origin POJO * @param field_naming Are the fields named consistently, or does one side have underscores? * @param skip_fields Array of origin or destination field names to skip * @param only_fields Array of origin or destination field names to include; ones not in this list * will be skipped */ public static void copyProperties( Object dest, Object origin, FieldNaming field_naming, String[] skip_fields, String[] only_fields) { if (null == dest || null == origin) return; Field[] dest_fields = Weaver.getWovenFields(dest.getClass()); Field[] orig_fields = Weaver.getWovenFields(origin.getClass()); for (Field orig_field : orig_fields) { String origin_name = orig_field.getName(); if (skip_fields != null & ArrayUtils.contains(skip_fields, origin_name)) continue; if (only_fields != null & !ArrayUtils.contains(only_fields, origin_name)) continue; String dest_name = null; if (field_naming == FieldNaming.CONSISTENT) { dest_name = origin_name; } else if (field_naming == FieldNaming.DEST_HAS_UNDERSCORES) { dest_name = "_" + origin_name; } else if (field_naming == FieldNaming.ORIGIN_HAS_UNDERSCORES) { dest_name = origin_name.substring(1); } if (skip_fields != null & ArrayUtils.contains(skip_fields, dest_name)) continue; if (only_fields != null & !ArrayUtils.contains(only_fields, dest_name)) continue; try { Field dest_field = null; for (Field fd : dest_fields) { if (fd.getName().equals(dest_name)) { dest_field = fd; break; } } if (dest_field != null) { dest_field.setAccessible(true); orig_field.setAccessible(true); // Log.info("PojoUtils.copyProperties, origin field: " + orig_field + "; destination // field: " + dest_field); if (null == orig_field.get(origin)) { // // Assigning null to dest. // dest_field.set(dest, null); } else if (dest_field.getType().isArray() && orig_field.getType().isArray() && (dest_field.getType().getComponentType() != orig_field.getType().getComponentType())) { // // Assigning an array to another array. // // You can't use reflection to set an int[] with an Integer[]. Argh. // TODO: other types of arrays. . . 
          if (dest_field.getType().getComponentType() == double.class
              && orig_field.getType().getComponentType() == Double.class) {
            //
            // Assigning a Double[] to a double[]
            //
            double[] copy = (double[]) orig_field.get(origin);
            dest_field.set(dest, copy);
          } else if (dest_field.getType().getComponentType() == Double.class
              && orig_field.getType().getComponentType() == double.class) {
            //
            // Assigning a double[] to a Double[]
            //
            Double[] copy = (Double[]) orig_field.get(origin);
            dest_field.set(dest, copy);
          } else if (dest_field.getType().getComponentType() == int.class
              && orig_field.getType().getComponentType() == Integer.class) {
            //
            // Assigning an Integer[] to an int[]
            //
            int[] copy = (int[]) orig_field.get(origin);
            dest_field.set(dest, copy);
          } else if (dest_field.getType().getComponentType() == Integer.class
              && orig_field.getType().getComponentType() == int.class) {
            //
            // Assigning an int[] to an Integer[]
            //
            Integer[] copy = (Integer[]) orig_field.get(origin);
            dest_field.set(dest, copy);
          } else if (Schema.class.isAssignableFrom(dest_field.getType().getComponentType())
              && (Schema.getImplClass(
                      (Class<? extends Schema>) dest_field.getType().getComponentType()))
                  .isAssignableFrom(orig_field.getType().getComponentType())) {
            //
            // Assigning an array of impl fields to an array of schema fields, e.g. a
            // DeepLearningParameters[] into a DeepLearningParametersV2[]
            //
            Class dest_component_class = dest_field.getType().getComponentType();
            Schema[] translation =
                (Schema[])
                    Array.newInstance(
                        dest_component_class, Array.getLength(orig_field.get(origin)));
            int i = 0;
            int version = ((Schema) dest).getSchemaVersion();

            // Look up the schema for each element of the array; if not found fall back to the
            // schema for the base class.
            for (Iced impl : ((Iced[]) orig_field.get(origin))) {
              if (null == impl) {
                translation[i++] = null;
              } else {
                Schema s = null;
                try {
                  s = Schema.schema(version, impl);
                } catch (H2ONotFoundArgumentException e) {
                  s = ((Schema) dest_field.getType().getComponentType().newInstance());
                }
                translation[i++] = s.fillFromImpl(impl);
              }
            }
            dest_field.set(dest, translation);
          } else if (Schema.class.isAssignableFrom(orig_field.getType().getComponentType())
              && Iced.class.isAssignableFrom(dest_field.getType().getComponentType())) {
            //
            // Assigning an array of schema fields to an array of impl fields, e.g. a
            // DeepLearningParametersV2[] into a DeepLearningParameters[]
            //
            // We can't check against the actual impl class I, because we can't instantiate the
            // schema base classes to get the impl class from an instance:
            // dest_field.getType().getComponentType().isAssignableFrom(((Schema)f.getType().getComponentType().newInstance()).getImplClass())) {
            Class dest_component_class = dest_field.getType().getComponentType();
            Iced[] translation =
                (Iced[])
                    Array.newInstance(
                        dest_component_class, Array.getLength(orig_field.get(origin)));
            int i = 0;
            for (Schema s : ((Schema[]) orig_field.get(origin))) {
              translation[i++] = s.createImpl();
            }
            dest_field.set(dest, translation);
          } else {
            throw H2O.fail(
                "Don't know how to cast an array of: "
                    + orig_field.getType().getComponentType()
                    + " to an array of: "
                    + dest_field.getType().getComponentType());
          } // end of array handling
        } else if (dest_field.getType() == Key.class
            && Keyed.class.isAssignableFrom(orig_field.getType())) {
          //
          // Assigning a Keyed (e.g., a Frame or Model) to a Key.
          //
          dest_field.set(dest, ((Keyed) orig_field.get(origin))._key);
        } else if (orig_field.getType() == Key.class
            && Keyed.class.isAssignableFrom(dest_field.getType())) {
          //
          // Assigning a Key (for e.g., a Frame or Model) to a Keyed (e.g., a Frame or Model).
          //
          Value v = DKV.get((Key) orig_field.get(origin));
          dest_field.set(dest, (null == v ? null : v.get()));
        } else if (KeyV3.class.isAssignableFrom(dest_field.getType())
            && Keyed.class.isAssignableFrom(orig_field.getType())) {
          //
          // Assigning a Keyed (e.g., a Frame or Model) to a KeyV3.
          //
          dest_field.set(
              dest,
              KeyV3.make(
                  ((Class<? extends KeyV3>) dest_field.getType()),
                  ((Keyed) orig_field.get(origin))._key));
        } else if (KeyV3.class.isAssignableFrom(orig_field.getType())
            && Keyed.class.isAssignableFrom(dest_field.getType())) {
          //
          // Assigning a KeyV3 (for e.g., a Frame or Model) to a Keyed (e.g., a Frame or Model).
          //
          KeyV3 k = (KeyV3) orig_field.get(origin);
          Value v = DKV.get(Key.make(k.name));
          dest_field.set(dest, (null == v ? null : v.get()));
        } else if (KeyV3.class.isAssignableFrom(dest_field.getType())
            && Key.class.isAssignableFrom(orig_field.getType())) {
          //
          // Assigning a Key to a KeyV3.
          //
          dest_field.set(
              dest,
              KeyV3.make(
                  ((Class<? extends KeyV3>) dest_field.getType()), (Key) orig_field.get(origin)));
        } else if (KeyV3.class.isAssignableFrom(orig_field.getType())
            && Key.class.isAssignableFrom(dest_field.getType())) {
          //
          // Assigning a KeyV3 to a Key.
          //
          KeyV3 k = (KeyV3) orig_field.get(origin);
          dest_field.set(dest, (null == k.name ? null : Key.make(k.name)));
        } else if (dest_field.getType() == Pattern.class
            && String.class.isAssignableFrom(orig_field.getType())) {
          //
          // Assigning a String to a Pattern.
          //
          dest_field.set(dest, Pattern.compile((String) orig_field.get(origin)));
        } else if (orig_field.getType() == Pattern.class
            && String.class.isAssignableFrom(dest_field.getType())) {
          //
          // We are assigning a Pattern to a String.
          //
          dest_field.set(dest, orig_field.get(origin).toString());
        } else if (dest_field.getType() == FrameV3.ColSpecifierV3.class
            && String.class.isAssignableFrom(orig_field.getType())) {
          //
          // Assigning a String to a ColSpecifier. Note that we currently support only the
          // colname, not a frame name too.
          //
          dest_field.set(dest, new FrameV3.ColSpecifierV3((String) orig_field.get(origin)));
        } else if (orig_field.getType() == FrameV3.ColSpecifierV3.class
            && String.class.isAssignableFrom(dest_field.getType())) {
          //
          // We are assigning a ColSpecifierV3 to a String. The column_name gets copied.
          //
          dest_field.set(dest, ((FrameV3.ColSpecifierV3) orig_field.get(origin)).column_name);
        } else if (Enum.class.isAssignableFrom(dest_field.getType())
            && String.class.isAssignableFrom(orig_field.getType())) {
          //
          // Assigning a String into an enum field.
          //
          Class<Enum> dest_class = (Class<Enum>) dest_field.getType();
          dest_field.set(dest, Enum.valueOf(dest_class, (String) orig_field.get(origin)));
        } else if (Enum.class.isAssignableFrom(orig_field.getType())
            && String.class.isAssignableFrom(dest_field.getType())) {
          //
          // Assigning an enum field into a String.
          //
          Object o = orig_field.get(origin);
          dest_field.set(dest, (o == null ? null : o.toString()));
        } else if (Schema.class.isAssignableFrom(dest_field.getType())
            && Schema.getImplClass((Class<? extends Schema>) dest_field.getType())
                .isAssignableFrom(orig_field.getType())) {
          //
          // Assigning an impl field into a schema field, e.g. a DeepLearningParameters into a
          // DeepLearningParametersV2.
          //
          dest_field.set(
              dest,
              Schema.schema(
                      /* ((Schema)dest).getSchemaVersion() TODO: remove HACK!!
                      */ 3,
                      (Class<? extends Iced>) orig_field.get(origin).getClass())
                  .fillFromImpl((Iced) orig_field.get(origin)));
        } else if (Schema.class.isAssignableFrom(orig_field.getType())
            && Schema.getImplClass((Class<? extends Schema>) orig_field.getType())
                .isAssignableFrom(dest_field.getType())) {
          //
          // Assigning a schema field into an impl field, e.g. a DeepLearningParametersV2 into a
          // DeepLearningParameters.
          //
          Schema s = ((Schema) orig_field.get(origin));
          dest_field.set(dest, s.fillImpl(s.createImpl()));
        } else if ((Schema.class.isAssignableFrom(dest_field.getType())
            && Key.class.isAssignableFrom(orig_field.getType()))) {
          //
          // Assigning an impl field fetched via a Key into a schema field, e.g. a
          // DeepLearningParameters into a DeepLearningParametersV2.
          // Note that unlike the cases above we don't know the type of the impl class until we
          // fetch in the body of the if.
          //
          Key origin_key = (Key) orig_field.get(origin);
          Value v = DKV.get(origin_key);
          if (null == v || null == v.get()) {
            dest_field.set(dest, null);
          } else {
            if (((Schema) dest_field.get(dest))
                .getImplClass()
                .isAssignableFrom(v.get().getClass())) {
              Schema s = ((Schema) dest_field.get(dest));
              dest_field.set(
                  dest,
                  Schema.schema(s.getSchemaVersion(), s.getImplClass()).fillFromImpl(v.get()));
            } else {
              Log.err(
                  "Can't fill Schema of type: "
                      + dest_field.getType()
                      + " with value of type: "
                      + v.getClass()
                      + " fetched from Key: "
                      + origin_key);
              dest_field.set(dest, null);
            }
          }
        } else if (Schema.class.isAssignableFrom(orig_field.getType())
            && Keyed.class.isAssignableFrom(dest_field.getType())) {
          //
          // Assigning a schema field into a Key field, e.g. a DeepLearningV2 into a
          // (DeepLearningParameters) key.
          //
          Schema s = ((Schema) orig_field.get(origin));
          dest_field.set(dest, ((Keyed) s.fillImpl(s.createImpl()))._key);
        } else {
          //
          // Normal case: not doing any type conversion.
          //
          dest_field.set(dest, orig_field.get(origin));
        }
      }
    } catch (IllegalAccessException e) {
      Log.err(
          "Illegal access exception trying to copy field: "
              + origin_name
              + " of class: "
              + origin.getClass()
              + " to field: "
              + dest_name
              + " of class: "
              + dest.getClass());
    } catch (InstantiationException e) {
      Log.err(
          "Instantiation exception trying to copy field: "
              + origin_name
              + " of class: "
              + origin.getClass()
              + " to field: "
              + dest_name
              + " of class: "
              + dest.getClass());
    }
  }
}
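A minimal usage sketch of copyProperties above. GbmLiteParams and GbmLiteParamsV3 (and their fields) are invented for illustration and are assumed to be Weaver-visible impl/schema classes whose field names differ only by a leading underscore; only the copyProperties calls themselves reflect the method shown above.

// Hedged usage sketch; GbmLiteParams (fields _ntrees, _learn_rate) is the hypothetical impl-side
// POJO and GbmLiteParamsV3 (fields ntrees, learn_rate) is the hypothetical schema-side POJO.
static void copyPropertiesExample(GbmLiteParams impl, GbmLiteParamsV3 schema) {
  // Origin field names carry leading underscores, destination names do not,
  // so conform them with ORIGIN_HAS_UNDERSCORES; no fields are skipped or whitelisted.
  PojoUtils.copyProperties(schema, impl, FieldNaming.ORIGIN_HAS_UNDERSCORES, null, null);

  // Copying in the other direction conforms names the other way around.
  PojoUtils.copyProperties(impl, schema, FieldNaming.DEST_HAS_UNDERSCORES, null, null);
}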
public static byte[] getFirstUnzipedBytes(Value v) {
  byte[] bits = v.getFirstBytes();
  try {
    return unzipBytes(bits, guessCompressionMethod(bits));
  } catch (Exception e) {
    return null;
  }
}