public Vec replace(int col, Vec nv) {
  assert col < _names.length;
  Vec rv = vecs()[col];
  assert rv.group().equals(nv.group());
  _vecs[col] = nv;
  _keys[col] = nv._key;
  if (DKV.get(nv._key) == null) // If not already in KV, put it there
    DKV.put(nv._key, nv);
  return rv;
}
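// --- Usage sketch (illustrative, not part of the original source) -----------------------
// Minimal example of Frame.replace: swap column 3 for a constant column and drop the old
// Vec. The key "train.hex", the column index, and the makeCon helper call are assumptions;
// the replacement Vec must live in the same VectorGroup as the frame's columns or the
// assert above fires.
Frame fr = DKV.getGet("train.hex");
Vec newCol = fr.anyVec().makeCon(0);   // assumed helper: new constant Vec in the frame's group
Vec oldCol = fr.replace(3, newCol);    // returns the Vec that used to live in column 3
DKV.put(fr._key, fr);                  // re-publish the modified frame; replace() only mutates
                                       // the in-memory Frame object
oldCol.remove();                       // free the replaced column's chunks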
@Override
public void modifyParmsForCrossValidationMainModel(ModelBuilder[] cvModelBuilders) {
  _parms._overwrite_with_best_model = false;
  if (_parms._stopping_rounds == 0 && _parms._max_runtime_secs == 0)
    return; // No exciting changes to stopping conditions
  // Extract stopping conditions from each CV model, and compute the best stopping answer
  _parms._stopping_rounds = 0;
  _parms._max_runtime_secs = 0;
  double sum = 0;
  for (ModelBuilder cvmb : cvModelBuilders)
    sum += ((DeepLearningModel) DKV.getGet(cvmb.dest())).last_scored().epoch_counter;
  _parms._epochs = sum / cvModelBuilders.length;
  if (!_parms._quiet_mode) {
    warn("_epochs", "Setting optimal _epochs to " + _parms._epochs
        + " for cross-validation main model based on early stopping of cross-validation models.");
    warn("_stopping_rounds", "Disabling convergence-based early stopping for cross-validation main model.");
    warn("_max_runtime_secs", "Disabling maximum allowed runtime for cross-validation main model.");
  }
}
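// --- Worked example (illustrative, not part of the original source) ---------------------
// Toy illustration of the averaging above: if three CV models early-stopped at 6.0, 7.5,
// and 9.0 epochs, the main model is rebuilt with early stopping disabled and _epochs set
// to their mean. The numbers are made up.
double[] cvEpochs = {6.0, 7.5, 9.0};              // last_scored().epoch_counter of each CV model
double sum = 0;
for (double e : cvEpochs) sum += e;
double mainModelEpochs = sum / cvEpochs.length;   // 7.5 -> the value assigned to _parms._epochs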
public Frame(String[] names, Vec[] vecs) {
  // assert names==null || names.length == vecs.length
  //     : "Number of columns does not match to number of cols' names.";
  _names = names;
  _vecs = vecs;
  _keys = new Key[vecs.length];
  for (int i = 0; i < vecs.length; i++) {
    Key k = _keys[i] = vecs[i]._key;
    if (DKV.get(k) == null) // If not already in KV, put it there
      DKV.put(k, vecs[i]);
  }
  Vec v0 = anyVec();
  if (v0 == null) return;
  VectorGroup grp = v0.group();
  for (int i = 0; i < vecs.length; i++)
    assert grp.equals(vecs[i].group());
}
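// --- Construction sketch (illustrative, not part of the original source) ----------------
// Hedged example of using this constructor. Vec.makeCon (static) and the instance-level
// makeCon are assumed to be the usual H2O-3 helpers for constant Vecs; calling makeCon on
// an existing Vec keeps both columns in one VectorGroup so the group assert above holds.
Vec zeros = Vec.makeCon(0.0, 1000);                 // fresh 1000-row Vec, new VectorGroup
Vec ones  = zeros.makeCon(1.0);                     // same group/layout as 'zeros'
Frame fr  = new Frame(new String[]{"zeros", "ones"}, new Vec[]{zeros, ones});
// The constructor has already DKV.put() any Vec that was not yet published.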
// ------------------------------------------------------------------------
// Zipped file; no parallel decompression; decompress into local chunks,
// parse local chunks; distribute chunks later.
ParseWriter streamParseZip(final InputStream is, final StreamParseWriter dout, InputStream bvs)
    throws IOException {
  // All output into a fresh pile of NewChunks, one per column
  if (!_setup._parse_type._parallelParseSupported) throw H2O.unimpl();
  StreamData din = new StreamData(is);
  int cidx = 0;
  StreamParseWriter nextChunk = dout;
  int zidx = bvs.read(null, 0, 0); // Back-channel read of chunk index
  assert zidx == 1;
  while (is.available() > 0) {
    int xidx = bvs.read(null, 0, 0); // Back-channel read of chunk index
    if (xidx > zidx) {   // Advanced chunk index of underlying ByteVec stream?
      zidx = xidx;       // Record advancing of chunk
      nextChunk.close(); // Match output chunks to input zipfile chunks
      if (dout != nextChunk) {
        dout.reduce(nextChunk);
        if (_jobKey != null && ((Job) DKV.getGet(_jobKey)).isCancelledOrCrashed()) break;
      }
      nextChunk = nextChunk.nextChunk();
    }
    parseChunk(cidx++, din, nextChunk);
  }
  parseChunk(cidx, din, nextChunk); // Parse the remaining partial 32K buffer
  nextChunk.close();
  if (dout != nextChunk) dout.reduce(nextChunk);
  return dout;
}
public static ValueArray loadAndParseKey(Key okey, String path) {
  FileIntegrityChecker c = FileIntegrityChecker.check(new File(path), false);
  Key k = c.syncDirectory(null, null, null, null);
  ParseDataset.forkParseDataset(okey, new Key[] { k }, null).get();
  UKV.remove(k);
  ValueArray res = DKV.get(okey).get();
  return res;
}
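// --- Usage sketch (illustrative, not part of the original source) -----------------------
// Hypothetical invocation of the old H2O-1 helper above: parse a local CSV and fetch the
// resulting ValueArray. The key name and path are made up, and numRows()/numCols() are
// assumed to be ValueArray's usual accessors.
ValueArray ary = loadAndParseKey(Key.make("iris.hex"), "smalldata/iris/iris.csv");
System.out.println("Parsed " + ary.numRows() + " rows, " + ary.numCols() + " cols");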
// Close all AppendableVec
public Futures closeAppendables(Futures fs) {
  _col0 = null; // Reset cache
  int len = vecs().length;
  for (int i = 0; i < len; i++) {
    Vec v = _vecs[i];
    if (v instanceof AppendableVec)
      DKV.put(_keys[i], _vecs[i] = ((AppendableVec) v).close(fs), fs);
  }
  return fs;
}
ParseWriter streamParse(final InputStream is, final ParseWriter dout) throws IOException {
  if (!_setup._parse_type._parallelParseSupported) throw H2O.unimpl();
  StreamData din = new StreamData(is);
  int cidx = 0;
  // FIXME leaving _jobKey == null until sampling is done, this means entire zip files
  // FIXME are parsed for parseSetup
  while (is.available() > 0
      && (_jobKey == null || !((Job) DKV.getGet(_jobKey)).isCancelledOrCrashed()))
    parseChunk(cidx++, din, dout);
  parseChunk(cidx, din, dout); // Parse the remaining partial 32K buffer
  return dout;
}
@Override
protected Key parse(String input) {
  if (_validator != null) _validator.validateRaw(input);
  Key k = Key.make(input);
  Value v = DKV.get(k);
  if (v == null && _mustExist)
    throw new IllegalArgumentException("Key '" + input + "' does not exist!");
  if (_type != null) {
    if (v != null && !compatible(_type, v.get()))
      throw new IllegalArgumentException(input + ":" + errors()[0]);
    if (v == null && _required)
      throw new IllegalArgumentException("Key '" + input + "' does not exist!");
  }
  return k;
}
@Override
public void map(Chunk cs) {
  int idx = _chunkOffset + cs.cidx();
  Key ckey = Vec.chunkKey(_v._key, idx);
  if (_cmap != null) {
    assert !cs.hasFloat()
        : "Input chunk (" + cs.getClass() + ") has float, but is expected to be categorical";
    NewChunk nc = new NewChunk(_v, idx);
    // loop over rows and update ints for new domain mapping according to vecs[c].domain()
    for (int r = 0; r < cs._len; ++r) {
      if (cs.isNA(r)) nc.addNA();
      else nc.addNum(_cmap[(int) cs.at8(r)], 0);
    }
    nc.close(_fs);
  } else {
    DKV.put(ckey, cs.deepCopy(), _fs, true);
  }
}
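// --- Toy illustration (not part of the original source) ---------------------------------
// Standalone sketch of the per-row remap done by nc.addNum(_cmap[(int) cs.at8(r)], 0):
// if a chunk's local categorical domain was {"b","a"} but the merged target domain is
// {"a","b"}, then _cmap = {1, 0} and a stored level index of 0 ("b") is rewritten to 1.
int[] cmap = {1, 0};                       // local level i -> index in the merged domain
long storedLevel = 0;                      // what cs.at8(r) would return for "b"
int remapped = cmap[(int) storedLevel];    // 1, the index of "b" in the merged domain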
@Override
protected void doGet(HttpServletRequest request, HttpServletResponse response)
    throws IOException, ServletException {
  String uri = getDecodedUri(request);
  try {
    boolean use_hex = false;
    String f_name = request.getParameter("frame_id");
    String hex_string = request.getParameter("hex_string");
    if (f_name == null) {
      throw new RuntimeException("Cannot find value for parameter 'frame_id'");
    }
    if (hex_string != null && hex_string.toLowerCase().equals("true")) {
      use_hex = true;
    }
    Frame dataset = DKV.getGet(f_name);
    // TODO: Find a way to determine the hex_string parameter. It should not always be false
    InputStream is = dataset.toCSV(true, use_hex);
    response.setContentType("application/octet-stream");
    // Clean up the file name
    int x = f_name.length() - 1;
    boolean dot = false;
    for (; x >= 0; x--)
      if (!Character.isLetterOrDigit(f_name.charAt(x)) && f_name.charAt(x) != '_')
        if (f_name.charAt(x) == '.' && !dot) dot = true;
        else break;
    String suggested_fname = f_name.substring(x + 1).replace(".hex", ".csv");
    if (!suggested_fname.endsWith(".csv")) suggested_fname = suggested_fname + ".csv";
    f_name = suggested_fname;
    response.addHeader("Content-Disposition", "attachment; filename=" + f_name);
    setResponseStatus(response, HttpServletResponse.SC_OK);
    OutputStream os = response.getOutputStream();
    water.util.FileUtils.copyStream(is, os, 2048);
  } catch (Exception e) {
    sendErrorResponse(response, e, uri);
  } finally {
    logRequest("GET", request, response);
  }
}
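// --- Client-side sketch (illustrative, not part of the original source) -----------------
// Hedged example of hitting this handler with plain java.net/java.io: the mount point
// /3/DownloadDataset and the frame name "iris.hex" are assumptions; hex_string=false keeps
// numeric values in decimal form.
URL url = new URL("http://localhost:54321/3/DownloadDataset?frame_id=iris.hex&hex_string=false");
try (InputStream in = url.openStream();
     OutputStream out = new FileOutputStream("iris.csv")) {
  byte[] buf = new byte[2048];
  int n;
  while ((n = in.read(buf)) != -1) out.write(buf, 0, n);
}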
@Override
protected Frame rebalance(final Frame original_fr, boolean local, final String name) {
  if (original_fr == null) return null;
  if (_parms._force_load_balance) {
    int original_chunks = original_fr.anyVec().nChunks();
    _job.update(0, "Load balancing " + name.substring(name.length() - 5) + " data...");
    int chunks = desiredChunks(original_fr, local);
    if (!_parms._reproducible) {
      if (original_chunks >= chunks) {
        if (!_parms._quiet_mode)
          Log.info("Dataset already contains " + original_chunks + " chunks. No need to rebalance.");
        return original_fr;
      }
    } else { // reproducible, set chunks to 1
      assert chunks == 1;
      if (!_parms._quiet_mode)
        Log.warn("Reproducibility enforced - using only 1 thread - can be slow.");
      if (original_chunks == 1) return original_fr;
    }
    if (!_parms._quiet_mode)
      Log.info("Rebalancing " + name.substring(name.length() - 5) + " dataset into " + chunks + " chunks.");
    Key newKey = Key.make(name + ".chks" + chunks);
    RebalanceDataSet rb = new RebalanceDataSet(original_fr, newKey, chunks);
    H2O.submitTask(rb).join();
    Frame rebalanced_fr = DKV.get(newKey).get();
    Scope.track(rebalanced_fr);
    return rebalanced_fr;
  }
  return original_fr;
}
/**
 * Train a Deep Learning neural net model
 *
 * @param model Input model (e.g., from initModel(), or from a previous training run)
 * @return Trained model
 */
public final DeepLearningModel trainModel(DeepLearningModel model) {
  Frame validScoreFrame = null;
  Frame train, trainScoreFrame;
  try {
    // if (checkpoint == null && !quiet_mode) logStart(); // if checkpoint is given, some Job's
    // params might be uninitialized (but the restarted model's parameters are correct)
    if (model == null) {
      model = DKV.get(dest()).get();
    }
    Log.info("Model category: "
        + (_parms._autoencoder ? "Auto-Encoder" : isClassifier() ? "Classification" : "Regression"));
    final long model_size = model.model_info().size();
    Log.info("Number of model parameters (weights/biases): " + String.format("%,d", model_size));
    model.write_lock(_job);
    _job.update(0, "Setting up training data...");
    final DeepLearningParameters mp = model.model_info().get_params();

    // temporary frames of the same "name" as the orig _train/_valid (asking the parameter's
    // Key, not the actual frame)
    // Note: don't put into DKV or they would overwrite the _train/_valid frames!
    Frame tra_fr = new Frame(mp._train, _train.names(), _train.vecs());
    Frame val_fr = _valid != null ? new Frame(mp._valid, _valid.names(), _valid.vecs()) : null;

    train = tra_fr;
    if (model._output.isClassifier() && mp._balance_classes) {
      _job.update(0, "Balancing class distribution of training data...");
      float[] trainSamplingFactors =
          new float[train.lastVec().domain().length]; // leave initialized to 0 -> will be filled up below
      if (mp._class_sampling_factors != null) {
        if (mp._class_sampling_factors.length != train.lastVec().domain().length)
          throw new IllegalArgumentException(
              "class_sampling_factors must have " + train.lastVec().domain().length + " elements");
        trainSamplingFactors = mp._class_sampling_factors.clone(); // clone: don't modify the original
      }
      train = sampleFrameStratified(
          train,
          train.lastVec(),
          train.vec(model._output.weightsName()),
          trainSamplingFactors,
          (long) (mp._max_after_balance_size * train.numRows()),
          mp._seed,
          true,
          false);
      Vec l = train.lastVec();
      Vec w = train.vec(model._output.weightsName());
      MRUtils.ClassDist cd = new MRUtils.ClassDist(l);
      model._output._modelClassDist = _weights != null ? cd.doAll(l, w).rel_dist() : cd.doAll(l).rel_dist();
    }
    model.training_rows = train.numRows();
    if (_weights != null && _weights.min() == 0 && _weights.max() == 1 && _weights.isInt()) {
      model.training_rows = Math.round(train.numRows() * _weights.mean());
      Log.warn("Not counting " + (train.numRows() - model.training_rows)
          + " rows with weight=0 towards an epoch.");
    }
    Log.info("One epoch corresponds to " + model.training_rows + " training data rows.");
    // training scoring dataset is always sampled uniformly from the training dataset
    trainScoreFrame = sampleFrame(train, mp._score_training_samples, mp._seed);
    if (trainScoreFrame != train) Scope.track(trainScoreFrame);

    if (!_parms._quiet_mode)
      Log.info("Number of chunks of the training data: " + train.anyVec().nChunks());
    if (val_fr != null) {
      model.validation_rows = val_fr.numRows();
      // validation scoring dataset can be sampled in multiple ways from the given validation dataset
      if (model._output.isClassifier() && mp._balance_classes
          && mp._score_validation_sampling == DeepLearningParameters.ClassSamplingMethod.Stratified) {
        _job.update(0, "Sampling validation data (stratified)...");
        validScoreFrame = sampleFrameStratified(
            val_fr,
            val_fr.lastVec(),
            val_fr.vec(model._output.weightsName()),
            null,
            mp._score_validation_samples > 0 ? mp._score_validation_samples : val_fr.numRows(),
            mp._seed + 1,
            false /* no oversampling */,
            false);
      } else {
        _job.update(0, "Sampling validation data...");
        validScoreFrame = sampleFrame(val_fr, mp._score_validation_samples, mp._seed + 1);
        if (validScoreFrame != val_fr) Scope.track(validScoreFrame);
      }
      if (!_parms._quiet_mode)
        Log.info("Number of chunks of the validation data: " + validScoreFrame.anyVec().nChunks());
    }

    // Set train_samples_per_iteration size (cannot be done earlier since this depends on
    // whether stratified sampling is done)
    model.actual_train_samples_per_iteration =
        computeTrainSamplesPerIteration(mp, model.training_rows, model);
    // Determine whether shuffling is enforced
    if (mp._replicate_training_data
        && (model.actual_train_samples_per_iteration
            == model.training_rows * (mp._single_node_mode ? 1 : H2O.CLOUD.size()))
        && !mp._shuffle_training_data
        && H2O.CLOUD.size() > 1
        && !mp._reproducible) {
      if (!mp._quiet_mode)
        Log.info("Enabling training data shuffling, because all nodes train on the full dataset (replicated training data).");
      mp._shuffle_training_data = true;
    }
    if (!mp._shuffle_training_data
        && model.actual_train_samples_per_iteration == model.training_rows
        && train.anyVec().nChunks() == 1) {
      if (!mp._quiet_mode)
        Log.info("Enabling training data shuffling to avoid training rows in the same order over and over (no Hogwild since there's only 1 chunk).");
      mp._shuffle_training_data = true;
    }

    // if (!mp._quiet_mode) Log.info("Initial model:\n" + model.model_info());
    long now = System.currentTimeMillis();
    model._timeLastIterationEnter = now;
    if (_parms._autoencoder) {
      _job.update(0, "Scoring null model of autoencoder...");
      if (!mp._quiet_mode) Log.info("Scoring the null model of the autoencoder.");
      // get the null model reconstruction error
      model.doScoring(trainScoreFrame, validScoreFrame, _job._key, 0, false);
    }
    // put the initial version of the model into DKV
    model.update(_job);
    model.total_setup_time_ms += now - _job.start_time();
    Log.info("Total setup time: " + PrettyPrint.msecs(model.total_setup_time_ms, true));
    Log.info("Starting to train the Deep Learning model.");
    _job.update(0, "Training...");

    // main loop
    for (;;) {
      model.iterations++;
      model.set_model_info(
          mp._epochs == 0 ? model.model_info()
              : H2O.CLOUD.size() > 1 && mp._replicate_training_data
                  ? (mp._single_node_mode
                      ? new DeepLearningTask2(_job._key, train, model.model_info(),
                            rowFraction(train, mp, model), model.iterations)
                            .doAll(Key.make(H2O.SELF)).model_info()  // replicated data + single node mode
                      : new DeepLearningTask2(_job._key, train, model.model_info(),
                            rowFraction(train, mp, model), model.iterations)
                            .doAllNodes().model_info())              // replicated data + multi-node mode
                  : new DeepLearningTask(_job._key, model.model_info(),
                        rowFraction(train, mp, model), model.iterations)
                        .doAll(train).model_info());                 // distributed data (always in multi-node mode)
      if (stop_requested() && !timeout()) break; // cancellation
      if (!model.doScoring(trainScoreFrame, validScoreFrame, _job._key, model.iterations, false))
        break; // finished training (or early stopping or convergence)
      if (timeout()) break; // stop after scoring
    }

    // replace the model with the best model so far (if it's better)
    if (!stop_requested()
        && _parms._overwrite_with_best_model
        && model.actual_best_model_key != null
        && _parms._nfolds == 0) {
      DeepLearningModel best_model = DKV.getGet(model.actual_best_model_key);
      if (best_model != null
          && best_model.loss() < model.loss()
          && Arrays.equals(best_model.model_info().units, model.model_info().units)) {
        if (!_parms._quiet_mode)
          Log.info("Setting the model to be the best model so far (based on scoring history).");
        DeepLearningModelInfo mi = best_model.model_info().deep_clone();
        // Don't cheat - count full amount of training samples, since that's the amount of
        // training it took to train (without finding anything better)
        mi.set_processed_global(model.model_info().get_processed_global());
        mi.set_processed_local(model.model_info().get_processed_local());
        model.set_model_info(mi);
        model.update(_job);
        model.doScoring(trainScoreFrame, validScoreFrame, _job._key, model.iterations, true);
        assert (best_model.loss() == model.loss());
      }
    }
    // store coefficient names for future use
    // possibly change
    model.model_info().data_info().coefNames();

    if (!_parms._quiet_mode) {
      Log.info("==============================================================================================================================================================================");
      if (stop_requested()) {
        Log.info("Deep Learning model training was interrupted.");
      } else {
        Log.info("Finished training the Deep Learning model.");
        Log.info(model);
      }
      Log.info("==============================================================================================================================================================================");
    }
  } finally {
    if (model != null) {
      model.deleteElasticAverageModels();
      model.unlock(_job);
      if (model.actual_best_model_key != null) {
        assert (model.actual_best_model_key != model._key);
        DKV.remove(model.actual_best_model_key);
      }
    }
  }
  return model;
}
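// --- Entry-point sketch (illustrative, not part of the original source) -----------------
// Minimal example of the public API that eventually reaches trainModel(model) above; the
// frame and column names are made up, the builder/parameter classes are the standard
// H2O-3 ones.
DeepLearningParameters parms = new DeepLearningParameters();
parms._train = trainFrame._key;          // a Frame already published in the DKV
parms._response_column = "label";
parms._epochs = 10;
DeepLearningModel dlModel = new DeepLearning(parms).trainModel().get();   // blocks until done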
/**
 * Train a Deep Learning model, assumes that all members are populated
 * If checkpoint == null, then start training a new model, otherwise continue from a checkpoint
 */
public final void buildModel() {
  DeepLearningModel cp = null;
  if (_parms._checkpoint == null) {
    cp = new DeepLearningModel(
        dest(),
        _parms,
        new DeepLearningModel.DeepLearningModelOutput(DeepLearning.this),
        _train,
        _valid,
        nclasses());
    cp.model_info().initializeMembers();
  } else {
    final DeepLearningModel previous = DKV.getGet(_parms._checkpoint);
    if (previous == null) throw new IllegalArgumentException("Checkpoint not found.");
    Log.info("Resuming from checkpoint.");
    _job.update(0, "Resuming from checkpoint");

    if (isClassifier() != previous._output.isClassifier())
      throw new H2OIllegalArgumentException("Response type must be the same as for the checkpointed model.");
    if (isSupervised() != previous._output.isSupervised())
      throw new H2OIllegalArgumentException("Model type must be the same as for the checkpointed model.");

    // check the user-given arguments for consistency
    DeepLearningParameters oldP = previous._parms; // sanitized parameters for checkpointed model
    DeepLearningParameters newP = _parms;          // user-given parameters for restart

    DeepLearningParameters oldP2 = (DeepLearningParameters) oldP.clone();
    DeepLearningParameters newP2 = (DeepLearningParameters) newP.clone();
    DeepLearningParameters.Sanity.modifyParms(oldP, oldP2, nclasses()); // sanitize the user-given parameters
    DeepLearningParameters.Sanity.modifyParms(newP, newP2, nclasses()); // sanitize the user-given parameters
    DeepLearningParameters.Sanity.checkpoint(oldP2, newP2);

    DataInfo dinfo;
    try {
      // PUBDEV-2513: Adapt _train and _valid (in-place) to match the frames that were used for
      // the previous model
      // This can add or remove dummy columns (can happen if the dataset is sparse and datasets
      // have different non-const columns)
      for (String st : previous.adaptTestForTrain(_train, true, false)) Log.warn(st);
      for (String st : previous.adaptTestForTrain(_valid, true, false)) Log.warn(st);
      dinfo = makeDataInfo(_train, _valid, _parms, nclasses());
      DKV.put(dinfo);
      cp = new DeepLearningModel(dest(), _parms, previous, false, dinfo);
      cp.write_lock(_job);

      if (!Arrays.equals(cp._output._names, previous._output._names)) {
        throw new H2OIllegalArgumentException(
            "The columns of the training data must be the same as for the checkpointed model. Check ignored columns (or disable ignore_const_cols).");
      }
      if (!Arrays.deepEquals(cp._output._domains, previous._output._domains)) {
        throw new H2OIllegalArgumentException(
            "Categorical factor levels of the training data must be the same as for the checkpointed model.");
      }
      if (dinfo.fullN() != previous.model_info().data_info().fullN()) {
        throw new H2OIllegalArgumentException(
            "Total number of predictors is different than for the checkpointed model.");
      }
      if (_parms._epochs <= previous.epoch_counter) {
        throw new H2OIllegalArgumentException(
            "Total number of epochs must be larger than the number of epochs already trained for the checkpointed model ("
                + previous.epoch_counter + ").");
      }

      // these are the mutable parameters that are to be used by the model (stored in model_info._parms)
      final DeepLearningParameters actualNewP =
          cp.model_info().get_params(); // actually used parameters for model building (defaults filled in, etc.)
      assert (actualNewP != previous.model_info().get_params());
      assert (actualNewP != newP);
      assert (actualNewP != oldP);
      DeepLearningParameters.Sanity.update(actualNewP, newP, nclasses());

      Log.info("Continuing training after " + String.format("%.3f", previous.epoch_counter)
          + " epochs from the checkpointed model.");
      cp.update(_job);
    } catch (H2OIllegalArgumentException ex) {
      if (cp != null) {
        cp.unlock(_job);
        cp.delete();
        cp = null;
      }
      throw ex;
    } finally {
      if (cp != null) cp.unlock(_job);
    }
  }
  trainModel(cp);

  // clean up, but don't delete weights and biases if user asked for export
  List<Key> keep = new ArrayList<>();
  try {
    if (_parms._export_weights_and_biases
        && cp._output.weights != null
        && cp._output.biases != null) {
      for (Key k : Arrays.asList(cp._output.weights)) {
        keep.add(k);
        for (Vec vk : ((Frame) DKV.getGet(k)).vecs()) {
          keep.add(vk._key);
        }
      }
      for (Key k : Arrays.asList(cp._output.biases)) {
        keep.add(k);
        for (Vec vk : ((Frame) DKV.getGet(k)).vecs()) {
          keep.add(vk._key);
        }
      }
    }
  } finally {
    Scope.exit(keep.toArray(new Key[keep.size()]));
  }
}
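// --- Checkpoint-restart sketch (illustrative, not part of the original source) ----------
// Hedged example of the checkpoint path handled by buildModel() above, continuing from the
// dlModel built in the earlier sketch. Field types are assumed to match the standard
// DeepLearningParameters declarations; _epochs must exceed the epochs already trained, as
// enforced by the check in the method.
DeepLearningParameters resume = new DeepLearningParameters();
resume._train = trainFrame._key;
resume._response_column = "label";
resume._checkpoint = dlModel._key;       // key of the previously trained model
resume._epochs = 20;                     // must be larger than the epochs already trained
DeepLearningModel continued = new DeepLearning(resume).trainModel().get();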
@Override public void onCompletion(CountedCompleter cc) { DKV.put(_v); }
public static byte[] getFirstUnzipedBytes(Key k) {
  return getFirstUnzipedBytes(DKV.get(k));
}
@Override
public boolean toHTML(StringBuilder sb) {
  if (jobs != null) {
    DocGen.HTML.arrayHead(sb);
    sb.append("<tr class='warning'>");
    ArrayList<Argument> args = jobs[0].arguments();

    // Filter some keys to simplify UI
    args = (ArrayList<Argument>) args.clone();
    filter(args, "destination_key", "source", "cols", "ignored_cols_by_name",
        "response", "classification", "validation");
    for (int i = 0; i < args.size(); i++)
      sb.append("<td><b>").append(args.get(i)._name).append("</b></td>");
    sb.append("<td><b>").append("run time").append("</b></td>");
    String perf = jobs[0].speedDescription();
    if (perf != null) sb.append("<td><b>").append(perf).append("</b></td>");
    sb.append("<td><b>").append("model key").append("</b></td>");
    sb.append("<td><b>").append("prediction error").append("</b></td>");
    sb.append("<td><b>").append("F1 score").append("</b></td>");
    sb.append("</tr>");

    ArrayList<JobInfo> infos = new ArrayList<JobInfo>();
    for (Job job : jobs) {
      JobInfo info = new JobInfo();
      info._job = job;
      Object value = UKV.get(job.destination_key);
      info._model = value instanceof Model ? (Model) value : null;
      if (info._model != null) info._cm = info._model.cm();
      if (info._cm != null) info._error = info._cm.err();
      infos.add(info);
    }
    Collections.sort(infos, new Comparator<JobInfo>() {
      @Override
      public int compare(JobInfo a, JobInfo b) {
        return Double.compare(a._error, b._error);
      }
    });

    for (JobInfo info : infos) {
      sb.append("<tr>");
      for (Argument a : args) {
        try {
          Object value = a._field.get(info._job);
          String s;
          if (value instanceof int[]) s = Utils.sampleToString((int[]) value, 20);
          else s = "" + value;
          sb.append("<td>").append(s).append("</td>");
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      }
      String runTime = "Pending", speed = "";
      if (info._job.start_time != 0) {
        runTime = PrettyPrint.msecs(info._job.runTimeMs(), true);
        speed = perf != null ? PrettyPrint.msecs(info._job.speedValue(), true) : "";
      }
      sb.append("<td>").append(runTime).append("</td>");
      if (perf != null) sb.append("<td>").append(speed).append("</td>");

      String link = info._job.destination_key.toString();
      if (info._job.start_time != 0 && DKV.get(info._job.destination_key) != null) {
        if (info._model instanceof GBMModel)
          link = GBMModelView.link(link, info._job.destination_key);
        else if (info._model instanceof NeuralNetModel)
          link = NeuralNetProgress.link(info._job.self(), info._job.destination_key, link);
        else if (info._model instanceof KMeans2Model)
          link = KMeans2ModelView.link(link, info._job.destination_key);
        else
          link = Inspect.link(link, info._job.destination_key);
      }
      sb.append("<td>").append(link).append("</td>");

      String pct = "", f1 = "";
      if (info._cm != null) {
        pct = String.format("%.2f", 100 * info._error) + "%";
        if (info._cm._arr.length == 2)
          f1 = String.format("%.2f", info._cm.precisionAndRecall());
      }
      sb.append("<td><b>").append(pct).append("</b></td>");
      sb.append("<td><b>").append(f1).append("</b></td>");
      sb.append("</tr>");
    }
    DocGen.HTML.arrayTail(sb);
  }
  return true;
}