/**
 * Collects every job that is registered in the job list and still resolvable in the K/V store.
 *
 * <p>Keys from the list whose job can no longer be fetched are skipped, so the returned array
 * never contains {@code null} entries and may be shorter than the stored key list.
 *
 * @return the resolvable jobs in list order; an empty array when no job list is stored.
 */
public static Job[] all() {
  final List registry = UKV.get(LIST);
  if (registry == null) {
    return new Job[0];
  }
  final Job[] found = new Job[registry._jobs.length];
  int count = 0;
  for (Key jobKey : registry._jobs) {
    final Job candidate = UKV.get(jobKey);
    if (candidate != null) {
      found[count++] = candidate;
    }
  }
  // Trim the tail only when some keys did not resolve to a job.
  return count < found.length ? Arrays.copyOf(found, count) : found;
}
/**
 * Looks up the position of the model's response column in the model's dataset.
 *
 * <p>The dataset is fetched from the K/V store via {@code model._dataKey} and its columns are
 * scanned in order for one whose name equals {@code model.responseName()}.
 *
 * @param model model whose response column is searched for
 * @return zero-based index of the first column with a matching name, or {@code -1} if none
 *     matches
 */
static final int findResponseIdx(RFModel model) {
  final String responseName = model.responseName();
  final ValueArray dataset = UKV.get(model._dataKey);
  int position = 0;
  for (ValueArray.Column column : dataset._cols) {
    if (responseName.equals(column._name)) {
      return position;
    }
    position++;
  }
  return -1;
}
/**
 * Handles a failure of this job.
 *
 * <p>In order: removes whatever is stored under {@code dest()}, records the exception message in
 * the stored progress object (if one exists under {@code progressKey()}), and finally cancels the
 * job with the given exception.
 *
 * @param ex the failure that interrupted the job
 */
public void onException(Throwable ex) {
  UKV.remove(dest());
  final Value stored = DKV.get(progressKey());
  if (stored != null) {
    final ChunkProgress current = stored.get();
    // error(...) returns the progress object to store; write that one back, not the old one.
    final ChunkProgress failed = current.error(ex.getMessage());
    DKV.put(progressKey(), failed);
  }
  cancel(ex);
}
/**
 * Returns the domain (level names) of the given vector.
 *
 * <p>An enum vector reports its own domain. Any other vector is converted with {@code toEnum()},
 * the domain of the converted vector is read, and the temporary vector is removed from the K/V
 * store again.
 *
 * @param v vector to inspect; may be {@code null}. With assertions enabled, a non-null vector
 *     must be an int or enum vector.
 * @return the domain of the vector, or {@code null} when {@code v} is {@code null}
 */
protected String[] getVectorDomain(final Vec v) {
  assert v == null || v.isInt() || v.isEnum() : "Cannot get vector domain!";
  if (v == null) {
    return null;
  }
  if (v.isEnum()) {
    return v.domain();
  }
  // Non-enum vector: go through a temporary enum view, then drop it from the store.
  final Vec enumView = v.toEnum();
  final String[] domain = enumView.domain();
  UKV.remove(enumView._key);
  return domain;
}
/** * Start this task based on given top-level fork-join task representing job computation. * * @param fjtask top-level job computation task. * @return this job in {@link JobState#RUNNING} state * @see JobState * @see H2OCountedCompleter */ public /** FIXME: should be final or at least protected */ Job start(final H2OCountedCompleter fjtask) { assert state == JobState.CREATED : "Trying to run job which was already run?"; assert fjtask != null : "Starting a job with null working task is not permitted! Fix you API"; _fjtask = fjtask; start_time = System.currentTimeMillis(); state = JobState.RUNNING; // Save the full state of the job UKV.put(self(), this); // Update job list new TAtomic<List>() { @Override public List atomic(List old) { if (old == null) old = new List(); Key[] jobs = old._jobs; old._jobs = Arrays.copyOf(jobs, jobs.length + 1); old._jobs[jobs.length] = job_key; return old; } }.invoke(LIST); return this; }
@Override protected Response serve() { int tasks = 0; int finished = 0; RFModel model = _modelKey.value(); double[] weights = _weights.value(); // Finish refresh after rf model is done and confusion matrix for all trees is computed boolean done = false; int classCol = _classCol.specified() ? _classCol.value() : findResponseIdx(model); tasks = model._totalTrees; finished = model.size(); // Handle cancelled/aborted jobs if (_job.value() != null) { Job jjob = Job.findJob(_job.value()); if (jjob != null && jjob.isCancelled()) return Response.error( jjob.exception == null ? "Job was cancelled by user!" : jjob.exception); } JsonObject response = defaultJsonResponse(); // CM return and possible computation is requested if (!_noCM.value() && (finished == tasks || _iterativeCM.value()) && finished > 0) { // Compute the highest number of trees which is less then a threshold int modelSize = tasks * _refreshThresholdCM.value() / 100; modelSize = modelSize == 0 || finished == tasks ? finished : modelSize * (finished / modelSize); // Get the computing the matrix - if no job is computing, then start a new job Job cmJob = ConfusionTask.make( model, modelSize, _dataKey.value()._key, classCol, weights, _oobee.value()); // Here the the job is running - it saved a CM which can be already finished or in invalid // state. CMFinal confusion = UKV.get(cmJob.dest()); // if the matrix is valid, report it in the JSON if (confusion != null && confusion.valid() && modelSize > 0) { // finished += 1; JsonObject cm = new JsonObject(); JsonArray cmHeader = new JsonArray(); JsonArray matrix = new JsonArray(); cm.addProperty(JSON_CM_TYPE, _oobee.value() ? 
"OOB error estimate" : "full scoring"); cm.addProperty(JSON_CM_CLASS_ERR, confusion.classError()); cm.addProperty(JSON_CM_ROWS_SKIPPED, confusion.skippedRows()); cm.addProperty(JSON_CM_ROWS, confusion.rows()); // create the header for (String s : cfDomain(confusion, 1024)) cmHeader.add(new JsonPrimitive(s)); cm.add(JSON_CM_HEADER, cmHeader); // add the matrix final int nclasses = confusion.dimension(); JsonArray classErrors = new JsonArray(); for (int crow = 0; crow < nclasses; ++crow) { JsonArray row = new JsonArray(); int classHitScore = 0; for (int ccol = 0; ccol < nclasses; ++ccol) { row.add(new JsonPrimitive(confusion.matrix(crow, ccol))); if (crow != ccol) classHitScore += confusion.matrix(crow, ccol); } // produce infinity members in case of 0.f/0 classErrors.add( new JsonPrimitive( (float) classHitScore / (classHitScore + confusion.matrix(crow, crow)))); matrix.add(row); } cm.add(JSON_CM_CLASSES_ERRORS, classErrors); cm.add(JSON_CM_MATRIX, matrix); cm.addProperty(JSON_CM_TREES, modelSize); response.add(JSON_CM, cm); // Signal end only and only if all trees were generated and confusion matrix is valid done = finished == tasks; } } else if (_noCM.value() && finished == tasks) done = true; // Trees JsonObject trees = new JsonObject(); trees.addProperty(Constants.TREE_COUNT, model.size()); if (model.size() > 0) { trees.add(Constants.TREE_DEPTH, model.depth().toJson()); trees.add(Constants.TREE_LEAVES, model.leaves().toJson()); } response.add(Constants.TREES, trees); // Build a response Response r; if (done) { r = jobDone(response); r.addHeader( "<div class='alert'>" + /*RFScore.link(MODEL_KEY, model._key, "Use this model for scoring.") */ GeneratePredictionsPage .link(model._key, "Predict!") + " </div>"); } else { r = Response.poll(response, finished, tasks); } r.setBuilder(JSON_CM, new ConfusionMatrixBuilder()); r.setBuilder(TREES, new TreeListBuilder()); return r; }
/**
 * Removes this job and, after the superclass has done its own removal, the value stored under
 * the progress key {@code _progress}.
 */
@Override
public void remove() {
  super.remove();
  // The progress record lives under its own key, so it has to be dropped separately.
  UKV.remove(_progress);
}
public ChunkProgressJob(long chunksTotal, Key destinationKey) { destination_key = destinationKey; _progress = Key.make(Key.make()._kb, (byte) 0, Key.DFJ_INTERNAL_USER, destinationKey.home_node()); UKV.put(_progress, new ChunkProgress(chunksTotal)); }
/**
 * Fetches the job stored under the given key.
 *
 * @param jobkey key of the job to look up
 * @return the job read from the K/V store for {@code jobkey}; {@code null} when nothing is
 *     stored under that key
 */
public static Job findJob(final Key jobkey) {
  final Job job = UKV.get(jobkey);
  return job;
}
/**
 * Tells whether the job stored under the given key is running.
 *
 * <p>The job is expected to be present in the store. With assertions enabled a missing job
 * trips the assertion; without them the subsequent call fails on the {@code null} reference.
 *
 * @param job_key key of the job to check
 * @return {@code true} if the job reports that it is running, {@code false} otherwise
 */
public static boolean isRunning(Key job_key) {
  final Job job = UKV.get(job_key);
  assert job != null : "Job should be always in DKV!";
  return job.isRunning();
}
/** * Blocks and get result of this job. * * <p>The call blocks on working task which was passed via {@link #start(H2OCountedCompleter)} * method and returns the result which is fetched from UKV based on job destination key. * * @return result of this job fetched from UKV by destination key. * @see #start(H2OCountedCompleter) * @see UKV */ public <T> T get() { _fjtask.join(); // Block until top-level job is done T ans = (T) UKV.get(destination_key); remove(); // Remove self-job return ans; }
/**
 * Reports the progress of this job.
 *
 * <p>The value stored under the destination key is consulted: if it implements
 * {@link Progress}, its own progress is returned; otherwise (including when nothing is stored)
 * the progress is reported as zero.
 *
 * @return job progress, documented as a value in the interval [0, 1]
 */
public float progress() {
  final Freezable stored = UKV.get(destination_key);
  return stored instanceof Progress ? ((Progress) stored).progress() : 0;
}