@SuppressWarnings("unused") // called through reflection by RequestServer public RemoveAllV3 remove(int version, RemoveAllV3 u) { Log.info("Removing all objects"); Futures fs = new Futures(); for (Job j : Job.jobs()) { j.cancel(); j.remove(fs); } fs.blockForPending(); // Bulk brainless key removal. Completely wipes all Keys without regard. new MRTask() { @Override public byte priority() { return H2O.GUI_PRIORITY; } @Override public void setupLocal() { H2O.raw_clear(); water.fvec.Vec.ESPC.clear(); } }.doAllNodes(); Log.info("Finished removing objects"); return u; }
@Override public void callback(final GLMTask.GLMLineSearchTask glmt) {
  double step = 0.5;
  for (int i = 0; i < glmt._objvals.length; ++i) {
    if (!needLineSearch(glmt._betas[i], glmt._objvals[i], step)) {
      Log.info("GLM2 (iteration=" + _iter + ") line search: found admissible step=" + step);
      // Set the last result to null so that the next Iteration will not try to decide
      // whether it should do the line search again.
      _lastResult = null;
      new GLMIterationTask(GLM2.this, _activeData, _glm, true, true, true, glmt._betas[i], _ymu, _reg,
          new Iteration()).asyncExec(_activeData._adaptedFrame);
      return;
    }
    step *= 0.5;
  }
  // No line search step worked: forcibly converge.
  Log.info("GLM2 (iteration=" + _iter + ") line search failed to find feasible step. Forcibly converged.");
  nextLambda(_lastResult._glmt.clone(),
      resizeVec(_lastResult._glmt._beta, _activeCols, _lastResult._activeCols));
}
@Test public void testExpandCatsIris() throws InterruptedException, ExecutionException {
  double[][] iris = ard(
      ard(6.3, 2.5, 4.9, 1.5, 1),
      ard(5.7, 2.8, 4.5, 1.3, 1),
      ard(5.6, 2.8, 4.9, 2.0, 2),
      ard(5.0, 3.4, 1.6, 0.4, 0),
      ard(6.0, 2.2, 5.0, 1.5, 2));
  double[][] iris_expandR = ard(
      ard(0, 1, 0, 6.3, 2.5, 4.9, 1.5),
      ard(0, 1, 0, 5.7, 2.8, 4.5, 1.3),
      ard(0, 0, 1, 5.6, 2.8, 4.9, 2.0),
      ard(1, 0, 0, 5.0, 3.4, 1.6, 0.4),
      ard(0, 0, 1, 6.0, 2.2, 5.0, 1.5));
  String[] iris_cols = new String[] {"sepal_len", "sepal_wid", "petal_len", "petal_wid", "class"};
  String[][] iris_domains =
      new String[][] {null, null, null, null, new String[] {"setosa", "versicolor", "virginica"}};

  Frame fr = null;
  try {
    fr = parse_test_file(Key.make("iris.hex"), "smalldata/iris/iris_wheader.csv");
    DataInfo dinfo = new DataInfo(Key.make(), fr, null, 0, true, DataInfo.TransformType.NONE,
        DataInfo.TransformType.NONE, false, false, false,
        /* weights */ false, /* offset */ false, /* fold */ false);

    Log.info("Original matrix:\n" + colFormat(iris_cols, "%8.7s") + ArrayUtils.pprint(iris));
    double[][] iris_perm = ArrayUtils.permuteCols(iris, dinfo._permutation);
    Log.info("Permuted matrix:\n" + colFormat(iris_cols, "%8.7s", dinfo._permutation)
        + ArrayUtils.pprint(iris_perm));

    double[][] iris_exp = GLRM.expandCats(iris_perm, dinfo);
    Log.info("Expanded matrix:\n" + colExpFormat(iris_cols, iris_domains, "%8.7s", dinfo._permutation)
        + ArrayUtils.pprint(iris_exp));
    Assert.assertArrayEquals(iris_expandR, iris_exp);
  } catch (Throwable t) {
    t.printStackTrace();
    throw new RuntimeException(t);
  } finally {
    if (fr != null) fr.delete();
  }
}
public PersistManager(URI iceRoot) {
  I = new Persist[MAX_BACKENDS];
  stats = new PersistStatsEntry[MAX_BACKENDS];
  for (int i = 0; i < stats.length; i++) {
    stats[i] = new PersistStatsEntry();
  }

  if (iceRoot == null) {
    Log.err("ice_root must be specified. Exiting.");
    H2O.exit(1);
  }

  Persist ice = null;
  boolean windowsPath = iceRoot.toString().matches("^[a-zA-Z]:.*");
  if (windowsPath) {
    ice = new PersistFS(new File(iceRoot.toString()));
  } else if ((iceRoot.getScheme() == null) || Schemes.FILE.equals(iceRoot.getScheme())) {
    ice = new PersistFS(new File(iceRoot.getPath()));
  } else if (Schemes.HDFS.equals(iceRoot.getScheme())) {
    Log.err("HDFS ice_root not yet supported. Exiting.");
    H2O.exit(1);
    // I am not sure anyone actually ever does this.
    // H2O on Hadoop launches use local disk for ice root.
    // This has a chance to work, but turn it off until it gets tested.
    //
    // try {
    //   Class klass = Class.forName("water.persist.PersistHdfs");
    //   java.lang.reflect.Constructor constructor = klass.getConstructor(new Class[]{URI.class});
    //   ice = (Persist) constructor.newInstance(iceRoot);
    // } catch (Exception e) {
    //   Log.err("Could not initialize HDFS");
    //   throw new RuntimeException(e);
    // }
  }
  I[Value.ICE] = ice;
  I[Value.NFS] = new PersistNFS();

  try {
    Class klass = Class.forName("water.persist.PersistHdfs");
    java.lang.reflect.Constructor constructor = klass.getConstructor();
    I[Value.HDFS] = (Persist) constructor.newInstance();
    Log.info("HDFS subsystem successfully initialized");
  } catch (Throwable ignore) {
    Log.info("HDFS subsystem not available");
  }

  try {
    Class klass = Class.forName("water.persist.PersistS3");
    java.lang.reflect.Constructor constructor = klass.getConstructor();
    I[Value.S3] = (Persist) constructor.newInstance();
    Log.info("S3 subsystem successfully initialized");
  } catch (Throwable ignore) {
    Log.info("S3 subsystem not available");
  }
}
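// The constructor above registers the optional HDFS and S3 backends by reflection so the core jar
// does not hard-depend on their libraries. A minimal, self-contained sketch of that pattern follows;
// the class name "com.example.OptionalBackend" is hypothetical and not part of H2O.
public class ReflectiveBackendSketch {
  public static void main(String[] args) {
    Object backend = null;
    try {
      // Load the optional implementation only if it happens to be on the classpath.
      Class<?> klass = Class.forName("com.example.OptionalBackend");
      backend = klass.getConstructor().newInstance();
      System.out.println("optional backend successfully initialized");
    } catch (Throwable ignore) {
      // Missing jar, missing class, or failing constructor: degrade gracefully.
      System.out.println("optional backend not available");
    }
    // Callers must tolerate a null slot for backends that failed to load.
    System.out.println("backend = " + backend);
  }
}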
private void sendAck() {
  // Send results back
  DTask dt, origDt = _dt; // _dt can go null the instant it is sent over the wire
  assert origDt != null;  // Freed after completion
  while ((dt = _dt) != null) { // Retry loop for broken TCP sends
    AutoBuffer ab = null;
    try {
      // Start the ACK with results back to client.  If the client is asking for a class/id
      // mapping (or any job running at FETCH_ACK priority) then return a udp.fetchack byte
      // instead of a udp.ack.  The receiver thread then knows to handle the mapping at the
      // higher priority.
      UDP.udp udp = dt.priority() == H2O.FETCH_ACK_PRIORITY ? UDP.udp.fetchack : UDP.udp.ack;
      ab = new AutoBuffer(_client, udp._prior).putTask(udp, _tsknum).put1(SERVER_UDP_SEND);
      assert ab.position() == 1 + 2 + 4 + 1;
      dt.write(ab);                  // Write the DTask - could be very large write
      dt._repliedTcp = ab.hasTCP();  // Resends do not need to repeat TCP result
      ab.close();                    // Then close; send final byte
      _computedAndReplied = true;    // After the final handshake, set computed+replied bit
      break;                         // Break out of retry loop
    } catch (AutoBuffer.AutoBufferException e) {
      if (!_client._heartbeat._client) // Report on servers only; clients allowed to be flaky
        Log.info("IOException during ACK, " + e._ioe.getMessage() + ", t#" + _tsknum + " AB=" + ab
            + ", waiting and retrying...");
      if (ab != null) ab.drainClose(); // guard: the AutoBuffer constructor itself may have thrown
      if (_client._heartbeat._client)  // Dead client will not accept a TCP ACK response?
        this.CAS_DT(dt, null);         // cancel the ACK
      try { Thread.sleep(100); } catch (InterruptedException ignore) { }
    } catch (Exception e) { // Custom serializer just barfed?
      Log.err(e);           // Log custom serializer exception
      if (ab != null) ab.drainClose();
    }
  } // end of while(true)
  if (dt == null)
    Log.info("Remote task#" + _tsknum + " " + origDt.getClass() + " to " + _client
        + " has been cancelled by the remote client");
  else {
    if (dt instanceof MRTask && dt.logVerbose())
      Log.debug("Done remote task#" + _tsknum + " " + dt.getClass() + " to " + _client);
    _client.record_task_answer(this); // Setup for retrying Ack & AckAck, if not canceled
  }
}
@Test public void testCategoricalProstate() throws InterruptedException, ExecutionException {
  GLRM job = null;
  GLRMModel model = null;
  Frame train = null;
  final int[] cats = new int[] {1, 3, 4, 5}; // Categoricals: CAPSULE, RACE, DPROS, DCAPS

  try {
    Scope.enter();
    train = parse_test_file(Key.make("prostate.hex"), "smalldata/logreg/prostate.csv");
    for (int i = 0; i < cats.length; i++)
      Scope.track(train.replace(cats[i], train.vec(cats[i]).toCategoricalVec())._key);
    train.remove("ID").remove();
    DKV.put(train._key, train);

    GLRMParameters parms = new GLRMParameters();
    parms._train = train._key;
    parms._k = 8;
    parms._gamma_x = parms._gamma_y = 0.1;
    parms._regularization_x = GLRMModel.GLRMParameters.Regularizer.Quadratic;
    parms._regularization_y = GLRMModel.GLRMParameters.Regularizer.Quadratic;
    parms._init = GLRM.Initialization.PlusPlus;
    parms._transform = DataInfo.TransformType.STANDARDIZE;
    parms._recover_svd = false;
    parms._max_iterations = 200;

    try {
      job = new GLRM(parms);
      model = job.trainModel().get();
      Log.info("Iteration " + model._output._iterations + ": Objective value = " + model._output._objective);
      model.score(train).delete();
      ModelMetricsGLRM mm = (ModelMetricsGLRM) ModelMetrics.getFromDKV(model, train);
      Log.info("Numeric Sum of Squared Error = " + mm._numerr
          + "\tCategorical Misclassification Error = " + mm._caterr);
    } catch (Throwable t) {
      t.printStackTrace();
      throw new RuntimeException(t);
    } finally {
      if (job != null) job.remove(); // guard: the GLRM constructor itself may have thrown
    }
  } catch (Throwable t) {
    t.printStackTrace();
    throw new RuntimeException(t);
  } finally {
    if (train != null) train.delete();
    if (model != null) model.delete();
    Scope.exit();
  }
}
@Test public void testChunks() {
  Frame frame = parse_test_file("smalldata/covtype/covtype.20k.data");
  AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
  parms._train = frame._key;
  parms._radius_scale = 3.0;
  long start = System.currentTimeMillis();
  AggregatorModel agg = new Aggregator(parms).trainModel().get(); // 0.418
  System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - start) / 1000. + " seconds");
  agg.checkConsistency();
  Frame output = agg._output._output_frame.get();
  Log.info("Number of exemplars: " + agg._exemplars.length);
  // Assert.assertTrue(agg._exemplars.length==1993);
  output.remove();
  agg.remove();

  for (int i : new int[] {1, 2, 5, 10, 50, 100}) {
    Key key = Key.make();
    RebalanceDataSet rb = new RebalanceDataSet(frame, key, i);
    H2O.submitTask(rb);
    rb.join();
    Frame rebalanced = DKV.get(key).get();

    parms = new AggregatorModel.AggregatorParameters();
    parms._train = frame._key;
    parms._radius_scale = 3.0;
    start = System.currentTimeMillis();
    AggregatorModel agg2 = new Aggregator(parms).trainModel().get(); // 0.373 0.504 0.357 0.454 0.368 0.355
    System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - start) / 1000. + " seconds");
    agg2.checkConsistency();
    Log.info("Number of exemplars for " + i + " chunks: " + agg2._exemplars.length);
    rebalanced.delete();
    Assert.assertTrue(Math.abs(agg._exemplars.length - agg2._exemplars.length) == 0); // < agg._exemplars.length*0);
    output = agg2._output._output_frame.get();
    output.remove();
    agg2.remove();
  }
  frame.delete();
}
public static void main(String[] args) throws Exception {
  Log._dontDie = true; // Ignore fatal class load error
  ArrayList<String> list = new ArrayList<String>();
  for (String name : Boot.getClasses()) {
    if (!name.equals("water.api.RequestServer") && !name.equals("water.External")
        && !name.startsWith("water.r.")) {
      Class c = Class.forName(name);
      if (Freezable.class.isAssignableFrom(c)) list.add(c.getName());
    }
  }
  Collections.sort(list);
  String s = "" + //
      "package water;\n" + //
      "\n" + //
      "// Do not edit - generated\n" + //
      "public class TypeMapGen {\n" + //
      " static final String[] CLAZZES = {\n" + //
      " \" BAD\", // 0: BAD\n" + //
      " \"[B\", // 1: Array of Bytes\n";
  for (String c : list) s += " \"" + c + "\",\n";
  s += " };\n";
  s += "}";
  Utils.writeFile(new File("src/main/java/water/TypeMapGen.java"), s);
  Log.info("Generated TypeMap");
}
@Test public void testAggregatorBinary() {
  CreateFrame cf = new CreateFrame();
  cf.rows = 1000;
  cf.cols = 10;
  cf.categorical_fraction = 0.6;
  cf.integer_fraction = 0.0;
  cf.binary_fraction = 0.0;
  cf.real_range = 100;
  cf.integer_range = 100;
  cf.missing_fraction = 0.1;
  cf.factors = 5;
  cf.seed = 1234;
  Frame frame = cf.execImpl().get();

  AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
  parms._train = frame._key;
  parms._radius_scale = 1.0;
  parms._transform = DataInfo.TransformType.NORMALIZE;
  parms._categorical_encoding = Model.Parameters.CategoricalEncodingScheme.Binary;
  long start = System.currentTimeMillis();
  AggregatorModel agg = new Aggregator(parms).trainModel().get(); // 0.905
  System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - start) / 1000. + " seconds");
  agg.checkConsistency();
  Frame output = agg._output._output_frame.get();
  System.out.println(output.toTwoDimTable(0, 10));
  Log.info("Number of exemplars: " + agg._exemplars.length);
  // Assert.assertTrue(agg._exemplars.length==649);
  output.remove();
  frame.remove();
  agg.remove();
}
@Override protected Response serve() {
  try {
    // pull everything local
    Log.info("ExportFiles processing (" + path + ")");
    if (DKV.get(src_key) == null) throw new IllegalArgumentException(src_key.toString() + " not found.");
    Object value = DKV.get(src_key).get();
    // create a stream to read the entire VA or Frame
    if (!(value instanceof ValueArray) && !(value instanceof Frame))
      throw new UnsupportedOperationException("Can only export Frames or ValueArrays.");
    InputStream csv = value instanceof ValueArray
        ? new ValueArray.CsvVAStream((ValueArray) value, null)
        : ((Frame) value).toCSV(true);
    String p2 = path.toLowerCase();
    if (p2.startsWith("hdfs://")) serveHdfs(csv);
    else if (p2.startsWith("s3n://")) serveHdfs(csv);
    else serveLocalDisk(csv);
    return RequestBuilders.Response.done(this);
  } catch (Throwable t) {
    return RequestBuilders.Response.error(t);
  }
}
/**
 * Compute the correct final quantile from these 4 values. If the lo and hi elements are equal,
 * use them. However if they differ, then there is no single value which exactly matches the
 * desired quantile. There are several well-accepted definitions in this case - including picking
 * either the lo or the hi, or averaging them, or doing a linear interpolation.
 *
 * @param lo the highest element less than or equal to the desired quantile
 * @param hi the lowest element greater than or equal to the desired quantile
 * @param row row number (zero based) of the lo element; high element is +1
 * @param nrows total number of rows in the sample
 * @param prob the desired quantile, expressed as a probability in [0,1]
 * @param method how to combine lo and hi when they differ
 * @return desired quantile.
 */
static double computeQuantile(double lo, double hi, double row, double nrows, double prob,
                              QuantileModel.CombineMethod method) {
  if (lo == hi) return lo; // Equal; pick either
  if (method == null) method = QuantileModel.CombineMethod.INTERPOLATE;
  switch (method) {
    case INTERPOLATE: return linearInterpolate(lo, hi, row, nrows, prob);
    case AVERAGE:     return 0.5 * (hi + lo);
    case LOW:         return lo;
    case HIGH:        return hi;
    default:
      Log.info("Unknown even sample size quantile combination type: " + method + ". Doing linear interpolation.");
      return linearInterpolate(lo, hi, row, nrows, prob);
  }
}
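// A minimal sketch of how the combine methods above differ when lo != hi. It assumes a plain
// fractional-position interpolation; the real linearInterpolate() also takes nrows and prob into
// account, so the INTERPOLATE arithmetic here is illustrative only, not H2O's exact formula.
public class QuantileCombineSketch {
  enum Combine { INTERPOLATE, AVERAGE, LOW, HIGH }

  static double combine(double lo, double hi, double frac, Combine m) {
    switch (m) {
      case AVERAGE: return 0.5 * (lo + hi);
      case LOW:     return lo;
      case HIGH:    return hi;
      default:      return lo + frac * (hi - lo); // simple linear interpolation
    }
  }

  public static void main(String[] args) {
    // Suppose the desired quantile lands 20% of the way between the elements 2.0 and 3.0.
    double lo = 2.0, hi = 3.0, frac = 0.2;
    System.out.println(combine(lo, hi, frac, Combine.INTERPOLATE)); // 2.2
    System.out.println(combine(lo, hi, frac, Combine.AVERAGE));     // 2.5
    System.out.println(combine(lo, hi, frac, Combine.LOW));         // 2.0
    System.out.println(combine(lo, hi, frac, Combine.HIGH));        // 3.0
  }
}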
private static void addFolder2(FileSystem fs, Path p, ArrayList<String> keys, ArrayList<String> failed) {
  try {
    if (fs == null) return;
    Futures futures = new Futures();
    for (FileStatus file : fs.listStatus(p)) {
      Path pfs = file.getPath();
      if (file.isDir()) {
        addFolder2(fs, pfs, keys, failed);
      } else {
        long size = file.getLen();
        Key res;
        if (pfs.getName().endsWith(Extensions.JSON)) {
          throw H2O.unimpl();
        } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
          throw H2O.unimpl();
        } else {
          Key k = null;
          keys.add((k = HdfsFileVec.make(file, futures)).toString());
          Log.info("PersistHdfs: DKV.put(" + k + ")");
        }
      }
    }
  } catch (Exception e) {
    Log.err(e);
    failed.add(p.toString());
  }
}
// Filter the current active columns using the strong rules.
// Note: strong rules are updated so that they keep all previous coefficients in,
// to prevent issues with line-search.
private int[] activeCols(final double l1, final double l2, final double[] grad) {
  final double rhs = alpha[0] * (2 * l1 - l2);
  int[] cols = MemoryManager.malloc4(_dinfo.fullN());
  int selected = 0;
  int j = 0;
  if (_activeCols == null) _activeCols = new int[] {-1};
  for (int i = 0; i < _dinfo.fullN(); ++i)
    if ((j < _activeCols.length && i == _activeCols[j]) || grad[i] > rhs || grad[i] < -rhs) {
      cols[selected++] = i;
      if (j < _activeCols.length && i == _activeCols[j]) ++j;
    }
  if (!strong_rules_enabled || selected == _dinfo.fullN()) {
    _activeCols = null;
    _activeData._adaptedFrame = _dinfo._adaptedFrame;
    _activeData = _dinfo;
  } else {
    _activeCols = Arrays.copyOf(cols, selected);
    _activeData = _dinfo.filterExpandedColumns(_activeCols);
  }
  Log.info("GLM2 strong rule at lambda=" + l1 + ", got " + selected + " active cols out of "
      + _dinfo.fullN() + " total.");
  return _activeCols;
}
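// A self-contained sketch of the strong-rule screen used above (not the H2O implementation):
// a column stays in the active set if it was active at the previous lambda or if its gradient
// magnitude exceeds rhs = alpha * (2*l1 - l2), where l1 is the current and l2 the previous lambda.
public class StrongRuleSketch {
  static int[] strongRuleFilter(double[] grad, boolean[] wasActive, double alpha, double l1, double l2) {
    double rhs = alpha * (2 * l1 - l2);
    int[] cols = new int[grad.length];
    int selected = 0;
    for (int i = 0; i < grad.length; ++i)
      if (wasActive[i] || Math.abs(grad[i]) > rhs) cols[selected++] = i;
    return java.util.Arrays.copyOf(cols, selected);
  }

  public static void main(String[] args) {
    double[] grad = {0.9, -0.05, -1.3, 0.2};
    boolean[] wasActive = {false, true, false, false};
    // With alpha=1, l1=0.5, l2=0.3 the threshold is 0.7: columns 0 and 2 pass on gradient,
    // column 1 is kept because it was already active, and column 3 is dropped.
    System.out.println(java.util.Arrays.toString(strongRuleFilter(grad, wasActive, 1.0, 0.5, 0.3)));
  }
}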
@Test public void sparseTester() {
  Storage.DenseVector dv = new Storage.DenseVector(20);
  dv.set(3, 0.21f);
  dv.set(7, 0.13f);
  dv.set(18, 0.14f);
  Storage.SparseVector sv = new Storage.SparseVector(dv);
  assert (sv.size() == 20);
  assert (sv.nnz() == 3);

  // dense treatment
  for (int i = 0; i < sv.size(); ++i) Log.info("sparse [" + i + "] = " + sv.get(i));

  // sparse treatment
  for (Storage.SparseVector.Iterator it = sv.begin(); !it.equals(sv.end()); it.next()) {
    // Log.info(it.toString());
    Log.info(it.index() + " -> " + it.value());
  }

  Storage.DenseColMatrix dcm = new Storage.DenseColMatrix(3, 5);
  dcm.set(2, 1, 3.2f);
  dcm.set(1, 3, -1.2f);
  assert (dcm.get(2, 1) == 3.2f);
  assert (dcm.get(1, 3) == -1.2f);
  assert (dcm.get(0, 0) == 0f);

  Storage.DenseRowMatrix drm = new Storage.DenseRowMatrix(3, 5);
  drm.set(2, 1, 3.2f);
  drm.set(1, 3, -1.2f);
  assert (drm.get(2, 1) == 3.2f);
  assert (drm.get(1, 3) == -1.2f);
  assert (drm.get(0, 0) == 0f);

  Storage.SparseColMatrix scm = new Storage.SparseColMatrix(3, 5);
  scm.set(2, 1, 3.2f);
  scm.set(1, 3, -1.2f);
  assert (scm.get(2, 1) == 3.2f);
  assert (scm.get(1, 3) == -1.2f);
  assert (scm.get(0, 0) == 0f);

  Storage.SparseRowMatrix srm = new Storage.SparseRowMatrix(3, 5);
  srm.set(2, 1, 3.2f);
  srm.set(1, 3, -1.2f);
  assert (srm.get(2, 1) == 3.2f);
  assert (srm.get(1, 3) == -1.2f);
  assert (srm.get(0, 0) == 0f);
}
public static void setMemLow() {
  if (!CAN_ALLOC) return;
  synchronized (_lock) {
    CAN_ALLOC = false;
  }
  // NO LOGGING UNDER LOCK!
  Log.info(Sys.CLEAN, "Pausing to swap to disk; more memory may help");
}
@Test public void testCategoricalIris() throws InterruptedException, ExecutionException {
  GLRM job = null;
  GLRMModel model = null;
  Frame train = null;

  try {
    train = parse_test_file(Key.make("iris.hex"), "smalldata/iris/iris_wheader.csv");
    GLRMParameters parms = new GLRMParameters();
    parms._train = train._key;
    parms._k = 4;
    parms._loss = GLRMParameters.Loss.Absolute;
    parms._init = GLRM.Initialization.SVD;
    parms._transform = DataInfo.TransformType.NONE;
    parms._recover_svd = true;
    parms._max_iterations = 1000;

    try {
      job = new GLRM(parms);
      model = job.trainModel().get();
      Log.info("Iteration " + model._output._iterations + ": Objective value = " + model._output._objective);
      model.score(train).delete();
      ModelMetricsGLRM mm = (ModelMetricsGLRM) ModelMetrics.getFromDKV(model, train);
      Log.info("Numeric Sum of Squared Error = " + mm._numerr
          + "\tCategorical Misclassification Error = " + mm._caterr);
    } catch (Throwable t) {
      t.printStackTrace();
      throw new RuntimeException(t);
    } finally {
      if (job != null) job.remove(); // guard: the GLRM constructor itself may have thrown
    }
  } catch (Throwable t) {
    t.printStackTrace();
    throw new RuntimeException(t);
  } finally {
    if (train != null) train.delete();
    if (model != null) model.delete();
  }
}
public static void setMemGood() {
  if (CAN_ALLOC) return;
  synchronized (_lock) {
    CAN_ALLOC = true;
    _lock.notifyAll();
  }
  // NO LOGGING UNDER LOCK!
  Log.info(Sys.CLEAN, "Continuing after swapping");
}
private static void addFolder(FileSystem fs, Path p, JsonArray succeeded, JsonArray failed) {
  try {
    if (fs == null) return;
    for (FileStatus file : fs.listStatus(p)) {
      Path pfs = file.getPath();
      if (file.isDir()) {
        addFolder(fs, pfs, succeeded, failed);
      } else {
        Key k = Key.make(pfs.toString());
        long size = file.getLen();
        Value val = null;
        if (pfs.getName().endsWith(Extensions.JSON)) {
          JsonParser parser = new JsonParser();
          JsonObject json = parser.parse(new InputStreamReader(fs.open(pfs))).getAsJsonObject();
          JsonElement v = json.get(Constants.VERSION);
          if (v == null) throw new RuntimeException("Missing version");
          JsonElement type = json.get(Constants.TYPE);
          if (type == null) throw new RuntimeException("Missing type");
          Class c = Class.forName(type.getAsString());
          OldModel model = (OldModel) c.newInstance();
          model.fromJson(json);
        } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
          FSDataInputStream s = fs.open(pfs);
          int sz = (int) Math.min(1L << 20, size); // Read up to the 1st meg
          byte[] mem = MemoryManager.malloc1(sz);
          s.readFully(mem);
          // Convert to a ValueArray (hope it fits in 1Meg!)
          ValueArray ary = new ValueArray(k, 0).read(new AutoBuffer(mem));
          val = new Value(k, ary, Value.HDFS);
        } else if (size >= 2 * ValueArray.CHUNK_SZ) {
          val = new Value(k, new ValueArray(k, size), Value.HDFS); // ValueArray byte wrapper over a large file
        } else {
          val = new Value(k, (int) size, Value.HDFS); // Plain Value
          val.setdsk();
        }
        DKV.put(k, val);
        Log.info("PersistHdfs: DKV.put(" + k + ")");
        JsonObject o = new JsonObject();
        o.addProperty(Constants.KEY, k.toString());
        o.addProperty(Constants.FILE, pfs.toString());
        o.addProperty(Constants.VALUE_SIZE, file.getLen());
        succeeded.add(o);
      }
    }
  } catch (Exception e) {
    Log.err(e);
    JsonObject o = new JsonObject();
    o.addProperty(Constants.FILE, p.toString());
    o.addProperty(Constants.ERROR, e.getMessage());
    failed.add(o);
  }
}
protected double checkGradient(final double[] newBeta, final double[] grad) {
  // check the gradient
  ADMMSolver.subgrad(alpha[0], lambda[_lambdaIdx], newBeta, grad);
  double err = 0;
  for (double d : grad) // err = max_i |grad[i]|, the largest absolute subgradient component
    if (d > err) err = d;
    else if (d < -err) err = -d;
  Log.info("GLM converged with max |subgradient| = " + err);
  return err;
}
@Override protected Response serve() {
  try {
    Log.info("Unlocking all locked keys on the cluster.");
    UnlockTask cleanup = new UnlockTask();
    cleanup.invokeOnAllNodes();
  } catch (Throwable e) {
    return Response.error(e);
  }
  return Response.done(this);
}
GridSearch start() {
  Log.info("Starting gridsearch: _total_models=" + _total_models);
  start(new H2OCountedCompleter() {
    @Override public void compute2() {
      gridSearch(_params);
      tryComplete();
    }
  }, _total_models);
  return this;
}
@Test public void testCovtype() {
  Frame frame = parse_test_file("smalldata/covtype/covtype.20k.data");
  AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
  parms._train = frame._key;
  parms._radius_scale = 5.0;
  long start = System.currentTimeMillis();
  AggregatorModel agg = new Aggregator(parms).trainModel().get(); // 0.179
  System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - start) / 1000. + " seconds");
  agg.checkConsistency();
  frame.delete();
  Frame output = agg._output._output_frame.get();
  Log.info("Exemplars: " + output.toString());
  output.remove();
  Log.info("Number of exemplars: " + agg._exemplars.length);
  // Assert.assertTrue(agg._exemplars.length==615);
  agg.remove();
}
protected void nextLambda(final GLMIterationTask glmt, GLMValidation val) {
  currentLambdaIter = 0;
  boolean improved = _model.setAndTestValidation(_lambdaIdx, val);
  _model.clone().update(self());
  boolean done = false; // _iter < max_iter && (improved || _runAllLambdas) && _lambdaIdx < (lambda.length-1);
  if (_iter == max_iter) {
    Log.info("GLM2 reached max #iterations.");
    done = true;
  } else if (!improved && !_runAllLambdas) {
    Log.info("GLM2 converged as solution stopped improving with decreasing lambda.");
    done = true;
  } else if (_lambdaIdx == lambda.length - 1) {
    Log.info("GLM2 done with all given lambdas.");
    done = true;
  } else if (_activeCols != null && _activeCols.length + 1 >= MAX_PREDICTORS) {
    Log.info("GLM2 reached maximum allowed number of predictors at lambda = " + lambda[_lambdaIdx]);
    done = true;
  }
  if (!done) { // continue with the next lambda value?
    ++_lambdaIdx;
    glmt._val = null;
    if (glmt._gram == null) { // assume we had lambda search with strong rules
      // We use strong rules, so we can't reuse this gram for the next lambda computation
      // (different sets of coefficients).  I expect that:
      //   1) beta has been expanded to match the current set of active cols
      //   2) it is a new GLMIteration ready to be launched
      // The caller (nextLambda(glmt,beta)) is expected to ensure this...
      assert _activeCols == null || (glmt._beta.length == _activeCols.length + 1);
      assert !glmt.isDone();
      glmt.asyncExec(_activeData._adaptedFrame);
    } else // we have the right gram, just solve with the next lambda
      new Iteration().callback(glmt);
  } else // nope, we're done
    GLM2.this.complete(); // signal we're done to anyone waiting for the job
}
public void cancel(final String msg) {
  state = msg == null ? JobState.CANCELLED : JobState.CRASHED;
  if (state == JobState.CANCELLED) Log.info("Job " + self() + "(" + description + ") was cancelled.");
  exception = msg;
  // replace finished job by a job handle
  replaceByJobHandle();
  DKV.write_barrier();
  final Job job = this;
  H2O.submitTask(new H2OCountedCompleter() {
    @Override public void compute2() {
      job.onCancelled();
    }
  });
}
@Ignore
@Test public void testCovtypeMemberIndices() {
  Frame frame = parse_test_file("smalldata/covtype/covtype.20k.data");
  AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
  parms._train = frame._key;
  parms._radius_scale = 5.0;
  long start = System.currentTimeMillis();
  AggregatorModel agg = new Aggregator(parms).trainModel().get(); // 1.489
  System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - start) / 1000. + " seconds");
  agg.checkConsistency();

  // Frame assignment = new Frame(new Vec[]{(Vec)agg._exemplar_assignment_vec_key.get()});
  // Frame.export(assignment, "/tmp/assignment", "yada", true);
  // Log.info("Exemplars: " + new Frame(new Vec[]{(Vec)agg._exemplar_assignment_vec_key.get()}).toString(0,20000));
  Log.info("Number of exemplars: " + agg._exemplars.length);

  Key<Frame> memberKey = Key.make();
  for (int i = 0; i < agg._exemplars.length; ++i) {
    Frame members = agg.scoreExemplarMembers(memberKey, i);
    assert (members.numRows() == agg._counts[i]);
    // Log.info(members);
    members.delete();
  }
  Frame output = agg._output._output_frame.get();
  output.remove();
  Log.info("Number of exemplars: " + agg._exemplars.length);
  // Assert.assertTrue(agg._exemplars.length==615);
  frame.delete();
  agg.remove();
}
@Override protected Frame rebalance(final Frame original_fr, boolean local, final String name) {
  if (original_fr == null) return null;
  if (_parms._force_load_balance) {
    int original_chunks = original_fr.anyVec().nChunks();
    _job.update(0, "Load balancing " + name.substring(name.length() - 5) + " data...");
    int chunks = desiredChunks(original_fr, local);
    if (!_parms._reproducible) {
      if (original_chunks >= chunks) {
        if (!_parms._quiet_mode)
          Log.info("Dataset already contains " + original_chunks + " chunks. No need to rebalance.");
        return original_fr;
      }
    } else { // reproducible, set chunks to 1
      assert chunks == 1;
      if (!_parms._quiet_mode) Log.warn("Reproducibility enforced - using only 1 thread - can be slow.");
      if (original_chunks == 1) return original_fr;
    }
    if (!_parms._quiet_mode)
      Log.info("Rebalancing " + name.substring(name.length() - 5) + " dataset into " + chunks + " chunks.");
    Key newKey = Key.make(name + ".chks" + chunks);
    RebalanceDataSet rb = new RebalanceDataSet(original_fr, newKey, chunks);
    H2O.submitTask(rb).join();
    Frame rebalanced_fr = DKV.get(newKey).get();
    Scope.track(rebalanced_fr);
    return rebalanced_fr;
  }
  return original_fr;
}
@Override public void lcompute() {
  // Optional: cancel all jobs
  // for (Job job : Job.all()) {
  //   job.cancel();
  //   Job.waitUntilJobEnded(job.self());
  // }
  final Set<Key> keySet = H2O.globalKeySet(null);
  for (Key key : keySet) {
    if (!key.home()) continue; // only unlock local keys
    final Value val = DKV.get(key);
    if (val == null) continue;
    if (val.rawPOJO() == null) continue; // need to have a POJO to be locked
    if (!val.isLockable()) continue;
    final Object obj = val.rawPOJO();
    assert (obj instanceof Lockable<?>);
    final Lockable<?> lockable = (Lockable<?>) (obj);
    final Key[] lockers = ((Lockable) obj)._lockers;
    if (lockers != null) {
      // check that none of the locking jobs is still running
      for (Key locker : lockers) {
        if (locker != null && locker.type() == Key.JOB) {
          final Job job = UKV.get(locker);
          if (job != null && job.isRunning())
            throw new UnsupportedOperationException("Cannot unlock all keys since locking jobs are still running.");
        }
      }
      lockable.unlock_all();
      Log.info("Unlocked key '" + key + "' from " + lockers.length + " lockers.");
    }
  }
  Log.info("All keys are now unlocked.");
  tryComplete();
}
// Start by splitting all the data according to some criteria (minimize variance at the leaves).
// Record on each row which split it goes to, and assign a split number to it (for next pass).
// On *this* pass, use the split-number to build a per-split histogram, with a
// per-histogram-bucket variance.
@Override protected GBMModel buildModel(GBMModel model, final Frame fr, String names[],
    String domains[][], String[] cmDomain, Timer t_build) {
  // Tag out rows missing the response column
  new ExcludeNAResponse().doAll(fr);

  // Build trees until we hit the limit
  int tid;
  DTree[] ktrees = null;              // Trees
  TreeStats tstats = new TreeStats(); // Tree stats
  for (tid = 0; tid < ntrees; tid++) {
    // During first iteration model contains 0 trees, then 0-trees, then 1-tree,...
    // BUT if validation is not specified model does not participate in voting
    // but on-the-fly computed data are used
    model = doScoring(model, fr, ktrees, tid, cmDomain, tstats, false, false, false);
    // ESL2, page 387
    // Step 2a: Compute prediction (prob distribution) from prior tree results:
    //   Work <== f(Tree)
    new ComputeProb().doAll(fr);
    // ESL2, page 387
    // Step 2b i: Compute residuals from the prediction (probability distribution)
    //   Work <== f(Work)
    new ComputeRes().doAll(fr);
    // ESL2, page 387, Step 2b ii, iii, iv
    Timer kb_timer = new Timer();
    ktrees = buildNextKTrees(fr);
    Log.info(Sys.GBM__, (tid + 1) + ". tree was built in " + kb_timer.toString());
    if (!Job.isRunning(self())) break; // If canceled during building, do not bulkscore
    // Check latest predictions
    tstats.updateBy(ktrees);
  }
  // Final scoring
  model = doScoring(model, fr, ktrees, tid, cmDomain, tstats, true, false, false);
  return model;
}
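// The ESL2 step comments above describe the generic gradient-boosting loop: score the current
// model, compute residuals from its predictions, fit a learner to the residuals, and add a
// shrunken copy of it to the model. A minimal one-dimensional sketch with squared error and a
// constant learner (instead of K regression trees) is below; it is illustrative only and not the
// GBM code path.
public class BoostingLoopSketch {
  public static void main(String[] args) {
    double[] y = {1.0, 2.0, 3.0, 10.0};
    double[] f = new double[y.length]; // current model predictions, initialized to 0
    double learnRate = 0.5;
    for (int iter = 0; iter < 25; ++iter) {
      // Residuals = negative gradient of squared error at the current predictions.
      double meanResidual = 0;
      for (int i = 0; i < y.length; ++i) meanResidual += y[i] - f[i];
      meanResidual /= y.length; // "fit" a constant learner to the residuals
      // Update the model with a shrunken copy of the fitted learner.
      for (int i = 0; i < y.length; ++i) f[i] += learnRate * meanResidual;
    }
    // All predictions converge toward mean(y) = 4.0, since this toy learner cannot split the data.
    System.out.println(java.util.Arrays.toString(f));
  }
}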
@Override protected DRFModel buildModel(DRFModel model, final Frame fr, String names[],
    String domains[][], final Timer t_build) {
  // Append number of trees participating in on-the-fly scoring
  fr.add("OUT_BAG_TREES", response.makeZero());
  // The RNG used to pick split columns
  Random rand = createRNG(_seed);
  // Prepare working columns
  new SetWrkTask().doAll(fr);

  int tid;
  DTree[] ktrees = null;
  // Prepare tree statistics
  TreeStats tstats = new TreeStats();
  // Build trees until we hit the limit
  for (tid = 0; tid < ntrees; tid++) { // Building tid-tree
    model = doScoring(model, fr, ktrees, tid, tstats, tid == 0, !hasValidation(), build_tree_one_node);
    // At each iteration build K trees (K = nclass = response column domain size)
    // TODO: parallelize more? build more than k trees at each time, we need to care about temporary data
    // Idea: launch more DRF at once.
    Timer kb_timer = new Timer();
    ktrees = buildNextKTrees(fr, _mtry, sample_rate, rand, tid);
    Log.info(Sys.DRF__, (tid + 1) + ". tree was built in " + kb_timer.toString());
    if (!Job.isRunning(self())) break; // If canceled during building, do not bulkscore
    // Check latest predictions
    tstats.updateBy(ktrees);
  }

  model = doScoring(model, fr, ktrees, tid, tstats, true, !hasValidation(), build_tree_one_node);
  // Make sure that we did not miss any votes
  assert !importance || _treeMeasuresOnOOB.npredictors() == _treeMeasuresOnSOOB[0 /*variable*/].npredictors()
      : "Missing some tree votes in variable importance voting?!";
  return model;
}
public ValidationMessage(ModelBuilder.ValidationMessage.MessageType message_type, String field_name, String message) {
  this.message_type = message_type;
  this.field_name = field_name;
  this.message = message;
  switch (message_type) {
    case INFO:  Log.info(field_name + ": " + message); break;
    case WARN:  Log.warn(field_name + ": " + message); break;
    case ERROR: Log.err(field_name + ": " + message);  break;
  }
}