@Override public void callback(final GLMTask.GLMLineSearchTask glmt) {
  double step = 0.5;
  for (int i = 0; i < glmt._objvals.length; ++i) {
    if (!needLineSearch(glmt._betas[i], glmt._objvals[i], step)) {
      Log.info("GLM2 (iteration=" + _iter + ") line search: found admissible step=" + step);
      // Set the last result to null so that the next Iteration does not try to decide
      // whether it should run the line search again.
      _lastResult = null;
      new GLMIterationTask(GLM2.this, _activeData, _glm, true, true, true, glmt._betas[i], _ymu, _reg, new Iteration())
          .asyncExec(_activeData._adaptedFrame);
      return;
    }
    step *= 0.5;
  }
  // No line search step worked; forcibly converge
  Log.info("GLM2 (iteration=" + _iter + ") line search failed to find feasible step. Forcibly converged.");
  nextLambda(_lastResult._glmt.clone(), resizeVec(_lastResult._glmt._beta, _activeCols, _lastResult._activeCols));
}
private static void addFolder2(FileSystem fs, Path p, ArrayList<String> keys, ArrayList<String> failed) {
  try {
    if (fs == null) return;
    Futures futures = new Futures();
    for (FileStatus file : fs.listStatus(p)) {
      Path pfs = file.getPath();
      if (file.isDir()) {
        addFolder2(fs, pfs, keys, failed);
      } else {
        if (pfs.getName().endsWith(Extensions.JSON)) {
          throw H2O.unimpl();
        } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
          throw H2O.unimpl();
        } else {
          Key k = HdfsFileVec.make(file, futures);
          keys.add(k.toString());
          Log.info("PersistHdfs: DKV.put(" + k + ")");
        }
      }
    }
  } catch (Exception e) {
    Log.err(e);
    failed.add(p.toString());
  }
}
// Handle the case where some centers go dry.  Rescue only 1 cluster
// per iteration ('cause we only tracked the 1 worst row)
boolean cleanupBadClusters(Lloyds task, final Vec[] vecs, final double[][] centers,
                           final double[] means, final double[] mults, final int[] modes) {
  // Find any bad clusters
  int clu;
  for (clu = 0; clu < _parms._k; clu++)
    if (task._size[clu] == 0) break;
  if (clu == _parms._k) return false; // No bad clusters

  long row = task._worst_row;
  Log.warn("KMeans: Re-initializing cluster " + clu + " to row " + row);
  data(centers[clu] = task._cMeans[clu], vecs, row, means, mults, modes);
  task._size[clu] = 1;
  // FIXME: PUBDEV-871 Some other cluster had its membership count reduced by one! (which one?)

  // Find any MORE bad clusters; we only fixed the first one
  for (clu = 0; clu < _parms._k; clu++)
    if (task._size[clu] == 0) break;
  if (clu == _parms._k) return false; // No MORE bad clusters

  // If we see 2 or more bad rows, just re-run Lloyds to get the
  // next-worst row.  We don't count this as an iteration, because
  // we're not really adjusting the centers, we're trying to get
  // some centers *at-all*.
  Log.warn("KMeans: Re-running Lloyds to re-init another cluster");
  if (_reinit_attempts++ < _parms._k) {
    return true; // Rerun Lloyds, and assign points to centroids
  } else {
    _reinit_attempts = 0;
    return false;
  }
}
protected static String encodeRedirectArgs(JsonObject args, Object[] args2) {
  if (args == null && args2 == null) return "";
  if (args2 != null) {
    StringBuilder sb = new StringBuilder();
    // Must be field-name / value pairs
    assert (args2.length & 1) == 0 : "Number of arguments should be even.";
    for (int i = 0; i < args2.length; i += 2) {
      sb.append(i == 0 ? '?' : '&').append(args2[i]).append('=');
      try {
        sb.append(URLEncoder.encode(args2[i + 1].toString(), "UTF-8"));
      } catch (UnsupportedEncodingException ex) {
        throw Log.errRTExcept(ex);
      }
    }
    return sb.toString();
  }
  StringBuilder sb = new StringBuilder();
  sb.append("?");
  for (Map.Entry<String, JsonElement> entry : args.entrySet()) {
    JsonElement e = entry.getValue();
    if (sb.length() != 1) sb.append("&");
    sb.append(entry.getKey()).append("=");
    try {
      sb.append(URLEncoder.encode(e.getAsString(), "UTF-8"));
    } catch (UnsupportedEncodingException ex) {
      throw Log.errRTExcept(ex);
    }
  }
  return sb.toString();
}
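// A quick, self-contained illustration of the name/value branch above (hypothetical demo class,
// not part of the original codebase): alternating name/value arguments become a "?name=value&..."
// query string with URL-encoded values. Only the standard java.net.URLEncoder API is used.
import java.net.URLEncoder;

public class RedirectArgsDemo {
  public static void main(String[] args) throws Exception {
    Object[] pairs = {"job", "GLM2", "msg", "hello world"}; // must be an even number of entries
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < pairs.length; i += 2) {
      sb.append(i == 0 ? '?' : '&').append(pairs[i]).append('=')
          .append(URLEncoder.encode(pairs[i + 1].toString(), "UTF-8"));
    }
    System.out.println(sb); // prints: ?job=GLM2&msg=hello+world
  }
}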
@SuppressWarnings("unused") // called through reflection by RequestServer public RemoveAllV3 remove(int version, RemoveAllV3 u) { Log.info("Removing all objects"); Futures fs = new Futures(); for (Job j : Job.jobs()) { j.cancel(); j.remove(fs); } fs.blockForPending(); // Bulk brainless key removal. Completely wipes all Keys without regard. new MRTask() { @Override public byte priority() { return H2O.GUI_PRIORITY; } @Override public void setupLocal() { H2O.raw_clear(); water.fvec.Vec.ESPC.clear(); } }.doAllNodes(); Log.info("Finished removing objects"); return u; }
@Test public void testExpandCatsIris() throws InterruptedException, ExecutionException {
  double[][] iris = ard(
      ard(6.3, 2.5, 4.9, 1.5, 1),
      ard(5.7, 2.8, 4.5, 1.3, 1),
      ard(5.6, 2.8, 4.9, 2.0, 2),
      ard(5.0, 3.4, 1.6, 0.4, 0),
      ard(6.0, 2.2, 5.0, 1.5, 2));
  double[][] iris_expandR = ard(
      ard(0, 1, 0, 6.3, 2.5, 4.9, 1.5),
      ard(0, 1, 0, 5.7, 2.8, 4.5, 1.3),
      ard(0, 0, 1, 5.6, 2.8, 4.9, 2.0),
      ard(1, 0, 0, 5.0, 3.4, 1.6, 0.4),
      ard(0, 0, 1, 6.0, 2.2, 5.0, 1.5));
  String[] iris_cols = new String[] {"sepal_len", "sepal_wid", "petal_len", "petal_wid", "class"};
  String[][] iris_domains =
      new String[][] {null, null, null, null, new String[] {"setosa", "versicolor", "virginica"}};

  Frame fr = null;
  try {
    fr = parse_test_file(Key.make("iris.hex"), "smalldata/iris/iris_wheader.csv");
    DataInfo dinfo = new DataInfo(Key.make(), fr, null, 0, true,
        DataInfo.TransformType.NONE, DataInfo.TransformType.NONE,
        false, false, false, /* weights */ false, /* offset */ false, /* fold */ false);

    Log.info("Original matrix:\n" + colFormat(iris_cols, "%8.7s") + ArrayUtils.pprint(iris));
    double[][] iris_perm = ArrayUtils.permuteCols(iris, dinfo._permutation);
    Log.info("Permuted matrix:\n" + colFormat(iris_cols, "%8.7s", dinfo._permutation)
        + ArrayUtils.pprint(iris_perm));

    double[][] iris_exp = GLRM.expandCats(iris_perm, dinfo);
    Log.info("Expanded matrix:\n" + colExpFormat(iris_cols, iris_domains, "%8.7s", dinfo._permutation)
        + ArrayUtils.pprint(iris_exp));
    Assert.assertArrayEquals(iris_expandR, iris_exp);
  } catch (Throwable t) {
    t.printStackTrace();
    throw new RuntimeException(t);
  } finally {
    if (fr != null) fr.delete();
  }
}
private void sendAck() {
  // Send results back
  DTask dt, origDt = _dt; // _dt can go null the instant it is sent over the wire
  assert origDt != null;  // Freed after completion
  while ((dt = _dt) != null) { // Retry loop for broken TCP sends
    AutoBuffer ab = null;
    try {
      // Start the ACK with results back to client.  If the client is
      // asking for a class/id mapping (or any job running at FETCH_ACK
      // priority) then return a udp.fetchack byte instead of a udp.ack.
      // The receiver thread then knows to handle the mapping at the higher
      // priority.
      UDP.udp udp = dt.priority() == H2O.FETCH_ACK_PRIORITY ? UDP.udp.fetchack : UDP.udp.ack;
      ab = new AutoBuffer(_client, udp._prior).putTask(udp, _tsknum).put1(SERVER_UDP_SEND);
      assert ab.position() == 1 + 2 + 4 + 1;
      dt.write(ab);                 // Write the DTask - could be a very large write
      dt._repliedTcp = ab.hasTCP(); // Resends do not need to repeat the TCP result
      ab.close();                   // Then close; send final byte
      _computedAndReplied = true;   // After the final handshake, set computed+replied bit
      break;                        // Break out of retry loop
    } catch (AutoBuffer.AutoBufferException e) {
      if (!_client._heartbeat._client) // Report on servers only; clients are allowed to be flaky
        Log.info("IOException during ACK, " + e._ioe.getMessage() + ", t#" + _tsknum
            + " AB=" + ab + ", waiting and retrying...");
      ab.drainClose();
      if (_client._heartbeat._client) // Dead client will not accept a TCP ACK response?
        this.CAS_DT(dt, null);        // cancel the ACK
      try { Thread.sleep(100); } catch (InterruptedException ignore) { }
    } catch (Exception e) { // Custom serializer just barfed?
      Log.err(e);           // Log custom serializer exception
      ab.drainClose();
    }
  } // end of while(true)
  if (dt == null)
    Log.info("Remote task#" + _tsknum + " " + origDt.getClass() + " to " + _client
        + " has been cancelled by remote");
  else {
    if (dt instanceof MRTask && dt.logVerbose())
      Log.debug("Done remote task#" + _tsknum + " " + dt.getClass() + " to " + _client);
    _client.record_task_answer(this); // Setup for retrying Ack & AckAck, if not canceled
  }
}
@Test public void testCategoricalProstate() throws InterruptedException, ExecutionException {
  GLRM job = null;
  GLRMModel model = null;
  Frame train = null;
  final int[] cats = new int[] {1, 3, 4, 5}; // Categoricals: CAPSULE, RACE, DPROS, DCAPS

  try {
    Scope.enter();
    train = parse_test_file(Key.make("prostate.hex"), "smalldata/logreg/prostate.csv");
    for (int i = 0; i < cats.length; i++)
      Scope.track(train.replace(cats[i], train.vec(cats[i]).toCategoricalVec())._key);
    train.remove("ID").remove();
    DKV.put(train._key, train);

    GLRMParameters parms = new GLRMParameters();
    parms._train = train._key;
    parms._k = 8;
    parms._gamma_x = parms._gamma_y = 0.1;
    parms._regularization_x = GLRMModel.GLRMParameters.Regularizer.Quadratic;
    parms._regularization_y = GLRMModel.GLRMParameters.Regularizer.Quadratic;
    parms._init = GLRM.Initialization.PlusPlus;
    parms._transform = DataInfo.TransformType.STANDARDIZE;
    parms._recover_svd = false;
    parms._max_iterations = 200;

    try {
      job = new GLRM(parms);
      model = job.trainModel().get();
      Log.info("Iteration " + model._output._iterations + ": Objective value = " + model._output._objective);
      model.score(train).delete();
      ModelMetricsGLRM mm = (ModelMetricsGLRM) ModelMetrics.getFromDKV(model, train);
      Log.info("Numeric Sum of Squared Error = " + mm._numerr
          + "\tCategorical Misclassification Error = " + mm._caterr);
    } catch (Throwable t) {
      t.printStackTrace();
      throw new RuntimeException(t);
    } finally {
      job.remove();
    }
  } catch (Throwable t) {
    t.printStackTrace();
    throw new RuntimeException(t);
  } finally {
    if (train != null) train.delete();
    if (model != null) model.delete();
    Scope.exit();
  }
}
private static void addFolder(FileSystem fs, Path p, JsonArray succeeded, JsonArray failed) {
  try {
    if (fs == null) return;
    for (FileStatus file : fs.listStatus(p)) {
      Path pfs = file.getPath();
      if (file.isDir()) {
        addFolder(fs, pfs, succeeded, failed);
      } else {
        Key k = Key.make(pfs.toString());
        long size = file.getLen();
        Value val = null;
        if (pfs.getName().endsWith(Extensions.JSON)) {
          JsonParser parser = new JsonParser();
          JsonObject json = parser.parse(new InputStreamReader(fs.open(pfs))).getAsJsonObject();
          JsonElement v = json.get(Constants.VERSION);
          if (v == null) throw new RuntimeException("Missing version");
          JsonElement type = json.get(Constants.TYPE);
          if (type == null) throw new RuntimeException("Missing type");
          Class c = Class.forName(type.getAsString());
          OldModel model = (OldModel) c.newInstance();
          model.fromJson(json);
        } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
          FSDataInputStream s = fs.open(pfs);
          int sz = (int) Math.min(1L << 20, size); // Read up to the 1st meg
          byte[] mem = MemoryManager.malloc1(sz);
          s.readFully(mem);
          // Convert to a ValueArray (hope it fits in 1Meg!)
          ValueArray ary = new ValueArray(k, 0).read(new AutoBuffer(mem));
          val = new Value(k, ary, Value.HDFS);
        } else if (size >= 2 * ValueArray.CHUNK_SZ) {
          val = new Value(k, new ValueArray(k, size), Value.HDFS); // ValueArray byte wrapper over a large file
        } else {
          val = new Value(k, (int) size, Value.HDFS); // Plain Value
          val.setdsk();
        }
        DKV.put(k, val);
        Log.info("PersistHdfs: DKV.put(" + k + ")");
        JsonObject o = new JsonObject();
        o.addProperty(Constants.KEY, k.toString());
        o.addProperty(Constants.FILE, pfs.toString());
        o.addProperty(Constants.VALUE_SIZE, file.getLen());
        succeeded.add(o);
      }
    }
  } catch (Exception e) {
    Log.err(e);
    JsonObject o = new JsonObject();
    o.addProperty(Constants.FILE, p.toString());
    o.addProperty(Constants.ERROR, e.getMessage());
    failed.add(o);
  }
}
public static void main(String[] args) throws Exception {
  // Can be necessary to run in parallel to other clouds, so find open ports
  int[] ports = new int[3];
  int port = 54321;
  for( int i = 0; i < ports.length; i++ ) {
    for( ;; ) {
      if( isOpen(port) && isOpen(port + 1) ) {
        ports[i] = port;
        port += 2;
        break;
      }
      port++;
    }
  }
  String flat = "";
  for( int i = 0; i < ports.length; i++ )
    flat += "127.0.0.1:" + ports[i] + "\n";
  // Force all IPs to local so that users can run with a firewall
  String[] a = new String[] { "-ip", "127.0.0.1", "-flatfile", Utils.writeFile(flat).getAbsolutePath() };
  H2O.OPT_ARGS.ip = "127.0.0.1";
  args = (String[]) ArrayUtils.addAll(a, args);

  ArrayList<Node> nodes = new ArrayList<Node>();
  for( int i = 1; i < ports.length; i++ )
    nodes.add(new NodeVM(Utils.append(args, "-port", "" + ports[i])));

  args = Utils.append(new String[] { "-mainClass", Master.class.getName() }, args);
  Node master = new NodeVM(Utils.append(args, "-port", "" + ports[0]));
  nodes.add(master);

  File out = null, err = null, sandbox = new File("sandbox");
  sandbox.mkdirs();
  Utils.clearFolder(sandbox);
  for( int i = 0; i < nodes.size(); i++ ) {
    out = File.createTempFile("junit-" + i + "-out-", null, sandbox);
    err = File.createTempFile("junit-" + i + "-err-", null, sandbox);
    nodes.get(i).persistIO(out.getAbsolutePath(), err.getAbsolutePath());
    nodes.get(i).start();
  }

  int exit = master.waitFor();
  if( exit != 0 ) {
    Log.log(out, System.out);
    Thread.sleep(100); // Pause briefly so the stdout and stderr logs don't get mixed together
    Log.log(err, System.err);
  }
  for( Node node : nodes )
    node.kill();
  if( exit == 0 )
    System.out.println("OK");
  System.exit(exit);
}
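// The port scan above relies on an isOpen(port) helper that is not shown in this snippet.
// A minimal sketch of such a check, assuming "open" means "free to bind on the loopback
// interface", could look like the following (hypothetical helper, not the original implementation):
import java.io.IOException;
import java.net.InetAddress;
import java.net.ServerSocket;

class PortCheck {
  // Returns true if 'port' can be bound on 127.0.0.1, i.e. nothing else is listening on it.
  static boolean isOpen(int port) {
    try (ServerSocket s = new ServerSocket(port, 1, InetAddress.getByName("127.0.0.1"))) {
      return true;
    } catch (IOException e) {
      return false; // already in use, or not bindable
    }
  }
}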
@Test public void testChunks() {
  Frame frame = parse_test_file("smalldata/covtype/covtype.20k.data");
  AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
  parms._train = frame._key;
  parms._radius_scale = 3.0;
  long start = System.currentTimeMillis();
  AggregatorModel agg = new Aggregator(parms).trainModel().get(); // 0.418
  System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - start) / 1000. + " seconds");
  agg.checkConsistency();
  Frame output = agg._output._output_frame.get();
  Log.info("Number of exemplars: " + agg._exemplars.length);
  // Assert.assertTrue(agg._exemplars.length==1993);
  output.remove();
  agg.remove();

  for (int i : new int[] {1, 2, 5, 10, 50, 100}) {
    Key key = Key.make();
    RebalanceDataSet rb = new RebalanceDataSet(frame, key, i);
    H2O.submitTask(rb);
    rb.join();
    Frame rebalanced = DKV.get(key).get();

    parms = new AggregatorModel.AggregatorParameters();
    parms._train = frame._key;
    parms._radius_scale = 3.0;
    start = System.currentTimeMillis();
    AggregatorModel agg2 = new Aggregator(parms).trainModel().get(); // 0.373 0.504 0.357 0.454 0.368 0.355
    System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - start) / 1000. + " seconds");
    agg2.checkConsistency();
    Log.info("Number of exemplars for " + i + " chunks: " + agg2._exemplars.length);
    rebalanced.delete();
    Assert.assertTrue(Math.abs(agg._exemplars.length - agg2._exemplars.length) == 0); // < agg._exemplars.length*0
    output = agg2._output._output_frame.get();
    output.remove();
    agg2.remove();
  }
  frame.delete();
}
static {
  InputStream resource = Boot._init.getResource2("/page.html");
  try {
    _htmlTemplate = new String(ByteStreams.toByteArray(resource)).replace("%cloud_name", H2O.NAME);
  } catch (NullPointerException e) {
    Log.err(e);
    Log.die("page.html not found in resources.");
  } catch (Exception e) {
    Log.err(e);
    Log.die(e.getMessage());
  } finally {
    Closeables.closeQuietly(resource);
  }
}
public void handle(String target, Request baseRequest, HttpServletRequest request, HttpServletResponse response)
    throws IOException, ServletException {
  if (!H2O.ARGS.ldap_login) {
    return;
  }

  String loginName = request.getUserPrincipal().getName();
  if (!loginName.equals(H2O.ARGS.user_name)) {
    Log.warn("Login name (" + loginName + ") does not match cluster owner name (" + H2O.ARGS.user_name + ")");
    sendResponseError(response, HttpServletResponse.SC_UNAUTHORIZED,
        "Login name does not match cluster owner name");
    baseRequest.setHandled(true);
  }
}
// TCP large RECEIVE of results.  Note that 'this' is NOT the RPC object
// that is hoping to get the received object, nor is the current thread the
// RPC thread blocking for the object.  The current thread is the TCP
// reader thread.
static void tcp_ack(final AutoBuffer ab) throws IOException {
  // Get the RPC we're waiting on
  int task = ab.getTask();
  RPC rpc = ab._h2o.taskGet(task);
  // Race with canceling a large RPC fetch: Task is already dead.  Do not
  // bother reading from the TCP socket, just bail out & close socket.
  if (rpc == null || rpc._done) {
    ab.drainClose();
  } else {
    assert rpc._tasknum == task;
    assert !rpc._done;
    // Here we have the result, and we're on the correct Node but wrong
    // Thread.  If we just return, the TCP reader thread will close the
    // remote, the remote will UDP ACK the RPC back, and back on the current
    // Node but in the correct Thread, we'd wake up and realize we received a
    // large result.
    try {
      rpc.response(ab);
    } catch (AutoBuffer.AutoBufferException e) {
      // If TCP fails, we will have done a short-read crushing the original
      // _dt object, and be unable to resend.  This is fatal right now.
      // Really: an unimplemented feature; fix is to notice that a partial
      // TCP read means that the server (1) got our remote_exec request, (2)
      // has computed an answer and was trying to send it to us, (3) failed
      // sending via TCP hence the server knows it failed and will send again
      // without any further work from us.  We need to disable all the resend
      // & retry logic, and wait for the server to re-send our result.
      // Meanwhile the _dt object is crushed with half-read crap, and cannot
      // be trusted except in the base fields.
      throw Log.throwErr(e._ioe);
    }
  }
  // ACKACK the remote, telling him "we got the answer"
  new AutoBuffer(ab._h2o, H2O.ACK_ACK_PRIORITY).putTask(UDP.udp.ackack.ordinal(), task).close();
}
@Override protected void checkMemoryFootPrint() {
  if (_model._output._ntrees == 0) return;
  int trees_so_far = _model._output._ntrees; // existing trees
  long model_mem_size =
      new ComputeModelSize(trees_so_far, _model._output._treeKeys).doAllNodes()._model_mem_size;
  _model._output._treeStats._byte_size = model_mem_size;
  double avg_tree_mem_size = (double) model_mem_size / trees_so_far;
  Log.debug("Average tree size (for all classes): " + PrettyPrint.bytes((long) avg_tree_mem_size));

  // All the compressed trees are stored on the driver node
  long max_mem = H2O.SELF.get_max_mem();
  if (_parms._ntrees * avg_tree_mem_size > max_mem) {
    String msg = "The tree model will not fit in the driver node's memory ("
        + PrettyPrint.bytes((long) avg_tree_mem_size) + " per tree x " + _parms._ntrees + " > "
        + PrettyPrint.bytes(max_mem) + ") - try decreasing ntrees and/or max_depth or increasing min_rows!";
    error("_ntrees", msg);
    cancel(msg);
  }
}
/**
 * Score a frame with the given model and return just the metrics.
 *
 * <p>NOTE: ModelMetrics are now always created by model.score().
 */
@SuppressWarnings("unused") // called through reflection by RequestServer
public ModelMetricsListSchemaV3 score(int version, ModelMetricsListSchemaV3 s) {
  // Parameter checking:
  if (null == s.model) throw new H2OIllegalArgumentException("model", "predict", s.model);
  if (null == DKV.get(s.model.name))
    throw new H2OKeyNotFoundArgumentException("model", "predict", s.model.name);
  if (null == s.frame) throw new H2OIllegalArgumentException("frame", "predict", s.frame);
  if (null == DKV.get(s.frame.name))
    throw new H2OKeyNotFoundArgumentException("frame", "predict", s.frame.name);

  ModelMetricsList parms = s.createAndFillImpl();
  // Throw away the predictions; the metrics are kept as a side-effect of scoring.
  parms._model.score(parms._frame, parms._predictions_name).remove();
  ModelMetricsListSchemaV3 mm = this.fetch(version, s);

  // TODO: for now only binary predictors write an MM object.
  // For the others cons one up here to return the predictions frame.
  if (null == mm) mm = new ModelMetricsListSchemaV3();
  if (null == mm.model_metrics || 0 == mm.model_metrics.length) {
    Log.warn("Score() did not return a ModelMetrics for model: " + s.model + " on frame: " + s.frame);
  }
  return mm;
}
public static void main(String[] args) throws Exception {
  Log._dontDie = true; // Ignore fatal class load error
  ArrayList<String> list = new ArrayList<String>();
  for (String name : Boot.getClasses()) {
    if (!name.equals("water.api.RequestServer")
        && !name.equals("water.External")
        && !name.startsWith("water.r.")) {
      Class c = Class.forName(name);
      if (Freezable.class.isAssignableFrom(c)) list.add(c.getName());
    }
  }
  Collections.sort(list);
  String s = "" + //
      "package water;\n" + //
      "\n" + //
      "// Do not edit - generated\n" + //
      "public class TypeMapGen {\n" + //
      " static final String[] CLAZZES = {\n" + //
      " \" BAD\", // 0: BAD\n" + //
      " \"[B\", // 1: Array of Bytes\n";
  for (String c : list) s += " \"" + c + "\",\n";
  s += " };\n";
  s += "}";
  Utils.writeFile(new File("src/main/java/water/TypeMapGen.java"), s);
  Log.info("Generated TypeMap");
}
public NanoHTTPD.Response serve(NanoHTTPD server, Properties args, RequestType type) {
  // Needs to be done also for help, to initialize our argument records
  String query = checkArguments(args, type);
  switch (type) {
    case help:
      return wrap(server, build(Response.done(serveHelp())));
    case json:
    case www:
      if (log()) {
        String log = getClass().getSimpleName();
        for (Object arg : args.keySet()) {
          String value = args.getProperty((String) arg);
          if (value != null && value.length() != 0) log += " " + arg + "=" + value;
        }
        Log.debug(Sys.HTTPD, log);
      }
      if (query != null) return wrap(server, query, type);
      long time = System.currentTimeMillis();
      Response response = serve();
      response.setTimeStart(time);
      if (type == RequestType.json) return wrap(server, response.toJson());
      return wrap(server, build(response));
    case debug:
      response = serve_debug();
      return wrap(server, build(response));
    case query:
      return wrap(server, query);
    default:
      throw new RuntimeException("Invalid request type " + type.toString());
  }
}
@Test public void testAggregatorBinary() {
  CreateFrame cf = new CreateFrame();
  cf.rows = 1000;
  cf.cols = 10;
  cf.categorical_fraction = 0.6;
  cf.integer_fraction = 0.0;
  cf.binary_fraction = 0.0;
  cf.real_range = 100;
  cf.integer_range = 100;
  cf.missing_fraction = 0.1;
  cf.factors = 5;
  cf.seed = 1234;
  Frame frame = cf.execImpl().get();

  AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
  parms._train = frame._key;
  parms._radius_scale = 1.0;
  parms._transform = DataInfo.TransformType.NORMALIZE;
  parms._categorical_encoding = Model.Parameters.CategoricalEncodingScheme.Binary;
  long start = System.currentTimeMillis();
  AggregatorModel agg = new Aggregator(parms).trainModel().get(); // 0.905
  System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - start) / 1000. + " seconds");
  agg.checkConsistency();
  Frame output = agg._output._output_frame.get();
  System.out.println(output.toTwoDimTable(0, 10));
  Log.info("Number of exemplars: " + agg._exemplars.length);
  // Assert.assertTrue(agg._exemplars.length==649);
  output.remove();
  frame.remove();
  agg.remove();
}
@Override protected Response serve() {
  try {
    // Pull everything local
    Log.info("ExportFiles processing (" + path + ")");
    if (DKV.get(src_key) == null)
      throw new IllegalArgumentException(src_key.toString() + " not found.");
    Object value = DKV.get(src_key).get();
    // Create a stream to read the entire VA or Frame
    if (!(value instanceof ValueArray) && !(value instanceof Frame))
      throw new UnsupportedOperationException("Can only export Frames or ValueArrays.");
    InputStream csv = value instanceof ValueArray
        ? new ValueArray.CsvVAStream((ValueArray) value, null)
        : ((Frame) value).toCSV(true);
    String p2 = path.toLowerCase();
    if (p2.startsWith("hdfs://")) serveHdfs(csv);
    else if (p2.startsWith("s3n://")) serveHdfs(csv);
    else serveLocalDisk(csv);
    return RequestBuilders.Response.done(this);
  } catch (Throwable t) {
    return RequestBuilders.Response.error(t);
  }
}
/**
 * Compute the correct final quantile from these 4 values.  If the lo and hi elements are equal,
 * use them.  However if they differ, then there is no single value which exactly matches the
 * desired quantile.  There are several well-accepted definitions in this case - including picking
 * either the lo or the hi, or averaging them, or doing a linear interpolation.
 *
 * @param lo the highest element less than or equal to the desired quantile
 * @param hi the lowest element greater than or equal to the desired quantile
 * @param row row number (zero based) of the lo element; the hi element is at row + 1
 * @param nrows total number of rows
 * @param prob the desired quantile, as a probability in [0,1]
 * @param method how to combine lo and hi when they differ
 * @return desired quantile.
 */
static double computeQuantile(double lo, double hi, double row, double nrows, double prob,
                              QuantileModel.CombineMethod method) {
  if (lo == hi) return lo; // Equal; pick either
  if (method == null) method = QuantileModel.CombineMethod.INTERPOLATE;
  switch (method) {
    case INTERPOLATE: return linearInterpolate(lo, hi, row, nrows, prob);
    case AVERAGE:     return 0.5 * (hi + lo);
    case LOW:         return lo;
    case HIGH:        return hi;
    default:
      Log.info("Unknown even sample size quantile combination type: " + method + ". Doing linear interpolation.");
      return linearInterpolate(lo, hi, row, nrows, prob);
  }
}
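// The linearInterpolate helper used above is not shown in this snippet. A minimal sketch, assuming
// the common "type 7" quantile convention where the fractional target index is prob * (nrows - 1)
// and lo/hi sit at rows row and row + 1, could look like this (hypothetical; the actual helper may
// use a different convention):
static double linearInterpolate(double lo, double hi, double row, double nrows, double prob) {
  double h = prob * (nrows - 1); // fractional row index of the desired quantile
  double frac = h - row;         // how far the target lies between lo (at row) and hi (at row + 1)
  return lo + frac * (hi - lo);  // weighted combination of the two neighboring elements
}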
private static void run(Callable c, boolean read, int size) {
  // Count all i/o time from here, including all retry overheads
  long start_io_ms = System.currentTimeMillis();
  while (true) {
    try {
      long start_ns = System.nanoTime(); // Blocking i/o call timing - without counting repeats
      c.call();
      TimeLine.record_IOclose(start_ns, start_io_ms, read ? 1 : 0, size, Value.HDFS);
      break;
      // Explicitly ignore the following exceptions, but fail on the rest of the IOExceptions
    } catch (EOFException e) {
      ignoreAndWait(e, false);
    } catch (SocketTimeoutException e) {
      ignoreAndWait(e, false);
    } catch (S3Exception e) {
      // Preserve S3Exception before IOException.
      // This is tricky code - we support different HDFS versions.
      // Newer versions declare S3Exception as an IOException,
      // but old versions (0.20.xxx) declare it as a RuntimeException,
      // so we have to catch it before IOException !!!
      ignoreAndWait(e, false);
    } catch (IOException e) {
      ignoreAndWait(e, true);
    } catch (Exception e) {
      throw Log.errRTExcept(e);
    }
  }
}
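// The ignoreAndWait helper called above is not shown in this snippet. A minimal sketch of what such
// a retry helper typically does - log the ignored exception (with full detail only when asked) and
// back off briefly before the enclosing loop retries - might look like this (hypothetical, not the
// original implementation; it only uses Log.err and Log.info, which appear elsewhere in these snippets):
private static void ignoreAndWait(Exception e, boolean printException) {
  if (printException) Log.err(e); // full detail for the generic IOExceptions
  else Log.info("Ignoring transient HDFS I/O failure: " + e.getMessage() + ", retrying...");
  try {
    Thread.sleep(500); // brief back-off before the caller's while(true) loop retries the call
  } catch (InterruptedException ignored) {
  }
}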
// Filter the current active columns using the strong rules.
// Note: strong rules are updated so that they keep all previous coefficients in, to prevent issues
// with line-search.
private int[] activeCols(final double l1, final double l2, final double[] grad) {
  final double rhs = alpha[0] * (2 * l1 - l2);
  int[] cols = MemoryManager.malloc4(_dinfo.fullN());
  int selected = 0;
  int j = 0;
  if (_activeCols == null) _activeCols = new int[] {-1};
  for (int i = 0; i < _dinfo.fullN(); ++i)
    if ((j < _activeCols.length && i == _activeCols[j]) || grad[i] > rhs || grad[i] < -rhs) {
      cols[selected++] = i;
      if (j < _activeCols.length && i == _activeCols[j]) ++j;
    }
  if (!strong_rules_enabled || selected == _dinfo.fullN()) {
    _activeCols = null;
    _activeData._adaptedFrame = _dinfo._adaptedFrame;
    _activeData = _dinfo;
  } else {
    _activeCols = Arrays.copyOf(cols, selected);
    _activeData = _dinfo.filterExpandedColumns(_activeCols);
  }
  Log.info("GLM2 strong rule at lambda=" + l1 + ", got " + selected + " active cols out of "
      + _dinfo.fullN() + " total.");
  return _activeCols;
}
@Override protected void init() {
  super.init();
  // Reject the request if classification is required and the response column is float.
  // Argument a4class = find("classification"); // get UI control
  // String p4class = input("classification");  // get value from HTTP requests
  // if there is a UI control and the classification field was passed
  final boolean classificationFieldSpecified = true;
  // ROLLBACK: a4class!=null ? p4class!=null : /* we are not in UI so expect that parameter is specified correctly */ true;
  if (!classificationFieldSpecified) {
    // Can happen if a client sends a request which does not specify the classification parameter
    classification = response.isEnum();
    Log.warn("Classification field is not specified - deriving according to response! The classification field is set to "
        + classification);
  } else {
    if (classification && response.isFloat())
      throw new H2OIllegalArgumentException(find("classification"), "Requested classification on float column!");
    if (!classification && response.isEnum())
      throw new H2OIllegalArgumentException(find("classification"), "Requested regression on enum column!");
  }
}
/**
 * Helper to create the DataInfo object from training/validation frames and the DL parameters.
 *
 * @param train Training frame
 * @param valid Validation frame
 * @param parms Model parameters
 * @param nClasses Number of response levels (1: regression, >=2: classification)
 * @return DataInfo
 */
static DataInfo makeDataInfo(Frame train, Frame valid, DeepLearningParameters parms, int nClasses) {
  // Probe an arbitrary value to determine whether the distribution's link function is the identity.
  double x = 0.782347234;
  boolean identityLink = new Distribution(parms._distribution, parms._tweedie_power).link(x) == x;
  DataInfo dinfo = new DataInfo(
      train, valid,
      parms._autoencoder ? 0 : 1, // nResponses
      parms._autoencoder || parms._use_all_factor_levels, // use all factor levels for the auto-encoder
      parms._standardize
          ? (parms._autoencoder
              ? DataInfo.TransformType.NORMALIZE
              : parms._sparse ? DataInfo.TransformType.DESCALE : DataInfo.TransformType.STANDARDIZE)
          : DataInfo.TransformType.NONE, // transform predictors
      !parms._standardize || train.lastVec().isCategorical()
          ? DataInfo.TransformType.NONE
          : identityLink ? DataInfo.TransformType.STANDARDIZE
                         : DataInfo.TransformType.NONE, // transform response for regression with identity link
      parms._missing_values_handling == DeepLearningParameters.MissingValuesHandling.Skip, // whether to skip missing
      false, // do not replace NAs in numeric cols with mean
      true,  // always add a bucket for missing values
      parms._weights_column != null, // observation weights
      parms._offset_column != null,
      parms._fold_column != null);

  // Checks and adjustments:
  // 1) observation weights (adjust mean/sigmas for predictors and response)
  // 2) NAs (check that there are enough rows left)
  GLMTask.YMUTask ymt =
      new GLMTask.YMUTask(dinfo, nClasses, true, !parms._autoencoder && nClasses == 1, false, !parms._autoencoder)
          .doAll(dinfo._adaptedFrame);
  if (ymt._wsum == 0
      && parms._missing_values_handling == DeepLearningParameters.MissingValuesHandling.Skip)
    throw new H2OIllegalArgumentException(
        "No rows left in the dataset after filtering out rows with missing values. Ignore columns with many NAs or set missing_values_handling to 'MeanImputation'.");
  if (parms._weights_column != null && parms._offset_column != null) {
    Log.warn("Combination of offset and weights can lead to slight differences because Rollupstats aren't weighted - need to re-calculate weighted mean/sigma of the response including offset terms.");
  }
  if (parms._weights_column != null && parms._offset_column == null /* FIXME: offset not yet implemented */) {
    dinfo.updateWeightedSigmaAndMean(ymt._basicStats.sigma(), ymt._basicStats.mean());
    if (nClasses == 1)
      dinfo.updateWeightedSigmaAndMeanForResponse(ymt._basicStatsResponse.sigma(), ymt._basicStatsResponse.mean());
  }
  return dinfo;
}
public static void reserveTaskMem(long m) {
  final long bytes = m;
  while (!tryReserveTaskMem(bytes)) {
    try {
      ForkJoinPool.managedBlock(new ManagedBlocker() {
        @Override public boolean isReleasable() {
          return _taskMem.get() >= bytes;
        }
        @Override public boolean block() throws InterruptedException {
          synchronized (_taskMemLock) {
            try {
              _taskMemLock.wait();
            } catch (InterruptedException e) {
            }
          }
          return isReleasable();
        }
      });
    } catch (InterruptedException e) {
      throw Log.errRTExcept(e);
    }
  }
}
@Test public void sparseTester() {
  Storage.DenseVector dv = new Storage.DenseVector(20);
  dv.set(3, 0.21f);
  dv.set(7, 0.13f);
  dv.set(18, 0.14f);
  Storage.SparseVector sv = new Storage.SparseVector(dv);
  assert (sv.size() == 20);
  assert (sv.nnz() == 3);

  // dense treatment
  for (int i = 0; i < sv.size(); ++i) Log.info("sparse [" + i + "] = " + sv.get(i));

  // sparse treatment
  for (Storage.SparseVector.Iterator it = sv.begin(); !it.equals(sv.end()); it.next()) {
    // Log.info(it.toString());
    Log.info(it.index() + " -> " + it.value());
  }

  Storage.DenseColMatrix dcm = new Storage.DenseColMatrix(3, 5);
  dcm.set(2, 1, 3.2f);
  dcm.set(1, 3, -1.2f);
  assert (dcm.get(2, 1) == 3.2f);
  assert (dcm.get(1, 3) == -1.2f);
  assert (dcm.get(0, 0) == 0f);

  Storage.DenseRowMatrix drm = new Storage.DenseRowMatrix(3, 5);
  drm.set(2, 1, 3.2f);
  drm.set(1, 3, -1.2f);
  assert (drm.get(2, 1) == 3.2f);
  assert (drm.get(1, 3) == -1.2f);
  assert (drm.get(0, 0) == 0f);

  Storage.SparseColMatrix scm = new Storage.SparseColMatrix(3, 5);
  scm.set(2, 1, 3.2f);
  scm.set(1, 3, -1.2f);
  assert (scm.get(2, 1) == 3.2f);
  assert (scm.get(1, 3) == -1.2f);
  assert (scm.get(0, 0) == 0f);

  Storage.SparseRowMatrix srm = new Storage.SparseRowMatrix(3, 5);
  srm.set(2, 1, 3.2f);
  srm.set(1, 3, -1.2f);
  assert (srm.get(2, 1) == 3.2f);
  assert (srm.get(1, 3) == -1.2f);
  assert (srm.get(0, 0) == 0f);
}
public static void setMemLow() {
  if (!CAN_ALLOC) return;
  synchronized (_lock) {
    CAN_ALLOC = false;
  }
  // NO LOGGING UNDER LOCK!
  Log.info(Sys.CLEAN, "Pausing to swap to disk; more memory may help");
}
// Write-lock 'this', delete any old thing, returns NEW guy
public T delete_and_lock(Key job_key) {
  Lockable old = write_lock(job_key);
  if (old != null) {
    Log.debug(Log.Tag.Sys.LOCKS, "lock-then-clear " + _key + " by job " + job_key);
    old.delete_impl(new Futures()).blockForPending();
  }
  return (T) this;
}
@Test public void testCategoricalIris() throws InterruptedException, ExecutionException {
  GLRM job = null;
  GLRMModel model = null;
  Frame train = null;

  try {
    train = parse_test_file(Key.make("iris.hex"), "smalldata/iris/iris_wheader.csv");
    GLRMParameters parms = new GLRMParameters();
    parms._train = train._key;
    parms._k = 4;
    parms._loss = GLRMParameters.Loss.Absolute;
    parms._init = GLRM.Initialization.SVD;
    parms._transform = DataInfo.TransformType.NONE;
    parms._recover_svd = true;
    parms._max_iterations = 1000;

    try {
      job = new GLRM(parms);
      model = job.trainModel().get();
      Log.info("Iteration " + model._output._iterations + ": Objective value = " + model._output._objective);
      model.score(train).delete();
      ModelMetricsGLRM mm = (ModelMetricsGLRM) ModelMetrics.getFromDKV(model, train);
      Log.info("Numeric Sum of Squared Error = " + mm._numerr
          + "\tCategorical Misclassification Error = " + mm._caterr);
    } catch (Throwable t) {
      t.printStackTrace();
      throw new RuntimeException(t);
    } finally {
      job.remove();
    }
  } catch (Throwable t) {
    t.printStackTrace();
    throw new RuntimeException(t);
  } finally {
    if (train != null) train.delete();
    if (model != null) model.delete();
  }
}