@Override public void map(Chunk chks[], NewChunk nchks[]) {
  long rstart = chks[0]._start;
  int rlen = chks[0]._len;          // Total row count
  int rx = 0;                       // Which row to in/ex-clude
  int rlo = 0, rhi = rlen;          // Lo/Hi for this block of rows
  while (true) {                    // Still got rows to include?
    if (_rows != null) {            // Got a row selector?
      if (rx >= _rows.length) break;     // All done with row selections
      long r = _rows[rx++] - 1;          // Next row selector
      if (r < 0) {                       // Row exclusion
        if (rx > 0 && _rows[rx - 1] < _rows[rx]) throw H2O.unimpl();
        long er = Math.abs(r) - 2;
        if (er < rstart) continue;
        // Scoop up all of the rows before the first exclusion
        if (rx == 1 && ((int) (er + 1 - rstart)) > 0 && _ex) {
          rlo = (int) rstart;
          rhi = (int) (er - rstart);
          _ex = false;
          rx--;
        } else {
          rlo = (int) (er + 1 - rstart);
          // TODO: handle jumbled row indices ( e.g. -c(1,5,3) )
          while (rx < _rows.length && (_rows[rx] + 1 == _rows[rx - 1] && rlo < rlen)) {
            if (rx < _rows.length - 1 && _rows[rx] < _rows[rx + 1]) throw H2O.unimpl();
            rx++; rlo++;            // Exclude consecutive rows
          }
          rhi = rx >= _rows.length ? rlen : (int) Math.abs(_rows[rx] - 1) - 2;
          if (rx < _rows.length - 1 && _rows[rx] < _rows[rx + 1]) throw H2O.unimpl();
        }
      } else {                      // Positive row list?
        if (r < rstart) continue;
        rlo = (int) (r - rstart);
        rhi = rlo + 1;              // Stop at the next row
        while (rx < _rows.length && (_rows[rx] - 1 - rstart) == rhi && rhi < rlen) {
          rx++; rhi++;              // Grab sequential rows
        }
      }
    }
    // Process this next set of rows
    // For all cols in the new set
    for (int i = 0; i < _cols.length; i++) {
      Chunk oc = chks[_cols[i]];
      NewChunk nc = nchks[i];
      if (oc._vec.isInt()) {        // Slice on integer columns
        for (int j = rlo; j < rhi; j++)
          if (oc.isNA0(j)) nc.addNA();
          else nc.addNum(oc.at80(j), 0);
      } else {                      // Slice on double columns
        for (int j = rlo; j < rhi; j++)
          nc.addNum(oc.at0(j));
      }
    }
    rlo = rhi;
    if (_rows == null) break;
  }
}
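// The selector encoding above is compact but easy to misread: entries are
// 1-based, a positive entry k selects row k, and a negative entry -k marks
// row k for exclusion.  Since map() first shifts by -1, an exclusion arrives
// as a negative r and decodes to the 0-based row Math.abs(r) - 2.  A minimal
// standalone sketch of that decoding (hypothetical helpers, illustration only):
static boolean isExclusion(long selector) { return selector < 0; }

static long toZeroBasedRow(long selector) {
  long r = selector - 1;               // the same -1 shift map() applies
  return r < 0 ? Math.abs(r) - 2 : r;  // -5 -> row 4 excluded; 5 -> row 4 included
}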
@Override Val apply(Env env, Env.StackHelp stk, AST asts[]) {
  // Compute the variable args.  Find the common row count.
  Val vals[] = new Val[asts.length];
  Vec vec = null;
  for (int i = 1; i < asts.length; i++) {
    vals[i] = stk.track(asts[i].exec(env));
    if (vals[i].isFrame()) {
      Vec anyvec = vals[i].getFrame().anyVec();
      if (anyvec == null) continue; // Ignore the empty frame
      if (vec == null) vec = anyvec;
      else if (vec.length() != anyvec.length())
        throw new IllegalArgumentException("cbind frames must have all the same rows, found " + vec.length() + " and " + anyvec.length() + " rows.");
    }
  }
  boolean clean = false;
  if (vec == null) { vec = Vec.makeZero(1); clean = true; } // Default to length 1

  // Populate the new Frame
  Frame fr = new Frame();
  for (int i = 1; i < asts.length; i++) {
    switch (vals[i].type()) {
      case Val.FRM: fr.add(fr.makeCompatible(vals[i].getFrame())); break;
      case Val.FUN: throw H2O.unimpl();
      case Val.STR: throw H2O.unimpl();
      case Val.NUM:
        // Auto-expand scalars to fill every row
        double d = vals[i].getNum();
        fr.add(Double.toString(d), vec.makeCon(d));
        break;
      default: throw H2O.unimpl();
    }
  }
  if (clean) vec.remove();
  return new ValFrame(fr);
}
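// The Val.NUM branch above relies on Vec.makeCon to replicate one value down
// every row of a compatible Vec.  A hedged sketch of what that auto-expansion
// amounts to, using only calls that appear in the method itself (assumes a
// running H2O node and a non-empty in-memory Frame; names are illustrative):
/** Sketch: append a constant column matching fr's row layout, as the
 *  Val.NUM case effectively does. */
static void cbindScalar(water.fvec.Frame fr, double d) {
  water.fvec.Vec template = fr.anyVec();           // supplies row count + chunk layout
  fr.add(Double.toString(d), template.makeCon(d)); // one copy of d per row
}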
@Override protected void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException {
  String uri = getDecodedUri(request);
  try {
    Pattern p = Pattern.compile(".*/NodePersistentStorage.bin/([^/]+)/([^/]+)");
    Matcher m = p.matcher(uri);
    boolean b = m.matches();
    if (!b) {
      setResponseStatus(response, HttpServletResponse.SC_BAD_REQUEST);
      response.getWriter().write("Improperly formatted URI");
      return;
    }

    String categoryName = m.group(1);
    String keyName = m.group(2);
    NodePersistentStorage nps = H2O.getNPS();
    AtomicLong length = new AtomicLong();
    InputStream is = nps.get(categoryName, keyName, length);
    if (length.get() > (long) Integer.MAX_VALUE) {
      throw new Exception("NPS value size exceeds Integer.MAX_VALUE");
    }
    response.setContentType("application/octet-stream");
    response.setContentLength((int) length.get());
    response.addHeader("Content-Disposition", "attachment; filename=" + keyName + ".flow");
    setResponseStatus(response, HttpServletResponse.SC_OK);
    OutputStream os = response.getOutputStream();
    water.util.FileUtils.copyStream(is, os, 2048);
  } catch (Exception e) {
    sendErrorResponse(response, e, uri);
  } finally {
    logRequest("GET", request, response);
  }
}
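// The regex above pins the last two path segments as the NPS category and key.
// A self-contained sketch of that extraction (the sample URI is made up):
public class NpsUriDemo {
  public static void main(String[] args) {
    java.util.regex.Pattern p =
        java.util.regex.Pattern.compile(".*/NodePersistentStorage.bin/([^/]+)/([^/]+)");
    java.util.regex.Matcher m = p.matcher("/3/NodePersistentStorage.bin/notebook/myflow");
    if (m.matches()) {
      System.out.println("category=" + m.group(1)); // notebook
      System.out.println("key=" + m.group(2));      // myflow
    }
  }
}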
public final Vec[] vecs() {
  if (_vecs != null) return _vecs;
  // Load all Vec headers; load them all in parallel by spawning F/J tasks.
  final Vec[] vecs = new Vec[_keys.length];
  Futures fs = new Futures();
  for (int i = 0; i < _keys.length; i++) {
    final int ii = i;
    final Key k = _keys[i];
    H2OCountedCompleter t = new H2OCountedCompleter() {
      // We need higher priority here as there is a danger of deadlock in
      // case of many calls from MRTask2 at once (e.g. frame with many
      // vectors invokes rollup tasks for all vectors in parallel).  Should
      // probably be done in CPS style in the future.
      @Override public byte priority() { return H2O.MIN_HI_PRIORITY; }
      @Override public void compute2() {
        vecs[ii] = DKV.get(k).get();
        tryComplete();
      }
    };
    H2O.submitTask(t);
    fs.add(t);
  }
  fs.blockForPending();
  return _vecs = vecs;
}
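// The pattern here is a plain scatter/gather: one task per key, all submitted
// up front, then a single barrier.  For readers unfamiliar with H2O's F/J
// wrappers, an equivalent sketch in plain java.util.concurrent (illustrative
// only; the real code must also raise F/J priority to dodge the deadlock the
// comment above describes):
/** Fetch one value per key in parallel, then block for all of them. */
static <K, V> V[] fetchAll(K[] keys, java.util.function.Function<K, V> fetch, V[] out)
    throws Exception {
  java.util.concurrent.ExecutorService pool =
      java.util.concurrent.Executors.newFixedThreadPool(Math.min(keys.length, 8));
  java.util.concurrent.Future<?>[] fs = new java.util.concurrent.Future<?>[keys.length];
  for (int i = 0; i < keys.length; i++) {
    final int ii = i;
    fs[i] = pool.submit(() -> out[ii] = fetch.apply(keys[ii])); // one task per key
  }
  for (java.util.concurrent.Future<?> f : fs) f.get();          // barrier: wait for every fetch
  pool.shutdown();
  return out;
}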
// ------------------------------------------------------------------------
// Zipped file; no parallel decompression; decompress into local chunks,
// parse local chunks; distribute chunks later.
ParseWriter streamParseZip(final InputStream is, final StreamParseWriter dout, InputStream bvs) throws IOException {
  // All output into a fresh pile of NewChunks, one per column
  if (!_setup._parse_type._parallelParseSupported) throw H2O.unimpl();
  StreamData din = new StreamData(is);
  int cidx = 0;
  StreamParseWriter nextChunk = dout;
  int zidx = bvs.read(null, 0, 0);   // Back-channel read of chunk index
  assert zidx == 1;
  while (is.available() > 0) {
    int xidx = bvs.read(null, 0, 0); // Back-channel read of chunk index
    if (xidx > zidx) {   // Advanced chunk index of underlying ByteVec stream?
      zidx = xidx;       // Record advancing of chunk
      nextChunk.close(); // Match output chunks to input zipfile chunks
      if (dout != nextChunk) {
        dout.reduce(nextChunk);
        if (_jobKey != null && ((Job) DKV.getGet(_jobKey)).isCancelledOrCrashed()) break;
      }
      nextChunk = nextChunk.nextChunk();
    }
    parseChunk(cidx++, din, nextChunk);
  }
  parseChunk(cidx, din, nextChunk); // Parse the remaining partial 32K buffer
  nextChunk.close();
  if (dout != nextChunk) dout.reduce(nextChunk);
  return dout;
}
@Override protected void doPost(HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException {
  String uri = getDecodedUri(request);
  try {
    Pattern p = Pattern.compile(".*NodePersistentStorage.bin/([^/]+)/([^/]+)");
    Matcher m = p.matcher(uri);
    boolean b = m.matches();
    if (!b) {
      setResponseStatus(response, HttpServletResponse.SC_BAD_REQUEST);
      response.getWriter().write("Improperly formatted URI");
      return;
    }

    String categoryName = m.group(1);
    String keyName = m.group(2);
    InputStream is = extractPartInputStream(request, response);
    if (is == null) {
      return;
    }
    H2O.getNPS().put(categoryName, keyName, is);
    long length = H2O.getNPS().get_length(categoryName, keyName);
    String responsePayload =
        "{ "
            + "\"category\" : " + "\"" + categoryName + "\", "
            + "\"name\" : " + "\"" + keyName + "\", "
            + "\"total_bytes\" : " + length + " "
            + "}\n";
    response.setContentType("application/json");
    response.getWriter().write(responsePayload);
  } catch (Exception e) {
    sendErrorResponse(response, e, uri);
  } finally {
    logRequest("POST", request, response);
  }
}
public void handle(String target, Request baseRequest, HttpServletRequest request, HttpServletResponse response)
    throws IOException, ServletException {
  H2O.getJetty().handle1(target, baseRequest, request, response);
}
ParseWriter streamParse(final InputStream is, final ParseWriter dout) throws IOException {
  if (!_setup._parse_type._parallelParseSupported) throw H2O.unimpl();
  StreamData din = new StreamData(is);
  int cidx = 0;
  // FIXME leaving _jobKey == null until sampling is done; this means entire zip files
  // FIXME are parsed for parseSetup
  while (is.available() > 0
      && (_jobKey == null || !((Job) DKV.getGet(_jobKey)).isCancelledOrCrashed()))
    parseChunk(cidx++, din, dout);
  parseChunk(cidx, din, dout); // Parse the remaining partial 32K buffer
  return dout;
}
private static void sendErrorResponse(HttpServletResponse response, Exception e, String uri) {
  if (e instanceof H2OFailException) {
    H2OFailException ee = (H2OFailException) e;
    H2OError error = ee.toH2OError(uri);
    Log.fatal("Caught exception (fatal to the cluster): " + error.toString());
    throw H2O.fail(error.toString());
  } else if (e instanceof H2OAbstractRuntimeException) {
    H2OAbstractRuntimeException ee = (H2OAbstractRuntimeException) e;
    H2OError error = ee.toH2OError(uri);
    Log.warn("Caught exception: " + error.toString());
    setResponseStatus(response, HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
    // Note: don't use Schema.schema(version, error) because we have to work at bootstrap:
    try {
      @SuppressWarnings("unchecked")
      String s = new H2OErrorV3().fillFromImpl(error).toJsonString();
      response.getWriter().write(s);
    } catch (Exception ignore) {}
  } else { // Make sure that no Exception is ever thrown out from the request
    H2OError error = new H2OError(e, uri);
    // Some special cases for which we return 400 because it's likely a problem with the client request:
    if (e instanceof IllegalArgumentException
        || e instanceof FileNotFoundException
        || e instanceof MalformedURLException)
      error._http_status = HttpResponseStatus.BAD_REQUEST.getCode();
    setResponseStatus(response, error._http_status);
    Log.warn("Caught exception: " + error.toString());
    // Note: don't use Schema.schema(version, error) because we have to work at bootstrap:
    try {
      @SuppressWarnings("unchecked")
      String s = new H2OErrorV3().fillFromImpl(error).toJsonString();
      response.getWriter().write(s);
    } catch (Exception ignore) {}
  }
}
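// The status-code policy above is the only part a client can observe: likely
// client mistakes map to 400, everything else to 500 (or a cluster-fatal
// rethrow).  A condensed sketch of that mapping as a pure function
// (hypothetical helper, for illustration):
static int statusFor(Exception e) {
  if (e instanceof IllegalArgumentException
      || e instanceof java.io.FileNotFoundException
      || e instanceof java.net.MalformedURLException)
    return 400;  // likely a client-side problem
  return 500;    // everything else is a server error
}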
public static void main(String[] args) {
  try {
    H2O.main(args);
    TestUtil.stall_till_cloudsize(3);
    List<Class> tests = JUnitRunner.all();
    Result r = org.junit.runner.JUnitCore.runClasses(tests.toArray(new Class[0]));
    if (r.getFailureCount() == 0) {
      System.out.println("Successfully ran the following tests in " + (r.getRunTime() / 1000) + "s");
      for (Class c : tests) System.out.println(c.getName());
    } else {
      for (Failure f : r.getFailures()) {
        System.err.println(f.getDescription());
        if (f.getException() != null) f.getException().printStackTrace();
      }
    }
    System.exit(r.getFailureCount());
  } catch (Throwable t) {
    t.printStackTrace();
    System.exit(1);
  }
}
@Override protected Frame rebalance(final Frame original_fr, boolean local, final String name) {
  if (original_fr == null) return null;
  if (_parms._force_load_balance) {
    int original_chunks = original_fr.anyVec().nChunks();
    _job.update(0, "Load balancing " + name.substring(name.length() - 5) + " data...");
    int chunks = desiredChunks(original_fr, local);
    if (!_parms._reproducible) {
      if (original_chunks >= chunks) {
        if (!_parms._quiet_mode)
          Log.info("Dataset already contains " + original_chunks + " chunks. No need to rebalance.");
        return original_fr;
      }
    } else { // Reproducible: set chunks to 1
      assert chunks == 1;
      if (!_parms._quiet_mode)
        Log.warn("Reproducibility enforced - using only 1 thread - can be slow.");
      if (original_chunks == 1) return original_fr;
    }
    if (!_parms._quiet_mode)
      Log.info("Rebalancing " + name.substring(name.length() - 5) + " dataset into " + chunks + " chunks.");
    Key newKey = Key.make(name + ".chks" + chunks);
    RebalanceDataSet rb = new RebalanceDataSet(original_fr, newKey, chunks);
    H2O.submitTask(rb).join();
    Frame rebalanced_fr = DKV.get(newKey).get();
    Scope.track(rebalanced_fr);
    return rebalanced_fr;
  }
  return original_fr;
}
@Override Val apply(Env env, Env.StackHelp stk, AST asts[]) {
  // Execute all args.  Find a canonical frame; all Frames must look like this one.
  // Each argument turns into either a Frame (whose rows are entirely
  // inlined) or a scalar (which is replicated across as a single row).
  Frame fr = null; // Canonical Frame; all frames have the same column count, types and names
  int nchks = 0;   // Total chunks
  Val vals[] = new Val[asts.length]; // Computed AST results
  for (int i = 1; i < asts.length; i++) {
    vals[i] = stk.track(asts[i].exec(env));
    if (vals[i].isFrame()) {
      fr = vals[i].getFrame();
      nchks += fr.anyVec().nChunks(); // Total chunks
    } else nchks++;                   // One chunk per scalar
  }
  // No Frame, just a pile-o-scalars?
  Vec zz = null; // The zero-length vec for the zero-frame frame
  if (fr == null) { // Zero-length, 1-column, default name
    fr = new Frame(new String[] {Frame.defaultColName(0)}, new Vec[] {zz = Vec.makeZero(0)});
    if (asts.length == 1) return new ValFrame(fr);
  }

  // Verify all Frames are the same columns, names, and types.  Domains can
  // vary, and will be the union.
  final Frame frs[] = new Frame[asts.length]; // Input frames
  final byte[] types = fr.types();            // Column types
  final int ncols = fr.numCols();
  final long[] espc = new long[nchks + 1];    // Compute a new layout!
  int coffset = 0;
  for (int i = 1; i < asts.length; i++) {
    Val val = vals[i]; // Save values computed for pass 2
    Frame fr0 = val.isFrame() ? val.getFrame()
        // Scalar: auto-expand into a 1-row frame
        : stk.track(new Frame(fr._names, Vec.makeCons(val.getNum(), 1L, fr.numCols())));
    // Check that all frames are compatible
    if (fr.numCols() != fr0.numCols())
      throw new IllegalArgumentException("rbind frames must have all the same columns, found " + fr.numCols() + " and " + fr0.numCols() + " columns.");
    if (!Arrays.deepEquals(fr._names, fr0._names))
      throw new IllegalArgumentException("rbind frames must have all the same column names, found " + Arrays.toString(fr._names) + " and " + Arrays.toString(fr0._names));
    if (!Arrays.equals(types, fr0.types()))
      throw new IllegalArgumentException("rbind frames must have all the same column types, found " + Arrays.toString(types) + " and " + Arrays.toString(fr0.types()));
    frs[i] = fr0; // Save frame
    // Roll up the ESPC row counts
    long roffset = espc[coffset];
    long[] espc2 = fr0.anyVec().espc();
    for (int j = 1; j < espc2.length; j++) // Roll up the row counts
      espc[coffset + j] = (roffset + espc2[j]);
    coffset += espc2.length - 1; // Chunk offset
  }
  if (zz != null) zz.remove();

  // Build up the new domains for each vec
  HashMap<String, Integer>[] dmap = new HashMap[types.length];
  String[][] domains = new String[types.length][];
  int[][][] cmaps = new int[types.length][][];
  for (int k = 0; k < types.length; ++k) {
    dmap[k] = new HashMap<>();
    int c = 0;
    byte t = types[k];
    if (t == Vec.T_CAT) {
      int[][] maps = new int[frs.length][];
      for (int i = 1; i < frs.length; i++) {
        maps[i] = new int[frs[i].vec(k).domain().length];
        for (int j = 0; j < maps[i].length; j++) {
          String s = frs[i].vec(k).domain()[j];
          if (!dmap[k].containsKey(s)) dmap[k].put(s, maps[i][j] = c++);
          else maps[i][j] = dmap[k].get(s);
        }
      }
      cmaps[k] = maps;
    } else {
      cmaps[k] = new int[frs.length][];
    }
    domains[k] = c == 0 ? null : new String[c];
    for (Map.Entry<String, Integer> e : dmap[k].entrySet())
      domains[k][e.getValue()] = e.getKey();
  }

  // Now make Keys for the new Vecs
  Key<Vec>[] keys = fr.anyVec().group().addVecs(fr.numCols());
  Vec[] vecs = new Vec[fr.numCols()];
  int rowLayout = Vec.ESPC.rowLayout(keys[0], espc);
  for (int i = 0; i < vecs.length; i++)
    vecs[i] = new Vec(keys[i], rowLayout, domains[i], types[i]);

  // Do the row-binds column-by-column.
  // Switch to F/J thread for continuations.
  ParallelRbinds t;
  H2O.submitTask(t = new ParallelRbinds(frs, espc, vecs, cmaps)).join();
  return new ValFrame(new Frame(fr.names(), t._vecs));
}
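// The dmap/cmaps bookkeeping above is the heart of rbind's categorical
// handling: each input frame's local domain indices are remapped into one
// union domain.  A self-contained sketch of that merge for two domains
// (illustrative names, plain JDK):
public class DomainMergeDemo {
  public static void main(String[] args) {
    String[][] domains = { {"cat", "dog"}, {"dog", "emu"} };
    java.util.Map<String, Integer> union = new java.util.LinkedHashMap<>();
    int[][] maps = new int[domains.length][];
    for (int i = 0; i < domains.length; i++) {
      maps[i] = new int[domains[i].length];
      for (int j = 0; j < domains[i].length; j++) {
        // First sighting assigns the next union index; repeats reuse it.
        Integer idx = union.putIfAbsent(domains[i][j], union.size());
        maps[i][j] = (idx == null) ? union.size() - 1 : idx;
      }
    }
    System.out.println(union);                               // {cat=0, dog=1, emu=2}
    System.out.println(java.util.Arrays.toString(maps[1]));  // [1, 2]
  }
}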
public void doGeneric(String method, HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException {
  try {
    startTransaction(request.getHeader("User-Agent"));

    // Marshal Jetty request parameters to Nano-style.
    // Note that getServletPath does an un-escape so that the %24 of job ids are turned into $ characters.
    String uri = request.getServletPath();
    Properties headers = new Properties();
    Enumeration<String> en = request.getHeaderNames();
    while (en.hasMoreElements()) {
      String key = en.nextElement();
      String value = request.getHeader(key);
      headers.put(key, value);
    }
    Properties parms = new Properties();
    Map<String, String[]> parameterMap = request.getParameterMap();
    for (Map.Entry<String, String[]> entry : parameterMap.entrySet()) {
      String key = entry.getKey();
      for (String value : entry.getValue())
        parms.put(key, value);
    }

    // Make the Nano call.
    NanoHTTPD.Response resp = water.api.RequestServer.SERVER.serve(uri, method, headers, parms);

    // Un-marshal the Nano response back to Jetty.
    String choppedNanoStatus = resp.status.substring(0, 3); // e.g. "200 OK" -> "200"
    assert choppedNanoStatus.length() == 3;
    int sc = Integer.parseInt(choppedNanoStatus);
    setResponseStatus(response, sc);
    response.setContentType(resp.mimeType);
    Properties header = resp.header;
    Enumeration<Object> en2 = header.keys();
    while (en2.hasMoreElements()) {
      String key = (String) en2.nextElement();
      String value = header.getProperty(key);
      response.setHeader(key, value);
    }

    OutputStream os = response.getOutputStream();
    if (resp instanceof NanoHTTPD.StreamResponse) {
      NanoHTTPD.StreamResponse ssr = (NanoHTTPD.StreamResponse) resp;
      ssr.streamWriter.writeTo(os);
    } else {
      InputStream is = resp.data;
      FileUtils.copyStream(is, os, 1024);
    }
  } finally {
    logRequest(method, request, response);

    // Handle shutdown if it was requested.
    if (H2O.getShutdownRequested()) {
      (new Thread() {
        public void run() {
          boolean[] confirmations = new boolean[H2O.CLOUD.size()];
          if (H2O.SELF.index() >= 0) confirmations[H2O.SELF.index()] = true;
          for (H2ONode n : H2O.CLOUD._memary)
            if (n != H2O.SELF)
              new RPC(n, new ShutdownTsk(H2O.SELF, n.index(), 1000, confirmations)).call();
          try { Thread.sleep(2000); } catch (Exception ignore) {}
          int failedToShutdown = 0;
          for (boolean b : confirmations) if (!b) failedToShutdown++; // Count nodes that never confirmed
          Log.info("Orderly shutdown: "
              + (failedToShutdown > 0 ? failedToShutdown + " nodes failed to shut down! " : "")
              + " Shutting down now.");
          H2O.closeAll();
          H2O.exit(failedToShutdown);
        }
      }).start();
    }
    endTransaction();
  }
}
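// Nano status strings carry both code and reason ("200 OK"), so the handler
// chops the first three characters before parseInt.  A tiny sketch of that
// un-marshaling step (hypothetical helper; the sample string is made up):
static int nanoStatusCode(String status) {
  return Integer.parseInt(status.substring(0, 3)); // "404 Not Found" -> 404
}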
protected void createServer(Connector connector) throws Exception {
  _server.setConnectors(new Connector[] {connector});

  if (H2O.ARGS.hash_login || H2O.ARGS.ldap_login) {
    // REFER TO http://www.eclipse.org/jetty/documentation/9.1.4.v20140401/embedded-examples.html#embedded-secured-hello-handler
    if (H2O.ARGS.login_conf == null) {
      Log.err("Must specify -login_conf argument");
      H2O.exit(1);
    }

    LoginService loginService;
    if (H2O.ARGS.hash_login) {
      Log.info("Configuring HashLoginService");
      loginService = new HashLoginService("H2O", H2O.ARGS.login_conf);
    } else if (H2O.ARGS.ldap_login) {
      Log.info("Configuring JAASLoginService (with LDAP)");
      System.setProperty("java.security.auth.login.config", H2O.ARGS.login_conf);
      loginService = new JAASLoginService("ldaploginmodule");
    } else {
      throw H2O.fail();
    }
    IdentityService identityService = new DefaultIdentityService();
    loginService.setIdentityService(identityService);
    _server.addBean(loginService);

    // Set a security handler as the first handler in the chain.
    ConstraintSecurityHandler security = new ConstraintSecurityHandler();

    // Set up a constraint to authenticate all calls, and allow certain roles in.
    Constraint constraint = new Constraint();
    constraint.setName("auth");
    constraint.setAuthenticate(true);

    // Configure role stuff (to be disregarded).  We are ignoring roles, and
    // only going off the user name.
    //
    //   Jetty 8 and prior:
    //     Jetty 8 requires security.setStrict(false) and ANY_ROLE.
    security.setStrict(false);
    constraint.setRoles(new String[] {Constraint.ANY_ROLE});
    //
    //   Jetty 9 and later:
    //     Jetty 9 and later uses a different servlet spec, and ANY_AUTH gives
    //     the same behavior for that API version as ANY_ROLE did previously.
    //     This required some low-level debugging to figure out, so I'm
    //     documenting it here.  Jetty 9 did not require security.setStrict(false).
    //
    //     constraint.setRoles(new String[]{Constraint.ANY_AUTH});

    ConstraintMapping mapping = new ConstraintMapping();
    mapping.setPathSpec("/*"); // Lock down all API calls
    mapping.setConstraint(constraint);
    security.setConstraintMappings(Collections.singletonList(mapping));

    // Authentication / Authorization
    security.setAuthenticator(new BasicAuthenticator());
    security.setLoginService(loginService);

    // Pass-through to H2O if authenticated.
    registerHandlers(security);
    _server.setHandler(security);
  } else {
    registerHandlers(_server);
  }

  _server.start();
}
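// With the BasicAuthenticator in place, every API path requires an
// Authorization header.  A client-side sketch of what a request then looks
// like, using only the standard JDK (host, port, path, and credentials are
// made up for illustration):
public class BasicAuthDemo {
  public static void main(String[] args) throws Exception {
    java.net.URL url = new java.net.URL("http://localhost:54321/3/Cloud"); // any locked-down path
    java.net.HttpURLConnection conn = (java.net.HttpURLConnection) url.openConnection();
    String creds = java.util.Base64.getEncoder().encodeToString("user:pass".getBytes("UTF-8"));
    conn.setRequestProperty("Authorization", "Basic " + creds); // what BasicAuthenticator checks
    System.out.println(conn.getResponseCode()); // 200 if accepted, 401 otherwise
  }
}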
protected Response serve_debug() { throw H2O.unimpl(); }