public PersistManager(URI iceRoot) {
  I = new Persist[MAX_BACKENDS];
  stats = new PersistStatsEntry[MAX_BACKENDS];
  for (int i = 0; i < stats.length; i++) {
    stats[i] = new PersistStatsEntry();
  }

  if (iceRoot == null) {
    Log.err("ice_root must be specified. Exiting.");
    H2O.exit(1);
  }

  Persist ice = null;
  boolean windowsPath = iceRoot.toString().matches("^[a-zA-Z]:.*");
  if (windowsPath) {
    ice = new PersistFS(new File(iceRoot.toString()));
  } else if ((iceRoot.getScheme() == null) || Schemes.FILE.equals(iceRoot.getScheme())) {
    ice = new PersistFS(new File(iceRoot.getPath()));
  } else if (Schemes.HDFS.equals(iceRoot.getScheme())) {
    Log.err("HDFS ice_root not yet supported. Exiting.");
    H2O.exit(1);

    // I am not sure anyone actually ever does this.
    // H2O on Hadoop launches use local disk for ice root.
    // This has a chance to work, but turn it off until it gets tested.
    //
    // try {
    //   Class klass = Class.forName("water.persist.PersistHdfs");
    //   java.lang.reflect.Constructor constructor = klass.getConstructor(new Class[]{URI.class});
    //   ice = (Persist) constructor.newInstance(iceRoot);
    // } catch (Exception e) {
    //   Log.err("Could not initialize HDFS");
    //   throw new RuntimeException(e);
    // }
  }
  I[Value.ICE] = ice;
  I[Value.NFS] = new PersistNFS();

  // The HDFS and S3 backends are optional: load them reflectively and
  // degrade gracefully if the classes are not on the classpath.
  try {
    Class klass = Class.forName("water.persist.PersistHdfs");
    java.lang.reflect.Constructor constructor = klass.getConstructor();
    I[Value.HDFS] = (Persist) constructor.newInstance();
    Log.info("HDFS subsystem successfully initialized");
  } catch (Throwable ignore) {
    Log.info("HDFS subsystem not available");
  }
  try {
    Class klass = Class.forName("water.persist.PersistS3");
    java.lang.reflect.Constructor constructor = klass.getConstructor();
    I[Value.S3] = (Persist) constructor.newInstance();
    Log.info("S3 subsystem successfully initialized");
  } catch (Throwable ignore) {
    Log.info("S3 subsystem not available");
  }
}
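// Resulting backend table: I[Value.ICE] and I[Value.NFS] are always populated,
// while I[Value.HDFS] and I[Value.S3] remain null when their optional jars are
// absent, so callers must expect null entries for those slots.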
static {
  InputStream resource = Boot._init.getResource2("/page.html");
  try {
    _htmlTemplate = new String(ByteStreams.toByteArray(resource)).replace("%cloud_name", H2O.NAME);
  } catch (NullPointerException e) {
    Log.err(e);
    Log.die("page.html not found in resources.");
  } catch (Exception e) {
    Log.err(e);
    Log.die(e.getMessage());
  } finally {
    Closeables.closeQuietly(resource);
  }
}
private static void addFolder2(
    FileSystem fs, Path p, ArrayList<String> keys, ArrayList<String> failed) {
  try {
    if (fs == null) return;
    Futures futures = new Futures();
    for (FileStatus file : fs.listStatus(p)) {
      Path pfs = file.getPath();
      if (file.isDir()) {
        addFolder2(fs, pfs, keys, failed);
      } else {
        if (pfs.getName().endsWith(Extensions.JSON)) {
          throw H2O.unimpl();
        } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
          throw H2O.unimpl();
        } else {
          Key k = HdfsFileVec.make(file, futures);
          keys.add(k.toString());
          Log.info("PersistHdfs: DKV.put(" + k + ")");
        }
      }
    }
  } catch (Exception e) {
    Log.err(e);
    failed.add(p.toString());
  }
}
private void sendAck() {
  // Send results back
  DTask dt, origDt = _dt; // _dt can go null the instant it is sent over the wire
  assert origDt != null;  // Freed after completion
  while ((dt = _dt) != null) { // Retry loop for broken TCP sends
    AutoBuffer ab = null;
    try {
      // Start the ACK with results back to client. If the client is
      // asking for a class/id mapping (or any job running at FETCH_ACK
      // priority) then return a udp.fetchack byte instead of a udp.ack.
      // The receiver thread then knows to handle the mapping at the higher
      // priority.
      UDP.udp udp = dt.priority() == H2O.FETCH_ACK_PRIORITY ? UDP.udp.fetchack : UDP.udp.ack;
      ab = new AutoBuffer(_client, udp._prior).putTask(udp, _tsknum).put1(SERVER_UDP_SEND);
      assert ab.position() == 1 + 2 + 4 + 1;
      dt.write(ab);                 // Write the DTask - could be a very large write
      dt._repliedTcp = ab.hasTCP(); // Resends do not need to repeat TCP result
      ab.close();                   // Then close; send final byte
      _computedAndReplied = true;   // After the final handshake, set computed+replied bit
      break;                        // Break out of retry loop
    } catch (AutoBuffer.AutoBufferException e) {
      if (!_client._heartbeat._client) // Report on servers only; clients are allowed to be flaky
        Log.info("IOException during ACK, " + e._ioe.getMessage()
            + ", t#" + _tsknum + " AB=" + ab + ", waiting and retrying...");
      if (ab != null) ab.drainClose(); // Guard: the AutoBuffer construction may itself have failed
      if (_client._heartbeat._client)  // Dead client will not accept a TCP ACK response?
        this.CAS_DT(dt, null);         // Cancel the ACK
      try {
        Thread.sleep(100);
      } catch (InterruptedException ignore) {
      }
    } catch (Exception e) { // Custom serializer just barfed?
      Log.err(e);           // Log custom serializer exception
      if (ab != null) ab.drainClose();
    }
  } // end of while(true)
  if (dt == null)
    Log.info("Remote task#" + _tsknum + " " + origDt.getClass()
        + " to " + _client + " has been cancelled by remote");
  else {
    if (dt instanceof MRTask && dt.logVerbose())
      Log.debug("Done remote task#" + _tsknum + " " + dt.getClass() + " to " + _client);
    _client.record_task_answer(this); // Setup for retrying Ack & AckAck, if not canceled
  }
}
private static void addFolder(FileSystem fs, Path p, JsonArray succeeded, JsonArray failed) {
  try {
    if (fs == null) return;
    for (FileStatus file : fs.listStatus(p)) {
      Path pfs = file.getPath();
      if (file.isDir()) {
        addFolder(fs, pfs, succeeded, failed);
      } else {
        Key k = Key.make(pfs.toString());
        long size = file.getLen();
        Value val = null;
        if (pfs.getName().endsWith(Extensions.JSON)) {
          JsonParser parser = new JsonParser();
          JsonObject json = parser.parse(new InputStreamReader(fs.open(pfs))).getAsJsonObject();
          JsonElement v = json.get(Constants.VERSION);
          if (v == null) throw new RuntimeException("Missing version");
          JsonElement type = json.get(Constants.TYPE);
          if (type == null) throw new RuntimeException("Missing type");
          Class c = Class.forName(type.getAsString());
          OldModel model = (OldModel) c.newInstance();
          model.fromJson(json);
        } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
          FSDataInputStream s = fs.open(pfs);
          int sz = (int) Math.min(1L << 20, size); // Read up to the 1st meg
          byte[] mem = MemoryManager.malloc1(sz);
          s.readFully(mem);
          // Convert to a ValueArray (hope it fits in 1Meg!)
          ValueArray ary = new ValueArray(k, 0).read(new AutoBuffer(mem));
          val = new Value(k, ary, Value.HDFS);
        } else if (size >= 2 * ValueArray.CHUNK_SZ) {
          // ValueArray byte wrapper over a large file
          val = new Value(k, new ValueArray(k, size), Value.HDFS);
        } else {
          val = new Value(k, (int) size, Value.HDFS); // Plain Value
          val.setdsk();
        }
        // The JSON branch deserializes a model and produces no Value to store.
        if (val != null) {
          DKV.put(k, val);
          Log.info("PersistHdfs: DKV.put(" + k + ")");
        }
        JsonObject o = new JsonObject();
        o.addProperty(Constants.KEY, k.toString());
        o.addProperty(Constants.FILE, pfs.toString());
        o.addProperty(Constants.VALUE_SIZE, file.getLen());
        succeeded.add(o);
      }
    }
  } catch (Exception e) {
    Log.err(e);
    JsonObject o = new JsonObject();
    o.addProperty(Constants.FILE, p.toString());
    o.addProperty(Constants.ERROR, e.getMessage());
    failed.add(o);
  }
}
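// Note: addFolder2(...) above is the newer import path, producing
// HdfsFileVec-backed keys; this older variant wraps files in ValueArray/Value
// objects and reports per-file success or failure as JSON.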
public int peek() {
  if (_idx > 0) return _buf[_idx - 1];
  try {
    int b = _is.read();
    if (b != -1) return push(b);
  } catch (IOException e) {
    Log.err(e);
  }
  throw new ParseException("Premature EOF");
}
public int get() {
  if (_idx > 0) return _buf[--_idx];
  try {
    int b = _is.read();
    if (b != -1) return b;
  } catch (IOException ioe) {
    Log.err(ioe);
  }
  throw new ParseException("Premature EOF");
}
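// Usage sketch (hypothetical caller `p`; the enclosing parser class is not
// named in this excerpt). peek() reads one byte ahead and stashes it via
// push(), so a following get() returns that same byte; _buf acts as a small
// LIFO pushback stack that is drained before the stream is read again:
//
//   while (Character.isWhitespace(p.peek())) p.get(); // skip leading whitespace
//   int first = p.get();                              // consume the next byte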
public static double getBoolean(Comparable o) {
  if (o instanceof Boolean) return ((Boolean) o) ? 1.0 : 0.0;
  if (o instanceof String) {
    try {
      if ("true".equalsIgnoreCase((String) o)) return 1.0;
      if ("false".equalsIgnoreCase((String) o)) return 0.0;
    } catch (Throwable t) {
      Log.err(t);
    }
  }
  return Double.NaN;
}
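// Illustrative semantics, derived from the branches above:
//   getBoolean(Boolean.TRUE) -> 1.0
//   getBoolean("FALSE")      -> 0.0        (case-insensitive string match)
//   getBoolean("yes")        -> Double.NaN (unrecognized string)
//   getBoolean(null)         -> Double.NaN (no branch matches)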
private void cancel(final String msg, JobState resultingState) {
  if (resultingState == JobState.CANCELLED) {
    Log.info("Job " + self() + "(" + description + ") was cancelled.");
  } else {
    Log.err("Job " + self() + "(" + description + ") failed.");
    Log.err(msg);
  }
  exception = msg;
  state = resultingState;
  // Replace the finished job by a job handle
  replaceByJobHandle();
  DKV.write_barrier();
  final Job job = this;
  H2O.submitTask(new H2OCountedCompleter() {
    @Override
    public void compute2() {
      job.onCancelled();
    }
  });
}
public ValidationMessage(
    ModelBuilder.ValidationMessage.MessageType message_type, String field_name, String message) {
  this.message_type = message_type;
  this.field_name = field_name;
  this.message = message;
  switch (message_type) {
    case INFO:  Log.info(field_name + ": " + message); break;
    case WARN:  Log.warn(field_name + ": " + message); break;
    case ERROR: Log.err(field_name + ": " + message);  break;
  }
}
protected void createServer(Connector connector) throws Exception {
  _server.setConnectors(new Connector[] {connector});

  if (H2O.ARGS.hash_login || H2O.ARGS.ldap_login) {
    // See http://www.eclipse.org/jetty/documentation/9.1.4.v20140401/embedded-examples.html#embedded-secured-hello-handler
    if (H2O.ARGS.login_conf == null) {
      Log.err("Must specify -login_conf argument");
      H2O.exit(1);
    }

    LoginService loginService;
    if (H2O.ARGS.hash_login) {
      Log.info("Configuring HashLoginService");
      loginService = new HashLoginService("H2O", H2O.ARGS.login_conf);
    } else if (H2O.ARGS.ldap_login) {
      Log.info("Configuring JAASLoginService (with LDAP)");
      System.setProperty("java.security.auth.login.config", H2O.ARGS.login_conf);
      loginService = new JAASLoginService("ldaploginmodule");
    } else {
      throw H2O.fail();
    }
    IdentityService identityService = new DefaultIdentityService();
    loginService.setIdentityService(identityService);
    _server.addBean(loginService);

    // Set a security handler as the first handler in the chain.
    ConstraintSecurityHandler security = new ConstraintSecurityHandler();

    // Set up a constraint to authenticate all calls, and allow certain roles in.
    Constraint constraint = new Constraint();
    constraint.setName("auth");
    constraint.setAuthenticate(true);

    // Roles are disregarded: authentication goes off the user name only.
    //
    // Jetty 8 and prior require security.setStrict(false) and ANY_ROLE.
    security.setStrict(false);
    constraint.setRoles(new String[] {Constraint.ANY_ROLE});

    // Jetty 9 and later use a different servlet spec, and ANY_AUTH gives the
    // same behavior for that API version as ANY_ROLE did previously. This
    // required some low-level debugging to figure out, so it is documented
    // here. Jetty 9 does not require security.setStrict(false).
    //
    // constraint.setRoles(new String[]{Constraint.ANY_AUTH});

    ConstraintMapping mapping = new ConstraintMapping();
    mapping.setPathSpec("/*"); // Lock down all API calls
    mapping.setConstraint(constraint);
    security.setConstraintMappings(Collections.singletonList(mapping));

    // Authentication / Authorization
    security.setAuthenticator(new BasicAuthenticator());
    security.setLoginService(loginService);

    // Pass through to H2O if authenticated.
    registerHandlers(security);
    _server.setHandler(security);
  } else {
    registerHandlers(_server);
  }

  _server.start();
}
public static int onLoad(String className) {
  Integer I = MAP.get(className);
  if (I == null) throw Log.err(new RuntimeException("TypeMap missing " + className));
  return I;
}
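// Illustrative call: for any class name previously registered in MAP, onLoad
// returns its stable integer type id; an unregistered name is logged via
// Log.err and thrown as a RuntimeException ("TypeMap missing <name>").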
// Handle traffic, from a client to this server asking for work to be done.
// Called from either a F/J thread (generally with a UDP packet) or from the
// TCPReceiver thread.
static void remote_exec(AutoBuffer ab) {
  long lo = ab.get8(0), hi = ab._size >= 16 ? ab.get8(8) : 0;
  final int task = ab.getTask();
  final int flag = ab.getFlag();
  assert flag == CLIENT_UDP_SEND || flag == CLIENT_TCP_SEND; // Client-side send

  // Atomically record an instance of this task, one-time-only replacing a
  // null with an RPCCall, a placeholder while we work on a proper response -
  // and it serves to let us discard dup UDP requests.
  RPCCall old = ab._h2o.has_task(task);

  // This is a UDP packet requesting an answer back for a request sent via
  // TCP but the UDP packet has arrived ahead of the TCP. Just drop the UDP
  // and wait for the TCP to appear.
  if (old == null && flag == CLIENT_TCP_SEND) {
    Log.warn("got tcp with existing task #, FROM " + ab._h2o.toString() + " AB: " /* + UDP.printx16(lo,hi) */);
    // All the resends should be UDP only
    assert !ab.hasTCP()
        : "ERROR: got tcp with existing task #, FROM " + ab._h2o.toString() + " AB: " /* + UDP.printx16(lo,hi) */;
    // DROP PACKET
  } else if (old == null) { // New task?
    RPCCall rpc;
    try {
      // Read the DTask Right Now. If we are the TCPReceiver thread, then we
      // are reading in that thread... and thus TCP reads are single-threaded.
      rpc = new RPCCall(ab.get(water.DTask.class), ab._h2o, task);
    } catch (AutoBuffer.AutoBufferException e) {
      // Here we assume it's a TCP fail on read - and ignore the remote_exec
      // request. The caller will send it again. NOTE: this case is
      // indistinguishable from a broken short-writer/long-reader bug, except
      // that we'll re-send endlessly and fail endlessly.
      Log.info("Network congestion OR short-writer/long-reader: TCP " + e._ioe.getMessage()
          + ", AB=" + ab + ", ignoring partial send");
      ab.drainClose();
      return;
    }
    RPCCall rpc2 = ab._h2o.record_task(rpc);
    if (rpc2 == null) { // Atomically insert (to avoid double-work)
      if (rpc._dt instanceof MRTask && rpc._dt.logVerbose())
        Log.debug("Start remote task#" + task + " " + rpc._dt.getClass() + " from " + ab._h2o);
      H2O.submitTask(rpc); // And execute!
    } else { // Else lost the task-insertion race
      if (ab.hasTCP()) ab.drainClose();
      // DROP PACKET
    }
  } else if (!old._computedAndReplied) {
    // This packet has not been fully computed. Hence it's still a work-in-
    // progress locally. We have no answer to reply but we do not want to
    // re-offer the packet for repeated work. Send back a NACK, letting the
    // client know we're Working On It.
    assert !ab.hasTCP()
        : "got tcp with existing task #, FROM " + ab._h2o.toString()
            + " AB: " + UDP.printx16(lo, hi) + ", position = " + ab._bb.position();
    ab.clearForWriting(udp.nack._prior).putTask(UDP.udp.nack.ordinal(), task);
    // DROP PACKET
  } else {
    // This is an old re-send of the same thing we've answered to before.
    // Send back the same old answer ACK. If we sent via TCP before, then
    // we know the answer got there so just send a control-ACK back. If we
    // sent via UDP, resend the whole answer.
    if (ab.hasTCP()) {
      Log.warn("got tcp with existing task #, FROM " + ab._h2o.toString() + " AB: " + UDP.printx16(lo, hi));
      // All the resends should be UDP only
      ab.drainClose();
    }
    if (old._dt != null) { // already ackacked
      ++old._ackResendCnt;
      if (old._ackResendCnt % 10 == 0)
        Log.err("Possibly broken network, can not send ack through, got " + old._ackResendCnt
            + " for task # " + old._tsknum + ", dt == null?" + (old._dt == null));
      old.resend_ack();
    }
  }
  ab.close();
}