@Override public void compute2() { // First set self to be completed when this subtask completer assert _dt.getCompleter() == null; _dt.setCompleter(this); // Run the remote task on this server... _dt.dinvoke(_client); }
// Do the remote execution in a F/J thread & send a reply packet. // Caller must call 'tryComplete'. private static AutoBuffer remexec(DTask dt, H2ONode client, int task, AutoBuffer abold) { abold.close(); // Closing the old guy, returning a new guy // Now compute on it! dt.invoke(client); // Send results back AutoBuffer ab = new AutoBuffer(client).putTask(UDP.udp.ack, task).put1(SERVER_UDP_SEND); dt.write(ab); // Write the DTask dt._repliedTcp = ab.hasTCP(); // Resends do not need to repeat TCP result // Install answer so retries get this very answer client.record_task_answer(task, dt); return ab; }
private void sendAck() { // Send results back DTask dt, origDt = _dt; // _dt can go null the instant it is send over wire assert origDt != null; // Freed after completion while ((dt = _dt) != null) { // Retry loop for broken TCP sends AutoBuffer ab = null; try { // Start the ACK with results back to client. If the client is // asking for a class/id mapping (or any job running at FETCH_ACK // priority) then return a udp.fetchack byte instead of a udp.ack. // The receiver thread then knows to handle the mapping at the higher // priority. UDP.udp udp = dt.priority() == H2O.FETCH_ACK_PRIORITY ? UDP.udp.fetchack : UDP.udp.ack; ab = new AutoBuffer(_client, udp._prior).putTask(udp, _tsknum).put1(SERVER_UDP_SEND); assert ab.position() == 1 + 2 + 4 + 1; dt.write(ab); // Write the DTask - could be very large write dt._repliedTcp = ab.hasTCP(); // Resends do not need to repeat TCP result ab.close(); // Then close; send final byte _computedAndReplied = true; // After the final handshake, set computed+replied bit break; // Break out of retry loop } catch (AutoBuffer.AutoBufferException e) { if (!_client._heartbeat._client) // Report on servers only; clients allowed to be flaky Log.info( "IOException during ACK, " + e._ioe.getMessage() + ", t#" + _tsknum + " AB=" + ab + ", waiting and retrying..."); ab.drainClose(); if (_client._heartbeat._client) // Dead client will not accept a TCP ACK response? this.CAS_DT(dt, null); // cancel the ACK try { Thread.sleep(100); } catch (InterruptedException ignore) { } } catch (Exception e) { // Custom serializer just barfed? 
Log.err(e); // Log custom serializer exception ab.drainClose(); } } // end of while(true) if (dt == null) Log.info( "Cancelled remote task#" + _tsknum + " " + origDt.getClass() + " to " + _client + " has been cancelled by remote"); else { if (dt instanceof MRTask && dt.logVerbose()) Log.debug("Done remote task#" + _tsknum + " " + dt.getClass() + " to " + _client); _client.record_task_answer(this); // Setup for retrying Ack & AckAck, if not canceled } }
// Wrap task 'dt', received from node 'client' under task number 'tsknum'.
RPCCall(DTask dt, H2ONode client, int tsknum) {
  super(dt.priority()); // Hand the task's priority to the superclass
  _dt = dt;
  _client = client;
  _tsknum = tsknum;
  // Only for Golden Completed Tasks (see H2ONode.java).
  // NOTE(review): the super(...) call above already dereferences 'dt', so a
  // null 'dt' cannot get this far through this constructor - confirm whether
  // this branch is still needed.
  if (_dt == null) {
    _computedAndReplied = true;
  }
  _started = System.currentTimeMillis(); // for nack timeout
  _retry = RETRY_MS >> 1; // half retry for sending nack
}
// Exception occurred when processing this task locally, set exception and // send it back to the caller. Can be called lots of times (e.g., once per // MRTask.map call that throws). @Override public boolean onExceptionalCompletion(Throwable ex, CountedCompleter caller) { if (_computed) return false; synchronized (this) { // Filter dup calls to onExCompletion if (_computed) return false; _computed = true; } _dt.setException(ex); sendAck(); return false; }
// Handle TCP traffic, from a client to this server asking for work to be // done. This is called on the TCP reader thread, not a Fork/Join worker // thread. We want to do the bulk TCP read in the TCP reader thread. static void tcp_exec(final AutoBuffer ab) { final int ctrl = ab.getCtrl(); final int task = ab.getTask(); final int flag = ab.getFlag(); assert flag == CLIENT_UDP_SEND; // Client sent a request to be executed? // Act "as if" called from the UDP packet code, by recording the task just // like the packet we will be receiving (eventually). The presence of this // packet is used to stop dup-actions on dup-sends. Racily inserted, keep // only the last one. DTask dt1 = ab._h2o.record_task(task); assert dt1 == null || dt1 instanceof NOPTask : "#" + task + " " + dt1.getClass(); // For TCP, no repeats, so 1st send is only send (except for UDP // timeout retries) // Make a remote instance of this dude from the stream, but only if the // racing UDP packet did not already make one. Start the bulk TCP read. final DTask dt = ab.get(DTask.class); // Here I want to execute on this, but not block for completion in the // TCP reader thread. Jam the task on some F/J thread. UDP.udp .UDPS[ctrl] .pool() .execute( new CountedCompleter() { public void compute() { remexec(dt, ab._h2o, task, ab).close(); tryComplete(); } public boolean onExceptionalCompletion(Throwable ex, CountedCompleter caller) { ex.printStackTrace(); return true; } }); // All done for the TCP thread! Work continues in the FJ thread... }
// Re-send strictly the ack, because we're missing an AckAck final void resend_ack() { assert _computedAndReplied : "Found RPCCall not computed " + _tsknum; DTask dt = _dt; if (dt == null) return; // Received ACKACK already UDP.udp udp = dt.priority() == H2O.FETCH_ACK_PRIORITY ? UDP.udp.fetchack : UDP.udp.ack; AutoBuffer rab = new AutoBuffer(_client, dt.priority()).putTask(udp, _tsknum); boolean wasTCP = dt._repliedTcp; if (wasTCP) rab.put1(RPC.SERVER_TCP_SEND); // Original reply sent via TCP else { rab.put1(RPC.SERVER_UDP_SEND); // Original reply sent via UDP assert rab.position() == 1 + 2 + 4 + 1; dt.write(rab); } assert sz_check(rab) : "Resend of " + _dt.getClass() + " changes size from " + _size + " to " + rab.size(); assert dt._repliedTcp == wasTCP; rab.close(); dt._repliedTcp = wasTCP; // Double retry until we exceed existing age. This is the time to delay // until we try again. Note that we come here immediately on creation, // so the first doubling happens before anybody does any waiting. Also // note the generous 5sec cap: ping at least every 5 sec. _retry += (_retry < MAX_TIMEOUT) ? _retry : MAX_TIMEOUT; }