private void sendAck() {
  // Send results back
  DTask dt, origDt = _dt;      // _dt can go null the instant it is sent over the wire
  assert origDt != null;       // Freed after completion
  while ((dt = _dt) != null) { // Retry loop for broken TCP sends
    AutoBuffer ab = null;
    try {
      // Start the ACK with results back to client.  If the client is
      // asking for a class/id mapping (or any job running at FETCH_ACK
      // priority) then return a udp.fetchack byte instead of a udp.ack.
      // The receiver thread then knows to handle the mapping at the higher
      // priority.
      UDP.udp udp = dt.priority() == H2O.FETCH_ACK_PRIORITY ? UDP.udp.fetchack : UDP.udp.ack;
      ab = new AutoBuffer(_client, udp._prior).putTask(udp, _tsknum).put1(SERVER_UDP_SEND);
      assert ab.position() == 1 + 2 + 4 + 1;
      dt.write(ab);                 // Write the DTask - could be a very large write
      dt._repliedTcp = ab.hasTCP(); // Resends do not need to repeat the TCP result
      ab.close();                   // Then close; send final byte
      _computedAndReplied = true;   // After the final handshake, set the computed+replied bit
      break;                        // Break out of retry loop
    } catch (AutoBuffer.AutoBufferException e) {
      if (!_client._heartbeat._client) // Report on servers only; clients are allowed to be flaky
        Log.info("IOException during ACK, " + e._ioe.getMessage() + ", t#" + _tsknum + " AB=" + ab + ", waiting and retrying...");
      ab.drainClose();
      if (_client._heartbeat._client) // Dead client will not accept a TCP ACK response?
        this.CAS_DT(dt, null);        // Cancel the ACK
      try { Thread.sleep(100); } catch (InterruptedException ignore) { }
    } catch (Exception e) { // Custom serializer just barfed?
      Log.err(e);           // Log the custom serializer exception
      ab.drainClose();
    }
  } // end of while(true)
  if (dt == null)
    Log.info("Remote task#" + _tsknum + " " + origDt.getClass() + " to " + _client + " has been cancelled by remote");
  else {
    if (dt instanceof MRTask && dt.logVerbose())
      Log.debug("Done remote task#" + _tsknum + " " + dt.getClass() + " to " + _client);
    _client.record_task_answer(this); // Set up for retrying Ack & AckAck, if not cancelled
  }
}
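// Layout note on the position asserts above and in resend_ack()/call() below,
// inferred from the 1 + 2 + 4 + 1 expression itself rather than from a wire-format
// spec: the control prefix written before any DTask payload appears to be one byte
// for the UDP packet type from putTask(), a two-byte sender field, four bytes for
// the task number, and one byte for the send-mode flag (SERVER_UDP_SEND /
// CLIENT_UDP_SEND) - 8 bytes in total.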
// Re-send strictly the ack, because we're missing an AckAck
final void resend_ack() {
  assert _computedAndReplied : "Found RPCCall not computed " + _tsknum;
  DTask dt = _dt;
  if (dt == null) return; // Received ACKACK already
  UDP.udp udp = dt.priority() == H2O.FETCH_ACK_PRIORITY ? UDP.udp.fetchack : UDP.udp.ack;
  AutoBuffer rab = new AutoBuffer(_client, dt.priority()).putTask(udp, _tsknum);
  boolean wasTCP = dt._repliedTcp;
  if (wasTCP) rab.put1(RPC.SERVER_TCP_SEND); // Original reply sent via TCP
  else {
    rab.put1(RPC.SERVER_UDP_SEND);           // Original reply sent via UDP
    assert rab.position() == 1 + 2 + 4 + 1;
    dt.write(rab);
  }
  assert sz_check(rab) : "Resend of " + _dt.getClass() + " changes size from " + _size + " to " + rab.size();
  assert dt._repliedTcp == wasTCP;
  rab.close();
  dt._repliedTcp = wasTCP;
  // Double retry until we exceed existing age.  This is the time to delay
  // until we try again.  Note that we come here immediately on creation,
  // so the first doubling happens before anybody does any waiting.  Also
  // note the generous 5sec cap: ping at least every 5 sec.
  _retry += (_retry < MAX_TIMEOUT) ? _retry : MAX_TIMEOUT;
}
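// Back-off sketch for the _retry update above (and the identical one at the end of
// call() below), assuming a hypothetical initial _retry of 200 ms and a MAX_TIMEOUT
// of roughly 5 sec: the delay doubles 200 -> 400 -> 800 -> 1600 -> 3200 -> 6400 ms
// while it is still below MAX_TIMEOUT, and once it has crossed MAX_TIMEOUT each
// further resend adds a flat MAX_TIMEOUT instead of doubling again.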
public synchronized RPC<V> call() {
  // Any Completer will not be carried over to the remote; add it to the RPC call
  // so completion is signaled after the remote comes back.
  CountedCompleter cc = _dt.getCompleter();
  if (cc != null) handleCompleter(cc);

  // If running on self, just submit to queues & do locally
  if (_target == H2O.SELF) return handleLocal();

  // Keep a global record, for a while
  if (_target != null) _target.taskPut(_tasknum, this);
  try {
    if (_nack) return this; // Racing Nack rechecked under lock; no need to send retry
    // We could be racing timeouts-vs-replies.  Blow off the timeout if we have an answer.
    if (isDone()) {
      if (_target != null) _target.taskRemove(_tasknum);
      return this;
    }
    // Default strategy: (re)fire the packet and (re)start the timeout.  We
    // "count" exactly 1 failure: just whether or not we shipped via TCP ever
    // once.  After that we fearlessly (re)send UDP-sized packets until the
    // server replies.

    // Pack classloader/class & the instance data into the outgoing
    // AutoBuffer.  If it fits in a single UDP packet, ship it.  If not,
    // finish off the current AutoBuffer (which is now going TCP style), and
    // make a new UDP-sized packet.  On a re-send of a TCP-sized hunk, just
    // send the basic UDP control packet.
    if (!_sentTcp) {
      while (true) { // Retry loop for broken TCP sends
        AutoBuffer ab = new AutoBuffer(_target, _dt.priority());
        try {
          final boolean t;
          int offset = ab.position();
          ab.putTask(UDP.udp.exec, _tasknum).put1(CLIENT_UDP_SEND);
          ab.put(_dt);
          t = ab.hasTCP();
          assert sz_check(ab) : "Resend of " + _dt.getClass() + " changes size from " + _size + " to " + ab.size() + " for task#" + _tasknum;
          ab.close();   // Then close; send final byte
          _sentTcp = t; // Set after close (and any other possible fail)
          break;        // Break out of retry loop
        } catch (AutoBuffer.AutoBufferException e) {
          Log.info("IOException during RPC call: " + e._ioe.getMessage() + ", AB=" + ab + ", for task#" + _tasknum + ", waiting and retrying...");
          ab.drainClose();
          try { Thread.sleep(500); } catch (InterruptedException ignore) { }
        }
      } // end of while(true)
    } else {
      // Else it was sent via TCP in a prior attempt, and we've timed out.
      // This means the caller's ACK/answer probably got dropped and we need
      // him to resend it (or else the caller is still processing our
      // request).  Send a UDP reminder - but with the CLIENT_TCP_SEND flag
      // instead of the UDP send, and no DTask (since it previously went via
      // TCP, no need to resend it).
      AutoBuffer ab = new AutoBuffer(_target, _dt.priority()).putTask(UDP.udp.exec, _tasknum);
      ab.put1(CLIENT_TCP_SEND).close();
    }
    // Double retry until we exceed existing age.  This is the time to delay
    // until we try again.  Note that we come here immediately on creation,
    // so the first doubling happens before anybody does any waiting.  Also
    // note the generous 5sec cap: ping at least every 5 sec.
    _retry += (_retry < MAX_TIMEOUT) ? _retry : MAX_TIMEOUT;
    // Put self on the "TBD" list of tasks awaiting Timeout.
    // So: don't really 'forget' but remember me in a little bit.
    // UDPTimeOutThread.PENDING.put(_tasknum, this);
    return this;
  } catch (Throwable t) {
    t.printStackTrace();
    throw Log.throwErr(t);
  }
}
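// Usage sketch (hypothetical task and target names; the real call sites live
// elsewhere in the codebase): a caller typically binds a DTask to a target
// H2ONode, fires it with call(), and then blocks on get() for the ACK, e.g.
//
//   MyDTask work = new MyDTask();               // some DTask subclass
//   MyDTask done = new RPC<>(targetNode, work)  // bind the task to a remote node
//                      .call()                  // (re)send + arm the retry timer
//                      .get();                  // block until the remote ACK arrives
//
// call() itself never blocks for the answer; it only ships the request and
// schedules the resend/timeout bookkeeping shown above.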