Exemplo n.º 1
0
 // TCP large RECEIVE of results.  Note that 'this' is NOT the RPC object
 // that is hoping to get the received object, nor is the current thread the
 // RPC thread blocking for the object.  The current thread is the TCP
 // reader thread.
 /**
  * Consumes a large RPC answer that arrived over TCP, then acknowledges it to the sender.
  *
  * @param ab buffer carrying the incoming answer; it is either read by the RPC or drained here
  * @throws IOException declared for callers; a failed read is re-raised via {@code Log.throwErr}
  */
 static void tcp_ack(final AutoBuffer ab) throws IOException {
   // Find the RPC that this answer belongs to.
   final int tasknum = ab.getTask();
   final RPC pending = ab._h2o.taskGet(tasknum);
   final boolean stillWaiting = pending != null && !pending._done;
   if (stillWaiting) {
     assert pending._tasknum == tasknum;
     assert !pending._done;
     // Right node, wrong thread: hand the payload to the RPC, which installs
     // the answer and wakes whoever is blocked on it.
     try {
       pending.response(ab);
     } catch (AutoBuffer.AutoBufferException readFailure) {
       // A broken TCP read leaves the task object partially overwritten, so it
       // cannot be re-sent from here.  Treated as fatal for now; the missing
       // feature is to stop retrying and wait for the server to send again.
       throw Log.throwErr(readFailure._ioe);
     }
   } else {
     // The task was cancelled or already answered: do not bother reading the
     // payload, just drain and close the socket side.
     ab.drainClose();
   }
   // Either way, tell the remote "we got the answer".
   new AutoBuffer(ab._h2o, H2O.ACK_ACK_PRIORITY)
       .putTask(UDP.udp.ackack.ordinal(), tasknum)
       .close();
 }
Exemplo n.º 2
0
 // Do the remote execution in a F/J thread & send a reply packet.
 // Caller must call 'tryComplete'.
 /**
  * Invokes the task on behalf of {@code client} and builds the reply buffer.
  *
  * @param dt task to invoke and then serialize as the answer
  * @param client node passed to the task and used as the reply destination
  * @param task task number echoed in the reply
  * @param abold request buffer; closed here before the task runs
  * @return the reply buffer, not yet closed by this method
  */
 private static AutoBuffer remexec(DTask dt, H2ONode client, int task, AutoBuffer abold) {
   // The request buffer is finished with; a new buffer is returned instead.
   abold.close();
   // Do the actual work.
   dt.invoke(client);
   // Start the reply and serialize the task into it.
   final AutoBuffer reply =
       new AutoBuffer(client).putTask(UDP.udp.ack, task).put1(SERVER_UDP_SEND);
   dt.write(reply);
   // Remember whether the reply went over TCP; resends need not repeat a TCP result.
   final boolean wentTcp = reply.hasTCP();
   dt._repliedTcp = wentTcp;
   // Keep the answer so that retries receive this very answer.
   client.record_task_answer(task, dt);
   return reply;
 }
Exemplo n.º 3
0
 // Assertion check that size is not changing between resends,
 // i.e., resends sent identical data.
 /**
  * Remembers the first non-zero-state buffer size and compares later buffers against it.
  *
  * @param ab buffer whose current size is inspected
  * @return {@code true} when {@code _size} was still 0 (the size is then recorded), otherwise
  *     whether the buffer size equals the recorded {@code _size}
  */
 private boolean sz_check(AutoBuffer ab) {
   final int observed = ab.size();
   if (_size != 0) {
     // A size was recorded earlier: this buffer must match it.
     return observed == _size;
   }
   // Nothing recorded yet: adopt this size as the reference.
   _size = observed;
   return true;
 }
Exemplo n.º 4
0
 // TCP large RECEIVE of results.  Note that 'this' is NOT the RPC object
 // that is hoping to get the received object, nor is the current thread the
 // RPC thread blocking for the object.  The current thread is the TCP
 // reader thread.
 /**
  * Consumes a large RPC answer that arrived over TCP, then acknowledges it to the sender.
  *
  * @param ab buffer carrying the incoming answer; it is either read by the RPC or drained here
  */
 static void tcp_ack(final AutoBuffer ab) {
   // Find the RPC that this answer belongs to.
   final int tasknum = ab.getTask();
   final RPC pending = TASKS.get(tasknum);
   if (pending != null) {
     assert pending._tasknum == tasknum;
     assert !pending._done;
     // Right node, wrong thread: hand the payload to the RPC, which installs
     // the answer and wakes whoever is blocked on it.
     pending.response(ab);
   } else {
     // No such task any more (e.g. cancelled): skip the payload and close.
     ab.drainClose();
   }
   // Either way, tell the remote "we got the answer".
   new AutoBuffer(ab._h2o).putTask(UDP.udp.ackack.ordinal(), tasknum).close(true);
 }
Exemplo n.º 5
0
 // Got a response UDP packet, or completed a large TCP answer-receive.
 // Install it as The Answer packet and wake up anybody waiting on an answer.
 /**
  * Installs the answer carried by {@code ab} into this RPC, at most once, and wakes waiters.
  *
  * @param ab buffer holding the response; always closed by this method
  */
 protected void response(AutoBuffer ab) {
   assert _tasknum == ab.getTask();
   if (!_done) {
     // The flag must be read even if unused, because reading it advances the buffer.
     final int flag = ab.getFlag();
     if (flag != SERVER_TCP_SEND) {
       assert flag == SERVER_UDP_SEND;
       synchronized (this) { // Install the answer under lock
         if (_done) {
           // Lost a race against another response packet: nothing left to do.
           ab.close();
         } else {
           _dt.read(ab); // Read the answer
           ab.close(); // Finish the read
           _dt.onAck(); // Runs once, before the ACKACK goes out
           _done = true;
           UDPTimeOutThread.PENDING.remove(this);
           TASKS.remove(_tasknum); // Task is complete even when the result is null
           notifyAll(); // Wake anybody waiting on the answer
         }
       }
       return;
     }
   }
   // Either a duplicate response, or a UDP control packet for a reply that travelled over TCP.
   ab.close();
 }
Exemplo n.º 6
0
  // Handle TCP traffic, from a client to this server asking for work to be
  // done.  This is called on the TCP reader thread, not a Fork/Join worker
  // thread.  We want to do the bulk TCP read in the TCP reader thread.
  /**
   * Reads a task sent over TCP and queues its execution on a worker pool.
   *
   * @param ab buffer positioned at the start of the incoming request
   */
  static void tcp_exec(final AutoBuffer ab) {
    final int ctrl = ab.getCtrl();
    final int tasknum = ab.getTask();
    final int flag = ab.getFlag();
    assert flag == CLIENT_UDP_SEND; // Client sent a request to be executed?

    // Record the task as if the matching UDP packet had been seen, so that
    // duplicate sends do not trigger duplicate work.  Inserted racily; only the
    // last one is kept.  For TCP there are no repeats, so the first send is the
    // only send (except for UDP timeout retries).
    final DTask prior = ab._h2o.record_task(tasknum);
    assert prior == null || prior instanceof NOPTask : "#" + tasknum + " " + prior.getClass();

    // Deserialize the task from the stream; this starts the bulk TCP read.
    final DTask shipped = ab.get(DTask.class);

    // Execute without blocking the TCP reader thread: wrap the work in a
    // completer and push it onto the pool selected by the control byte.
    final CountedCompleter job =
        new CountedCompleter() {
          public void compute() {
            remexec(shipped, ab._h2o, tasknum, ab).close();
            tryComplete();
          }

          public boolean onExceptionalCompletion(Throwable ex, CountedCompleter caller) {
            ex.printStackTrace();
            return true;
          }
        };
    UDP.udp.UDPS[ctrl].pool().execute(job);
    // The TCP thread is finished here; the work continues on the pool thread.
  }
Exemplo n.º 7
0
 // Pretty-print bytes 1-15; byte 0 is the udp_type enum
 /**
  * Renders the task number, the send-flag name and (for client UDP sends) the class name.
  *
  * @param ab buffer to decode; reading from it advances the buffer
  * @return text of the form {@code "task# <n> <flag name> <class name>"}
  */
 public String print16(AutoBuffer ab) {
   final int flag = ab.getFlag();

   // Only a client UDP send carries the class name; never read past what is left.
   String clazz = "";
   if (flag == CLIENT_UDP_SEND) {
     clazz = new String(ab.getA1(Math.min(ab.get2(), ab.remaining())));
   }

   // Translate the flag into its symbolic name; unknown flags print as empty.
   final String flagName;
   if (flag == SERVER_UDP_SEND) {
     flagName = "SERVER_UDP_SEND";
   } else if (flag == SERVER_TCP_SEND) {
     flagName = "SERVER_TCP_SEND";
   } else if (flag == CLIENT_UDP_SEND) {
     flagName = "CLIENT_UDP_SEND";
   } else if (flag == CLIENT_TCP_SEND) {
     flagName = "CLIENT_TCP_SEND";
   } else {
     flagName = "";
   }
   return "task# " + ab.getTask() + " " + flagName + " " + clazz;
 }
Exemplo n.º 8
0
  // Make an initial RPC, or re-send a packet.  Always called on 1st send; also
  // called on a timeout.
  /**
   * Fires the request at the target (first send or timeout-driven resend) and re-arms the timeout.
   *
   * @return this RPC
   */
  public synchronized RPC<V> call() {
    // Keep a global record, for a while.
    TASKS.put(_tasknum, this);
    // Timeouts race with replies: if the answer is already here, drop the record and stop.
    if (isDone()) {
      TASKS.remove(_tasknum);
      return this;
    }

    // Default strategy: (re)fire the packet and (re)start the timeout.  The only
    // thing remembered between attempts is whether the payload ever went out via
    // TCP; once it has, nothing is sent again from here.
    if (!_sentTcp) {
      // Pick the control byte according to the task's priority.
      final UDP.udp queue;
      if (_dt.isHighPriority()) {
        queue = UDP.udp.exechi;
      } else {
        queue = UDP.udp.execlo;
      }
      // Pack the flag and the task instance, then ship.
      final AutoBuffer out = new AutoBuffer(_target).putTask(queue, _tasknum);
      out.put1(CLIENT_UDP_SEND).put(_dt).close();
      if (out.hasTCP()) {
        _sentTcp = true;
      }
    }

    // Double the retry delay until it reaches the cap, then grow by the cap.
    // This runs immediately on creation too, so the first doubling happens
    // before anybody has waited.
    if (_retry < 5000) {
      _retry += _retry;
    } else {
      _retry += 5000;
    }
    // Queue self on the list of tasks awaiting a timeout.
    assert !UDPTimeOutThread.PENDING.contains(this);
    UDPTimeOutThread.PENDING.add(this);
    return this;
  }
Exemplo n.º 9
0
    /**
     * Sends the computed result back to the client as an ACK, retrying while the send fails and
     * the task has not been cancelled (i.e. while {@code _dt} stays non-null).
     *
     * <p>On success {@code _computedAndReplied} is set and the call is recorded on the client via
     * {@code record_task_answer}. If {@code _dt} becomes null the ACK is treated as cancelled and
     * only a log line is written.
     */
    private void sendAck() {
      // Send results back
      DTask dt, origDt = _dt; // _dt can go null the instant it is sent over the wire
      assert origDt != null; // Freed after completion
      while ((dt = _dt) != null) { // Retry loop for broken TCP sends
        AutoBuffer ab = null;
        try {
          // Start the ACK with results back to client.  If the client is
          // asking for a class/id mapping (or any job running at FETCH_ACK
          // priority) then return a udp.fetchack byte instead of a udp.ack.
          // The receiver thread then knows to handle the mapping at the higher
          // priority.

          UDP.udp udp = dt.priority() == H2O.FETCH_ACK_PRIORITY ? UDP.udp.fetchack : UDP.udp.ack;
          ab = new AutoBuffer(_client, udp._prior).putTask(udp, _tsknum).put1(SERVER_UDP_SEND);
          // Header written so far is 1 + 2 + 4 + 1 bytes.
          assert ab.position() == 1 + 2 + 4 + 1;
          dt.write(ab); // Write the DTask - could be very large write
          dt._repliedTcp = ab.hasTCP(); // Resends do not need to repeat TCP result
          ab.close(); // Then close; send final byte
          _computedAndReplied = true; // After the final handshake, set computed+replied bit
          break; // Break out of retry loop
        } catch (AutoBuffer.AutoBufferException e) {
          // NOTE(review): ab is still null here if the exception came from the
          // statements before the assignment completed; ab.drainClose() below
          // would then throw a NullPointerException - confirm whether that can happen.
          if (!_client._heartbeat._client) // Report on servers only; clients allowed to be flaky
          Log.info(
                "IOException during ACK, "
                    + e._ioe.getMessage()
                    + ", t#"
                    + _tsknum
                    + " AB="
                    + ab
                    + ", waiting and retrying...");
          ab.drainClose();
          // Setting _dt to null via CAS makes the loop condition fail on the next pass.
          if (_client._heartbeat._client) // Dead client will not accept a TCP ACK response?
          this.CAS_DT(dt, null); // cancel the ACK
          // Brief pause before the next attempt; an interrupt only shortens the pause.
          try {
            Thread.sleep(100);
          } catch (InterruptedException ignore) {
          }
        } catch (Exception e) { // Custom serializer just barfed?
          // NOTE(review): this arm neither breaks, cancels nor sleeps, so the loop
          // retries immediately; the same null-ab concern as above applies.
          Log.err(e); // Log custom serializer exception
          ab.drainClose();
        }
      } // end of retry loop
      // dt is null only when the loop ended because _dt was cleared; after a
      // successful send the loop exits via break with dt still set.
      if (dt == null)
        Log.info(
            "Cancelled remote task#"
                + _tsknum
                + " "
                + origDt.getClass()
                + " to "
                + _client
                + " has been cancelled by remote");
      else {
        if (dt instanceof MRTask && dt.logVerbose())
          Log.debug("Done remote task#" + _tsknum + " " + dt.getClass() + " to " + _client);
        _client.record_task_answer(this); // Setup for retrying Ack & AckAck, if not canceled
      }
    }
Exemplo n.º 10
0
  /**
   * Handles a response packet for this RPC: installs the answer once, under this object's lock,
   * and produces the buffer holding the ACKACK for the sender.
   *
   * @param ab buffer holding the response; it is read, closed, drained, or reused for the ACKACK
   *     depending on the path taken
   * @return a buffer loaded with the ACKACK for {@code _tasknum}: either {@code ab} reused via
   *     {@code ackack(...)}, or a fresh buffer when {@code ab} was closed or drained here
   */
  protected AutoBuffer response(AutoBuffer ab) {

    assert _tasknum == ab.getTask();
    if (_done) {
      // Duplicate response.  A non-TCP buffer is reused for the ACKACK; a TCP
      // one is drained and the fresh buffer at the bottom is used instead.
      if (!ab.hasTCP()) return ackack(ab, _tasknum); // Ignore duplicate response packet
      ab.drainClose();
    } else {
      int flag = ab.getFlag(); // Must read flag also, to advance ab
      if (flag == SERVER_TCP_SEND) return ackack(ab, _tasknum); // Ignore UDP packet for a TCP reply
      assert flag == SERVER_UDP_SEND : "flag = " + flag;
      synchronized (this) { // Install the answer under lock
        // Re-check under the lock: another response may have won the race.
        if (_done) {
          if (!ab.hasTCP()) return ackack(ab, _tasknum); // Ignore duplicate response packet
          ab.drainClose();
        } else {
          //          UDPTimeOutThread.PENDING.remove(_tasknum);
          _dt.read(ab); // Read the answer (under lock?)
          _size_rez = ab.size(); // Record received size
          ab
              .close(); // Also finish the read (under lock?  even if canceled, since need to drain
                        // TCP)
          if (!isCancelled()) // Can be canceled already (locally by MRTask while receiving remote
                              // answer)
          _dt.onAck(); // One time only execute (before sending ACKACK)
          _done = true; // Only read one (of many) response packets
          ab._h2o.taskRemove(_tasknum); // Flag as task-completed, even if the result is null
          notifyAll(); // And notify in any case
        }
        // Reached, still inside the lock, both after a fresh install and after a
        // drained TCP duplicate.
        if (!isCancelled()) // Can be canceled already
        doAllCompletions(); // Send all tasks needing completion to the work queues
      }
    }
    // AckAck back on a fresh AutoBuffer, since the incoming one was actually closed or drained
    return new AutoBuffer(ab._h2o, H2O.ACK_ACK_PRIORITY)
        .putTask(UDP.udp.ackack.ordinal(), _tasknum);
  }
Exemplo n.º 11
0
 // Re-send strictly the ack, because we're missing an AckAck
 /**
  * Re-sends the ACK for a task that has been computed and replied to but whose ACKACK has not
  * been seen yet, then grows the retry delay.
  *
  * <p>If the original reply went over TCP only a control packet flagged
  * {@code SERVER_TCP_SEND} is sent; otherwise the whole answer is serialized again. Returns
  * without sending when {@code _dt} is already null (ACKACK received).
  */
 final void resend_ack() {
   assert _computedAndReplied : "Found RPCCall not computed " + _tsknum;
   // Work on a snapshot of the field for the whole method; a null snapshot
   // means the ACKACK has already been received.
   final DTask dt = _dt;
   if (dt == null) {
     return;
   }
   UDP.udp udp = dt.priority() == H2O.FETCH_ACK_PRIORITY ? UDP.udp.fetchack : UDP.udp.ack;
   AutoBuffer rab = new AutoBuffer(_client, dt.priority()).putTask(udp, _tsknum);
   boolean wasTCP = dt._repliedTcp;
   if (wasTCP) {
     rab.put1(RPC.SERVER_TCP_SEND); // Original reply sent via TCP
   } else {
     rab.put1(RPC.SERVER_UDP_SEND); // Original reply sent via UDP
     assert rab.position() == 1 + 2 + 4 + 1;
     dt.write(rab);
   }
   // The failure message reads the snapshot, not the field: the field can be
   // cleared while this method runs, and dereferencing it here would replace
   // the intended AssertionError with a NullPointerException.
   assert sz_check(rab)
       : "Resend of " + dt.getClass() + " changes size from " + _size + " to " + rab.size();
   assert dt._repliedTcp == wasTCP;
   rab.close();
   dt._repliedTcp = wasTCP;
   // Double the retry delay until it reaches MAX_TIMEOUT, then grow by
   // MAX_TIMEOUT per resend.
   _retry += (_retry < MAX_TIMEOUT) ? _retry : MAX_TIMEOUT;
 }
Exemplo n.º 12
0
 /**
  * Executes the request carried by {@code ab} when it was sent with the client UDP flag.
  *
  * @param ab incoming request buffer
  * @return the reply buffer built by {@code remexec}, or {@code ab} itself for any other flag
  */
 AutoBuffer call(AutoBuffer ab) {
   if (ab.getFlag() != CLIENT_UDP_SEND) {
     // Not a UDP send: all the work is being done in the TCP thread.
     return ab;
   }
   // Deserialize the task first, then read the sender and the task number.
   final DTask shipped = ab.get(DTask.class);
   return remexec(shipped, ab._h2o, ab.getTask(), ab);
 }
Exemplo n.º 13
0
 // Got a response UDP packet, or completed a large TCP answer-receive.
 // Install it as The Answer packet and wake up anybody waiting on an answer.
 // On all paths, send an ACKACK back
 /**
  * Turns the given buffer into an ACKACK for task {@code tnum}.
  *
  * @param ab buffer to clear for writing and reuse
  * @param tnum task number being acknowledged
  * @return the buffer returned by {@code putTask}, loaded with the ACKACK header
  */
 static AutoBuffer ackack(AutoBuffer ab, int tnum) {
   // Reset the buffer for writing at ACKACK priority, then write the header.
   final AutoBuffer reply = ab.clearForWriting(H2O.ACK_ACK_PRIORITY);
   return reply.putTask(UDP.udp.ackack.ordinal(), tnum);
 }
Exemplo n.º 14
0
  // Handle traffic, from a client to this server asking for work to be done.
  // Called from either a F/J thread (generally with a UDP packet) or from the
  // TCPReceiver thread.
  /**
   * Dispatches an incoming execution request according to what is already recorded for its task
   * number on the sending node:
   *
   * <ul>
   *   <li>nothing recorded and flag {@code CLIENT_TCP_SEND}: log a warning and drop the packet;
   *   <li>nothing recorded otherwise: deserialize the task, record it and submit it;
   *   <li>recorded but not yet computed and replied: load {@code ab} with a NACK;
   *   <li>recorded and replied: re-send the ACK while {@code _dt} is still set.
   * </ul>
   *
   * <p>{@code ab.close()} runs at the end on every path except the early return after a failed
   * read.
   *
   * @param ab buffer holding the incoming request
   */
  static void remote_exec(AutoBuffer ab) {
    // First 16 bytes of the packet, kept only for diagnostic printing below.
    long lo = ab.get8(0), hi = ab._size >= 16 ? ab.get8(8) : 0;
    final int task = ab.getTask();
    final int flag = ab.getFlag();
    assert flag == CLIENT_UDP_SEND || flag == CLIENT_TCP_SEND; // Client-side send
    // Look up any call already recorded for this task number; a recorded call
    // is what lets duplicate UDP requests be recognized and discarded.
    RPCCall old = ab._h2o.has_task(task);
    // This is a UDP packet requesting an answer back for a request sent via
    // TCP but the UDP packet has arrived ahead of the TCP.  Just drop the UDP
    // and wait for the TCP to appear.
    // NOTE(review): the log text below says "existing task" although this branch
    // is taken only when no task is recorded (old == null) - confirm the wording.
    if (old == null && flag == CLIENT_TCP_SEND) {
      Log.warn(
          "got tcp with existing task #, FROM "
              + ab._h2o.toString()
              + " AB: " /* +  UDP.printx16(lo,hi)*/);
      assert !ab.hasTCP()
          : "ERROR: got tcp with existing task #, FROM "
              + ab._h2o.toString()
              + " AB: " /* + UDP.printx16(lo,hi)*/; // All the resends should be UDP only
      // DROP PACKET
    } else if (old == null) { // New task?
      RPCCall rpc;
      try {
        // Read the DTask Right Now.  If we are the TCPReceiver thread, then we
        // are reading in that thread... and thus TCP reads are single-threaded.
        rpc = new RPCCall(ab.get(water.DTask.class), ab._h2o, task);
      } catch (AutoBuffer.AutoBufferException e) {
        // Here we assume it's a TCP fail on read - and ignore the remote_exec
        // request.  The caller will send it again.  NOTE: this case is
        // indistinguishable from a broken short-writer/long-reader bug, except
        // that we'll re-send endlessly and fail endlessly.
        Log.info(
            "Network congestion OR short-writer/long-reader: TCP "
                + e._ioe.getMessage()
                + ",  AB="
                + ab
                + ", ignoring partial send");
        ab.drainClose();
        return;
      }
      // record_task returns the previously recorded call, or null when this
      // insertion won.
      RPCCall rpc2 = ab._h2o.record_task(rpc);
      if (rpc2 == null) { // Atomically insert (to avoid double-work)
        if (rpc._dt instanceof MRTask && rpc._dt.logVerbose())
          Log.debug("Start remote task#" + task + " " + rpc._dt.getClass() + " from " + ab._h2o);
        H2O.submitTask(rpc); // And execute!
      } else { // Else lost the task-insertion race
        if (ab.hasTCP()) ab.drainClose();
        // DROP PACKET
      }

    } else if (!old._computedAndReplied) {
      // This packet has not been fully computed.  Hence it's still a work-in-
      // progress locally.  We have no answer to reply but we do not want to
      // re-offer the packet for repeated work.  Send back a NACK, letting the
      // client know we're Working On It
      assert !ab.hasTCP()
          : "got tcp with existing task #, FROM "
              + ab._h2o.toString()
              + " AB: "
              + UDP.printx16(lo, hi)
              + ", position = "
              + ab._bb.position();
      // Reuse the incoming buffer for the NACK; the ab.close() at the end of
      // the method then closes it.
      ab.clearForWriting(udp.nack._prior).putTask(UDP.udp.nack.ordinal(), task);
    } else {
      // This is an old re-send of the same thing we've answered to before.
      // Send back the same old answer ACK.  If we sent via TCP before, then
      // we know the answer got there so just send a control-ACK back.  If we
      // sent via UDP, resend the whole answer.
      if (ab.hasTCP()) {
        Log.warn(
            "got tcp with existing task #, FROM "
                + ab._h2o.toString()
                + " AB: "
                + UDP.printx16(lo, hi)); // All the resends should be UDP only
        ab.drainClose();
      }
      // NOTE(review): resend_ack treats a null _dt as "ACKACK already received",
      // so a non-null _dt here appears to mean the ACKACK is still outstanding.
      if (old._dt != null) { // ACKACK apparently not received yet
        ++old._ackResendCnt;
        // Every tenth resend, report that the ACK does not seem to get through.
        if (old._ackResendCnt % 10 == 0)
          Log.err(
              "Possibly broken network, can not send ack through, got "
                  + old._ackResendCnt
                  + " for task # "
                  + old._tsknum
                  + ", dt == null?"
                  + (old._dt == null));
        old.resend_ack();
      }
    }
    // NOTE(review): on the paths that already called ab.drainClose() this is a
    // second close of the same buffer - confirm that is harmless.
    ab.close();
  }
Exemplo n.º 15
0
 // Pretty-print bytes 1-15; byte 0 is the udp_type enum
 /**
  * Renders the task number, the class name (client UDP sends only) and the flag's cookie text.
  *
  * @param ab buffer to decode; reading from it advances the buffer
  * @return text of the form {@code "task# <n> <class name> <cookie>"}
  */
 @Override
 String print16(AutoBuffer ab) {
   final int flag = ab.getFlag();
   // Only a client UDP send carries a type id that can be mapped to a class name.
   final String clazz;
   if (flag == CLIENT_UDP_SEND) {
     clazz = TypeMap.className(ab.getInt());
   } else {
     clazz = "";
   }
   // COOKIES is indexed by the flag's offset from SERVER_UDP_SEND.
   return "task# " + ab.getTask() + " " + clazz + " " + COOKIES[flag - SERVER_UDP_SEND];
 }
Exemplo n.º 16
0
  /**
   * Sends this RPC to its target, or re-sends it after a timeout, and grows the retry delay.
   *
   * @return this RPC, or the result of {@code handleLocal()} when the target is this node
   */
  public synchronized RPC<V> call() {
    // A completer is not carried over to the remote; register it on this RPC
    // so completion is signalled after the remote comes back.
    final CountedCompleter completer = _dt.getCompleter();
    if (completer != null) {
      handleCompleter(completer);
    }

    // Targeting this very node: submit to the local queues instead.
    if (_target == H2O.SELF) {
      return handleLocal();
    }

    // Keep a record of the task on the target, for a while.
    if (_target != null) {
      _target.taskPut(_tasknum, this);
    }
    try {
      // A racing NACK, re-checked under the lock, means no retry is needed.
      if (_nack) {
        return this;
      }
      // Timeouts race with replies: if the answer is already here, drop the record and stop.
      if (isDone()) {
        if (_target != null) {
          _target.taskRemove(_tasknum);
        }
        return this;
      }

      // Default strategy: (re)fire the packet and (re)start the timeout.  The
      // only thing remembered between attempts is whether the payload ever
      // went out via TCP.
      if (_sentTcp) {
        // The payload already travelled over TCP and the timeout fired, so the
        // answer was probably dropped or is still being computed.  Send only a
        // small reminder carrying the CLIENT_TCP_SEND flag and no task.
        final AutoBuffer reminder =
            new AutoBuffer(_target, _dt.priority()).putTask(UDP.udp.exec, _tasknum);
        reminder.put1(CLIENT_TCP_SEND).close();
      } else {
        // Pack the header, the flag and the task, and ship it; retried for as
        // long as the send breaks with an AutoBufferException.
        for (;;) {
          final AutoBuffer out = new AutoBuffer(_target, _dt.priority());
          try {
            final int startPos = out.position(); // value currently unused
            out.putTask(UDP.udp.exec, _tasknum).put1(CLIENT_UDP_SEND);
            out.put(_dt);
            final boolean wentTcp = out.hasTCP();
            assert sz_check(out)
                : "Resend of "
                    + _dt.getClass()
                    + " changes size from "
                    + _size
                    + " to "
                    + out.size()
                    + " for task#"
                    + _tasknum;
            out.close(); // Then close; send final byte
            _sentTcp = wentTcp; // Set only after close (and any other possible failure)
            break;
          } catch (AutoBuffer.AutoBufferException sendFailure) {
            Log.info(
                "IOException during RPC call: "
                    + sendFailure._ioe.getMessage()
                    + ",  AB="
                    + out
                    + ", for task#"
                    + _tasknum
                    + ", waiting and retrying...");
            out.drainClose();
            // Pause before the next attempt; an interrupt only shortens the pause.
            try {
              Thread.sleep(500);
            } catch (InterruptedException ignore) {
            }
          }
        }
      }

      // Double the retry delay until it reaches MAX_TIMEOUT, then grow by
      // MAX_TIMEOUT.  This runs immediately on creation too, so the first
      // doubling happens before anybody has waited.
      if (_retry < MAX_TIMEOUT) {
        _retry += _retry;
      } else {
        _retry += MAX_TIMEOUT;
      }
      return this;
    } catch (Throwable failure) {
      failure.printStackTrace();
      throw Log.throwErr(failure);
    }
  }