예제 #1
0
 // Runs this put/invalidate task on the receiving node on behalf of `sender`.
 // Installs _val under _key (retrying until the swap succeeds), invalidates
 // the replaced value's remote copies when this node is the key's home, then
 // clears the task's fields and completes it.
 @Override
 public void dinvoke(H2ONode sender) {
   // Only PUT to home for keys, or remote invalidation from home.
   assert _key.home() || _val == null;
   Paxos.lockCloud();
   // A non-null incoming Value starts out with one known replica: the sender.
   if (_val != null) {
     _val.initReplicaHome(sender, _key);
   }
   // Keep retrying until our swap lands.  Raw-get is used so a value that is
   // about to be overwritten is not lazily manifested first.
   Value prior;
   do {
     prior = H2O.raw_get(_key);
   } while (H2O.putIfMatch(_key, _val, prior) != prior);
   // On the home node, invalidate the replaced value's remote caches and block
   // until every invalidate is done, so the remote caller sees them complete.
   if (_key.home() && prior != null) {
     Futures pending = new Futures();
     prior.lockAndInvalidate(sender, pending).blockForPending();
   }
   // Nothing is returned to the caller; drop the payload before completing.
   _key = null;
   _val = null;
   tryComplete();
 }
예제 #2
0
  /**
   * Parses one pipe-delimited message received from another replica and dispatches it on its
   * numeric type (field 0).
   *
   * <p>Field 1 is always parsed as the sender's replica id. Field 2 is an integer whose meaning
   * depends on the type: the Paxos instance id for types 1-6, the highest index reported by a
   * recovering node for type 7, and the number of recovered entries that follow for type 8.
   *
   * <ul>
   *   <li>1 - Prepare
   *   <li>2 - AckToPrepare
   *   <li>3 - NAckToPrepare
   *   <li>4 - Accept
   *   <li>5, 6 - Decide (both types are handled identically)
   *   <li>7 - recover-me request from a node that is catching up
   *   <li>8 - reply to this replica's own recover request
   * </ul>
   *
   * Any other type is ignored.
   *
   * @param currentMessage raw message text, fields separated by {@code |}
   * @throws NumberFormatException if a field that must be numeric cannot be parsed
   * @throws ArrayIndexOutOfBoundsException if the message has fewer fields than its type needs
   */
  public void handleServerMessage(String currentMessage) {
    String[] messageParts = currentMessage.split("[|]");
    int senderReplicaId = Integer.parseInt(messageParts[1]);
    int paxosId = Integer.parseInt(messageParts[2]);
    // Snapshot of the log length; every index comparison below is relative to it.
    int numEntries = this.replica.paxosEntries.size();

    switch (Integer.parseInt(messageParts[0])) {
      case 1:
        handlePrepareMsg(messageParts, senderReplicaId, paxosId, numEntries);
        break;
      case 2:
        handleAckToPrepareMsg(messageParts, senderReplicaId, paxosId, numEntries);
        break;
      case 3:
        handleNAckToPrepareMsg(messageParts, senderReplicaId, paxosId, numEntries);
        break;
      case 4:
        handleAcceptMsg(messageParts, senderReplicaId, paxosId, numEntries);
        break;
      case 5:
      case 6:
        handleDecideMsg(messageParts, paxosId, numEntries);
        break;
      case 7:
        handleRecoverRequestMsg(senderReplicaId, paxosId, numEntries);
        break;
      case 8:
        handleRecoverReplyMsg(messageParts, paxosId);
        break;
      default:
        break;
    }
  }

  /**
   * Type 1 (Prepare): creates the instance if it is beyond the current log, joins it if it is the
   * newest undecided instance, and otherwise answers with the decision already held for it.
   */
  private void handlePrepareMsg(
      String[] messageParts, int senderReplicaId, int paxosId, int numEntries) {
    BallotPair proposedBallot = parseBallot(messageParts, 4, 3);
    this.replica.logger.write(
        "Received Prepare Msg for Paxos Instance: " + String.valueOf(paxosId));
    if (paxosId > numEntries - 1) {
      // Instance not seen yet: append it (it lands at index numEntries) and join.
      this.replica.paxosEntries.add(
          new Paxos(paxosId, this.replica.replicaId, "", this.replica.logger));
      this.replica.paxosEntries.get(numEntries).onreceivePrepare(proposedBallot, senderReplicaId);
    } else if (paxosId == numEntries - 1) {
      // Newest instance: join it while still open, else report its decision.
      Paxos newest = this.replica.paxosEntries.get(numEntries - 1);
      if (!newest.isDecided) {
        newest.onreceivePrepare(proposedBallot, senderReplicaId);
      } else {
        sendDecideFor(senderReplicaId, paxosId, newest);
      }
    } else {
      // Older instance: reply with that instance's own entries, not the newest one's.
      sendDecideFor(senderReplicaId, paxosId, this.replica.paxosEntries.get(paxosId));
    }
  }

  /**
   * Type 2 (AckToPrepare): forwards the ack to the newest instance while it is undecided;
   * otherwise answers with a Decide for an older instance or ignores the message.
   */
  private void handleAckToPrepareMsg(
      String[] messageParts, int senderReplicaId, int paxosId, int numEntries) {
    this.replica.logger.write(
        "Received AckToPrepare Msg for Paxos Instance: " + String.valueOf(paxosId));
    if (paxosId == numEntries - 1 && !this.replica.paxosEntries.get(paxosId).isDecided) {
      Paxos ongoing = this.replica.paxosEntries.get(paxosId);
      BallotPair ownBallot = parseBallot(messageParts, 4, 3);
      BallotPair acceptedBallot = parseBallot(messageParts, 6, 5);
      // Field 7 is optional; an absent field is passed on as the empty string.
      String acceptedValue = messageParts.length > 7 ? messageParts[7] : "";
      ongoing.onreceiveAckToPrepare(ownBallot, acceptedBallot, acceptedValue);
    } else {
      replyDecideOrIgnore("AckToPrepare", senderReplicaId, paxosId, numEntries);
    }
  }

  /**
   * Type 3 (NAckToPrepare): forwards the nack to the newest instance while it is undecided;
   * otherwise answers with a Decide for an older instance or ignores the message.
   */
  private void handleNAckToPrepareMsg(
      String[] messageParts, int senderReplicaId, int paxosId, int numEntries) {
    this.replica.logger.write(
        "Received NAckToPrepare Msg for Paxos Instance: " + String.valueOf(paxosId));
    if (paxosId == numEntries - 1 && !this.replica.paxosEntries.get(paxosId).isDecided) {
      Paxos ongoing = this.replica.paxosEntries.get(paxosId);
      ongoing.onreceiveNAckToPrepare(parseBallot(messageParts, 4, 3));
    } else {
      replyDecideOrIgnore("NAckToPrepare", senderReplicaId, paxosId, numEntries);
    }
  }

  /**
   * Type 4 (Accept): same routing as Prepare, delivering the ballot together with the proposed
   * value carried in field 5.
   */
  private void handleAcceptMsg(
      String[] messageParts, int senderReplicaId, int paxosId, int numEntries) {
    BallotPair acceptBallot = parseBallot(messageParts, 4, 3);
    this.replica.logger.write(
        "Received Accept Msg for Paxos Instance: " + String.valueOf(paxosId));
    if (paxosId > numEntries - 1) {
      // Instance not seen yet: append it (it lands at index numEntries) and join.
      this.replica.paxosEntries.add(
          new Paxos(paxosId, this.replica.replicaId, "", this.replica.logger));
      this.replica.paxosEntries.get(numEntries).onreceiveAccept(acceptBallot, messageParts[5]);
    } else if (paxosId == numEntries - 1) {
      // Newest instance: join it while still open, else report its decision.
      Paxos newest = this.replica.paxosEntries.get(numEntries - 1);
      if (!newest.isDecided) {
        newest.onreceiveAccept(acceptBallot, messageParts[5]);
      } else {
        sendDecideFor(senderReplicaId, paxosId, newest);
      }
    } else {
      // Older instance: reply with that instance's own entries, not the newest one's.
      sendDecideFor(senderReplicaId, paxosId, this.replica.paxosEntries.get(paxosId));
    }
  }

  /**
   * Types 5 and 6 (Decide): hands the colon-separated value in field 3 to the addressed instance
   * when it exists locally; a Decide for an instance beyond the local log is only logged.
   */
  private void handleDecideMsg(String[] messageParts, int paxosId, int numEntries) {
    ArrayList<String> decidedParts = splitOnColons(messageParts[3]);
    if (paxosId <= numEntries - 1) {
      this.replica.paxosEntries.get(paxosId).onreceiveDecide(decidedParts);
    } else {
      // future stuff in which I did not participate because I was down
      this.replica.logger.write(
          "RECVD DECIDE MSG FOR PAXOS INSTANCE "
              + String.valueOf(paxosId)
              + " BUT IGNORED BECAUSE THIS REPLICA WAS DOWN IN INITIAL PHASES.");
    }
  }

  /**
   * Type 7 (recover-me): sends the recovering node every decided entry after the index it
   * reports, one colon-joined string per entry. Does nothing when the local log is empty.
   *
   * @param recoverNodeId replica id of the recovering node
   * @param highestIndex highest index the recovering node reports having
   * @param numEntries size of the local log when the message was received
   */
  private void handleRecoverRequestMsg(int recoverNodeId, int highestIndex, int numEntries) {
    if (numEntries < 1) {
      return;
    }
    // The newest entry counts only if decided; otherwise stop one entry earlier.
    int lastDecided =
        this.replica.paxosEntries.get(numEntries - 1).isDecided ? numEntries - 1 : numEntries - 2;
    if (highestIndex > lastDecided) {
      this.replica.logger.write("RECOVERING NODE KNOWS MUCH MORE THAN ME :(");
      return;
    }
    // An empty list is still sent when the recovering node is already up to date.
    ArrayList<String> newValues = new ArrayList<String>();
    for (int i = highestIndex + 1; i <= lastDecided; i++) {
      newValues.add(joinWithColons(this.replica.paxosEntries.get(i).logEnrties));
    }
    MessageCommunication.replyOnRecover(this.replica.replicaId, recoverNodeId, newValues);
  }

  /**
   * Type 8 (reply to recover): unless this replica is already marked recovered, appends one
   * Paxos entry per recovered value (fields 3 onward) after the highest locally decided index
   * and then marks the replica recovered.
   *
   * @param messageParts the split message; recovered values start at index 3
   * @param messageSize number of recovered values carried by the message (field 2)
   */
  private void handleRecoverReplyMsg(String[] messageParts, int messageSize) {
    if (this.replica.isRecovered) {
      // Already recovered: later replies are ignored.
      return;
    }
    // NOTE(review): the tail of paxosEntries is inspected and trimmed here, before the
    // synchronized block below is entered - confirm no other thread can touch the list meanwhile.
    int lastIndex = replica.paxosEntries.size() - 1;
    int highestDecidedIndex;
    if (lastIndex < 0) {
      highestDecidedIndex = -1;
    } else if (replica.paxosEntries.get(lastIndex).isDecided) {
      highestDecidedIndex = lastIndex;
    } else {
      // Drop the undecided tail entry so recovered entries continue from the last decided one.
      this.replica.logger.write("This would never happen.");
      replica.paxosEntries.remove(lastIndex);
      highestDecidedIndex = lastIndex - 1;
    }

    synchronized (this.replica.paxosEntries) {
      System.out.println("Message Size = " + messageSize);
      for (int i = 0; i < messageSize; i++) {
        System.out.println("Message Size = " + messageSize + "Message" + messageParts[3 + i]);
        ArrayList<String> recoveredParts = splitOnColons(messageParts[3 + i]);
        this.replica.paxosEntries.add(
            new Paxos(highestDecidedIndex + i + 1, this.replica.replicaId, recoveredParts));
      }
    }
    replica.isRecovered = true;
  }

  /**
   * Shared tail of the AckToPrepare/NAckToPrepare handlers: answers with a Decide when the
   * message refers to an instance older than the newest one, otherwise logs that it is ignored.
   *
   * @param msgName message name inserted into the log lines
   */
  private void replyDecideOrIgnore(
      String msgName, int senderReplicaId, int paxosId, int numEntries) {
    if (paxosId < numEntries - 1) {
      sendDecideFor(senderReplicaId, paxosId, this.replica.paxosEntries.get(paxosId));
      this.replica.logger.write(
          "Responded to  "
              + msgName
              + " Msg with Decide Msg for Paxos Instance: "
              + String.valueOf(paxosId));
    } else {
      this.replica.logger.write(
          "Ignoring " + msgName + " Msg for Paxos Instance: " + String.valueOf(paxosId));
    }
  }

  /** Unicasts a Decide for {@code paxosId} to the sender, carrying the given instance's entries. */
  private void sendDecideFor(int senderReplicaId, int paxosId, Paxos instance) {
    MessageCommunication.sendDecideUnicast(
        this.replica.replicaId, senderReplicaId, paxosId, instance.logEnrties);
  }

  /** Builds a ballot pair from two numeric fields, parsing {@code firstIdx} before {@code secondIdx}. */
  private BallotPair parseBallot(String[] messageParts, int firstIdx, int secondIdx) {
    return new BallotPair(
        Integer.parseInt(messageParts[firstIdx]), Integer.parseInt(messageParts[secondIdx]));
  }

  /** Splits a colon-separated value into a mutable list of its parts. */
  private ArrayList<String> splitOnColons(String value) {
    ArrayList<String> parts = new ArrayList<String>();
    for (String part : value.split(":")) {
      parts.add(part);
    }
    return parts;
  }

  /** Joins the parts with {@code :} separators; an empty list yields the empty string. */
  private String joinWithColons(ArrayList<String> parts) {
    StringBuilder joined = new StringBuilder();
    for (int i = 0; i < parts.size(); i++) {
      if (i > 0) {
        joined.append(':');
      }
      joined.append(parts.get(i));
    }
    return joined.toString();
  }
예제 #3
0
  // Heartbeat loop.
  // Launched by main() on a single thread.  Each pass sleeps, refreshes this
  // node's health record, multicasts the Cloud membership together with that
  // record, and then reports members that have gone quiet or come back.  If
  // anybody disagrees with the membership Heartbeat, they will start a round
  // of Paxos group discovery.  Never returns.
  public void run() {
    MBeanServer beanServer = ManagementFactory.getPlatformMBeanServer();
    ObjectName osBean;
    try {
      osBean = new ObjectName("java.lang:type=OperatingSystem");
    } catch (MalformedObjectNameException e) {
      throw Log.errRTExcept(e);
    }
    Thread.currentThread().setPriority(Thread.MAX_PRIORITY);

    for (int tick = 0; ; tick++) {
      // Pace the loop: one publication per SLEEP interval.  Once per second,
      // for the entire cloud, a Node multi-casts itself so that unrelated
      // Clouds discover each other and form up.
      try {
        Thread.sleep(SLEEP);
      } catch (InterruptedException ignored) {
        // An interrupt only cuts the nap short; fall through and publish.
      }

      // Refresh the health self-info that goes out with the heartbeat.
      H2O cloud = H2O.CLOUD;
      HeartBeat hb = H2O.SELF._heartbeat;
      hb._hb_version = HB_VERSION++;
      hb._jvm_boot_msec = TimeLine.JVM_BOOT_MSEC;
      final Runtime rt = Runtime.getRuntime();
      hb.set_free_mem(rt.freeMemory());
      hb.set_max_mem(rt.maxMemory());
      hb.set_tot_mem(rt.totalMemory());
      hb._keys = (H2O.STORE.size());
      hb.set_valsz(myHisto.histo(false)._cached);
      hb._num_cpus = (char) rt.availableProcessors();

      // Mini-benchmark on every 300th pass, starting with the third one.
      if (tick % 300 == 2) {
        hb._gflops = Linpack.run();
        hb._membw = MemoryBandwidth.run();
      }

      // System load average; reported as 0 when the attribute cannot be read.
      Object loadAttr = null;
      try {
        loadAttr = beanServer.getAttribute(osBean, "SystemLoadAverage");
      } catch (Exception e) {
        // Ignore, data probably not available on this VM
      }
      if (loadAttr instanceof Double) {
        hb._system_load_average = ((Double) loadAttr).floatValue();
      } else {
        hb._system_load_average = 0;
      }

      // Total task count summed over every member of the cloud.
      int pendingRpcs = 0;
      for (H2ONode member : cloud._memary) {
        pendingRpcs += member.taskSize();
      }
      hb._rpcs = (char) pendingRpcs;

      // Scrape F/J pool counts, one slot per priority level.
      hb._fjthrds = new short[H2O.MAX_PRIORITY + 1];
      hb._fjqueue = new short[H2O.MAX_PRIORITY + 1];
      for (int pri = 0; pri < hb._fjthrds.length; pri++) {
        hb._fjthrds[pri] = (short) H2O.getWrkThrPoolSize(pri);
        hb._fjqueue[pri] = (short) H2O.getWrkQueueSize(pri);
      }
      hb._tcps_active = (char) H2ONode.TCPS.get();

      // Usable and total disk storage for the partition where the persistent
      // KV pairs are stored.
      hb.set_free_disk(Persist.getIce().getUsableSpace());
      hb.set_max_disk(Persist.getIce().getTotalSpace());

      // CPU utilization for the system and for this process (linux only);
      // every counter is published as -1 when the reader has no valid data.
      LinuxProcFileReader procReader = new LinuxProcFileReader();
      procReader.read();
      if (!procReader.valid()) {
        hb._system_idle_ticks = -1;
        hb._system_total_ticks = -1;
        hb._process_total_ticks = -1;
        hb._process_num_open_fds = -1;
      } else {
        hb._system_idle_ticks = procReader.getSystemIdleTicks();
        hb._system_total_ticks = procReader.getSystemTotalTicks();
        hb._process_total_ticks = procReader.getProcessTotalTicks();
        hb._process_num_open_fds = procReader.getProcessNumOpenFds();
      }
      hb._pid = procReader.getProcessID();

      // Announce what Cloud we think we are in, and publish our health too.
      UDPHeartbeat.build_and_multicast(cloud, hb);

      // If we have no internet connection, then the multicast goes nowhere
      // and we never receive a heartbeat from ourselves!  Fake it now.
      long now = System.currentTimeMillis();
      H2O.SELF._last_heard_from = now;

      // Look for napping Nodes: announce each loss of contact, and each
      // recovery, exactly once per transition.
      for (H2ONode member : cloud._memary) {
        boolean napping = (now - member._last_heard_from) > SUSPECT;
        if (napping && !member._announcedLostContact) {
          Paxos.print("hart: announce suspect node", cloud._memary, member.toString());
          member._announcedLostContact = true;
        } else if (!napping && member._announcedLostContact) {
          Paxos.print("hart: regained contact with node", cloud._memary, member.toString());
          member._announcedLostContact = false;
        }
      }
    }
  }