@Override public void dinvoke(H2ONode sender) { assert _key.home() || _val == null; // Only PUT to home for keys, or remote invalidation from home Paxos.lockCloud(); // Initialize Value for having a single known replica (the sender) if (_val != null) _val.initReplicaHome(sender, _key); // Spin, until we update something. Value old = H2O.raw_get(_key); // Raw-get: do not lazy-manifest if overwriting while (H2O.putIfMatch(_key, _val, old) != old) old = H2O.raw_get(_key); // Repeat until we update something. // Invalidate remote caches. Block, so that all invalidates are done // before we return to the remote caller. if (_key.home() && old != null) old.lockAndInvalidate(sender, new Futures()).blockForPending(); // No return result _key = null; _val = null; tryComplete(); }
public void handleServerMessage(String currentMessage) { String[] messageParts = currentMessage.split("[|]"); int senderReplicaId = Integer.parseInt(messageParts[1]); int paxosId = Integer.parseInt(messageParts[2]); int numEntries = this.replica.paxosEntries.size(); Paxos currentPaxos; switch (Integer.parseInt(messageParts[0])) { case 1: BallotPair proposedBallotNumPair = new BallotPair(Integer.parseInt(messageParts[4]), Integer.parseInt(messageParts[3])); // receivePrepareMsg this.replica.logger.write( "Received Prepare Msg for Paxos Instance: " + String.valueOf(paxosId)); if (paxosId > numEntries - 1) { // Create a new one this.replica.paxosEntries.add( new Paxos(paxosId, this.replica.replicaId, "", this.replica.logger)); currentPaxos = this.replica.paxosEntries.get(numEntries); currentPaxos.onreceivePrepare(proposedBallotNumPair, senderReplicaId); } else if (paxosId == numEntries - 1) { // check if it is active or not currentPaxos = this.replica.paxosEntries.get(numEntries - 1); // active and join it - else already decided and send decide if (!currentPaxos.isDecided) { currentPaxos.onreceivePrepare(proposedBallotNumPair, senderReplicaId); } else { MessageCommunication.sendDecideUnicast( this.replica.replicaId, senderReplicaId, paxosId, currentPaxos.logEnrties); } } else { // already decided currentPaxos = this.replica.paxosEntries.get(numEntries - 1); MessageCommunication.sendDecideUnicast( this.replica.replicaId, senderReplicaId, paxosId, currentPaxos.logEnrties); } break; case 2: // receiveAcktoPrepareMsg this.replica.logger.write( "Received AckToPrepare Msg for Paxos Instance: " + String.valueOf(paxosId)); // Ongoing paxos Instance if (paxosId == numEntries - 1 && !this.replica.paxosEntries.get(paxosId).isDecided) { currentPaxos = this.replica.paxosEntries.get(paxosId); BallotPair ownBallotNumPair = new BallotPair(Integer.parseInt(messageParts[4]), Integer.parseInt(messageParts[3])); BallotPair acceptedBallotNumPair = new BallotPair(Integer.parseInt(messageParts[6]), Integer.parseInt(messageParts[5])); currentPaxos.onreceiveAckToPrepare( ownBallotNumPair, acceptedBallotNumPair, messageParts.length > 7 ? messageParts[7] : ""); } else { if (paxosId < numEntries - 1) { MessageCommunication.sendDecideUnicast( this.replica.replicaId, senderReplicaId, paxosId, this.replica.paxosEntries.get(paxosId).logEnrties); this.replica.logger.write( "Responded to AckToPrepare Msg with Decide Msg for Paxos Instance: " + String.valueOf(paxosId)); } else { this.replica.logger.write( "Ignoring AckToPrepare Msg for Paxos Instance: " + String.valueOf(paxosId)); } } break; case 3: // receiveNAcktoPrepareMsg this.replica.logger.write( "Received NAckToPrepare Msg for Paxos Instance: " + String.valueOf(paxosId)); // Ongoing paxos Instance if (paxosId == numEntries - 1 && !this.replica.paxosEntries.get(paxosId).isDecided) { currentPaxos = this.replica.paxosEntries.get(paxosId); BallotPair ownBallotNumPair = new BallotPair(Integer.parseInt(messageParts[4]), Integer.parseInt(messageParts[3])); currentPaxos.onreceiveNAckToPrepare(ownBallotNumPair); } else { if (paxosId < numEntries - 1) { MessageCommunication.sendDecideUnicast( this.replica.replicaId, senderReplicaId, paxosId, this.replica.paxosEntries.get(paxosId).logEnrties); this.replica.logger.write( "Responded to NAckToPrepare Msg with Decide Msg for Paxos Instance: " + String.valueOf(paxosId)); } else { this.replica.logger.write( "Ignoring NAckToPrepare Msg for Paxos Instance: " + String.valueOf(paxosId)); } } break; case 4: // onreceiveAccept BallotPair acceptBallotNumPair = new BallotPair(Integer.parseInt(messageParts[4]), Integer.parseInt(messageParts[3])); this.replica.logger.write( "Received Accept Msg for Paxos Instance: " + String.valueOf(paxosId)); if (paxosId > numEntries - 1) { // Create a new one this.replica.paxosEntries.add( new Paxos(paxosId, this.replica.replicaId, "", this.replica.logger)); currentPaxos = this.replica.paxosEntries.get(numEntries); currentPaxos.onreceiveAccept(acceptBallotNumPair, messageParts[5]); } else if (paxosId == numEntries - 1) { // check if it is active or not currentPaxos = this.replica.paxosEntries.get(numEntries - 1); // active and join it - else already decided and send decide if (!currentPaxos.isDecided) { currentPaxos.onreceiveAccept(acceptBallotNumPair, messageParts[5]); } else { MessageCommunication.sendDecideUnicast( this.replica.replicaId, senderReplicaId, paxosId, currentPaxos.logEnrties); } } else { // already decided currentPaxos = this.replica.paxosEntries.get(numEntries - 1); MessageCommunication.sendDecideUnicast( this.replica.replicaId, senderReplicaId, paxosId, currentPaxos.logEnrties); } break; case 5: // onreceiveDecide String value = messageParts[3]; ArrayList<String> Parts = new ArrayList<String>(); String[] tempArray = value.split(":"); for (String x : tempArray) { Parts.add(x); } if (paxosId < numEntries - 1) { // Redundant check currentPaxos = this.replica.paxosEntries.get(paxosId); currentPaxos.onreceiveDecide(Parts); } else if (paxosId == numEntries - 1) { // check if it is active or not currentPaxos = this.replica.paxosEntries.get(numEntries - 1); // active and join it - else already decided and send decide /** if(!currentPaxos.isDecided) { currentPaxos.onreceiveDecide(value); } */ currentPaxos.onreceiveDecide(Parts); } else { // future stuff in which I did not participate because I was down this.replica.logger.write( "RECVD DECIDE MSG FOR PAXOS INSTANCE " + String.valueOf(paxosId) + " BUT IGNORED BECAUSE THIS REPLICA WAS DOWN IN INITIAL PHASES."); } break; case 6: String value1 = messageParts[3]; ArrayList<String> Parts1 = new ArrayList<String>(); String[] tempArray1 = value1.split(":"); for (String x : tempArray1) { Parts1.add(x); } if (paxosId < numEntries - 1) { // Redundant check currentPaxos = this.replica.paxosEntries.get(paxosId); currentPaxos.onreceiveDecide(Parts1); } else if (paxosId == numEntries - 1) { // check if it is active or not currentPaxos = this.replica.paxosEntries.get(numEntries - 1); // active and join it - else already decided and send decide currentPaxos.onreceiveDecide(Parts1); } else { // future stuff in which I did not participate because I was down this.replica.logger.write( "RECVD DECIDE MSG FOR PAXOS INSTANCE " + String.valueOf(paxosId) + " BUT IGNORED BECAUSE THIS REPLICA WAS DOWN IN INITIAL PHASES."); } break; case 7: // received recoverme message from a failed node int highestindex = paxosId; int recoverNodeId = senderReplicaId; int num = 0; if (numEntries >= 1) { if (this.replica.paxosEntries.get(numEntries - 1).isDecided) { num = numEntries - 1; } else { num = numEntries - 2; } if (highestindex <= num) { // sendUpdate or everythinguptodate ArrayList<String> newValues = new ArrayList<String>(); for (int i = highestindex + 1; i <= num; i++) { StringBuilder message = new StringBuilder(); ArrayList<String> entry = this.replica.paxosEntries.get(i).logEnrties; for (String x : entry) { message.append(x + ":"); } newValues.add(message.subSequence(0, message.length() - 1).toString()); // newValues.add(this.replica.paxosEntries.get(i).valueWritten); } MessageCommunication.replyOnRecover(this.replica.replicaId, recoverNodeId, newValues); } else { this.replica.logger.write("RECOVERING NODE KNOWS MUCH MORE THAN ME :("); } } break; case 8: // receiving replyToRecover message // handle recovery message from other replicas and set isRecovered = true if (!this.replica.isRecovered) { int messageSize = paxosId; int numberOfEntries = replica.paxosEntries.size() - 1; int highestDecidedIndex = 0; if (numberOfEntries >= 0) { if (replica.paxosEntries.get(numberOfEntries).isDecided) { highestDecidedIndex = numberOfEntries; } else { this.replica.logger.write("This would never happen."); replica.paxosEntries.remove(numberOfEntries); highestDecidedIndex = numberOfEntries - 1; } } else highestDecidedIndex = -1; synchronized (this.replica.paxosEntries) { System.out.println("Message Size = " + messageSize); for (int i = 0; i < messageSize; i++) { System.out.println("Message Size = " + messageSize + "Message" + messageParts[3 + i]); String str = messageParts[3 + i]; ArrayList<String> Parts2 = new ArrayList<String>(); String[] tempArray2 = str.split(":"); for (String x : tempArray2) { Parts2.add(x); } this.replica.paxosEntries.add( new Paxos(highestDecidedIndex + i + 1, this.replica.replicaId, Parts2)); } } replica.isRecovered = true; } // else don't do anything I am have recovered break; default: break; } }
// The Run Method. // Started by main() on a single thread, this code publishes Cloud membership // to the Cloud once a second (across all members). If anybody disagrees // with the membership Heartbeat, they will start a round of Paxos group // discovery. public void run() { MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); ObjectName os; try { os = new ObjectName("java.lang:type=OperatingSystem"); } catch (MalformedObjectNameException e) { throw Log.errRTExcept(e); } Thread.currentThread().setPriority(Thread.MAX_PRIORITY); int counter = 0; while (true) { // Once per second, for the entire cloud a Node will multi-cast publish // itself, so other unrelated Clouds discover each other and form up. try { Thread.sleep(SLEEP); } // Only once-sec per entire Cloud catch (InterruptedException e) { } // Update the interesting health self-info for publication also H2O cloud = H2O.CLOUD; HeartBeat hb = H2O.SELF._heartbeat; hb._hb_version = HB_VERSION++; hb._jvm_boot_msec = TimeLine.JVM_BOOT_MSEC; final Runtime run = Runtime.getRuntime(); hb.set_free_mem(run.freeMemory()); hb.set_max_mem(run.maxMemory()); hb.set_tot_mem(run.totalMemory()); hb._keys = (H2O.STORE.size()); hb.set_valsz(myHisto.histo(false)._cached); hb._num_cpus = (char) run.availableProcessors(); if (counter % 300 == 2) { // run mini-benchmark every 5 mins hb._gflops = Linpack.run(); hb._membw = MemoryBandwidth.run(); } Object load = null; try { load = mbs.getAttribute(os, "SystemLoadAverage"); } catch (Exception e) { // Ignore, data probably not available on this VM } hb._system_load_average = load instanceof Double ? ((Double) load).floatValue() : 0; int rpcs = 0; for (H2ONode h2o : cloud._memary) rpcs += h2o.taskSize(); hb._rpcs = (char) rpcs; // Scrape F/J pool counts hb._fjthrds = new short[H2O.MAX_PRIORITY + 1]; hb._fjqueue = new short[H2O.MAX_PRIORITY + 1]; for (int i = 0; i < hb._fjthrds.length; i++) { hb._fjthrds[i] = (short) H2O.getWrkThrPoolSize(i); hb._fjqueue[i] = (short) H2O.getWrkQueueSize(i); } hb._tcps_active = (char) H2ONode.TCPS.get(); // get the usable and total disk storage for the partition where the // persistent KV pairs are stored hb.set_free_disk(Persist.getIce().getUsableSpace()); hb.set_max_disk(Persist.getIce().getTotalSpace()); // get cpu utilization for the system and for this process. (linux only.) LinuxProcFileReader lpfr = new LinuxProcFileReader(); lpfr.read(); if (lpfr.valid()) { hb._system_idle_ticks = lpfr.getSystemIdleTicks(); hb._system_total_ticks = lpfr.getSystemTotalTicks(); hb._process_total_ticks = lpfr.getProcessTotalTicks(); hb._process_num_open_fds = lpfr.getProcessNumOpenFds(); } else { hb._system_idle_ticks = -1; hb._system_total_ticks = -1; hb._process_total_ticks = -1; hb._process_num_open_fds = -1; } hb._pid = lpfr.getProcessID(); // Announce what Cloud we think we are in. // Publish our health as well. UDPHeartbeat.build_and_multicast(cloud, hb); // If we have no internet connection, then the multicast goes // nowhere and we never receive a heartbeat from ourselves! // Fake it now. long now = System.currentTimeMillis(); H2O.SELF._last_heard_from = now; // Look for napping Nodes & propose removing from Cloud for (H2ONode h2o : cloud._memary) { long delta = now - h2o._last_heard_from; if (delta > SUSPECT) { // We suspect this Node has taken a dirt nap if (!h2o._announcedLostContact) { Paxos.print("hart: announce suspect node", cloud._memary, h2o.toString()); h2o._announcedLostContact = true; } } else if (h2o._announcedLostContact) { Paxos.print("hart: regained contact with node", cloud._memary, h2o.toString()); h2o._announcedLostContact = false; } } counter++; } }