@After
 public void tearDown() throws Exception {
   for (HostMessenger hm : createdMessengers) {
     hm.shutdown();
   }
   createdMessengers.clear();
 }
 private HostMessenger createHostMessenger(int index, boolean start) throws Exception {
   HostMessenger.Config config = new HostMessenger.Config();
   // Offset the ports by the host index so several messengers can coexist on one machine.
   config.internalPort = config.internalPort + index;
   config.zkInterface = "127.0.0.1:" + (2181 + index);
   HostMessenger hm = new HostMessenger(config);
   createdMessengers.add(hm);
   if (start) {
     hm.start();
   }
   return hm;
 }
Example #3
 // This message used to be sent by the SP or MP initiator when they accepted a
 // promotion. For dev speed, we'll detect mastership changes here and construct
 // and send this message to the local client interface so we can keep the CI's
 // implementation.
 private void sendLeaderChangeNotify(long hsId, int partitionId) {
   try {
     JSONStringer stringer = new JSONStringer();
     stringer.object();
     stringer.key(JSON_PARTITION_ID).value(partitionId);
     stringer.key(JSON_INITIATOR_HSID).value(hsId);
     stringer.endObject();
     BinaryPayloadMessage bpm =
         new BinaryPayloadMessage(new byte[0], stringer.toString().getBytes("UTF-8"));
     int hostId = m_hostMessenger.getHostId();
     m_hostMessenger.send(
         CoreUtils.getHSIdFromHostAndSite(hostId, HostMessenger.CLIENT_INTERFACE_SITE_ID), bpm);
   } catch (Exception e) {
     VoltDB.crashLocalVoltDB("Unable to propogate leader promotion to client interface.", true, e);
   }
 }
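For reference, the receiving side can unpack this notification from the payload. The sketch below is an assumption built only from the snippet above (the JSON keys and BinaryPayloadMessage.m_payload); the handler name is hypothetical.
 // Hypothetical receiver-side decode of the leader-change payload.
 private void handleLeaderChangeNotify(BinaryPayloadMessage bpm) throws Exception {
   JSONObject obj = new JSONObject(new String(bpm.m_payload, "UTF-8"));
   int partitionId = obj.getInt(JSON_PARTITION_ID);       // same key written above
   long initiatorHSId = obj.getLong(JSON_INITIATOR_HSID); // same key written above
   // ... react to the new leader for partitionId ...
 }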
Example #4
  public MockVoltDB(int clientPort, int adminPort, int httpPort, int drPort) {
    try {
      JSONObject obj = new JSONObject();
      JSONArray jsonArray = new JSONArray();
      jsonArray.put("127.0.0.1");
      obj.put("interfaces", jsonArray);
      obj.put("clientPort", clientPort);
      obj.put("adminPort", adminPort);
      obj.put("httpPort", httpPort);
      obj.put("drPort", drPort);
      m_localMetadata = obj.toString(4);

      m_catalog = new Catalog();
      m_catalog.execute("add / clusters " + m_clusterName);
      m_catalog.execute(
          "add "
              + m_catalog.getClusters().get(m_clusterName).getPath()
              + " databases "
              + m_databaseName);
      Cluster cluster = m_catalog.getClusters().get(m_clusterName);
      assert (cluster != null);
      // Set a sane default for TestMessaging (at least)
      cluster.setHeartbeattimeout(10000);

      m_hostMessenger.start();
      VoltZK.createPersistentZKNodes(m_hostMessenger.getZK());
      m_hostMessenger
          .getZK()
          .create(
              VoltZK.cluster_metadata + "/" + m_hostMessenger.getHostId(),
              getLocalMetadata().getBytes("UTF-8"),
              Ids.OPEN_ACL_UNSAFE,
              CreateMode.EPHEMERAL);

      m_hostMessenger.generateMailboxId(
          m_hostMessenger.getHSIdForLocalSite(HostMessenger.STATS_SITE_ID));
      m_statsAgent = new StatsAgent();
      m_statsAgent.registerMailbox(
          m_hostMessenger, m_hostMessenger.getHSIdForLocalSite(HostMessenger.STATS_SITE_ID));
      for (MailboxType type : MailboxType.values()) {
        m_mailboxMap.put(type, new LinkedList<MailboxNodeContent>());
      }
      m_mailboxMap
          .get(MailboxType.StatsAgent)
          .add(
              new MailboxNodeContent(
                  m_hostMessenger.getHSIdForLocalSite(HostMessenger.STATS_SITE_ID), null));
      m_siteTracker = new SiteTracker(m_hostId, m_mailboxMap);
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
Example #5
  @Test
  public void testSingleHost() throws Exception {
    HostMessenger hm = createHostMessenger(0);

    Mailbox m1 = hm.createMailbox();

    SiteMailbox sm = new SiteMailbox(hm, (-2L << 32));

    hm.createMailbox(sm.getHSId(), sm);

    sm.send(m1.getHSId(), new LocalObjectMessage(null));
    m1.send(sm.getHSId(), new LocalObjectMessage(null));

    LocalObjectMessage lom = (LocalObjectMessage) m1.recv();
    assertEquals(lom.m_sourceHSId, sm.getHSId());

    lom = (LocalObjectMessage) sm.recv();
    assertEquals(lom.m_sourceHSId, m1.getHSId());
  }
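A note on the (-2L << 32) literal: these tests treat an HSId as a 64-bit value packing a site id and a host id, an assumption suggested (but not proven) by CoreUtils.getHSIdFromHostAndSite in Example #3. Under that reading, the bit math works out as follows:
  // Unpacking the HSId literal used for the SiteMailbox above (assumed layout:
  // site id in the high 32 bits, host id in the low 32 bits).
  long hsId = (-2L) << 32;
  int siteId = (int) (hsId >> 32); // -2
  int hostId = (int) hsId;         // 0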
Example #6
 public Cartographer(HostMessenger hostMessenger) {
   super(false);
   m_hostMessenger = hostMessenger;
   m_zk = hostMessenger.getZK();
   m_iv2Masters = new LeaderCache(m_zk, VoltZK.iv2masters, m_SPIMasterCallback);
   m_iv2Mpi = new LeaderCache(m_zk, VoltZK.iv2mpi, m_MPICallback);
   try {
     m_iv2Masters.start(true);
     m_iv2Mpi.start(true);
   } catch (Exception e) {
     VoltDB.crashLocalVoltDB("Screwed", true, e);
   }
 }
Example #7
 @Override
 public boolean shutdown(Thread mainSiteThread) throws InterruptedException {
   if (m_faultDistributor != null) {
     m_faultDistributor.shutDown();
   }
   VoltDB.wasCrashCalled = false;
   VoltDB.crashMessage = null;
   m_snapshotCompletionMonitor.shutdown();
   m_es.shutdown();
   m_es.awaitTermination(1, TimeUnit.DAYS);
   m_statsAgent.shutdown();
   m_hostMessenger.shutdown();
   return true;
 }
Example #8
  private void doPartitionDetectionActivities() {
    // We should never re-enter here once we've decided we're partitioned and doomed
    assert (!m_partitionDetected);
    // After everything is resolved, write the new surviving set to ZK
    List<Integer> currentNodes = null;
    try {
      currentNodes = m_hostMessenger.getLiveHostIds();
    } catch (Exception e) {

    }
    Set<Integer> currentHosts = new HashSet<Integer>(currentNodes);
    Set<Integer> previousHosts = readPriorKnownLiveNodes();

    boolean partitionDetectionTriggered = makePPDDecision(previousHosts, currentHosts);

    if (partitionDetectionTriggered) {
      m_partitionDetected = true;
      if (m_usingCommandLog) {
        // Just shut down immediately
        VoltDB.crashGlobalVoltDB(
            "Use of command logging detected, no additional database snapshot will "
                + "be generated.  Please use the 'recover' action to restore the database if necessary.",
            false,
            null);
      } else {
        SnapshotUtil.requestSnapshot(
            0L,
            m_partSnapshotSchedule.getPath(),
            m_partSnapshotSchedule.getPrefix() + System.currentTimeMillis(),
            true,
            SnapshotFormat.NATIVE,
            null,
            m_snapshotHandler,
            true);
      }
    }
    // If the cluster host set has changed, then write the new set to ZK.
    // NOTE: we don't want to update the known live nodes if we've decided that
    // our subcluster is dying, otherwise a poorly timed subsequent failure might
    // reverse this decision. Any future promoted LeaderAppointer should make its
    // partition detection decision based on the pre-partition cluster state.
    else if (!currentHosts.equals(previousHosts)) {
      writeKnownLiveNodes(currentNodes);
    }
  }
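makePPDDecision itself is not shown on this page. A minimal sketch of one plausible survivor-set policy follows; it is an assumption for illustration only (the real rule may differ): keep running only if a strict majority of the previously known hosts survived, and break an exact 50/50 split in favor of the side holding the lowest previous host id.
  // Hypothetical partition detection decision; only the inputs and the boolean
  // result are confirmed by the snippet above.
  private boolean makePPDDecisionSketch(Set<Integer> previousHosts, Set<Integer> currentHosts) {
    if (currentHosts.size() * 2 > previousHosts.size()) {
      return false; // strict majority survived: keep running
    }
    if (currentHosts.size() * 2 == previousHosts.size()) {
      // Exactly half survived: only the half holding the lowest previous host
      // id may continue.
      return !currentHosts.contains(java.util.Collections.min(previousHosts));
    }
    return true; // minority side: declare a partition and shut down
  }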
Example #9
 public SpInitiator(
     HostMessenger messenger,
     Integer partition,
     StatsAgent agent,
     SnapshotCompletionMonitor snapMonitor,
     VoltDB.START_ACTION startAction) {
   super(
       VoltZK.iv2masters,
       messenger,
       partition,
       new SpScheduler(partition, new SiteTaskerQueue(), snapMonitor),
       "SP",
       agent,
       startAction);
   m_leaderCache =
       new LeaderCache(messenger.getZK(), VoltZK.iv2appointees, m_leadersChangeHandler);
   m_tickProducer = new TickProducer(m_scheduler.m_tasks);
 }
Example #10
 public LeaderAppointer(
     HostMessenger hm,
     int numberOfPartitions,
     int kfactor,
     boolean partitionDetectionEnabled,
     SnapshotSchedule partitionSnapshotSchedule,
     boolean usingCommandLog,
     JSONObject topology,
     MpInitiator mpi) {
   m_hostMessenger = hm;
   m_zk = hm.getZK();
   m_kfactor = kfactor;
   m_topo = topology;
   m_MPI = mpi;
   m_partitionCount = numberOfPartitions;
   m_callbacks = new PartitionCallback[m_partitionCount];
   m_partitionWatchers = new BabySitter[m_partitionCount];
   m_iv2appointees = new LeaderCache(m_zk, VoltZK.iv2appointees);
   m_iv2masters = new LeaderCache(m_zk, VoltZK.iv2masters, m_masterCallback);
   m_partitionDetectionEnabled = partitionDetectionEnabled;
   m_partSnapshotSchedule = partitionSnapshotSchedule;
   m_usingCommandLog = usingCommandLog;
 }
Example #11
  @Override
  public void acceptPromotion() throws InterruptedException, ExecutionException, KeeperException {
    // Crank up the leader caches.  Use blocking startup so that we'll have valid point-in-time
    // caches later.
    m_iv2appointees.start(true);
    m_iv2masters.start(true);
    // Figure out what conditions we assumed leadership under.
    if (m_iv2appointees.pointInTimeCache().size() == 0) {
      tmLog.debug("LeaderAppointer in startup");
      m_state.set(AppointerState.CLUSTER_START);
    } else if ((m_iv2appointees.pointInTimeCache().size() != m_partitionCount)
        || (m_iv2masters.pointInTimeCache().size() != m_partitionCount)) {
      // If we are promoted and the appointees or masters set is partial, the
      // previous appointer failed during startup (at least for now, until we
      // support adding/removing partitions on the fly).
      VoltDB.crashGlobalVoltDB("Detected failure during startup, unable to start", false, null);
    } else {
      tmLog.debug("LeaderAppointer in repair");
      m_state.set(AppointerState.DONE);
    }

    if (m_state.get() == AppointerState.CLUSTER_START) {
      // Need to block the return of acceptPromotion until after the MPI is
      // promoted. Wait for this latch to count down after appointing all the
      // partition leaders. The LeaderCache callback will count it down once it
      // has seen all the appointed leaders publish themselves as the actual
      // leaders.
      m_startupLatch = new CountDownLatch(1);
      writeKnownLiveNodes(m_hostMessenger.getLiveHostIds());
      for (int i = 0; i < m_partitionCount; i++) {
        String dir = LeaderElector.electionDirForPartition(i);
        // Race along with all of the replicas for this partition to create the ZK parent node
        try {
          m_zk.create(dir, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        } catch (KeeperException.NodeExistsException e) {
          // expected on all nodes that don't start() first.
        }
        m_callbacks[i] = new PartitionCallback(i);
        Pair<BabySitter, List<String>> sitterstuff =
            BabySitter.blockingFactory(m_zk, dir, m_callbacks[i], m_es);
        m_partitionWatchers[i] = sitterstuff.getFirst();
      }
      m_startupLatch.await();
    } else {
      // If we're taking over for a failed LeaderAppointer, we know when
      // we get here that every partition had a leader at some point in
      // time.  We'll seed each of the PartitionCallbacks for each
      // partition with the HSID of the last published leader.  The
      // blocking startup of the BabySitter watching that partition will
      // call our callback, get the current full set of replicas, and
      // appoint a new leader if the seeded one has actually failed
      Map<Integer, Long> masters = m_iv2masters.pointInTimeCache();
      tmLog.info("LeaderAppointer repairing with master set: " + masters);
      for (Entry<Integer, Long> master : masters.entrySet()) {
        int partId = master.getKey();
        String dir = LeaderElector.electionDirForPartition(partId);
        m_callbacks[partId] = new PartitionCallback(partId, master.getValue());
        Pair<BabySitter, List<String>> sitterstuff =
            BabySitter.blockingFactory(m_zk, dir, m_callbacks[partId], m_es);
        m_partitionWatchers[partId] = sitterstuff.getFirst();
      }
      // just go ahead and promote our MPI
      m_MPI.acceptPromotion();
    }
  }
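The startup latch above is released by the iv2masters LeaderCache callback, which is not shown on this page. A minimal sketch of the countdown condition, using only fields that appear above (m_state, m_partitionCount, m_startupLatch) and a hypothetical method name:
    // Hypothetical callback body: once every partition has published a leader,
    // release the latch so acceptPromotion() can return.
    void onIv2MastersChange(Map<Integer, Long> masters) {
      if (m_state.get() == AppointerState.CLUSTER_START
          && masters.size() == m_partitionCount
          && m_startupLatch != null) {
        m_startupLatch.countDown();
      }
    }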
Example #12
  private void createAndRegisterAckMailboxes(
      final Set<Integer> localPartitions, HostMessenger messenger) {
    m_zk = messenger.getZK();
    m_mailboxesZKPath = VoltZK.exportGenerations + "/" + m_timestamp + "/" + "mailboxes";

    m_mbox =
        new LocalMailbox(messenger) {
          @Override
          public void deliver(VoltMessage message) {
            if (message instanceof BinaryPayloadMessage) {
              BinaryPayloadMessage bpm = (BinaryPayloadMessage) message;
              ByteBuffer buf = ByteBuffer.wrap(bpm.m_payload);
              final int partition = buf.getInt();
              final int length = buf.getInt();
              byte[] stringBytes = new byte[length];
              buf.get(stringBytes);
              String signature = new String(stringBytes, Constants.UTF8ENCODING);
              final long ackUSO = buf.getLong();

              final HashMap<String, ExportDataSource> partitionSources =
                  m_dataSourcesByPartition.get(partition);
              if (partitionSources == null) {
                exportLog.error(
                    "Received an export ack for partition "
                        + partition
                        + " which does not exist on this node");
                return;
              }

              final ExportDataSource eds = partitionSources.get(signature);
              if (eds == null) {
                exportLog.error(
                    "Received an export ack for partition "
                        + partition
                        + " source signature "
                        + signature
                        + " which does not exist on this node");
                return;
              }

              try {
                eds.ack(ackUSO);
              } catch (RejectedExecutionException ignoreIt) {
                // ignore it: the source is already shut down
              }
            } else {
              exportLog.error("Receive unexpected message " + message + " in export subsystem");
            }
          }
        };
    messenger.createMailbox(null, m_mbox);

    for (Integer partition : localPartitions) {
      final String partitionDN = m_mailboxesZKPath + "/" + partition;
      ZKUtil.asyncMkdirs(m_zk, partitionDN);

      ZKUtil.StringCallback cb = new ZKUtil.StringCallback();
      m_zk.create(
          partitionDN + "/" + m_mbox.getHSId(),
          null,
          Ids.OPEN_ACL_UNSAFE,
          CreateMode.EPHEMERAL,
          cb,
          null);
    }

    ListenableFuture<?> fut =
        m_childUpdatingThread.submit(
            new Runnable() {
              @Override
              public void run() {
                List<Pair<Integer, ZKUtil.ChildrenCallback>> callbacks =
                    new ArrayList<Pair<Integer, ZKUtil.ChildrenCallback>>();
                for (Integer partition : localPartitions) {
                  ZKUtil.ChildrenCallback callback = new ZKUtil.ChildrenCallback();
                  m_zk.getChildren(
                      m_mailboxesZKPath + "/" + partition,
                      constructMailboxChildWatcher(),
                      callback,
                      null);
                  callbacks.add(Pair.of(partition, callback));
                }
                for (Pair<Integer, ZKUtil.ChildrenCallback> p : callbacks) {
                  final Integer partition = p.getFirst();
                  List<String> children = null;
                  try {
                    children = p.getSecond().getChildren();
                  } catch (InterruptedException | KeeperException e) {
                    Throwables.propagate(e);
                  }
                  ImmutableList.Builder<Long> mailboxes = ImmutableList.builder();

                  for (String child : children) {
                    if (child.equals(Long.toString(m_mbox.getHSId()))) continue;
                    mailboxes.add(Long.valueOf(child));
                  }
                  ImmutableList<Long> mailboxHsids = mailboxes.build();

                  for (ExportDataSource eds : m_dataSourcesByPartition.get(partition).values()) {
                    eds.updateAckMailboxes(Pair.of(m_mbox, mailboxHsids));
                  }
                }
              }
            });
    try {
      fut.get();
    } catch (Throwable t) {
      Throwables.propagate(t);
    }
  }
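The deliver() override above fixes the wire format of an export ack: partition id (int), signature length (int), signature bytes, then the acked USO (long). The sender side is easy to reconstruct from that layout; the helper name below is hypothetical.
  // Hypothetical sender-side encoder for the ack payload decoded above.
  private static BinaryPayloadMessage encodeExportAck(int partition, String signature, long ackUSO) {
    byte[] sig = signature.getBytes(Constants.UTF8ENCODING);
    ByteBuffer buf = ByteBuffer.allocate(4 + 4 + sig.length + 8);
    buf.putInt(partition);  // partition id
    buf.putInt(sig.length); // signature length
    buf.put(sig);           // signature bytes
    buf.putLong(ackUSO);    // universal stream offset being acked
    return new BinaryPayloadMessage(new byte[0], buf.array());
  }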
Example #13
 /** Discard the mailbox. */
 public void close() {
   m_messenger.removeMailbox(getHSId());
 }
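Usage note: createMailbox and removeMailbox are a pair, so callers typically guard the removal with try/finally. A minimal sketch, assuming only the HostMessenger API shown in the other examples on this page:
  Mailbox mbox = hm.createMailbox();
  try {
    // ... exchange messages through mbox ...
  } finally {
    hm.removeMailbox(mbox.getHSId()); // the mirror of close() above
  }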
Example #14
 public JoinCoordinator(HostMessenger hostMessenger) {
   super(hostMessenger, hostMessenger.generateMailboxId(null));
   m_messenger = hostMessenger;
 }
Example #15
  @Test
  public void testMultiHost() throws Exception {
    HostMessenger hm1 = createHostMessenger(0);

    final HostMessenger hm2 = createHostMessenger(1, false);

    final HostMessenger hm3 = createHostMessenger(2, false);

    final AtomicReference<Exception> exception = new AtomicReference<Exception>();
    Thread hm2Start =
        new Thread() {
          @Override
          public void run() {
            try {
              hm2.start();
            } catch (Exception e) {
              e.printStackTrace();
              exception.set(e);
            }
          }
        };
    Thread hm3Start =
        new Thread() {
          @Override
          public void run() {
            try {
              hm3.start();
            } catch (Exception e) {
              e.printStackTrace();
              exception.set(e);
            }
          }
        };

    hm2Start.start();
    hm3Start.start();
    hm2Start.join();
    System.out.println(hm2.getZK().getChildren(CoreZK.hostids, false));
    hm3Start.join();

    if (exception.get() != null) {
      fail(exception.get().toString());
    }

    List<String> root1 = hm1.getZK().getChildren("/", false);
    List<String> root2 = hm2.getZK().getChildren("/", false);
    List<String> root3 = hm3.getZK().getChildren("/", false);
    System.out.println(root1);
    System.out.println(root2);
    System.out.println(root3);
    assertTrue(root1.equals(root2));
    assertTrue(root2.equals(root3));

    List<String> hostids1 = hm1.getZK().getChildren(CoreZK.hostids, false);
    List<String> hostids2 = hm2.getZK().getChildren(CoreZK.hostids, false);
    List<String> hostids3 = hm3.getZK().getChildren(CoreZK.hostids, false);
    System.out.println(hostids1);
    System.out.println(hostids2);
    System.out.println(hostids3);
    assertTrue(hostids1.equals(hostids2));
    assertTrue(hostids2.equals(hostids3));

    List<String> hosts3;
    List<String> hosts1;
    hm2.shutdown();
    boolean success = false;
    // Poll for up to ~200ms (40 iterations at 5ms each) for both views to agree.
    for (int ii = 0; ii < (200 / 5); ii++) {
      hosts3 = hm3.getZK().getChildren(CoreZK.hosts, false);
      hosts1 = hm1.getZK().getChildren(CoreZK.hosts, false);
      if (hosts3.size() == 2 && hosts1.size() == 2 && hosts1.equals(hosts3)) {
        success = true;
        break;
      }
      Thread.sleep(5);
    }
    assertTrue(success);

    hm1.waitForGroupJoin(2);
    hm3.waitForGroupJoin(2);
  }