Example #1
  @Override
  public void configure(
      BackendTarget backend,
      String serializedCatalog,
      CatalogContext catalogContext,
      int kfactor,
      CatalogSpecificPlanner csp,
      int numberOfPartitions,
      VoltDB.START_ACTION startAction,
      StatsAgent agent,
      MemoryStats memStats,
      CommandLog cl,
      NodeDRGateway nodeDRGateway,
      String coreBindIds)
      throws KeeperException, InterruptedException, ExecutionException {
    try {
      m_leaderCache.start(true);
    } catch (Exception e) {
      VoltDB.crashLocalVoltDB("Unable to configure SpInitiator.", true, e);
    }

    // configure DR
    PartitionDRGateway drGateway =
        PartitionDRGateway.getInstance(
            m_partitionId, nodeDRGateway, true, VoltDB.createForRejoin(startAction));
    ((SpScheduler) m_scheduler).setDRGateway(drGateway);

    super.configureCommon(
        backend,
        serializedCatalog,
        catalogContext,
        csp,
        numberOfPartitions,
        startAction,
        agent,
        memStats,
        cl,
        coreBindIds,
        drGateway);

    m_tickProducer.start();

    // add ourselves to the ephemeral node list which BabySitters will watch for this
    // partition
    LeaderElector.createParticipantNode(
        m_messenger.getZK(),
        LeaderElector.electionDirForPartition(m_partitionId),
        Long.toString(getInitiatorHSId()),
        null);
  }
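
The final step registers this initiator as a replica candidate for its partition. A minimal sketch of what that registration presumably amounts to at the ZooKeeper level, assuming LeaderElector.createParticipantNode creates an ephemeral, sequential child whose name starts with the HSID; the "<HSID>_" naming is inferred from how children are parsed in the other examples, not read from the actual implementation.

import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.zookeeper.ZooKeeper;

public class ParticipantRegistrationSketch {
  /**
   * Register an initiator as a replica candidate for a partition. Ephemeral so the entry
   * vanishes if the process dies; sequential so ZooKeeper totally orders the candidates.
   */
  public static String register(ZooKeeper zk, String electionDir, long initiatorHSId)
      throws KeeperException, InterruptedException {
    return zk.create(
        electionDir + "/" + initiatorHSId + "_",
        null,
        Ids.OPEN_ACL_UNSAFE,
        CreateMode.EPHEMERAL_SEQUENTIAL);
  }
}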
Example #2
  /**
   * Given a set of partition IDs, return a map of partition to a list of HSIDs of all the sites
   * with copies of each partition.
   */
  public Map<Integer, List<Long>> getReplicasForPartitions(Collection<Integer> partitions) {
    Map<Integer, List<Long>> retval = new HashMap<Integer, List<Long>>();
    List<Pair<Integer, ZKUtil.ChildrenCallback>> callbacks =
        new ArrayList<Pair<Integer, ZKUtil.ChildrenCallback>>();

    for (Integer partition : partitions) {
      String zkpath = LeaderElector.electionDirForPartition(partition);
      ZKUtil.ChildrenCallback cb = new ZKUtil.ChildrenCallback();
      callbacks.add(Pair.of(partition, cb));
      m_zk.getChildren(zkpath, false, cb, null);
    }

    for (Pair<Integer, ZKUtil.ChildrenCallback> p : callbacks) {
      final Integer partition = p.getFirst();
      try {
        List<String> children = p.getSecond().getChildren();
        List<Long> sites = new ArrayList<Long>();
        for (String child : children) {
          sites.add(Long.valueOf(child.split("_")[0]));
        }
        retval.put(partition, sites);
      } catch (KeeperException ke) {
        org.voltdb.VoltDB.crashLocalVoltDB(
            "KeeperException getting replicas for partition: " + partition, true, ke);
      } catch (InterruptedException ie) {
        org.voltdb.VoltDB.crashLocalVoltDB(
            "InterruptedException getting replicas for partition: " + partition, true, ie);
      }
    }
    return retval;
  }
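
The reads are issued asynchronously up front and only collected afterwards, so the ZooKeeper round-trips for all partitions overlap instead of running one by one. Below is a sketch of the same fan-out-then-collect pattern written against the stock asynchronous ZooKeeper API rather than the ZKUtil.ChildrenCallback wrapper; the election-directory paths are supplied by the caller (for example via LeaderElector.electionDirForPartition) and the null-check error handling is illustrative only.

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import org.apache.zookeeper.AsyncCallback;
import org.apache.zookeeper.ZooKeeper;

public class ReplicaFanOutSketch {
  /** electionDirs maps each partition ID to its election directory path. */
  public static Map<Integer, List<Long>> replicasFor(
      ZooKeeper zk, Map<Integer, String> electionDirs) throws InterruptedException {
    final Map<Integer, List<Long>> result =
        Collections.synchronizedMap(new HashMap<Integer, List<Long>>());
    final CountDownLatch done = new CountDownLatch(electionDirs.size());

    for (Map.Entry<Integer, String> entry : electionDirs.entrySet()) {
      final Integer partition = entry.getKey();
      // Fire one non-blocking read per partition; replies arrive on ZooKeeper's event thread.
      zk.getChildren(
          entry.getValue(),
          false,
          new AsyncCallback.ChildrenCallback() {
            @Override
            public void processResult(int rc, String path, Object ctx, List<String> children) {
              List<Long> sites = new ArrayList<Long>();
              if (children != null) {
                for (String child : children) {
                  // Child names begin with the site's HSID, as parsed in the example above.
                  sites.add(Long.valueOf(child.split("_")[0]));
                }
              }
              result.put(partition, sites);
              done.countDown();
            }
          },
          null);
    }
    done.await(); // block until every partition's reply (or error) has been processed
    return result;
  }
}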
Example #3
 void shutdown() {
   try {
     m_leaderElector.shutdown();
   } catch (Exception e) {
     VoltDB.crashLocalVoltDB("Error shutting down GlobalServiceElector's LeaderElector", true, e);
   }
 }
Example #4
 /**
  * Returns the IDs of the partitions currently in the cluster.
  *
  * @return A list of partition IDs
  */
 public static List<Integer> getPartitions(ZooKeeper zk) {
   List<Integer> partitions = new ArrayList<Integer>();
   try {
     List<String> children = zk.getChildren(VoltZK.leaders_initiators, null);
     for (String child : children) {
       partitions.add(LeaderElector.getPartitionFromElectionDir(child));
     }
   } catch (KeeperException e) {
     VoltDB.crashLocalVoltDB("Failed to get partition IDs from ZK", true, e);
   } catch (InterruptedException e) {
     VoltDB.crashLocalVoltDB("Failed to get partition IDs from ZK", true, e);
   }
   return partitions;
 }
Example #5
 private boolean isClusterKSafe() {
   boolean retval = true;
   for (int i = 0; i < m_partitionCount; i++) {
     String dir = LeaderElector.electionDirForPartition(i);
     try {
       List<String> replicas = m_zk.getChildren(dir, null, null);
       if (replicas.isEmpty()) {
         tmLog.fatal("K-Safety violation: No replicas found for partition: " + i);
         retval = false;
       }
     } catch (Exception e) {
       VoltDB.crashLocalVoltDB("Unable to read replicas in ZK dir: " + dir, true, e);
     }
   }
   return retval;
 }
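
Note the design choice: only an empty replica list counts as a K-safety violation, so reduced redundancy is tolerated as long as every partition keeps at least one copy. A hedged sketch of the same scan against a plain ZooKeeper handle, with an extra reduced-redundancy warning added purely for illustration; the election-directory paths and expected copy count are supplied by the caller and are not part of the original check.

import java.util.List;
import java.util.Map;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooKeeper;

public class KSafetyScanSketch {
  /** Returns true while every partition still has at least one live replica. */
  public static boolean isViable(
      ZooKeeper zk, Map<Integer, String> electionDirs, int expectedCopies)
      throws KeeperException, InterruptedException {
    boolean viable = true;
    for (Map.Entry<Integer, String> e : electionDirs.entrySet()) {
      List<String> replicas = zk.getChildren(e.getValue(), false);
      if (replicas.isEmpty()) {
        // Losing the last copy of a partition is fatal, just as in the example above.
        System.err.println("No replicas left for partition " + e.getKey());
        viable = false;
      } else if (replicas.size() < expectedCopies) {
        // Illustrative extra check: redundancy reduced, but the data is still available.
        System.err.println("Partition " + e.getKey() + " is running with reduced redundancy");
      }
    }
    return viable;
  }
}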
Example #6
 /** Given a partition ID, return a list of HSIDs of all the sites with copies of that partition */
 public List<Long> getReplicasForPartition(int partition) {
   String zkpath = LeaderElector.electionDirForPartition(partition);
   List<Long> retval = new ArrayList<Long>();
   try {
     List<String> children = m_zk.getChildren(zkpath, null);
     for (String child : children) {
       retval.add(Long.valueOf(child.split("_")[0]));
     }
   } catch (KeeperException ke) {
     org.voltdb.VoltDB.crashLocalVoltDB(
         "KeeperException getting replicas for partition: " + partition, true, ke);
   } catch (InterruptedException ie) {
     org.voltdb.VoltDB.crashLocalVoltDB(
         "InterruptedException getting replicas for partition: " + partition, true, ie);
   }
   return retval;
 }
Example #7
  @Override
  public void acceptPromotion() throws InterruptedException, ExecutionException, KeeperException {
    // Crank up the leader caches.  Use blocking startup so that we'll have valid point-in-time
    // caches later.
    m_iv2appointees.start(true);
    m_iv2masters.start(true);
    // Figure out what conditions we assumed leadership under.
    if (m_iv2appointees.pointInTimeCache().size() == 0) {
      tmLog.debug("LeaderAppointer in startup");
      m_state.set(AppointerState.CLUSTER_START);
    } else if ((m_iv2appointees.pointInTimeCache().size() != m_partitionCount)
        || (m_iv2masters.pointInTimeCache().size() != m_partitionCount)) {
      // If we are promoted and the appointees or masters set is partial, the previous appointer
      // failed during startup (at least for now, until we can add/remove partitions on the fly).
      VoltDB.crashGlobalVoltDB("Detected failure during startup, unable to start", false, null);
    } else {
      tmLog.debug("LeaderAppointer in repair");
      m_state.set(AppointerState.DONE);
    }

    if (m_state.get() == AppointerState.CLUSTER_START) {
      // Need to block the return of acceptPromotion until after the MPI is promoted. Wait for
      // this latch to count down after appointing all the partition leaders. The LeaderCache
      // callback will count it down once it has seen all the appointed leaders publish
      // themselves as the actual leaders.
      m_startupLatch = new CountDownLatch(1);
      writeKnownLiveNodes(m_hostMessenger.getLiveHostIds());
      for (int i = 0; i < m_partitionCount; i++) {
        String dir = LeaderElector.electionDirForPartition(i);
        // Race along with all of the replicas for this partition to create the ZK parent node
        try {
          m_zk.create(dir, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        } catch (KeeperException.NodeExistsException e) {
          // expected on all nodes that don't start() first.
        }
        m_callbacks[i] = new PartitionCallback(i);
        Pair<BabySitter, List<String>> sitterstuff =
            BabySitter.blockingFactory(m_zk, dir, m_callbacks[i], m_es);
        m_partitionWatchers[i] = sitterstuff.getFirst();
      }
      m_startupLatch.await();
    } else {
      // If we're taking over for a failed LeaderAppointer, we know when
      // we get here that every partition had a leader at some point in
      // time.  We'll seed each of the PartitionCallbacks for each
      // partition with the HSID of the last published leader.  The
      // blocking startup of the BabySitter watching that partition will
      // call our callback, get the current full set of replicas, and
      // appoint a new leader if the seeded one has actually failed
      Map<Integer, Long> masters = m_iv2masters.pointInTimeCache();
      tmLog.info("LeaderAppointer repairing with master set: " + masters);
      for (Entry<Integer, Long> master : masters.entrySet()) {
        int partId = master.getKey();
        String dir = LeaderElector.electionDirForPartition(partId);
        m_callbacks[partId] = new PartitionCallback(partId, master.getValue());
        Pair<BabySitter, List<String>> sitterstuff =
            BabySitter.blockingFactory(m_zk, dir, m_callbacks[partId], m_es);
        m_partitionWatchers[partId] = sitterstuff.getFirst();
      }
      // just go ahead and promote our MPI
      m_MPI.acceptPromotion();
    }
  }
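
The per-partition loop above also shows a common ZooKeeper idiom: every replica races to create the persistent election directory and simply swallows NodeExistsException when it loses the race. Pulled out as a small helper for reference (a sketch, not code from the project).

import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.zookeeper.ZooKeeper;

public class EnsureDirSketch {
  /** Create a persistent parent znode unless some other node already won the race to create it. */
  public static void ensureDir(ZooKeeper zk, String dir)
      throws KeeperException, InterruptedException {
    try {
      zk.create(dir, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
    } catch (KeeperException.NodeExistsException e) {
      // Expected on every node that did not get there first; the directory already exists.
    }
  }
}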
Example #8
 /**
  * Kick off the leader election. At the initial leader, blocks until the acceptPromotion() calls
  * to all of the registered services have returned.
  */
 void start() throws KeeperException, InterruptedException, ExecutionException {
   m_leaderElector.start(true);
 }
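
A schematic of what the blocking start described in the Javadoc typically looks like: start(true) parks the caller on a latch that is only released once the election has resolved and the promotion callbacks have run. This is an illustration of the pattern, assuming hypothetical beginElectionAsync/onPromotionComplete hooks, not LeaderElector's actual implementation.

import java.util.concurrent.CountDownLatch;

class BlockingStartSketch {
  private final CountDownLatch m_initialized = new CountDownLatch(1);

  /** Kick off the election; with block == true, wait until promotion has finished. */
  void start(boolean block) throws InterruptedException {
    beginElectionAsync();      // hypothetical: registers with ZK and starts watching
    if (block) {
      m_initialized.await();   // released by onPromotionComplete()
    }
  }

  /** Hypothetical callback invoked after all services' acceptPromotion() calls return. */
  void onPromotionComplete() {
    m_initialized.countDown();
  }

  private void beginElectionAsync() {
    // Omitted: create an ephemeral-sequential candidate node and watch the predecessor.
  }
}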