Пример #1
0
 /**
  * Seeds the initiator mailbox with the transaction ids recovered during restore.
  *
  * <p>Each id is classified by the partition id that {@code TxnEgo.getPartitionId} reports for
  * it. An id belonging to this site's partition is handed to
  * {@code setMaxLastSeenTxnId}; an id belonging to {@code MpInitiator.MP_INIT_PID} is handed to
  * {@code setMaxLastSeenMultipartTxnId}. Seeing either kind more than once, or never seeing a
  * multipart id, crashes the local node. A missing single-partition id is not treated as an
  * error here.
  *
  * @param perPartitionTxnIds the restored transaction ids to scan
  */
 @Override
 public void setPerPartitionTxnIds(long[] perPartitionTxnIds) {
   boolean sawMpTxnId = false;
   boolean sawSpTxnId = false;
   for (int idx = 0; idx < perPartitionTxnIds.length; idx++) {
     final long candidate = perPartitionTxnIds[idx];

     // Id owned by this site's partition.
     if (TxnEgo.getPartitionId(candidate) == m_partitionId) {
       if (sawSpTxnId) {
         VoltDB.crashLocalVoltDB(
             "Found multiple transactions ids during restore for a partition", false, null);
       }
       sawSpTxnId = true;
       m_initiatorMailbox.setMaxLastSeenTxnId(candidate);
     }

     // Id owned by the multi-partition initiator.
     if (TxnEgo.getPartitionId(candidate) == MpInitiator.MP_INIT_PID) {
       if (sawMpTxnId) {
         VoltDB.crashLocalVoltDB(
             "Found multiple transactions ids during restore for a multipart txnid", false, null);
       }
       sawMpTxnId = true;
       m_initiatorMailbox.setMaxLastSeenMultipartTxnId(candidate);
     }
   }

   // A multipart id is mandatory.
   if (!sawMpTxnId) {
     VoltDB.crashLocalVoltDB("Didn't find a multipart txnid on restore", false, null);
   }
 }
Пример #2
0
 /**
  * Writes a state dump for this site to the host log at WARN level.
  *
  * <p>The dump covers the partition and leadership flag, the replica list (leader only), the
  * current SP handle, outstanding transactions, the pending task queue and any duplicate
  * counters. When this site is the leader and has replicas to send to, a {@code DumpMessage} is
  * also sent to them.
  */
 public void handleDumpMessage() {
   final String siteName = CoreUtils.hsIdToString(m_mailbox.getHSId());
   hostLog.warn("State dump for site: " + siteName);
   hostLog.warn(siteName + ": partition: " + m_partitionId + ", isLeader: " + m_isLeader);

   if (m_isLeader) {
     hostLog.warn(
         siteName + ": replicas: " + CoreUtils.hsIdCollectionToString(m_replicaHSIds));
     // Ask the replicas to dump their state as well.
     if (m_sendToHSIds.length != 0) {
       m_mailbox.send(m_sendToHSIds, new DumpMessage());
     }
   }

   // Raw handle followed by its decoded form.
   hostLog.warn(
       siteName
           + ": most recent SP handle: "
           + getCurrentTxnId()
           + " "
           + TxnEgo.txnIdToString(getCurrentTxnId()));

   // Raw key set followed by its decoded form.
   hostLog.warn(
       siteName
           + ": outstanding txns: "
           + m_outstandingTxns.keySet()
           + " "
           + TxnEgo.txnIdCollectionToString(m_outstandingTxns.keySet()));

   hostLog.warn(siteName + ": TransactionTaskQueue: " + m_pendingTasks.toString());

   // Nothing more to report when no duplicate counters are outstanding.
   if (!(m_duplicateCounters.size() > 0)) {
     return;
   }
   hostLog.warn(siteName + ": duplicate counters: ");
   for (Entry<DuplicateCounterKey, DuplicateCounter> pending : m_duplicateCounters.entrySet()) {
     final String keyText = pending.getKey().toString();
     final String counterText = pending.getValue().toString();
     hostLog.warn("\t" + siteName + ": " + keyText + ": " + counterText);
   }
 }
Пример #3
0
 /**
  * Renders this task as a single line containing the decoded transaction id, the decoded SP
  * handle and the HSId of the initiator mailbox.
  */
 @Override
 public String toString() {
   final String txnText = TxnEgo.txnIdToString(getTxnId());
   final String handleText = TxnEgo.txnIdToString(getSpHandle());
   final String siteText = CoreUtils.hsIdToString(m_initiator.getHSId());
   return "MpProcedureTask:"
       + "  TXN ID: "
       + txnText
       + "  SP HANDLE ID: "
       + handleText
       + "  ON HSID: "
       + siteText;
 }
Пример #4
0
 /**
  * Renders this task as a single line containing the decoded transaction id, the decoded SP
  * handle, the begin-undo token of the transaction state and the completion message.
  */
 @Override
 public String toString() {
   // Operands are evaluated left to right, matching the order of the labels.
   return "CompleteTransactionTask:"
       + "  TXN ID: "
       + TxnEgo.txnIdToString(getTxnId())
       + "  SP HANDLE: "
       + TxnEgo.txnIdToString(getSpHandle())
       + "  UNDO TOKEN: "
       + m_txnState.getBeginUndoToken()
       + "  MSG: "
       + m_completeMsg.toString();
 }
Пример #5
0
  // SpSchedulers will see FragmentTaskMessage for:
  // - The scatter fragment(s) of a multi-part transaction (normal or sysproc)
  // - Borrow tasks to do the local fragment work if this partition is the
  //   buddy of the MPI.  Borrow tasks may include input dependency tables for
  //   aggregation fragments, or not, if it's a replicated table read.
  // For multi-batch MP transactions, we'll need to look up the transaction state
  // that gets created when the first batch arrives.
  // During command log replay a new SP handle is going to be generated, but it really
  // doesn't matter, it isn't going to be used for anything.
  //
  // Summary of the flow below:
  // - Leader: copies the message, stamps the copy with a freshly advanced SP handle,
  //   optionally forwards a second copy to the replicas (registering a duplicate
  //   counter for their responses), then offers the stamped copy locally.
  // - Non-leader: keeps the message as received, records its SP handle through
  //   setMaxSeenTxnId, then offers it locally.
  void handleFragmentTaskMessage(FragmentTaskMessage message) {
    // On a non-leader this stays the received message; the leader replaces it with a copy.
    FragmentTaskMessage msg = message;
    long newSpHandle;
    if (m_isLeader) {
      // Quick hack to make progress...we need to copy the FragmentTaskMessage
      // before we start mucking with its state (SPHANDLE).  We need to revisit
      // all the messaging mess at some point.
      msg =
          new FragmentTaskMessage(
              message.getInitiatorHSId(), message.getCoordinatorHSId(), message);
      // Not going to use the timestamp from the new Ego because the multi-part timestamp is what
      // should be used
      TxnEgo ego = advanceTxnEgo();
      newSpHandle = ego.getTxnId();
      msg.setSpHandle(newSpHandle);
      // If the fragment carries an embedded initiate task, give it the same SP handle.
      if (msg.getInitiateTask() != null) {
        msg.getInitiateTask().setSpHandle(newSpHandle); // set the handle
        msg.setInitiateTask(
            msg.getInitiateTask()); // Trigger reserialization so the new handle is used
      }

      /*
       * If there are replicas to send it to, forward it!
       * Unless... it's read only AND not a sysproc. Read only sysprocs may expect to be sent
       * everywhere.
       * In that case don't propagate it to avoid a determinism check and extra messaging overhead
       */
      if (m_sendToHSIds.length > 0 && (!msg.isReadOnly() || msg.isSysProcTask())) {
        // The replica copy names this site's mailbox as both initiator and coordinator.
        FragmentTaskMessage replmsg =
            new FragmentTaskMessage(m_mailbox.getHSId(), m_mailbox.getHSId(), msg);
        m_mailbox.send(m_sendToHSIds, replmsg);
        DuplicateCounter counter;
        /*
         * Non-determinism should be impossible to happen with MP fragments.
         * if you see "MP_DETERMINISM_ERROR" as procedure name in the crash logs
         * something has horribly gone wrong.
         */
        // The fragment task type is read from the original message, not from the copy.
        if (message.getFragmentTaskType() != FragmentTaskMessage.SYS_PROC_PER_SITE) {
          counter =
              new DuplicateCounter(
                  msg.getCoordinatorHSId(), msg.getTxnId(), m_replicaHSIds, "MP_DETERMINISM_ERROR");
        } else {
          // Per-site sysproc fragments get the sysproc-specific counter.
          counter =
              new SysProcDuplicateCounter(
                  msg.getCoordinatorHSId(), msg.getTxnId(), m_replicaHSIds, "MP_DETERMINISM_ERROR");
        }
        // Counters are keyed by (txn id, SP handle).
        m_duplicateCounters.put(new DuplicateCounterKey(msg.getTxnId(), newSpHandle), counter);
      }
    } else {
      // Non-leader: use the SP handle already present on the message and record it as seen.
      newSpHandle = msg.getSpHandle();
      setMaxSeenTxnId(newSpHandle);
    }
    // The trace call receives the original message together with the SP handle chosen above.
    Iv2Trace.logFragmentTaskMessage(message, m_mailbox.getHSId(), newSpHandle, false);
    doLocalFragmentOffer(msg);
  }
Пример #6
0
  /*
   * Inherit the per partition txnid from the long since gone
   * partition that existed in the past.
   *
   * Reads the serialized id list stored under VoltZK.perPartitionTxnIds: an int
   * count followed by that many longs. Returns the full decoded array when at
   * least one entry belongs to this site's partition, and null when none does or
   * when the ZooKeeper node does not exist. Any other failure while reading the
   * node crashes the local node.
   */
  private long[] fetchPerPartitionTxnId() {
    final ZooKeeper zk = VoltDB.instance().getHostMessenger().getZK();
    byte[] serializedIds = null;
    try {
      serializedIds = zk.getData(VoltZK.perPartitionTxnIds, false, null);
    } catch (KeeperException.NoNodeException e) {
      // Can be no node if the cluster was never restored
      return null;
    } catch (Exception e) {
      VoltDB.crashLocalVoltDB("Error retrieving per partition txn ids", true, e);
    }

    final ByteBuffer reader = ByteBuffer.wrap(serializedIds);
    final int entryCount = reader.getInt();
    final long[] decodedIds = new long[entryCount];
    boolean ownsAnEntry = false;
    for (int slot = 0; slot < entryCount; slot++) {
      final long decoded = reader.getLong();
      decodedIds[slot] = decoded;
      if (TxnEgo.getPartitionId(decoded) == m_partitionId) {
        ownsAnEntry = true;
      }
    }
    return ownsAnEntry ? decodedIds : null;
  }
Пример #7
0
 /**
  * Logs the replay sequencer state at INFO level: one summary line with the last polled
  * fragment txn id, the last seen txn id and the EOL/drain flags, followed by one line per
  * replay entry.
  *
  * @param hsId HSId used to label every line of the dump
  */
 public void dump(long hsId) {
   final String siteLabel = CoreUtils.hsIdToString(hsId);

   final String summary =
       String.format(
           "%s: REPLAY SEQUENCER DUMP, LAST POLLED FRAGMENT %d (%s), LAST SEEN TXNID %d (%s), %s%s",
           siteLabel,
           m_lastPolledFragmentTxnId,
           TxnEgo.txnIdToString(m_lastPolledFragmentTxnId),
           m_lastSeenTxnId,
           TxnEgo.txnIdToString(m_lastSeenTxnId),
           m_mpiEOLReached ? "MPI EOL, " : "",
           m_mustDrain ? "MUST DRAIN" : "");
   tmLog.info(summary);

   for (Entry<Long, ReplayEntry> pending : m_replayEntries.entrySet()) {
     final String line =
         String.format("%s: REPLAY ENTRY %s: %s", siteLabel, pending.getKey(), pending.getValue());
     tmLog.info(line);
   }
 }
Пример #8
0
 /**
  * Formats a transaction id for display, mapping {@code Long.MIN_VALUE} to {@code "UNUSED"} and
  * delegating every other value to {@code TxnEgo.txnIdToString}.
  */
 private static String txnIdToString(long txnId) {
   return (txnId != Long.MIN_VALUE) ? TxnEgo.txnIdToString(txnId) : "UNUSED";
 }
Пример #9
0
  /**
   * Sets up a new repair algorithm without taking any action to assume responsibility.
   *
   * <p>Construction only stores its arguments and initializes the max seen txn id to the id
   * produced by {@code TxnEgo.makeZero(partitionId)}.
   *
   * @param survivors stored as the survivor list
   * @param mailbox stored as the mailbox
   * @param whoami stored as the identification string
   * @param partitionId partition whose zero txn id seeds the max seen txn id
   */
  public SpPromoteAlgo(
      List<Long> survivors, InitiatorMailbox mailbox, String whoami, int partitionId) {
    m_maxSeenTxnId = TxnEgo.makeZero(partitionId).getTxnId();
    m_whoami = whoami;

    m_survivors = survivors;
    m_mailbox = mailbox;
  }
Пример #10
0
 /**
  * Renders the sentinel txn id (raw and decoded), the number of blocked messages, a marker
  * that is present only when a fragment has been served, and, on a second line, the first
  * fragment.
  */
 @Override
 public String toString() {
   final String decodedSentinel = TxnEgo.txnIdToString(m_sentinalTxnId);
   final int blockedCount = m_blockedMessages.size();
   final String servedMarker = m_servedFragment ? "SERVED FRAGMENT" : "";
   return String.format(
       "(SENTINEL TXNID: %d (%s), %d BLOCKED MESSAGES, %s)\n%s",
       m_sentinalTxnId, decodedSentinel, blockedCount, servedMarker, m_firstFragment);
 }
Пример #11
0
  /**
   * Create a new execution site and the corresponding EE.
   *
   * <p>The constructor body shown here only records configuration: it stores its arguments,
   * derives the rejoin state from the start action, keeps a {@code StartupConfig} for later
   * use, and initializes the last-committed txn id and SP handle to the zero id of the
   * partition. When a stats agent is supplied, table and index stats sources are created and
   * registered with it; otherwise all stats fields are left {@code null}.
   */
  public Site(
      SiteTaskerQueue scheduler,
      long siteId,
      BackendTarget backend,
      CatalogContext context,
      String serializedCatalog,
      long txnId,
      int partitionId,
      int numPartitions,
      VoltDB.START_ACTION startAction,
      int snapshotPriority,
      InitiatorMailbox initiatorMailbox,
      StatsAgent agent,
      MemoryStats memStats) {
    // Identity and placement of this site.
    m_siteId = siteId;
    m_partitionId = partitionId;
    m_numberOfPartitions = numPartitions;

    // Collaborators and static configuration.
    m_context = context;
    m_scheduler = scheduler;
    m_backend = backend;
    m_initiatorMailbox = initiatorMailbox;
    m_snapshotPriority = snapshotPriority;

    // Startup mode determines the initial rejoin state.
    m_startAction = startAction;
    m_rejoinState = VoltDB.createForRejoin(startAction) ? kStateRejoining : kStateRunning;

    // need this later when running in the final thread.
    m_startupConfig = new StartupConfig(serializedCatalog, context.m_timestamp);

    // Transaction bookkeeping starts from the partition's zero id; no txn is current yet.
    m_lastCommittedTxnId = TxnEgo.makeZero(partitionId).getTxnId();
    m_lastCommittedSpHandle = TxnEgo.makeZero(partitionId).getTxnId();
    m_currentTxnId = Long.MIN_VALUE;

    if (agent == null) {
      // MPI doesn't need to track these stats
      m_tableStats = null;
      m_indexStats = null;
      m_memStats = null;
    } else {
      m_tableStats = new TableStats(m_siteId);
      agent.registerStatsSource(SysProcSelector.TABLE, m_siteId, m_tableStats);
      m_indexStats = new IndexStats(m_siteId);
      agent.registerStatsSource(SysProcSelector.INDEX, m_siteId, m_indexStats);
      m_memStats = memStats;
    }
  }
Пример #12
0
 /**
  * Finishes the undo bookkeeping for a transaction.
  *
  * <p>On rollback the engine is asked to undo {@code beginUndoToken} and nothing else changes.
  * On commit the engine releases {@code latestUndoToken} when it is newer than
  * {@code beginUndoToken}, and the last committed txn id and SP handle are updated. The local
  * node is crashed if the partition id of {@code spHandle} differs from that of the previously
  * committed SP handle.
  *
  * @param rollback {@code true} to undo, {@code false} to commit
  * @param beginUndoToken undo token marking the start of the transaction's work
  * @param txnId transaction id recorded as last committed on commit
  * @param spHandle SP handle recorded as last committed on commit
  */
 @Override
 public void truncateUndoLog(boolean rollback, long beginUndoToken, long txnId, long spHandle) {
   if (rollback) {
     m_ee.undoUndoToken(beginUndoToken);
     return;
   }

   // Commit path.
   assert (latestUndoToken != Site.kInvalidUndoToken);
   assert (latestUndoToken >= beginUndoToken);
   final boolean hasWorkToRelease = latestUndoToken > beginUndoToken;
   if (hasWorkToRelease) {
     m_ee.releaseUndoToken(latestUndoToken);
   }
   m_lastCommittedTxnId = txnId;

   // The new handle must belong to the same partition as the previous one.
   final boolean samePartition =
       TxnEgo.getPartitionId(m_lastCommittedSpHandle) == TxnEgo.getPartitionId(spHandle);
   if (!samePartition) {
     VoltDB.crashLocalVoltDB(
         "Mismatch SpHandle partitiond id "
             + TxnEgo.getPartitionId(m_lastCommittedSpHandle)
             + ", "
             + TxnEgo.getPartitionId(spHandle),
         true,
         null);
   }
   m_lastCommittedSpHandle = spHandle;
 }
Пример #13
0
  // SpScheduler expects to see InitiateTaskMessages corresponding to single-partition
  // procedures only.
  //
  // Two paths exist below:
  // - Leader, or any site receiving a read-only message: the message is copied, the copy
  //   is stamped with an SP handle (and, for procedures that are not every-site system
  //   procedures, a txn id and unique id), and on the leader non-read-only work is also
  //   forwarded to the replicas with a duplicate counter registered for their responses.
  // - Replica receiving a write: the message is used as received and its SP handle is
  //   recorded through setMaxSeenTxnId.
  // In both cases the resulting message is offered locally at the end.
  //
  // Throws RuntimeException if the message is not single-partition.
  public void handleIv2InitiateTaskMessage(Iv2InitiateTaskMessage message) {
    if (!message.isSinglePartition()) {
      throw new RuntimeException(
          "SpScheduler.handleIv2InitiateTaskMessage "
              + "should never receive multi-partition initiations.");
    }

    final String procedureName = message.getStoredProcedureName();
    long newSpHandle;
    // Long.MIN_VALUE acts as the "not yet assigned" value until one of the branches sets it.
    long uniqueId = Long.MIN_VALUE;
    Iv2InitiateTaskMessage msg = message;
    if (m_isLeader || message.isReadOnly()) {
      /*
       * A short circuit read is a read where the client interface is local to
       * this node. The CI will let a replica perform a read in this case and
       * it does looser tracking of client handles since it can't be
       * partitioned from the local replica.
       */
      // A non-leader only accepts the read when the initiator is on the same host.
      if (!m_isLeader
          && CoreUtils.getHostIdFromHSId(msg.getInitiatorHSId())
              != CoreUtils.getHostIdFromHSId(m_mailbox.getHSId())) {
        VoltDB.crashLocalVoltDB("Only allowed to do short circuit reads locally", true, null);
      }

      /*
       * If this is for CL replay or DR, update the unique ID generator
       */
      if (message.isForReplay()) {
        uniqueId = message.getUniqueId();
        try {
          m_uniqueIdGenerator.updateMostRecentlyGeneratedUniqueId(uniqueId);
        } catch (Exception e) {
          // Log the failure and the offending invocation before crashing the local node.
          hostLog.fatal(e.getMessage());
          hostLog.fatal("Invocation: " + message);
          VoltDB.crashLocalVoltDB(e.getMessage(), true, e);
        }
      } else if (message.isForDR()) {
        uniqueId = message.getStoredProcedureInvocation().getOriginalUniqueId();
        // @LoadSinglepartitionTable does not have a valid uid
        // so the generator is only updated when the id maps to this partition.
        if (UniqueIdGenerator.getPartitionIdFromUniqueId(uniqueId) == m_partitionId) {
          m_uniqueIdGenerator.updateMostRecentlyGeneratedUniqueId(uniqueId);
        }
      }

      /*
       * If this is CL replay use the txnid from the CL and also
       * update the txnid to match the one from the CL
       */
      if (message.isForReplay()) {
        newSpHandle = message.getTxnId();
        setMaxSeenTxnId(newSpHandle);
      } else if (m_isLeader) {
        // Normal leader path: mint a new SP handle and a new unique id.
        // This overwrites any unique id taken from a DR invocation above.
        TxnEgo ego = advanceTxnEgo();
        newSpHandle = ego.getTxnId();
        uniqueId = m_uniqueIdGenerator.getNextUniqueId();
      } else {
        /*
         * The short circuit read case. Since we are not a master
         * we can't create new transaction IDs, so reuse the last seen
         * txnid. For a timestamp, might as well give a reasonable one
         * for a read heavy workload so time isn't bursty.
         */
        uniqueId =
            UniqueIdGenerator.makeIdFromComponents(
                Math.max(System.currentTimeMillis(), m_uniqueIdGenerator.lastUsedTime),
                0,
                m_uniqueIdGenerator.partitionId);
        // Don't think it wise to make a new one for a short circuit read
        newSpHandle = getCurrentTxnId();
      }

      // Need to set the SP handle on the received message
      // Need to copy this or the other local sites handling
      // the same initiate task message will overwrite each
      // other's memory -- the message isn't copied on delivery
      // to other local mailboxes.
      // The copy takes its txn id and unique id from the original message at this point;
      // they may be overwritten just below.
      msg =
          new Iv2InitiateTaskMessage(
              message.getInitiatorHSId(),
              message.getCoordinatorHSId(),
              m_repairLogTruncationHandle,
              message.getTxnId(),
              message.getUniqueId(),
              message.isReadOnly(),
              message.isSinglePartition(),
              message.getStoredProcedureInvocation(),
              message.getClientInterfaceHandle(),
              message.getConnectionId(),
              message.isForReplay());

      msg.setSpHandle(newSpHandle);

      // Also, if this is a vanilla single-part procedure, make the TXNID
      // be the SpHandle (for now)
      // Only system procedures are every-site, so we'll check through the SystemProcedureCatalog
      if (SystemProcedureCatalog.listing.get(procedureName) == null
          || !SystemProcedureCatalog.listing.get(procedureName).getEverysite()) {
        msg.setTxnId(newSpHandle);
        msg.setUniqueId(uniqueId);
      }

      // Don't replicate reads, this really assumes that DML validation
      // is going to be integrated soonish
      if (m_isLeader && !msg.isReadOnly() && m_sendToHSIds.length > 0) {
        // The replica copy names this site's mailbox as both initiator and coordinator.
        Iv2InitiateTaskMessage replmsg =
            new Iv2InitiateTaskMessage(
                m_mailbox.getHSId(),
                m_mailbox.getHSId(),
                m_repairLogTruncationHandle,
                msg.getTxnId(),
                msg.getUniqueId(),
                msg.isReadOnly(),
                msg.isSinglePartition(),
                msg.getStoredProcedureInvocation(),
                msg.getClientInterfaceHandle(),
                msg.getConnectionId(),
                msg.isForReplay());
        // Update the handle in the copy since the constructor doesn't set it
        replmsg.setSpHandle(newSpHandle);
        m_mailbox.send(m_sendToHSIds, replmsg);
        // Counters are keyed by (txn id, SP handle).
        DuplicateCounter counter =
            new DuplicateCounter(
                msg.getInitiatorHSId(),
                msg.getTxnId(),
                m_replicaHSIds,
                msg.getStoredProcedureName());
        m_duplicateCounters.put(new DuplicateCounterKey(msg.getTxnId(), newSpHandle), counter);
      }
    } else {
      // Replica handling a write: use the message as received and record its SP handle.
      setMaxSeenTxnId(msg.getSpHandle());
      newSpHandle = msg.getSpHandle();
      // NOTE(review): uniqueId is assigned here but not read again in this method.
      uniqueId = msg.getUniqueId();
    }
    // The trace call receives the original message, with the txn id taken from msg.
    Iv2Trace.logIv2InitiateTaskMessage(message, m_mailbox.getHSId(), msg.getTxnId(), newSpHandle);
    doLocalInitiateOffer(msg);
    return;
  }