Пример #1
0
  /**
   * Handles a FragmentTaskMessage delivered to this SP scheduler.
   *
   * <p>SpSchedulers see FragmentTaskMessages for:
   *
   * <ul>
   *   <li>the scatter fragment(s) of a multi-part transaction (normal or sysproc);
   *   <li>borrow tasks that do the local fragment work when this partition is the buddy of the
   *       MPI. Borrow tasks may or may not carry input dependency tables (aggregation fragments
   *       do, replicated table reads do not).
   * </ul>
   *
   * <p>For multi-batch MP transactions the transaction state created when the first batch arrived
   * is looked up again later. During command log replay a new SP handle is still generated here;
   * it is harmless because nothing uses it.
   *
   * @param message the fragment task as received; it is never mutated on the leader path
   */
  void handleFragmentTaskMessage(FragmentTaskMessage message) {
    final FragmentTaskMessage localWork;
    final long spHandle;

    if (!m_isLeader) {
      // Not the leader: run the message as delivered and just record the handle it carries.
      localWork = message;
      spHandle = message.getSpHandle();
      setMaxSeenTxnId(spHandle);
    } else {
      // Work on a copy so that stamping a new SP handle does not disturb the incoming message.
      localWork =
          new FragmentTaskMessage(
              message.getInitiatorHSId(), message.getCoordinatorHSId(), message);

      // Only the txn id of the advanced ego is taken; its timestamp is ignored because the
      // multi-part timestamp is the one that should be used.
      spHandle = advanceTxnEgo().getTxnId();
      localWork.setSpHandle(spHandle);
      if (localWork.getInitiateTask() != null) {
        localWork.getInitiateTask().setSpHandle(spHandle);
        // Re-setting the initiate task triggers reserialization so the new handle is picked up.
        localWork.setInitiateTask(localWork.getInitiateTask());
      }

      // Forward to the replicas when there are any, except for read-only work that is not a
      // sysproc: skipping those avoids a determinism check and extra messaging. Read-only
      // sysprocs may expect to be sent everywhere, so they are still forwarded.
      final boolean readOnlyNonSysProc = localWork.isReadOnly() && !localWork.isSysProcTask();
      if (m_sendToHSIds.length > 0 && !readOnlyNonSysProc) {
        FragmentTaskMessage forReplicas =
            new FragmentTaskMessage(m_mailbox.getHSId(), m_mailbox.getHSId(), localWork);
        m_mailbox.send(m_sendToHSIds, forReplicas);

        // Non-determinism should be impossible with MP fragments. Seeing
        // "MP_DETERMINISM_ERROR" as a procedure name in crash logs means something has gone
        // badly wrong.
        final DuplicateCounter dupCounter;
        if (message.getFragmentTaskType() == FragmentTaskMessage.SYS_PROC_PER_SITE) {
          dupCounter =
              new SysProcDuplicateCounter(
                  localWork.getCoordinatorHSId(),
                  localWork.getTxnId(),
                  m_replicaHSIds,
                  "MP_DETERMINISM_ERROR");
        } else {
          dupCounter =
              new DuplicateCounter(
                  localWork.getCoordinatorHSId(),
                  localWork.getTxnId(),
                  m_replicaHSIds,
                  "MP_DETERMINISM_ERROR");
        }
        m_duplicateCounters.put(
            new DuplicateCounterKey(localWork.getTxnId(), spHandle), dupCounter);
      }
    }

    Iv2Trace.logFragmentTaskMessage(message, m_mailbox.getHSId(), spHandle, false);
    doLocalFragmentOffer(localWork);
  }
Пример #2
0
  /**
   * Turns a FragmentTaskMessage into a TransactionTask and queues it for execution.
   *
   * <p>Shared by the normal message handling path and the repair path. Callers are expected to
   * have already sorted out IDs, SpHandles and replication before calling this.
   *
   * @param msg the fragment message to run locally
   */
  private void doLocalFragmentOffer(FragmentTaskMessage msg) {
    boolean needsCommandLog = false;
    TransactionState txnState = m_outstandingTxns.get(msg.getTxnId());

    // Something of a hack: eventually we probably should not create and offer FragmentTasks for
    // mismatching txn ids while another transaction is already in progress.
    if (txnState == null) {
      txnState = new ParticipantTransactionState(msg.getSpHandle(), msg);
      m_outstandingTxns.put(msg.getTxnId(), txnState);
      // Command logging applies only to the first thing seen for a transaction, and only when
      // it carries a non-read-only initiate task. The actual logging has to wait until the
      // TransactionTask exists, so only remember the decision here.
      needsCommandLog = msg.getInitiateTask() != null && !msg.getInitiateTask().isReadOnly();
    }

    // A read-only transaction can be closed out as soon as its final task shows up.
    // NOTE: this slightly overlaps with CompleteTransactionMessage handling; it is small enough
    // to live here for now, but should be refactored out if it grows.
    if (msg.isFinalTask() && txnState.isReadOnly()) {
      m_outstandingTxns.remove(msg.getTxnId());
    }

    final TransactionTask fragTask =
        msg.isSysProcTask()
            ? new SysprocFragmentTask(
                m_mailbox, (ParticipantTransactionState) txnState, m_pendingTasks, msg, null)
            : new FragmentTask(
                m_mailbox, (ParticipantTransactionState) txnState, m_pendingTasks, msg, null);

    if (!needsCommandLog) {
      queueOrOfferMPTask(fragTask);
      return;
    }

    final boolean pendingDurability =
        m_cl.log(msg.getInitiateTask(), msg.getSpHandle(), m_durabilityListener, fragTask);
    if (pendingDurability) {
      // The task is the first fragment of an MP txn and synchronous command logging is on.
      // Start a backlog for later tasks of this MP that arrive before it is marked durable.
      //
      // This matters for synchronous command logging combined with MP txn restart: without
      // the backlog, a restarted MP txn might not be gated by logging of the first fragment.
      assert !m_mpsPendingDurability.containsKey(fragTask.getTxnId());
      m_mpsPendingDurability.put(fragTask.getTxnId(), new ArrayDeque<TransactionTask>());
    } else {
      m_pendingTasks.offer(fragTask);
    }
  }
Пример #3
0
 /**
  * Writes a trace-level record for a received FragmentTaskMessage.
  *
  * <p>Nothing happens unless trace logging is enabled on {@code iv2log}. When it is, an error is
  * also logged if the message carries an SP handle other than {@code Long.MIN_VALUE} that differs
  * from {@code spHandle}.
  *
  * @param ftask the fragment task message being traced
  * @param localHSId HSId reported as the receiver in the trace line
  * @param spHandle the locally held SP handle for this message
  * @param borrow selects the "rxBrrwMsg" label instead of "rxFragMsg"
  */
 public static void logFragmentTaskMessage(
     FragmentTaskMessage ftask, long localHSId, long spHandle, boolean borrow) {
   if (!iv2log.isTraceEnabled()) {
     return;
   }

   final String label = borrow ? "rxBrrwMsg" : "rxFragMsg";

   final long carriedHandle = ftask.getSpHandle();
   final boolean handleMismatch = carriedHandle != Long.MIN_VALUE && carriedHandle != spHandle;
   if (handleMismatch) {
     iv2log.error(
         "FragmentTaskMessage SP HANDLE conflict.  Message: "
             + carriedHandle
             + ", locally held: "
             + spHandle);
   }

   final String traceLine =
       String.format(
           "%s %s from %s txnId %s spHandle %s trunc %s",
           label,
           CoreUtils.hsIdToString(localHSId),
           CoreUtils.hsIdToString(ftask.m_sourceHSId),
           txnIdToString(ftask.getTxnId()),
           txnIdToString(spHandle),
           txnIdToString(ftask.getTruncationHandle()));
   iv2log.trace(traceLine);
 }
Пример #4
0
  /**
   * Re-delivers a fragment task to the sites that need it during repair.
   *
   * <p>A duplicate counter expecting exactly the sites in {@code needsRepair} is registered first.
   * The fragment is then offered locally if this site is in the list, and sent to whatever sites
   * remain.
   *
   * @param needsRepair HSIds that must receive the fragment; may or may not include the local
   *     site. The local HSId is removed from this list when local repair is performed.
   * @param message the fragment task being repaired
   * @throws RuntimeException if local repair is requested for a transaction that is already
   *     outstanding on this site
   */
  private void handleFragmentTaskMessageRepair(
      List<Long> needsRepair, FragmentTaskMessage message) {
    // Snapshot the expected responders before the list is edited further down.
    List<Long> expectedResponders = new ArrayList<Long>(needsRepair);
    DuplicateCounter repairCounter =
        new DuplicateCounter(
            message.getCoordinatorHSId(), // Assume that the MPI's HSID hasn't changed
            message.getTxnId(),
            expectedResponders,
            "MP_DETERMINISM_ERROR");
    DuplicateCounterKey counterKey =
        new DuplicateCounterKey(message.getTxnId(), message.getSpHandle());
    m_duplicateCounters.put(counterKey, repairCounter);

    // Local leg of the repair, if this site is among those needing it.
    final boolean repairLocally = needsRepair.contains(m_mailbox.getHSId());
    if (repairLocally) {
      // Sanity check: a fragment we already hold state for should never need repairing.
      final boolean alreadySeen = m_outstandingTxns.get(message.getTxnId()) != null;
      if (alreadySeen) {
        hostLog.warn(
            "SPI repair attempted to repair a fragment which it has already seen. "
                + "This shouldn't be possible.");
        // No recovery strategy for this case yet, so fail hard.
        throw new RuntimeException("Attempted to repair with a fragment we've already seen.");
      }
      needsRepair.remove(m_mailbox.getHSId());
      // Copy the message, mirroring what the non-repair handling path does.
      FragmentTaskMessage localCopy =
          new FragmentTaskMessage(
              message.getInitiatorHSId(), message.getCoordinatorHSId(), message);
      doLocalFragmentOffer(localCopy);
    }

    // Remote leg of the repair: nothing to do when no other site is left in the list.
    if (needsRepair.isEmpty()) {
      return;
    }
    FragmentTaskMessage forRemotes =
        new FragmentTaskMessage(m_mailbox.getHSId(), m_mailbox.getHSId(), message);
    m_mailbox.send(com.google.common.primitives.Longs.toArray(needsRepair), forRemotes);
  }
Пример #5
0
  /**
   * Offers a message to the repair log.
   *
   * <p>The log is truncated when the message carries a truncation hint. Message types other than
   * those handled below are ignored.
   *
   * @param msg the message to consider for logging
   */
  public void deliver(VoltMessage msg) {
    // SP initiate tasks are only recorded when this site is not the leader.
    if (!m_isLeader && msg instanceof Iv2InitiateTaskMessage) {
      final Iv2InitiateTaskMessage spTask = (Iv2InitiateTaskMessage) msg;
      // Read-only SP transactions cannot be repaired, so they never enter the repair log.
      if (!spTask.isReadOnly()) {
        m_lastSpHandle = spTask.getSpHandle();
        truncate(spTask.getTruncationHandle(), IS_SP);
        m_logSP.add(new Item(IS_SP, spTask, spTask.getSpHandle(), spTask.getTxnId()));
      }
      return;
    }

    if (msg instanceof FragmentTaskMessage) {
      final FragmentTaskMessage fragment = (FragmentTaskMessage) msg;
      // Read-only fragments are likewise kept out of the repair log.
      if (fragment.isReadOnly()) {
        return;
      }

      truncate(fragment.getTruncationHandle(), IS_MP);
      // Record only the first fragment of a procedure. The Long.MAX_VALUE comparison covers
      // the very first fragment seen.
      final boolean firstFragmentOfTxn =
          fragment.getTxnId() > m_lastMpHandle || m_lastMpHandle == Long.MAX_VALUE;
      if (firstFragmentOfTxn) {
        m_logMP.add(new Item(IS_MP, fragment, fragment.getSpHandle(), fragment.getTxnId()));
        m_lastMpHandle = fragment.getTxnId();
        m_lastSpHandle = fragment.getSpHandle();
      }
      return;
    }

    if (msg instanceof CompleteTransactionMessage) {
      final CompleteTransactionMessage complete = (CompleteTransactionMessage) msg;
      // Read-only completions are not logged. A completion that signals a restart is not the
      // end of the transaction and needs no repair here, so it is not logged either.
      final boolean skip = complete.isReadOnly() || complete.isRestart();
      if (skip) {
        return;
      }

      truncate(complete.getTruncationHandle(), IS_MP);
      m_logMP.add(new Item(IS_MP, complete, complete.getSpHandle(), complete.getTxnId()));
      // Restore sends a completion with a lower MP transaction id, because the restore
      // transaction precedes loading the right MP transaction id from the snapshot.
      // Math.max keeps the handle from moving backwards in that case.
      m_lastMpHandle = Math.max(m_lastMpHandle, complete.getTxnId());
      m_lastSpHandle = complete.getSpHandle();
      return;
    }

    if (msg instanceof DumpMessage) {
      final String site = CoreUtils.hsIdToString(m_HSId);
      final String summary =
          "Repair log dump for site: "
              + site
              + ", isLeader: "
              + m_isLeader
              + ", "
              + site
              + ": lastSpHandle: "
              + m_lastSpHandle
              + ", lastMpHandle: "
              + m_lastMpHandle;
      tmLog.warn(summary);
      for (Iv2RepairLogResponseMessage entry : contents(0L, false)) {
        tmLog.warn("[Repair log contents]" + site + ": msg: " + entry);
      }
      return;
    }

    if (msg instanceof RepairLogTruncationMessage) {
      final RepairLogTruncationMessage truncation = (RepairLogTruncationMessage) msg;
      truncate(truncation.getHandle(), IS_SP);
    }
  }
Пример #6
0
  /**
   * Replays a bounded batch of messages from the rejoin task log.
   *
   * <p>Does nothing unless {@code m_rejoinState} is {@code kStateReplayingRejoin}. Otherwise up to
   * ten loop iterations are run per call; an iteration is consumed even when the message is
   * dropped by {@code filter}. Each surviving message is wrapped in the matching task type and run
   * via {@code runFromTaskLog(this)}. The open multi-partition transaction, if any, is tracked in
   * {@code global_replay_mpTxn} across calls.
   *
   * <p>Once the task log is empty and no MP transaction is open, replay is marked complete via
   * {@code setReplayRejoinComplete()}.
   *
   * @throws IOException declared by this method; presumably raised by the task log or task
   *     execution calls below (not determinable from this block alone)
   */
  void replayFromTaskLog() throws IOException {
    // not yet time to catch-up.
    if (m_rejoinState != kStateReplayingRejoin) {
      return;
    }

    // replay 10:1 in favor of replay
    for (int i = 0; i < 10; ++i) {
      if (m_rejoinTaskLog.isEmpty()) {
        break;
      }

      // A null message also ends this batch, even though isEmpty() returned false above.
      TransactionInfoBaseMessage tibm = m_rejoinTaskLog.getNextMessage();
      if (tibm == null) {
        break;
      }

      // Apply the readonly / sysproc filter. With Iv2 read optimizations,
      // reads should not reach here; the cost of post-filtering shouldn't
      // be particularly high (vs pre-filtering).
      if (filter(tibm)) {
        continue;
      }

      if (tibm instanceof Iv2InitiateTaskMessage) {
        // Single-partition procedure invocation.
        Iv2InitiateTaskMessage m = (Iv2InitiateTaskMessage) tibm;
        SpProcedureTask t =
            new SpProcedureTask(m_initiatorMailbox, m.getStoredProcedureName(), null, m, null);
        t.runFromTaskLog(this);
      } else if (tibm instanceof FragmentTaskMessage) {
        // MP fragment: the first fragment opens the replay transaction state; later fragments
        // must belong to that same transaction.
        FragmentTaskMessage m = (FragmentTaskMessage) tibm;
        if (global_replay_mpTxn == null) {
          global_replay_mpTxn = new ParticipantTransactionState(m.getTxnId(), m);
        } else if (global_replay_mpTxn.txnId != m.getTxnId()) {
          // NOTE(review): if crashLocalVoltDB returns, execution falls through and the
          // fragment is still run against the already-open transaction state.
          VoltDB.crashLocalVoltDB(
              "Started a MP transaction during replay before completing " + " open transaction.",
              false,
              null);
        }
        FragmentTask t = new FragmentTask(m_initiatorMailbox, m, global_replay_mpTxn);
        t.runFromTaskLog(this);
      } else if (tibm instanceof CompleteTransactionMessage) {
        // Needs improvement: completes for sysprocs aren't filterable as sysprocs.
        // Only complete transactions that are open...
        if (global_replay_mpTxn != null) {
          CompleteTransactionMessage m = (CompleteTransactionMessage) tibm;
          // The task captures the open transaction state before it is cleared below.
          CompleteTransactionTask t =
              new CompleteTransactionTask(global_replay_mpTxn, null, m, null);
          // A restart completion leaves the transaction open.
          if (!m.isRestart()) {
            global_replay_mpTxn = null;
          }
          t.runFromTaskLog(this);
        }
      } else {
        VoltDB.crashLocalVoltDB(
            "Can not replay message type " + tibm + " during live rejoin. Unexpected error.",
            false,
            null);
      }
    }

    // exit replay being careful not to exit in the middle of a multi-partition
    // transaction. The SPScheduler doesn't have a valid transaction state for a
    // partially replayed MP txn and in case of rollback the scheduler's undo token
    // is wrong. Run MP txns fully kStateRejoining or fully kStateRunning.
    if (m_rejoinTaskLog.isEmpty() && global_replay_mpTxn == null) {
      setReplayRejoinComplete();
    }
  }