Example #1
    /*
     * Multi-partition/non-replicated fragment with collector and aggregator.
     */
    void addFragmentPair(
        int index,
        PlanFragment collectorFragment,
        PlanFragment aggregatorFragment,
        ByteBuffer params) {
      assert (index >= 0);
      assert (index < m_batchSize);
      assert (collectorFragment != null);
      assert (aggregatorFragment != null);
      assert (collectorFragment.getHasdependencies() == false);
      assert (aggregatorFragment.getHasdependencies() == true);

      // frags with no deps are usually collector frags that go to all partitions
      long distributedFragId = CatalogUtil.getUniqueIdForFragment(collectorFragment);
      long localFragId = CatalogUtil.getUniqueIdForFragment(aggregatorFragment);
      // if any frag is transactional, update this check
      if (aggregatorFragment.getNontransactional() == true) {
        m_localFragsAreNonTransactional = true;
      }
      int outputDepId = m_txnState.getNextDependencyId() | DtxnConstants.MULTIPARTITION_DEPENDENCY;
      m_depsForLocalTask[index] = outputDepId;
      // Add local and distributed fragments.
      m_localTask.addFragment(localFragId, m_depsToResume[index], params);
      m_distributedTask.addFragment(distributedFragId, outputDepId, params);
    }
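
The outputDepId above is built by ORing DtxnConstants.MULTIPARTITION_DEPENDENCY into the next per-transaction dependency ID, so a receiver can tell a multi-partition dependency from a plain local one by testing that bit. A minimal, self-contained sketch of the tagging scheme; the bit value here is a hypothetical stand-in, the real constant lives in DtxnConstants:

public class DepIdTagSketch {
  // Hypothetical stand-in for DtxnConstants.MULTIPARTITION_DEPENDENCY.
  static final int MULTIPARTITION_DEPENDENCY = 1 << 30;

  static int tag(int depId) {
    return depId | MULTIPARTITION_DEPENDENCY;
  }

  static boolean isMultipartition(int depId) {
    return (depId & MULTIPARTITION_DEPENDENCY) != 0;
  }

  public static void main(String[] args) {
    int outputDepId = tag(42); // as if 42 came from getNextDependencyId()
    System.out.println(isMultipartition(outputDepId)); // true
    System.out.println(outputDepId & ~MULTIPARTITION_DEPENDENCY); // 42
  }
}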
Example #2
  @Override
  public void createLocalFragmentWork(FragmentTaskMessage task, boolean nonTransactional) {
    if (task.getFragmentCount() <= 0) return;

    WorkUnit w =
        new WorkUnit(
            m_site.getSiteTracker(),
            task,
            task.getAllUnorderedInputDepIds(),
            m_hsId,
            m_nonCoordinatingSites,
            false,
            m_allowMismatchedResults);
    w.nonTransactional = nonTransactional;

    for (int i = 0; i < task.getFragmentCount(); i++) {
      ArrayList<Integer> inputDepIds = task.getInputDepIds(i);
      if (inputDepIds == null) continue;
      for (int inputDepId : inputDepIds) {
        if (m_missingDependencies == null) m_missingDependencies = new HashMap<Integer, WorkUnit>();
        assert (!m_missingDependencies.containsKey(inputDepId));
        m_missingDependencies.put(inputDepId, w);
      }
    }

    if (w.allDependenciesSatisfied()) m_readyWorkUnits.add(w);
  }
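
createLocalFragmentWork files the new WorkUnit in m_missingDependencies under each input dependency ID so that arriving dependency tables can be routed to it, and the unit is only offered to m_readyWorkUnits once every input is satisfied. A minimal sketch of that bookkeeping, using simplified stand-in types rather than VoltDB's real WorkUnit:

import java.util.ArrayDeque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Queue;
import java.util.Set;

class DependencyTrackerSketch {
  // Simplified stand-in for a WorkUnit: runnable once all inputs arrive.
  static class Unit {
    final Set<Integer> pendingInputs = new HashSet<>();
    boolean allDependenciesSatisfied() { return pendingInputs.isEmpty(); }
  }

  private final Map<Integer, Unit> missingDependencies = new HashMap<>();
  private final Queue<Unit> readyWorkUnits = new ArrayDeque<>();

  void register(Unit u, int... inputDepIds) {
    for (int depId : inputDepIds) {
      u.pendingInputs.add(depId);
      missingDependencies.put(depId, u); // route arriving tables to this unit
    }
    if (u.allDependenciesSatisfied()) readyWorkUnits.add(u);
  }

  // Called when a dependency table arrives from another site.
  void satisfy(int depId) {
    Unit u = missingDependencies.remove(depId);
    if (u == null) return;
    u.pendingInputs.remove(depId);
    if (u.allDependenciesSatisfied()) readyWorkUnits.add(u);
  }
}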
Example #3
 private void logToDR(PartitionDRGateway drGateway) {
   // Log invocation to DR
   if (drGateway != null
       && !m_txnState.isForReplay()
       && !m_txnState.isReadOnly()
       && !m_completeMsg.isRollback()) {
     FragmentTaskMessage fragment = (FragmentTaskMessage) m_txnState.getNotice();
     Iv2InitiateTaskMessage initiateTask = fragment.getInitiateTask();
     assert (initiateTask != null);
     if (initiateTask == null) {
       hostLog.error(
           "Unable to log MP transaction to DR because of missing InitiateTaskMessage, "
               + "fragment: "
               + fragment.toString());
       return;
     }
     StoredProcedureInvocation invocation =
         initiateTask.getStoredProcedureInvocation().getShallowCopy();
     drGateway.onSuccessfulMPCall(
         m_txnState.m_spHandle,
         m_txnState.txnId,
         m_txnState.uniqueId,
         m_completeMsg.getHash(),
         invocation,
         m_txnState.getResults());
   }
 }
Example #4
 public static void logFragmentTaskMessage(
     FragmentTaskMessage ftask, long localHSId, long spHandle, boolean borrow) {
   if (iv2log.isTraceEnabled()) {
     String label = "rxFragMsg";
     if (borrow) {
       label = "rxBrrwMsg";
     }
     if (ftask.getSpHandle() != Long.MIN_VALUE && ftask.getSpHandle() != spHandle) {
       iv2log.error(
           "FragmentTaskMessage SP HANDLE conflict.  Message: "
               + ftask.getSpHandle()
               + ", locally held: "
               + spHandle);
     }
     String logmsg = "%s %s from %s txnId %s spHandle %s trunc %s";
     iv2log.trace(
         String.format(
             logmsg,
             label,
             CoreUtils.hsIdToString(localHSId),
             CoreUtils.hsIdToString(ftask.m_sourceHSId),
             txnIdToString(ftask.getTxnId()),
             txnIdToString(spHandle),
             txnIdToString(ftask.getTruncationHandle())));
   }
 }
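
The isTraceEnabled() check above is the standard guard pattern: the String.format call and the hsId/txnId conversions are only paid when tracing is actually on, which keeps the hot message-receive path cheap. A minimal sketch of the same pattern with java.util.logging standing in for the VoltDB logger:

import java.util.logging.Level;
import java.util.logging.Logger;

class TraceGuardSketch {
  private static final Logger log = Logger.getLogger("iv2");

  static void logReceive(String label, long localHSId, long sourceHSId, long txnId) {
    // Guard first: the format call below only runs when tracing is enabled.
    if (log.isLoggable(Level.FINEST)) {
      log.finest(String.format("%s local %d from %d txnId %d",
          label, localHSId, sourceHSId, txnId));
    }
  }
}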
Example #5
  /**
   * Do the work necessary to turn the FragmentTaskMessage into a TransactionTask which can be
   * queued to the TransactionTaskQueue. This is reused by both the normal message handling path and
   * the repair path, and assumes that the caller has already resolved the necessary ID,
   * SpHandle, and replication issues.
   */
  private void doLocalFragmentOffer(FragmentTaskMessage msg) {
    TransactionState txn = m_outstandingTxns.get(msg.getTxnId());
    boolean logThis = false;
    // A bit of a hack: we probably don't want to create and offer
    // FragmentTasks for txn ids that don't match if we already have
    // something in progress.
    if (txn == null) {
      txn = new ParticipantTransactionState(msg.getSpHandle(), msg);
      m_outstandingTxns.put(msg.getTxnId(), txn);
      // Only want to send things to the command log if it satisfies this predicate
      // AND we've never seen anything for this transaction before.  We can't
      // actually log until we create a TransactionTask, though, so just keep track
      // of whether it needs to be done.
      logThis = (msg.getInitiateTask() != null && !msg.getInitiateTask().isReadOnly());
    }

    // Check to see if this is the final task for this txn, and if so, whether we can close it
    // out early. Right now, this just means read-only.
    // NOTE: this overlaps slightly with CompleteTransactionMessage handling completion. It's so
    // tiny that for now, meh, but if this scope grows then it should get refactored out.
    if (msg.isFinalTask() && txn.isReadOnly()) {
      m_outstandingTxns.remove(msg.getTxnId());
    }

    TransactionTask task;
    if (msg.isSysProcTask()) {
      task =
          new SysprocFragmentTask(
              m_mailbox, (ParticipantTransactionState) txn, m_pendingTasks, msg, null);
    } else {
      task =
          new FragmentTask(m_mailbox, (ParticipantTransactionState) txn, m_pendingTasks, msg, null);
    }
    if (logThis) {
      if (!m_cl.log(msg.getInitiateTask(), msg.getSpHandle(), m_durabilityListener, task)) {
        m_pendingTasks.offer(task);
      } else {
        /* Getting here means that the task is the first fragment of an MP txn and
         * synchronous command logging is on, so create a backlog for future tasks of
         * this MP that arrive before it's marked durable.
         *
         * This is important for synchronous command logging and MP txn restart. Without
         * this, a restarted MP txn may not be gated by logging of the first fragment.
         */
        assert !m_mpsPendingDurability.containsKey(task.getTxnId());
        m_mpsPendingDurability.put(task.getTxnId(), new ArrayDeque<TransactionTask>());
      }
    } else {
      queueOrOfferMPTask(task);
    }
  }
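
The backlog created in the durable branch above is what gates later fragments of the same MP transaction behind synchronous command logging. A minimal sketch of that gating, with Runnable standing in for TransactionTask and hypothetical method names:

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashMap;
import java.util.Map;
import java.util.Queue;

class MpDurabilityBacklogSketch {
  private final Map<Long, Deque<Runnable>> mpsPendingDurability = new HashMap<>();
  private final Queue<Runnable> pendingTasks = new ArrayDeque<>();

  // First fragment accepted by the synchronous command log: open a backlog.
  void openBacklog(long txnId) {
    mpsPendingDurability.put(txnId, new ArrayDeque<>());
  }

  // Later tasks for the txn queue behind durability if a backlog is open.
  void queueOrOffer(long txnId, Runnable task) {
    Deque<Runnable> backlog = mpsPendingDurability.get(txnId);
    if (backlog != null) backlog.add(task);
    else pendingTasks.add(task);
  }

  // Durability listener callback: release the backlog into the run queue.
  void markDurable(long txnId) {
    Deque<Runnable> backlog = mpsPendingDurability.remove(txnId);
    if (backlog != null) pendingTasks.addAll(backlog);
  }
}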
Example #6
    /*
     * Multi-partition/non-replicated custom fragment with collector and aggregator.
     */
    void addCustomFragmentPair(
        int index, byte[] collectorFragment, byte[] aggregatorFragment, ByteBuffer params) {
      assert (index >= 0);
      assert (index < m_batchSize);
      assert (collectorFragment != null);
      assert (aggregatorFragment != null);

      int outputDepId = m_txnState.getNextDependencyId() | DtxnConstants.MULTIPARTITION_DEPENDENCY;
      m_depsForLocalTask[index] = outputDepId;
      // Add the aggregator and collector fragments.
      m_localTask.addCustomFragment(m_depsToResume[index], params, aggregatorFragment);
      m_distributedTask.addCustomFragment(outputDepId, params, collectorFragment);
    }
Example #7
    /*
     * Replicated custom fragment.
     */
    void addCustomFragment(int index, byte[] aggregatorFragment, ByteBuffer params) {
      assert (index >= 0);
      assert (index < m_batchSize);
      assert (aggregatorFragment != null);

      m_depsForLocalTask[index] = -1;
      m_localTask.addCustomFragment(m_depsToResume[index], params, aggregatorFragment);
    }
Example #8
  private void processRejoiningFragmentWork(
      FragmentTaskMessage ftask, HashMap<Integer, List<VoltTable>> dependencies) {
    assert (ftask.getFragmentCount() > 0);
    assert (m_rejoinState == RejoinState.REJOINING);

    FragmentResponseMessage response = new FragmentResponseMessage(ftask, m_hsId);
    response.setRecovering(true);
    response.setStatus(FragmentResponseMessage.SUCCESS, null);

    // log the work done for replay
    if (!ftask.isReadOnly() && !ftask.isSysProcTask()) {
      assert (m_notice.isReadOnly() == false);
      assert (m_loggedFragments != null);
      m_loggedFragments.appendFragmentTask(ftask);
    }

    // add a dummy table for all of the expected dependency ids
    for (int i = 0; i < ftask.getFragmentCount(); i++) {
      response.addDependency(
          ftask.getOutputDepId(i),
          new VoltTable(new VoltTable.ColumnInfo("DUMMY", VoltType.BIGINT)));
    }

    m_mbox.send(response.getDestinationSiteId(), response);

    // If we're not the coordinator, the transaction is read-only,
    // and this was the final task, then we can try to move on after
    // we've finished this work.
    if (!isCoordinator() && isReadOnly() && ftask.isFinalTask()) {
      m_done = true;
    }
  }
Example #9
    /*
     * Replicated fragment.
     */
    void addFragment(int index, PlanFragment frag, ByteBuffer params) {
      assert (index >= 0);
      assert (index < m_batchSize);
      assert (frag != null);
      assert (frag.getHasdependencies() == false);

      // if any frag is transactional, update this check
      if (frag.getNontransactional() == true) m_localFragsAreNonTransactional = true;

      long localFragId = CatalogUtil.getUniqueIdForFragment(frag);
      m_depsForLocalTask[index] = -1;
      // Add the local fragment data.
      m_localTask.addFragment(localFragId, m_depsToResume[index], params);
    }
Example #10
  void processFragmentWork(
      FragmentTaskMessage ftask, HashMap<Integer, List<VoltTable>> dependencies) {
    assert (ftask.getFragmentCount() > 0);

    FragmentResponseMessage response = m_site.processFragmentTask(this, dependencies, ftask);
    if (response.getStatusCode() != FragmentResponseMessage.SUCCESS) {
      if (m_missingDependencies != null) m_missingDependencies.clear();
      m_readyWorkUnits.clear();

      if (m_isCoordinator) {
        // throw an exception which will back the runtime all the way
        // to the stored procedure invocation call, triggering undo
        // at that point
        if (response.getException() != null) {
          throw response.getException();
        } else {
          throw new FragmentFailureException();
        }
      } else {
        m_needsRollback = true;
        m_done = true;
      }
    }

    if (m_isCoordinator && (response.getDestinationSiteId() == response.getExecutorSiteId())) {
      processFragmentResponseDependencies(response);
    } else {
      m_mbox.send(response.getDestinationSiteId(), response);
      // If we're not the coordinator, the transaction is read-only,
      // and this was the final task, then we can try to move on after
      // we've finished this work.
      if (!isCoordinator() && isReadOnly() && ftask.isFinalTask()) {
        m_done = true;
      }
    }
  }
Example #11
  @Test
  public void testTruncationHandleForwarding() throws IOException {
    long truncPt = 100L;
    Iv2InitiateTaskMessage taskmsg =
        new Iv2InitiateTaskMessage(
            0,
            0,
            truncPt,
            101L,
            System.currentTimeMillis(),
            true,
            false,
            new StoredProcedureInvocation(),
            0,
            0,
            false);
    assertEquals(truncPt, taskmsg.getTruncationHandle());

    FragmentTaskMessage localFrag = mock(FragmentTaskMessage.class);
    FragmentTaskMessage remoteFrag = mock(FragmentTaskMessage.class);
    when(remoteFrag.getFragmentCount()).thenReturn(1);

    buddyHSId = 0;
    Mailbox mailbox = mock(Mailbox.class);

    MpTransactionState dut = new MpTransactionState(mailbox, taskmsg, allHsids, buddyHSId, false);

    // create local work and verify that the created local work has the
    // expected truncation point.
    dut.createLocalFragmentWork(localFrag, false);
    verify(dut.m_localWork).setTruncationHandle(truncPt);

    // Same with participating work.
    dut.createAllParticipatingFragmentWork(remoteFrag);
    verify(dut.m_remoteWork).setTruncationHandle(truncPt);
  }
Example #12
  private void handleFragmentTaskMessageRepair(
      List<Long> needsRepair, FragmentTaskMessage message) {
    // Set up the duplicate counter: expect exactly the responses corresponding
    // to needsRepair. These may, or may not, include the local site.

    List<Long> expectedHSIds = new ArrayList<Long>(needsRepair);
    DuplicateCounter counter =
        new DuplicateCounter(
            message.getCoordinatorHSId(), // Assume that the MPI's HSID hasn't changed
            message.getTxnId(),
            expectedHSIds,
            "MP_DETERMINISM_ERROR");
    m_duplicateCounters.put(
        new DuplicateCounterKey(message.getTxnId(), message.getSpHandle()), counter);

    // is local repair necessary?
    if (needsRepair.contains(m_mailbox.getHSId())) {
      // Sanity check that we really need repair.
      if (m_outstandingTxns.get(message.getTxnId()) != null) {
        hostLog.warn(
            "SPI repair attempted to repair a fragment which it has already seen. "
                + "This shouldn't be possible.");
        // Not sure what to do in this event.  Crash for now
        throw new RuntimeException("Attempted to repair with a fragment we've already seen.");
      }
      needsRepair.remove(m_mailbox.getHSId());
      // make a copy because handleIv2 non-repair case does?
      FragmentTaskMessage localWork =
          new FragmentTaskMessage(
              message.getInitiatorHSId(), message.getCoordinatorHSId(), message);
      doLocalFragmentOffer(localWork);
    }

    // is remote repair necessary?
    if (!needsRepair.isEmpty()) {
      FragmentTaskMessage replmsg =
          new FragmentTaskMessage(m_mailbox.getHSId(), m_mailbox.getHSId(), message);
      m_mailbox.send(com.google.common.primitives.Longs.toArray(needsRepair), replmsg);
    }
  }
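
The DuplicateCounter above expects exactly one response per HSId in needsRepair and cross-checks the responses for determinism. A minimal sketch of that expectation tracking with simplified types (not VoltDB's DuplicateCounter):

import java.util.HashSet;
import java.util.List;
import java.util.Set;

class ResponseCounterSketch {
  private final Set<Long> outstanding;
  private Integer firstHash; // hash of the first response, for determinism checks

  ResponseCounterSketch(List<Long> expectedHSIds) {
    outstanding = new HashSet<>(expectedHSIds);
  }

  // Returns true when the last expected response has arrived.
  boolean offerResponse(long sourceHSId, int resultHash) {
    if (!outstanding.remove(sourceHSId)) {
      return false; // unexpected or duplicate source; the real code would flag this
    }
    if (firstHash == null) {
      firstHash = resultHash;
    } else if (firstHash != resultHash) {
      // Mismatched results across replicas: the "MP_DETERMINISM_ERROR" case.
      throw new IllegalStateException("replica results diverged");
    }
    return outstanding.isEmpty();
  }
}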
Example #13
  // SpSchedulers will see FragmentTaskMessage for:
  // - The scatter fragment(s) of a multi-part transaction (normal or sysproc)
  // - Borrow tasks to do the local fragment work if this partition is the
  //   buddy of the MPI.  Borrow tasks may include input dependency tables for
  //   aggregation fragments, or not, if it's a replicated table read.
  // For multi-batch MP transactions, we'll need to look up the transaction state
  // that gets created when the first batch arrives.
  // During command log replay a new SP handle is going to be generated, but it really
  // doesn't matter; it isn't going to be used for anything.
  void handleFragmentTaskMessage(FragmentTaskMessage message) {
    FragmentTaskMessage msg = message;
    long newSpHandle;
    if (m_isLeader) {
      // Quick hack to make progress...we need to copy the FragmentTaskMessage
      // before we start mucking with its state (SPHANDLE).  We need to revisit
      // all the messaging mess at some point.
      msg =
          new FragmentTaskMessage(
              message.getInitiatorHSId(), message.getCoordinatorHSId(), message);
      // Not going to use the timestamp from the new Ego because the multi-part timestamp is what
      // should be used
      TxnEgo ego = advanceTxnEgo();
      newSpHandle = ego.getTxnId();
      msg.setSpHandle(newSpHandle);
      if (msg.getInitiateTask() != null) {
        msg.getInitiateTask().setSpHandle(newSpHandle); // set the handle
        msg.setInitiateTask(
            msg.getInitiateTask()); // Trigger reserialization so the new handle is used
      }

      /*
       * If there are replicas to send it to, forward it!
       * Unless... it's read only AND not a sysproc. Read only sysprocs may expect to be sent
       * everywhere.
       * In that case don't propagate it to avoid a determinism check and extra messaging overhead
       */
      if (m_sendToHSIds.length > 0 && (!msg.isReadOnly() || msg.isSysProcTask())) {
        FragmentTaskMessage replmsg =
            new FragmentTaskMessage(m_mailbox.getHSId(), m_mailbox.getHSId(), msg);
        m_mailbox.send(m_sendToHSIds, replmsg);
        DuplicateCounter counter;
        /*
         * Non-determinism should be impossible with MP fragments.
         * If you see "MP_DETERMINISM_ERROR" as the procedure name in the crash logs,
         * something has gone horribly wrong.
         */
        if (message.getFragmentTaskType() != FragmentTaskMessage.SYS_PROC_PER_SITE) {
          counter =
              new DuplicateCounter(
                  msg.getCoordinatorHSId(), msg.getTxnId(), m_replicaHSIds, "MP_DETERMINISM_ERROR");
        } else {
          counter =
              new SysProcDuplicateCounter(
                  msg.getCoordinatorHSId(), msg.getTxnId(), m_replicaHSIds, "MP_DETERMINISM_ERROR");
        }
        m_duplicateCounters.put(new DuplicateCounterKey(msg.getTxnId(), newSpHandle), counter);
      }
    } else {
      newSpHandle = msg.getSpHandle();
      setMaxSeenTxnId(newSpHandle);
    }
    Iv2Trace.logFragmentTaskMessage(message, m_mailbox.getHSId(), newSpHandle, false);
    doLocalFragmentOffer(msg);
  }
Example #14
  // Offer a new message to the repair log. This will truncate
  // the repairLog if the message includes a truncation hint.
  public void deliver(VoltMessage msg) {
    if (!m_isLeader && msg instanceof Iv2InitiateTaskMessage) {
      final Iv2InitiateTaskMessage m = (Iv2InitiateTaskMessage) msg;
      // We can't repair read only SP transactions. Just don't log them to the repair log.
      if (m.isReadOnly()) {
        return;
      }

      m_lastSpHandle = m.getSpHandle();
      truncate(m.getTruncationHandle(), IS_SP);
      m_logSP.add(new Item(IS_SP, m, m.getSpHandle(), m.getTxnId()));
    } else if (msg instanceof FragmentTaskMessage) {
      final FragmentTaskMessage m = (FragmentTaskMessage) msg;

      // We can't repair read only SP transactions. Just don't log them to the repair log.
      if (m.isReadOnly()) {
        return;
      }

      truncate(m.getTruncationHandle(), IS_MP);
      // Only log the first fragment of a procedure (the Long.MAX_VALUE check handles the first-ever case).
      if (m.getTxnId() > m_lastMpHandle || m_lastMpHandle == Long.MAX_VALUE) {
        m_logMP.add(new Item(IS_MP, m, m.getSpHandle(), m.getTxnId()));
        m_lastMpHandle = m.getTxnId();
        m_lastSpHandle = m.getSpHandle();
      }
    } else if (msg instanceof CompleteTransactionMessage) {
      // A CompleteTransactionMessage that indicates a restart is not the end of the
      // transaction, so we don't want to log it in the repair log.
      CompleteTransactionMessage ctm = (CompleteTransactionMessage) msg;
      // We can't repair read only SP transactions. Just don't log them to the repair log.
      // Restarted transactions do not need to be repaired here, so don't log them either.
      if (ctm.isReadOnly() || ctm.isRestart()) {
        return;
      }

      truncate(ctm.getTruncationHandle(), IS_MP);
      m_logMP.add(new Item(IS_MP, ctm, ctm.getSpHandle(), ctm.getTxnId()));
      // Restore will send a complete transaction message with a lower MP transaction id
      // because the restore transaction precedes the loading of the right MP transaction id
      // from the snapshot. Hence the Math.max.
      m_lastMpHandle = Math.max(m_lastMpHandle, ctm.getTxnId());
      m_lastSpHandle = ctm.getSpHandle();
    } else if (msg instanceof DumpMessage) {
      String who = CoreUtils.hsIdToString(m_HSId);
      tmLog.warn(
          "Repair log dump for site: "
              + who
              + ", isLeader: "
              + m_isLeader
              + ", "
              + who
              + ": lastSpHandle: "
              + m_lastSpHandle
              + ", lastMpHandle: "
              + m_lastMpHandle);
      for (Iv2RepairLogResponseMessage il : contents(0L, false)) {
        tmLog.warn("[Repair log contents]" + who + ": msg: " + il);
      }
    } else if (msg instanceof RepairLogTruncationMessage) {
      final RepairLogTruncationMessage truncateMsg = (RepairLogTruncationMessage) msg;
      truncate(truncateMsg.getHandle(), IS_SP);
    }
  }
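
Every branch of deliver() calls truncate() with the message's truncation handle: entries at or below that handle are known durable everywhere and can be dropped from the repair log. A minimal sketch of truncation over an ordered log, with a simplified Item type:

import java.util.ArrayDeque;
import java.util.Deque;

class RepairLogTruncationSketch {
  static class Item {
    final long handle; // spHandle for SP items, txnId for MP items
    Item(long handle) { this.handle = handle; }
  }

  private final Deque<Item> log = new ArrayDeque<>();

  void append(long handle) { log.add(new Item(handle)); }

  // Drop entries with handle <= truncationHandle; they are acknowledged
  // everywhere and no longer needed for repair.
  void truncate(long truncationHandle) {
    while (!log.isEmpty() && log.peekFirst().handle <= truncationHandle) {
      log.pollFirst();
    }
  }
}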
Example #15
  void replayFromTaskLog() throws IOException {
    // Not yet time to catch up.
    if (m_rejoinState != kStateReplayingRejoin) {
      return;
    }

    // Replay up to 10 messages per call, biasing scheduling 10:1 in favor of replay.
    for (int i = 0; i < 10; ++i) {
      if (m_rejoinTaskLog.isEmpty()) {
        break;
      }

      TransactionInfoBaseMessage tibm = m_rejoinTaskLog.getNextMessage();
      if (tibm == null) {
        break;
      }

      // Apply the readonly / sysproc filter. With Iv2 read optimizations,
      // reads should not reach here; the cost of post-filtering shouldn't
      // be particularly high (vs pre-filtering).
      if (filter(tibm)) {
        continue;
      }

      if (tibm instanceof Iv2InitiateTaskMessage) {
        Iv2InitiateTaskMessage m = (Iv2InitiateTaskMessage) tibm;
        SpProcedureTask t =
            new SpProcedureTask(m_initiatorMailbox, m.getStoredProcedureName(), null, m, null);
        t.runFromTaskLog(this);
      } else if (tibm instanceof FragmentTaskMessage) {
        FragmentTaskMessage m = (FragmentTaskMessage) tibm;
        if (global_replay_mpTxn == null) {
          global_replay_mpTxn = new ParticipantTransactionState(m.getTxnId(), m);
        } else if (global_replay_mpTxn.txnId != m.getTxnId()) {
          VoltDB.crashLocalVoltDB(
              "Started a MP transaction during replay before completing " + " open transaction.",
              false,
              null);
        }
        FragmentTask t = new FragmentTask(m_initiatorMailbox, m, global_replay_mpTxn);
        t.runFromTaskLog(this);
      } else if (tibm instanceof CompleteTransactionMessage) {
        // Needs improvement: completes for sysprocs aren't filterable as sysprocs.
        // Only complete transactions that are open...
        if (global_replay_mpTxn != null) {
          CompleteTransactionMessage m = (CompleteTransactionMessage) tibm;
          CompleteTransactionTask t =
              new CompleteTransactionTask(global_replay_mpTxn, null, m, null);
          if (!m.isRestart()) {
            global_replay_mpTxn = null;
          }
          t.runFromTaskLog(this);
        }
      } else {
        VoltDB.crashLocalVoltDB(
            "Can not replay message type " + tibm + " during live rejoin. Unexpected error.",
            false,
            null);
      }
    }

    // Exit replay, being careful not to exit in the middle of a multi-partition
    // transaction. The SPScheduler doesn't have a valid transaction state for a
    // partially replayed MP txn, and in case of rollback the scheduler's undo token
    // is wrong. Run MP txns fully in kStateRejoining or fully in kStateRunning.
    if (m_rejoinTaskLog.isEmpty() && global_replay_mpTxn == null) {
      setReplayRejoinComplete();
    }
  }
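
The loop above drains at most ten logged messages per call (the 10:1 replay bias) and defers completion while an MP transaction is only partially replayed. A compact sketch of that drain pattern, with a hypothetical task-log interface in place of the rejoin task log:

class ReplayDrainSketch {
  interface TaskLog {
    boolean isEmpty();
    Runnable next(); // returns null when nothing is ready yet
  }

  private boolean mpTxnOpen = false;    // true while an MP txn is partially replayed
  private boolean replayComplete = false;

  void drainBatch(TaskLog taskLog) {
    for (int i = 0; i < 10; ++i) { // replay 10:1 in favor of replay
      if (taskLog.isEmpty()) break;
      Runnable t = taskLog.next();
      if (t == null) break;
      t.run();
    }
    // Never finish mid-MP-transaction: the scheduler has no valid state for a
    // partially replayed MP txn, so only complete once the log is drained and
    // no MP txn remains open.
    if (taskLog.isEmpty() && !mpTxnOpen) {
      replayComplete = true;
    }
  }
}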