private void processRejoiningFragmentWork(
      FragmentTaskMessage ftask, HashMap<Integer, List<VoltTable>> dependencies) {
    assert (ftask.getFragmentCount() > 0);
    assert (m_rejoinState == RejoinState.REJOINING);

    FragmentResponseMessage response = new FragmentResponseMessage(ftask, m_hsId);
    response.setRecovering(true);
    response.setStatus(FragmentResponseMessage.SUCCESS, null);

    // log the work done for replay
    if (!ftask.isReadOnly() && !ftask.isSysProcTask()) {
      assert (!m_notice.isReadOnly());
      assert (m_loggedFragments != null);
      m_loggedFragments.appendFragmentTask(ftask);
    }

    // add a dummy table for all of the expected dependency ids
    for (int i = 0; i < ftask.getFragmentCount(); i++) {
      response.addDependency(
          ftask.getOutputDepId(i),
          new VoltTable(new VoltTable.ColumnInfo("DUMMY", VoltType.BIGINT)));
    }

    m_mbox.send(response.getDestinationSiteId(), response);

    // If we're not the coordinator, the transaction is read-only,
    // and this was the final task, then we can try to move on after
    // we've finished this work.
    if (!isCoordinator() && isReadOnly() && ftask.isFinalTask()) {
      m_done = true;
    }
  }
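A hedged sketch of the replay log that m_loggedFragments appears to represent: non-read-only fragment tasks are appended in arrival order so they can be re-executed after rejoin. Apart from appendFragmentTask, the class and method names below are assumptions for illustration, not the actual VoltDB implementation.

  // Minimal sketch, assuming the log is an ordered, in-memory list of tasks.
  static class LoggedFragmentsSketch {
    private final List<FragmentTaskMessage> m_tasks = new ArrayList<FragmentTaskMessage>();

    // matches the call made above; tasks are kept in arrival order
    void appendFragmentTask(FragmentTaskMessage ftask) {
      m_tasks.add(ftask);
    }

    // hypothetical accessor a replay pass might use; drains the log
    List<FragmentTaskMessage> drainForReplay() {
      List<FragmentTaskMessage> replay = new ArrayList<FragmentTaskMessage>(m_tasks);
      m_tasks.clear();
      return replay;
    }
  }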
  @Override
  public void processRemoteWorkResponse(FragmentResponseMessage response) {
    // if we've already decided that we're rolling back, then we just
    // want to discard any incoming FragmentResponses that were
    // possibly in flight
    if (m_needsRollback) {
      return;
    }

    if (response.getStatusCode() != FragmentResponseMessage.SUCCESS) {
      if (m_missingDependencies != null) m_missingDependencies.clear();
      m_readyWorkUnits.clear();

      if (m_isCoordinator) {
        // throw an exception which will back the runtime all the way
        // to the stored procedure invocation call, triggering undo
        // at that point
        if (response.getException() != null) {
          throw response.getException();
        } else {
          throw new FragmentFailureException();
        }
      } else {
        m_needsRollback = true;
        m_done = true;
      }
    }

    processFragmentResponseDependencies(response);
  }
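For the coordinator path above to unwind all the way back to the stored procedure invocation, the thrown exception must be unchecked. A minimal sketch under that assumption; the real FragmentFailureException may carry more state.

  // Sketch only: an unchecked exception lets the throws above propagate
  // through the fragment-processing call chain without checked declarations.
  static class FragmentFailureExceptionSketch extends RuntimeException {
    private static final long serialVersionUID = 1L;

    FragmentFailureExceptionSketch() {
      super("fragment execution failed; transaction will be rolled back");
    }
  }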
Example #3
 public static void logInitiatorRxMsg(VoltMessage msg, long localHSId) {
   if (iv2log.isTraceEnabled()) {
     if (msg instanceof InitiateResponseMessage) {
       InitiateResponseMessage iresp = (InitiateResponseMessage) msg;
       String logmsg = "rxInitRsp %s from %s ciHandle %s txnId %s spHandle %s status %s";
       iv2log.trace(
           String.format(
               logmsg,
               CoreUtils.hsIdToString(localHSId),
               CoreUtils.hsIdToString(iresp.m_sourceHSId),
               ClientInterfaceHandleManager.handleToString(iresp.getClientInterfaceHandle()),
               txnIdToString(iresp.getTxnId()),
               txnIdToString(iresp.getSpHandle()),
               respStatusToString(iresp.getClientResponseData().getStatus())));
     } else if (msg instanceof FragmentResponseMessage) {
       FragmentResponseMessage fresp = (FragmentResponseMessage) msg;
       String logmsg = "rxFragRsp %s from %s txnId %s spHandle %s status %s";
       iv2log.trace(
           String.format(
               logmsg,
               CoreUtils.hsIdToString(localHSId),
               CoreUtils.hsIdToString(fresp.m_sourceHSId),
               txnIdToString(fresp.getTxnId()),
               txnIdToString(fresp.getSpHandle()),
               fragStatusToString(fresp.getStatusCode())));
     }
   }
 }
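txnIdToString, respStatusToString, and fragStatusToString are helpers defined elsewhere in this class. A hypothetical sketch of the fragment-status mapping, covering only the two FragmentResponseMessage status codes that appear in this listing:

 // Hypothetical helper sketch; any code other than the two used in this
 // listing falls back to the raw status byte.
 private static String fragStatusToStringSketch(byte statusCode) {
   if (statusCode == FragmentResponseMessage.SUCCESS) {
     return "SUCCESS";
   } else if (statusCode == FragmentResponseMessage.UNEXPECTED_ERROR) {
     return "UNEXPECTED_ERROR";
   }
   return "STATUS_" + statusCode;
 }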
Example #4
  // This runs on the BabySitter's thread. It and deliver() are effectively
  // synchronized with each other because both are invoked through
  // InitiatorMailbox's API rather than called directly, and that API is
  // synchronized on the same lock as deliver().
  @Override
  public void updateReplicas(List<Long> replicas) {
    // First - correct the official replica set.
    m_replicaHSIds = replicas;
    // Update the list of remote replicas that we'll need to send to
    List<Long> sendToHSIds = new ArrayList<Long>(m_replicaHSIds);
    sendToHSIds.remove(m_mailbox.getHSId());
    m_sendToHSIds = Longs.toArray(sendToHSIds);

    // Cleanup duplicate counters and collect DONE counters
    // in this list for further processing.
    List<DuplicateCounterKey> doneCounters = new LinkedList<DuplicateCounterKey>();
    for (Entry<DuplicateCounterKey, DuplicateCounter> entry : m_duplicateCounters.entrySet()) {
      DuplicateCounter counter = entry.getValue();
      int result = counter.updateReplicas(m_replicaHSIds);
      if (result == DuplicateCounter.DONE) {
        doneCounters.add(entry.getKey());
      }
    }

    // Maintain the CI invariant that responses arrive in txnid order.
    Collections.sort(doneCounters);
    for (DuplicateCounterKey key : doneCounters) {
      DuplicateCounter counter = m_duplicateCounters.remove(key);
      VoltMessage resp = counter.getLastResponse();
      if (resp != null) {
        // MPI is tracking deps per partition HSID.  We need to make
        // sure we write ours into the message getting sent to the MPI
        if (resp instanceof FragmentResponseMessage) {
          FragmentResponseMessage fresp = (FragmentResponseMessage) resp;
          fresp.setExecutorSiteId(m_mailbox.getHSId());
        }
        m_mailbox.send(counter.m_destinationId, resp);
      } else {
        hostLog.warn(
            "TXN "
                + counter.getTxnId()
                + " lost all replicas and "
                + "had no responses.  This should be impossible?");
      }
    }
    writeIv2ViableReplayEntry();
  }
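Collections.sort(doneCounters) above relies on DuplicateCounterKey having a natural ordering by transaction id. A sketch of such a key, assuming it is built from txnId and spHandle; the field names and exact ordering are assumptions, not the actual VoltDB class.

  // Sketch of a sortable duplicate-counter key. compareTo() orders keys by
  // txnId (then spHandle) so deferred responses are released in txnid order,
  // preserving the CI invariant mentioned above.
  static class DuplicateCounterKeySketch implements Comparable<DuplicateCounterKeySketch> {
    final long m_txnId;
    final long m_spHandle;

    DuplicateCounterKeySketch(long txnId, long spHandle) {
      m_txnId = txnId;
      m_spHandle = spHandle;
    }

    @Override
    public int compareTo(DuplicateCounterKeySketch other) {
      int cmp = Long.compare(m_txnId, other.m_txnId);
      return cmp != 0 ? cmp : Long.compare(m_spHandle, other.m_spHandle);
    }

    @Override
    public boolean equals(Object o) {
      if (!(o instanceof DuplicateCounterKeySketch)) return false;
      DuplicateCounterKeySketch that = (DuplicateCounterKeySketch) o;
      return m_txnId == that.m_txnId && m_spHandle == that.m_spHandle;
    }

    @Override
    public int hashCode() {
      return 31 * Long.hashCode(m_txnId) + Long.hashCode(m_spHandle);
    }
  }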
Example #5
  // Eventually, the master for a partition set will need to be able to dedupe
  // FragmentResponses from its replicas.
  public void handleFragmentResponseMessage(FragmentResponseMessage message) {
    // Send the message to the duplicate counter, if any
    DuplicateCounter counter =
        m_duplicateCounters.get(new DuplicateCounterKey(message.getTxnId(), message.getSpHandle()));
    if (counter != null) {
      int result = counter.offer(message);
      if (result == DuplicateCounter.DONE) {
        m_duplicateCounters.remove(
            new DuplicateCounterKey(message.getTxnId(), message.getSpHandle()));
        m_repairLogTruncationHandle = message.getSpHandle();
        FragmentResponseMessage resp = (FragmentResponseMessage) counter.getLastResponse();
        // MPI is tracking deps per partition HSID.  We need to make
        // sure we write ours into the message getting sent to the MPI
        resp.setExecutorSiteId(m_mailbox.getHSId());
        m_mailbox.send(counter.m_destinationId, resp);
      } else if (result == DuplicateCounter.MISMATCH) {
        VoltDB.crashLocalVoltDB("HASH MISMATCH running multi-part procedure.", true, null);
      }
      // doing duplicate suppression: all done.
      return;
    }

    m_mailbox.send(message.getDestinationSiteId(), message);
  }
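The dedupe above depends on DuplicateCounter.offer() returning WAITING until every replica has answered, DONE once all have, and MISMATCH if replicas disagree. A simplified sketch of that contract; the constant values, field names, and the hash comparison are assumptions for illustration (the real counter compares result hashes, not Java hashCodes).

  // Simplified sketch of the replica dedupe contract used above.
  static class DuplicateCounterSketch {
    static final int WAITING = 0;
    static final int DONE = 1;
    static final int MISMATCH = 2;

    private final Set<Long> m_outstandingHSIds;
    private Integer m_expectedHash = null;
    private VoltMessage m_lastResponse = null;

    DuplicateCounterSketch(Collection<Long> expectedReplicaHSIds) {
      m_outstandingHSIds = new HashSet<Long>(expectedReplicaHSIds);
    }

    int offer(FragmentResponseMessage response) {
      // stand-in for the real per-response result hash
      int hash = response.getTableCount() > 0 ? response.getTableAtIndex(0).hashCode() : 0;
      if (m_expectedHash == null) {
        m_expectedHash = hash;
      } else if (m_expectedHash != hash) {
        return MISMATCH;
      }
      m_lastResponse = response;
      m_outstandingHSIds.remove(response.getExecutorSiteId());
      return m_outstandingHSIds.isEmpty() ? DONE : WAITING;
    }

    VoltMessage getLastResponse() {
      return m_lastResponse;
    }
  }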
  private void processFragmentResponseDependencies(FragmentResponseMessage response) {
    int depCount = response.getTableCount();
    for (int i = 0; i < depCount; i++) {
      int dependencyId = response.getTableDependencyIdAtIndex(i);
      VoltTable payload = response.getTableAtIndex(i);
      assert (payload != null);

      // if we're getting a dependency, we should already know about it
      assert (m_missingDependencies != null);

      WorkUnit w = m_missingDependencies.get(dependencyId);
      if (w == null) {
        String msg =
            "Unable to find WorkUnit for dependency: "
                + dependencyId
                + " as part of TXN: "
                + txnId
                + " received from execution site: "
                + response.getExecutorSiteId();
        hostLog.warn(msg);
        // throw new FragmentFailureException();
        return;
      }

      // if the node is recovering, it doesn't matter if the payload matches
      if (response.isRecovering()) {
        w.putDummyDependency(dependencyId, response.getExecutorSiteId());
      } else {
        w.putDependency(
            dependencyId, response.getExecutorSiteId(), payload, m_site.getSiteTracker());
      }
      if (w.allDependenciesSatisfied()) {
        handleWorkUnitComplete(w);
      }
    }
  }
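putDependency, putDummyDependency, and allDependenciesSatisfied above imply per-work-unit bookkeeping of which sites still owe each dependency id. A hedged sketch of that bookkeeping; class, field, and method signatures beyond the calls above are assumptions, and the site-tracker argument is omitted.

  // Sketch of the implied dependency tracking: each work unit knows which
  // sites still owe each dependency id; a dummy dependency from a rejoining
  // site counts as delivered but contributes no payload.
  static class WorkUnitSketch {
    // dependency id -> sites that have not yet delivered it
    private final Map<Integer, Set<Long>> m_outstanding = new HashMap<Integer, Set<Long>>();
    // dependency id -> payloads received so far
    private final Map<Integer, List<VoltTable>> m_results = new HashMap<Integer, List<VoltTable>>();

    void expectDependency(int depId, Set<Long> expectedSites) {
      m_outstanding.put(depId, new HashSet<Long>(expectedSites));
      m_results.put(depId, new ArrayList<VoltTable>());
    }

    void putDependency(int depId, long siteId, VoltTable payload) {
      m_outstanding.get(depId).remove(siteId);
      m_results.get(depId).add(payload);
    }

    void putDummyDependency(int depId, long siteId) {
      m_outstanding.get(depId).remove(siteId);
    }

    boolean allDependenciesSatisfied() {
      for (Set<Long> pending : m_outstanding.values()) {
        if (!pending.isEmpty()) return false;
      }
      return true;
    }
  }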
  void processFragmentWork(
      FragmentTaskMessage ftask, HashMap<Integer, List<VoltTable>> dependencies) {
    assert (ftask.getFragmentCount() > 0);

    FragmentResponseMessage response = m_site.processFragmentTask(this, dependencies, ftask);
    if (response.getStatusCode() != FragmentResponseMessage.SUCCESS) {
      if (m_missingDependencies != null) m_missingDependencies.clear();
      m_readyWorkUnits.clear();

      if (m_isCoordinator) {
        // throw an exception which will back the runtime all the way
        // to the stored procedure invocation call, triggering undo
        // at that point
        if (response.getException() != null) {
          throw response.getException();
        } else {
          throw new FragmentFailureException();
        }
      } else {
        m_needsRollback = true;
        m_done = true;
      }
    }

    if (m_isCoordinator && (response.getDestinationSiteId() == response.getExecutorSiteId())) {
      processFragmentResponseDependencies(response);
    } else {
      m_mbox.send(response.getDestinationSiteId(), response);
      // If we're not the coordinator, the transaction is read-only,
      // and this was the final task, then we can try to move on after
      // we've finished this work.
      if (!isCoordinator() && isReadOnly() && ftask.isFinalTask()) {
        m_done = true;
      }
    }
  }
  // Currently emulates the code in ProcedureRunner.slowPath()
  // So any change to how that stuff is built will need to
  // be reflected here
  MpTestPlan createTestPlan(
      int batchSize,
      boolean readOnly,
      boolean replicatedTable,
      boolean rollback,
      long[] remoteHSIds)
      throws IOException {
    boolean single_frag = readOnly && replicatedTable;
    MpTestPlan plan = new MpTestPlan();

    List<Integer> distributedOutputDepIds = new ArrayList<Integer>();
    List<Integer> depsToResumeList = new ArrayList<Integer>();
    List<Integer> depsForLocalTask = new ArrayList<Integer>();
    for (int i = 0; i < batchSize; i++) {
      // each SQL statement in the batch gets an output dep ID
      // which corresponds to a local fragment ID
      depsToResumeList.add(i);
      // each local fragment ID needs an input dep. If this is not a
      // replicated read-only plan, generate a new value and add it to
      // the distributed output deps
      if (!single_frag) {
        // take the dep and add 1000
        depsForLocalTask.add(i + 1000);
        distributedOutputDepIds.add(i + 1000);
      } else {
        depsForLocalTask.add(-1);
      }
    }

    // store resume dependencies in the MpTestPlan for later.
    plan.depsToResume = depsToResumeList;

    // generate remote task with output IDs, fill in lists appropriately
    plan.remoteWork =
        new FragmentTaskMessage(
            Long.MIN_VALUE, // try not to care?
            Long.MIN_VALUE, // try not to care?
            Long.MIN_VALUE, // try not to care?
            1234L, // magic, change if it matters
            readOnly,
            false,
            false); // IV2 doesn't use final task (yet)

    for (int i = 0; i < distributedOutputDepIds.size(); i++) {
      plan.remoteWork.addFragment(
          Long.MIN_VALUE, distributedOutputDepIds.get(i), createDummyParameterSet());
    }
    System.out.println("REMOTE TASK: " + plan.remoteWork.toString());

    if (!single_frag) {
      // generate a remote fragment response for each remote message
      for (int i = 0; i < remoteHSIds.length; i++) {
        FragmentResponseMessage resp = new FragmentResponseMessage(plan.remoteWork, remoteHSIds[i]);
        if (rollback && i == (remoteHSIds.length - 1)) {
          resp.setStatus(FragmentResponseMessage.UNEXPECTED_ERROR, new EEException(1234));
        } else {
          resp.setStatus(FragmentResponseMessage.SUCCESS, null);
          for (int j = 0; j < distributedOutputDepIds.size(); j++) {
            resp.addDependency(
                distributedOutputDepIds.get(j),
                new VoltTable(new VoltTable.ColumnInfo("BOGO", VoltType.BIGINT)));
          }
        }
        System.out.println("RESPONSE: " + resp);
        plan.generatedResponses.add(resp);
      }
    }

    // generate local task with new output IDs, use above outputs as inputs, if any
    plan.localWork =
        new FragmentTaskMessage(
            Long.MIN_VALUE, // try not to care
            Long.MIN_VALUE,
            Long.MIN_VALUE,
            1234L,
            readOnly,
            false,
            false);

    for (int i = 0; i < batchSize; i++) {
      plan.localWork.addFragment(0L, depsToResumeList.get(i), createDummyParameterSet());
    }

    for (int i = 0; i < depsForLocalTask.size(); i++) {
      if (depsForLocalTask.get(i) < 0) continue;
      plan.localWork.addInputDepId(i, depsForLocalTask.get(i));
    }
    // create the FragmentResponse for the BorrowTask
    FragmentResponseMessage resp = new FragmentResponseMessage(plan.remoteWork, remoteHSIds[0]);
    resp.setStatus(FragmentResponseMessage.SUCCESS, null);
    for (int j = 0; j < batchSize; j++) {
      resp.addDependency(
          depsToResumeList.get(j),
          new VoltTable(new VoltTable.ColumnInfo("BOGO", VoltType.BIGINT)));
    }
    System.out.println("BORROW RESPONSE: " + resp);
    plan.generatedResponses.add(resp);

    System.out.println("LOCAL TASK: " + plan.localWork.toString());

    return plan;
  }
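A hypothetical usage sketch for createTestPlan(), assuming a JUnit-style harness in the surrounding test class (the @Test annotation and assertEquals import are not shown in this listing). The assertions only touch fields that the method above populates.

  @Test
  public void testPlanShapeSketch() throws IOException {
    long[] remotes = new long[] {1L, 2L};
    // two-statement, read-write, non-replicated plan with no rollback
    MpTestPlan plan = createTestPlan(2, false, false, false, remotes);

    // one distributed fragment per batched statement
    assertEquals(2, plan.remoteWork.getFragmentCount());
    // one response per remote site plus the borrow response
    assertEquals(remotes.length + 1, plan.generatedResponses.size());
    // the local (coordinator) task resumes on the original dep ids
    assertEquals(2, plan.localWork.getFragmentCount());
  }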