@Override
  public void run(SiteProcedureConnection siteConnection) {
    // Completes this transaction on the given site connection: truncates the undo log when the
    // transaction is not read-only, then either finishes the transaction or resets it for a
    // restart, and finally delivers a CompleteTransactionResponseMessage to m_initiator.
    hostLog.debug("STARTING: " + this);

    final boolean readOnly = m_txnState.isReadOnly();
    if (!readOnly) {
      // The truncation point token SHOULD be part of m_txn. However, the legacy interfaces
      // don't work this way and IV2 hasn't changed this ownership yet. truncateUndoLog is
      // nevertheless written assuming the right eventual encapsulation.
      siteConnection.truncateUndoLog(
          m_completeMsg.isRollback(),
          m_txnState.getBeginUndoToken(),
          m_txnState.m_spHandle,
          m_txnState.getUndoLog());
    }

    if (m_completeMsg.isRestart()) {
      // The transaction is going to be restarted: reset the begin undo token so the first
      // FragmentTask sets it correctly. The Done state is deliberately not set and the queue
      // is not flushed, because the TransactionTaskQueue must stay blocked on this TXN ID
      // for the restarted fragments.
      m_txnState.setBeginUndoToken(Site.kInvalidUndoToken);
      hostLog.debug("RESTART: " + this);
    } else {
      doCommonSPICompleteActions();

      // Log the invocation to DR.
      logToDR(siteConnection.getDRGateway());
      hostLog.debug("COMPLETE: " + this);
    }

    // Respond to the initiator in both the restart and the completion case.
    final CompleteTransactionResponseMessage response =
        new CompleteTransactionResponseMessage(m_completeMsg);
    response.m_sourceHSId = m_initiator.getHSId();
    m_initiator.deliver(response);
  }
  /**
   * Rejoin-time variant of {@code run}: the undo log is not truncated here; instead the
   * completion message is recorded in the task log and a response flagged as recovering is
   * delivered to {@code m_initiator}.
   *
   * @param siteConnection site connection on which the snapshot-digest spHandle is set
   * @param taskLog log that receives the completion message for non-read-only transactions
   * @throws IOException declared by the signature (presumably raised by the task log; not
   *     visible from this method alone)
   */
  @Override
  public void runForRejoin(SiteProcedureConnection siteConnection, TaskLog taskLog)
      throws IOException {
    if (!(m_txnState.isReadOnly() || m_completeMsg.isRollback())) {
      // ENG-5276: The last committed spHandle has to be set so that the rejoining site gets
      // the accurate per-partition txnId set for the next snapshot. Normally this happens
      // through undo log truncation; since the task is not run here, it is set explicitly.
      //
      // How does this work?
      // - Blocking rejoin with idle cluster: the spHandle is updated here with the spHandle
      //   of the stream snapshot that transfers the rejoin data, so the snapshot right after
      //   rejoin should have the spHandle passed here.
      // - Live rejoin with idle cluster: same as blocking rejoin.
      // - Live rejoin with workload: transactions will be logged and replayed afterward. The
      //   spHandle will be updated when they commit and truncate undo logs, so at the end of
      //   replay the spHandle should have the latest value. If all replayed transactions
      //   rolled back, the spHandle is still guaranteed to be the spHandle of the stream
      //   snapshot that transferred the rejoin data, which is the correct value.
      siteConnection.setSpHandleForSnapshotDigest(m_txnState.m_spHandle);
    }

    final boolean restarting = m_completeMsg.isRestart();
    if (!restarting) {
      // future: offer to siteConnection.IBS for replay.
      doCommonSPICompleteActions();
    }

    final boolean readOnly = m_txnState.isReadOnly();
    if (!readOnly) {
      // The restarting message is logged to the task log as well, so the whole stream is
      // replayed faithfully.
      taskLog.logTask(m_completeMsg);
    }

    final CompleteTransactionResponseMessage response =
        new CompleteTransactionResponseMessage(m_completeMsg);
    response.setIsRecovering(true);
    response.m_sourceHSId = m_initiator.getHSId();
    m_initiator.deliver(response);
  }
Example #3
0
  /**
   * Records the number of expected EOFs, then creates and starts the snapshot data receiver
   * thread and the snapshot ack sender thread.
   *
   * @param sourceCount number of EOFs expected at the end; stored in {@code m_expectedEOFs}
   * @param bufferPool buffer pool handed to the {@code StreamSnapshotDataReceiver}
   * @return the HSId of the mailbox {@code m_mb}
   */
  public long initialize(int sourceCount, FixedDBBPool bufferPool) {
    // Expect sourceCount number of EOFs at the end.
    m_expectedEOFs.set(sourceCount);

    // Receiver side: runs as a daemon thread.
    m_in = new StreamSnapshotDataReceiver(m_mb, bufferPool);
    final Thread receiverThread = new Thread(m_in, "Snapshot data receiver");
    receiverThread.setDaemon(true);
    m_inThread = receiverThread;

    // Ack side.
    // NOTE(review): unlike the receiver, this thread is not marked as daemon -- confirm that
    // this is intentional.
    m_ack = new StreamSnapshotAckSender(m_mb);
    final Thread ackThread = new Thread(m_ack, "Snapshot ack sender");
    m_ackThread = ackThread;

    receiverThread.start();
    ackThread.start();

    return m_mb.getHSId();
  }
Example #4
0
  /**
   * Runs one step of the blocking transfer of partitioned table data: restores at most one
   * polled block, and once the data sink reports EOF or the snapshot completion monitor is
   * done, tears the transfer down and marks the join complete.
   *
   * @param siteConnection site connection used to restore blocks and to complete the join
   */
  private void runForBlockingDataTransfer(SiteProcedureConnection siteConnection) {
    final RestoreWork pendingWork = m_dataSink.poll(m_snapshotBufferAllocator);
    final boolean sourcesReady = pendingWork != null;
    if (sourcesReady) {
      restoreBlock(pendingWork, siteConnection);
    }

    // The completion monitor may fire even if m_dataSink has not reached EOF in the case that
    // there's no replicated table in the database, so both conditions are checked.
    if (!(m_dataSink.isEOF() || m_snapshotCompletionMonitor.isDone())) {
      // The sources are not set up yet; don't block the site, return and retry later.
      returnToTaskQueue(sourcesReady);
      return;
    }

    // No more data from this data sink: close it and drop the stream snapshot mailbox.
    m_dataSink.close();
    if (m_streamSnapshotMb != null) {
      VoltDB.instance().getHostMessenger().removeMailbox(m_streamSnapshotMb.getHSId());
    }
    JOINLOG.debug(m_whoami + " data transfer is finished");

    if (!m_snapshotCompletionMonitor.isDone()) {
      // Snapshot completion has not been observed yet; requeue this task.
      m_taskQueue.offer(this);
      return;
    }

    try {
      SnapshotCompletionEvent completion = m_snapshotCompletionMonitor.get();
      assert (completion != null);
      JOINLOG.debug("P" + m_partitionId + " noticed data transfer completion");
      m_completionAction.setSnapshotTxnId(completion.multipartTxnId);

      setJoinComplete(
          siteConnection,
          completion.exportSequenceNumbers,
          completion.drSequenceNumbers,
          false /* requireExistingSequenceNumbers */);
    } catch (InterruptedException e) {
      // isDone() already returned true, so this shouldn't happen.
      VoltDB.crashLocalVoltDB("Impossible interruption happend", true, e);
    } catch (ExecutionException e) {
      VoltDB.crashLocalVoltDB("Error waiting for snapshot to finish", true, e);
    }
  }
Example #5
0
  /**
   * A non-blocking implementation of run(): each invocation restores at most one polled block
   * and then either requeues this task or finishes the rejoin. The Site has responsibility for
   * transactions that occur between schedulings of this task.
   *
   * @param siteConnection site connection used to restore blocks and run the finishing task
   * @param m_taskLog task log supplied by the caller; not used by this method
   * @throws IOException declared by the overridden signature
   */
  @Override
  public void runForRejoin(SiteProcedureConnection siteConnection, TaskLog m_taskLog)
      throws IOException {
    final RestoreWork pendingWork = m_rejoinSiteProcessor.poll(m_snapshotBufferAllocator);
    if (pendingWork != null) {
      restoreBlock(pendingWork, siteConnection);
    }

    if (!m_rejoinSiteProcessor.isEOF()) {
      // More snapshot data is still expected; come back later.
      m_taskQueue.offer(this);
      return;
    }

    REJOINLOG.debug(m_whoami + "Rejoin snapshot transfer is finished");
    m_rejoinSiteProcessor.close();

    // The stream snapshot mailbox must exist at this point; remove it from the messenger.
    Preconditions.checkNotNull(m_streamSnapshotMb);
    VoltDB.instance().getHostMessenger().removeMailbox(m_streamSnapshotMb.getHSId());

    doFinishingTask(siteConnection);
  }
  /**
   * Creates the local mailbox that receives export acks, registers it in ZooKeeper under every
   * local partition, and hands the peer mailboxes found there to the local export data sources.
   *
   * <p>The method blocks on the task submitted to {@code m_childUpdatingThread}. Any failure is
   * rethrown through {@code Throwables.propagate}; when the failure is an
   * {@link InterruptedException}, the interrupt status of the affected thread is restored first
   * so that the interruption is not lost by the wrapping.
   *
   * @param localPartitions partition ids for which a mailbox node is created and watched
   * @param messenger supplies the ZooKeeper handle and registers the new mailbox
   */
  private void createAndRegisterAckMailboxes(
      final Set<Integer> localPartitions, HostMessenger messenger) {
    m_zk = messenger.getZK();
    m_mailboxesZKPath = VoltZK.exportGenerations + "/" + m_timestamp + "/" + "mailboxes";

    m_mbox =
        new LocalMailbox(messenger) {
          @Override
          public void deliver(VoltMessage message) {
            if (message instanceof BinaryPayloadMessage) {
              // Payload layout as read below: int partition, int signature length,
              // signature bytes, long ack USO.
              BinaryPayloadMessage bpm = (BinaryPayloadMessage) message;
              ByteBuffer buf = ByteBuffer.wrap(bpm.m_payload);
              final int partition = buf.getInt();
              final int length = buf.getInt();
              byte[] stringBytes = new byte[length];
              buf.get(stringBytes);
              String signature = new String(stringBytes, Constants.UTF8ENCODING);
              final long ackUSO = buf.getLong();

              final HashMap<String, ExportDataSource> partitionSources =
                  m_dataSourcesByPartition.get(partition);
              if (partitionSources == null) {
                exportLog.error(
                    "Received an export ack for partition "
                        + partition
                        + " which does not exist on this node");
                return;
              }

              final ExportDataSource eds = partitionSources.get(signature);
              if (eds == null) {
                exportLog.error(
                    "Received an export ack for partition "
                        + partition
                        + " source signature "
                        + signature
                        + " which does not exist on this node");
                return;
              }

              try {
                eds.ack(ackUSO);
              } catch (RejectedExecutionException ignoreIt) {
                // ignore it: as it is already shutdown
              }
            } else {
              exportLog.error("Receive unexpected message " + message + " in export subsystem");
            }
          }
        };
    messenger.createMailbox(null, m_mbox);

    // Register this mailbox as an ephemeral child of each local partition's node.
    for (Integer partition : localPartitions) {
      final String partitionDN = m_mailboxesZKPath + "/" + partition;
      ZKUtil.asyncMkdirs(m_zk, partitionDN);

      // NOTE(review): the result of this callback is never inspected, so a failed create is
      // not noticed here -- confirm that fire-and-forget is intended.
      ZKUtil.StringCallback cb = new ZKUtil.StringCallback();
      m_zk.create(
          partitionDN + "/" + m_mbox.getHSId(),
          null,
          Ids.OPEN_ACL_UNSAFE,
          CreateMode.EPHEMERAL,
          cb,
          null);
    }

    ListenableFuture<?> fut =
        m_childUpdatingThread.submit(
            new Runnable() {
              @Override
              public void run() {
                // Issue all getChildren requests first, then collect the results.
                List<Pair<Integer, ZKUtil.ChildrenCallback>> callbacks =
                    new ArrayList<Pair<Integer, ZKUtil.ChildrenCallback>>();
                for (Integer partition : localPartitions) {
                  ZKUtil.ChildrenCallback callback = new ZKUtil.ChildrenCallback();
                  m_zk.getChildren(
                      m_mailboxesZKPath + "/" + partition,
                      constructMailboxChildWatcher(),
                      callback,
                      null);
                  callbacks.add(Pair.of(partition, callback));
                }
                for (Pair<Integer, ZKUtil.ChildrenCallback> p : callbacks) {
                  final Integer partition = p.getFirst();
                  List<String> children = null;
                  try {
                    children = p.getSecond().getChildren();
                  } catch (InterruptedException e) {
                    // Keep the interrupt visible to the executor thread before wrapping.
                    Thread.currentThread().interrupt();
                    Throwables.propagate(e);
                  } catch (KeeperException e) {
                    Throwables.propagate(e);
                  }
                  ImmutableList.Builder<Long> mailboxes = ImmutableList.builder();

                  // Every child other than this node's own mailbox is a peer mailbox HSId.
                  for (String child : children) {
                    if (child.equals(Long.toString(m_mbox.getHSId()))) continue;
                    mailboxes.add(Long.valueOf(child));
                  }
                  ImmutableList<Long> mailboxHsids = mailboxes.build();

                  // NOTE(review): assumes every local partition has an entry in
                  // m_dataSourcesByPartition; a missing entry would fail here -- confirm.
                  for (ExportDataSource eds : m_dataSourcesByPartition.get(partition).values()) {
                    eds.updateAckMailboxes(Pair.of(m_mbox, mailboxHsids));
                  }
                }
              }
            });
    try {
      fut.get();
    } catch (Throwable t) {
      if (t instanceof InterruptedException) {
        // Restore the caller's interrupt status; propagate() would otherwise hide it.
        Thread.currentThread().interrupt();
      }
      Throwables.propagate(t);
    }
  }