Example #1
 public void handleDumpMessage() {
   String who = CoreUtils.hsIdToString(m_mailbox.getHSId());
   hostLog.warn("State dump for site: " + who);
   hostLog.warn("" + who + ": partition: " + m_partitionId + ", isLeader: " + m_isLeader);
   if (m_isLeader) {
     hostLog.warn("" + who + ": replicas: " + CoreUtils.hsIdCollectionToString(m_replicaHSIds));
     if (m_sendToHSIds.length > 0) {
       m_mailbox.send(m_sendToHSIds, new DumpMessage());
     }
   }
   hostLog.warn(
       ""
           + who
           + ": most recent SP handle: "
           + getCurrentTxnId()
           + " "
           + TxnEgo.txnIdToString(getCurrentTxnId()));
   hostLog.warn(
       ""
           + who
           + ": outstanding txns: "
           + m_outstandingTxns.keySet()
           + " "
           + TxnEgo.txnIdCollectionToString(m_outstandingTxns.keySet()));
   hostLog.warn("" + who + ": TransactionTaskQueue: " + m_pendingTasks.toString());
   if (m_duplicateCounters.size() > 0) {
     hostLog.warn("" + who + ": duplicate counters: ");
     for (Entry<DuplicateCounterKey, DuplicateCounter> e : m_duplicateCounters.entrySet()) {
       hostLog.warn("\t" + who + ": " + e.getKey().toString() + ": " + e.getValue().toString());
     }
   }
 }
Example #2
 public static void logTopology(long leaderHSId, List<Long> replicas, int partitionId) {
   if (iv2log.isTraceEnabled()) {
     String logmsg = "topology partition %d leader %s replicas (%s)";
     iv2log.trace(
         String.format(
             logmsg,
             partitionId,
             CoreUtils.hsIdToString(leaderHSId),
             CoreUtils.hsIdCollectionToString(replicas)));
   }
 }
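For reference, here is a minimal, hypothetical invocation of logTopology. The helper CoreUtils.getHSIdFromHostAndSite and the exact "host:site" rendering produced by hsIdToString/hsIdCollectionToString are assumptions about the VoltDB API, not something shown in the examples on this page.

  // Hypothetical sketch, not taken from the sources above.
  // Assumes CoreUtils.getHSIdFromHostAndSite(host, site) packs a host/site pair
  // into a long HSId, and that hsIdToString(...) renders it roughly as "host:site".
  long leader = CoreUtils.getHSIdFromHostAndSite(0, 1);
  java.util.List<Long> replicas =
      java.util.Arrays.asList(leader, CoreUtils.getHSIdFromHostAndSite(1, 1));
  logTopology(leader, replicas, 7);
  // With trace logging enabled, this would emit something like:
  //   topology partition 7 leader 0:1 replicas (0:1, 1:1)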
Example #3
  @Override
  protected void updateStatsRow(Object rowKey, Object[] rowValues) {
    long leader;
    List<Long> sites = new ArrayList<Long>();
    if (rowKey.equals(MpInitiator.MP_INIT_PID)) {
      leader = getHSIdForMultiPartitionInitiator();
      sites.add(leader);
    } else {
      leader = m_iv2Masters.pointInTimeCache().get((Integer) rowKey);
      sites.addAll(getReplicasForPartition((Integer) rowKey));
    }

    rowValues[columnNameToIndex.get("Partition")] = rowKey;
    rowValues[columnNameToIndex.get("Sites")] = CoreUtils.hsIdCollectionToString(sites);
    rowValues[columnNameToIndex.get("Leader")] = CoreUtils.hsIdToString(leader);
  }
Example #4
  /** Start fixing survivors: setup scoreboard and request repair logs. */
  void prepareForFaultRecovery() {
    for (Long hsid : m_survivors) {
      m_replicaRepairStructs.put(hsid, new ReplicaRepairStruct());
    }

    tmLog.info(
        m_whoami
            + "found (including self) "
            + m_survivors.size()
            + " surviving replicas to repair. "
            + " Survivors: "
            + CoreUtils.hsIdCollectionToString(m_survivors));
    VoltMessage logRequest =
        new Iv2RepairLogRequestMessage(m_requestId, Iv2RepairLogRequestMessage.SPREQUEST);
    m_mailbox.send(com.google.common.primitives.Longs.toArray(m_survivors), logRequest);
  }
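The Longs.toArray call above uses Guava to turn the survivor collection into the long[] destination list that the mailbox send expects (the same long[] form seen in Example #1's m_sendToHSIds). A minimal sketch of that conversion, with made-up HSId values:

  // Guava's Longs.toArray(Collection<? extends Number>) returns a long[].
  // The HSId values here are made up for illustration.
  java.util.Set<Long> survivors =
      new java.util.HashSet<Long>(java.util.Arrays.asList(1L, 2L, 3L));
  long[] destinations = com.google.common.primitives.Longs.toArray(survivors);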
Example #5
  /** Send missed-messages to survivors. */
  public void repairSurvivors() {
    // cancel() and repair() must be synchronized by the caller (the deliver lock,
    // currently). If cancelled and the last repair message arrives, don't send
    // out corrections!
    if (this.m_promotionResult.isCancelled()) {
      tmLog.debug(m_whoami + "Skipping repair message creation for cancelled Term.");
      return;
    }

    int queued = 0;
    tmLog.debug(m_whoami + "received all repair logs and is repairing surviving replicas.");
    for (Iv2RepairLogResponseMessage li : m_repairLogUnion) {
      List<Long> needsRepair = new ArrayList<Long>(5);
      for (Entry<Long, ReplicaRepairStruct> entry : m_replicaRepairStructs.entrySet()) {
        if (entry.getValue().needs(li.getHandle())) {
          ++queued;
          tmLog.debug(
              m_whoami
                  + "repairing "
                  + CoreUtils.hsIdToString(entry.getKey())
                  + ". Max seen "
                  + entry.getValue().m_maxSpHandleSeen
                  + ". Repairing with "
                  + li.getHandle());
          needsRepair.add(entry.getKey());
        }
      }
      if (!needsRepair.isEmpty()) {
        if (tmLog.isTraceEnabled()) {
          tmLog.trace(
              m_whoami
                  + "repairing: "
                  + CoreUtils.hsIdCollectionToString(needsRepair)
                  + " with message: "
                  + li.getPayload());
        }
        m_mailbox.repairReplicasWith(needsRepair, li.getPayload());
      }
    }
    tmLog.debug(m_whoami + "finished queuing " + queued + " replica repair messages.");

    m_promotionResult.done(m_maxSeenTxnId);
  }
Example #6
    @Override
    public void run(List<String> children) {
      List<Long> updatedHSIds = VoltZK.childrenToReplicaHSIds(children);
      // compute previously unseen HSId set in the callback list
      Set<Long> newHSIds = new HashSet<Long>(updatedHSIds);
      newHSIds.removeAll(m_replicas);
      tmLog.debug("Newly seen replicas: " + CoreUtils.hsIdCollectionToString(newHSIds));
      // compute previously seen but now vanished from the callback list HSId set
      Set<Long> missingHSIds = new HashSet<Long>(m_replicas);
      missingHSIds.removeAll(updatedHSIds);
      tmLog.debug("Newly dead replicas: " + CoreUtils.hsIdCollectionToString(missingHSIds));

      tmLog.debug(
          "Handling babysitter callback for partition "
              + m_partitionId
              + ": children: "
              + CoreUtils.hsIdCollectionToString(updatedHSIds));
      if (m_state.get() == AppointerState.CLUSTER_START) {
        // We can't yet tolerate a host failure during startup.  Crash it all
        if (missingHSIds.size() > 0) {
          VoltDB.crashGlobalVoltDB("Node failure detected during startup.", false, null);
        }
        // ENG-3166: Eventually we would like to get rid of the extra replicas beyond k_factor,
        // but for now we just look to see how many replicas of this partition we actually expect
        // and gate leader assignment on that many copies showing up.
        int replicaCount = m_kfactor + 1;
        JSONArray parts;
        try {
          parts = m_topo.getJSONArray("partitions");
          for (int p = 0; p < parts.length(); p++) {
            JSONObject aPartition = parts.getJSONObject(p);
            int pid = aPartition.getInt("partition_id");
            if (pid == m_partitionId) {
              replicaCount = aPartition.getJSONArray("replicas").length();
            }
          }
        } catch (JSONException e) {
          // Ignore and just assume the normal number of replicas
        }
        if (children.size() == replicaCount) {
          m_currentLeader = assignLeader(m_partitionId, updatedHSIds);
        } else {
          tmLog.info(
              "Waiting on "
                  + ((m_kfactor + 1) - children.size())
                  + " more nodes "
                  + "for k-safety before startup");
        }
      } else {
        // Check for k-safety
        if (!isClusterKSafe()) {
          VoltDB.crashGlobalVoltDB(
              "Some partitions have no replicas.  Cluster has become unviable.", false, null);
        }
        // Check if replay has completed
        if (m_replayComplete.get() == false) {
          VoltDB.crashGlobalVoltDB(
              "Detected node failure during command log replay. Cluster will shut down.",
              false,
              null);
        }
        // Check to see if there's been a possible network partition and we're not already handling
        // it
        if (m_partitionDetectionEnabled && !m_partitionDetected) {
          doPartitionDetectionActivities();
        }
        // If we survived the above gauntlet of fail, appoint a new leader for this partition.
        if (missingHSIds.contains(m_currentLeader)) {
          m_currentLeader = assignLeader(m_partitionId, updatedHSIds);
        }
      }
      m_replicas.clear();
      m_replicas.addAll(updatedHSIds);
    }
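The newly-seen / newly-dead bookkeeping at the top of this callback is a plain set difference between the previous and updated replica sets. A small, self-contained illustration with made-up HSIds:

  // Set-difference pattern used above (HSId values made up for illustration).
  java.util.Set<Long> previous =
      new java.util.HashSet<Long>(java.util.Arrays.asList(1L, 2L, 3L));
  java.util.Set<Long> updated =
      new java.util.HashSet<Long>(java.util.Arrays.asList(1L, 2L, 4L));
  java.util.Set<Long> newlySeen = new java.util.HashSet<Long>(updated);
  newlySeen.removeAll(previous); // {4} -> "newly seen" replicas
  java.util.Set<Long> newlyDead = new java.util.HashSet<Long>(previous);
  newlyDead.removeAll(updated);  // {3} -> "newly dead" replicas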
Example #7
  private void createSetupIv2(
      final String file_path,
      final String file_nonce,
      SnapshotFormat format,
      final long txnId,
      final Map<Integer, Long> partitionTransactionIds,
      String data,
      final SystemProcedureExecutionContext context,
      final VoltTable result,
      Map<String, Map<Integer, Pair<Long, Long>>> exportSequenceNumbers,
      SiteTracker tracker,
      HashinatorSnapshotData hashinatorData,
      long timestamp) {
    JSONObject jsData = null;
    if (data != null && !data.isEmpty()) {
      try {
        jsData = new JSONObject(data);
      } catch (JSONException e) {
        SNAP_LOG.error(String.format("JSON exception on snapshot data \"%s\".", data), e);
      }
    }

    SnapshotWritePlan plan;
    if (format == SnapshotFormat.NATIVE) {
      plan = new NativeSnapshotWritePlan();
    } else if (format == SnapshotFormat.CSV) {
      plan = new CSVSnapshotWritePlan();
    } else if (format == SnapshotFormat.STREAM) {
      plan = new StreamSnapshotWritePlan();
    } else if (format == SnapshotFormat.INDEX) {
      plan = new IndexSnapshotWritePlan();
    } else {
      throw new RuntimeException("BAD BAD BAD");
    }
    final Callable<Boolean> deferredSetup =
        plan.createSetup(
            file_path,
            file_nonce,
            txnId,
            partitionTransactionIds,
            jsData,
            context,
            result,
            exportSequenceNumbers,
            tracker,
            hashinatorData,
            timestamp);
    m_deferredSetupFuture =
        VoltDB.instance()
            .submitSnapshotIOWork(
                new DeferredSnapshotSetup(plan, deferredSetup, txnId, partitionTransactionIds));

    synchronized (m_createLock) {
      // Seems like this should be cleared out just in case
      // Log if there is actually anything to clear since it is unexpected
      if (!m_taskListsForHSIds.isEmpty()) {
        SNAP_LOG.warn("Found lingering snapshot tasks while setting up a snapshot");
      }
      m_taskListsForHSIds.clear();
      m_createSuccess.set(true);
      m_createResult.set(result);

      m_taskListsForHSIds.putAll(plan.getTaskListsForHSIds());

      // HACK HACK HACK.  If the task list is empty, this host has no work to do for
      // this snapshot.  We're going to create an empty list of tasks for one of the sites to do
      // so that we'll have a SnapshotSiteProcessor which will do the logSnapshotCompleteToZK.
      if (m_taskListsForHSIds.isEmpty()) {
        SNAP_LOG.debug(
            "Node had no snapshot work to do.  Creating a null task to drive completion.");
        m_taskListsForHSIds.put(context.getSiteId(), new ArrayDeque<SnapshotTableTask>());
      }
      SNAP_LOG.debug(
          "Planned tasks: "
              + CoreUtils.hsIdCollectionToString(plan.getTaskListsForHSIds().keySet()));
      SNAP_LOG.debug(
          "Created tasks for HSIds: "
              + CoreUtils.hsIdCollectionToString(m_taskListsForHSIds.keySet()));
    }
  }
Example #8
  /**
   * The only public method: do all the work to start a snapshot. Assumes that a snapshot is
   * feasible, that the caller has validated it can be accomplished, and that the caller knows this
   * is a consistent or useful transaction point at which to snapshot.
   *
   * @param file_path
   * @param file_nonce
   * @param format
   * @param block
   * @param txnId
   * @param data
   * @param context
   * @param hostname
   * @return VoltTable describing the results of the snapshot attempt
   */
  public VoltTable startSnapshotting(
      final String file_path,
      final String file_nonce,
      final SnapshotFormat format,
      final byte block,
      final long multiPartTxnId,
      final long partitionTxnId,
      final long legacyPerPartitionTxnIds[],
      final String data,
      final SystemProcedureExecutionContext context,
      final String hostname,
      final HashinatorSnapshotData hashinatorData,
      final long timestamp) {
    TRACE_LOG.trace("Creating snapshot target and handing to EEs");
    final VoltTable result = SnapshotUtil.constructNodeResultsTable();
    final int numLocalSites =
        context.getCluster().getDeployment().get("deployment").getSitesperhost();

    // One site wins the race to create the snapshot targets, populating
    // m_taskListsForSites for the other sites and creating an appropriate
    // number of snapshot permits.
    synchronized (SnapshotSiteProcessor.m_snapshotCreateLock) {
      SnapshotSiteProcessor.m_snapshotCreateSetupBarrierActualAction.set(
          new Runnable() {
            @Override
            public void run() {
              Map<Integer, Long> partitionTransactionIds = new HashMap<Integer, Long>();
              partitionTransactionIds = m_partitionLastSeenTransactionIds;
              SNAP_LOG.debug("Last seen partition transaction ids " + partitionTransactionIds);
              m_partitionLastSeenTransactionIds = new HashMap<Integer, Long>();
              partitionTransactionIds.put(TxnEgo.getPartitionId(multiPartTxnId), multiPartTxnId);

              /*
               * Do a quick sanity check that the provided IDs
               * don't conflict with currently active partitions. If they do,
               * it isn't fatal; we can just skip them.
               */
              for (long txnId : legacyPerPartitionTxnIds) {
                final int legacyPartition = TxnEgo.getPartitionId(txnId);
                if (partitionTransactionIds.containsKey(legacyPartition)) {
                  SNAP_LOG.warn(
                      "While saving a snapshot and propagating legacy "
                          + "transaction ids found an id that matches currently active partition"
                          + partitionTransactionIds.get(legacyPartition));
                } else {
                  partitionTransactionIds.put(legacyPartition, txnId);
                }
              }
              exportSequenceNumbers = SnapshotSiteProcessor.getExportSequenceNumbers();
              createSetupIv2(
                  file_path,
                  file_nonce,
                  format,
                  multiPartTxnId,
                  partitionTransactionIds,
                  data,
                  context,
                  result,
                  exportSequenceNumbers,
                  context.getSiteTrackerForSnapshot(),
                  hashinatorData,
                  timestamp);
            }
          });

      // Create a barrier to use with the current number of sites to wait for
      // or if the barrier is already set up check if it is broken and reset if necessary
      SnapshotSiteProcessor.readySnapshotSetupBarriers(numLocalSites);

      // From within this EE, record the sequence numbers as of the start of the snapshot (now)
      // so that the info can be put in the digest.
      SnapshotSiteProcessor.populateExportSequenceNumbersForExecutionSite(context);
      SNAP_LOG.debug(
          "Registering transaction id "
              + partitionTxnId
              + " for "
              + TxnEgo.getPartitionId(partitionTxnId));
      m_partitionLastSeenTransactionIds.put(TxnEgo.getPartitionId(partitionTxnId), partitionTxnId);
    }

    boolean runPostTasks = false;
    VoltTable earlyResultTable = null;
    try {
      SnapshotSiteProcessor.m_snapshotCreateSetupBarrier.await();
      try {
        synchronized (m_createLock) {
          SNAP_LOG.debug(
              "Found tasks for HSIds: "
                  + CoreUtils.hsIdCollectionToString(m_taskListsForHSIds.keySet()));
          SNAP_LOG.debug("Looking for local HSID: " + CoreUtils.hsIdToString(context.getSiteId()));
          Deque<SnapshotTableTask> taskList = m_taskListsForHSIds.remove(context.getSiteId());
          // If createSetup failed, then the first site to reach here is going
          // to send the results table generated by createSetup, and then empty out the table.
          // All other sites to reach here will send the appropriate empty table.
          // If createSetup was a success but the taskList is null, then we'll use the block
          // switch to figure out what flavor of empty SnapshotSave result table to return.
          if (!m_createSuccess.get()) {
            // There shouldn't be any work for any site if we failed
            assert (m_taskListsForHSIds.isEmpty());
            VoltTable finalresult = m_createResult.get();
            if (finalresult != null) {
              m_createResult.set(null);
              earlyResultTable = finalresult;
            } else {
              // We returned a non-empty NodeResultsTable with the failures in it,
              // every other site needs to return a NodeResultsTable as well.
              earlyResultTable = SnapshotUtil.constructNodeResultsTable();
            }
          } else if (taskList == null) {
            SNAP_LOG.debug("No task for this site, block " + block);
            // This node is participating in the snapshot but this site has nothing to do.
            // Send back an appropriate empty table based on the block flag
            if (block != 0) {
              runPostTasks = true;
              earlyResultTable = SnapshotUtil.constructPartitionResultsTable();
              earlyResultTable.addRow(
                  context.getHostId(),
                  hostname,
                  CoreUtils.getSiteIdFromHSId(context.getSiteId()),
                  "SUCCESS",
                  "");
            } else {
              earlyResultTable = SnapshotUtil.constructNodeResultsTable();
            }
          } else {
            context
                .getSiteSnapshotConnection()
                .initiateSnapshots(format, taskList, multiPartTxnId, exportSequenceNumbers);
          }

          if (m_deferredSetupFuture != null) {
            // Add a listener to the deferred setup so that it can kick off the snapshot
            // task once the setup is done.
            m_deferredSetupFuture.addListener(
                new Runnable() {
                  @Override
                  public void run() {
                    DeferredSnapshotSetup deferredSnapshotSetup = null;
                    try {
                      deferredSnapshotSetup = m_deferredSetupFuture.get();
                    } catch (Exception e) {
                      // it doesn't throw
                    }

                    assert deferredSnapshotSetup != null;
                    context
                        .getSiteSnapshotConnection()
                        .startSnapshotWithTargets(
                            deferredSnapshotSetup.getPlan().getSnapshotDataTargets());
                  }
                },
                CoreUtils.SAMETHREADEXECUTOR);
          }
        }
      } finally {
        SnapshotSiteProcessor.m_snapshotCreateFinishBarrier.await(120, TimeUnit.SECONDS);
      }
    } catch (TimeoutException e) {
      VoltDB.crashLocalVoltDB(
          "Timed out waiting 120 seconds for all threads to arrive and start snapshot", true, null);
    } catch (InterruptedException e) {
      result.addRow(context.getHostId(), hostname, "", "FAILURE", CoreUtils.throwableToString(e));
      earlyResultTable = result;
    } catch (BrokenBarrierException e) {
      result.addRow(context.getHostId(), hostname, "", "FAILURE", CoreUtils.throwableToString(e));
      earlyResultTable = result;
    }

    // If earlyResultTable is set, return here
    if (earlyResultTable != null) {
      if (runPostTasks) {
        // Need to run post-snapshot tasks before finishing
        SnapshotSiteProcessor.runPostSnapshotTasks(context);
      }
      return earlyResultTable;
    }

    if (block != 0) {
      HashSet<Exception> failures = Sets.newHashSet();
      String status = "SUCCESS";
      String err = "";
      try {
        // For a blocking snapshot, propagate the error from deferred setup back to the client
        final DeferredSnapshotSetup deferredSnapshotSetup = m_deferredSetupFuture.get();
        if (deferredSnapshotSetup != null && deferredSnapshotSetup.getError() != null) {
          status = "FAILURE";
          err = deferredSnapshotSetup.getError().toString();
          failures.add(deferredSnapshotSetup.getError());
        }

        failures.addAll(context.getSiteSnapshotConnection().completeSnapshotWork());
        SnapshotSiteProcessor.runPostSnapshotTasks(context);
      } catch (Exception e) {
        status = "FAILURE";
        err = e.toString();
        failures.add(e);
      }
      final VoltTable blockingResult = SnapshotUtil.constructPartitionResultsTable();

      if (failures.isEmpty()) {
        blockingResult.addRow(
            context.getHostId(),
            hostname,
            CoreUtils.getSiteIdFromHSId(context.getSiteId()),
            status,
            err);
      } else {
        status = "FAILURE";
        for (Exception e : failures) {
          err = e.toString();
        }
        blockingResult.addRow(
            context.getHostId(),
            hostname,
            CoreUtils.getSiteIdFromHSId(context.getSiteId()),
            status,
            err);
      }
      return blockingResult;
    }

    return result;
  }