Beispiel #1
0
 /**
  * This variant delays the formatting of the string message until it is actually logged
  *
  * @param now
  * @param level a {@link Level debug level}
  * @param cause evidentiary exception
  * @param stemformat a {@link String#format(String, Object...) string format}
  * @param args format arguments
  */
 public void log(long now, Level level, Throwable cause, String stemformat, Object... args) {
   if (now - m_lastLogTime > m_maxLogIntervalMillis) {
     synchronized (this) {
       if (now - m_lastLogTime > m_maxLogIntervalMillis) {
         String message = formatMessage(cause, stemformat, args);
         switch (level) {
           case DEBUG:
             m_logger.debug(message);
             break;
           case ERROR:
             m_logger.error(message);
             break;
           case FATAL:
             m_logger.fatal(message);
             break;
           case INFO:
             m_logger.info(message);
             break;
           case TRACE:
             m_logger.trace(message);
             break;
           case WARN:
             m_logger.warn(message);
             break;
         }
         m_lastLogTime = now;
       }
     }
   }
 }
Beispiel #2
0
 public void log(String message, long now, Level level) {
   if (now - m_lastLogTime > m_maxLogIntervalMillis) {
     synchronized (this) {
       if (now - m_lastLogTime > m_maxLogIntervalMillis) {
         switch (level) {
           case DEBUG:
             m_logger.debug(message);
             break;
           case ERROR:
             m_logger.error(message);
             break;
           case FATAL:
             m_logger.fatal(message);
             break;
           case INFO:
             m_logger.info(message);
             break;
           case TRACE:
             m_logger.trace(message);
             break;
           case WARN:
             m_logger.warn(message);
             break;
         }
         m_lastLogTime = now;
       }
     }
   }
 }
Beispiel #3
0
 public void runDDL(String ddl) {
   try {
     // LOG.info("Executing " + ddl);
     Statement stmt = dbconn.createStatement();
     /*boolean success =*/ stmt.execute(ddl);
     SQLWarning warn = stmt.getWarnings();
     if (warn != null) sqlLog.warn(warn.getMessage());
     // LOG.info("SQL DDL execute result: " + (success ? "true" : "false"));
   } catch (SQLException e) {
     hostLog.l7dlog(Level.ERROR, LogKeys.host_Backend_RunDDLFailed.name(), new Object[] {ddl}, e);
   }
 }
Beispiel #4
0
 void shutdown() {
   try {
     if (m_hsql != null) {
       HsqlBackend.shutdownInstance();
     }
     if (m_ee != null) {
       m_ee.release();
     }
     if (m_snapshotter != null) {
       m_snapshotter.shutdown();
     }
   } catch (InterruptedException e) {
     hostLog.warn("Interrupted shutdown execution site.", e);
   }
 }
Beispiel #5
0
 /**
  * Connect to a single server with retry. Limited exponential backoff. No timeout. This will run
  * until the process is killed if it's not able to connect.
  *
  * @param server hostname:port or just hostname (hostname can be ip).
  */
 private void connectToOneServerWithRetry(String server) {
   int sleep = 1000;
   while (!shutdown.get()) {
     try {
       client.createConnection(server);
       activeConnections.incrementAndGet();
       log.info(String.format("Connected to VoltDB node at: %s.", server));
       break;
     } catch (Exception e) {
       log.warn(
           String.format(
               "Connection to " + server + " failed - retrying in %d second(s).", sleep / 1000));
       try {
         Thread.sleep(sleep);
       } catch (Exception interruted) {
       }
       if (sleep < 8000) sleep += sleep;
     }
   }
 }
  private void processFragmentResponseDependencies(FragmentResponseMessage response) {
    int depCount = response.getTableCount();
    for (int i = 0; i < depCount; i++) {
      int dependencyId = response.getTableDependencyIdAtIndex(i);
      VoltTable payload = response.getTableAtIndex(i);
      assert (payload != null);

      // if we're getting a dependency, i hope we know about it
      assert (m_missingDependencies != null);

      WorkUnit w = m_missingDependencies.get(dependencyId);
      if (w == null) {
        String msg =
            "Unable to find WorkUnit for dependency: "
                + dependencyId
                + " as part of TXN: "
                + txnId
                + " received from execution site: "
                + response.getExecutorSiteId();
        hostLog.warn(msg);
        // throw new FragmentFailureException();
        return;
      }

      // if the node is recovering, it doesn't matter if the payload matches
      if (response.isRecovering()) {
        w.putDummyDependency(dependencyId, response.getExecutorSiteId());
      } else {
        w.putDependency(
            dependencyId, response.getExecutorSiteId(), payload, m_site.getSiteTracker());
      }
      if (w.allDependenciesSatisfied()) {
        handleWorkUnitComplete(w);
      }
    }
  }
Beispiel #7
0
  private void createSetupIv2(
      final String file_path,
      final String file_nonce,
      SnapshotFormat format,
      final long txnId,
      final Map<Integer, Long> partitionTransactionIds,
      String data,
      final SystemProcedureExecutionContext context,
      final VoltTable result,
      Map<String, Map<Integer, Pair<Long, Long>>> exportSequenceNumbers,
      SiteTracker tracker,
      HashinatorSnapshotData hashinatorData,
      long timestamp) {
    JSONObject jsData = null;
    if (data != null && !data.isEmpty()) {
      try {
        jsData = new JSONObject(data);
      } catch (JSONException e) {
        SNAP_LOG.error(String.format("JSON exception on snapshot data \"%s\".", data), e);
      }
    }

    SnapshotWritePlan plan;
    if (format == SnapshotFormat.NATIVE) {
      plan = new NativeSnapshotWritePlan();
    } else if (format == SnapshotFormat.CSV) {
      plan = new CSVSnapshotWritePlan();
    } else if (format == SnapshotFormat.STREAM) {
      plan = new StreamSnapshotWritePlan();
    } else if (format == SnapshotFormat.INDEX) {
      plan = new IndexSnapshotWritePlan();
    } else {
      throw new RuntimeException("BAD BAD BAD");
    }
    final Callable<Boolean> deferredSetup =
        plan.createSetup(
            file_path,
            file_nonce,
            txnId,
            partitionTransactionIds,
            jsData,
            context,
            result,
            exportSequenceNumbers,
            tracker,
            hashinatorData,
            timestamp);
    m_deferredSetupFuture =
        VoltDB.instance()
            .submitSnapshotIOWork(
                new DeferredSnapshotSetup(plan, deferredSetup, txnId, partitionTransactionIds));

    synchronized (m_createLock) {
      // Seems like this should be cleared out just in case
      // Log if there is actually anything to clear since it is unexpected
      if (!m_taskListsForHSIds.isEmpty()) {
        SNAP_LOG.warn("Found lingering snapshot tasks while setting up a snapshot");
      }
      m_taskListsForHSIds.clear();
      m_createSuccess.set(true);
      m_createResult.set(result);

      m_taskListsForHSIds.putAll(plan.getTaskListsForHSIds());

      // HACK HACK HACK.  If the task list is empty, this host has no work to do for
      // this snapshot.  We're going to create an empty list of tasks for one of the sites to do
      // so that we'll have a SnapshotSiteProcessor which will do the logSnapshotCompleteToZK.
      if (m_taskListsForHSIds.isEmpty()) {
        SNAP_LOG.debug(
            "Node had no snapshot work to do.  Creating a null task to drive completion.");
        m_taskListsForHSIds.put(context.getSiteId(), new ArrayDeque<SnapshotTableTask>());
      }
      SNAP_LOG.debug(
          "Planned tasks: "
              + CoreUtils.hsIdCollectionToString(plan.getTaskListsForHSIds().keySet()));
      SNAP_LOG.debug(
          "Created tasks for HSIds: "
              + CoreUtils.hsIdCollectionToString(m_taskListsForHSIds.keySet()));
    }
  }
Beispiel #8
0
  @Override
  public void run() {
    List<String> lineList;

    // if header option is true, check whether csv first line is valid
    if (m_config.header) {
      if (!checkHeader()) {
        m_log.error(
            "In the CSV file "
                + m_config.file
                + ", the header "
                + m_listReader.getUntokenizedRow()
                + " does not match "
                + "an existing column in the table "
                + m_config.table
                + ".");
        System.exit(-1);
      }
    }

    while ((m_config.limitrows-- > 0)) {
      if (m_errHandler.hasReachedErrorLimit()) {
        break;
      }

      try {
        // Initial setting of m_totalLineCount
        if (m_listReader.getLineNumber() == 0) {
          m_totalLineCount.set(m_config.skip);
        } else {
          m_totalLineCount.set(m_listReader.getLineNumber());
        }
        long st = System.nanoTime();
        lineList = m_listReader.read();
        long end = System.nanoTime();
        m_parsingTime += (end - st);
        if (lineList == null) {
          if (m_totalLineCount.get() > m_listReader.getLineNumber()) {
            m_totalLineCount.set(m_listReader.getLineNumber());
          }
          break;
        }
        m_totalRowCount.incrementAndGet();

        if (lineList.isEmpty()) {
          continue;
        }

        String[] lineValues = lineList.toArray(new String[0]);
        String lineCheckResult;
        String[] reorderValues = new String[m_columnCount];
        if ((lineCheckResult = checkparams_trimspace_reorder(lineValues, reorderValues)) != null) {
          final RowWithMetaData metaData =
              new RowWithMetaData(m_listReader.getUntokenizedRow(), m_totalLineCount.get() + 1);
          if (m_errHandler.handleError(metaData, null, lineCheckResult)) {
            break;
          }
          continue;
        }

        RowWithMetaData lineData =
            new RowWithMetaData(m_listReader.getUntokenizedRow(), m_listReader.getLineNumber());
        m_loader.insertRow(lineData, reorderValues);
      } catch (SuperCsvException e) {
        // Catch rows that can not be read by superCSV m_listReader.
        // e.g. items without quotes when strictquotes is enabled.
        final RowWithMetaData metaData =
            new RowWithMetaData(m_listReader.getUntokenizedRow(), m_totalLineCount.get() + 1);
        if (m_errHandler.handleError(metaData, null, e.getMessage())) {
          break;
        }
      } catch (IOException ex) {
        m_log.error("Failed to read CSV line from file: " + ex);
        break;
      } catch (InterruptedException e) {
        m_log.error("CSVLoader interrupted: " + e);
        break;
      }
    }

    // Now wait for processors to see endOfData and count down. After that drain to finish all
    // callbacks
    try {
      m_log.debug("Waiting for CSVDataLoader to finish.");
      m_loader.close();
      m_log.debug("CSVDataLoader Done.");
    } catch (Exception ex) {
      m_log.warn(
          "Stopped processing because of connection error. "
              + "A report will be generated with what we processed so far. Error: "
              + ex);
    }
  }
Beispiel #9
0
  /**
   * The only public method: do all the work to start a snapshot. Assumes that a snapshot is
   * feasible, that the caller has validated it can be accomplished, that the caller knows this is a
   * consistent or useful transaction point at which to snapshot.
   *
   * @param file_path
   * @param file_nonce
   * @param format
   * @param block
   * @param txnId
   * @param data
   * @param context
   * @param hostname
   * @return VoltTable describing the results of the snapshot attempt
   */
  public VoltTable startSnapshotting(
      final String file_path,
      final String pathType,
      final String file_nonce,
      final SnapshotFormat format,
      final byte block,
      final long multiPartTxnId,
      final long partitionTxnId,
      final long legacyPerPartitionTxnIds[],
      final String data,
      final SystemProcedureExecutionContext context,
      final String hostname,
      final HashinatorSnapshotData hashinatorData,
      final long timestamp) {
    TRACE_LOG.trace("Creating snapshot target and handing to EEs");
    final VoltTable result = SnapshotUtil.constructNodeResultsTable();
    final int numLocalSites =
        context.getCluster().getDeployment().get("deployment").getSitesperhost();
    JSONObject jsData = null;
    if (data != null && !data.isEmpty()) {
      try {
        jsData = new JSONObject(data);
      } catch (JSONException e) {
        SNAP_LOG.error(String.format("JSON exception on snapshot data \"%s\".", data), e);
      }
    }
    final JSONObject finalJsData = jsData;

    JSONObject perSiteRemoteDataCenterDrIds;
    try {
      perSiteRemoteDataCenterDrIds =
          ExtensibleSnapshotDigestData.serializeSiteConsumerDrIdTrackersToJSON(
              context.getDrAppliedTrackers());
    } catch (JSONException e) {
      SNAP_LOG.warn("Failed to serialize the Remote DataCenter's Last applied DRIds");
      perSiteRemoteDataCenterDrIds = new JSONObject();
    }

    // One site wins the race to create the snapshot targets, populating
    // m_taskListsForHSIds for the other sites and creating an appropriate
    // number of snapshot permits.
    synchronized (SnapshotSiteProcessor.m_snapshotCreateLock) {
      SnapshotSiteProcessor.m_snapshotCreateSetupBarrierActualAction.set(
          new Runnable() {
            @Override
            public void run() {
              Map<Integer, Long> partitionTransactionIds = m_partitionLastSeenTransactionIds;

              SNAP_LOG.debug("Last seen partition transaction ids " + partitionTransactionIds);
              m_partitionLastSeenTransactionIds = new HashMap<Integer, Long>();
              partitionTransactionIds.put(TxnEgo.getPartitionId(multiPartTxnId), multiPartTxnId);

              Map<Integer, JSONObject> remoteDataCenterLastIds = m_remoteDataCenterLastIds;
              m_remoteDataCenterLastIds = new HashMap<Integer, JSONObject>();

              /*
               * Do a quick sanity check that the provided IDs
               * don't conflict with currently active partitions. If they do
               * it isn't fatal we can just skip it.
               */
              for (long txnId : legacyPerPartitionTxnIds) {
                final int legacyPartition = TxnEgo.getPartitionId(txnId);
                if (partitionTransactionIds.containsKey(legacyPartition)) {
                  SNAP_LOG.warn(
                      "While saving a snapshot and propagating legacy "
                          + "transaction ids found an id that matches currently active partition"
                          + partitionTransactionIds.get(legacyPartition));
                } else {
                  partitionTransactionIds.put(legacyPartition, txnId);
                }
              }

              m_allLocalSiteSnapshotDigestData =
                  new ExtensibleSnapshotDigestData(
                      SnapshotSiteProcessor.getExportSequenceNumbers(),
                      SnapshotSiteProcessor.getDRTupleStreamStateInfo(),
                      remoteDataCenterLastIds);
              createSetupIv2(
                  file_path,
                  pathType,
                  file_nonce,
                  format,
                  multiPartTxnId,
                  partitionTransactionIds,
                  finalJsData,
                  context,
                  result,
                  m_allLocalSiteSnapshotDigestData,
                  context.getSiteTrackerForSnapshot(),
                  hashinatorData,
                  timestamp);
            }
          });

      // Create a barrier to use with the current number of sites to wait for
      // or if the barrier is already set up check if it is broken and reset if necessary
      SnapshotSiteProcessor.readySnapshotSetupBarriers(numLocalSites);

      // From within this EE, record the sequence numbers as of the start of the snapshot (now)
      // so that the info can be put in the digest.
      SnapshotSiteProcessor.populateSequenceNumbersForExecutionSite(context);
      Integer partitionId = TxnEgo.getPartitionId(partitionTxnId);
      SNAP_LOG.debug(
          "Registering transaction id "
              + partitionTxnId
              + " for "
              + TxnEgo.getPartitionId(partitionTxnId));
      m_partitionLastSeenTransactionIds.put(partitionId, partitionTxnId);
      m_remoteDataCenterLastIds.put(partitionId, perSiteRemoteDataCenterDrIds);
    }

    boolean runPostTasks = false;
    VoltTable earlyResultTable = null;
    try {
      SnapshotSiteProcessor.m_snapshotCreateSetupBarrier.await();
      try {
        synchronized (m_createLock) {
          SNAP_LOG.debug(
              "Found tasks for HSIds: "
                  + CoreUtils.hsIdCollectionToString(m_taskListsForHSIds.keySet()));
          SNAP_LOG.debug("Looking for local HSID: " + CoreUtils.hsIdToString(context.getSiteId()));
          Deque<SnapshotTableTask> taskList = m_taskListsForHSIds.remove(context.getSiteId());
          // If createSetup failed, then the first site to reach here is going
          // to send the results table generated by createSetup, and then empty out the table.
          // All other sites to reach here will send the appropriate empty table.
          // If createSetup was a success but the taskList is null, then we'll use the block
          // switch to figure out what flavor of empty SnapshotSave result table to return.
          if (!m_createSuccess.get()) {
            // There shouldn't be any work for any site if we failed
            assert (m_taskListsForHSIds.isEmpty());
            VoltTable finalresult = m_createResult.get();
            if (finalresult != null) {
              m_createResult.set(null);
              earlyResultTable = finalresult;
            } else {
              // We returned a non-empty NodeResultsTable with the failures in it,
              // every other site needs to return a NodeResultsTable as well.
              earlyResultTable = SnapshotUtil.constructNodeResultsTable();
            }
          } else if (taskList == null) {
            SNAP_LOG.debug("No task for this site, block " + block);
            // This node is participating in the snapshot but this site has nothing to do.
            // Send back an appropriate empty table based on the block flag
            if (block != 0) {
              runPostTasks = true;
              earlyResultTable = SnapshotUtil.constructPartitionResultsTable();
              earlyResultTable.addRow(
                  context.getHostId(),
                  hostname,
                  CoreUtils.getSiteIdFromHSId(context.getSiteId()),
                  "SUCCESS",
                  "");
            } else {
              // If doing snapshot for only replicated table(s), earlyResultTable here
              // may not be empty even if the taskList of this site is null.
              // In that case, snapshot result is preserved by earlyResultTable.
              earlyResultTable = result;
            }
          } else {
            context
                .getSiteSnapshotConnection()
                .initiateSnapshots(
                    format, taskList, multiPartTxnId, m_allLocalSiteSnapshotDigestData);
          }

          if (m_deferredSetupFuture != null && taskList != null) {
            // Add a listener to the deferred setup so that it can kick off the snapshot
            // task once the setup is done.
            m_deferredSetupFuture.addListener(
                new Runnable() {
                  @Override
                  public void run() {
                    DeferredSnapshotSetup deferredSnapshotSetup = null;
                    try {
                      deferredSnapshotSetup = m_deferredSetupFuture.get();
                    } catch (Exception e) {
                      // it doesn't throw
                    }

                    assert deferredSnapshotSetup != null;
                    context
                        .getSiteSnapshotConnection()
                        .startSnapshotWithTargets(
                            deferredSnapshotSetup.getPlan().getSnapshotDataTargets());
                  }
                },
                CoreUtils.SAMETHREADEXECUTOR);
          }
        }
      } finally {
        SnapshotSiteProcessor.m_snapshotCreateFinishBarrier.await(120, TimeUnit.SECONDS);
      }
    } catch (TimeoutException e) {
      VoltDB.crashLocalVoltDB(
          "Timed out waiting 120 seconds for all threads to arrive and start snapshot", true, null);
    } catch (InterruptedException e) {
      result.addRow(context.getHostId(), hostname, "", "FAILURE", CoreUtils.throwableToString(e));
      earlyResultTable = result;
    } catch (BrokenBarrierException e) {
      result.addRow(context.getHostId(), hostname, "", "FAILURE", CoreUtils.throwableToString(e));
      earlyResultTable = result;
    } catch (IllegalArgumentException e) {
      result.addRow(context.getHostId(), hostname, "", "FAILURE", CoreUtils.throwableToString(e));
      earlyResultTable = result;
    }

    // If earlyResultTable is set, return here
    if (earlyResultTable != null) {
      if (runPostTasks) {
        // Need to run post-snapshot tasks before finishing
        SnapshotSiteProcessor.runPostSnapshotTasks(context);
      }
      return earlyResultTable;
    }

    if (block != 0) {
      HashSet<Exception> failures = Sets.newHashSet();
      String status = "SUCCESS";
      String err = "";
      try {
        // For blocking snapshot, propogate the error from deferred setup back to the client
        final DeferredSnapshotSetup deferredSnapshotSetup = m_deferredSetupFuture.get();
        if (deferredSnapshotSetup != null && deferredSnapshotSetup.getError() != null) {
          status = "FAILURE";
          err = deferredSnapshotSetup.getError().toString();
          failures.add(deferredSnapshotSetup.getError());
        }

        failures.addAll(context.getSiteSnapshotConnection().completeSnapshotWork());
        SnapshotSiteProcessor.runPostSnapshotTasks(context);
      } catch (Exception e) {
        status = "FAILURE";
        err = e.toString();
        failures.add(e);
      }
      final VoltTable blockingResult = SnapshotUtil.constructPartitionResultsTable();

      if (failures.isEmpty()) {
        blockingResult.addRow(
            context.getHostId(),
            hostname,
            CoreUtils.getSiteIdFromHSId(context.getSiteId()),
            status,
            err);
      } else {
        status = "FAILURE";
        for (Exception e : failures) {
          err = e.toString();
        }
        blockingResult.addRow(
            context.getHostId(),
            hostname,
            CoreUtils.getSiteIdFromHSId(context.getSiteId()),
            status,
            err);
      }
      return blockingResult;
    }

    return result;
  }
Beispiel #10
0
  // Offer a new message to the repair log. This will truncate
  // the repairLog if the message includes a truncation hint.
  public void deliver(VoltMessage msg) {
    if (!m_isLeader && msg instanceof Iv2InitiateTaskMessage) {
      final Iv2InitiateTaskMessage m = (Iv2InitiateTaskMessage) msg;
      // We can't repair read only SP transactions. Just don't log them to the repair log.
      if (m.isReadOnly()) {
        return;
      }

      m_lastSpHandle = m.getSpHandle();
      truncate(m.getTruncationHandle(), IS_SP);
      m_logSP.add(new Item(IS_SP, m, m.getSpHandle(), m.getTxnId()));
    } else if (msg instanceof FragmentTaskMessage) {
      final FragmentTaskMessage m = (FragmentTaskMessage) msg;

      // We can't repair read only SP transactions. Just don't log them to the repair log.
      if (m.isReadOnly()) {
        return;
      }

      truncate(m.getTruncationHandle(), IS_MP);
      // only log the first fragment of a procedure (and handle 1st case)
      if (m.getTxnId() > m_lastMpHandle || m_lastMpHandle == Long.MAX_VALUE) {
        m_logMP.add(new Item(IS_MP, m, m.getSpHandle(), m.getTxnId()));
        m_lastMpHandle = m.getTxnId();
        m_lastSpHandle = m.getSpHandle();
      }
    } else if (msg instanceof CompleteTransactionMessage) {
      // a CompleteTransactionMessage which indicates restart is not the end of the
      // transaction.  We don't want to log it in the repair log.
      CompleteTransactionMessage ctm = (CompleteTransactionMessage) msg;
      // We can't repair read only SP transactions. Just don't log them to the repair log.
      // Restart transaction do not need to be repaired here, don't log them as well.
      if (ctm.isReadOnly() || ctm.isRestart()) {
        return;
      }

      truncate(ctm.getTruncationHandle(), IS_MP);
      m_logMP.add(new Item(IS_MP, ctm, ctm.getSpHandle(), ctm.getTxnId()));
      // Restore will send a complete transaction message with a lower mp transaction id because
      // the restore transaction precedes the loading of the right mp transaction id from the
      // snapshot
      // Hence Math.max
      m_lastMpHandle = Math.max(m_lastMpHandle, ctm.getTxnId());
      m_lastSpHandle = ctm.getSpHandle();
    } else if (msg instanceof DumpMessage) {
      String who = CoreUtils.hsIdToString(m_HSId);
      tmLog.warn(
          "Repair log dump for site: "
              + who
              + ", isLeader: "
              + m_isLeader
              + ", "
              + who
              + ": lastSpHandle: "
              + m_lastSpHandle
              + ", lastMpHandle: "
              + m_lastMpHandle);
      for (Iv2RepairLogResponseMessage il : contents(0l, false)) {
        tmLog.warn("[Repair log contents]" + who + ": msg: " + il);
      }
    } else if (msg instanceof RepairLogTruncationMessage) {
      final RepairLogTruncationMessage truncateMsg = (RepairLogTruncationMessage) msg;
      truncate(truncateMsg.getHandle(), IS_SP);
    }
  }