Example #1
 @Override
 public void handleResponse(ClientResponse resp) {
   if (resp == null) {
     VoltDB.crashLocalVoltDB(
         "Received a null response to a snapshot initiation request.  "
             + "This should be impossible.",
         true,
         null);
   } else if (resp.getStatus() != ClientResponse.SUCCESS) {
     tmLog.info(
         "Failed to complete partition detection snapshot, status: "
             + resp.getStatus()
             + ", reason: "
             + resp.getStatusString());
     tmLog.info("Retrying partition detection snapshot...");
     SnapshotUtil.requestSnapshot(
         0L,
         m_partSnapshotSchedule.getPath(),
         m_partSnapshotSchedule.getPrefix() + System.currentTimeMillis(),
         true,
         SnapshotFormat.NATIVE,
         null,
         m_snapshotHandler,
         true);
   } else if (!SnapshotUtil.didSnapshotRequestSucceed(resp.getResults())) {
     VoltDB.crashGlobalVoltDB(
         "Unable to complete partition detection snapshot: " + resp.getResults()[0],
         false,
         null);
   } else {
     VoltDB.crashGlobalVoltDB(
         "Partition detection snapshot completed. Shutting down.", false, null);
   }
 }
Example #2
  /**
   * Given a set of the known host IDs before a fault, and the known host IDs in the post-fault
   * cluster, determine whether or not we think a network partition may have happened. NOTE: this
   * assumes that we have already done the k-safety validation for every partition and already
   * failed if we weren't a viable cluster. ALSO NOTE: not private so it may be unit-tested.
   */
  static boolean makePPDDecision(Set<Integer> previousHosts, Set<Integer> currentHosts) {
    // Real partition detection stuff would go here
    // find the lowest hostId between the still-alive hosts and the
    // failed hosts. Which set contains the lowest hostId?
    int blessedHostId = Integer.MAX_VALUE;
    boolean blessedHostIdInFailedSet = true;

    // This should be all the pre-partition host IDs.  Any new host IDs
    // (say, if this was triggered by rejoin) will be greater than any surviving
    // host ID, so don't worry about including them in this search.
    for (Integer hostId : previousHosts) {
      if (hostId < blessedHostId) {
        blessedHostId = hostId;
      }
    }

    for (Integer hostId : currentHosts) {
      if (hostId.equals(blessedHostId)) {
        blessedHostId = hostId;
        blessedHostIdInFailedSet = false;
      }
    }

    // Evaluate PPD triggers.
    boolean partitionDetectionTriggered = false;
    // Exact 50-50 splits. The set with the lowest survivor host doesn't trigger PPD
    // If the blessed host is in the failure set, this set is not blessed.
    if (currentHosts.size() * 2 == previousHosts.size()) {
      if (blessedHostIdInFailedSet) {
        tmLog.info(
            "Partition detection triggered for 50/50 cluster failure. "
                + "This survivor set is shutting down.");
        partitionDetectionTriggered = true;
      } else {
        tmLog.info(
            "Partition detected for 50/50 failure. "
                + "This survivor set is continuing execution.");
      }
    }

    // A strict, viable minority is always a partition.
    if (currentHosts.size() * 2 < previousHosts.size()) {
      tmLog.info(
          "Partition detection triggered. " + "This minority survivor set is shutting down.");
      partitionDetectionTriggered = true;
    }

    return partitionDetectionTriggered;
  }
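The javadoc above notes that makePPDDecision is deliberately package-visible so it can be unit-tested. A test along the following lines would exercise the three interesting branches. This is only a sketch: the enclosing class name (assumed here to be LeaderAppointer) and the JUnit 4 dependency are assumptions, not confirmed by this excerpt.

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import com.google.common.collect.ImmutableSet;
import java.util.Set;
import org.junit.Test;

public class TestPartitionDetectionDecision {
  @Test
  public void testMakePPDDecision() {
    Set<Integer> previous = ImmutableSet.of(0, 1, 2, 3);

    // 50/50 split where the surviving half keeps the lowest ("blessed") host ID: keep running.
    assertFalse(LeaderAppointer.makePPDDecision(previous, ImmutableSet.of(0, 1)));

    // 50/50 split where the blessed host is on the failed side: this half must shut down.
    assertTrue(LeaderAppointer.makePPDDecision(previous, ImmutableSet.of(2, 3)));

    // A strict, viable minority always triggers partition detection, blessed host or not.
    assertTrue(LeaderAppointer.makePPDDecision(previous, ImmutableSet.of(0)));
  }
}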
Example #3
 public void log(String message, long now, Level level) {
   if (now - m_lastLogTime > m_maxLogIntervalMillis) {
     synchronized (this) {
       if (now - m_lastLogTime > m_maxLogIntervalMillis) {
         switch (level) {
           case DEBUG:
             m_logger.debug(message);
             break;
           case ERROR:
             m_logger.error(message);
             break;
           case FATAL:
             m_logger.fatal(message);
             break;
           case INFO:
             m_logger.info(message);
             break;
           case TRACE:
             m_logger.trace(message);
             break;
           case WARN:
             m_logger.warn(message);
             break;
         }
         m_lastLogTime = now;
       }
     }
   }
 }
Example #4
 /**
  * This variant delays the formatting of the string message until it is actually logged
  *
  * @param now
  * @param level a {@link Level debug level}
  * @param cause evidentiary exception
  * @param stemformat a {@link String#format(String, Object...) string format}
  * @param args format arguments
  */
 public void log(long now, Level level, Throwable cause, String stemformat, Object... args) {
   if (now - m_lastLogTime > m_maxLogIntervalMillis) {
     synchronized (this) {
       if (now - m_lastLogTime > m_maxLogIntervalMillis) {
         String message = formatMessage(cause, stemformat, args);
         switch (level) {
           case DEBUG:
             m_logger.debug(message);
             break;
           case ERROR:
             m_logger.error(message);
             break;
           case FATAL:
             m_logger.fatal(message);
             break;
           case INFO:
             m_logger.info(message);
             break;
           case TRACE:
             m_logger.trace(message);
             break;
           case WARN:
             m_logger.warn(message);
             break;
         }
         m_lastLogTime = now;
       }
     }
   }
 }
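Both log variants above share the same double-checked pattern: an unsynchronized time comparison cheaply rejects the vast majority of calls, and the comparison is repeated under the lock so that at most one thread per interval actually logs (and, in the second variant, pays for the String.format). A minimal standalone sketch of that pattern, using System.out instead of the VoltDB logger and entirely hypothetical names:

public class RateLimitedPrinter {
  private final long m_maxIntervalMillis;
  private volatile long m_lastLogTime = 0;

  public RateLimitedPrinter(long maxIntervalMillis) {
    m_maxIntervalMillis = maxIntervalMillis;
  }

  public void log(String message, long now) {
    // First check without the lock: cheap, filters out almost every call.
    if (now - m_lastLogTime > m_maxIntervalMillis) {
      synchronized (this) {
        // Second check under the lock: another thread may have logged in the meantime.
        if (now - m_lastLogTime > m_maxIntervalMillis) {
          System.out.println(message);
          m_lastLogTime = now;
        }
      }
    }
  }

  public static void main(String[] args) {
    RateLimitedPrinter printer = new RateLimitedPrinter(1000);
    long end = System.currentTimeMillis() + 3000;
    // Only roughly one line per second is printed, no matter how hot the loop is.
    while (System.currentTimeMillis() < end) {
      printer.log("still spinning at " + System.currentTimeMillis(), System.currentTimeMillis());
    }
  }
}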
Example #5
 public void dump(long hsId) {
   final String who = CoreUtils.hsIdToString(hsId);
   tmLog.info(
       String.format(
           "%s: REPLAY SEQUENCER DUMP, LAST POLLED FRAGMENT %d (%s), LAST SEEN TXNID %d (%s), %s%s",
           who,
           m_lastPolledFragmentTxnId,
           TxnEgo.txnIdToString(m_lastPolledFragmentTxnId),
           m_lastSeenTxnId,
           TxnEgo.txnIdToString(m_lastSeenTxnId),
           m_mpiEOLReached ? "MPI EOL, " : "",
           m_mustDrain ? "MUST DRAIN" : ""));
   for (Entry<Long, ReplayEntry> e : m_replayEntries.entrySet()) {
     tmLog.info(String.format("%s: REPLAY ENTRY %s: %s", who, e.getKey(), e.getValue()));
   }
 }
Example #6
  private ModuleManager(File cacheRoot) {

    String systemPackagesSpec =
        FluentIterable.from(SYSTEM_PACKAGES).transform(appendVersion).join(COMMA_JOINER);

    Map<String, String> frameworkProps =
        ImmutableMap.<String, String>builder()
            .put(Constants.FRAMEWORK_SYSTEMPACKAGES_EXTRA, systemPackagesSpec)
            .put("org.osgi.framework.storage.clean", "onFirstInit")
            .put("felix.cache.rootdir", cacheRoot.getAbsolutePath())
            .put("felix.cache.locking", Boolean.FALSE.toString())
            .build();

    LOG.info("Framework properties are: " + frameworkProps);

    FrameworkFactory frameworkFactory =
        ServiceLoader.load(FrameworkFactory.class).iterator().next();

    m_framework = frameworkFactory.newFramework(frameworkProps);

    try {
      m_framework.start();
    } catch (BundleException e) {
      LOG.error("Failed to start the felix OSGi framework", e);
      throw new SetUpException("Failed to start the felix OSGi framework", e);
    }

    m_bundles = new BundleRef(m_framework);
  }
Example #7
  public void processKafkaMessages() throws Exception {
    // Split server list
    final String[] serverlist = m_config.servers.split(",");

    // Create connection
    final ClientConfig c_config = new ClientConfig(m_config.user, m_config.password);
    c_config.setProcedureCallTimeout(0); // Set procedure call timeout to infinite

    m_client = getClient(c_config, serverlist, m_config.port);

    if (m_config.useSuppliedProcedure) {
      m_loader =
          new CSVTupleDataLoader(
              (ClientImpl) m_client, m_config.procedure, new KafkaBulkLoaderCallback());
    } else {
      m_loader =
          new CSVBulkDataLoader(
              (ClientImpl) m_client, m_config.table, m_config.batch, new KafkaBulkLoaderCallback());
    }
    m_loader.setFlushInterval(m_config.flush, m_config.flush);
    m_consumer =
        new KafkaConsumerConnector(
            m_config.zookeeper,
            m_config.useSuppliedProcedure ? m_config.procedure : m_config.table);
    try {
      m_es = getConsumerExecutor(m_consumer, m_loader);
      if (m_config.useSuppliedProcedure) {
        m_log.info(
            "Kafka Consumer from topic: "
                + m_config.topic
                + " Started using procedure: "
                + m_config.procedure);
      } else {
        m_log.info(
            "Kafka Consumer from topic: "
                + m_config.topic
                + " Started for table: "
                + m_config.table);
      }
      m_es.awaitTermination(365, TimeUnit.DAYS);
    } catch (Exception ex) {
      m_log.error("Error in Kafka Consumer", ex);
      System.exit(-1);
    }
    close();
  }
Example #8
  /**
   * Constructor for benchmark instance. Configures VoltDB client and prints configuration.
   *
   * @param config Parsed & validated CLI options.
   */
  Benchmark(Config config) {
    this.config = config;

    processor =
        new PayloadProcessor(
            config.minvaluesize, config.maxvaluesize,
            config.entropy, config.usecompression);

    log.info(HORIZONTAL_RULE);
    log.info(" Command Line Configuration");
    log.info(HORIZONTAL_RULE);
    log.info(config.getConfigDumpString());

    StatusListener statusListener = new StatusListener();
    ClientConfig clientConfig = new ClientConfig("", "", statusListener);
    client = ClientFactory.createClient(clientConfig);
  }
Example #9
 public List<Integer> getIv2PartitionsToReplace(JSONObject topology) throws JSONException {
   ClusterConfig clusterConfig = new ClusterConfig(topology);
   hostLog.info(
       "Computing partitions to replace.  Total partitions: " + clusterConfig.getPartitionCount());
   Map<Integer, Integer> repsPerPart = new HashMap<Integer, Integer>();
   for (int i = 0; i < clusterConfig.getPartitionCount(); i++) {
     repsPerPart.put(i, getReplicaCountForPartition(i));
   }
   List<Integer> partitions =
       computeReplacementPartitions(
           repsPerPart,
           clusterConfig.getReplicationFactor(),
           clusterConfig.getSitesPerHost(),
           clusterConfig.getPartitionCount());
   hostLog.info("IV2 Sites will replicate the following partitions: " + partitions);
   return partitions;
 }
 public void flattenToBuffer(ByteBuffer buf) throws IOException {
   assert (!((params == null) && (serializedParams == null)));
   assert ((params != null) || (serializedParams != null));
   buf.put(type.getValue()); // version and type
   if (ProcedureInvocationType.isDeprecatedInternalDRType(type)) {
     buf.putLong(originalTxnId);
     buf.putLong(originalUniqueId);
   }
   if (type.getValue() >= BatchTimeoutOverrideType.BATCH_TIMEOUT_VERSION) {
     if (batchTimeout == BatchTimeoutOverrideType.NO_TIMEOUT) {
       buf.put(BatchTimeoutOverrideType.NO_OVERRIDE_FOR_BATCH_TIMEOUT.getValue());
     } else {
       buf.put(BatchTimeoutOverrideType.HAS_OVERRIDE_FOR_BATCH_TIMEOUT.getValue());
       buf.putInt(batchTimeout);
     }
   }
   buf.putInt(procName.length());
   buf.put(procName.getBytes(Constants.UTF8ENCODING));
   buf.putLong(clientHandle);
   if (serializedParams != null) {
     if (serializedParams.hasArray()) {
       // if position can be non-zero, then the dup/rewind logic below
       // would be wrong?
       assert (serializedParams.position() == 0);
       buf.put(
           serializedParams.array(),
           serializedParams.position() + serializedParams.arrayOffset(),
           serializedParams.remaining());
     } else {
       // duplicate for thread-safety
       assert (serializedParams.position() == 0);
       ByteBuffer dup = serializedParams.duplicate();
       dup.rewind();
       buf.put(dup);
     }
   } else if (params != null) {
     try {
       getParams().flattenToBuffer(buf);
     } catch (BufferOverflowException e) {
       hostLog.info("SP \"" + procName + "\" has thrown BufferOverflowException");
       hostLog.info(toString());
       throw e;
     }
   }
 }
 /**
  * Constructor to create a new generation of export data
  *
  * @param exportOverflowDirectory
  * @throws IOException
  */
 public ExportGeneration(long txnId, Runnable onAllSourcesDrained, File exportOverflowDirectory)
     throws IOException {
   m_onAllSourcesDrained = onAllSourcesDrained;
   m_timestamp = txnId;
   m_directory = new File(exportOverflowDirectory, Long.toString(txnId));
   if (!m_directory.mkdirs()) {
     throw new IOException("Could not create " + m_directory);
   }
   exportLog.info("Creating new export generation " + m_timestamp);
 }
 @Override
 public synchronized void becomeLeader() {
   hostLog.info("Host " + m_hostId + " promoted to be the global service provider");
   m_isLeader = true;
   for (Promotable service : m_services) {
     try {
       service.acceptPromotion();
     } catch (Exception e) {
       VoltDB.crashLocalVoltDB("Unable to promote global service.", true, e);
     }
   }
 }
Example #13
  /**
   * Compute the new partition IDs to add to the cluster based on the new topology.
   *
   * @param zk Zookeeper client
   * @param topo The new topology which should include the new host count
   * @return A list of partitions IDs to add to the cluster.
   * @throws JSONException
   */
  public static List<Integer> getPartitionsToAdd(ZooKeeper zk, JSONObject topo)
      throws JSONException {
    ClusterConfig clusterConfig = new ClusterConfig(topo);
    List<Integer> newPartitions = new ArrayList<Integer>();
    Set<Integer> existingParts = new HashSet<Integer>(getPartitions(zk));
    // Remove MPI
    existingParts.remove(MpInitiator.MP_INIT_PID);
    int partsToAdd = clusterConfig.getPartitionCount() - existingParts.size();

    if (partsToAdd > 0) {
      hostLog.info(
          "Computing new partitions to add. Total partitions: "
              + clusterConfig.getPartitionCount());
      for (int i = 0; newPartitions.size() != partsToAdd; i++) {
        if (!existingParts.contains(i)) {
          newPartitions.add(i);
        }
      }
      hostLog.info("Adding " + partsToAdd + " partitions: " + newPartitions);
    }
    return newPartitions;
  }
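The loop above claims the lowest partition IDs not already in use, so it fills any gaps in the existing ID sequence before appending new IDs at the top. A self-contained illustration of that selection step, with the ZooKeeper and ClusterConfig plumbing replaced by plain collections (all names here are hypothetical):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class PartitionGapFillDemo {
  /** Same selection logic as getPartitionsToAdd above, minus ZooKeeper and ClusterConfig. */
  static List<Integer> partitionsToAdd(Set<Integer> existingParts, int targetPartitionCount) {
    List<Integer> newPartitions = new ArrayList<>();
    int partsToAdd = targetPartitionCount - existingParts.size();
    // Walk upward from 0, claiming the lowest IDs that are not already taken.
    for (int i = 0; newPartitions.size() < partsToAdd; i++) {
      if (!existingParts.contains(i)) {
        newPartitions.add(i);
      }
    }
    return newPartitions;
  }

  public static void main(String[] args) {
    Set<Integer> existing = new HashSet<>(Arrays.asList(0, 1, 2, 4, 5));
    // Growing from 5 to 7 partitions fills the hole at 3 first, then appends 6.
    System.out.println(partitionsToAdd(existing, 7)); // prints [3, 6]
  }
}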
 /**
  * Indicate to all associated {@link ExportDataSource}to assume mastership role for the given
  * partition id
  *
  * @param partitionId
  */
 public void acceptMastershipTask(int partitionId) {
   HashMap<String, ExportDataSource> partitionDataSourceMap =
       m_dataSourcesByPartition.get(partitionId);
   exportLog.info(
       "Export generation " + m_timestamp + " accepting mastership for partition " + partitionId);
   for (ExportDataSource eds : partitionDataSourceMap.values()) {
     try {
       eds.acceptMastership();
     } catch (Exception e) {
       exportLog.error("Unable to start exporting", e);
     }
   }
 }
        @Override
        public void run() {
          int numSourcesDrained = m_drainedSources.incrementAndGet();
          exportLog.info(
              "Drained source in generation "
                  + m_timestamp
                  + " with "
                  + numSourcesDrained
                  + " of "
                  + m_numSources
                  + " drained");
          if (numSourcesDrained == m_numSources) {
            if (m_partitionLeaderZKName.isEmpty()) {
              m_onAllSourcesDrained.run();
            } else {
              ListenableFuture<?> removeLeadership =
                  m_childUpdatingThread.submit(
                      new Runnable() {
                        @Override
                        public void run() {
                          for (Map.Entry<Integer, String> entry :
                              m_partitionLeaderZKName.entrySet()) {
                            m_zk.delete(
                                m_leadersZKPath + "/" + entry.getKey() + "/" + entry.getValue(),
                                -1,
                                new AsyncCallback.VoidCallback() {

                                  @Override
                                  public void processResult(int rc, String path, Object ctx) {
                                    KeeperException.Code code = KeeperException.Code.get(rc);
                                    if (code != KeeperException.Code.OK) {
                                      VoltDB.crashLocalVoltDB(
                                          "Error in export leader election giving up leadership of "
                                              + path,
                                          true,
                                          KeeperException.create(code));
                                    }
                                  }
                                },
                                null);
                          }
                        }
                      },
                      null);
              removeLeadership.addListener(
                  m_onAllSourcesDrained, MoreExecutors.sameThreadExecutor());
            }

          }
        }
  private void doInitiation(RejoinMessage message) {
    m_coordinatorHsId = message.m_sourceHSId;
    registerSnapshotMonitor(message.getSnapshotNonce());

    long sinkHSId =
        m_dataSink.initialize(message.getSnapshotSourceCount(), message.getSnapshotBufferPool());

    // respond to the coordinator with the sink HSID
    RejoinMessage msg = new RejoinMessage(m_mailbox.getHSId(), -1, sinkHSId);
    m_mailbox.send(m_coordinatorHsId, msg);

    m_taskQueue.offer(this);
    JOINLOG.info("P" + m_partitionId + " received initiation");
  }
Example #17
  private long assignLeader(int partitionId, List<Long> children) {
    // We used masterHostId = -1 as a way to force the leader choice to be
    // the first replica in the list, if we don't have some other mechanism
    // which has successfully overridden it.
    int masterHostId = -1;
    if (m_state.get() == AppointerState.CLUSTER_START) {
      try {
        // find master in topo
        JSONArray parts = m_topo.getJSONArray("partitions");
        for (int p = 0; p < parts.length(); p++) {
          JSONObject aPartition = parts.getJSONObject(p);
          int pid = aPartition.getInt("partition_id");
          if (pid == partitionId) {
            masterHostId = aPartition.getInt("master");
          }
        }
      } catch (JSONException jse) {
        tmLog.error("Failed to find master for partition " + partitionId + ", defaulting to 0");
        jse.printStackTrace();
        masterHostId = -1; // stupid default
      }
    } else {
      // For now, if we're appointing a new leader as a result of a
      // failure, just pick the first replica in the children list.
      // Could eventually do something more complex here to try to keep a
      // semi-balance, but it's unclear that this has much utility until
      // we add rebalancing on rejoin as well.
      masterHostId = -1;
    }

    long masterHSId = children.get(0);
    for (Long child : children) {
      if (CoreUtils.getHostIdFromHSId(child) == masterHostId) {
        masterHSId = child;
        break;
      }
    }
    tmLog.info(
        "Appointing HSId "
            + CoreUtils.hsIdToString(masterHSId)
            + " as leader for partition "
            + partitionId);
    try {
      m_iv2appointees.put(partitionId, masterHSId);
    } catch (Exception e) {
      VoltDB.crashLocalVoltDB("Unable to appoint new master for partition " + partitionId, true, e);
    }
    return masterHSId;
  }
Example #18
  /** Start fixing survivors: setup scoreboard and request repair logs. */
  void prepareForFaultRecovery() {
    for (Long hsid : m_survivors) {
      m_replicaRepairStructs.put(hsid, new ReplicaRepairStruct());
    }

    tmLog.info(
        m_whoami
            + "found (including self) "
            + m_survivors.size()
            + " surviving replicas to repair. "
            + " Survivors: "
            + CoreUtils.hsIdCollectionToString(m_survivors));
    VoltMessage logRequest =
        new Iv2RepairLogRequestMessage(m_requestId, Iv2RepairLogRequestMessage.SPREQUEST);
    m_mailbox.send(com.google.common.primitives.Longs.toArray(m_survivors), logRequest);
  }
  // silly helper to add datasources for a table catalog object
  private void addDataSources(Table table, int hostId, List<Pair<Integer, Long>> partitions) {
    for (Pair<Integer, Long> p : partitions) {
      Integer partition = p.getFirst();
      Long site = p.getSecond();

      /*
       * IOException can occur if there is a problem
       * with the persistent aspects of the datasource storage
       */
      try {
        HashMap<String, ExportDataSource> dataSourcesForPartition =
            m_dataSourcesByPartition.get(partition);
        if (dataSourcesForPartition == null) {
          dataSourcesForPartition = new HashMap<String, ExportDataSource>();
          m_dataSourcesByPartition.put(partition, dataSourcesForPartition);
        }
        ExportDataSource exportDataSource =
            new ExportDataSource(
                m_onSourceDrained,
                "database",
                table.getTypeName(),
                partition,
                site,
                table.getSignature(),
                m_timestamp,
                table.getColumns(),
                m_directory.getPath());
        m_numSources++;
        exportLog.info(
            "Creating ExportDataSource for table "
                + table.getTypeName()
                + " signature "
                + table.getSignature()
                + " partition id "
                + partition);
        dataSourcesForPartition.put(table.getSignature(), exportDataSource);
      } catch (IOException e) {
        VoltDB.crashLocalVoltDB(
            "Error creating datasources for table " + table.getTypeName() + " host id " + hostId,
            true,
            e);
      }
    }
  }
Example #20
 /**
  * Connect to a single server with retry. Limited exponential backoff. No timeout. This will run
  * until the process is killed if it's not able to connect.
  *
  * @param server hostname:port or just hostname (hostname can be ip).
  */
 private void connectToOneServerWithRetry(String server) {
   int sleep = 1000;
   while (!shutdown.get()) {
     try {
       client.createConnection(server);
       activeConnections.incrementAndGet();
       log.info(String.format("Connected to VoltDB node at: %s.", server));
       break;
     } catch (Exception e) {
        log.warn(
            String.format(
                "Connection to %s failed - retrying in %d second(s).", server, sleep / 1000));
        try {
          Thread.sleep(sleep);
        } catch (InterruptedException interrupted) {
          // Ignore the interrupt and keep retrying until shutdown is requested.
        }
       if (sleep < 8000) sleep += sleep;
     }
   }
 }
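The single line `if (sleep < 8000) sleep += sleep;` is what makes the backoff in the javadoc "limited": the delay doubles from one second up to a cap of eight seconds and then stays there. A throwaway snippet showing the resulting retry schedule:

public class BackoffScheduleDemo {
  public static void main(String[] args) {
    int sleep = 1000;
    // First ten retry delays, in milliseconds: 1000, 2000, 4000, 8000, 8000, ...
    for (int attempt = 1; attempt <= 10; attempt++) {
      System.out.println("attempt " + attempt + ": sleep " + sleep + " ms");
      if (sleep < 8000) sleep += sleep;
    }
  }
}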
Example #21
  /**
   * Connect to a set of servers in parallel. Each will retry until connection. This call will block
   * until all have connected.
   *
   * @throws InterruptedException if anything bad happens with the threads.
   */
  private void connect() throws InterruptedException {
    log.info("Connecting to VoltDB...");

    final CountDownLatch connections = new CountDownLatch(config.parsedServers.length);

    // use a new thread to connect to each server
    for (final String server : config.parsedServers) {
      new Thread(
              new Runnable() {
                @Override
                public void run() {
                  connectToOneServerWithRetry(server);
                  connections.countDown();
                }
              })
          .start();
    }
    // block until all have connected
    connections.await();
  }
  boolean initializeGenerationFromDisk(final Connector conn, HostMessenger messenger) {
    m_diskBased = true;
    Set<Integer> partitions = new HashSet<Integer>();

    /*
     * Find all the advertisements. Once one is found, extract the nonce
     * and check for any data files related to the advertisement. If no data files
     * exist ignore the advertisement.
     */
    boolean hadValidAd = false;
    for (File f : m_directory.listFiles()) {
      if (f.getName().endsWith(".ad")) {
        boolean haveDataFiles = false;
        String nonce = f.getName().substring(0, f.getName().length() - 3);
        for (File dataFile : m_directory.listFiles()) {
          if (dataFile.getName().startsWith(nonce) && !dataFile.getName().equals(f.getName())) {
            haveDataFiles = true;
            break;
          }
        }

        if (haveDataFiles) {
          try {
            addDataSource(f, partitions);
            hadValidAd = true;
          } catch (IOException e) {
            VoltDB.crashLocalVoltDB("Error initializing export datasource " + f, true, e);
          }
        } else {
          // Delete ads that have no data
          f.delete();
        }
      }
    }
    createAndRegisterAckMailboxes(partitions, messenger);
    exportLog.info("Restoring export generation " + m_timestamp);
    return hadValidAd;
  }
 /*
  * Create a datasource based on an ad file
  */
 private void addDataSource(File adFile, Set<Integer> partitions) throws IOException {
   m_numSources++;
   ExportDataSource source = new ExportDataSource(m_onSourceDrained, adFile);
   partitions.add(source.getPartitionId());
   m_timestamp = source.getGeneration();
   exportLog.info(
       "Creating ExportDataSource for "
           + adFile
           + " table "
           + source.getTableName()
           + " signature "
           + source.getSignature()
           + " partition id "
           + source.getPartitionId()
           + " bytes "
           + source.sizeInBytes());
   HashMap<String, ExportDataSource> dataSourcesForPartition =
       m_dataSourcesByPartition.get(source.getPartitionId());
   if (dataSourcesForPartition == null) {
     dataSourcesForPartition = new HashMap<String, ExportDataSource>();
     m_dataSourcesByPartition.put(source.getPartitionId(), dataSourcesForPartition);
   }
   dataSourcesForPartition.put(source.getSignature(), source);
 }
Example #24
  public void pushExportBuffer(
      long uso, final long bufferPtr, ByteBuffer buffer, boolean sync, boolean endOfStream) {
    final java.util.concurrent.atomic.AtomicBoolean deleted =
        new java.util.concurrent.atomic.AtomicBoolean(false);
    synchronized (m_committedBuffers) {
      if (endOfStream) {
        assert (!m_endOfStream);
        assert (bufferPtr == 0);
        assert (buffer == null);
        assert (!sync);
        m_endOfStream = endOfStream;

        if (m_committedBuffers.sizeInBytes() == 0) {
          exportLog.info("Pushed EOS buffer with 0 bytes remaining");
          try {
            m_onDrain.run();
          } finally {
            m_onDrain = null;
          }
        }
        return;
      }
      assert (!m_endOfStream);
      if (buffer != null) {
        if (buffer.capacity() > 0) {
          try {
            m_committedBuffers.offer(
                new StreamBlock(
                    new BBContainer(buffer, bufferPtr) {
                      @Override
                      public void discard() {
                        DBBPool.deleteCharArrayMemory(address);
                        deleted.set(true);
                      }
                    },
                    uso,
                    false));
          } catch (IOException e) {
            exportLog.error(e);
            if (!deleted.get()) {
              DBBPool.deleteCharArrayMemory(bufferPtr);
            }
          }
        } else {
          /*
           * TupleStreamWrapper::setBytesUsed propagates the USO by sending
           * over an empty stream block. The block will be deleted
           * on the native side when this method returns
           */
          exportLog.info(
              "Syncing first unpolled USO to "
                  + uso
                  + " for table "
                  + m_tableName
                  + " partition "
                  + m_partitionId);
          m_firstUnpolledUso = uso;
        }
      }
      if (sync) {
        try {
          // Don't do a real sync, just write the in memory buffers
          // to a file. @Quiesce or blocking snapshot will do the sync
          m_committedBuffers.sync(true);
        } catch (IOException e) {
          exportLog.error(e);
        }
      }
    }
  }
Example #25
  @Override
  public void acceptPromotion() throws InterruptedException, ExecutionException, KeeperException {
    // Crank up the leader caches.  Use blocking startup so that we'll have valid point-in-time
    // caches later.
    m_iv2appointees.start(true);
    m_iv2masters.start(true);
    // Figure out what conditions we assumed leadership under.
    if (m_iv2appointees.pointInTimeCache().size() == 0) {
      tmLog.debug("LeaderAppointer in startup");
      m_state.set(AppointerState.CLUSTER_START);
    } else if ((m_iv2appointees.pointInTimeCache().size() != m_partitionCount)
        || (m_iv2masters.pointInTimeCache().size() != m_partitionCount)) {
      // If we are promoted and the appointees or masters set is partial, the previous appointer
      // failed during startup (at least for now, until we support adding or removing partitions
      // on the fly).
      VoltDB.crashGlobalVoltDB("Detected failure during startup, unable to start", false, null);
    } else {
      tmLog.debug("LeaderAppointer in repair");
      m_state.set(AppointerState.DONE);
    }

    if (m_state.get() == AppointerState.CLUSTER_START) {
      // Need to block the return of acceptPromotion until after the MPI is promoted.
      // Wait for this latch to count down after appointing all the partition leaders.  The
      // LeaderCache callback will count it down once it has seen all the
      // appointed leaders publish themselves as the actual leaders.
      m_startupLatch = new CountDownLatch(1);
      writeKnownLiveNodes(m_hostMessenger.getLiveHostIds());
      for (int i = 0; i < m_partitionCount; i++) {
        String dir = LeaderElector.electionDirForPartition(i);
        // Race along with all of the replicas for this partition to create the ZK parent node
        try {
          m_zk.create(dir, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        } catch (KeeperException.NodeExistsException e) {
          // expected on all nodes that don't start() first.
        }
        m_callbacks[i] = new PartitionCallback(i);
        Pair<BabySitter, List<String>> sitterstuff =
            BabySitter.blockingFactory(m_zk, dir, m_callbacks[i], m_es);
        m_partitionWatchers[i] = sitterstuff.getFirst();
      }
      m_startupLatch.await();
    } else {
      // If we're taking over for a failed LeaderAppointer, we know when
      // we get here that every partition had a leader at some point in
      // time.  We'll seed each of the PartitionCallbacks for each
      // partition with the HSID of the last published leader.  The
      // blocking startup of the BabySitter watching that partition will
      // call our callback, get the current full set of replicas, and
      // appoint a new leader if the seeded one has actually failed
      Map<Integer, Long> masters = m_iv2masters.pointInTimeCache();
      tmLog.info("LeaderAppointer repairing with master set: " + masters);
      for (Entry<Integer, Long> master : masters.entrySet()) {
        int partId = master.getKey();
        String dir = LeaderElector.electionDirForPartition(partId);
        m_callbacks[partId] = new PartitionCallback(partId, master.getValue());
        Pair<BabySitter, List<String>> sitterstuff =
            BabySitter.blockingFactory(m_zk, dir, m_callbacks[partId], m_es);
        m_partitionWatchers[partId] = sitterstuff.getFirst();
      }
      // just go ahead and promote our MPI
      m_MPI.acceptPromotion();
    }
  }
Example #26
 /**
  * Prints a one line update on performance that can be printed periodically during a benchmark.
  */
 private synchronized void printStatistics() {
   log.info(String.format("Executed %d", c.get()));
 }
Example #27
  /**
   * Create a new data source.
   *
   * @param db
   * @param tableName
   * @param isReplicated
   * @param partitionId
   * @param HSId
   * @param tableId
   * @param catalogMap
   */
  public ExportDataSource(
      Runnable onDrain,
      String db,
      String tableName,
      int partitionId,
      long HSId,
      String signature,
      long generation,
      CatalogMap<Column> catalogMap,
      String overflowPath)
      throws IOException {
    m_generation = generation;
    m_onDrain = onDrain;
    m_database = db;
    m_tableName = tableName;

    String nonce = signature + "_" + HSId + "_" + partitionId;

    m_committedBuffers = new StreamBlockQueue(overflowPath, nonce);

    /*
     * This is not the catalog relativeIndex(). This ID incorporates
     * a catalog version and a table id so that it is constant across
     * catalog updates that add or drop tables.
     */
    m_signature = signature;
    m_partitionId = partitionId;
    m_HSId = HSId;

    // Add the Export meta-data columns to the schema followed by the
    // catalog columns for this table.
    m_columnNames.add("VOLT_TRANSACTION_ID");
    m_columnTypes.add(((int) VoltType.BIGINT.getValue()));

    m_columnNames.add("VOLT_EXPORT_TIMESTAMP");
    m_columnTypes.add(((int) VoltType.BIGINT.getValue()));

    m_columnNames.add("VOLT_EXPORT_SEQUENCE_NUMBER");
    m_columnTypes.add(((int) VoltType.BIGINT.getValue()));

    m_columnNames.add("VOLT_PARTITION_ID");
    m_columnTypes.add(((int) VoltType.BIGINT.getValue()));

    m_columnNames.add("VOLT_SITE_ID");
    m_columnTypes.add(((int) VoltType.BIGINT.getValue()));

    m_columnNames.add("VOLT_EXPORT_OPERATION");
    m_columnTypes.add(((int) VoltType.TINYINT.getValue()));

    for (Column c : CatalogUtil.getSortedCatalogItems(catalogMap, "index")) {
      m_columnNames.add(c.getName());
      m_columnTypes.add(c.getType());
    }

    File adFile = new VoltFile(overflowPath, nonce + ".ad");
    exportLog.info("Creating ad for " + nonce);
    assert (!adFile.exists());
    FastSerializer fs = new FastSerializer();
    fs.writeLong(m_HSId);
    fs.writeString(m_database);
    writeAdvertisementTo(fs);
    FileOutputStream fos = new FileOutputStream(adFile);
    fos.write(fs.getBytes());
    fos.getFD().sync();
    fos.close();

    // compute the number of bytes necessary to hold one bit per
    // schema column
    m_nullArrayLength = ((m_columnTypes.size() + 7) & -8) >> 3;
  }
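The last line sizes the per-row null-indicator array: one bit per schema column, rounded up to whole bytes. `(n + 7) & -8` rounds n up to the next multiple of 8 and the shift then divides by 8, so the whole expression is equivalent to `(n + 7) / 8`. A small illustrative check (not part of the original class):

public class NullArrayLengthDemo {
  public static void main(String[] args) {
    // One null-indicator bit per column, rounded up to whole bytes.
    for (int columns : new int[] {1, 6, 8, 9, 12, 17}) {
      int viaBitTrick = ((columns + 7) & -8) >> 3;
      int viaDivision = (columns + 7) / 8;
      System.out.println(columns + " columns -> " + viaBitTrick + " byte(s), "
          + (viaBitTrick == viaDivision ? "matches" : "MISMATCH") + " (columns + 7) / 8");
    }
  }
}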
Example #28
  /**
   * Core benchmark code. Connect. Initialize. Run the loop. Cleanup. Print Results.
   *
   * @throws Exception if anything unexpected happens.
   */
  public void runBenchmark() throws Exception {
    log.info(HORIZONTAL_RULE);
    log.info(" Setup & Initialization");
    log.info(HORIZONTAL_RULE);

    final int cidCount = 128;
    final long[] lastRid = new long[cidCount];
    for (int i = 0; i < lastRid.length; i++) {
      lastRid[i] = 0;
    }

    // connect to one or more servers, loop until success
    connect();

    log.info(HORIZONTAL_RULE);
    log.info("Starting Benchmark");
    log.info(HORIZONTAL_RULE);

    // print periodic statistics to the console
    benchmarkStartTS = System.currentTimeMillis();
    schedulePeriodicStats();

    // Run the benchmark loop for the requested duration
    // The throughput may be throttled depending on client configuration
    log.info("Running benchmark...");

    BigTableLoader partitionedLoader =
        new BigTableLoader(
            client,
            "bigp",
            (config.partfillerrowmb * 1024 * 1024) / config.fillerrowsize,
            config.fillerrowsize);
    partitionedLoader.start();
    BigTableLoader replicatedLoader =
        new BigTableLoader(
            client,
            "bigr",
            (config.replfillerrowmb * 1024 * 1024) / config.fillerrowsize,
            config.fillerrowsize);
    replicatedLoader.start();

    ReadThread readThread = new ReadThread(client, config.threads, config.threadoffset);
    readThread.start();

    AdHocMayhemThread adHocMayhemThread = new AdHocMayhemThread(client);
    adHocMayhemThread.start();

    List<ClientThread> clientThreads = new ArrayList<ClientThread>();
    for (byte cid = (byte) config.threadoffset; cid < config.threadoffset + config.threads; cid++) {
      ClientThread clientThread = new ClientThread(cid, c, client, processor);
      clientThread.start();
      clientThreads.add(clientThread);
    }

    final long benchmarkEndTime = System.currentTimeMillis() + (1000l * config.duration);

    while (benchmarkEndTime > System.currentTimeMillis()) {
      Thread.yield();
    }

    replicatedLoader.shutdown();
    partitionedLoader.shutdown();
    readThread.shutdown();
    adHocMayhemThread.shutdown();
    for (ClientThread clientThread : clientThreads) {
      clientThread.shutdown();
    }
    replicatedLoader.join();
    partitionedLoader.join();
    readThread.join();
    adHocMayhemThread.join();
    for (ClientThread clientThread : clientThreads) {
      clientThread.join();
    }

    // cancel periodic stats printing
    timer.cancel();

    shutdown.set(true);
    es.shutdownNow();

    // block until all outstanding txns return
    client.drain();
    client.close();
  }