@Override
public void handleResponse(ClientResponse resp) {
    if (resp == null) {
        VoltDB.crashLocalVoltDB(
                "Received a null response to a snapshot initiation request. " +
                "This should be impossible.", true, null);
    } else if (resp.getStatus() != ClientResponse.SUCCESS) {
        tmLog.info("Failed to complete partition detection snapshot, status: " +
                resp.getStatus() + ", reason: " + resp.getStatusString());
        tmLog.info("Retrying partition detection snapshot...");
        SnapshotUtil.requestSnapshot(0L,
                m_partSnapshotSchedule.getPath(),
                m_partSnapshotSchedule.getPrefix() + System.currentTimeMillis(),
                true,
                SnapshotFormat.NATIVE,
                null,
                m_snapshotHandler,
                true);
    } else if (!SnapshotUtil.didSnapshotRequestSucceed(resp.getResults())) {
        VoltDB.crashGlobalVoltDB(
                "Unable to complete partition detection snapshot: " + resp.getResults()[0],
                false, null);
    } else {
        VoltDB.crashGlobalVoltDB(
                "Partition detection snapshot completed. Shutting down.",
                false, null);
    }
}
/**
 * Given a set of the known host IDs before a fault, and the known host IDs in the post-fault
 * cluster, determine whether or not we think a network partition may have happened.
 * NOTE: this assumes that we have already done the k-safety validation for every partition and
 * already failed if we weren't a viable cluster.
 * ALSO NOTE: not private so it may be unit-tested.
 */
static boolean makePPDDecision(Set<Integer> previousHosts, Set<Integer> currentHosts) {
    // Real partition detection stuff would go here
    // find the lowest hostId between the still-alive hosts and the
    // failed hosts. Which set contains the lowest hostId?
    int blessedHostId = Integer.MAX_VALUE;
    boolean blessedHostIdInFailedSet = true;

    // This should be all the pre-partition hosts IDs. Any new host IDs
    // (say, if this was triggered by rejoin), will be greater than any surviving
    // host ID, so don't worry about including it in this search.
    for (Integer hostId : previousHosts) {
        if (hostId < blessedHostId) {
            blessedHostId = hostId;
        }
    }

    for (Integer hostId : currentHosts) {
        if (hostId.equals(blessedHostId)) {
            blessedHostId = hostId;
            blessedHostIdInFailedSet = false;
        }
    }

    // Evaluate PPD triggers.
    boolean partitionDetectionTriggered = false;

    // Exact 50-50 splits. The set with the lowest survivor host doesn't trigger PPD
    // If the blessed host is in the failure set, this set is not blessed.
    if (currentHosts.size() * 2 == previousHosts.size()) {
        if (blessedHostIdInFailedSet) {
            tmLog.info("Partition detection triggered for 50/50 cluster failure. " +
                    "This survivor set is shutting down.");
            partitionDetectionTriggered = true;
        } else {
            tmLog.info("Partition detected for 50/50 failure. " +
                    "This survivor set is continuing execution.");
        }
    }

    // A strict, viable minority is always a partition.
    if (currentHosts.size() * 2 < previousHosts.size()) {
        tmLog.info("Partition detection triggered. " +
                "This minority survivor set is shutting down.");
        partitionDetectionTriggered = true;
    }

    return partitionDetectionTriggered;
}
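// Illustrative sketch (not part of the original source): since makePPDDecision() is left
// package-visible specifically so it can be unit-tested, a minimal JUnit-style check could
// look like the following. The test class name, the host-ID values, and a static import of
// makePPDDecision from its declaring class are assumptions; only the decision rules themselves
// (a 50/50 split is resolved by which side kept the lowest pre-fault host ID, and a strict
// minority always triggers partition detection) come from the method above.
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.junit.Test;

public class MakePPDDecisionSketchTest {

    private static Set<Integer> hosts(Integer... ids) {
        return new HashSet<Integer>(Arrays.asList(ids));
    }

    @Test
    public void fiftyFiftySplitResolvedByBlessedHost() {
        Set<Integer> previous = hosts(0, 1, 2, 3);
        // Survivors kept host 0, the lowest pre-fault ID, so this half keeps running.
        assertFalse(makePPDDecision(previous, hosts(0, 1)));
        // Survivors lost host 0, so this half shuts down.
        assertTrue(makePPDDecision(previous, hosts(2, 3)));
    }

    @Test
    public void strictMinorityAlwaysTriggers() {
        // Two survivors out of five is a strict minority, even though host 0 survived.
        assertTrue(makePPDDecision(hosts(0, 1, 2, 3, 4), hosts(0, 1)));
    }
}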
public void log(String message, long now, Level level) {
    if (now - m_lastLogTime > m_maxLogIntervalMillis) {
        synchronized (this) {
            if (now - m_lastLogTime > m_maxLogIntervalMillis) {
                switch (level) {
                    case DEBUG:
                        m_logger.debug(message);
                        break;
                    case ERROR:
                        m_logger.error(message);
                        break;
                    case FATAL:
                        m_logger.fatal(message);
                        break;
                    case INFO:
                        m_logger.info(message);
                        break;
                    case TRACE:
                        m_logger.trace(message);
                        break;
                    case WARN:
                        m_logger.warn(message);
                        break;
                }
                m_lastLogTime = now;
            }
        }
    }
}
/**
 * This variant delays the formatting of the string message until it is actually logged
 *
 * @param now current timestamp in milliseconds
 * @param level a {@link Level debug level}
 * @param cause evidentiary exception
 * @param stemformat a {@link String#format(String, Object...) string format}
 * @param args format arguments
 */
public void log(long now, Level level, Throwable cause, String stemformat, Object... args) {
    if (now - m_lastLogTime > m_maxLogIntervalMillis) {
        synchronized (this) {
            if (now - m_lastLogTime > m_maxLogIntervalMillis) {
                String message = formatMessage(cause, stemformat, args);
                switch (level) {
                    case DEBUG:
                        m_logger.debug(message);
                        break;
                    case ERROR:
                        m_logger.error(message);
                        break;
                    case FATAL:
                        m_logger.fatal(message);
                        break;
                    case INFO:
                        m_logger.info(message);
                        break;
                    case TRACE:
                        m_logger.trace(message);
                        break;
                    case WARN:
                        m_logger.warn(message);
                        break;
                }
                m_lastLogTime = now;
            }
        }
    }
}
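// Usage sketch (not from the original source): how a caller on a hot path might use the two
// log() overloads above. The type name RateLimitedLogger for the enclosing class and the
// surrounding method are assumptions for illustration; the two method signatures and the
// Level enum come from the code above. The point of the deferred variant is that the
// String.format() work is skipped entirely whenever the rate limit suppresses the message.
static void reportBacklogSketch(RateLimitedLogger limitedLogger, // type name assumed
        Throwable lastError, long backlogBytes) {
    final long now = System.currentTimeMillis();

    // Eager variant: the message string is concatenated on every call, even when the
    // rate limiter ends up dropping it.
    limitedLogger.log("Export backlog is " + backlogBytes + " bytes", now, Level.WARN);

    // Deferred variant: the format string and arguments are only combined into a message
    // if the rate limiter decides the message should actually be emitted.
    limitedLogger.log(now, Level.WARN, lastError, "Export backlog is %d bytes", backlogBytes);
}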
public void dump(long hsId) {
    final String who = CoreUtils.hsIdToString(hsId);
    tmLog.info(String.format("%s: REPLAY SEQUENCER DUMP, LAST POLLED FRAGMENT %d (%s), LAST SEEN TXNID %d (%s), %s%s",
            who,
            m_lastPolledFragmentTxnId, TxnEgo.txnIdToString(m_lastPolledFragmentTxnId),
            m_lastSeenTxnId, TxnEgo.txnIdToString(m_lastSeenTxnId),
            m_mpiEOLReached ? "MPI EOL, " : "",
            m_mustDrain ? "MUST DRAIN" : ""));
    for (Entry<Long, ReplayEntry> e : m_replayEntries.entrySet()) {
        tmLog.info(String.format("%s: REPLAY ENTRY %s: %s", who, e.getKey(), e.getValue()));
    }
}
private ModuleManager(File cacheRoot) {
    String systemPackagesSpec = FluentIterable
            .from(SYSTEM_PACKAGES)
            .transform(appendVersion)
            .join(COMMA_JOINER);

    Map<String, String> frameworkProps = ImmutableMap.<String, String>builder()
            .put(Constants.FRAMEWORK_SYSTEMPACKAGES_EXTRA, systemPackagesSpec)
            .put("org.osgi.framework.storage.clean", "onFirstInit")
            .put("felix.cache.rootdir", cacheRoot.getAbsolutePath())
            .put("felix.cache.locking", Boolean.FALSE.toString())
            .build();

    LOG.info("Framework properties are: " + frameworkProps);

    FrameworkFactory frameworkFactory = ServiceLoader.load(FrameworkFactory.class).iterator().next();
    m_framework = frameworkFactory.newFramework(frameworkProps);

    try {
        m_framework.start();
    } catch (BundleException e) {
        LOG.error("Failed to start the felix OSGi framework", e);
        throw new SetUpException("Failed to start the felix OSGi framework", e);
    }
    m_bundles = new BundleRef(m_framework);
}
public void processKafkaMessages() throws Exception {
    // Split server list
    final String[] serverlist = m_config.servers.split(",");

    // Create connection
    final ClientConfig c_config = new ClientConfig(m_config.user, m_config.password);
    c_config.setProcedureCallTimeout(0); // Set procedure call timeout to infinite

    m_client = getClient(c_config, serverlist, m_config.port);

    if (m_config.useSuppliedProcedure) {
        m_loader = new CSVTupleDataLoader((ClientImpl) m_client, m_config.procedure,
                new KafkaBulkLoaderCallback());
    } else {
        m_loader = new CSVBulkDataLoader((ClientImpl) m_client, m_config.table, m_config.batch,
                new KafkaBulkLoaderCallback());
    }
    m_loader.setFlushInterval(m_config.flush, m_config.flush);
    m_consumer = new KafkaConsumerConnector(m_config.zookeeper,
            m_config.useSuppliedProcedure ? m_config.procedure : m_config.table);

    try {
        m_es = getConsumerExecutor(m_consumer, m_loader);
        if (m_config.useSuppliedProcedure) {
            m_log.info("Kafka Consumer from topic: " + m_config.topic +
                    " Started using procedure: " + m_config.procedure);
        } else {
            m_log.info("Kafka Consumer from topic: " + m_config.topic +
                    " Started for table: " + m_config.table);
        }
        m_es.awaitTermination(365, TimeUnit.DAYS);
    } catch (Exception ex) {
        m_log.error("Error in Kafka Consumer", ex);
        System.exit(-1);
    }
    close();
}
/**
 * Constructor for benchmark instance. Configures VoltDB client and prints configuration.
 *
 * @param config Parsed & validated CLI options.
 */
Benchmark(Config config) {
    this.config = config;

    processor = new PayloadProcessor(config.minvaluesize, config.maxvaluesize,
            config.entropy, config.usecompression);

    log.info(HORIZONTAL_RULE);
    log.info(" Command Line Configuration");
    log.info(HORIZONTAL_RULE);
    log.info(config.getConfigDumpString());

    StatusListener statusListener = new StatusListener();
    ClientConfig clientConfig = new ClientConfig("", "", statusListener);
    client = ClientFactory.createClient(clientConfig);
}
public List<Integer> getIv2PartitionsToReplace(JSONObject topology) throws JSONException {
    ClusterConfig clusterConfig = new ClusterConfig(topology);
    hostLog.info("Computing partitions to replace. Total partitions: " +
            clusterConfig.getPartitionCount());
    Map<Integer, Integer> repsPerPart = new HashMap<Integer, Integer>();
    for (int i = 0; i < clusterConfig.getPartitionCount(); i++) {
        repsPerPart.put(i, getReplicaCountForPartition(i));
    }
    List<Integer> partitions = computeReplacementPartitions(repsPerPart,
            clusterConfig.getReplicationFactor(),
            clusterConfig.getSitesPerHost(),
            clusterConfig.getPartitionCount());
    hostLog.info("IV2 Sites will replicate the following partitions: " + partitions);
    return partitions;
}
public void flattenToBuffer(ByteBuffer buf) throws IOException {
    assert(!((params == null) && (serializedParams == null)));
    assert((params != null) || (serializedParams != null));

    buf.put(type.getValue()); // version and type

    if (ProcedureInvocationType.isDeprecatedInternalDRType(type)) {
        buf.putLong(originalTxnId);
        buf.putLong(originalUniqueId);
    }

    if (type.getValue() >= BatchTimeoutOverrideType.BATCH_TIMEOUT_VERSION) {
        if (batchTimeout == BatchTimeoutOverrideType.NO_TIMEOUT) {
            buf.put(BatchTimeoutOverrideType.NO_OVERRIDE_FOR_BATCH_TIMEOUT.getValue());
        } else {
            buf.put(BatchTimeoutOverrideType.HAS_OVERRIDE_FOR_BATCH_TIMEOUT.getValue());
            buf.putInt(batchTimeout);
        }
    }

    buf.putInt(procName.length());
    buf.put(procName.getBytes(Constants.UTF8ENCODING));
    buf.putLong(clientHandle);

    if (serializedParams != null) {
        if (serializedParams.hasArray()) {
            // if position can be non-zero, then the dup/rewind logic below
            // would be wrong?
            assert(serializedParams.position() == 0);
            buf.put(serializedParams.array(),
                    serializedParams.position() + serializedParams.arrayOffset(),
                    serializedParams.remaining());
        } else {
            // duplicate for thread-safety
            assert(serializedParams.position() == 0);
            ByteBuffer dup = serializedParams.duplicate();
            dup.rewind();
            buf.put(dup);
        }
    } else if (params != null) {
        try {
            getParams().flattenToBuffer(buf);
        } catch (BufferOverflowException e) {
            hostLog.info("SP \"" + procName + "\" has thrown BufferOverflowException");
            hostLog.info(toString());
            throw e;
        }
    }
}
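// Illustrative sketch (not part of the original source): the wire layout produced by
// flattenToBuffer() above, walked field by field. The method below is hypothetical and
// exists only to document the byte order; the two boolean parameters stand in for the
// version checks (deprecated internal DR type, batch-timeout-capable version) whose
// concrete constant values are not reproduced here. UTF-8 is assumed for the procedure
// name, matching Constants.UTF8ENCODING in the writer.
static void describeInvocationLayoutSketch(ByteBuffer buf, boolean deprecatedDRType,
        boolean hasBatchTimeoutSection) {
    byte versionAndType = buf.get();                    // 1 byte: version and type
    if (deprecatedDRType) {
        long originalTxnId = buf.getLong();             // 8 bytes
        long originalUniqueId = buf.getLong();          // 8 bytes
    }
    if (hasBatchTimeoutSection) {
        byte timeoutFlag = buf.get();                   // 1 byte: override / no-override flag
        // An override flag is followed by a 4-byte timeout; the exact flag values are
        // defined by BatchTimeoutOverrideType and are not assumed here.
    }
    int procNameLength = buf.getInt();                  // 4 bytes: procedure name length
    byte[] procName = new byte[procNameLength];
    buf.get(procName);                                  // procedure name bytes (UTF-8 assumed)
    long clientHandle = buf.getLong();                  // 8 bytes: client handle
    // The remainder of the buffer is the flattened ParameterSet.
    System.out.printf("type=%d proc=%s handle=%d params=%d bytes%n",
            versionAndType,
            new String(procName, java.nio.charset.StandardCharsets.UTF_8),
            clientHandle, buf.remaining());
}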
/**
 * Constructor to create a new generation of export data
 *
 * @param exportOverflowDirectory
 * @throws IOException
 */
public ExportGeneration(long txnId, Runnable onAllSourcesDrained, File exportOverflowDirectory)
        throws IOException {
    m_onAllSourcesDrained = onAllSourcesDrained;
    m_timestamp = txnId;
    m_directory = new File(exportOverflowDirectory, Long.toString(txnId));
    if (!m_directory.mkdirs()) {
        throw new IOException("Could not create " + m_directory);
    }
    exportLog.info("Creating new export generation " + m_timestamp);
}
@Override
public synchronized void becomeLeader() {
    hostLog.info("Host " + m_hostId + " promoted to be the global service provider");
    m_isLeader = true;
    for (Promotable service : m_services) {
        try {
            service.acceptPromotion();
        } catch (Exception e) {
            VoltDB.crashLocalVoltDB("Unable to promote global service.", true, e);
        }
    }
}
/**
 * Compute the new partition IDs to add to the cluster based on the new topology.
 *
 * @param zk Zookeeper client
 * @param topo The new topology which should include the new host count
 * @return A list of partition IDs to add to the cluster.
 * @throws JSONException
 */
public static List<Integer> getPartitionsToAdd(ZooKeeper zk, JSONObject topo)
        throws JSONException {
    ClusterConfig clusterConfig = new ClusterConfig(topo);
    List<Integer> newPartitions = new ArrayList<Integer>();
    Set<Integer> existingParts = new HashSet<Integer>(getPartitions(zk));
    // Remove MPI
    existingParts.remove(MpInitiator.MP_INIT_PID);
    int partsToAdd = clusterConfig.getPartitionCount() - existingParts.size();

    if (partsToAdd > 0) {
        hostLog.info("Computing new partitions to add. Total partitions: " +
                clusterConfig.getPartitionCount());
        for (int i = 0; newPartitions.size() != partsToAdd; i++) {
            if (!existingParts.contains(i)) {
                newPartitions.add(i);
            }
        }
        hostLog.info("Adding " + partsToAdd + " partitions: " + newPartitions);
    }
    return newPartitions;
}
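// Worked example (not from the original source): the loop above simply fills in the lowest
// partition IDs that do not already exist until the count matches the new topology. The
// standalone sketch below replicates just that selection logic with made-up values: with
// existing partitions {0, 1, 2, 4} (MPI already removed) and a new total of 6, it picks [3, 5].
static List<Integer> pickNewPartitionIdsSketch(Set<Integer> existingParts, int newPartitionCount) {
    List<Integer> newPartitions = new ArrayList<Integer>();
    int partsToAdd = newPartitionCount - existingParts.size();
    if (partsToAdd <= 0) {
        return newPartitions; // nothing to add (the real code guards with partsToAdd > 0)
    }
    for (int i = 0; newPartitions.size() != partsToAdd; i++) {
        if (!existingParts.contains(i)) {
            newPartitions.add(i);
        }
    }
    return newPartitions; // e.g. existing {0, 1, 2, 4} with a new total of 6 -> [3, 5]
}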
/**
 * Indicate to all associated {@link ExportDataSource}s to assume the mastership role for the
 * given partition id
 *
 * @param partitionId
 */
public void acceptMastershipTask(int partitionId) {
    HashMap<String, ExportDataSource> partitionDataSourceMap =
            m_dataSourcesByPartition.get(partitionId);
    exportLog.info("Export generation " + m_timestamp +
            " accepting mastership for partition " + partitionId);
    for (ExportDataSource eds : partitionDataSourceMap.values()) {
        try {
            eds.acceptMastership();
        } catch (Exception e) {
            exportLog.error("Unable to start exporting", e);
        }
    }
}
@Override
public void run() {
    int numSourcesDrained = m_drainedSources.incrementAndGet();
    exportLog.info("Drained source in generation " + m_timestamp +
            " with " + numSourcesDrained + " of " + m_numSources + " drained");
    if (numSourcesDrained == m_numSources) {
        if (m_partitionLeaderZKName.isEmpty()) {
            m_onAllSourcesDrained.run();
        } else {
            ListenableFuture<?> removeLeadership = m_childUpdatingThread.submit(new Runnable() {
                @Override
                public void run() {
                    for (Map.Entry<Integer, String> entry : m_partitionLeaderZKName.entrySet()) {
                        m_zk.delete(
                                m_leadersZKPath + "/" + entry.getKey() + "/" + entry.getValue(),
                                -1,
                                new AsyncCallback.VoidCallback() {
                                    @Override
                                    public void processResult(int rc, String path, Object ctx) {
                                        KeeperException.Code code = KeeperException.Code.get(rc);
                                        if (code != KeeperException.Code.OK) {
                                            VoltDB.crashLocalVoltDB(
                                                    "Error in export leader election giving up leadership of " + path,
                                                    true,
                                                    KeeperException.create(code));
                                        }
                                    }
                                },
                                null);
                    }
                }
            }, null);
            removeLeadership.addListener(m_onAllSourcesDrained, MoreExecutors.sameThreadExecutor());
        }
    }
}
private void doInitiation(RejoinMessage message) {
    m_coordinatorHsId = message.m_sourceHSId;
    registerSnapshotMonitor(message.getSnapshotNonce());

    long sinkHSId = m_dataSink.initialize(message.getSnapshotSourceCount(),
            message.getSnapshotBufferPool());

    // respond to the coordinator with the sink HSID
    RejoinMessage msg = new RejoinMessage(m_mailbox.getHSId(), -1, sinkHSId);
    m_mailbox.send(m_coordinatorHsId, msg);

    m_taskQueue.offer(this);
    JOINLOG.info("P" + m_partitionId + " received initiation");
}
private long assignLeader(int partitionId, List<Long> children) {
    // We used masterHostId = -1 as a way to force the leader choice to be
    // the first replica in the list, if we don't have some other mechanism
    // which has successfully overridden it.
    int masterHostId = -1;
    if (m_state.get() == AppointerState.CLUSTER_START) {
        try {
            // find master in topo
            JSONArray parts = m_topo.getJSONArray("partitions");
            for (int p = 0; p < parts.length(); p++) {
                JSONObject aPartition = parts.getJSONObject(p);
                int pid = aPartition.getInt("partition_id");
                if (pid == partitionId) {
                    masterHostId = aPartition.getInt("master");
                }
            }
        } catch (JSONException jse) {
            tmLog.error("Failed to find master for partition " + partitionId + ", defaulting to 0");
            jse.printStackTrace();
            masterHostId = -1; // stupid default
        }
    } else {
        // For now, if we're appointing a new leader as a result of a
        // failure, just pick the first replica in the children list.
        // Could eventually do something more complex here to try to keep a
        // semi-balance, but it's unclear that this has much utility until
        // we add rebalancing on rejoin as well.
        masterHostId = -1;
    }

    long masterHSId = children.get(0);
    for (Long child : children) {
        if (CoreUtils.getHostIdFromHSId(child) == masterHostId) {
            masterHSId = child;
            break;
        }
    }
    tmLog.info("Appointing HSId " + CoreUtils.hsIdToString(masterHSId) +
            " as leader for partition " + partitionId);
    try {
        m_iv2appointees.put(partitionId, masterHSId);
    } catch (Exception e) {
        VoltDB.crashLocalVoltDB("Unable to appoint new master for partition " + partitionId, true, e);
    }
    return masterHSId;
}
/** Start fixing survivors: setup scoreboard and request repair logs. */
void prepareForFaultRecovery() {
    for (Long hsid : m_survivors) {
        m_replicaRepairStructs.put(hsid, new ReplicaRepairStruct());
    }

    tmLog.info(m_whoami + "found (including self) " + m_survivors.size() +
            " surviving replicas to repair. " + " Survivors: " +
            CoreUtils.hsIdCollectionToString(m_survivors));
    VoltMessage logRequest =
            new Iv2RepairLogRequestMessage(m_requestId, Iv2RepairLogRequestMessage.SPREQUEST);
    m_mailbox.send(com.google.common.primitives.Longs.toArray(m_survivors), logRequest);
}
// silly helper to add datasources for a table catalog object
private void addDataSources(Table table, int hostId, List<Pair<Integer, Long>> partitions) {
    for (Pair<Integer, Long> p : partitions) {
        Integer partition = p.getFirst();
        Long site = p.getSecond();

        /*
         * IOException can occur if there is a problem
         * with the persistent aspects of the datasource storage
         */
        try {
            HashMap<String, ExportDataSource> dataSourcesForPartition =
                    m_dataSourcesByPartition.get(partition);
            if (dataSourcesForPartition == null) {
                dataSourcesForPartition = new HashMap<String, ExportDataSource>();
                m_dataSourcesByPartition.put(partition, dataSourcesForPartition);
            }
            ExportDataSource exportDataSource = new ExportDataSource(m_onSourceDrained,
                    "database",
                    table.getTypeName(),
                    partition,
                    site,
                    table.getSignature(),
                    m_timestamp,
                    table.getColumns(),
                    m_directory.getPath());
            m_numSources++;
            exportLog.info("Creating ExportDataSource for table " + table.getTypeName() +
                    " signature " + table.getSignature() +
                    " partition id " + partition);
            dataSourcesForPartition.put(table.getSignature(), exportDataSource);
        } catch (IOException e) {
            VoltDB.crashLocalVoltDB(
                    "Error creating datasources for table " + table.getTypeName() +
                    " host id " + hostId, true, e);
        }
    }
}
/**
 * Connect to a single server with retry. Limited exponential backoff. No timeout. This will
 * run until the process is killed if it's not able to connect.
 *
 * @param server hostname:port or just hostname (hostname can be ip).
 */
private void connectToOneServerWithRetry(String server) {
    int sleep = 1000;
    while (!shutdown.get()) {
        try {
            client.createConnection(server);
            activeConnections.incrementAndGet();
            log.info(String.format("Connected to VoltDB node at: %s.", server));
            break;
        } catch (Exception e) {
            log.warn(String.format("Connection to " + server + " failed - retrying in %d second(s).",
                    sleep / 1000));
            try {
                Thread.sleep(sleep);
            } catch (Exception interrupted) {
            }
            if (sleep < 8000) {
                sleep += sleep;
            }
        }
    }
}
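// Worked example (not from the original source): the retry delay above starts at one second,
// doubles on each failed attempt, and stops growing once it reaches eight seconds, i.e.
// 1s, 2s, 4s, 8s, 8s, ... The standalone loop below just prints that schedule for illustration.
public static void main(String[] args) {
    int sleep = 1000;
    for (int attempt = 1; attempt <= 6; attempt++) {
        System.out.println("attempt " + attempt + ": retry in " + (sleep / 1000) + " second(s)");
        if (sleep < 8000) {
            sleep += sleep; // same capped doubling as connectToOneServerWithRetry()
        }
    }
}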
/**
 * Connect to a set of servers in parallel. Each will retry until connection. This call will
 * block until all have connected.
 *
 * @throws InterruptedException if anything bad happens with the threads.
 */
private void connect() throws InterruptedException {
    log.info("Connecting to VoltDB...");

    final CountDownLatch connections = new CountDownLatch(config.parsedServers.length);

    // use a new thread to connect to each server
    for (final String server : config.parsedServers) {
        new Thread(new Runnable() {
            @Override
            public void run() {
                connectToOneServerWithRetry(server);
                connections.countDown();
            }
        }).start();
    }

    // block until all have connected
    connections.await();
}
boolean initializeGenerationFromDisk(final Connector conn, HostMessenger messenger) {
    m_diskBased = true;
    Set<Integer> partitions = new HashSet<Integer>();

    /*
     * Find all the advertisements. Once one is found, extract the nonce
     * and check for any data files related to the advertisement. If no data files
     * exist ignore the advertisement.
     */
    boolean hadValidAd = false;
    for (File f : m_directory.listFiles()) {
        if (f.getName().endsWith(".ad")) {
            boolean haveDataFiles = false;
            String nonce = f.getName().substring(0, f.getName().length() - 3);
            for (File dataFile : m_directory.listFiles()) {
                if (dataFile.getName().startsWith(nonce) && !dataFile.getName().equals(f.getName())) {
                    haveDataFiles = true;
                    break;
                }
            }

            if (haveDataFiles) {
                try {
                    addDataSource(f, partitions);
                    hadValidAd = true;
                } catch (IOException e) {
                    VoltDB.crashLocalVoltDB("Error initializing export datasource " + f, true, e);
                }
            } else {
                // Delete ads that have no data
                f.delete();
            }
        }
    }
    createAndRegisterAckMailboxes(partitions, messenger);
    exportLog.info("Restoring export generation " + m_timestamp);
    return hadValidAd;
}
/*
 * Create a datasource based on an ad file
 */
private void addDataSource(File adFile, Set<Integer> partitions) throws IOException {
    m_numSources++;
    ExportDataSource source = new ExportDataSource(m_onSourceDrained, adFile);
    partitions.add(source.getPartitionId());
    m_timestamp = source.getGeneration();
    exportLog.info("Creating ExportDataSource for " + adFile + " table " + source.getTableName() +
            " signature " + source.getSignature() +
            " partition id " + source.getPartitionId() +
            " bytes " + source.sizeInBytes());
    HashMap<String, ExportDataSource> dataSourcesForPartition =
            m_dataSourcesByPartition.get(source.getPartitionId());
    if (dataSourcesForPartition == null) {
        dataSourcesForPartition = new HashMap<String, ExportDataSource>();
        m_dataSourcesByPartition.put(source.getPartitionId(), dataSourcesForPartition);
    }
    dataSourcesForPartition.put(source.getSignature(), source);
}
public void pushExportBuffer(long uso, final long bufferPtr, ByteBuffer buffer,
        boolean sync, boolean endOfStream) {
    final java.util.concurrent.atomic.AtomicBoolean deleted =
            new java.util.concurrent.atomic.AtomicBoolean(false);
    synchronized (m_committedBuffers) {
        if (endOfStream) {
            assert(!m_endOfStream);
            assert(bufferPtr == 0);
            assert(buffer == null);
            assert(!sync);

            m_endOfStream = endOfStream;

            if (m_committedBuffers.sizeInBytes() == 0) {
                exportLog.info("Pushed EOS buffer with 0 bytes remaining");
                try {
                    m_onDrain.run();
                } finally {
                    m_onDrain = null;
                }
            }
            return;
        }
        assert(!m_endOfStream);
        if (buffer != null) {
            if (buffer.capacity() > 0) {
                try {
                    m_committedBuffers.offer(new StreamBlock(
                            new BBContainer(buffer, bufferPtr) {
                                @Override
                                public void discard() {
                                    DBBPool.deleteCharArrayMemory(address);
                                    deleted.set(true);
                                }
                            }, uso, false));
                } catch (IOException e) {
                    exportLog.error(e);
                    if (!deleted.get()) {
                        DBBPool.deleteCharArrayMemory(bufferPtr);
                    }
                }
            } else {
                /*
                 * TupleStreamWrapper::setBytesUsed propagates the USO by sending
                 * over an empty stream block. The block will be deleted
                 * on the native side when this method returns
                 */
                exportLog.info("Syncing first unpolled USO to " + uso + " for table " +
                        m_tableName + " partition " + m_partitionId);
                m_firstUnpolledUso = uso;
            }
        }
        if (sync) {
            try {
                // Don't do a real sync, just write the in memory buffers
                // to a file. @Quiesce or blocking snapshot will do the sync
                m_committedBuffers.sync(true);
            } catch (IOException e) {
                exportLog.error(e);
            }
        }
    }
}
@Override
public void acceptPromotion() throws InterruptedException, ExecutionException, KeeperException {
    // Crank up the leader caches. Use blocking startup so that we'll have valid point-in-time
    // caches later.
    m_iv2appointees.start(true);
    m_iv2masters.start(true);

    // Figure out what conditions we assumed leadership under.
    if (m_iv2appointees.pointInTimeCache().size() == 0) {
        tmLog.debug("LeaderAppointer in startup");
        m_state.set(AppointerState.CLUSTER_START);
    } else if ((m_iv2appointees.pointInTimeCache().size() != m_partitionCount) ||
               (m_iv2masters.pointInTimeCache().size() != m_partitionCount)) {
        // If we are promoted and the appointees or masters set is partial, the previous
        // appointer failed during startup (at least for now, until we add add/remove a
        // partition on the fly).
        VoltDB.crashGlobalVoltDB("Detected failure during startup, unable to start", false, null);
    } else {
        tmLog.debug("LeaderAppointer in repair");
        m_state.set(AppointerState.DONE);
    }

    if (m_state.get() == AppointerState.CLUSTER_START) {
        // Need to block the return of acceptPromotion until after the MPI is promoted.
        // Wait for this latch to countdown after appointing all the partition leaders.
        // The LeaderCache callback will count it down once it has seen all the
        // appointed leaders publish themselves as the actual leaders.
        m_startupLatch = new CountDownLatch(1);
        writeKnownLiveNodes(m_hostMessenger.getLiveHostIds());
        for (int i = 0; i < m_partitionCount; i++) {
            String dir = LeaderElector.electionDirForPartition(i);
            // Race along with all of the replicas for this partition to create the ZK parent node
            try {
                m_zk.create(dir, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
            } catch (KeeperException.NodeExistsException e) {
                // expected on all nodes that don't start() first.
            }
            m_callbacks[i] = new PartitionCallback(i);
            Pair<BabySitter, List<String>> sitterstuff =
                    BabySitter.blockingFactory(m_zk, dir, m_callbacks[i], m_es);
            m_partitionWatchers[i] = sitterstuff.getFirst();
        }
        m_startupLatch.await();
    } else {
        // If we're taking over for a failed LeaderAppointer, we know when
        // we get here that every partition had a leader at some point in
        // time. We'll seed each of the PartitionCallbacks for each
        // partition with the HSID of the last published leader. The
        // blocking startup of the BabySitter watching that partition will
        // call our callback, get the current full set of replicas, and
        // appoint a new leader if the seeded one has actually failed
        Map<Integer, Long> masters = m_iv2masters.pointInTimeCache();
        tmLog.info("LeaderAppointer repairing with master set: " + masters);
        for (Entry<Integer, Long> master : masters.entrySet()) {
            int partId = master.getKey();
            String dir = LeaderElector.electionDirForPartition(partId);
            m_callbacks[partId] = new PartitionCallback(partId, master.getValue());
            Pair<BabySitter, List<String>> sitterstuff =
                    BabySitter.blockingFactory(m_zk, dir, m_callbacks[partId], m_es);
            m_partitionWatchers[partId] = sitterstuff.getFirst();
        }
        // just go ahead and promote our MPI
        m_MPI.acceptPromotion();
    }
}
/**
 * Prints a one line update on performance that can be printed periodically during a benchmark.
 */
private synchronized void printStatistics() {
    log.info(String.format("Executed %d", c.get()));
}
/**
 * Create a new data source.
 *
 * @param onDrain
 * @param db
 * @param tableName
 * @param partitionId
 * @param HSId
 * @param signature
 * @param generation
 * @param catalogMap
 * @param overflowPath
 */
public ExportDataSource(
        Runnable onDrain,
        String db,
        String tableName,
        int partitionId,
        long HSId,
        String signature,
        long generation,
        CatalogMap<Column> catalogMap,
        String overflowPath) throws IOException {
    m_generation = generation;
    m_onDrain = onDrain;
    m_database = db;
    m_tableName = tableName;

    String nonce = signature + "_" + HSId + "_" + partitionId;

    m_committedBuffers = new StreamBlockQueue(overflowPath, nonce);

    /*
     * This is not the catalog relativeIndex(). This ID incorporates
     * a catalog version and a table id so that it is constant across
     * catalog updates that add or drop tables.
     */
    m_signature = signature;
    m_partitionId = partitionId;
    m_HSId = HSId;

    // Add the Export meta-data columns to the schema followed by the
    // catalog columns for this table.
    m_columnNames.add("VOLT_TRANSACTION_ID");
    m_columnTypes.add(((int) VoltType.BIGINT.getValue()));

    m_columnNames.add("VOLT_EXPORT_TIMESTAMP");
    m_columnTypes.add(((int) VoltType.BIGINT.getValue()));

    m_columnNames.add("VOLT_EXPORT_SEQUENCE_NUMBER");
    m_columnTypes.add(((int) VoltType.BIGINT.getValue()));

    m_columnNames.add("VOLT_PARTITION_ID");
    m_columnTypes.add(((int) VoltType.BIGINT.getValue()));

    m_columnNames.add("VOLT_SITE_ID");
    m_columnTypes.add(((int) VoltType.BIGINT.getValue()));

    m_columnNames.add("VOLT_EXPORT_OPERATION");
    m_columnTypes.add(((int) VoltType.TINYINT.getValue()));

    for (Column c : CatalogUtil.getSortedCatalogItems(catalogMap, "index")) {
        m_columnNames.add(c.getName());
        m_columnTypes.add(c.getType());
    }

    File adFile = new VoltFile(overflowPath, nonce + ".ad");
    exportLog.info("Creating ad for " + nonce);
    assert(!adFile.exists());
    FastSerializer fs = new FastSerializer();
    fs.writeLong(m_HSId);
    fs.writeString(m_database);
    writeAdvertisementTo(fs);
    FileOutputStream fos = new FileOutputStream(adFile);
    fos.write(fs.getBytes());
    fos.getFD().sync();
    fos.close();

    // compute the number of bytes necessary to hold one bit per
    // schema column
    m_nullArrayLength = ((m_columnTypes.size() + 7) & -8) >> 3;
}
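// Worked example (not from the original source): ((n + 7) & -8) >> 3 rounds the column count
// up to the next multiple of eight and then divides by eight, i.e. ceil(n / 8) bytes of
// null-indicator bits. For instance, the six export metadata columns plus a four-column table
// give n = 10, so ((10 + 7) & -8) >> 3 = 16 >> 3 = 2 bytes.
static int nullArrayLengthSketch(int columnCount) {
    return ((columnCount + 7) & -8) >> 3; // nullArrayLengthSketch(10) == 2
}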
/**
 * Core benchmark code. Connect. Initialize. Run the loop. Cleanup. Print Results.
 *
 * @throws Exception if anything unexpected happens.
 */
public void runBenchmark() throws Exception {
    log.info(HORIZONTAL_RULE);
    log.info(" Setup & Initialization");
    log.info(HORIZONTAL_RULE);

    final int cidCount = 128;
    final long[] lastRid = new long[cidCount];
    for (int i = 0; i < lastRid.length; i++) {
        lastRid[i] = 0;
    }

    // connect to one or more servers, loop until success
    connect();

    log.info(HORIZONTAL_RULE);
    log.info("Starting Benchmark");
    log.info(HORIZONTAL_RULE);

    // print periodic statistics to the console
    benchmarkStartTS = System.currentTimeMillis();
    schedulePeriodicStats();

    // Run the benchmark loop for the requested duration
    // The throughput may be throttled depending on client configuration
    log.info("Running benchmark...");

    BigTableLoader partitionedLoader = new BigTableLoader(client, "bigp",
            (config.partfillerrowmb * 1024 * 1024) / config.fillerrowsize, config.fillerrowsize);
    partitionedLoader.start();
    BigTableLoader replicatedLoader = new BigTableLoader(client, "bigr",
            (config.replfillerrowmb * 1024 * 1024) / config.fillerrowsize, config.fillerrowsize);
    replicatedLoader.start();

    ReadThread readThread = new ReadThread(client, config.threads, config.threadoffset);
    readThread.start();

    AdHocMayhemThread adHocMayhemThread = new AdHocMayhemThread(client);
    adHocMayhemThread.start();

    List<ClientThread> clientThreads = new ArrayList<ClientThread>();
    for (byte cid = (byte) config.threadoffset; cid < config.threadoffset + config.threads; cid++) {
        ClientThread clientThread = new ClientThread(cid, c, client, processor);
        clientThread.start();
        clientThreads.add(clientThread);
    }

    final long benchmarkEndTime = System.currentTimeMillis() + (1000l * config.duration);

    while (benchmarkEndTime > System.currentTimeMillis()) {
        Thread.yield();
    }

    replicatedLoader.shutdown();
    partitionedLoader.shutdown();
    readThread.shutdown();
    adHocMayhemThread.shutdown();
    for (ClientThread clientThread : clientThreads) {
        clientThread.shutdown();
    }
    replicatedLoader.join();
    partitionedLoader.join();
    readThread.join();
    adHocMayhemThread.join();
    for (ClientThread clientThread : clientThreads) {
        clientThread.join();
    }

    // cancel periodic stats printing
    timer.cancel();
    shutdown.set(true);
    es.shutdownNow();

    // block until all outstanding txns return
    client.drain();
    client.close();
}