/**
 * Create the completion node for the snapshot identified by the txnId. It assumes that all hosts
 * will race to call this, so it doesn't fail if the node already exists.
 *
 * @param path Path of the snapshot
 * @param nonce Nonce of the snapshot
 * @param txnId The snapshot txnId
 * @param isTruncation Whether or not this is a truncation snapshot
 * @param truncReqId Optional unique ID fed back to the monitor for identification
 * @return the callback for the asynchronous create of the completion node; the create does not
 *         fail if the node already exists
 */
public static ZKUtil.StringCallback createSnapshotCompletionNode(
        String path, String nonce, long txnId, boolean isTruncation, String truncReqId) {
    if (txnId <= 0) {
        VoltDB.crashGlobalVoltDB("TxnId must be greater than 0", true, null);
    }

    byte nodeBytes[] = null;
    try {
        JSONStringer stringer = new JSONStringer();
        stringer.object();
        stringer.key("txnId").value(txnId);
        stringer.key("isTruncation").value(isTruncation);
        stringer.key("didSucceed").value(false);
        stringer.key("hostCount").value(-1);
        stringer.key("path").value(path);
        stringer.key("nonce").value(nonce);
        stringer.key("truncReqId").value(truncReqId);
        stringer.key("exportSequenceNumbers").object().endObject();
        stringer.endObject();
        JSONObject jsonObj = new JSONObject(stringer.toString());
        nodeBytes = jsonObj.toString(4).getBytes(Charsets.UTF_8);
    } catch (Exception e) {
        VoltDB.crashLocalVoltDB("Error serializing snapshot completion node JSON", true, e);
    }

    ZKUtil.StringCallback cb = new ZKUtil.StringCallback();
    final String snapshotPath = VoltZK.completed_snapshots + "/" + txnId;
    VoltDB.instance()
        .getHostMessenger()
        .getZK()
        .create(snapshotPath, nodeBytes, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT, cb, null);
    return cb;
}
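// A minimal, self-contained sketch (hypothetical helper, not VoltDB code) of the
// race-tolerant create pattern the method above relies on: every host issues the
// same create, the winner creates the node, and the losers treat NODEEXISTS as
// success and move on.
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.zookeeper.ZooKeeper;

class RaceTolerantCreateSketch {
    static void createIgnoringExists(ZooKeeper zk, String path, byte[] data)
            throws KeeperException, InterruptedException {
        try {
            zk.create(path, data, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        } catch (KeeperException.NodeExistsException e) {
            // Another host won the race; the node is already there, which is fine.
        }
    }
}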
public MockVoltDB(int clientPort, int adminPort, int httpPort, int drPort) {
    try {
        JSONObject obj = new JSONObject();
        JSONArray jsonArray = new JSONArray();
        jsonArray.put("127.0.0.1");
        obj.put("interfaces", jsonArray);
        obj.put("clientPort", clientPort);
        obj.put("adminPort", adminPort);
        obj.put("httpPort", httpPort);
        obj.put("drPort", drPort);
        m_localMetadata = obj.toString(4);

        m_catalog = new Catalog();
        m_catalog.execute("add / clusters " + m_clusterName);
        m_catalog.execute(
            "add "
                + m_catalog.getClusters().get(m_clusterName).getPath()
                + " databases "
                + m_databaseName);
        Cluster cluster = m_catalog.getClusters().get(m_clusterName);
        assert (cluster != null);
        // Set a sane default for TestMessaging (at least)
        cluster.setHeartbeattimeout(10000);

        try {
            m_hostMessenger.start();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        VoltZK.createPersistentZKNodes(m_hostMessenger.getZK());
        m_hostMessenger
            .getZK()
            .create(
                VoltZK.cluster_metadata + "/" + m_hostMessenger.getHostId(),
                getLocalMetadata().getBytes("UTF-8"),
                Ids.OPEN_ACL_UNSAFE,
                CreateMode.EPHEMERAL);

        m_hostMessenger.generateMailboxId(
            m_hostMessenger.getHSIdForLocalSite(HostMessenger.STATS_SITE_ID));
        m_statsAgent = new StatsAgent();
        m_statsAgent.registerMailbox(
            m_hostMessenger, m_hostMessenger.getHSIdForLocalSite(HostMessenger.STATS_SITE_ID));
        for (MailboxType type : MailboxType.values()) {
            m_mailboxMap.put(type, new LinkedList<MailboxNodeContent>());
        }
        m_mailboxMap
            .get(MailboxType.StatsAgent)
            .add(
                new MailboxNodeContent(
                    m_hostMessenger.getHSIdForLocalSite(HostMessenger.STATS_SITE_ID), null));
        m_siteTracker = new SiteTracker(m_hostId, m_mailboxMap);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
private void writeKnownLiveNodes(List<Integer> liveNodes) {
    try {
        if (m_zk.exists(VoltZK.lastKnownLiveNodes, null) == null) {
            // VoltZK.createPersistentZKNodes should have done this
            m_zk.create(VoltZK.lastKnownLiveNodes, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        }
        JSONStringer stringer = new JSONStringer();
        stringer.object();
        stringer.key("liveNodes").array();
        for (Integer node : liveNodes) {
            stringer.value(node);
        }
        stringer.endArray();
        stringer.endObject();
        JSONObject obj = new JSONObject(stringer.toString());
        tmLog.debug("Writing live nodes to ZK: " + obj.toString(4));
        m_zk.setData(VoltZK.lastKnownLiveNodes, obj.toString(4).getBytes("UTF-8"), -1);
    } catch (Exception e) {
        VoltDB.crashLocalVoltDB(
            "Unable to update known live nodes at ZK path: " + VoltZK.lastKnownLiveNodes, true, e);
    }
}
/**
 * Once the participating host count is set, SnapshotCompletionMonitor can check this ZK node to
 * determine whether the snapshot has finished.
 *
 * <p>This should only be called when all participants have responded. It is possible that some
 * hosts finish taking the snapshot before the coordinator logs the participating host count. In
 * that case, the host count will already have been decremented several times. To make sure
 * finished hosts are accounted for correctly, this method adds participating host count + 1 to
 * the current host count.
 *
 * @param txnId The snapshot txnId
 * @param participantCount The number of hosts participating in this snapshot
 */
public static void logParticipatingHostCount(long txnId, int participantCount) {
    ZooKeeper zk = VoltDB.instance().getHostMessenger().getZK();
    final String snapshotPath = VoltZK.completed_snapshots + "/" + txnId;

    boolean success = false;
    while (!success) {
        Stat stat = new Stat();
        byte data[] = null;
        try {
            data = zk.getData(snapshotPath, false, stat);
        } catch (KeeperException e) {
            if (e.code() == KeeperException.Code.NONODE) {
                // If snapshot creation failed for some reason, the node won't exist. Ignore.
                return;
            }
            VoltDB.crashLocalVoltDB("Failed to get snapshot completion node", true, e);
        } catch (InterruptedException e) {
            VoltDB.crashLocalVoltDB("Interrupted getting snapshot completion node", true, e);
        }
        if (data == null) {
            VoltDB.crashLocalVoltDB("Data should not be null if the node exists", false, null);
        }

        try {
            JSONObject jsonObj = new JSONObject(new String(data, Charsets.UTF_8));
            if (jsonObj.getLong("txnId") != txnId) {
                VoltDB.crashLocalVoltDB("TxnId should match", false, null);
            }

            int hostCount = jsonObj.getInt("hostCount");
            // +1 because hostCount was initialized to -1
            jsonObj.put("hostCount", hostCount + participantCount + 1);
            zk.setData(snapshotPath, jsonObj.toString(4).getBytes(Charsets.UTF_8), stat.getVersion());
        } catch (KeeperException.BadVersionException e) {
            // Another host updated the node concurrently; re-read and retry.
            continue;
        } catch (Exception e) {
            VoltDB.crashLocalVoltDB("This ZK call should never fail", true, e);
        }
        success = true;
    }
}
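// A worked example (plain ints, not VoltDB code) of the hostCount bookkeeping
// described in the javadoc above: the node starts at -1, each finishing host
// decrements by 1, and the coordinator adds participantCount + 1, so the counter
// reaches 0 exactly when every participant has finished.
class HostCountSketch {
    public static void main(String[] args) {
        int hostCount = -1;     // initial value from createSnapshotCompletionNode
        int participantCount = 3;
        hostCount -= 1;         // one host finishes before the count is logged -> -2
        hostCount += participantCount + 1; // coordinator logs: -2 + 3 + 1 = 2 outstanding
        hostCount -= 1;         // second host finishes -> 1
        hostCount -= 1;         // last host finishes -> 0, snapshot is complete
        assert hostCount == 0;
    }
}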
public DefaultSnapshotDataTarget(
        final File file,
        final int hostId,
        final String clusterName,
        final String databaseName,
        final String tableName,
        final int numPartitions,
        final boolean isReplicated,
        final List<Integer> partitionIds,
        final VoltTable schemaTable,
        final long txnId,
        final long timestamp,
        int version[])
        throws IOException {
    String hostname = CoreUtils.getHostnameOrAddress();
    m_file = file;
    m_tableName = tableName;
    m_fos = new FileOutputStream(file);
    m_channel = m_fos.getChannel();
    m_needsFinalClose = !isReplicated;

    final FastSerializer fs = new FastSerializer();
    fs.writeInt(0); // CRC placeholder
    fs.writeInt(0); // Header length placeholder
    // Completion flag: written as 1 (complete) for the CRC calculation, then
    // overwritten with 0 below until the snapshot actually finishes
    fs.writeByte(1);
    for (int ii = 0; ii < 4; ii++) {
        fs.writeInt(version[ii]); // version
    }

    JSONStringer stringer = new JSONStringer();
    byte jsonBytes[] = null;
    try {
        stringer.object();
        stringer.key("txnId").value(txnId);
        stringer.key("hostId").value(hostId);
        stringer.key("hostname").value(hostname);
        stringer.key("clusterName").value(clusterName);
        stringer.key("databaseName").value(databaseName);
        stringer.key("tableName").value(tableName.toUpperCase());
        stringer.key("isReplicated").value(isReplicated);
        stringer.key("isCompressed").value(true);
        stringer.key("checksumType").value("CRC32C");
        stringer.key("timestamp").value(timestamp);
        /*
         * The timestamp string is for human consumption; automated consumers
         * should use the actual timestamp
         */
        stringer.key("timestampString").value(SnapshotUtil.formatHumanReadableDate(timestamp));
        if (!isReplicated) {
            stringer.key("partitionIds").array();
            for (int partitionId : partitionIds) {
                stringer.value(partitionId);
            }
            stringer.endArray();
            stringer.key("numPartitions").value(numPartitions);
        }
        stringer.endObject();
        String jsonString = stringer.toString();
        JSONObject jsonObj = new JSONObject(jsonString);
        jsonString = jsonObj.toString(4);
        jsonBytes = jsonString.getBytes("UTF-8");
    } catch (Exception e) {
        throw new IOException(e);
    }

    fs.writeInt(jsonBytes.length);
    fs.write(jsonBytes);

    final BBContainer container = fs.getBBContainer();
    container.b.position(4);
    container.b.putInt(container.b.remaining() - 4);
    container.b.position(0);

    final byte schemaBytes[] = PrivateVoltTableFactory.getSchemaBytes(schemaTable);

    final PureJavaCrc32 crc = new PureJavaCrc32();
    ByteBuffer aggregateBuffer = ByteBuffer.allocate(container.b.remaining() + schemaBytes.length);
    aggregateBuffer.put(container.b);
    aggregateBuffer.put(schemaBytes);
    aggregateBuffer.flip();
    crc.update(aggregateBuffer.array(), 4, aggregateBuffer.capacity() - 4);

    final int crcValue = (int) crc.getValue();
    aggregateBuffer.putInt(crcValue).position(8);
    aggregateBuffer.put((byte) 0).position(0); // Haven't actually finished writing the file

    if (m_simulateFullDiskWritingHeader) {
        m_writeException = new IOException("Disk full");
        m_writeFailed = true;
        m_fos.close();
        throw m_writeException;
    }

    /*
     * Be completely sure the write succeeded. If it didn't,
     * the disk is probably full or the path is bunk, etc.
     */
    m_acceptOneWrite = true;
    ListenableFuture<?> writeFuture =
        write(Callables.returning((BBContainer) DBBPool.wrapBB(aggregateBuffer)), false);
    try {
        writeFuture.get();
    } catch (InterruptedException e) {
        m_fos.close();
        throw new java.io.InterruptedIOException();
    } catch (ExecutionException e) {
        m_fos.close();
        throw m_writeException;
    }
    if (m_writeFailed) {
        m_fos.close();
        throw m_writeException;
    }

    ScheduledFuture<?> syncTask = null;
    syncTask =
        m_syncService.scheduleAtFixedRate(
            new Runnable() {
                @Override
                public void run() {
                    // Only sync once at least 4 megabytes have accumulated, enough to
                    // amortize the cost of seeking on ye olden platters. Since we are
                    // appending to a file it's actually 2 seeks.
                    while (m_bytesWrittenSinceLastSync.get() > (1024 * 1024 * 4)) {
                        final int bytesSinceLastSync = m_bytesWrittenSinceLastSync.getAndSet(0);
                        try {
                            m_channel.force(false);
                        } catch (IOException e) {
                            if (!(e instanceof java.nio.channels.AsynchronousCloseException)) {
                                SNAP_LOG.error("Error syncing snapshot", e);
                            } else {
                                SNAP_LOG.debug(
                                    "Asynchronous close while syncing snapshot data, presumably graceful",
                                    e);
                            }
                        }
                        m_bytesAllowedBeforeSync.release(bytesSinceLastSync);
                    }
                }
            },
            SNAPSHOT_SYNC_FREQUENCY,
            SNAPSHOT_SYNC_FREQUENCY,
            TimeUnit.MILLISECONDS);
    m_syncTask = syncTask;
}
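// A minimal, self-contained sketch (hypothetical names, not VoltDB code) of the
// amortized-fsync pattern the sync task above uses: writers bump a shared byte
// counter, and a periodic task forces the channel to disk only once enough data
// has accumulated, so many small writes share a single seek-heavy fsync.
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

class AmortizedSyncSketch {
    private static final int SYNC_THRESHOLD = 4 * 1024 * 1024; // 4MB, as above
    private final AtomicInteger m_bytesSinceSync = new AtomicInteger();

    // Writers call this after each append instead of forcing the channel themselves.
    void recordWrite(int bytes) {
        m_bytesSinceSync.addAndGet(bytes);
    }

    void scheduleSync(final FileChannel channel, ScheduledExecutorService scheduler,
            long periodMillis) {
        scheduler.scheduleAtFixedRate(new Runnable() {
            @Override
            public void run() {
                if (m_bytesSinceSync.get() >= SYNC_THRESHOLD) {
                    m_bytesSinceSync.set(0);
                    try {
                        channel.force(false); // flush data only, not metadata
                    } catch (IOException ignored) {
                        // a closed channel here usually means a graceful shutdown
                    }
                }
            }
        }, periodMillis, periodMillis, TimeUnit.MILLISECONDS);
    }
}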
/**
 * Compile and cache the statement and plan and return the final plan graph.
 *
 * @param sql the statement text to plan
 * @param paramCount the number of parameters the statement expects
 */
public List<AbstractPlanNode> compile(
        String sql,
        int paramCount,
        String joinOrder,
        Object partitionParameter,
        boolean inferSP,
        boolean lockInSP) {
    Statement catalogStmt = proc.getStatements().add("stmt-" + String.valueOf(compileCounter++));
    catalogStmt.setSqltext(sql);
    catalogStmt.setSinglepartition(partitionParameter != null);
    catalogStmt.setBatched(false);
    catalogStmt.setParamnum(paramCount);

    // determine the type of the query
    QueryType qtype = QueryType.SELECT;
    catalogStmt.setReadonly(true);
    if (sql.toLowerCase().startsWith("insert")) {
        qtype = QueryType.INSERT;
        catalogStmt.setReadonly(false);
    }
    if (sql.toLowerCase().startsWith("update")) {
        qtype = QueryType.UPDATE;
        catalogStmt.setReadonly(false);
    }
    if (sql.toLowerCase().startsWith("delete")) {
        qtype = QueryType.DELETE;
        catalogStmt.setReadonly(false);
    }
    catalogStmt.setQuerytype(qtype.getValue());

    // name will look like "basename-stmt-#"
    String name = catalogStmt.getParent().getTypeName() + "-" + catalogStmt.getTypeName();

    DatabaseEstimates estimates = new DatabaseEstimates();
    TrivialCostModel costModel = new TrivialCostModel();
    PartitioningForStatement partitioning =
        new PartitioningForStatement(partitionParameter, inferSP, lockInSP);
    QueryPlanner planner =
        new QueryPlanner(
            catalogStmt.getSqltext(),
            catalogStmt.getTypeName(),
            catalogStmt.getParent().getTypeName(),
            catalog.getClusters().get("cluster"),
            db,
            partitioning,
            hsql,
            estimates,
            false,
            StatementCompiler.DEFAULT_MAX_JOIN_TABLES,
            costModel,
            null,
            joinOrder);

    CompiledPlan plan = null;
    planner.parse();
    plan = planner.plan();
    assert (plan != null);

    // Input Parameters
    // We will need to update the system catalogs with this new information
    // If this is an adhoc query then there won't be any parameters
    for (int i = 0; i < plan.parameters.length; ++i) {
        StmtParameter catalogParam = catalogStmt.getParameters().add(String.valueOf(i));
        catalogParam.setJavatype(plan.parameters[i].getValue());
        catalogParam.setIndex(i);
    }

    // Output Columns
    int index = 0;
    for (SchemaColumn col : plan.columns.getColumns()) {
        Column catColumn = catalogStmt.getOutput_columns().add(String.valueOf(index));
        catColumn.setNullable(false);
        catColumn.setIndex(index);
        catColumn.setName(col.getColumnName());
        catColumn.setType(col.getType().getValue());
        catColumn.setSize(col.getSize());
        index++;
    }

    List<PlanNodeList> nodeLists = new ArrayList<PlanNodeList>();
    nodeLists.add(new PlanNodeList(plan.rootPlanGraph));
    if (plan.subPlanGraph != null) {
        nodeLists.add(new PlanNodeList(plan.subPlanGraph));
    }

    // Store the list of parameter types and indexes in the plan node list.
    List<Pair<Integer, VoltType>> parameters = nodeLists.get(0).getParameters();
    for (int i = 0; i < plan.parameters.length; ++i) {
        Pair<Integer, VoltType> parameter = new Pair<Integer, VoltType>(i, plan.parameters[i]);
        parameters.add(parameter);
    }

    // Now update our catalog information
    // HACK: We're using the node_tree's hashCode() as its name. It would be really
    // nice if the Catalog code gave us a GUID without needing a name first...
    String json = null;
    try {
        JSONObject jobj = new JSONObject(nodeLists.get(0).toJSONString());
        json = jobj.toString(4);
    } catch (JSONException e2) {
        e2.printStackTrace();
        System.exit(-1);
    }

    //
    // We then stick a serialized version of PlanNodeTree into a PlanFragment
    //
    try {
        BuildDirectoryUtils.writeFile("statement-plans", name + "_json.txt", json);
        BuildDirectoryUtils.writeFile(
            "statement-plans", name + ".dot", nodeLists.get(0).toDOTString("name"));
    } catch (Exception e) {
        e.printStackTrace();
    }

    List<AbstractPlanNode> plannodes = new ArrayList<AbstractPlanNode>();
    for (PlanNodeList nodeList : nodeLists) {
        plannodes.add(nodeList.getRootPlanNode());
    }

    m_currentPlan = plan;
    return plannodes;
}
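// A hypothetical usage sketch of compile() from a planner test; the helper
// instance name ("aide") and the SQL text are illustrative, not from the source.
List<AbstractPlanNode> nodes =
    aide.compile("select * from warehouse;", 0, null, null, false, false);
AbstractPlanNode root = nodes.get(0); // root fragment of the compiled plan graph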