/**
 * Create the completion node for the snapshot identified by the txnId. It assumes that all hosts
 * will race to call this, so it doesn't fail if the node already exists.
 *
 * @param path Path of the snapshot
 * @param nonce Nonce of the snapshot
 * @param txnId Transaction ID of the snapshot
 * @param isTruncation Whether or not this is a truncation snapshot
 * @param truncReqId Optional unique ID fed back to the monitor for identification
 * @return the callback the asynchronous ZooKeeper create was issued with; a node-exists result
 *     is expected when another host wins the race.
 */
public static ZKUtil.StringCallback createSnapshotCompletionNode(
    String path, String nonce, long txnId, boolean isTruncation, String truncReqId) {
  if (txnId <= 0) {
    VoltDB.crashGlobalVoltDB("TxnId must be greater than 0", true, null);
  }

  byte[] nodeBytes = null;
  try {
    JSONStringer stringer = new JSONStringer();
    stringer.object();
    stringer.key("txnId").value(txnId);
    stringer.key("isTruncation").value(isTruncation);
    stringer.key("didSucceed").value(false);
    stringer.key("hostCount").value(-1);
    stringer.key("path").value(path);
    stringer.key("nonce").value(nonce);
    stringer.key("truncReqId").value(truncReqId);
    stringer.key("exportSequenceNumbers").object().endObject();
    stringer.endObject();
    JSONObject jsonObj = new JSONObject(stringer.toString());
    nodeBytes = jsonObj.toString(4).getBytes(Charsets.UTF_8);
  } catch (Exception e) {
    VoltDB.crashLocalVoltDB("Error serializing snapshot completion node JSON", true, e);
  }

  ZKUtil.StringCallback cb = new ZKUtil.StringCallback();
  final String snapshotPath = VoltZK.completed_snapshots + "/" + txnId;
  VoltDB.instance()
      .getHostMessenger()
      .getZK()
      .create(snapshotPath, nodeBytes, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT, cb, null);

  return cb;
}
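/*
 * Illustrative sketch, not part of the original source: the same
 * create-and-tolerate-NodeExistsException race, written against the raw
 * synchronous ZooKeeper API. The class and method names are hypothetical;
 * the ZooKeeper calls themselves are the standard org.apache.zookeeper API.
 */
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.zookeeper.ZooKeeper;

final class SnapshotNodeRace {
  /** Returns true if this host won the race to create the node. */
  static boolean tryCreate(ZooKeeper zk, String path, byte[] payload)
      throws KeeperException, InterruptedException {
    try {
      zk.create(path, payload, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
      return true;
    } catch (KeeperException.NodeExistsException e) {
      // Another host created it first; that is expected and fine.
      return false;
    }
  }
}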
@Override
protected void loadFromJSONObject(JSONObject obj, StmtTableScan tableScan) throws JSONException {
  m_columnIndex = obj.getInt(Members.COLUMN_IDX);
  if (obj.has(Members.TABLE_IDX)) {
    m_tableIdx = obj.getInt(Members.TABLE_IDX);
  }
  if (tableScan != null) {
    m_tableAlias = tableScan.getTableAlias();
    m_tableName = tableScan.getTableName();
    m_columnName = tableScan.getColumnName(m_columnIndex);
  }
}
@Override
public void loadFromJSONObject(JSONObject obj, Database db) throws JSONException {
  m_isNull = false;
  if (!obj.isNull(Members.VALUE.name())) {
    m_value = obj.getString(Members.VALUE.name());
  } else {
    m_isNull = true;
  }
  // An explicit ISNULL field, when present, overrides the null-ness inferred
  // from the VALUE field above.
  if (!obj.isNull(Members.ISNULL.name())) {
    m_isNull = obj.getBoolean(Members.ISNULL.name());
  }
}
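/*
 * Hedged sketch of the precedence rule above as a standalone helper: an
 * explicit "ISNULL" flag wins over whatever "VALUE" implies. The helper and
 * its literal key names are illustrative, not part of the original class;
 * it assumes an org.json-style API like the one used throughout this code.
 */
import org.json.JSONException;
import org.json.JSONObject;

final class NullableField {
  /** Returns the decoded value, or null if the field is null. */
  static String decode(JSONObject obj) throws JSONException {
    boolean isNull = obj.isNull("VALUE");
    if (!obj.isNull("ISNULL")) {
      isNull = obj.getBoolean("ISNULL"); // the explicit flag takes precedence
    }
    return isNull ? null : obj.optString("VALUE", null);
  }
}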
@Override
public void run() {
  try {
    JSONStringer js = new JSONStringer();
    js.object();
    js.key("role").value(m_config.m_replicationRole.ordinal());
    js.key("active").value(m_rvdb.getReplicationActive());
    js.endObject();

    ZooKeeper zk = m_rvdb.getHostMessenger().getZK();
    // Rejoining nodes figure out the replication role from other nodes
    if (!m_isRejoin) {
      try {
        zk.create(
            VoltZK.replicationconfig,
            js.toString().getBytes("UTF-8"),
            Ids.OPEN_ACL_UNSAFE,
            CreateMode.PERSISTENT);
      } catch (KeeperException.NodeExistsException e) {
        // Expected: another host created the config first; read it back below.
      }
      String discoveredReplicationConfig =
          new String(zk.getData(VoltZK.replicationconfig, false, null), "UTF-8");
      JSONObject discoveredjsObj = new JSONObject(discoveredReplicationConfig);
      ReplicationRole discoveredRole =
          ReplicationRole.get((byte) discoveredjsObj.getLong("role"));
      if (!discoveredRole.equals(m_config.m_replicationRole)) {
        VoltDB.crashGlobalVoltDB(
            "Discovered replication role "
                + discoveredRole
                + " doesn't match locally specified replication role "
                + m_config.m_replicationRole,
            true,
            null);
      }
      // See if we should bring the server up in WAN replication mode
      m_rvdb.setReplicationRole(discoveredRole);
    } else {
      String discoveredReplicationConfig =
          new String(zk.getData(VoltZK.replicationconfig, false, null), "UTF-8");
      JSONObject discoveredjsObj = new JSONObject(discoveredReplicationConfig);
      ReplicationRole discoveredRole =
          ReplicationRole.get((byte) discoveredjsObj.getLong("role"));
      boolean replicationActive = discoveredjsObj.getBoolean("active");
      // See if we should bring the server up in WAN replication mode
      m_rvdb.setReplicationRole(discoveredRole);
      m_rvdb.setReplicationActive(replicationActive);
    }
  } catch (Exception e) {
    VoltDB.crashGlobalVoltDB("Error discovering replication role", false, e);
  }
}
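/*
 * Illustrative sketch of the create-or-read agreement pattern the code above
 * relies on: whichever node creates the config node first defines the
 * cluster-wide config, and every node (winner included) reads the node back
 * as the authoritative copy. Helper names are hypothetical; the ZooKeeper
 * calls are the standard API.
 */
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.ZooKeeper;

final class CreateOrRead {
  static byte[] createOrRead(ZooKeeper zk, String path, byte[] proposed)
      throws KeeperException, InterruptedException {
    try {
      zk.create(path, proposed, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
    } catch (KeeperException.NodeExistsException ignored) {
      // Lost the race: another node's config is authoritative.
    }
    return zk.getData(path, false, null); // read back the winning config
  }
}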
private long assignLeader(int partitionId, List<Long> children) {
  // We used masterHostId = -1 as a way to force the leader choice to be
  // the first replica in the list, if we don't have some other mechanism
  // which has successfully overridden it.
  int masterHostId = -1;
  if (m_state.get() == AppointerState.CLUSTER_START) {
    try {
      // Find the master for this partition in the topology
      JSONArray parts = m_topo.getJSONArray("partitions");
      for (int p = 0; p < parts.length(); p++) {
        JSONObject aPartition = parts.getJSONObject(p);
        int pid = aPartition.getInt("partition_id");
        if (pid == partitionId) {
          masterHostId = aPartition.getInt("master");
        }
      }
    } catch (JSONException jse) {
      tmLog.error(
          "Failed to find master for partition "
              + partitionId
              + ", defaulting to the first replica");
      jse.printStackTrace();
      masterHostId = -1; // fall back to the first-replica default
    }
  } else {
    // For now, if we're appointing a new leader as a result of a
    // failure, just pick the first replica in the children list.
    // Could eventually do something more complex here to try to keep a
    // semi-balance, but it's unclear that this has much utility until
    // we add rebalancing on rejoin as well.
    masterHostId = -1;
  }

  long masterHSId = children.get(0);
  for (Long child : children) {
    if (CoreUtils.getHostIdFromHSId(child) == masterHostId) {
      masterHSId = child;
      break;
    }
  }

  tmLog.info(
      "Appointing HSId "
          + CoreUtils.hsIdToString(masterHSId)
          + " as leader for partition "
          + partitionId);
  try {
    m_iv2appointees.put(partitionId, masterHSId);
  } catch (Exception e) {
    VoltDB.crashLocalVoltDB(
        "Unable to appoint new master for partition " + partitionId, true, e);
  }
  return masterHSId;
}
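/*
 * Hedged sketch: pulling the "master" host id for one partition out of a
 * topology JSON object shaped like the one the code above iterates,
 * i.e. {"partitions":[{"partition_id":N,"master":H,...},...]}. The field
 * names come from the surrounding code; the helper itself is hypothetical.
 */
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

final class TopoUtil {
  static int masterHostFor(JSONObject topo, int partitionId) throws JSONException {
    JSONArray parts = topo.getJSONArray("partitions");
    for (int p = 0; p < parts.length(); p++) {
      JSONObject part = parts.getJSONObject(p);
      if (part.getInt("partition_id") == partitionId) {
        return part.getInt("master");
      }
    }
    return -1; // same "no override, use first replica" sentinel as above
  }
}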
private Set<Integer> readPriorKnownLiveNodes() {
  Set<Integer> nodes = new HashSet<Integer>();
  try {
    byte[] data = m_zk.getData(VoltZK.lastKnownLiveNodes, false, null);
    String jsonString = new String(data, "UTF-8");
    tmLog.debug("Read prior known live nodes: " + jsonString);
    JSONObject jsObj = new JSONObject(jsonString);
    JSONArray jsonNodes = jsObj.getJSONArray("liveNodes");
    for (int ii = 0; ii < jsonNodes.length(); ii++) {
      nodes.add(jsonNodes.getInt(ii));
    }
  } catch (Exception e) {
    VoltDB.crashLocalVoltDB(
        "Unable to read prior known live nodes at ZK path: " + VoltZK.lastKnownLiveNodes,
        true,
        e);
  }
  return nodes;
}
/**
 * Once the participating host count is set, SnapshotCompletionMonitor can check this ZK node to
 * determine whether the snapshot has finished or not.
 *
 * <p>This should only be called when all participants have responded. It is possible that some
 * hosts finish taking the snapshot before the coordinator logs the participating host count. In
 * that case, the host count will already have been decremented multiple times. To make sure
 * finished hosts are counted correctly, this method adds participantCount + 1 to the current
 * host count (the +1 compensates for the node's initial value of -1).
 *
 * @param txnId The snapshot txnId
 * @param participantCount The number of hosts participating in this snapshot
 */
public static void logParticipatingHostCount(long txnId, int participantCount) {
  ZooKeeper zk = VoltDB.instance().getHostMessenger().getZK();
  final String snapshotPath = VoltZK.completed_snapshots + "/" + txnId;

  boolean success = false;
  while (!success) {
    Stat stat = new Stat();
    byte[] data = null;
    try {
      data = zk.getData(snapshotPath, false, stat);
    } catch (KeeperException e) {
      if (e.code() == KeeperException.Code.NONODE) {
        // If snapshot creation failed for some reason, the node won't exist. Ignore.
        return;
      }
      VoltDB.crashLocalVoltDB("Failed to get snapshot completion node", true, e);
    } catch (InterruptedException e) {
      VoltDB.crashLocalVoltDB("Interrupted getting snapshot completion node", true, e);
    }
    if (data == null) {
      VoltDB.crashLocalVoltDB("Data should not be null if the node exists", false, null);
    }

    try {
      JSONObject jsonObj = new JSONObject(new String(data, Charsets.UTF_8));
      if (jsonObj.getLong("txnId") != txnId) {
        VoltDB.crashLocalVoltDB("TxnId should match", false, null);
      }

      int hostCount = jsonObj.getInt("hostCount");
      // +1 because hostCount was initialized to -1
      jsonObj.put("hostCount", hostCount + participantCount + 1);
      zk.setData(snapshotPath, jsonObj.toString(4).getBytes(Charsets.UTF_8), stat.getVersion());
    } catch (KeeperException.BadVersionException e) {
      // Lost the optimistic-concurrency race; re-read and retry.
      continue;
    } catch (Exception e) {
      VoltDB.crashLocalVoltDB("This ZK call should never fail", true, e);
    }
    success = true;
  }
}
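/*
 * Hedged sketch of the optimistic read-modify-write loop used above: read the
 * node with its Stat, mutate the payload, and setData with the read version so
 * a concurrent writer forces a retry via BadVersionException. The class name
 * and the hostCount mutation are stand-ins; the ZooKeeper and org.json calls
 * are the standard APIs.
 */
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.data.Stat;
import org.json.JSONObject;

final class ZkReadModifyWrite {
  static void bumpHostCount(ZooKeeper zk, String path, int delta) throws Exception {
    while (true) {
      Stat stat = new Stat();
      byte[] data = zk.getData(path, false, stat);
      JSONObject obj = new JSONObject(new String(data, "UTF-8"));
      obj.put("hostCount", obj.getInt("hostCount") + delta);
      try {
        // Compare-and-set on the znode version read above.
        zk.setData(path, obj.toString().getBytes("UTF-8"), stat.getVersion());
        return;
      } catch (KeeperException.BadVersionException e) {
        // Someone else updated the node first; re-read and retry.
      }
    }
  }
}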
public MockVoltDB(int clientPort, int adminPort, int httpPort, int drPort) {
  try {
    JSONObject obj = new JSONObject();
    JSONArray jsonArray = new JSONArray();
    jsonArray.put("127.0.0.1");
    obj.put("interfaces", jsonArray);
    obj.put("clientPort", clientPort);
    obj.put("adminPort", adminPort);
    obj.put("httpPort", httpPort);
    obj.put("drPort", drPort);
    m_localMetadata = obj.toString(4);

    m_catalog = new Catalog();
    m_catalog.execute("add / clusters " + m_clusterName);
    m_catalog.execute(
        "add "
            + m_catalog.getClusters().get(m_clusterName).getPath()
            + " databases "
            + m_databaseName);
    Cluster cluster = m_catalog.getClusters().get(m_clusterName);
    assert (cluster != null);
    // Set a sane default for TestMessaging (at least)
    cluster.setHeartbeattimeout(10000);

    try {
      m_hostMessenger.start();
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    VoltZK.createPersistentZKNodes(m_hostMessenger.getZK());
    m_hostMessenger
        .getZK()
        .create(
            VoltZK.cluster_metadata + "/" + m_hostMessenger.getHostId(),
            getLocalMetadata().getBytes("UTF-8"),
            Ids.OPEN_ACL_UNSAFE,
            CreateMode.EPHEMERAL);

    m_hostMessenger.generateMailboxId(
        m_hostMessenger.getHSIdForLocalSite(HostMessenger.STATS_SITE_ID));
    m_statsAgent = new StatsAgent();
    m_statsAgent.registerMailbox(
        m_hostMessenger, m_hostMessenger.getHSIdForLocalSite(HostMessenger.STATS_SITE_ID));
    for (MailboxType type : MailboxType.values()) {
      m_mailboxMap.put(type, new LinkedList<MailboxNodeContent>());
    }
    m_mailboxMap
        .get(MailboxType.StatsAgent)
        .add(
            new MailboxNodeContent(
                m_hostMessenger.getHSIdForLocalSite(HostMessenger.STATS_SITE_ID), null));
    m_siteTracker = new SiteTracker(m_hostId, m_mailboxMap);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
private void writeKnownLiveNodes(List<Integer> liveNodes) {
  try {
    if (m_zk.exists(VoltZK.lastKnownLiveNodes, null) == null) {
      // VoltZK.createPersistentZKNodes should have done this
      m_zk.create(VoltZK.lastKnownLiveNodes, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
    }
    JSONStringer stringer = new JSONStringer();
    stringer.object();
    stringer.key("liveNodes").array();
    for (Integer node : liveNodes) {
      stringer.value(node);
    }
    stringer.endArray();
    stringer.endObject();
    JSONObject obj = new JSONObject(stringer.toString());
    tmLog.debug("Writing live nodes to ZK: " + obj.toString(4));
    m_zk.setData(VoltZK.lastKnownLiveNodes, obj.toString(4).getBytes("UTF-8"), -1);
  } catch (Exception e) {
    VoltDB.crashLocalVoltDB(
        "Unable to update known live nodes at ZK path: " + VoltZK.lastKnownLiveNodes, true, e);
  }
}
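/*
 * Minimal encoding sketch mirroring the wire format written above,
 * i.e. {"liveNodes":[...]}. It assumes an org.json-style JSONStringer like
 * the one used throughout this code; the helper name is hypothetical. Useful
 * for unit-testing the expected payload without a ZooKeeper instance.
 */
import org.json.JSONException;
import org.json.JSONStringer;

final class LiveNodesJson {
  static String encode(Iterable<Integer> liveNodes) throws JSONException {
    JSONStringer s = new JSONStringer();
    s.object().key("liveNodes").array();
    for (Integer node : liveNodes) {
      s.value(node);
    }
    s.endArray().endObject();
    return s.toString();
  }
}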
@Override
public void run(List<String> children) {
  List<Long> updatedHSIds = VoltZK.childrenToReplicaHSIds(children);
  // Compute the HSIds in the callback list that were previously unseen
  Set<Long> newHSIds = new HashSet<Long>(updatedHSIds);
  newHSIds.removeAll(m_replicas);
  tmLog.debug("Newly seen replicas: " + CoreUtils.hsIdCollectionToString(newHSIds));
  // Compute the HSIds previously seen but now vanished from the callback list
  Set<Long> missingHSIds = new HashSet<Long>(m_replicas);
  missingHSIds.removeAll(updatedHSIds);
  tmLog.debug("Newly dead replicas: " + CoreUtils.hsIdCollectionToString(missingHSIds));

  tmLog.debug(
      "Handling babysitter callback for partition "
          + m_partitionId
          + ": children: "
          + CoreUtils.hsIdCollectionToString(updatedHSIds));

  if (m_state.get() == AppointerState.CLUSTER_START) {
    // We can't yet tolerate a host failure during startup. Crash it all.
    if (missingHSIds.size() > 0) {
      VoltDB.crashGlobalVoltDB("Node failure detected during startup.", false, null);
    }
    // ENG-3166: Eventually we would like to get rid of the extra replicas beyond k_factor,
    // but for now we just look to see how many replicas of this partition we actually expect
    // and gate leader assignment on that many copies showing up.
    int replicaCount = m_kfactor + 1;
    JSONArray parts;
    try {
      parts = m_topo.getJSONArray("partitions");
      for (int p = 0; p < parts.length(); p++) {
        JSONObject aPartition = parts.getJSONObject(p);
        int pid = aPartition.getInt("partition_id");
        if (pid == m_partitionId) {
          replicaCount = aPartition.getJSONArray("replicas").length();
        }
      }
    } catch (JSONException e) {
      // Ignore and just assume the normal number of replicas
    }
    if (children.size() == replicaCount) {
      m_currentLeader = assignLeader(m_partitionId, updatedHSIds);
    } else {
      tmLog.info(
          "Waiting on "
              + (replicaCount - children.size())
              + " more nodes for k-safety before startup");
    }
  } else {
    // Check for k-safety
    if (!isClusterKSafe()) {
      VoltDB.crashGlobalVoltDB(
          "Some partitions have no replicas. Cluster has become unviable.", false, null);
    }
    // Check whether replay has completed
    if (m_replayComplete.get() == false) {
      VoltDB.crashGlobalVoltDB(
          "Detected node failure during command log replay. Cluster will shut down.",
          false,
          null);
    }
    // Check whether there's been a possible network partition that we're not already handling
    if (m_partitionDetectionEnabled && !m_partitionDetected) {
      doPartitionDetectionActivities();
    }
    // If we survived the above gauntlet of fail, appoint a new leader for this partition.
    if (missingHSIds.contains(m_currentLeader)) {
      m_currentLeader = assignLeader(m_partitionId, updatedHSIds);
    }
  }
  m_replicas.clear();
  m_replicas.addAll(updatedHSIds);
}
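/*
 * Hedged sketch of the membership-diff step at the top of the callback above:
 * given the previous and current replica HSId sets, two set differences yield
 * the newly seen and newly missing members. The helper names are
 * illustrative.
 */
import java.util.HashSet;
import java.util.Set;

final class ReplicaDiff {
  static Set<Long> newlySeen(Set<Long> previous, Set<Long> current) {
    Set<Long> added = new HashSet<>(current);
    added.removeAll(previous);
    return added;
  }

  static Set<Long> newlyMissing(Set<Long> previous, Set<Long> current) {
    Set<Long> gone = new HashSet<>(previous);
    gone.removeAll(current);
    return gone;
  }
}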
public InstanceId(JSONObject jsObj) throws JSONException {
  m_coord = jsObj.getInt("coord");
  m_timestamp = jsObj.getLong("timestamp");
}
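/*
 * Hedged round-trip sketch for the constructor above: build the JSON shape
 * an InstanceId expects. The "coord" and "timestamp" keys come from the
 * constructor; the helper itself is hypothetical.
 */
import org.json.JSONException;
import org.json.JSONObject;

final class InstanceIdJson {
  static JSONObject encode(int coord, long timestamp) throws JSONException {
    return new JSONObject().put("coord", coord).put("timestamp", timestamp);
  }
}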
@Override
protected void loadFromJSONObject(JSONObject obj, Database db) throws JSONException {
  m_columnIndex = obj.getInt(Members.COLUMN_IDX.name());
  m_tableName = obj.getString(Members.TABLE_NAME.name());
  m_columnName = obj.getString(Members.COLUMN_NAME.name());
}
public DefaultSnapshotDataTarget(
    final File file,
    final int hostId,
    final String clusterName,
    final String databaseName,
    final String tableName,
    final int numPartitions,
    final boolean isReplicated,
    final List<Integer> partitionIds,
    final VoltTable schemaTable,
    final long txnId,
    final long timestamp,
    int[] version)
    throws IOException {
  String hostname = CoreUtils.getHostnameOrAddress();
  m_file = file;
  m_tableName = tableName;
  m_fos = new FileOutputStream(file);
  m_channel = m_fos.getChannel();
  m_needsFinalClose = !isReplicated;

  final FastSerializer fs = new FastSerializer();
  fs.writeInt(0); // CRC placeholder
  fs.writeInt(0); // Header length placeholder
  // "Completed" byte: written as 1 (true) for the CRC calculation, flipped to
  // 0 (false) on disk until the snapshot actually finishes
  fs.writeByte(1);
  for (int ii = 0; ii < 4; ii++) {
    fs.writeInt(version[ii]); // version
  }

  JSONStringer stringer = new JSONStringer();
  byte[] jsonBytes = null;
  try {
    stringer.object();
    stringer.key("txnId").value(txnId);
    stringer.key("hostId").value(hostId);
    stringer.key("hostname").value(hostname);
    stringer.key("clusterName").value(clusterName);
    stringer.key("databaseName").value(databaseName);
    stringer.key("tableName").value(tableName.toUpperCase());
    stringer.key("isReplicated").value(isReplicated);
    stringer.key("isCompressed").value(true);
    stringer.key("checksumType").value("CRC32C");
    stringer.key("timestamp").value(timestamp);
    /*
     * The timestamp string is for human consumption; automated consumers
     * should use the actual timestamp
     */
    stringer.key("timestampString").value(SnapshotUtil.formatHumanReadableDate(timestamp));
    if (!isReplicated) {
      stringer.key("partitionIds").array();
      for (int partitionId : partitionIds) {
        stringer.value(partitionId);
      }
      stringer.endArray();
      stringer.key("numPartitions").value(numPartitions);
    }
    stringer.endObject();
    String jsonString = stringer.toString();
    JSONObject jsonObj = new JSONObject(jsonString);
    jsonString = jsonObj.toString(4);
    jsonBytes = jsonString.getBytes("UTF-8");
  } catch (Exception e) {
    throw new IOException(e);
  }

  fs.writeInt(jsonBytes.length);
  fs.write(jsonBytes);

  final BBContainer container = fs.getBBContainer();
  container.b.position(4);
  container.b.putInt(container.b.remaining() - 4);
  container.b.position(0);

  final byte[] schemaBytes = PrivateVoltTableFactory.getSchemaBytes(schemaTable);

  final PureJavaCrc32 crc = new PureJavaCrc32();
  ByteBuffer aggregateBuffer = ByteBuffer.allocate(container.b.remaining() + schemaBytes.length);
  aggregateBuffer.put(container.b);
  aggregateBuffer.put(schemaBytes);
  aggregateBuffer.flip();
  crc.update(aggregateBuffer.array(), 4, aggregateBuffer.capacity() - 4);

  final int crcValue = (int) crc.getValue();
  aggregateBuffer.putInt(crcValue).position(8);
  aggregateBuffer.put((byte) 0).position(0); // Haven't actually finished writing the file

  if (m_simulateFullDiskWritingHeader) {
    m_writeException = new IOException("Disk full");
    m_writeFailed = true;
    m_fos.close();
    throw m_writeException;
  }

  /*
   * Be completely sure the write succeeded. If it didn't,
   * the disk is probably full or the path is bunk, etc.
   */
  m_acceptOneWrite = true;
  ListenableFuture<?> writeFuture =
      write(Callables.returning((BBContainer) DBBPool.wrapBB(aggregateBuffer)), false);
  try {
    writeFuture.get();
  } catch (InterruptedException e) {
    m_fos.close();
    throw new java.io.InterruptedIOException();
  } catch (ExecutionException e) {
    m_fos.close();
    throw m_writeException;
  }
  if (m_writeFailed) {
    m_fos.close();
    throw m_writeException;
  }

  ScheduledFuture<?> syncTask = null;
  syncTask =
      m_syncService.scheduleAtFixedRate(
          new Runnable() {
            @Override
            public void run() {
              // Only sync once at least 4 megabytes of data have accumulated, enough to
              // amortize the cost of seeking on ye olden platters. Since we are appending
              // to a file it's actually 2 seeks.
              while (m_bytesWrittenSinceLastSync.get() > (1024 * 1024 * 4)) {
                final int bytesSinceLastSync = m_bytesWrittenSinceLastSync.getAndSet(0);
                try {
                  m_channel.force(false);
                } catch (IOException e) {
                  if (!(e instanceof java.nio.channels.AsynchronousCloseException)) {
                    SNAP_LOG.error("Error syncing snapshot", e);
                  } else {
                    SNAP_LOG.debug(
                        "Asynchronous close while syncing snapshot data, presumably graceful",
                        e);
                  }
                }
                m_bytesAllowedBeforeSync.release(bytesSinceLastSync);
              }
            }
          },
          SNAPSHOT_SYNC_FREQUENCY,
          SNAPSHOT_SYNC_FREQUENCY,
          TimeUnit.MILLISECONDS);
  m_syncTask = syncTask;
}
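/*
 * Hedged sketch of the amortized-sync idea in the scheduled task above:
 * accumulate a byte count and force() the channel only once a threshold of
 * appended bytes has built up, trading a little durability latency for far
 * fewer disk seeks. Class and method names are illustrative; the real code
 * also replenishes a write-throttling semaphore with the drained count.
 */
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.util.concurrent.atomic.AtomicInteger;

final class AmortizedSync {
  private static final int SYNC_THRESHOLD = 4 * 1024 * 1024; // 4 MB
  private final AtomicInteger unsynced = new AtomicInteger();

  void noteWrite(int bytes) {
    unsynced.addAndGet(bytes);
  }

  void maybeSync(FileChannel channel) throws IOException {
    // Loop in case more writes land while we are syncing.
    while (unsynced.get() > SYNC_THRESHOLD) {
      final int drained = unsynced.getAndSet(0);
      channel.force(false); // flush data only; skip file metadata
      onSynced(drained);
    }
  }

  /** Hook for replenishing write permits; a no-op in this sketch. */
  void onSynced(int bytes) {}
}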
/**
 * Compile and cache the statement and plan and return the final plan graph.
 *
 * @param sql
 * @param paramCount
 */
public List<AbstractPlanNode> compile(
    String sql,
    int paramCount,
    String joinOrder,
    Object partitionParameter,
    boolean inferSP,
    boolean lockInSP) {
  Statement catalogStmt = proc.getStatements().add("stmt-" + String.valueOf(compileCounter++));
  catalogStmt.setSqltext(sql);
  catalogStmt.setSinglepartition(partitionParameter != null);
  catalogStmt.setBatched(false);
  catalogStmt.setParamnum(paramCount);

  // Determine the type of the query
  QueryType qtype = QueryType.SELECT;
  catalogStmt.setReadonly(true);
  if (sql.toLowerCase().startsWith("insert")) {
    qtype = QueryType.INSERT;
    catalogStmt.setReadonly(false);
  }
  if (sql.toLowerCase().startsWith("update")) {
    qtype = QueryType.UPDATE;
    catalogStmt.setReadonly(false);
  }
  if (sql.toLowerCase().startsWith("delete")) {
    qtype = QueryType.DELETE;
    catalogStmt.setReadonly(false);
  }
  catalogStmt.setQuerytype(qtype.getValue());

  // The name will look like "basename-stmt-#"
  String name = catalogStmt.getParent().getTypeName() + "-" + catalogStmt.getTypeName();

  DatabaseEstimates estimates = new DatabaseEstimates();
  TrivialCostModel costModel = new TrivialCostModel();
  PartitioningForStatement partitioning =
      new PartitioningForStatement(partitionParameter, inferSP, lockInSP);
  QueryPlanner planner =
      new QueryPlanner(
          catalogStmt.getSqltext(),
          catalogStmt.getTypeName(),
          catalogStmt.getParent().getTypeName(),
          catalog.getClusters().get("cluster"),
          db,
          partitioning,
          hsql,
          estimates,
          false,
          StatementCompiler.DEFAULT_MAX_JOIN_TABLES,
          costModel,
          null,
          joinOrder);

  CompiledPlan plan = null;
  planner.parse();
  plan = planner.plan();
  assert (plan != null);

  // Input parameters
  // We will need to update the system catalogs with this new information.
  // If this is an ad hoc query there won't be any parameters.
  for (int i = 0; i < plan.parameters.length; ++i) {
    StmtParameter catalogParam = catalogStmt.getParameters().add(String.valueOf(i));
    catalogParam.setJavatype(plan.parameters[i].getValue());
    catalogParam.setIndex(i);
  }

  // Output columns
  int index = 0;
  for (SchemaColumn col : plan.columns.getColumns()) {
    Column catColumn = catalogStmt.getOutput_columns().add(String.valueOf(index));
    catColumn.setNullable(false);
    catColumn.setIndex(index);
    catColumn.setName(col.getColumnName());
    catColumn.setType(col.getType().getValue());
    catColumn.setSize(col.getSize());
    index++;
  }

  List<PlanNodeList> nodeLists = new ArrayList<PlanNodeList>();
  nodeLists.add(new PlanNodeList(plan.rootPlanGraph));
  if (plan.subPlanGraph != null) {
    nodeLists.add(new PlanNodeList(plan.subPlanGraph));
  }

  // Store the list of parameter types and indexes in the plan node list.
  List<Pair<Integer, VoltType>> parameters = nodeLists.get(0).getParameters();
  for (int i = 0; i < plan.parameters.length; ++i) {
    Pair<Integer, VoltType> parameter = new Pair<Integer, VoltType>(i, plan.parameters[i]);
    parameters.add(parameter);
  }

  // Now update our catalog information.
  // HACK: We're using the node_tree's hashCode() as its name. It would be really
  // nice if the Catalog code gave us a GUID without needing a name first...
  String json = null;
  try {
    JSONObject jobj = new JSONObject(nodeLists.get(0).toJSONString());
    json = jobj.toString(4);
  } catch (JSONException e2) {
    // Pretty-printing the plan JSON should never fail; bail out loudly if it does.
    e2.printStackTrace();
    System.exit(-1);
  }

  // We then stick a serialized version of PlanNodeTree into a PlanFragment
  try {
    BuildDirectoryUtils.writeFile("statement-plans", name + "_json.txt", json);
    BuildDirectoryUtils.writeFile(
        "statement-plans", name + ".dot", nodeLists.get(0).toDOTString("name"));
  } catch (Exception e) {
    e.printStackTrace();
  }

  List<AbstractPlanNode> plannodes = new ArrayList<AbstractPlanNode>();
  for (PlanNodeList nodeList : nodeLists) {
    plannodes.add(nodeList.getRootPlanNode());
  }

  m_currentPlan = plan;
  return plannodes;
}
// XXX maybe consider an IOException subclass at some point
public TableSaveFile(
    FileChannel dataIn,
    int readAheadChunks,
    Integer[] relevantPartitionIds,
    boolean continueOnCorruptedChunk)
    throws IOException {
  try {
    EELibraryLoader.loadExecutionEngineLibrary(true);
    if (relevantPartitionIds == null) {
      m_relevantPartitionIds = null;
    } else {
      m_relevantPartitionIds = new HashSet<Integer>();
      for (Integer i : relevantPartitionIds) {
        m_relevantPartitionIds.add(i);
      }
    }
    m_chunkReads = new Semaphore(readAheadChunks);
    m_saveFile = dataIn;
    m_continueOnCorruptedChunk = continueOnCorruptedChunk;

    final PureJavaCrc32 crc = new PureJavaCrc32();
    /*
     * Second CRC, used to detect when a CRC check fails only because the
     * file wasn't completed
     */
    final PureJavaCrc32 secondCRC = new PureJavaCrc32();

    /*
     * Get the header with the save/restore specific information
     */
    final ByteBuffer lengthBuffer = ByteBuffer.allocate(8);
    while (lengthBuffer.hasRemaining()) {
      final int read = m_saveFile.read(lengthBuffer);
      if (read == -1) {
        throw new EOFException();
      }
    }
    lengthBuffer.flip();
    final int originalCRC = lengthBuffer.getInt();
    int length = lengthBuffer.getInt();
    crc.update(lengthBuffer.array(), 4, 4);
    secondCRC.update(lengthBuffer.array(), 4, 4);

    if (length < 0) {
      throw new IOException("Corrupted save file has negative header length");
    }
    if (length > 2097152) {
      throw new IOException("Corrupted save file has unreasonable header length > 2 megs");
    }

    final ByteBuffer saveRestoreHeader = ByteBuffer.allocate(length);
    while (saveRestoreHeader.hasRemaining()) {
      final int read = m_saveFile.read(saveRestoreHeader);
      if (read == -1) {
        throw new EOFException();
      }
    }
    saveRestoreHeader.flip();
    crc.update(saveRestoreHeader.array());
    // For the second CRC, pretend the "completed" byte (the first byte of the
    // header) was set to 1, the way the writer computed its CRC.
    secondCRC.update(new byte[] {1});
    secondCRC.update(saveRestoreHeader.array(), 1, saveRestoreHeader.array().length - 1);

    /*
     * Get the template for the VoltTable serialization header.
     * It will have an extra length value prepended to it so that
     * it can be sucked straight into a buffer. This will not
     * contain a row count since that varies from chunk to chunk
     * and is supplied by the chunk.
     */
    lengthBuffer.clear();
    lengthBuffer.limit(4);
    /*
     * Why this stupidity and no while loop?
     * Because java is broken and complains about a random final
     * elsewhere if you do.
     */
    {
      final int read = m_saveFile.read(lengthBuffer);
      if (read == -1) {
        throw new EOFException();
      }
    }
    crc.update(lengthBuffer.array(), 0, 4);
    secondCRC.update(lengthBuffer.array(), 0, 4);
    lengthBuffer.flip();
    length = lengthBuffer.getInt();

    if (length < 4) {
      throw new IOException(
          "Corrupted save file has negative length or too small length for VoltTable header");
    }
    if (length > 2097152) {
      throw new IOException(
          "Corrupted save file has unreasonable VoltTable header length > 2 megs");
    }

    m_tableHeader = ByteBuffer.allocate(length + 4);
    m_tableHeader.putInt(length);
    while (m_tableHeader.hasRemaining()) {
      final int read = m_saveFile.read(m_tableHeader);
      if (read == -1) {
        throw new EOFException();
      }
    }
    crc.update(m_tableHeader.array(), 4, length);
    secondCRC.update(m_tableHeader.array(), 4, length);

    boolean failedCRCDueToNotCompleted = false;
    final int actualCRC = (int) crc.getValue();
    if (originalCRC != actualCRC) {
      /*
       * Check if the CRC mismatch is due to the snapshot not being completed
       */
      final int secondCRCValue = (int) secondCRC.getValue();
      if (secondCRCValue == originalCRC) {
        failedCRCDueToNotCompleted = true;
      } else {
        throw new IOException("Checksum mismatch");
      }
    }

    FastDeserializer fd = new FastDeserializer(saveRestoreHeader);
    byte completedByte = fd.readByte();
    m_completed = !failedCRCDueToNotCompleted && completedByte == 1;
    for (int ii = 0; ii < 4; ii++) {
      m_versionNum[ii] = fd.readInt();
    }

    /*
     * Support the original pre-1.3 header format as well as a new JSON format.
     * JSON makes it possible to add info to a snapshot header without
     * breaking backwards compatibility.
     */
    if (m_versionNum[3] == 0) {
      m_txnId = fd.readLong();
      m_timestamp = TransactionIdManager.getTimestampFromTransactionId(m_txnId);
      m_hostId = fd.readInt();
      m_hostname = fd.readString();
      m_clusterName = fd.readString();
      m_databaseName = fd.readString();
      m_tableName = fd.readString();
      m_isReplicated = fd.readBoolean();
      m_isCompressed = false;
      m_checksumType = ChecksumType.CRC32;
      if (!m_isReplicated) {
        m_partitionIds = (int[]) fd.readArray(int.class);
        if (!m_completed) {
          for (Integer partitionId : m_partitionIds) {
            m_corruptedPartitions.add(partitionId);
          }
        }
        m_totalPartitions = fd.readInt();
      } else {
        m_partitionIds = new int[] {0};
        m_totalPartitions = 1;
        if (!m_completed) {
          m_corruptedPartitions.add(0);
        }
      }
      m_hasVersion2FormatChunks = false;
    } else {
      assert (m_versionNum[3] == 1 || m_versionNum[3] == 2);
      m_hasVersion2FormatChunks = m_versionNum[3] >= 2;

      int numJSONBytes = fd.readInt();
      byte[] jsonBytes = new byte[numJSONBytes];
      fd.readFully(jsonBytes);
      String jsonString = new String(jsonBytes, "UTF-8");
      JSONObject obj = new JSONObject(jsonString);

      m_txnId = obj.getLong("txnId");
      // Timestamp field added for 3.0, might not be there
      if (obj.has("timestamp")) {
        m_timestamp = obj.getLong("timestamp");
      } else {
        // Pre 3.0/IV2 the timestamp was in the transaction id
        m_timestamp = TransactionIdManager.getTimestampFromTransactionId(m_txnId);
      }
      m_hostId = obj.getInt("hostId");
      m_hostname = obj.getString("hostname");
      m_clusterName = obj.getString("clusterName");
      m_databaseName = obj.getString("databaseName");
      m_tableName = obj.getString("tableName");
      m_isReplicated = obj.getBoolean("isReplicated");
      m_isCompressed = obj.optBoolean("isCompressed", false);
      m_checksumType = ChecksumType.valueOf(obj.optString("checksumType", "CRC32"));
      if (!m_isReplicated) {
        JSONArray partitionIds = obj.getJSONArray("partitionIds");
        m_partitionIds = new int[partitionIds.length()];
        for (int ii = 0; ii < m_partitionIds.length; ii++) {
          m_partitionIds[ii] = partitionIds.getInt(ii);
        }
        if (!m_completed) {
          for (Integer partitionId : m_partitionIds) {
            m_corruptedPartitions.add(partitionId);
          }
        }
        m_totalPartitions = obj.getInt("numPartitions");
      } else {
        m_partitionIds = new int[] {0};
        m_totalPartitions = 1;
        if (!m_completed) {
          m_corruptedPartitions.add(0);
        }
      }
    }
    /*
     * Several runtime exceptions can be thrown in valid failure cases where
     * a corrupt save file is being detected.
     */
  } catch (BufferUnderflowException e) {
    throw new IOException(e);
  } catch (BufferOverflowException e) {
    throw new IOException(e);
  } catch (IndexOutOfBoundsException e) {
    throw new IOException(e);
  } catch (JSONException e) {
    throw new IOException(e);
  }
}
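/*
 * Hedged sketch of the dual-checksum trick in the constructor above: the
 * writer computes its CRC with the "completed" flag byte forced to 1 but
 * stores 0 on disk until the snapshot finishes, so a reader can distinguish
 * "merely unfinished" from "actually corrupt" by recomputing the CRC both
 * ways. Standard java.util.zip.CRC32 stands in for PureJavaCrc32 here, and
 * the class name is illustrative.
 */
import java.util.zip.CRC32;

final class SnapshotCrc {
  enum Verdict { OK, INCOMPLETE, CORRUPT }

  static Verdict check(int storedCrc, byte[] header) {
    // CRC over the header exactly as read from disk.
    CRC32 asWritten = new CRC32();
    asWritten.update(header, 0, header.length);
    if ((int) asWritten.getValue() == storedCrc) {
      return Verdict.OK;
    }

    // Recompute with the completed flag (first byte) forced to 1, the way
    // the writer computed it before flipping the byte back to 0.
    CRC32 asIntended = new CRC32();
    asIntended.update(new byte[] {1});
    asIntended.update(header, 1, header.length - 1);
    return (int) asIntended.getValue() == storedCrc ? Verdict.INCOMPLETE : Verdict.CORRUPT;
  }
}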
@Override
public void loadFromJSONObject(JSONObject jobj, Database db) throws JSONException {
  super.loadFromJSONObject(jobj, db);
  m_truncate = jobj.getBoolean(Members.TRUNCATE.name());
}