/** Create an execution engine and load the Volt shared library. */
public ExecutionEngine(long siteId, int partitionId) {
    m_partitionId = partitionId;
    org.voltdb.EELibraryLoader.loadExecutionEngineLibrary(true);
    // In mock test environments there may be no stats agent.
    final StatsAgent statsAgent = VoltDB.instance().getStatsAgent();
    if (statsAgent != null) {
        m_plannerStats = new PlannerStatsCollector(siteId);
        statsAgent.registerStatsSource(SysProcSelector.PLANNER, siteId, m_plannerStats);
    }
}
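// Illustrative only: a minimal sketch of how a concrete engine might chain to the
// constructor above. The subclass name and the absence of other parameters are
// assumptions, not the actual VoltDB subclass signatures; the point is that
// super(siteId, partitionId) loads the native library and, when a stats agent is
// present, registers this site's planner stats source.
class SketchExecutionEngine extends ExecutionEngine {
    SketchExecutionEngine(long siteId, int partitionId) {
        super(siteId, partitionId);
    }
}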
/**
 * Takes a snapshot of all the tables in the database now and checks all the rows in each table
 * to see if they satisfy the constraints. The constraints should be added with the table name
 * and table id 0.
 *
 * <p>Since the snapshot files reside on the servers, we have to copy them over to the client in
 * order to check. This might be overkill, but the alternative is to ask the user to write a
 * stored procedure for each table and execute them on all nodes. That's not significantly
 * better, either.
 *
 * <p>This function blocks. It should only be run at the end.
 *
 * @return true if all tables passed the test, false otherwise.
 */
protected boolean checkTables() {
    String dir = "/tmp";
    String nonce = "data_verification";
    ClientConfig clientConfig = new ClientConfig(m_username, m_password);
    clientConfig.setExpectedOutgoingMessageSize(getExpectedOutgoingMessageSize());
    clientConfig.setHeavyweight(false);
    Client client = ClientFactory.createClient(clientConfig);
    // Host ID to IP mappings
    LinkedHashMap<Integer, String> hostMappings = new LinkedHashMap<Integer, String>();
    /*
     * The key is the table name. The first element of the pair is the hostname,
     * the second is the snapshot file name.
     */
    LinkedHashMap<String, Pair<String, String>> snapshotMappings =
            new LinkedHashMap<String, Pair<String, String>>();
    boolean isSatisfied = true;

    // Load the native library for loading tables from snapshot files
    org.voltdb.EELibraryLoader.loadExecutionEngineLibrary(true);

    try {
        boolean keepTrying = true;
        VoltTable[] response = null;

        client.createConnection(m_host);
        // Only initiate the snapshot if this is the first client
        while (m_id == 0) {
            // Take a snapshot of the database. This call is blocking.
            response = client.callProcedure("@SnapshotSave", dir, nonce, 1).getResults();
            if (response.length != 1
                    || !response[0].advanceRow()
                    || !response[0].getString("RESULT").equals("SUCCESS")) {
                // A leftover snapshot with the same nonce may exist; delete it and retry once.
                if (keepTrying
                        && response[0].getString("ERR_MSG").contains("ALREADY EXISTS")) {
                    client.callProcedure("@SnapshotDelete",
                                         new String[] {dir},
                                         new String[] {nonce});
                    keepTrying = false;
                    continue;
                }
                System.err.println("Failed to take snapshot");
                return false;
            }
            break;
        }

        // Clients other than the one that initiated the snapshot
        // have to check if the snapshot has completed
        if (m_id > 0) {
            int maxTry = 10;
            while (maxTry-- > 0) {
                boolean found = false;
                response = client.callProcedure("@SnapshotStatus").getResults();
                if (response.length != 2) {
                    System.err.println("Failed to get snapshot status");
                    return false;
                }
                while (response[0].advanceRow()) {
                    if (response[0].getString("NONCE").equals(nonce)) {
                        found = true;
                        break;
                    }
                }

                if (found) {
                    // A non-zero end time probably means the snapshot is done
                    if (response[0].getLong("END_TIME") > 0) {
                        break;
                    }
                }

                try {
                    Thread.sleep(500);
                } catch (InterruptedException e) {
                    return false;
                }
            }
        }

        // Get host ID to hostname mappings
        response = client.callProcedure("@SystemInformation").getResults();
        if (response.length != 1) {
            System.err.println("Failed to get host ID to IP address mapping");
            return false;
        }
        while (response[0].advanceRow()) {
            if (!response[0].getString("KEY").equals("HOSTNAME")) {
                continue;
            }
            hostMappings.put((Integer) response[0].get("HOST_ID", VoltType.INTEGER),
                             response[0].getString("VALUE"));
        }

        /* DUMP THE HOST MAPPINGS:
        System.err.println("\n\nhostMappings: ");
        for (Integer i : hostMappings.keySet()) {
            System.err.println("\tkey: " + i + " value: " + hostMappings.get(i));
        }
        */

        // Do a scan to get all the file names and table names
        response = client.callProcedure("@SnapshotScan", dir).getResults();
        if (response.length != 3) {
            System.err.println("Failed to get snapshot filenames");
            return false;
        }

        // Only copy the snapshot files we just created
        while (response[0].advanceRow()) {
            if (!response[0].getString("NONCE").equals(nonce)) {
                continue;
            }
            String[] tables = response[0].getString("TABLES_REQUIRED").split(",");
            for (String t : tables) {
                snapshotMappings.put(t, null);
            }
            break;
        }

        /* DUMP THE SNAPSHOT MAPPINGS:
        System.err.println("\n\nsnapshotMappings: ");
        for (String i : snapshotMappings.keySet()) {
            System.err.println("\tkey: " + i + " value: " + snapshotMappings.get(i));
        }
        */

        while (response[2].advanceRow()) {
            int id = (Integer) response[2].get("HOST_ID", VoltType.INTEGER);
            String tableName = response[2].getString("TABLE");
            if (!snapshotMappings.containsKey(tableName) || !hostMappings.containsKey(id)) {
                System.err.println("FAILED configuring snapshotMappings for: ");
                System.err.println("snapshotMappings[" + tableName + "] "
                                   + snapshotMappings.get(tableName));
                System.err.println("hostMappings[" + id + "] " + hostMappings.get(id));
                continue;
            }
            snapshotMappings.put(tableName,
                                 Pair.of(hostMappings.get(id), response[2].getString("NAME")));
        }
    } catch (NoConnectionsException e) {
        e.printStackTrace();
        return false;
    } catch (ProcCallException e) {
        e.printStackTrace();
        return false;
    } catch (UnknownHostException e) {
        e.printStackTrace();
        return false;
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    }

    // Iterate through all the tables
    System.err.println("Checking " + m_tableCheckOrder.size() + " table constraints");
    for (String tableName : m_tableCheckOrder) {
        Pair<String, String> value = snapshotMappings.get(tableName);
        if (value == null) {
            System.err.println("No snapshot mapping for table: " + tableName);
            continue;
        }

        String hostName = value.getFirst();
        File file = new File(dir, value.getSecond());
        FileInputStream inputStream = null;
        TableSaveFile saveFile = null;
        long rowCount = 0;

        Pair<String, Integer> key = Pair.of(tableName, 0);
        if (!m_constraints.containsKey(key) || hostName == null) {
            System.err.println("No constraint for: " + tableName);
            continue;
        }

        // Copy the file over
        String localhostName = ConnectionUtil.getHostnameOrAddress();
        final SSHTools ssh = new SSHTools(m_username);
        if (!hostName.equals("localhost") && !hostName.equals(localhostName)) {
            if (!ssh.copyFromRemote(file.getPath(), hostName, file.getPath())) {
                System.err.println("Failed to copy the snapshot file " + file.getPath()
                                   + " from host " + hostName);
                return false;
            }
        }

        if (!file.exists()) {
            System.err.println("Snapshot file " + file.getPath()
                               + " cannot be copied from " + hostName + " to localhost");
            return false;
        }

        try {
            try {
                inputStream = new FileInputStream(file);
                saveFile = new TableSaveFile(inputStream.getChannel(), 3, null);

                // Get chunks from the table
                while (isSatisfied && saveFile.hasMoreChunks()) {
                    final BBContainer chunk = saveFile.getNextChunk();
                    VoltTable table = null;

                    // This probably should not happen
                    if (chunk == null) {
                        continue;
                    }
                    table = PrivateVoltTableFactory.createVoltTableFromBuffer(chunk.b, true);

                    // Now, check each row
                    while (isSatisfied && table.advanceRow()) {
                        isSatisfied = Verification.checkRow(m_constraints.get(key), table);
                        rowCount++;
                    }
                    // Release the memory of the chunk we just examined, be good
                    chunk.discard();
                }
            } finally {
                if (saveFile != null) {
                    saveFile.close();
                }
                if (inputStream != null) {
                    inputStream.close();
                }
                if (!hostName.equals("localhost")
                        && !hostName.equals(localhostName)
                        && !file.delete()) {
                    System.err.println("Failed to delete snapshot file " + file.getPath());
                }
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return false;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }

        if (isSatisfied) {
            System.err.println("Table " + tableName + " with " + rowCount
                               + " rows passed check");
        } else {
            System.err.println("Table " + tableName + " failed check");
            break;
        }
    }

    // Clean up the snapshot we made
    try {
        if (m_id == 0) {
            client.callProcedure("@SnapshotDelete",
                                 new String[] {dir},
                                 new String[] {nonce}).getResults();
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ProcCallException e) {
        e.printStackTrace();
    }

    System.err.println("Table checking finished "
                       + (isSatisfied ? "successfully" : "with failures"));
    return isSatisfied;
}
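// Usage sketch for the method above. The addConstraint(...) call and the
// runFinalVerification() wrapper are hypothetical; they only illustrate the Javadoc's
// contract that constraints must be registered in m_constraints under
// Pair.of(tableName, 0) before checkTables() runs, with a value that
// Verification.checkRow(...) accepts.
protected void runFinalVerification() {
    // Hypothetical registration helper; the real harness populates m_constraints itself.
    // addConstraint("WAREHOUSE", 0, someRowExpression);
    if (!checkTables()) {
        System.err.println("Final data verification failed");
    }
}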
// XXX maybe consider an IOException subclass at some point
public TableSaveFile(
        FileChannel dataIn,
        int readAheadChunks,
        Integer[] relevantPartitionIds,
        boolean continueOnCorruptedChunk) throws IOException {
    try {
        EELibraryLoader.loadExecutionEngineLibrary(true);
        if (relevantPartitionIds == null) {
            m_relevantPartitionIds = null;
        } else {
            m_relevantPartitionIds = new HashSet<Integer>();
            for (Integer i : relevantPartitionIds) {
                m_relevantPartitionIds.add(i);
            }
        }
        m_chunkReads = new Semaphore(readAheadChunks);
        m_saveFile = dataIn;
        m_continueOnCorruptedChunk = continueOnCorruptedChunk;

        final PureJavaCrc32 crc = new PureJavaCrc32();
        /*
         * A second CRC computed as if the completed flag were set. It detects the
         * case where the primary CRC check fails only because the snapshot file
         * wasn't completed.
         */
        final PureJavaCrc32 secondCRC = new PureJavaCrc32();

        /*
         * Get the header with the save restore specific information
         */
        final ByteBuffer lengthBuffer = ByteBuffer.allocate(8);
        while (lengthBuffer.hasRemaining()) {
            final int read = m_saveFile.read(lengthBuffer);
            if (read == -1) {
                throw new EOFException();
            }
        }
        lengthBuffer.flip();
        final int originalCRC = lengthBuffer.getInt();
        int length = lengthBuffer.getInt();
        crc.update(lengthBuffer.array(), 4, 4);
        secondCRC.update(lengthBuffer.array(), 4, 4);

        if (length < 0) {
            throw new IOException("Corrupted save file has negative header length");
        }
        if (length > 2097152) {
            throw new IOException("Corrupted save file has unreasonable header length > 2 megs");
        }

        final ByteBuffer saveRestoreHeader = ByteBuffer.allocate(length);
        while (saveRestoreHeader.hasRemaining()) {
            final int read = m_saveFile.read(saveRestoreHeader);
            if (read == -1) {
                throw new EOFException();
            }
        }
        saveRestoreHeader.flip();
        crc.update(saveRestoreHeader.array());
        // The second CRC pretends the completed byte (the first header byte) was 1
        secondCRC.update(new byte[] {1});
        secondCRC.update(saveRestoreHeader.array(), 1, saveRestoreHeader.array().length - 1);

        /*
         * Get the template for the VoltTable serialization header.
         * It will have an extra length value prepended to it so that
         * it can be read straight into a buffer. This will not
         * contain a row count since that varies from chunk to chunk
         * and is supplied by the chunk
         */
        lengthBuffer.clear();
        lengthBuffer.limit(4);
        /*
         * Why this stupidity and no while loop?
         * Because java is broken and complains about a random final
         * elsewhere if you do.
         */
        {
            final int read = m_saveFile.read(lengthBuffer);
            if (read == -1) {
                throw new EOFException();
            }
        }
        crc.update(lengthBuffer.array(), 0, 4);
        secondCRC.update(lengthBuffer.array(), 0, 4);
        lengthBuffer.flip();
        length = lengthBuffer.getInt();

        if (length < 4) {
            throw new IOException(
                    "Corrupted save file has negative or too-small VoltTable header length");
        }
        if (length > 2097152) {
            throw new IOException(
                    "Corrupted save file has unreasonable VoltTable header length > 2 megs");
        }

        m_tableHeader = ByteBuffer.allocate(length + 4);
        m_tableHeader.putInt(length);
        while (m_tableHeader.hasRemaining()) {
            final int read = m_saveFile.read(m_tableHeader);
            if (read == -1) {
                throw new EOFException();
            }
        }
        crc.update(m_tableHeader.array(), 4, length);
        secondCRC.update(m_tableHeader.array(), 4, length);

        boolean failedCRCDueToNotCompleted = false;
        final int actualCRC = (int) crc.getValue();
        if (originalCRC != actualCRC) {
            /*
             * Check if the CRC mismatch is due to the snapshot not being completed
             */
            final int secondCRCValue = (int) secondCRC.getValue();
            if (secondCRCValue == originalCRC) {
                failedCRCDueToNotCompleted = true;
            } else {
                throw new IOException("Checksum mismatch");
            }
        }

        FastDeserializer fd = new FastDeserializer(saveRestoreHeader);
        byte completedByte = fd.readByte();
        m_completed = !failedCRCDueToNotCompleted && completedByte == 1;
        for (int ii = 0; ii < 4; ii++) {
            m_versionNum[ii] = fd.readInt();
        }

        /*
         * Support the original pre-1.3 header format as well as the newer JSON format.
         * JSON makes it possible to add info to a snapshot header without
         * breaking backwards compatibility.
         */
        if (m_versionNum[3] == 0) {
            m_txnId = fd.readLong();
            m_timestamp = TransactionIdManager.getTimestampFromTransactionId(m_txnId);
            m_hostId = fd.readInt();
            m_hostname = fd.readString();
            m_clusterName = fd.readString();
            m_databaseName = fd.readString();
            m_tableName = fd.readString();
            m_isReplicated = fd.readBoolean();
            m_isCompressed = false;
            m_checksumType = ChecksumType.CRC32;
            if (!m_isReplicated) {
                m_partitionIds = (int[]) fd.readArray(int.class);
                if (!m_completed) {
                    for (Integer partitionId : m_partitionIds) {
                        m_corruptedPartitions.add(partitionId);
                    }
                }
                m_totalPartitions = fd.readInt();
            } else {
                m_partitionIds = new int[] {0};
                m_totalPartitions = 1;
                if (!m_completed) {
                    m_corruptedPartitions.add(0);
                }
            }
            m_hasVersion2FormatChunks = false;
        } else {
            assert (m_versionNum[3] == 1 || m_versionNum[3] == 2);
            m_hasVersion2FormatChunks = (m_versionNum[3] >= 2);
            int numJSONBytes = fd.readInt();
            byte[] jsonBytes = new byte[numJSONBytes];
            fd.readFully(jsonBytes);
            String jsonString = new String(jsonBytes, "UTF-8");
            JSONObject obj = new JSONObject(jsonString);
            m_txnId = obj.getLong("txnId");
            // Timestamp field added for 3.0, might not be there
            if (obj.has("timestamp")) {
                m_timestamp = obj.getLong("timestamp");
            } else {
                // Pre 3.0/IV2 the timestamp was in the transaction id
                m_timestamp = TransactionIdManager.getTimestampFromTransactionId(m_txnId);
            }
            m_hostId = obj.getInt("hostId");
            m_hostname = obj.getString("hostname");
            m_clusterName = obj.getString("clusterName");
            m_databaseName = obj.getString("databaseName");
            m_tableName = obj.getString("tableName");
            m_isReplicated = obj.getBoolean("isReplicated");
            m_isCompressed = obj.optBoolean("isCompressed", false);
            m_checksumType = ChecksumType.valueOf(obj.optString("checksumType", "CRC32"));
            if (!m_isReplicated) {
                JSONArray partitionIds = obj.getJSONArray("partitionIds");
                m_partitionIds = new int[partitionIds.length()];
                for (int ii = 0; ii < m_partitionIds.length; ii++) {
                    m_partitionIds[ii] = partitionIds.getInt(ii);
                }
                if (!m_completed) {
                    for (Integer partitionId : m_partitionIds) {
                        m_corruptedPartitions.add(partitionId);
                    }
                }
                m_totalPartitions = obj.getInt("numPartitions");
            } else {
                m_partitionIds = new int[] {0};
                m_totalPartitions = 1;
                if (!m_completed) {
                    m_corruptedPartitions.add(0);
                }
            }
        }
    /*
     * Several runtime exceptions can be thrown in valid failure cases where
     * a corrupt save file is being detected.
     */
    } catch (BufferUnderflowException e) {
        throw new IOException(e);
    } catch (BufferOverflowException e) {
        throw new IOException(e);
    } catch (IndexOutOfBoundsException e) {
        throw new IOException(e);
    } catch (JSONException e) {
        throw new IOException(e);
    }
}
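// A minimal reader sketch, assuming the constructor above: it opens a snapshot file and
// walks its chunks the same way checkTables() does. The file path is hypothetical;
// passing null for relevantPartitionIds reads every partition, and false means don't
// continue past a corrupted chunk.
static void dumpChunkRowCounts(String path) throws IOException {
    FileInputStream in = new FileInputStream(path);
    TableSaveFile saveFile = new TableSaveFile(in.getChannel(), 3, null, false);
    try {
        while (saveFile.hasMoreChunks()) {
            BBContainer chunk = saveFile.getNextChunk();
            if (chunk == null) {
                continue;
            }
            VoltTable table = PrivateVoltTableFactory.createVoltTableFromBuffer(chunk.b, true);
            System.err.println("Chunk with " + table.getRowCount() + " rows");
            // Release the chunk's memory once we're done with it
            chunk.discard();
        }
    } finally {
        saveFile.close();
        in.close();
    }
}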