/** Resizes the internal byte buffer with a simple doubling policy, if needed. */
private final void growIfNeeded(int minimumDesired) {
    if (buffer.b().remaining() < minimumDesired) {
        // Compute the size of the new buffer
        int newCapacity = buffer.b().capacity();
        int newRemaining = newCapacity - buffer.b().position();
        while (newRemaining < minimumDesired) {
            newRemaining += newCapacity;
            newCapacity *= 2;
        }

        // Allocate and copy
        BBContainer next;
        if (isDirect) {
            next = DBBPool.allocateDirect(newCapacity);
        } else {
            next = DBBPool.wrapBB(ByteBuffer.allocate(newCapacity));
        }
        buffer.b().flip();
        next.b().put(buffer.b());
        assert next.b().remaining() == newRemaining;
        buffer.discard();
        buffer = next;
        if (callback != null) callback.onBufferGrow(this);
        assert (buffer.b().order() == ByteOrder.BIG_ENDIAN);
    }
}
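/*
 * Illustrative sketch (not part of the original class): the doubling arithmetic from
 * growIfNeeded() above, extracted into a standalone helper so the policy can be checked
 * in isolation. The helper name is hypothetical.
 */
static int newCapacityFor(int capacity, int position, int minimumDesired) {
    int newCapacity = capacity;
    int newRemaining = capacity - position;
    while (newRemaining < minimumDesired) {
        newRemaining += newCapacity; // space gained by the next doubling
        newCapacity *= 2;
    }
    return newCapacity;
}
// e.g. newCapacityFor(1024, 1000, 5000) == 8192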
/** Constructor that sets the callback object. */
public FastSerializer(
        boolean bigEndian, boolean isDirect, BufferGrowCallback callback, int initialAllocation) {
    assert (initialAllocation > 0);
    this.isDirect = isDirect;
    if (isDirect) {
        buffer = DBBPool.allocateDirect(initialAllocation);
    } else {
        buffer = DBBPool.wrapBB(ByteBuffer.allocate(initialAllocation));
    }
    this.callback = callback;
    buffer.b().order(bigEndian ? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN);
}
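/*
 * Hypothetical call site, shown only to illustrate the constructor arguments. The
 * one-argument onBufferGrow(FastSerializer) signature is implied by the call in
 * growIfNeeded() above; everything else here is an assumption.
 */
FastSerializer fs = new FastSerializer(
        true,   // bigEndian
        false,  // isDirect: use a heap ByteBuffer rather than a DirectByteBuffer
        new BufferGrowCallback() {
            @Override
            public void onBufferGrow(FastSerializer source) {
                System.err.println("FastSerializer buffer grew");
            }
        },
        1024);  // initialAllocation in bytes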
BBContainer acquire() {
    final BBContainer cont = m_buffers.poll();
    if (cont == null) {
        final BBContainer originContainer = DBBPool.allocateDirect(1024 * 32);
        return new BBContainer(originContainer.b()) {
            @Override
            public void discard() {
                checkDoubleFree();
                // If we had to allocate over the desired limit, start discarding
                if (m_buffers.size() > m_numBuffers) {
                    originContainer.discard();
                    return;
                }
                m_buffers.push(originContainer);
            }
        };
    }
    return new BBContainer(cont.b()) {
        @Override
        public void discard() {
            checkDoubleFree();
            m_buffers.push(cont);
        }
    };
}
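/*
 * Minimal sketch of the same "wrapper handle" pattern, independent of DBBPool and
 * BBContainer: callers get a buffer whose release either recycles it or drops it once
 * the pool is at capacity. Field and method names are illustrative; requires
 * java.nio.ByteBuffer and java.util.concurrent.ConcurrentLinkedDeque.
 */
final ConcurrentLinkedDeque<ByteBuffer> pool = new ConcurrentLinkedDeque<>();
final int maxPooled = 16;

ByteBuffer acquireBuffer() {
    ByteBuffer b = pool.poll();
    return (b != null) ? b : ByteBuffer.allocateDirect(32 * 1024);
}

void releaseBuffer(ByteBuffer b) {
    b.clear();
    if (pool.size() < maxPooled) {
        pool.push(b); // recycle
    }                 // else: drop the reference and let it be reclaimed
}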
private Container getOutputBuffer(final int nextChunkPartitionId) {
    Container c = m_buffers.poll();
    if (c == null) {
        final BBContainer originContainer = DBBPool.allocateDirect(DEFAULT_CHUNKSIZE);
        final ByteBuffer b = originContainer.b;
        final long pointer = org.voltcore.utils.DBBPool.getBufferAddress(b);
        c = new Container(b, pointer, originContainer, nextChunkPartitionId);
    }
    /*
     * Need to reconstruct the container with the partition id of the next
     * chunk so it can be a final public field. The buffer, address, and origin
     * container remain the same.
     */
    c = new Container(c.b, c.address, c.m_origin, nextChunkPartitionId);
    return c;
}
public void pushExportBuffer(
        int partitionId,
        String signature,
        long uso,
        long bufferPtr,
        ByteBuffer buffer,
        boolean sync,
        boolean endOfStream) {
    // System.out.println("In generation " + m_timestamp + " partition " + partitionId
    //     + " signature " + signature
    //     + (buffer == null ? " null buffer " : (" buffer length " + buffer.remaining())));
    // for (Integer i : m_dataSourcesByPartition.keySet()) {
    //     System.out.println("Have partition " + i);
    // }
    assert (m_dataSourcesByPartition.containsKey(partitionId));
    assert (m_dataSourcesByPartition.get(partitionId).containsKey(signature));
    HashMap<String, ExportDataSource> sources = m_dataSourcesByPartition.get(partitionId);

    if (sources == null) {
        exportLog.error(
            "Could not find export data sources for partition "
                + partitionId
                + " generation "
                + m_timestamp
                + "; the export data is being discarded");
        DBBPool.deleteCharArrayMemory(bufferPtr);
        return;
    }

    ExportDataSource source = sources.get(signature);
    if (source == null) {
        exportLog.error(
            "Could not find export data source for partition "
                + partitionId
                + " signature "
                + signature
                + " generation "
                + m_timestamp
                + "; the export data is being discarded");
        DBBPool.deleteCharArrayMemory(bufferPtr);
        return;
    }

    source.pushExportBuffer(uso, bufferPtr, buffer, sync, endOfStream);
}
public static BBContainer allocateDirectWithAddress(final int capacity) {
    final ByteBuffer retval = ByteBuffer.allocateDirect(capacity);
    bytesAllocatedGlobally.getAndAdd(capacity);
    return new BBContainer(retval, DBBPool.getBufferAddress(retval)) {
        @Override
        public void discard() {
            try {
                DirectMemoryUtils.destroyDirectByteBuffer(retval);
                bytesAllocatedGlobally.getAndAdd(-capacity);
            } catch (Throwable e) {
                VoltDB.crashLocalVoltDB("Failed to deallocate direct byte buffer", false, e);
            }
        }
    };
}
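/*
 * Illustrative sketch of the global accounting used above, assuming bytesAllocatedGlobally
 * is a java.util.concurrent.atomic.AtomicLong: each allocation adds its capacity and each
 * discard subtracts it, so the counter tracks live direct memory. Helper names are
 * hypothetical; eagerly freeing direct memory requires platform-specific code, as
 * DirectMemoryUtils does above.
 */
static final AtomicLong bytesAllocatedGlobally = new AtomicLong(0);

static ByteBuffer trackedAllocate(int capacity) {
    ByteBuffer b = ByteBuffer.allocateDirect(capacity);
    bytesAllocatedGlobally.getAndAdd(capacity);
    return b;
}

static void trackedFree(ByteBuffer b) {
    // Only the accounting side is shown here; the native release is omitted.
    bytesAllocatedGlobally.getAndAdd(-b.capacity());
}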
/*
 * Allocate a DirectByteBuffer from a global lock free pool
 */
public static BBContainer allocateDirectAndPool(final Integer capacity) {
    ConcurrentLinkedQueue<BBContainer> pooledBuffers = m_pooledBuffers.get(capacity);
    if (pooledBuffers == null) {
        pooledBuffers = new ConcurrentLinkedQueue<BBContainer>();
        if (m_pooledBuffers.putIfAbsent(capacity, pooledBuffers) == null) {
            pooledBuffers = m_pooledBuffers.get(capacity);
        }
    }

    BBContainer cont = pooledBuffers.poll();
    if (cont == null) {
        // Create an origin container
        ByteBuffer b = ByteBuffer.allocateDirect(capacity);
        bytesAllocatedGlobally.getAndAdd(capacity);
        cont = new BBContainer(b, DBBPool.getBufferAddress(b)) {
            @Override
            public void discard() {
                try {
                    DirectMemoryUtils.destroyDirectByteBuffer(b);
                    bytesAllocatedGlobally.addAndGet(-capacity);
                } catch (Throwable e) {
                    VoltDB.crashLocalVoltDB("Failed to deallocate direct byte buffer", false, e);
                }
            }
        };
    }
    final BBContainer origin = cont;
    cont = new BBContainer(origin.b, origin.address) {
        @Override
        public void discard() {
            m_pooledBuffers.get(b.capacity()).offer(origin);
        }
    };
    cont.b.clear();
    return cont;
}
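/*
 * Generic sketch of the size-keyed, lock-free pooling idiom above, with illustrative names:
 * one ConcurrentLinkedQueue per capacity, created on demand. computeIfAbsent (Java 8+) is
 * used here as a compact, race-free equivalent of the putIfAbsent dance in the original.
 */
static final ConcurrentHashMap<Integer, ConcurrentLinkedQueue<ByteBuffer>> poolsByCapacity =
        new ConcurrentHashMap<>();

static ByteBuffer acquirePooled(int capacity) {
    ConcurrentLinkedQueue<ByteBuffer> pool =
            poolsByCapacity.computeIfAbsent(capacity, k -> new ConcurrentLinkedQueue<>());
    ByteBuffer b = pool.poll();
    if (b == null) {
        b = ByteBuffer.allocateDirect(capacity);
    }
    b.clear();
    return b;
}

static void releasePooled(ByteBuffer b) {
    poolsByCapacity
            .computeIfAbsent(b.capacity(), k -> new ConcurrentLinkedQueue<>())
            .offer(b);
}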
public static synchronized void pushDRBuffer(int partitionId, ByteBuffer buf) {
    if (logDebug) {
        System.out.println("Received DR buffer size " + buf.remaining());
        AtomicLong haveOpenTransaction = haveOpenTransactionLocal.get();
        buf.order(ByteOrder.LITTLE_ENDIAN);
        // Magic header space for Java for implementing zero copy stuff
        buf.position(8);

        while (buf.hasRemaining()) {
            int startPosition = buf.position();
            byte version = buf.get();
            int type = buf.get();
            int checksum = 0;
            if (version != 0) System.out.println("Remaining is " + buf.remaining());

            switch (DRRecordType.valueOf(type)) {
                case INSERT: {
                    // Insert
                    if (haveOpenTransaction.get() == -1) {
                        System.out.println("Have insert but no open transaction");
                        System.exit(-1);
                    }
                    final long tableHandle = buf.getLong();
                    final int lengthPrefix = buf.getInt();
                    buf.position(buf.position() + lengthPrefix);
                    checksum = buf.getInt();
                    System.out.println(
                        "Version " + version + " type INSERT table handle " + tableHandle
                            + " length " + lengthPrefix + " checksum " + checksum);
                    break;
                }
                case DELETE: {
                    // Delete
                    if (haveOpenTransaction.get() == -1) {
                        System.out.println("Have delete but no open transaction");
                        System.exit(-1);
                    }
                    final long tableHandle = buf.getLong();
                    final int lengthPrefix = buf.getInt();
                    buf.position(buf.position() + lengthPrefix);
                    checksum = buf.getInt();
                    System.out.println(
                        "Version " + version + " type DELETE table handle " + tableHandle
                            + " length " + lengthPrefix + " checksum " + checksum);
                    break;
                }
                case UPDATE:
                    // Update
                    // System.out.println("Version " + version + " type UPDATE checksum " + checksum);
                    break;
                case BEGIN_TXN: {
                    // Begin txn
                    final long txnId = buf.getLong();
                    final long spHandle = buf.getLong();
                    if (haveOpenTransaction.get() != -1) {
                        System.out.println(
                            "Have open transaction txnid " + txnId + " spHandle " + spHandle
                                + " but already open transaction");
                        System.exit(-1);
                    }
                    haveOpenTransaction.set(spHandle);
                    checksum = buf.getInt();
                    System.out.println(
                        "Version " + version + " type BEGIN_TXN txnid " + txnId
                            + " spHandle " + spHandle + " checksum " + checksum);
                    break;
                }
                case END_TXN: {
                    // End txn
                    final long spHandle = buf.getLong();
                    if (haveOpenTransaction.get() == -1) {
                        System.out.println(
                            "Have end transaction spHandle " + spHandle
                                + " but no open transaction and it is less than last committed "
                                + lastCommittedSpHandle.get().get());
                        // checksum = buf.getInt();
                        // break;
                        System.exit(-1);
                    }
                    haveOpenTransaction.set(-1);
                    lastCommittedSpHandle.get().set(spHandle);
                    checksum = buf.getInt();
                    System.out.println(
                        "Version " + version + " type END_TXN spHandle " + spHandle
                            + " checksum " + checksum);
                    break;
                }
            }

            int calculatedChecksum =
                DBBPool.getBufferCRC32C(buf, startPosition, buf.position() - startPosition - 4);
            if (calculatedChecksum != checksum) {
                System.out.println("Checksum " + calculatedChecksum + " didn't match " + checksum);
                System.exit(-1);
            }
        }
    }
    final BBContainer cont = DBBPool.wrapBB(buf);
    DBBPool.registerUnsafeMemory(cont.address());
    cont.discard();
}
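/*
 * Standalone sketch of the per-record checksum check above: compute a CRC32C over
 * [start, start + length) of a ByteBuffer without disturbing its position or limit,
 * roughly what DBBPool.getBufferCRC32C(...) is relied on to do. Uses
 * java.util.zip.CRC32C (JDK 9+) instead of the pool's native implementation; the helper
 * name is hypothetical.
 */
static int bufferCrc32c(ByteBuffer buf, int start, int length) {
    java.util.zip.CRC32C crc = new java.util.zip.CRC32C();
    ByteBuffer view = buf.duplicate(); // independent position/limit
    view.position(start);
    view.limit(start + length);
    crc.update(view);                  // consumes the view, not buf
    return (int) crc.getValue();
}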
/*
 * Prepend length is basically synonymous with writing actual tuple data and not
 * the header.
 */
private ListenableFuture<?> write(
        final Callable<BBContainer> tupleDataC, final boolean prependLength) {
    /*
     * Unwrap the data to be written. For the traditional
     * snapshot data target this should be a noop.
     */
    BBContainer tupleDataTemp;
    try {
        tupleDataTemp = tupleDataC.call();
        /*
         * Can be null if the dedupe filter nulled out the buffer
         */
        if (tupleDataTemp == null) {
            return Futures.immediateFuture(null);
        }
    } catch (Throwable t) {
        return Futures.immediateFailedFuture(t);
    }
    final BBContainer tupleData = tupleDataTemp;

    if (m_writeFailed) {
        tupleData.discard();
        return null;
    }

    m_outstandingWriteTasks.incrementAndGet();

    Future<BBContainer> compressionTask = null;
    if (prependLength) {
        BBContainer cont =
            DBBPool.allocateDirectAndPool(SnapshotSiteProcessor.m_snapshotBufferCompressedLen);
        // Skip 4 bytes so the partition ID is not compressed.
        // That way, if we detect a corruption, we know which partition is bad.
        tupleData.b.position(tupleData.b.position() + 4);
        /*
         * Leave 12 bytes: a 4-byte length prefix, a 4-byte partition id, and a 4-byte
         * CRC32C of just the header bytes, in addition to the compressed payload CRC.
         * That is 16 bytes total, but 4 of those are written by CompressionService.
         */
        cont.b.position(12);
        compressionTask = CompressionService.compressAndCRC32cBufferAsync(tupleData.b, cont);
    }
    final Future<BBContainer> compressionTaskFinal = compressionTask;

    ListenableFuture<?> writeTask =
        m_es.submit(
            new Callable<Object>() {
                @Override
                public Object call() throws Exception {
                    try {
                        if (m_acceptOneWrite) {
                            m_acceptOneWrite = false;
                        } else {
                            if (m_simulateBlockedWrite != null) {
                                m_simulateBlockedWrite.await();
                            }
                            if (m_simulateFullDiskWritingChunk) {
                                throw new IOException("Disk full");
                            }
                        }

                        int totalWritten = 0;
                        if (prependLength) {
                            BBContainer payloadContainer = compressionTaskFinal.get();
                            try {
                                final ByteBuffer payloadBuffer = payloadContainer.b;
                                payloadBuffer.position(0);

                                ByteBuffer lengthPrefix = ByteBuffer.allocate(12);
                                m_bytesAllowedBeforeSync.acquire(payloadBuffer.remaining());
                                // The length prefix does not include the 4 header items,
                                // just the compressed payload that follows.
                                lengthPrefix.putInt(payloadBuffer.remaining() - 16); // length prefix
                                lengthPrefix.putInt(tupleData.b.getInt(0)); // partitionId

                                /*
                                 * Checksum the header and put it in the payload buffer
                                 */
                                PureJavaCrc32C crc = new PureJavaCrc32C();
                                crc.update(lengthPrefix.array(), 0, 8);
                                lengthPrefix.putInt((int) crc.getValue());
                                lengthPrefix.flip();
                                payloadBuffer.put(lengthPrefix);
                                payloadBuffer.position(0);

                                /*
                                 * Write payload to file
                                 */
                                while (payloadBuffer.hasRemaining()) {
                                    totalWritten += m_channel.write(payloadBuffer);
                                }
                            } finally {
                                payloadContainer.discard();
                            }
                        } else {
                            while (tupleData.b.hasRemaining()) {
                                totalWritten += m_channel.write(tupleData.b);
                            }
                        }
                        m_bytesWritten += totalWritten;
                        m_bytesWrittenSinceLastSync.addAndGet(totalWritten);
                    } catch (IOException e) {
                        m_writeException = e;
                        SNAP_LOG.error(
                            "Error while attempting to write snapshot data to file " + m_file, e);
                        m_writeFailed = true;
                        throw e;
                    } finally {
                        try {
                            tupleData.discard();
                        } finally {
                            m_outstandingWriteTasksLock.lock();
                            try {
                                if (m_outstandingWriteTasks.decrementAndGet() == 0) {
                                    m_noMoreOutstandingWriteTasksCondition.signalAll();
                                }
                            } finally {
                                m_outstandingWriteTasksLock.unlock();
                            }
                        }
                    }
                    return null;
                }
            });
    return writeTask;
}
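/*
 * Sketch of the 12-byte chunk header written above (illustrative helper, not part of the
 * original class; java.util.zip.CRC32C from JDK 9+ stands in for PureJavaCrc32C):
 *   bytes 0-3   length of the compressed payload that follows
 *   bytes 4-7   partition id
 *   bytes 8-11  CRC32C over bytes 0-7 (header-only checksum)
 */
static ByteBuffer buildChunkHeader(int payloadLength, int partitionId) {
    ByteBuffer header = ByteBuffer.allocate(12);
    header.putInt(payloadLength);
    header.putInt(partitionId);
    java.util.zip.CRC32C crc = new java.util.zip.CRC32C();
    crc.update(header.array(), 0, 8);
    header.putInt((int) crc.getValue());
    header.flip();
    return header;
}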
public DefaultSnapshotDataTarget(
        final File file,
        final int hostId,
        final String clusterName,
        final String databaseName,
        final String tableName,
        final int numPartitions,
        final boolean isReplicated,
        final List<Integer> partitionIds,
        final VoltTable schemaTable,
        final long txnId,
        final long timestamp,
        int version[])
        throws IOException {
    String hostname = CoreUtils.getHostnameOrAddress();
    m_file = file;
    m_tableName = tableName;
    m_fos = new FileOutputStream(file);
    m_channel = m_fos.getChannel();
    m_needsFinalClose = !isReplicated;

    final FastSerializer fs = new FastSerializer();
    fs.writeInt(0); // CRC
    fs.writeInt(0); // Header length placeholder
    // Indicate the snapshot was not completed: set to true for the CRC calculation,
    // then flipped to false later.
    fs.writeByte(1);
    for (int ii = 0; ii < 4; ii++) {
        fs.writeInt(version[ii]); // version
    }

    JSONStringer stringer = new JSONStringer();
    byte jsonBytes[] = null;
    try {
        stringer.object();
        stringer.key("txnId").value(txnId);
        stringer.key("hostId").value(hostId);
        stringer.key("hostname").value(hostname);
        stringer.key("clusterName").value(clusterName);
        stringer.key("databaseName").value(databaseName);
        stringer.key("tableName").value(tableName.toUpperCase());
        stringer.key("isReplicated").value(isReplicated);
        stringer.key("isCompressed").value(true);
        stringer.key("checksumType").value("CRC32C");
        stringer.key("timestamp").value(timestamp);
        /*
         * The timestamp string is for human consumption; automated consumers should use
         * the actual timestamp.
         */
        stringer.key("timestampString").value(SnapshotUtil.formatHumanReadableDate(timestamp));
        if (!isReplicated) {
            stringer.key("partitionIds").array();
            for (int partitionId : partitionIds) {
                stringer.value(partitionId);
            }
            stringer.endArray();
            stringer.key("numPartitions").value(numPartitions);
        }
        stringer.endObject();

        String jsonString = stringer.toString();
        JSONObject jsonObj = new JSONObject(jsonString);
        jsonString = jsonObj.toString(4);
        jsonBytes = jsonString.getBytes("UTF-8");
    } catch (Exception e) {
        throw new IOException(e);
    }
    fs.writeInt(jsonBytes.length);
    fs.write(jsonBytes);

    final BBContainer container = fs.getBBContainer();
    container.b.position(4);
    container.b.putInt(container.b.remaining() - 4);
    container.b.position(0);

    final byte schemaBytes[] = PrivateVoltTableFactory.getSchemaBytes(schemaTable);

    final PureJavaCrc32 crc = new PureJavaCrc32();
    ByteBuffer aggregateBuffer = ByteBuffer.allocate(container.b.remaining() + schemaBytes.length);
    aggregateBuffer.put(container.b);
    aggregateBuffer.put(schemaBytes);
    aggregateBuffer.flip();
    crc.update(aggregateBuffer.array(), 4, aggregateBuffer.capacity() - 4);

    final int crcValue = (int) crc.getValue();
    aggregateBuffer.putInt(crcValue).position(8);
    aggregateBuffer.put((byte) 0).position(0); // Haven't actually finished writing the file

    if (m_simulateFullDiskWritingHeader) {
        m_writeException = new IOException("Disk full");
        m_writeFailed = true;
        m_fos.close();
        throw m_writeException;
    }

    /*
     * Be completely sure the write succeeded. If it didn't,
     * the disk is probably full or the path is bunk, etc.
     */
    m_acceptOneWrite = true;
    ListenableFuture<?> writeFuture =
        write(Callables.returning((BBContainer) DBBPool.wrapBB(aggregateBuffer)), false);
    try {
        writeFuture.get();
    } catch (InterruptedException e) {
        m_fos.close();
        throw new java.io.InterruptedIOException();
    } catch (ExecutionException e) {
        m_fos.close();
        throw m_writeException;
    }
    if (m_writeFailed) {
        m_fos.close();
        throw m_writeException;
    }

    ScheduledFuture<?> syncTask = null;
    syncTask =
        m_syncService.scheduleAtFixedRate(
            new Runnable() {
                @Override
                public void run() {
                    // Only sync when there is at least 4 megabytes of data, enough to amortize
                    // the cost of seeking on ye olden platters. Since we are appending to a file
                    // it's actually 2 seeks.
                    while (m_bytesWrittenSinceLastSync.get() > (1024 * 1024 * 4)) {
                        final int bytesSinceLastSync = m_bytesWrittenSinceLastSync.getAndSet(0);
                        try {
                            m_channel.force(false);
                        } catch (IOException e) {
                            if (!(e instanceof java.nio.channels.AsynchronousCloseException)) {
                                SNAP_LOG.error("Error syncing snapshot", e);
                            } else {
                                SNAP_LOG.debug(
                                    "Asynchronous close while syncing snapshot data, presumably graceful",
                                    e);
                            }
                        }
                        m_bytesAllowedBeforeSync.release(bytesSinceLastSync);
                    }
                }
            },
            SNAPSHOT_SYNC_FREQUENCY,
            SNAPSHOT_SYNC_FREQUENCY,
            TimeUnit.MILLISECONDS);
    m_syncTask = syncTask;
}
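/*
 * Minimal sketch of the periodic-fsync idiom used in the constructor above, with
 * illustrative names: a scheduled task forces the FileChannel to disk only once enough
 * bytes have accumulated, then returns that budget to writers via a semaphore. Requires
 * java.util.concurrent.* and java.nio.channels.FileChannel.
 */
ScheduledExecutorService syncService = Executors.newSingleThreadScheduledExecutor();
AtomicInteger bytesSinceSync = new AtomicInteger();
Semaphore bytesAllowedBeforeSync = new Semaphore(16 * 1024 * 1024);

void scheduleSync(FileChannel channel, long periodMillis) {
    syncService.scheduleAtFixedRate(() -> {
        while (bytesSinceSync.get() > 4 * 1024 * 1024) {
            int drained = bytesSinceSync.getAndSet(0);
            try {
                channel.force(false);                // flush data, skip metadata
            } catch (IOException e) {
                // A closed channel here usually means the target finished normally.
            }
            bytesAllowedBeforeSync.release(drained); // let writers proceed again
        }
    }, periodMillis, periodMillis, TimeUnit.MILLISECONDS);
}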
private void readChunks() {
    // For reading the compressed input.
    ByteBuffer fileInputBuffer =
        ByteBuffer.allocateDirect(CompressionService.maxCompressedLength(DEFAULT_CHUNKSIZE));

    while (m_hasMoreChunks) {
        /*
         * Limit the number of chunks materialized into memory at one time
         */
        try {
            m_chunkReads.acquire();
        } catch (InterruptedException e) {
            return;
        }
        boolean expectedAnotherChunk = false;
        try {
            /*
             * Get the length of the next chunk, the partition id, and the CRC for the partition id
             */
            ByteBuffer chunkLengthB = ByteBuffer.allocate(16);
            while (chunkLengthB.hasRemaining()) {
                final int read = m_saveFile.read(chunkLengthB);
                if (read == -1) {
                    throw new EOFException();
                }
            }
            chunkLengthB.flip();
            int nextChunkLength = chunkLengthB.getInt();
            expectedAnotherChunk = true;

            /*
             * Get the partition id and its CRC and validate it. Validating the
             * partition ID for the chunk separately makes it possible to
             * continue processing chunks from other partitions if only one partition
             * has corrupt chunks in the file.
             */
            final Checksum partitionIdCRC =
                m_checksumType == ChecksumType.CRC32C ? new PureJavaCrc32C() : new PureJavaCrc32();
            chunkLengthB.mark();
            final int nextChunkPartitionId = chunkLengthB.getInt();
            final int nextChunkPartitionIdCRC = chunkLengthB.getInt();
            chunkLengthB.reset();
            byte partitionIdBytes[] = new byte[4];
            chunkLengthB.get(partitionIdBytes);
            partitionIdCRC.update(partitionIdBytes, 0, partitionIdBytes.length);
            int generatedValue = (int) partitionIdCRC.getValue();
            if (generatedValue != nextChunkPartitionIdCRC) {
                chunkLengthB.position(0);
                for (int partitionId : m_partitionIds) {
                    m_corruptedPartitions.add(partitionId);
                }
                throw new IOException(
                    "Chunk partition ID CRC check failed. "
                        + "This corrupts all partitions in this file");
            }

            /*
             * CRC for the data portion of the chunk
             */
            chunkLengthB.position(chunkLengthB.position() + 4);
            final int nextChunkCRC = chunkLengthB.getInt();

            /*
             * Sanity check the length value to ensure there isn't
             * a runtime exception or OOM.
             */
            if (nextChunkLength < 0) {
                throw new IOException("Corrupted TableSaveFile chunk has negative chunk length");
            }

            if (isCompressed()) {
                if (nextChunkLength > fileInputBuffer.capacity()) {
                    throw new IOException(
                        "Corrupted TableSaveFile chunk has unreasonable length "
                            + "> DEFAULT_CHUNKSIZE bytes");
                }
            } else {
                if (nextChunkLength > DEFAULT_CHUNKSIZE) {
                    throw new IOException(
                        "Corrupted TableSaveFile chunk has unreasonable length "
                            + "> DEFAULT_CHUNKSIZE bytes");
                }
            }

            /*
             * Go fetch the compressed data so that the uncompressed size is known,
             * and use that to set nextChunkLength to the uncompressed length.
             * The code ahead that constructs the volt table expects
             * the uncompressed size/data since it is producing an uncompressed table.
             */
            if (isCompressed()) {
                fileInputBuffer.clear();
                fileInputBuffer.limit(nextChunkLength);
                while (fileInputBuffer.hasRemaining()) {
                    final int read = m_saveFile.read(fileInputBuffer);
                    if (read == -1) {
                        throw new EOFException();
                    }
                }
                fileInputBuffer.flip();
                nextChunkLength = CompressionService.uncompressedLength(fileInputBuffer);
            }

            /*
             * Now allocate space to store the chunk using the VoltTable serialization
             * representation. The chunk will contain an integer row count preceding it so it
             * can be sucked straight in. There is a little funny business to overwrite the
             * partition id that is not part of the serialization format.
             */
            Container c = getOutputBuffer(nextChunkPartitionId);

            /*
             * If the length value is wrong or not all data made it to disk, this read will
             * not complete correctly. There could be overflow, underflow, etc.,
             * so use a try-finally block to indicate that all partitions are now corrupt.
             * The enclosing exception handlers will do the right thing WRT
             * propagating the error and closing the file.
             */
            boolean completedRead = false;
            int checksumStartPosition = 0;
            int rowCount = 0;
            try {
                /*
                 * Assemble a VoltTable out of the chunk of tuples.
                 * Put in the header that was cached in the constructor,
                 * then copy the tuple data. The row count is at the end
                 * because it isn't known until serialization is complete.
                 * It will have to be moved back to the beginning of the tuple data
                 * after the header once the CRC has been calculated.
                 */
                c.b.clear();
                // The length of the chunk already includes space for the 4-byte row count
                // even though it is at the end, but we also need to leave space at the end
                // for the CRC calc.
                if (isCompressed()) {
                    c.b.limit(nextChunkLength + m_tableHeader.capacity() + 4);
                } else {
                    // Before compression the chunk length included the stuff added in the EE,
                    // like the 2 CRCs and the partition id. It is only -8 because we still
                    // need the 4 bytes of padding to move the row count into when constructing
                    // the volt table format.
                    c.b.limit((nextChunkLength - 8) + m_tableHeader.capacity());
                }
                m_tableHeader.position(0);
                c.b.put(m_tableHeader);
                c.b.position(c.b.position() + 4); // Leave space for the row count to be moved into
                checksumStartPosition = c.b.position();
                if (isCompressed()) {
                    CompressionService.decompressBuffer(fileInputBuffer, c.b);
                    c.b.position(c.b.limit());
                } else {
                    while (c.b.hasRemaining()) {
                        final int read = m_saveFile.read(c.b);
                        if (read == -1) {
                            throw new EOFException();
                        }
                    }
                }
                c.b.position(c.b.position() - 4);
                rowCount = c.b.getInt();
                c.b.position(checksumStartPosition);
                completedRead = true;
            } finally {
                if (!completedRead) {
                    for (int partitionId : m_partitionIds) {
                        m_corruptedPartitions.add(partitionId);
                    }
                }
            }

            /*
             * Validate the rest of the chunk. This can fail if the data is corrupted
             * or the length value was corrupted.
             */
            final int calculatedCRC =
                m_checksumType == ChecksumType.CRC32C
                    ? DBBPool.getCRC32C(c.address, c.b.position(), c.b.remaining())
                    : DBBPool.getCRC32(c.address, c.b.position(), c.b.remaining());
            if (calculatedCRC != nextChunkCRC) {
                m_corruptedPartitions.add(nextChunkPartitionId);
                if (m_continueOnCorruptedChunk) {
                    c.discard();
                    m_chunkReads.release();
                    continue;
                } else {
                    throw new IOException("CRC mismatch in saved table chunk");
                }
            }

            /*
             * Skip irrelevant chunks after the CRC is calculated. Always calculate the CRC
             * in case it is the length value that is corrupted.
             */
            if (m_relevantPartitionIds != null) {
                if (!m_relevantPartitionIds.contains(nextChunkPartitionId)) {
                    c.discard();
                    m_chunkReads.release();
                    continue;
                }
            }

            /*
             * The row count, which was stored on disk at the end (and for the CRC calc),
             * is now moved to the appropriate place for the table serialization format.
             * Update the limit to reflect that.
             *
             * Surrounded in a try-finally just in case there is overflow/underflow. Shouldn't
             * happen, but I could be wrong.
             */
            boolean success = false;
            try {
                c.b.limit(c.b.limit() - 4);
                c.b.position(checksumStartPosition - 4);
                c.b.putInt(rowCount);
                c.b.position(0);
                success = true;
            } finally {
                if (!success) {
                    for (int partitionId : m_partitionIds) {
                        m_corruptedPartitions.add(partitionId);
                    }
                }
            }

            synchronized (TableSaveFile.this) {
                m_availableChunks.offer(c);
                TableSaveFile.this.notifyAll();
            }
        } catch (EOFException eof) {
            synchronized (TableSaveFile.this) {
                m_hasMoreChunks = false;
                if (expectedAnotherChunk) {
                    m_chunkReaderException =
                        new IOException("Expected to find another chunk but reached end of file instead");
                }
                TableSaveFile.this.notifyAll();
            }
        } catch (IOException e) {
            synchronized (TableSaveFile.this) {
                m_hasMoreChunks = false;
                m_chunkReaderException = e;
                TableSaveFile.this.notifyAll();
            }
        } catch (BufferUnderflowException e) {
            synchronized (TableSaveFile.this) {
                m_hasMoreChunks = false;
                m_chunkReaderException = new IOException(e);
                TableSaveFile.this.notifyAll();
            }
        } catch (BufferOverflowException e) {
            synchronized (TableSaveFile.this) {
                m_hasMoreChunks = false;
                m_chunkReaderException = new IOException(e);
                TableSaveFile.this.notifyAll();
            }
        } catch (IndexOutOfBoundsException e) {
            synchronized (TableSaveFile.this) {
                m_hasMoreChunks = false;
                m_chunkReaderException = new IOException(e);
                TableSaveFile.this.notifyAll();
            }
        }
    }
}
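/*
 * Sketch of the read-until-full idiom used repeatedly above: a single read() on a channel
 * may return fewer bytes than requested, so loop until the buffer is filled and treat -1
 * as end-of-file. Illustrative helper, not part of TableSaveFile; requires
 * java.nio.channels.ReadableByteChannel.
 */
static ByteBuffer readFully(ReadableByteChannel channel, int size) throws IOException {
    ByteBuffer b = ByteBuffer.allocate(size);
    while (b.hasRemaining()) {
        if (channel.read(b) == -1) {
            throw new EOFException("Hit end of file with " + b.remaining() + " bytes unread");
        }
    }
    b.flip();
    return b;
}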
/*
 * The old method was out of hand. Going to start a new one with a different format
 * that should be easier to understand and validate.
 */
private void readChunksV2() {
    // For reading the compressed input.
    ByteBuffer fileInputBuffer =
        ByteBuffer.allocateDirect(CompressionService.maxCompressedLength(DEFAULT_CHUNKSIZE));

    while (m_hasMoreChunks) {
        /*
         * Limit the number of chunks materialized into memory at one time
         */
        try {
            m_chunkReads.acquire();
        } catch (InterruptedException e) {
            return;
        }
        boolean expectedAnotherChunk = false;
        try {
            /*
             * Get the length of the next chunk, the partition id, the CRC for the partition id
             * and length prefix, and then the CRC of the compressed payload
             */
            ByteBuffer chunkLengthB = ByteBuffer.allocate(16);
            while (chunkLengthB.hasRemaining()) {
                final int read = m_saveFile.read(chunkLengthB);
                if (read == -1) {
                    throw new EOFException();
                }
            }
            int nextChunkLength = chunkLengthB.getInt(0);
            expectedAnotherChunk = true;

            /*
             * Get the partition id and its CRC (the CRC now covers the length prefix) and
             * validate it. Validating the partition ID for the chunk separately makes it
             * possible to continue processing chunks from other partitions if only one
             * partition has corrupt chunks in the file.
             */
            assert (m_checksumType == ChecksumType.CRC32C);
            final Checksum partitionIdCRC = new PureJavaCrc32C();
            final int nextChunkPartitionId = chunkLengthB.getInt(4);
            final int nextChunkPartitionIdCRC = chunkLengthB.getInt(8);
            partitionIdCRC.update(chunkLengthB.array(), 0, 8);
            int generatedValue = (int) partitionIdCRC.getValue();
            if (generatedValue != nextChunkPartitionIdCRC) {
                chunkLengthB.position(0);
                for (int partitionId : m_partitionIds) {
                    m_corruptedPartitions.add(partitionId);
                }
                throw new IOException(
                    "Chunk partition ID CRC check failed. "
                        + "This corrupts all partitions in this file");
            }

            /*
             * CRC for the data portion of the chunk
             */
            final int nextChunkCRC = chunkLengthB.getInt(12);

            /*
             * Sanity check the length value to ensure there isn't
             * a runtime exception or OOM.
             */
            if (nextChunkLength < 0) {
                throw new IOException("Corrupted TableSaveFile chunk has negative chunk length");
            }

            if (nextChunkLength > fileInputBuffer.capacity()) {
                throw new IOException(
                    "Corrupted TableSaveFile chunk has unreasonable length "
                        + "> DEFAULT_CHUNKSIZE bytes");
            }

            /*
             * Go fetch the compressed data so that the uncompressed size is known,
             * and use that to set nextChunkLength to the uncompressed length.
             * The code ahead that constructs the volt table expects
             * the uncompressed size/data since it is producing an uncompressed table.
             */
            fileInputBuffer.clear();
            fileInputBuffer.limit(nextChunkLength);
            while (fileInputBuffer.hasRemaining()) {
                final int read = m_saveFile.read(fileInputBuffer);
                if (read == -1) {
                    throw new EOFException();
                }
            }
            fileInputBuffer.flip();
            nextChunkLength = CompressionService.uncompressedLength(fileInputBuffer);

            /*
             * Validate the rest of the chunk. This can fail if the data is corrupted
             * or the length value was corrupted.
             */
            final int calculatedCRC =
                DBBPool.getBufferCRC32C(fileInputBuffer, 0, fileInputBuffer.remaining());
            if (calculatedCRC != nextChunkCRC) {
                m_corruptedPartitions.add(nextChunkPartitionId);
                if (m_continueOnCorruptedChunk) {
                    m_chunkReads.release();
                    continue;
                } else {
                    throw new IOException("CRC mismatch in saved table chunk");
                }
            }

            /*
             * Now allocate space to store the chunk using the VoltTable serialization
             * representation. The chunk will contain an integer row count preceding it so it
             * can be sucked straight in. There is a little funny business to overwrite the
             * partition id that is not part of the serialization format.
             */
            Container c = getOutputBuffer(nextChunkPartitionId);

            /*
             * If the length value is wrong or not all data made it to disk, this read will
             * not complete correctly. There could be overflow, underflow, etc.,
             * so use a try-finally block to indicate that all partitions are now corrupt.
             * The enclosing exception handlers will do the right thing WRT
             * propagating the error and closing the file.
             */
            boolean completedRead = false;
            try {
                /*
                 * Assemble a VoltTable out of the chunk of tuples.
                 * Put in the header that was cached in the constructor,
                 * then copy the tuple data.
                 */
                c.b.clear();
                c.b.limit(nextChunkLength + m_tableHeader.capacity());
                m_tableHeader.position(0);
                c.b.put(m_tableHeader);
                // Doesn't move the buffer position, does change the limit
                CompressionService.decompressBuffer(fileInputBuffer, c.b);
                completedRead = true;
            } finally {
                if (!completedRead) {
                    for (int partitionId : m_partitionIds) {
                        m_corruptedPartitions.add(partitionId);
                    }
                }
            }

            /*
             * Skip irrelevant chunks after the CRC is calculated. Always calculate the CRC
             * in case it is the length value that is corrupted.
             */
            if (m_relevantPartitionIds != null) {
                if (!m_relevantPartitionIds.contains(nextChunkPartitionId)) {
                    c.discard();
                    m_chunkReads.release();
                    continue;
                }
            }

            /*
             * VoltTable wants the buffer at the home position 0
             */
            c.b.position(0);

            synchronized (TableSaveFile.this) {
                m_availableChunks.offer(c);
                TableSaveFile.this.notifyAll();
            }
        } catch (EOFException eof) {
            synchronized (TableSaveFile.this) {
                m_hasMoreChunks = false;
                if (expectedAnotherChunk) {
                    m_chunkReaderException =
                        new IOException("Expected to find another chunk but reached end of file instead");
                }
                TableSaveFile.this.notifyAll();
            }
        } catch (IOException e) {
            synchronized (TableSaveFile.this) {
                m_hasMoreChunks = false;
                m_chunkReaderException = e;
                TableSaveFile.this.notifyAll();
            }
        } catch (BufferUnderflowException e) {
            synchronized (TableSaveFile.this) {
                m_hasMoreChunks = false;
                m_chunkReaderException = new IOException(e);
                TableSaveFile.this.notifyAll();
            }
        } catch (BufferOverflowException e) {
            synchronized (TableSaveFile.this) {
                m_hasMoreChunks = false;
                m_chunkReaderException = new IOException(e);
                TableSaveFile.this.notifyAll();
            }
        } catch (IndexOutOfBoundsException e) {
            synchronized (TableSaveFile.this) {
                m_hasMoreChunks = false;
                m_chunkReaderException = new IOException(e);
                TableSaveFile.this.notifyAll();
            }
        }
    }
}
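/*
 * Sketch of the V2 on-disk chunk layout implied by readChunksV2() above (illustrative
 * helper; java.util.zip.CRC32C from JDK 9+ stands in for PureJavaCrc32C, and the header
 * buffer is assumed to be heap-allocated so array() is available):
 *   bytes 0-3    compressed payload length
 *   bytes 4-7    partition id
 *   bytes 8-11   CRC32C over bytes 0-7
 *   bytes 12-15  CRC32C over the compressed payload
 *   bytes 16..   compressed payload
 */
static void validateChunkHeader(ByteBuffer header, ByteBuffer payload) throws IOException {
    java.util.zip.CRC32C headerCrc = new java.util.zip.CRC32C();
    headerCrc.update(header.array(), 0, 8);
    if ((int) headerCrc.getValue() != header.getInt(8)) {
        throw new IOException("Chunk partition ID CRC check failed");
    }
    java.util.zip.CRC32C payloadCrc = new java.util.zip.CRC32C();
    payloadCrc.update(payload.duplicate()); // duplicate() leaves the caller's position intact
    if ((int) payloadCrc.getValue() != header.getInt(12)) {
        throw new IOException("CRC mismatch in saved table chunk");
    }
}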
public void pushExportBuffer(
        long uso, final long bufferPtr, ByteBuffer buffer, boolean sync, boolean endOfStream) {
    final java.util.concurrent.atomic.AtomicBoolean deleted =
        new java.util.concurrent.atomic.AtomicBoolean(false);
    synchronized (m_committedBuffers) {
        if (endOfStream) {
            assert (!m_endOfStream);
            assert (bufferPtr == 0);
            assert (buffer == null);
            assert (!sync);
            m_endOfStream = endOfStream;
            if (m_committedBuffers.sizeInBytes() == 0) {
                exportLog.info("Pushed EOS buffer with 0 bytes remaining");
                try {
                    m_onDrain.run();
                } finally {
                    m_onDrain = null;
                }
            }
            return;
        }
        assert (!m_endOfStream);
        if (buffer != null) {
            if (buffer.capacity() > 0) {
                try {
                    m_committedBuffers.offer(
                        new StreamBlock(
                            new BBContainer(buffer, bufferPtr) {
                                @Override
                                public void discard() {
                                    DBBPool.deleteCharArrayMemory(address);
                                    deleted.set(true);
                                }
                            },
                            uso,
                            false));
                } catch (IOException e) {
                    exportLog.error(e);
                    if (!deleted.get()) {
                        DBBPool.deleteCharArrayMemory(bufferPtr);
                    }
                }
            } else {
                /*
                 * TupleStreamWrapper::setBytesUsed propagates the USO by sending
                 * over an empty stream block. The block will be deleted
                 * on the native side when this method returns.
                 */
                exportLog.info(
                    "Syncing first unpolled USO to " + uso + " for table " + m_tableName
                        + " partition " + m_partitionId);
                m_firstUnpolledUso = uso;
            }
        }
        if (sync) {
            try {
                // Don't do a real sync, just write the in memory buffers
                // to a file. @Quiesce or blocking snapshot will do the sync.
                m_committedBuffers.sync(true);
            } catch (IOException e) {
                exportLog.error(e);
            }
        }
    }
}
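/*
 * Sketch of the release-exactly-once idiom used above (illustrative class, not the real
 * BBContainer API): an AtomicBoolean records whether the discard path already freed the
 * native memory, so an error path only frees it when ownership was never handed off.
 */
static final class NativeBlock {
    final AtomicBoolean freed = new AtomicBoolean(false);
    final long address;

    NativeBlock(long address) {
        this.address = address;
    }

    void free() {
        if (freed.compareAndSet(false, true)) {
            // Release the native allocation at 'address' here (the actual call is
            // pool-specific and omitted from this sketch).
        }
    }
}
// A producer would call free() from its discard callback; an error handler checks
// freed.get() before deciding to free the memory itself, mirroring the deleted flag above.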