/**
 * Calculates a checksum for a given string.
 *
 * @param string string from which a checksum should be obtained
 * @return a checksum allowing two events with the same properties to be grouped later.
 */
private static String calculateChecksum(String string) {
  byte[] bytes = string.getBytes(Charsets.UTF_8);
  Checksum checksum = new CRC32();
  checksum.update(bytes, 0, bytes.length);
  return Long.toHexString(checksum.getValue()).toUpperCase();
}
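// Illustrative sketch (not part of the original sources): a minimal, self-contained
// demonstration of the grouping idea above -- equal strings yield the same CRC32 value,
// so they can share a grouping key, while different strings almost always do not.
// Uses only java.util.zip and java.nio.charset; the class and sample strings are hypothetical.
import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;
import java.util.zip.Checksum;

public class ChecksumGroupingDemo {
  static String checksumOf(String s) {
    byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
    Checksum checksum = new CRC32();
    checksum.update(bytes, 0, bytes.length);
    return Long.toHexString(checksum.getValue()).toUpperCase();
  }

  public static void main(String[] args) {
    // Equal inputs group together under the same checksum key.
    System.out.println(checksumOf("NullPointerException in FooService"));
    System.out.println(checksumOf("NullPointerException in FooService")); // same value
    System.out.println(checksumOf("Timeout in BarClient"));               // different value
  }
}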
private boolean isSameFile(VFSLeaf currentFile, VersionsFileImpl versions) {
  boolean same = false;
  if (versions.getRevisions() != null && !versions.getRevisions().isEmpty()) {
    VFSRevision lastRevision = versions.getRevisions().get(versions.getRevisions().size() - 1);
    long lastSize = lastRevision.getSize();
    long currentSize = currentFile.getSize();
    if (currentSize == lastSize
        && currentSize > 0
        && lastRevision instanceof RevisionFileImpl
        && currentFile instanceof LocalFileImpl) {
      RevisionFileImpl lastRev = ((RevisionFileImpl) lastRevision);
      LocalFileImpl current = (LocalFileImpl) currentFile;
      // can be the same file
      try {
        Checksum cm1 =
            FileUtils.checksum(((LocalFileImpl) lastRev.getFile()).getBasefile(), new Adler32());
        Checksum cm2 = FileUtils.checksum(current.getBasefile(), new Adler32());
        same = cm1.getValue() == cm2.getValue();
      } catch (IOException e) {
        log.debug("Error calculating the checksum of files");
      }
    }
  }
  return same;
}
private void decompress(byte[] compressed) throws IOException {
  // uncompress
  validBufferBytes =
      info.parameters.sstableCompressor.uncompress(
          compressed, 0, compressed.length - checksumBytes.length, buffer, 0);
  uncompressedBytes += validBufferBytes;

  // validate crc randomly
  if (info.parameters.getCrcCheckChance() > FBUtilities.threadLocalRandom().nextDouble()) {
    checksum.update(buffer, 0, validBufferBytes);

    System.arraycopy(
        compressed,
        compressed.length - checksumBytes.length,
        checksumBytes,
        0,
        checksumBytes.length);
    if (Ints.fromByteArray(checksumBytes) != (int) checksum.getValue())
      throw new IOException("CRC unmatched");

    // reset checksum object back to the original (blank) state
    checksum.reset();
  }

  // buffer offset is always aligned
  bufferOffset = current & ~(buffer.length - 1);
}
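// Illustrative sketch (not from the original project): the "validate crc randomly" idea
// above reduced to plain JDK classes. A configurable crcCheckChance decides whether a
// block's CRC32 is recomputed and compared against the stored value; names are hypothetical.
import java.io.IOException;
import java.util.concurrent.ThreadLocalRandom;
import java.util.zip.CRC32;

public class RandomCrcCheckDemo {
  static void maybeVerify(byte[] block, int storedCrc, double crcCheckChance) throws IOException {
    if (crcCheckChance > ThreadLocalRandom.current().nextDouble()) {
      CRC32 checksum = new CRC32();
      checksum.update(block, 0, block.length);
      if (storedCrc != (int) checksum.getValue()) {
        throw new IOException("CRC unmatched");
      }
      // A fresh CRC32 instance is used per call, so no reset() is needed here.
    }
  }

  public static void main(String[] args) throws IOException {
    byte[] block = "some decompressed block".getBytes();
    CRC32 crc = new CRC32();
    crc.update(block, 0, block.length);
    maybeVerify(block, (int) crc.getValue(), 1.0); // a chance of 1.0 always verifies
    System.out.println("Block verified");
  }
}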
private void flushBufferedData() throws IOException {
  if (o == 0) {
    return;
  }
  checksum.reset();
  checksum.update(buffer, 0, o);
  final int check = (int) checksum.getValue();
  int compressedLength = compressor.compress(buffer, 0, o, compressedBuffer, HEADER_LENGTH);
  final int compressMethod;
  if (compressedLength >= o) {
    compressMethod = COMPRESSION_METHOD_RAW;
    compressedLength = o;
    System.arraycopy(buffer, 0, compressedBuffer, HEADER_LENGTH, o);
  } else {
    compressMethod = COMPRESSION_METHOD_LZ4;
  }
  compressedBuffer[MAGIC_LENGTH] = (byte) (compressMethod | compressionLevel);
  writeIntLE(compressedLength, compressedBuffer, MAGIC_LENGTH + 1);
  writeIntLE(o, compressedBuffer, MAGIC_LENGTH + 5);
  writeIntLE(check, compressedBuffer, MAGIC_LENGTH + 9);
  assert MAGIC_LENGTH + 13 == HEADER_LENGTH;
  out.write(compressedBuffer, 0, HEADER_LENGTH + compressedLength);
  o = 0;
}
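// Illustrative sketch (an assumption, not copied from the original class): the writeIntLE
// helper used above presumably encodes an int into the header buffer in little-endian
// order, which is what this minimal version does.
static void writeIntLE(int i, byte[] buf, int off) {
  buf[off] = (byte) i;              // least significant byte first
  buf[off + 1] = (byte) (i >>> 8);
  buf[off + 2] = (byte) (i >>> 16);
  buf[off + 3] = (byte) (i >>> 24); // most significant byte last
}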
public ByteString calculateTag(Checksum crc, byte[] value) {
  crc.update(value, 0, value.length);
  long csum = crc.getValue();
  ByteBuffer buffer = ByteBuffer.allocate(8);
  return ByteString.copyFrom(buffer.putLong(csum).array());
}
@Test
public void findStreamingFile() throws Exception {
  FileStreamStateHandler rwd = new FileStreamStateHandler();

  File testFilesDir = new File(samplesDir, "/multiple-logs/");
  File[] testFiles = testFilesDir.listFiles((FilenameFilter) new WildcardFileFilter("orders*"));

  FileAccessState newFAS = new FileAccessState();

  int count = 0;
  File fileToSearchFor = null;
  int lineLastRead = 0;
  File fileWritten = null;
  for (File testFile : testFiles) {
    count++;
    FileReader in;
    LineNumberReader reader;

    Long fileCRC = rwd.getFileCrc(testFile);
    if (count == 2) {
      newFAS.currentFileCrc = fileCRC;
      fileToSearchFor = testFile;
    }

    in = new FileReader(testFile);
    reader = new LineNumberReader(in);
    reader.setLineNumber(0);
    String line = reader.readLine();
    int count2 = 0;
    while (line != null) {
      count2++;
      Checksum crcLine = new CRC32();
      final byte[] bytes4Line = line.getBytes();
      crcLine.update(bytes4Line, 0, bytes4Line.length);
      final long lineCRC = crcLine.getValue();
      final int lineNumber = reader.getLineNumber();
      System.out.println("for " + lineNumber + " line CRC is " + lineCRC);
      if (count2 == 3) {
        newFAS.currentLineCrc = lineCRC;
        newFAS.currentLineNumber = lineNumber;
        newFAS.lastReadTime = System.currentTimeMillis();
        lineLastRead = lineNumber;
      }
      line = reader.readLine();
    }
    fileWritten = AbstractFileStreamStateHandler.writeState(newFAS, testFilesDir, "TestStream");
    Utils.close(reader);
  }

  final File findLastProcessed = rwd.findStreamingFile(newFAS, testFiles);
  assertEquals(fileToSearchFor, findLastProcessed);
  final int lineLastReadRecorded = rwd.checkLine(findLastProcessed, newFAS);
  assertEquals(lineLastRead, lineLastReadRecorded);

  fileWritten.delete();
}
public final void update(int i) {
  int b0 = (i >> 24) & 0xff;
  int b1 = (i >> 16) & 0xff;
  int b2 = (i >> 8) & 0xff;
  int b3 = i & 0xff;
  crc.update(b0);
  crc.update(b1);
  crc.update(b2);
  crc.update(b3);
  // com.oddlabs.tt.util.ChecksumLogger.log(i);
}
private static Long computeChecksum(CompilationUnit unit, String sigString) {
  Long chksum = null;
  if (sigString != null) {
    // store the signature as a checksum
    final byte[] bytes = sigString.getBytes();
    final Checksum checksum = new Adler32(); // much faster than CRC32, almost as reliable
    checksum.update(bytes, 0, bytes.length);
    chksum = new Long(checksum.getValue());
    // debug("COMPUTE CRC32: " + chksum + "\t--> " + unit.getSource().getNameForReporting());
  }
  return chksum;
}
public SpillRecord(Path indexFileName, JobConf job, Checksum crc, String expectedIndexOwner)
    throws IOException {
  final FileSystem rfs = FileSystem.getLocal(job).getRaw();
  final DataInputStream in =
      new DataInputStream(
          SecureIOUtils.openForRead(
              new File(indexFileName.toUri().getPath()), expectedIndexOwner, null));
  try {
    final long length = rfs.getFileStatus(indexFileName).getLen();
    final int partitions = (int) length / MAP_OUTPUT_INDEX_RECORD_LENGTH;
    final int size = partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    buf = ByteBuffer.allocate(size);
    if (crc != null) {
      crc.reset();
      CheckedInputStream chk = new CheckedInputStream(in, crc);
      IOUtils.readFully(chk, buf.array(), 0, size);
      if (chk.getChecksum().getValue() != in.readLong()) {
        throw new ChecksumException("Checksum error reading spill index: " + indexFileName, -1);
      }
    } else {
      IOUtils.readFully(in, buf.array(), 0, size);
    }
    entries = buf.asLongBuffer();
  } finally {
    in.close();
  }
}
@Override
public void close() throws IOException {
  out.close();
  String checksum = null;
  IndexOutput underlying = out;
  // TODO: cut over to lucene's CRC
  // *WARNING*: lucene has classes in same o.a.l.store package with very similar names,
  // but using CRC, not Adler!
  if (underlying instanceof BufferedChecksumIndexOutput) {
    Checksum digest = ((BufferedChecksumIndexOutput) underlying).digest();
    assert digest instanceof Adler32;
    checksum = Long.toString(digest.getValue(), Character.MAX_RADIX);
  }
  synchronized (mutex) {
    StoreFileMetaData md =
        new StoreFileMetaData(
            name, metaData.directory().fileLength(name), checksum, metaData.directory());
    filesMetadata = ImmutableOpenMap.builder(filesMetadata).fPut(name, md).build();
    files = filesMetadata.keys().toArray(String.class);
  }
}
public void writeToFile(Path loc, JobConf job, Checksum crc) throws IOException {
  final FileSystem rfs = FileSystem.getLocal(job).getRaw();
  CheckedOutputStream chk = null;
  final FSDataOutputStream out = rfs.create(loc);
  try {
    if (crc != null) {
      crc.reset();
      chk = new CheckedOutputStream(out, crc);
      chk.write(buf.array());
      out.writeLong(chk.getChecksum().getValue());
    } else {
      out.write(buf.array());
    }
  } finally {
    if (chk != null) {
      chk.close();
    } else {
      out.close();
    }
  }
}
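// Illustrative sketch (not from the original project): the same write-then-verify pattern
// used by the two spill-record methods above, reduced to plain java.io/java.util.zip so it
// runs standalone. The payload goes through a CheckedOutputStream, the checksum value is
// appended as a trailing long, and the reader recomputes it with a CheckedInputStream.
// Class and payload names are hypothetical.
import java.io.*;
import java.util.zip.*;

public class CheckedRoundTripDemo {
  public static void main(String[] args) throws IOException {
    byte[] payload = "spill index record".getBytes("UTF-8");

    // Write: payload through the checksum stream, then the checksum value itself.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    CheckedOutputStream chkOut = new CheckedOutputStream(out, new CRC32());
    chkOut.write(payload);
    out.writeLong(chkOut.getChecksum().getValue());
    out.flush();

    // Read: payload through the checksum stream, then compare with the stored value.
    DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
    CheckedInputStream chkIn = new CheckedInputStream(in, new CRC32());
    byte[] readBack = new byte[payload.length];
    new DataInputStream(chkIn).readFully(readBack);
    long stored = in.readLong();
    if (chkIn.getChecksum().getValue() != stored) {
      throw new IOException("Checksum mismatch");
    }
    System.out.println("Checksum verified: " + stored);
  }
}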
public void reset() {
  summer.reset();
  inSum = 0;
}
public void update(byte[] b, int off, int len) {
  if (len > 0) {
    summer.update(b, off, len);
    inSum += len;
  }
}
public void recover(File file) throws IOException {
  logger.info("Replaying " + file.getPath());
  final long segment = CommitLogSegment.idFromFilename(file.getName());
  RandomAccessReader reader = RandomAccessReader.open(new File(file.getAbsolutePath()), true);
  try {
    assert reader.length() <= Integer.MAX_VALUE;
    int replayPosition;
    if (globalPosition.segment < segment)
      replayPosition = 0;
    else if (globalPosition.segment == segment)
      replayPosition = globalPosition.position;
    else
      replayPosition = (int) reader.length();

    if (replayPosition < 0 || replayPosition >= reader.length()) {
      // replayPosition > reader.length() can happen if some data gets flushed before it is
      // written to the commitlog
      // (see https://issues.apache.org/jira/browse/CASSANDRA-2285)
      logger.debug("skipping replay of fully-flushed {}", file);
      return;
    }

    reader.seek(replayPosition);

    if (logger.isDebugEnabled())
      logger.debug("Replaying " + file + " starting at " + reader.getFilePointer());

    /* read the logs populate RowMutation and apply */
    while (!reader.isEOF()) {
      if (logger.isDebugEnabled())
        logger.debug("Reading mutation at " + reader.getFilePointer());

      long claimedCRC32;
      int serializedSize;
      try {
        // any of the reads may hit EOF
        serializedSize = reader.readInt();
        if (serializedSize == CommitLog.END_OF_SEGMENT_MARKER) {
          logger.debug("Encountered end of segment marker at " + reader.getFilePointer());
          break;
        }

        // RowMutation must be at LEAST 10 bytes:
        // 3 each for a non-empty Table and Key (including the
        // 2-byte length from writeUTF/writeWithShortLength) and 4 bytes for column count.
        // This prevents the CRC from being fooled by special-case garbage in the file; see
        // CASSANDRA-2128
        if (serializedSize < 10)
          break;
        long claimedSizeChecksum = reader.readLong();
        checksum.reset();
        checksum.update(serializedSize);
        if (checksum.getValue() != claimedSizeChecksum)
          break; // entry wasn't synced correctly/fully. that's ok.

        if (serializedSize > buffer.length)
          buffer = new byte[(int) (1.2 * serializedSize)];
        reader.readFully(buffer, 0, serializedSize);
        claimedCRC32 = reader.readLong();
      } catch (EOFException eof) {
        break; // last CL entry didn't get completely written. that's ok.
      }

      checksum.update(buffer, 0, serializedSize);
      if (claimedCRC32 != checksum.getValue()) {
        // this entry must not have been fsynced. probably the rest is bad too,
        // but just in case there is no harm in trying them (since we still read on an entry
        // boundary)
        continue;
      }

      /* deserialize the commit log entry */
      FastByteArrayInputStream bufIn = new FastByteArrayInputStream(buffer, 0, serializedSize);
      RowMutation rm;
      try {
        // assuming version here. We've gone to lengths to make sure what gets written to the CL
        // is in the current version. so do make sure the CL is drained prior to upgrading a node.
        rm =
            RowMutation.serializer()
                .deserialize(
                    new DataInputStream(bufIn),
                    MessagingService.version_,
                    IColumnSerializer.Flag.LOCAL);
      } catch (UnknownColumnFamilyException ex) {
        AtomicInteger i = invalidMutations.get(ex.cfId);
        if (i == null) {
          i = new AtomicInteger(1);
          invalidMutations.put(ex.cfId, i);
        } else
          i.incrementAndGet();
        continue;
      }

      if (logger.isDebugEnabled())
        logger.debug(
            String.format(
                "replaying mutation for %s.%s: %s",
                rm.getTable(),
                ByteBufferUtil.bytesToHex(rm.key()),
                "{" + StringUtils.join(rm.getColumnFamilies().iterator(), ", ") + "}"));

      final long entryLocation = reader.getFilePointer();
      final RowMutation frm = rm;
      Runnable runnable =
          new WrappedRunnable() {
            public void runMayThrow() throws IOException {
              if (Schema.instance.getKSMetaData(frm.getTable()) == null)
                return;
              if (pointInTimeExceeded(frm))
                return;

              final Table table = Table.open(frm.getTable());
              RowMutation newRm = new RowMutation(frm.getTable(), frm.key());

              // Rebuild the row mutation, omitting column families that
              // a) have already been flushed,
              // b) are part of a cf that was dropped. Keep in mind that the cf.name() is suspect.
              //    do every thing based on the cfid instead.
              for (ColumnFamily columnFamily : frm.getColumnFamilies()) {
                if (Schema.instance.getCF(columnFamily.id()) == null)
                  continue; // null means the cf has been dropped

                ReplayPosition rp = cfPositions.get(columnFamily.id());

                // replay if current segment is newer than last flushed one or,
                // if it is the last known segment, if we are after the replay position
                if (segment > rp.segment
                    || (segment == rp.segment && entryLocation > rp.position)) {
                  newRm.add(columnFamily);
                  replayedCount.incrementAndGet();
                }
              }
              if (!newRm.isEmpty()) {
                Table.open(newRm.getTable()).apply(newRm, false);
                tablesRecovered.add(table);
              }
            }
          };
      futures.add(StageManager.getStage(Stage.MUTATION).submit(runnable));
      if (futures.size() > MAX_OUTSTANDING_REPLAY_COUNT) {
        FBUtilities.waitOnFutures(futures);
        futures.clear();
      }
    }
  } finally {
    FileUtils.closeQuietly(reader);
    logger.info("Finished reading " + file);
  }
}
public static String getCRC32(String key) {
  byte[] bytes = key.getBytes();
  Checksum checksum = new CRC32();
  checksum.update(bytes, 0, bytes.length);
  return String.valueOf(checksum.getValue());
}
public void update(int b) {
  summer.update(b);
  inSum += 1;
}
public final int getValue() {
  return (int) crc.getValue();
}
/*
 * The old method was out of hand. Going to start a new one with a different format
 * that should be easier to understand and validate.
 */
private void readChunksV2() {
  // For reading the compressed input.
  ByteBuffer fileInputBuffer =
      ByteBuffer.allocateDirect(CompressionService.maxCompressedLength(DEFAULT_CHUNKSIZE));

  while (m_hasMoreChunks) {
    /*
     * Limit the number of chunks materialized into memory at one time
     */
    try {
      m_chunkReads.acquire();
    } catch (InterruptedException e) {
      return;
    }
    boolean expectedAnotherChunk = false;
    try {
      /*
       * Get the length of the next chunk, partition id, crc for partition id and length prefix,
       * and then the CRC of the compressed payload
       */
      ByteBuffer chunkLengthB = ByteBuffer.allocate(16);
      while (chunkLengthB.hasRemaining()) {
        final int read = m_saveFile.read(chunkLengthB);
        if (read == -1) {
          throw new EOFException();
        }
      }
      int nextChunkLength = chunkLengthB.getInt(0);
      expectedAnotherChunk = true;

      /*
       * Get the partition id and its CRC (CRC now covers length prefix) and validate it.
       * Validating the partition ID for the chunk separately makes it possible to
       * continue processing chunks from other partitions if only one partition
       * has corrupt chunks in the file.
       */
      assert (m_checksumType == ChecksumType.CRC32C);
      final Checksum partitionIdCRC = new PureJavaCrc32C();
      final int nextChunkPartitionId = chunkLengthB.getInt(4);
      final int nextChunkPartitionIdCRC = chunkLengthB.getInt(8);
      partitionIdCRC.update(chunkLengthB.array(), 0, 8);
      int generatedValue = (int) partitionIdCRC.getValue();
      if (generatedValue != nextChunkPartitionIdCRC) {
        chunkLengthB.position(0);
        for (int partitionId : m_partitionIds) {
          m_corruptedPartitions.add(partitionId);
        }
        throw new IOException(
            "Chunk partition ID CRC check failed. "
                + "This corrupts all partitions in this file");
      }

      /*
       * CRC for the data portion of the chunk
       */
      final int nextChunkCRC = chunkLengthB.getInt(12);

      /*
       * Sanity check the length value to ensure there isn't
       * a runtime exception or OOM.
       */
      if (nextChunkLength < 0) {
        throw new IOException("Corrupted TableSaveFile chunk has negative chunk length");
      }

      if (nextChunkLength > fileInputBuffer.capacity()) {
        throw new IOException(
            "Corrupted TableSaveFile chunk has unreasonable length "
                + "> DEFAULT_CHUNKSIZE bytes");
      }

      /*
       * Go fetch the compressed data so that the uncompressed size is known
       * and use that to set nextChunkLength to be the uncompressed length;
       * the code ahead that constructs the volt table is expecting
       * the uncompressed size/data since it is producing an uncompressed table
       */
      fileInputBuffer.clear();
      fileInputBuffer.limit(nextChunkLength);
      while (fileInputBuffer.hasRemaining()) {
        final int read = m_saveFile.read(fileInputBuffer);
        if (read == -1) {
          throw new EOFException();
        }
      }
      fileInputBuffer.flip();
      nextChunkLength = CompressionService.uncompressedLength(fileInputBuffer);

      /*
       * Validate the rest of the chunk. This can fail if the data is corrupted
       * or the length value was corrupted.
       */
      final int calculatedCRC =
          DBBPool.getBufferCRC32C(fileInputBuffer, 0, fileInputBuffer.remaining());
      if (calculatedCRC != nextChunkCRC) {
        m_corruptedPartitions.add(nextChunkPartitionId);
        if (m_continueOnCorruptedChunk) {
          m_chunkReads.release();
          continue;
        } else {
          throw new IOException("CRC mismatch in saved table chunk");
        }
      }

      /*
       * Now allocate space to store the chunk using the VoltTable serialization representation.
       * The chunk will contain an integer row count preceding it so it can
       * be sucked straight in. There is a little funny business to overwrite the
       * partition id that is not part of the serialization format
       */
      Container c = getOutputBuffer(nextChunkPartitionId);

      /*
       * If the length value is wrong or not all data made it to disk, this read will
       * not complete correctly. There could be overflow, underflow etc.,
       * so use a try finally block to indicate that all partitions are now corrupt.
       * The enclosing exception handlers will do the right thing WRT
       * propagating the error and closing the file.
       */
      boolean completedRead = false;
      try {
        /*
         * Assemble a VoltTable out of the chunk of tuples.
         * Put in the header that was cached in the constructor,
         * then copy the tuple data.
         */
        c.b.clear();
        c.b.limit(nextChunkLength + m_tableHeader.capacity());
        m_tableHeader.position(0);
        c.b.put(m_tableHeader);
        // Doesn't move buffer position, does change the limit
        CompressionService.decompressBuffer(fileInputBuffer, c.b);
        completedRead = true;
      } finally {
        if (!completedRead) {
          for (int partitionId : m_partitionIds) {
            m_corruptedPartitions.add(partitionId);
          }
        }
      }

      /*
       * Skip irrelevant chunks after the CRC is calculated. Always calculate the CRC
       * in case it is the length value that is corrupted
       */
      if (m_relevantPartitionIds != null) {
        if (!m_relevantPartitionIds.contains(nextChunkPartitionId)) {
          c.discard();
          m_chunkReads.release();
          continue;
        }
      }

      /*
       * VoltTable wants the buffer at the home position 0
       */
      c.b.position(0);

      synchronized (TableSaveFile.this) {
        m_availableChunks.offer(c);
        TableSaveFile.this.notifyAll();
      }
    } catch (EOFException eof) {
      synchronized (TableSaveFile.this) {
        m_hasMoreChunks = false;
        if (expectedAnotherChunk) {
          m_chunkReaderException =
              new IOException("Expected to find another chunk but reached end of file instead");
        }
        TableSaveFile.this.notifyAll();
      }
    } catch (IOException e) {
      synchronized (TableSaveFile.this) {
        m_hasMoreChunks = false;
        m_chunkReaderException = e;
        TableSaveFile.this.notifyAll();
      }
    } catch (BufferUnderflowException e) {
      synchronized (TableSaveFile.this) {
        m_hasMoreChunks = false;
        m_chunkReaderException = new IOException(e);
        TableSaveFile.this.notifyAll();
      }
    } catch (BufferOverflowException e) {
      synchronized (TableSaveFile.this) {
        m_hasMoreChunks = false;
        m_chunkReaderException = new IOException(e);
        TableSaveFile.this.notifyAll();
      }
    } catch (IndexOutOfBoundsException e) {
      synchronized (TableSaveFile.this) {
        m_hasMoreChunks = false;
        m_chunkReaderException = new IOException(e);
        TableSaveFile.this.notifyAll();
      }
    }
  }
}
public long getChecksum() {
  return digest.getValue();
}
@Override
public byte readByte() throws IOException {
  final byte b = main.readByte();
  digest.update(b);
  return b;
}
private void readChunks() {
  // For reading the compressed input.
  ByteBuffer fileInputBuffer =
      ByteBuffer.allocateDirect(CompressionService.maxCompressedLength(DEFAULT_CHUNKSIZE));

  while (m_hasMoreChunks) {
    /*
     * Limit the number of chunks materialized into memory at one time
     */
    try {
      m_chunkReads.acquire();
    } catch (InterruptedException e) {
      return;
    }
    boolean expectedAnotherChunk = false;
    try {
      /*
       * Get the length of the next chunk, partition id, crc for partition id
       */
      ByteBuffer chunkLengthB = ByteBuffer.allocate(16);
      while (chunkLengthB.hasRemaining()) {
        final int read = m_saveFile.read(chunkLengthB);
        if (read == -1) {
          throw new EOFException();
        }
      }
      chunkLengthB.flip();
      int nextChunkLength = chunkLengthB.getInt();
      expectedAnotherChunk = true;

      /*
       * Get the partition id and its CRC and validate it. Validating the
       * partition ID for the chunk separately makes it possible to
       * continue processing chunks from other partitions if only one partition
       * has corrupt chunks in the file.
       */
      final Checksum partitionIdCRC =
          m_checksumType == ChecksumType.CRC32C ? new PureJavaCrc32C() : new PureJavaCrc32();
      chunkLengthB.mark();
      final int nextChunkPartitionId = chunkLengthB.getInt();
      final int nextChunkPartitionIdCRC = chunkLengthB.getInt();
      chunkLengthB.reset();
      byte partitionIdBytes[] = new byte[4];
      chunkLengthB.get(partitionIdBytes);
      partitionIdCRC.update(partitionIdBytes, 0, partitionIdBytes.length);
      int generatedValue = (int) partitionIdCRC.getValue();
      if (generatedValue != nextChunkPartitionIdCRC) {
        chunkLengthB.position(0);
        for (int partitionId : m_partitionIds) {
          m_corruptedPartitions.add(partitionId);
        }
        throw new IOException(
            "Chunk partition ID CRC check failed. "
                + "This corrupts all partitions in this file");
      }

      /*
       * CRC for the data portion of the chunk
       */
      chunkLengthB.position(chunkLengthB.position() + 4);
      final int nextChunkCRC = chunkLengthB.getInt();

      /*
       * Sanity check the length value to ensure there isn't
       * a runtime exception or OOM.
       */
      if (nextChunkLength < 0) {
        throw new IOException("Corrupted TableSaveFile chunk has negative chunk length");
      }

      if (isCompressed()) {
        if (nextChunkLength > fileInputBuffer.capacity()) {
          throw new IOException(
              "Corrupted TableSaveFile chunk has unreasonable length "
                  + "> DEFAULT_CHUNKSIZE bytes");
        }
      } else {
        if (nextChunkLength > DEFAULT_CHUNKSIZE) {
          throw new IOException(
              "Corrupted TableSaveFile chunk has unreasonable length "
                  + "> DEFAULT_CHUNKSIZE bytes");
        }
      }

      /*
       * Go fetch the compressed data so that the uncompressed size is known
       * and use that to set nextChunkLength to be the uncompressed length;
       * the code ahead that constructs the volt table is expecting
       * the uncompressed size/data since it is producing an uncompressed table
       */
      if (isCompressed()) {
        fileInputBuffer.clear();
        fileInputBuffer.limit(nextChunkLength);
        while (fileInputBuffer.hasRemaining()) {
          final int read = m_saveFile.read(fileInputBuffer);
          if (read == -1) {
            throw new EOFException();
          }
        }
        fileInputBuffer.flip();
        nextChunkLength = CompressionService.uncompressedLength(fileInputBuffer);
      }

      /*
       * Now allocate space to store the chunk using the VoltTable serialization representation.
       * The chunk will contain an integer row count preceding it so it can
       * be sucked straight in. There is a little funny business to overwrite the
       * partition id that is not part of the serialization format
       */
      Container c = getOutputBuffer(nextChunkPartitionId);

      /*
       * If the length value is wrong or not all data made it to disk, this read will
       * not complete correctly. There could be overflow, underflow etc.,
       * so use a try finally block to indicate that all partitions are now corrupt.
       * The enclosing exception handlers will do the right thing WRT
       * propagating the error and closing the file.
       */
      boolean completedRead = false;
      int checksumStartPosition = 0;
      int rowCount = 0;
      try {
        /*
         * Assemble a VoltTable out of the chunk of tuples.
         * Put in the header that was cached in the constructor,
         * then copy the tuple data. The row count is at the end
         * because it isn't known until serialization is complete.
         * It will have to be moved back to the beginning of the tuple data
         * after the header once the CRC has been calculated.
         */
        c.b.clear();
        // The length of the chunk already includes space for the 4-byte row count
        // even though it is at the end, but we also need to leave space at the end
        // for the CRC calc
        if (isCompressed()) {
          c.b.limit(nextChunkLength + m_tableHeader.capacity() + 4);
        } else {
          // Before compression the chunk length included the stuff added in the EE
          // like the 2 CRCs and partition id. It is only -8 because we still need the 4 bytes
          // of padding to move the row count in when constructing the volt table format.
          c.b.limit((nextChunkLength - 8) + m_tableHeader.capacity());
        }
        m_tableHeader.position(0);
        c.b.put(m_tableHeader);
        c.b.position(c.b.position() + 4); // Leave space for row count to be moved into
        checksumStartPosition = c.b.position();
        if (isCompressed()) {
          CompressionService.decompressBuffer(fileInputBuffer, c.b);
          c.b.position(c.b.limit());
        } else {
          while (c.b.hasRemaining()) {
            final int read = m_saveFile.read(c.b);
            if (read == -1) {
              throw new EOFException();
            }
          }
        }
        c.b.position(c.b.position() - 4);
        rowCount = c.b.getInt();
        c.b.position(checksumStartPosition);
        completedRead = true;
      } finally {
        if (!completedRead) {
          for (int partitionId : m_partitionIds) {
            m_corruptedPartitions.add(partitionId);
          }
        }
      }

      /*
       * Validate the rest of the chunk. This can fail if the data is corrupted
       * or the length value was corrupted.
       */
      final int calculatedCRC =
          m_checksumType == ChecksumType.CRC32C
              ? DBBPool.getCRC32C(c.address, c.b.position(), c.b.remaining())
              : DBBPool.getCRC32(c.address, c.b.position(), c.b.remaining());
      if (calculatedCRC != nextChunkCRC) {
        m_corruptedPartitions.add(nextChunkPartitionId);
        if (m_continueOnCorruptedChunk) {
          c.discard();
          m_chunkReads.release();
          continue;
        } else {
          throw new IOException("CRC mismatch in saved table chunk");
        }
      }

      /*
       * Skip irrelevant chunks after the CRC is calculated. Always calculate the CRC
       * in case it is the length value that is corrupted
       */
      if (m_relevantPartitionIds != null) {
        if (!m_relevantPartitionIds.contains(nextChunkPartitionId)) {
          c.discard();
          m_chunkReads.release();
          continue;
        }
      }

      /*
       * The row count which was stored on disk at the end (and for the CRC calc)
       * is now moved to the appropriate place for the table serialization format.
       * Update the limit to reflect that.
       *
       * Surrounded in a try finally just in case there is overflow/underflow. Shouldn't
       * happen but I could be wrong.
       */
      boolean success = false;
      try {
        c.b.limit(c.b.limit() - 4);
        c.b.position(checksumStartPosition - 4);
        c.b.putInt(rowCount);
        c.b.position(0);
        success = true;
      } finally {
        if (!success) {
          for (int partitionId : m_partitionIds) {
            m_corruptedPartitions.add(partitionId);
          }
        }
      }

      synchronized (TableSaveFile.this) {
        m_availableChunks.offer(c);
        TableSaveFile.this.notifyAll();
      }
    } catch (EOFException eof) {
      synchronized (TableSaveFile.this) {
        m_hasMoreChunks = false;
        if (expectedAnotherChunk) {
          m_chunkReaderException =
              new IOException("Expected to find another chunk but reached end of file instead");
        }
        TableSaveFile.this.notifyAll();
      }
    } catch (IOException e) {
      synchronized (TableSaveFile.this) {
        m_hasMoreChunks = false;
        m_chunkReaderException = e;
        TableSaveFile.this.notifyAll();
      }
    } catch (BufferUnderflowException e) {
      synchronized (TableSaveFile.this) {
        m_hasMoreChunks = false;
        m_chunkReaderException = new IOException(e);
        TableSaveFile.this.notifyAll();
      }
    } catch (BufferOverflowException e) {
      synchronized (TableSaveFile.this) {
        m_hasMoreChunks = false;
        m_chunkReaderException = new IOException(e);
        TableSaveFile.this.notifyAll();
      }
    } catch (IndexOutOfBoundsException e) {
      synchronized (TableSaveFile.this) {
        m_hasMoreChunks = false;
        m_chunkReaderException = new IOException(e);
        TableSaveFile.this.notifyAll();
      }
    }
  }
}
/**
 * Calculates a CRC32 checksum of the specified data.
 *
 * @param data - data to be hashed.
 * @return - a CRC32 checksum (4 bytes).
 */
private int crc(byte[] data) {
  checksum.reset();
  checksum.update(data, 0, data.length);
  return (int) checksum.getValue();
}
/**
 * An almost-unique hash identifying this event to improve aggregation.
 *
 * @param message The message we are sending to sentry
 * @return CRC32 Checksum string
 */
public static String calculateChecksum(String message) {
  byte bytes[] = message.getBytes();
  Checksum checksum = new CRC32();
  checksum.update(bytes, 0, bytes.length);
  return String.valueOf(checksum.getValue());
}
@Override
public void readBytes(byte[] b, int offset, int len) throws IOException {
  main.readBytes(b, offset, len);
  digest.update(b, offset, len);
}
public int hash(byte[] data, int len) {
  Checksum checksum = new CRC32();
  checksum.update(data, 0, len);
  // Note: CRC32.getValue() only populates the low 32 bits, so the high-half fold below
  // only matters for Checksum implementations whose value uses the full long.
  return (int) (((checksum.getValue() >> 32) ^ checksum.getValue()) & INTEGER_MASK);
}
public static void recover(File[] clogs) throws IOException {
  Set<Table> tablesRecovered = new HashSet<Table>();
  List<Future<?>> futures = new ArrayList<Future<?>>();
  byte[] bytes = new byte[4096];
  Map<Integer, AtomicInteger> invalidMutations = new HashMap<Integer, AtomicInteger>();

  for (File file : clogs) {
    int bufferSize = (int) Math.min(file.length(), 32 * 1024 * 1024);
    BufferedRandomAccessFile reader =
        new BufferedRandomAccessFile(file.getAbsolutePath(), "r", bufferSize);

    try {
      CommitLogHeader clHeader = null;
      int replayPosition = 0;
      String headerPath = CommitLogHeader.getHeaderPathFromSegmentPath(file.getAbsolutePath());
      try {
        clHeader = CommitLogHeader.readCommitLogHeader(headerPath);
        replayPosition = clHeader.getReplayPosition();
      } catch (IOException ioe) {
        logger.info(
            headerPath
                + " incomplete, missing or corrupt. Everything is ok, don't panic. CommitLog will be replayed from the beginning");
        logger.debug("exception was", ioe);
      }
      if (replayPosition < 0) {
        logger.debug("skipping replay of fully-flushed {}", file);
        continue;
      }
      reader.seek(replayPosition);

      if (logger.isDebugEnabled())
        logger.debug("Replaying " + file + " starting at " + reader.getFilePointer());

      /* read the logs populate RowMutation and apply */
      while (!reader.isEOF()) {
        if (logger.isDebugEnabled())
          logger.debug("Reading mutation at " + reader.getFilePointer());

        long claimedCRC32;
        Checksum checksum = new CRC32();
        int serializedSize;
        try {
          // any of the reads may hit EOF
          serializedSize = reader.readInt();
          long claimedSizeChecksum = reader.readLong();
          checksum.update(serializedSize);
          if (checksum.getValue() != claimedSizeChecksum || serializedSize <= 0)
            break; // entry wasn't synced correctly/fully. that's ok.

          if (serializedSize > bytes.length)
            bytes = new byte[(int) (1.2 * serializedSize)];
          reader.readFully(bytes, 0, serializedSize);
          claimedCRC32 = reader.readLong();
        } catch (EOFException eof) {
          break; // last CL entry didn't get completely written. that's ok.
        }

        checksum.update(bytes, 0, serializedSize);
        if (claimedCRC32 != checksum.getValue()) {
          // this entry must not have been fsynced. probably the rest is bad too,
          // but just in case there is no harm in trying them (since we still read on an entry
          // boundary)
          continue;
        }

        /* deserialize the commit log entry */
        ByteArrayInputStream bufIn = new ByteArrayInputStream(bytes, 0, serializedSize);
        RowMutation rm = null;
        try {
          rm = RowMutation.serializer().deserialize(new DataInputStream(bufIn));
        } catch (UnserializableColumnFamilyException ex) {
          AtomicInteger i = invalidMutations.get(ex.cfId);
          if (i == null) {
            i = new AtomicInteger(1);
            invalidMutations.put(ex.cfId, i);
          } else
            i.incrementAndGet();
          continue;
        }

        if (logger.isDebugEnabled())
          logger.debug(
              String.format(
                  "replaying mutation for %s.%s: %s",
                  rm.getTable(),
                  rm.key(),
                  "{" + StringUtils.join(rm.getColumnFamilies(), ", ") + "}"));
        final Table table = Table.open(rm.getTable());
        tablesRecovered.add(table);
        final Collection<ColumnFamily> columnFamilies =
            new ArrayList<ColumnFamily>(rm.getColumnFamilies());
        final long entryLocation = reader.getFilePointer();
        final CommitLogHeader finalHeader = clHeader;
        final RowMutation frm = rm;
        Runnable runnable =
            new WrappedRunnable() {
              public void runMayThrow() throws IOException {
                RowMutation newRm = new RowMutation(frm.getTable(), frm.key());

                // Rebuild the row mutation, omitting column families that a) have already been
                // flushed, b) are part of a cf that was dropped. Keep in mind that the cf.name()
                // is suspect. do every thing based on the cfid instead.
                for (ColumnFamily columnFamily : columnFamilies) {
                  if (CFMetaData.getCF(columnFamily.id()) == null)
                    continue; // null means the cf has been dropped

                  if (finalHeader == null
                      || (finalHeader.isDirty(columnFamily.id())
                          && entryLocation >= finalHeader.getPosition(columnFamily.id())))
                    newRm.add(columnFamily);
                }
                if (!newRm.isEmpty()) {
                  Table.open(newRm.getTable()).apply(newRm, null, false);
                }
              }
            };
        futures.add(StageManager.getStage(Stage.MUTATION).submit(runnable));
        if (futures.size() > MAX_OUTSTANDING_REPLAY_COUNT) {
          FBUtilities.waitOnFutures(futures);
          futures.clear();
        }
      }
    } finally {
      reader.close();
      logger.info("Finished reading " + file);
    }
  }

  for (Map.Entry<Integer, AtomicInteger> entry : invalidMutations.entrySet())
    logger.info(
        String.format(
            "Skipped %d mutations from unknown (probably removed) CF with id %d",
            entry.getValue().intValue(), entry.getKey()));

  // wait for all the writes to finish on the mutation stage
  FBUtilities.waitOnFutures(futures);
  logger.debug("Finished waiting on mutations from recovery");

  // flush replayed tables
  futures.clear();
  for (Table table : tablesRecovered)
    futures.addAll(table.flush());
  FBUtilities.waitOnFutures(futures);
  logger.info("Recovery complete");
}
/**
 * The async processing loop that writes to the data files and does the force calls. Since the
 * file sync() call is the slowest of all the operations, this algorithm tries to 'batch' or
 * group together several file sync() requests into a single file sync() call. The batching is
 * accomplished by attaching the same CountDownLatch instance to every force request in a group.
 */
private void processQueue() {
  DataFile dataFile = null;
  RandomAccessFile file = null;
  try {
    DataByteArrayOutputStream buff =
        new DataByteArrayOutputStream(journal.getMaxWriteBatchSize());
    boolean last = false;
    while (true) {
      WriteBatch wb = batchQueue.take();
      if (shutdown) {
        last = true;
      }
      if (!wb.writes.isEmpty()) {
        boolean newOrRotated = dataFile != wb.dataFile;
        if (newOrRotated) {
          if (file != null) {
            dataFile.closeRandomAccessFile(file);
          }
          dataFile = wb.dataFile;
          file = dataFile.openRandomAccessFile();
        }

        // Write an empty batch control record.
        buff.reset();
        buff.writeInt(Journal.BATCH_CONTROL_RECORD_SIZE);
        buff.writeByte(Journal.BATCH_CONTROL_RECORD_TYPE);
        buff.write(Journal.BATCH_CONTROL_RECORD_MAGIC);
        buff.writeInt(0);
        buff.writeLong(0);

        boolean forceToDisk = false;

        WriteCommand control = wb.writes.poll();
        WriteCommand first = wb.writes.peek();
        WriteCommand latest = null;
        for (WriteCommand current : wb.writes) {
          forceToDisk |= current.sync;
          buff.writeInt(current.location.getSize());
          buff.writeByte(current.location.getType());
          buff.write(current.data.getData(), current.data.getOffset(), current.data.getLength());
          latest = current;
        }

        Buffer sequence = buff.toBuffer();

        // Now we can fill in the batch control record properly.
        buff.reset();
        buff.skip(Journal.HEADER_SIZE + Journal.BATCH_CONTROL_RECORD_MAGIC.length);
        buff.writeInt(sequence.getLength() - Journal.BATCH_CONTROL_RECORD_SIZE);
        if (journal.isChecksum()) {
          Checksum checksum = new Adler32();
          checksum.update(
              sequence.getData(),
              sequence.getOffset() + Journal.BATCH_CONTROL_RECORD_SIZE,
              sequence.getLength() - Journal.BATCH_CONTROL_RECORD_SIZE);
          buff.writeLong(checksum.getValue());
        }

        // Now do the 1 big write.
        file.seek(wb.offset);
        file.write(sequence.getData(), sequence.getOffset(), sequence.getLength());

        ReplicationTarget replicationTarget = journal.getReplicationTarget();
        if (replicationTarget != null) {
          replicationTarget.replicate(control.location, sequence, forceToDisk);
        }

        if (forceToDisk) {
          IOHelper.sync(file.getFD());
        }

        journal.setLastAppendLocation(latest.location);

        // Now that the data is on disk, remove the writes from the in flight cache.
        inflightWrites.remove(control.location);
        for (WriteCommand current : wb.writes) {
          if (!current.sync) {
            inflightWrites.remove(current.location);
          }
        }

        if (journal.getListener() != null) {
          try {
            journal.getListener().synced(wb.writes.toArray(new WriteCommand[wb.writes.size()]));
          } catch (Throwable ex) {
            warn(ex, ex.getMessage());
          }
        }

        // Clear unused data:
        wb.writes.clear();

        // Signal any waiting threads that the write is on disk.
        wb.latch.countDown();
      }
      if (last) {
        break;
      }
    }
  } catch (Exception e) {
    firstAsyncException.compareAndSet(null, e);
  } finally {
    try {
      if (file != null) {
        dataFile.closeRandomAccessFile(file);
      }
    } catch (Throwable ignore) {
    }
    shutdownDone.countDown();
  }
}
public void run() {
  try {
    byte[] out_data = new byte[pkt_size];
    InetAddress dst_addr = InetAddress.getByName("127.0.0.1");

    // To register the recv_port at the UnreliNet first
    DatagramPacket out_pkt =
        new DatagramPacket(
            ("REG:" + recv_port).getBytes(),
            ("REG:" + recv_port).getBytes().length,
            dst_addr,
            dst_port);
    sk_out.send(out_pkt);

    int len = (int) (new File(inPath).length());
    FileInputStream fis = new FileInputStream(inPath);
    int currbytes = 0;
    int flag = 0;
    byte currentbyte[] = new byte[850];

    try {
      while (true) {
        out_data = outPath.getBytes();
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        outputStream.write("fNm".getBytes());
        outputStream.write(out_data);
        outputStream.write("fNm".getBytes());
        outputStream.write("dAtA".getBytes());

        if (curr == seq) {
          if (flag == 1) {
            // empty packet to signify finish
            DatagramPacket final_pkt =
                new DatagramPacket(
                    "afIlEdAtAFINdAtAcRc975554582cRcsEq-1sEqafIlE".getBytes(),
                    "afIlEdAtAFINdAtAcRc975554582cRcsEq-1sEqafIlE".getBytes().length,
                    dst_addr,
                    dst_port);
            for (int i = 0; i < 20; i++) sk_out.send(final_pkt);
            System.exit(0);
          }
          if (currbytes + 850 <= len) {
            currentbyte = new byte[850];
            fis.read(currentbyte, 0, 850);
            currbytes += 850;
          } else {
            currentbyte = new byte[len - currbytes];
            fis.read(currentbyte, 0, len - currbytes);
            flag = 1;
          }
        }

        currentbyte = process(currentbyte);
        outputStream.write(currentbyte);
        outputStream.write("dAtA".getBytes());
        byte fin[] = outputStream.toByteArray();
        System.out.println();
        System.out.println("CRC pkt size:" + fin.length);

        // update checksum
        Checksum checksum = new CRC32();
        checksum.update(fin, 0, fin.length);
        long checksumValue = checksum.getValue();

        outputStream = new ByteArrayOutputStream();
        outputStream.write("afIlE".getBytes());
        outputStream.write(fin);
        outputStream.write("fIlE".getBytes());
        outputStream.write("cRc".getBytes());
        outputStream.write(String.valueOf(checksumValue).getBytes());
        outputStream.write("cRc".getBytes());
        outputStream.write("sEq".getBytes());
        outputStream.write(String.valueOf(seq).getBytes());
        outputStream.write("sEqa".getBytes());
        byte pkt[] = outputStream.toByteArray();
        curr = seq;
        seq++;
        System.out.println("total size: " + pkt.length);

        // send the packet
        out_pkt = new DatagramPacket(pkt, pkt.length, dst_addr, dst_port);
        sk_out.send(out_pkt);

        // print info
        for (int i = 0; i < currentbyte.length; ++i) System.out.print((char) currentbyte[i]);
        System.out.println();

        // wait for a while
        for (int i = 0; i < 5; i++) {
          sleep(send_interval);
          if (curr == seq) {
            break;
          } else if (i == 4) {
            curr--;
            seq--;
            break;
          }
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      sk_out.close();
    }
  } catch (Exception e) {
    e.printStackTrace();
    System.exit(-1);
  }
}
// Checksum interface. Just a wrapper around member summer.
public long getValue() {
  return summer.getValue();
}
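// Illustrative sketch (an assumption, not the original class): the reset/update/getValue
// fragments above that delegate to a member "summer" and track "inSum" fit naturally into a
// byte-counting Checksum wrapper like this one. Class and accessor names are hypothetical.
import java.util.zip.CRC32;
import java.util.zip.Checksum;

public class CountingChecksum implements Checksum {
  private final Checksum summer;
  private long inSum; // number of bytes fed into the underlying checksum

  public CountingChecksum(Checksum summer) {
    this.summer = summer;
  }

  @Override
  public void update(int b) {
    summer.update(b);
    inSum += 1;
  }

  @Override
  public void update(byte[] b, int off, int len) {
    if (len > 0) {
      summer.update(b, off, len);
      inSum += len;
    }
  }

  @Override
  public long getValue() {
    return summer.getValue();
  }

  @Override
  public void reset() {
    summer.reset();
    inSum = 0;
  }

  public long getBytesSummed() {
    return inSum;
  }

  public static void main(String[] args) {
    CountingChecksum c = new CountingChecksum(new CRC32());
    byte[] data = "hello checksum".getBytes();
    c.update(data, 0, data.length);
    System.out.println("crc=" + Long.toHexString(c.getValue()) + " bytes=" + c.getBytesSummed());
  }
}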