Example no. 1
1
 /**
  * Calculates a checksum for a given string.
  *
  * @param string string from which a checksum should be obtained
  * @return a checksum allowing two events with the same properties to be grouped later.
  */
 private static String calculateChecksum(String string) {
   byte[] bytes = string.getBytes(Charsets.UTF_8);
   Checksum checksum = new CRC32();
   checksum.update(bytes, 0, bytes.length);
   return Long.toHexString(checksum.getValue()).toUpperCase();
 }
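The Javadoc above says two events with the same properties map to the same checksum so they can be grouped later. A minimal self-contained sketch of that property, assuming plain java.util.zip and java.nio.charset.StandardCharsets in place of Guava's Charsets (class name and sample strings are illustrative only):

 import java.nio.charset.StandardCharsets;
 import java.util.zip.CRC32;
 import java.util.zip.Checksum;

 public class ChecksumGroupingDemo {
   // Same logic as calculateChecksum above, minus the Guava dependency.
   static String crc32Hex(String s) {
     byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
     Checksum checksum = new CRC32();
     checksum.update(bytes, 0, bytes.length);
     return Long.toHexString(checksum.getValue()).toUpperCase();
   }

   public static void main(String[] args) {
     // Identical event properties produce identical checksums, so they group together.
     System.out.println(crc32Hex("user=42 action=login").equals(crc32Hex("user=42 action=login"))); // true
     System.out.println(crc32Hex("user=42 action=login").equals(crc32Hex("user=43 action=login"))); // false (different content)
   }
 }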
Example no. 2
0
  private boolean isSameFile(VFSLeaf currentFile, VersionsFileImpl versions) {
    boolean same = false;
    if (versions.getRevisions() != null && !versions.getRevisions().isEmpty()) {
      VFSRevision lastRevision = versions.getRevisions().get(versions.getRevisions().size() - 1);

      long lastSize = lastRevision.getSize();
      long currentSize = currentFile.getSize();
      if (currentSize == lastSize
          && currentSize > 0
          && lastRevision instanceof RevisionFileImpl
          && currentFile instanceof LocalFileImpl) {
        RevisionFileImpl lastRev = ((RevisionFileImpl) lastRevision);
        LocalFileImpl current = (LocalFileImpl) currentFile;
        // can be the same file
        try {
          Checksum cm1 =
              FileUtils.checksum(((LocalFileImpl) lastRev.getFile()).getBasefile(), new Adler32());
          Checksum cm2 = FileUtils.checksum(current.getBasefile(), new Adler32());
          same = cm1.getValue() == cm2.getValue();
        } catch (IOException e) {
          log.debug("Error calculating the checksum of files");
        }
      }
    }
    return same;
  }
  private void decompress(byte[] compressed) throws IOException {
    // uncompress
    validBufferBytes =
        info.parameters.sstableCompressor.uncompress(
            compressed, 0, compressed.length - checksumBytes.length, buffer, 0);
    uncompressedBytes += validBufferBytes;

    // validate crc randomly
    if (info.parameters.getCrcCheckChance() > FBUtilities.threadLocalRandom().nextDouble()) {
      checksum.update(buffer, 0, validBufferBytes);

      System.arraycopy(
          compressed,
          compressed.length - checksumBytes.length,
          checksumBytes,
          0,
          checksumBytes.length);
      if (Ints.fromByteArray(checksumBytes) != (int) checksum.getValue())
        throw new IOException("CRC unmatched");

      // reset checksum object back to the original (blank) state
      checksum.reset();
    }

    // buffer offset is always aligned
    bufferOffset = current & ~(buffer.length - 1);
  }
Example no. 4
0
  private void flushBufferedData() throws IOException {
    if (o == 0) {
      return;
    }
    checksum.reset();
    checksum.update(buffer, 0, o);
    final int check = (int) checksum.getValue();
    int compressedLength = compressor.compress(buffer, 0, o, compressedBuffer, HEADER_LENGTH);
    final int compressMethod;
    if (compressedLength >= o) {
      compressMethod = COMPRESSION_METHOD_RAW;
      compressedLength = o;
      System.arraycopy(buffer, 0, compressedBuffer, HEADER_LENGTH, o);
    } else {
      compressMethod = COMPRESSION_METHOD_LZ4;
    }

    compressedBuffer[MAGIC_LENGTH] = (byte) (compressMethod | compressionLevel);
    writeIntLE(compressedLength, compressedBuffer, MAGIC_LENGTH + 1);
    writeIntLE(o, compressedBuffer, MAGIC_LENGTH + 5);
    writeIntLE(check, compressedBuffer, MAGIC_LENGTH + 9);
    assert MAGIC_LENGTH + 13 == HEADER_LENGTH;
    out.write(compressedBuffer, 0, HEADER_LENGTH + compressedLength);
    o = 0;
  }
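flushBufferedData above lays out a block header of MAGIC_LENGTH magic bytes, one token byte (compression method OR'd with the level), then three little-endian ints: compressed length, original length, and the checksum of the uncompressed buffer, which is why the assert requires HEADER_LENGTH == MAGIC_LENGTH + 13. A hedged sketch of the matching header parse on the read side (readIntLE, parseBlockHeader, and the class name are assumptions, not an existing API):

 public class BlockHeaderReader {
   // Little-endian int decoder mirroring the writer's writeIntLE.
   static int readIntLE(byte[] buf, int off) {
     return (buf[off] & 0xFF)
         | (buf[off + 1] & 0xFF) << 8
         | (buf[off + 2] & 0xFF) << 16
         | (buf[off + 3] & 0xFF) << 24;
   }

   // Decodes the token byte and the three little-endian ints that follow the magic bytes.
   static void parseBlockHeader(byte[] block, int magicLength) {
     int token = block[magicLength] & 0xFF; // compressMethod | compressionLevel
     int compressedLength = readIntLE(block, magicLength + 1);
     int originalLength = readIntLE(block, magicLength + 5);
     int storedChecksum = readIntLE(block, magicLength + 9);
     System.out.printf("token=0x%02x compressed=%d original=%d checksum=0x%08x%n",
         token, compressedLength, originalLength, storedChecksum);
   }
 }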
  public ByteString calculateTag(Checksum crc, byte[] value) {

    crc.update(value, 0, value.length);

    long csum = crc.getValue();

    ByteBuffer buffer = ByteBuffer.allocate(8);

    return ByteString.copyFrom(buffer.putLong(csum).array());
  }
  @Test
  public void findStreamingFile() throws Exception {
    FileStreamStateHandler rwd = new FileStreamStateHandler();

    File testFilesDir = new File(samplesDir, "/multiple-logs/");
    File[] testFiles = testFilesDir.listFiles((FilenameFilter) new WildcardFileFilter("orders*"));
    FileAccessState newFAS = new FileAccessState();

    int count = 0;
    File fileToSearchFor = null;
    int lineLastRead = 0;
    File fileWritten = null;
    for (File testFile : testFiles) {
      count++;
      FileReader in;
      LineNumberReader reader;

      Long fileCRC = rwd.getFileCrc(testFile);
      if (count == 2) {
        newFAS.currentFileCrc = fileCRC;
        fileToSearchFor = testFile;
      }

      in = new FileReader(testFile);
      reader = new LineNumberReader(in);
      reader.setLineNumber(0);
      String line = reader.readLine();
      int count2 = 0;
      while (line != null) {
        count2++;
        Checksum crcLine = new CRC32();
        final byte[] bytes4Line = line.getBytes();
        crcLine.update(bytes4Line, 0, bytes4Line.length);
        final long lineCRC = crcLine.getValue();
        final int lineNumber = reader.getLineNumber();
        System.out.println("for " + lineNumber + " line CRC is " + lineCRC);
        if (count2 == 3) {
          newFAS.currentLineCrc = lineCRC;
          newFAS.currentLineNumber = lineNumber;
          newFAS.lastReadTime = System.currentTimeMillis();
          lineLastRead = lineNumber;
        }
        line = reader.readLine();
      }
      fileWritten = AbstractFileStreamStateHandler.writeState(newFAS, testFilesDir, "TestStream");
      Utils.close(reader);
    }

    final File findLastProcessed = rwd.findStreamingFile(newFAS, testFiles);
    assertEquals(fileToSearchFor, findLastProcessed);
    final int lineLastReadRecorded = rwd.checkLine(findLastProcessed, newFAS);
    assertEquals(lineLastRead, lineLastReadRecorded);
    fileWritten.delete();
  }
Example no. 7
0
 public final void update(int i) {
   int b0 = (i >> 24) & 0xff;
   int b1 = (i >> 16) & 0xff;
   int b2 = (i >> 8) & 0xff;
   int b3 = i & 0xff;
   crc.update(b0);
   crc.update(b1);
   crc.update(b2);
   crc.update(b3);
   //		com.oddlabs.tt.util.ChecksumLogger.log(i);
 }
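update(int) above feeds the four bytes of the int into the CRC most significant byte first. A small self-contained check that this is equivalent to updating with the int's big-endian encoding (class name and sample value are illustrative):

 import java.nio.ByteBuffer;
 import java.util.zip.CRC32;

 public class IntUpdateEquivalenceDemo {
   public static void main(String[] args) {
     int i = 0xCAFEBABE;

     // Byte by byte, most significant byte first, as in update(int) above.
     CRC32 a = new CRC32();
     a.update((i >> 24) & 0xff);
     a.update((i >> 16) & 0xff);
     a.update((i >> 8) & 0xff);
     a.update(i & 0xff);

     // Equivalent: update with the big-endian 4-byte encoding of the int.
     CRC32 b = new CRC32();
     b.update(ByteBuffer.allocate(4).putInt(i).array(), 0, 4);

     System.out.println(a.getValue() == b.getValue()); // true
   }
 }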
Example no. 8
0
  private static Long computeChecksum(CompilationUnit unit, String sigString) {
    Long chksum = null;

    if (sigString != null) {
      // store the signature as a checksum
      final byte[] bytes = sigString.getBytes();

      final Checksum checksum = new Adler32(); // much faster than CRC32, almost as reliable
      checksum.update(bytes, 0, bytes.length);
      chksum = Long.valueOf(checksum.getValue());

      // debug("COMPUTE   CRC32: " + chksum + "\t--> " + unit.getSource().getNameForReporting());
    }
    return chksum;
  }
Example no. 9
0
  public SpillRecord(Path indexFileName, JobConf job, Checksum crc, String expectedIndexOwner)
      throws IOException {

    final FileSystem rfs = FileSystem.getLocal(job).getRaw();
    final DataInputStream in =
        new DataInputStream(
            SecureIOUtils.openForRead(
                new File(indexFileName.toUri().getPath()), expectedIndexOwner, null));
    try {
      final long length = rfs.getFileStatus(indexFileName).getLen();
      final int partitions = (int) length / MAP_OUTPUT_INDEX_RECORD_LENGTH;
      final int size = partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH;

      buf = ByteBuffer.allocate(size);
      if (crc != null) {
        crc.reset();
        CheckedInputStream chk = new CheckedInputStream(in, crc);
        IOUtils.readFully(chk, buf.array(), 0, size);
        if (chk.getChecksum().getValue() != in.readLong()) {
          throw new ChecksumException("Checksum error reading spill index: " + indexFileName, -1);
        }
      } else {
        IOUtils.readFully(in, buf.array(), 0, size);
      }
      entries = buf.asLongBuffer();
    } finally {
      in.close();
    }
  }
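The constructor above streams the payload through a CheckedInputStream and compares the running checksum with an 8-byte trailer read from the raw stream, so the trailer itself is not folded into the checksum (writeToFile in Example no. 11 below is the writing side). A minimal round trip of the same pattern using only java.io and java.util.zip; the file name and payload are placeholders:

 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.util.zip.CRC32;
 import java.util.zip.CheckedInputStream;
 import java.util.zip.CheckedOutputStream;

 public class TrailingChecksumDemo {
   public static void main(String[] args) throws IOException {
     File f = File.createTempFile("spill", ".index");
     byte[] payload = new byte[256]; // placeholder payload

     // Write: the payload goes through the checked stream, the trailer through the raw stream.
     try (DataOutputStream out = new DataOutputStream(new FileOutputStream(f))) {
       CheckedOutputStream chk = new CheckedOutputStream(out, new CRC32());
       chk.write(payload);
       out.writeLong(chk.getChecksum().getValue());
     }

     // Read: the payload goes through the checked stream, then the trailer is read raw and compared.
     try (DataInputStream in = new DataInputStream(new FileInputStream(f))) {
       CheckedInputStream chk = new CheckedInputStream(in, new CRC32());
       new DataInputStream(chk).readFully(new byte[payload.length]);
       if (chk.getChecksum().getValue() != in.readLong()) {
         throw new IOException("Checksum error reading " + f);
       }
     }
     f.delete();
   }
 }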
Example no. 10
0
 @Override
 public void close() throws IOException {
   out.close();
   String checksum = null;
   IndexOutput underlying = out;
   // TODO: cut over to lucene's CRC
   // *WARNING*: lucene has classes in same o.a.l.store package with very similar names,
   // but using CRC, not Adler!
   if (underlying instanceof BufferedChecksumIndexOutput) {
     Checksum digest = ((BufferedChecksumIndexOutput) underlying).digest();
     assert digest instanceof Adler32;
     checksum = Long.toString(digest.getValue(), Character.MAX_RADIX);
   }
   synchronized (mutex) {
     StoreFileMetaData md =
         new StoreFileMetaData(
             name, metaData.directory().fileLength(name), checksum, metaData.directory());
     filesMetadata = ImmutableOpenMap.builder(filesMetadata).fPut(name, md).build();
     files = filesMetadata.keys().toArray(String.class);
   }
 }
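The checksum string stored above is just the Adler-32 value rendered in radix 36 (Character.MAX_RADIX), so a reader can recover the numeric value with Long.parseLong. A tiny illustrative sketch; the class name and input are made up:

 import java.nio.charset.StandardCharsets;
 import java.util.zip.Adler32;

 public class RadixChecksumDemo {
   public static void main(String[] args) {
     Adler32 adler = new Adler32();
     adler.update("segment contents".getBytes(StandardCharsets.UTF_8));
     String stored = Long.toString(adler.getValue(), Character.MAX_RADIX);  // what gets persisted
     long recovered = Long.parseLong(stored, Character.MAX_RADIX);          // what a reader gets back
     System.out.println(stored + " -> " + (recovered == adler.getValue())); // true
   }
 }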
Example no. 11
0
 public void writeToFile(Path loc, JobConf job, Checksum crc) throws IOException {
   final FileSystem rfs = FileSystem.getLocal(job).getRaw();
   CheckedOutputStream chk = null;
   final FSDataOutputStream out = rfs.create(loc);
   try {
     if (crc != null) {
       crc.reset();
       chk = new CheckedOutputStream(out, crc);
       chk.write(buf.array());
       out.writeLong(chk.getChecksum().getValue());
     } else {
       out.write(buf.array());
     }
   } finally {
     if (chk != null) {
       chk.close();
     } else {
       out.close();
     }
   }
 }
Example no. 12
0
 public void reset() {
   summer.reset();
   inSum = 0;
 }
Example no. 13
0
 public void update(byte[] b, int off, int len) {
   if (len > 0) {
     summer.update(b, off, len);
     inSum += len;
   }
 }
Example no. 14
0
  public void recover(File file) throws IOException {
    logger.info("Replaying " + file.getPath());
    final long segment = CommitLogSegment.idFromFilename(file.getName());
    RandomAccessReader reader = RandomAccessReader.open(new File(file.getAbsolutePath()), true);
    try {
      assert reader.length() <= Integer.MAX_VALUE;
      int replayPosition;
      if (globalPosition.segment < segment) replayPosition = 0;
      else if (globalPosition.segment == segment) replayPosition = globalPosition.position;
      else replayPosition = (int) reader.length();

      if (replayPosition < 0 || replayPosition >= reader.length()) {
        // replayPosition > reader.length() can happen if some data gets flushed before it is
        // written to the commitlog
        // (see https://issues.apache.org/jira/browse/CASSANDRA-2285)
        logger.debug("skipping replay of fully-flushed {}", file);
        return;
      }

      reader.seek(replayPosition);

      if (logger.isDebugEnabled())
        logger.debug("Replaying " + file + " starting at " + reader.getFilePointer());

      /* read the logs populate RowMutation and apply */
      while (!reader.isEOF()) {
        if (logger.isDebugEnabled()) logger.debug("Reading mutation at " + reader.getFilePointer());

        long claimedCRC32;
        int serializedSize;
        try {
          // any of the reads may hit EOF
          serializedSize = reader.readInt();
          if (serializedSize == CommitLog.END_OF_SEGMENT_MARKER) {
            logger.debug("Encountered end of segment marker at " + reader.getFilePointer());
            break;
          }

          // RowMutation must be at LEAST 10 bytes:
          // 3 each for a non-empty Table and Key (including the
          // 2-byte length from writeUTF/writeWithShortLength) and 4 bytes for column count.
          // This prevents the CRC from being fooled by special-case garbage in the file; see
          // CASSANDRA-2128
          if (serializedSize < 10) break;
          long claimedSizeChecksum = reader.readLong();
          checksum.reset();
          checksum.update(serializedSize);
          if (checksum.getValue() != claimedSizeChecksum)
            break; // entry wasn't synced correctly/fully. That's ok.

          if (serializedSize > buffer.length) buffer = new byte[(int) (1.2 * serializedSize)];
          reader.readFully(buffer, 0, serializedSize);
          claimedCRC32 = reader.readLong();
        } catch (EOFException eof) {
          break; // last CL entry didn't get completely written. that's ok.
        }

        checksum.update(buffer, 0, serializedSize);
        if (claimedCRC32 != checksum.getValue()) {
          // this entry must not have been fsynced. probably the rest is bad too,
          // but just in case there is no harm in trying them (since we still read on an entry
          // boundary)
          continue;
        }

        /* deserialize the commit log entry */
        FastByteArrayInputStream bufIn = new FastByteArrayInputStream(buffer, 0, serializedSize);
        RowMutation rm;
        try {
          // Assuming version here. We've gone to lengths to make sure what gets written to the CL
          // is in the current version, so do make sure the CL is drained prior to upgrading a node.
          rm =
              RowMutation.serializer()
                  .deserialize(
                      new DataInputStream(bufIn),
                      MessagingService.version_,
                      IColumnSerializer.Flag.LOCAL);
        } catch (UnknownColumnFamilyException ex) {
          AtomicInteger i = invalidMutations.get(ex.cfId);
          if (i == null) {
            i = new AtomicInteger(1);
            invalidMutations.put(ex.cfId, i);
          } else i.incrementAndGet();
          continue;
        }

        if (logger.isDebugEnabled())
          logger.debug(
              String.format(
                  "replaying mutation for %s.%s: %s",
                  rm.getTable(),
                  ByteBufferUtil.bytesToHex(rm.key()),
                  "{" + StringUtils.join(rm.getColumnFamilies().iterator(), ", ") + "}"));

        final long entryLocation = reader.getFilePointer();
        final RowMutation frm = rm;
        Runnable runnable =
            new WrappedRunnable() {
              public void runMayThrow() throws IOException {
                if (Schema.instance.getKSMetaData(frm.getTable()) == null) return;
                if (pointInTimeExceeded(frm)) return;

                final Table table = Table.open(frm.getTable());
                RowMutation newRm = new RowMutation(frm.getTable(), frm.key());

                // Rebuild the row mutation, omitting column families that
                // a) have already been flushed,
                // b) are part of a cf that was dropped. Keep in mind that the cf.name() is suspect;
                // do everything based on the cfid instead.
                for (ColumnFamily columnFamily : frm.getColumnFamilies()) {
                  if (Schema.instance.getCF(columnFamily.id()) == null)
                    // null means the cf has been dropped
                    continue;

                  ReplayPosition rp = cfPositions.get(columnFamily.id());

                  // replay if current segment is newer than last flushed one or,
                  // if it is the last known segment, if we are after the replay position
                  if (segment > rp.segment
                      || (segment == rp.segment && entryLocation > rp.position)) {
                    newRm.add(columnFamily);
                    replayedCount.incrementAndGet();
                  }
                }
                if (!newRm.isEmpty()) {
                  Table.open(newRm.getTable()).apply(newRm, false);
                  tablesRecovered.add(table);
                }
              }
            };
        futures.add(StageManager.getStage(Stage.MUTATION).submit(runnable));
        if (futures.size() > MAX_OUTSTANDING_REPLAY_COUNT) {
          FBUtilities.waitOnFutures(futures);
          futures.clear();
        }
      }
    } finally {
      FileUtils.closeQuietly(reader);
      logger.info("Finished reading " + file);
    }
  }
Example no. 15
0
 public static String getCRC32(String key) {
   byte[] bytes = key.getBytes();
   Checksum checksum = new CRC32();
   checksum.update(bytes, 0, bytes.length);
   return String.valueOf(checksum.getValue());
 }
Example no. 16
0
 public void update(int b) {
   summer.update(b);
   inSum += 1;
 }
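Examples no. 12, 13, and 16 (together with the summer-based getValue fragment in Example no. 29) read like pieces of a single wrapper that delegates to an underlying Checksum while counting the bytes fed into it. A hedged reconstruction follows; the class name, constructor, and getBytesSummed accessor are assumptions, only the summer and inSum members come from the fragments:

 import java.util.zip.CRC32;
 import java.util.zip.Checksum;

 public class CountingChecksum implements Checksum {
   private final Checksum summer;
   private long inSum;

   public CountingChecksum(Checksum summer) {
     this.summer = summer;
   }

   @Override
   public void update(int b) {
     summer.update(b);
     inSum += 1;
   }

   @Override
   public void update(byte[] b, int off, int len) {
     if (len > 0) {
       summer.update(b, off, len);
       inSum += len;
     }
   }

   @Override
   public long getValue() {
     return summer.getValue();
   }

   @Override
   public void reset() {
     summer.reset();
     inSum = 0;
   }

   // Number of bytes checksummed since the last reset (assumed accessor name).
   public long getBytesSummed() {
     return inSum;
   }

   public static void main(String[] args) {
     CountingChecksum c = new CountingChecksum(new CRC32());
     byte[] data = "hello".getBytes();
     c.update(data, 0, data.length);
     System.out.println(c.getValue() + " over " + c.getBytesSummed() + " bytes");
   }
 }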
Example no. 17
0
 public final int getValue() {
   return (int) crc.getValue();
 }
Example no. 18
0
    /*
     * The old method was out of hand. Going to start a new one with a different format
     * that should be easier to understand and validate.
     */
    private void readChunksV2() {
      // For reading the compressed input.
      ByteBuffer fileInputBuffer =
          ByteBuffer.allocateDirect(CompressionService.maxCompressedLength(DEFAULT_CHUNKSIZE));

      while (m_hasMoreChunks) {

        /*
         * Limit the number of chunks materialized in memory at one time
         */
        try {
          m_chunkReads.acquire();
        } catch (InterruptedException e) {
          return;
        }
        boolean expectedAnotherChunk = false;
        try {

          /*
           * Get the length of the next chunk, the partition id, the CRC covering the partition id
           * and length prefix, and then the CRC of the compressed payload
           */
          ByteBuffer chunkLengthB = ByteBuffer.allocate(16);
          while (chunkLengthB.hasRemaining()) {
            final int read = m_saveFile.read(chunkLengthB);
            if (read == -1) {
              throw new EOFException();
            }
          }
          int nextChunkLength = chunkLengthB.getInt(0);
          expectedAnotherChunk = true;

          /*
           * Get the partition id and its CRC (CRC now covers length prefix) and validate it. Validating the
           * partition ID for the chunk separately makes it possible to
           * continue processing chunks from other partitions if only one partition
           * has corrupt chunks in the file.
           */
          assert (m_checksumType == ChecksumType.CRC32C);
          final Checksum partitionIdCRC = new PureJavaCrc32C();
          final int nextChunkPartitionId = chunkLengthB.getInt(4);
          final int nextChunkPartitionIdCRC = chunkLengthB.getInt(8);

          partitionIdCRC.update(chunkLengthB.array(), 0, 8);
          int generatedValue = (int) partitionIdCRC.getValue();
          if (generatedValue != nextChunkPartitionIdCRC) {
            chunkLengthB.position(0);
            for (int partitionId : m_partitionIds) {
              m_corruptedPartitions.add(partitionId);
            }
            throw new IOException(
                "Chunk partition ID CRC check failed. "
                    + "This corrupts all partitions in this file");
          }

          /*
           * CRC for the data portion of the chunk
           */
          final int nextChunkCRC = chunkLengthB.getInt(12);

          /*
           * Sanity check the length value to ensure there isn't
           * a runtime exception or OOM.
           */
          if (nextChunkLength < 0) {
            throw new IOException("Corrupted TableSaveFile chunk has negative chunk length");
          }

          if (nextChunkLength > fileInputBuffer.capacity()) {
            throw new IOException(
                "Corrupted TableSaveFile chunk has unreasonable length "
                    + "> DEFAULT_CHUNKSIZE bytes");
          }

          /*
           * Go fetch the compressed data so that the uncompressed size is known
           * and use that to set nextChunkLength to be the uncompressed length,
           * the code ahead that constructs the volt table is expecting
           * the uncompressed size/data since it is producing an uncompressed table
           */
          fileInputBuffer.clear();
          fileInputBuffer.limit(nextChunkLength);
          while (fileInputBuffer.hasRemaining()) {
            final int read = m_saveFile.read(fileInputBuffer);
            if (read == -1) {
              throw new EOFException();
            }
          }
          fileInputBuffer.flip();
          nextChunkLength = CompressionService.uncompressedLength(fileInputBuffer);

          /*
           * Validate the rest of the chunk. This can fail if the data is corrupted
           * or the length value was corrupted.
           */
          final int calculatedCRC =
              DBBPool.getBufferCRC32C(fileInputBuffer, 0, fileInputBuffer.remaining());
          if (calculatedCRC != nextChunkCRC) {
            m_corruptedPartitions.add(nextChunkPartitionId);
            if (m_continueOnCorruptedChunk) {
              m_chunkReads.release();
              continue;
            } else {
              throw new IOException("CRC mismatch in saved table chunk");
            }
          }

          /*
           * Now allocate space to store the chunk using the VoltTable serialization representation.
           * The chunk will contain an integer row count preceding it so it can
           * be sucked straight in. There is a little funny business to overwrite the
           * partition id that is not part of the serialization format
           */
          Container c = getOutputBuffer(nextChunkPartitionId);

          /*
           * If the length value is wrong or not all data made it to disk this read will
           * not complete correctly. There could be overflow, underflow etc.
           * so use a try finally block to indicate that all partitions are now corrupt.
           * The enclosing exception handlers will do the right thing WRT to
           * propagating the error and closing the file.
           */
          boolean completedRead = false;
          try {
            /*
             * Assemble a VoltTable out of the chunk of tuples.
             * Put in the header that was cached in the constructor,
             * then copy the tuple data.
             */
            c.b.clear();
            c.b.limit(nextChunkLength + m_tableHeader.capacity());
            m_tableHeader.position(0);
            c.b.put(m_tableHeader);
            // Doesn't move buffer position, does change the limit
            CompressionService.decompressBuffer(fileInputBuffer, c.b);
            completedRead = true;
          } finally {
            if (!completedRead) {
              for (int partitionId : m_partitionIds) {
                m_corruptedPartitions.add(partitionId);
              }
            }
          }

          /*
           * Skip irrelevant chunks after the CRC is calculated. Always calculate the CRC
           * in case it is the length value that is corrupted
           */
          if (m_relevantPartitionIds != null) {
            if (!m_relevantPartitionIds.contains(nextChunkPartitionId)) {
              c.discard();
              m_chunkReads.release();
              continue;
            }
          }

          /*
           * VoltTable wants the buffer at the home position 0
           */
          c.b.position(0);

          synchronized (TableSaveFile.this) {
            m_availableChunks.offer(c);
            TableSaveFile.this.notifyAll();
          }
        } catch (EOFException eof) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            if (expectedAnotherChunk) {
              m_chunkReaderException =
                  new IOException("Expected to find another chunk but reached end of file instead");
            }
            TableSaveFile.this.notifyAll();
          }
        } catch (IOException e) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            m_chunkReaderException = e;
            TableSaveFile.this.notifyAll();
          }
        } catch (BufferUnderflowException e) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            m_chunkReaderException = new IOException(e);
            TableSaveFile.this.notifyAll();
          }
        } catch (BufferOverflowException e) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            m_chunkReaderException = new IOException(e);
            TableSaveFile.this.notifyAll();
          }
        } catch (IndexOutOfBoundsException e) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            m_chunkReaderException = new IOException(e);
            TableSaveFile.this.notifyAll();
          }
        }
      }
    }
 public long getChecksum() {
   return digest.getValue();
 }
 @Override
 public byte readByte() throws IOException {
   final byte b = main.readByte();
   digest.update(b);
   return b;
 }
Example no. 21
0
    private void readChunks() {
      // For reading the compressed input.
      ByteBuffer fileInputBuffer =
          ByteBuffer.allocateDirect(CompressionService.maxCompressedLength(DEFAULT_CHUNKSIZE));

      while (m_hasMoreChunks) {

        /*
          * Limit the number of chunks materialized in memory at one time
         */
        try {
          m_chunkReads.acquire();
        } catch (InterruptedException e) {
          return;
        }
        boolean expectedAnotherChunk = false;
        try {

          /*
           * Get the length of the next chunk, the partition id, and the CRC for the partition id.
           */
          ByteBuffer chunkLengthB = ByteBuffer.allocate(16);
          while (chunkLengthB.hasRemaining()) {
            final int read = m_saveFile.read(chunkLengthB);
            if (read == -1) {
              throw new EOFException();
            }
          }
          chunkLengthB.flip();
          int nextChunkLength = chunkLengthB.getInt();
          expectedAnotherChunk = true;

          /*
           * Get the partition id and its CRC and validate it. Validating the
           * partition ID for the chunk separately makes it possible to
           * continue processing chunks from other partitions if only one partition
           * has corrupt chunks in the file.
           */
          final Checksum partitionIdCRC =
              m_checksumType == ChecksumType.CRC32C ? new PureJavaCrc32C() : new PureJavaCrc32();
          chunkLengthB.mark();
          final int nextChunkPartitionId = chunkLengthB.getInt();
          final int nextChunkPartitionIdCRC = chunkLengthB.getInt();
          chunkLengthB.reset();
          byte partitionIdBytes[] = new byte[4];
          chunkLengthB.get(partitionIdBytes);
          partitionIdCRC.update(partitionIdBytes, 0, partitionIdBytes.length);
          int generatedValue = (int) partitionIdCRC.getValue();
          if (generatedValue != nextChunkPartitionIdCRC) {
            chunkLengthB.position(0);
            for (int partitionId : m_partitionIds) {
              m_corruptedPartitions.add(partitionId);
            }
            throw new IOException(
                "Chunk partition ID CRC check failed. "
                    + "This corrupts all partitions in this file");
          }

          /*
           * CRC for the data portion of the chunk
           */
          chunkLengthB.position(chunkLengthB.position() + 4);
          final int nextChunkCRC = chunkLengthB.getInt();

          /*
           * Sanity check the length value to ensure there isn't
           * a runtime exception or OOM.
           */
          if (nextChunkLength < 0) {
            throw new IOException("Corrupted TableSaveFile chunk has negative chunk length");
          }

          if (isCompressed()) {
            if (nextChunkLength > fileInputBuffer.capacity()) {
              throw new IOException(
                  "Corrupted TableSaveFile chunk has unreasonable length "
                      + "> DEFAULT_CHUNKSIZE bytes");
            }
          } else {
            if (nextChunkLength > DEFAULT_CHUNKSIZE) {
              throw new IOException(
                  "Corrupted TableSaveFile chunk has unreasonable length "
                      + "> DEFAULT_CHUNKSIZE bytes");
            }
          }

          /*
           * Go fetch the compressed data so that the uncompressed size is known
           * and use that to set nextChunkLength to be the uncompressed length,
           * the code ahead that constructs the volt table is expecting
           * the uncompressed size/data since it is producing an uncompressed table
           */
          if (isCompressed()) {
            fileInputBuffer.clear();
            fileInputBuffer.limit(nextChunkLength);
            while (fileInputBuffer.hasRemaining()) {
              final int read = m_saveFile.read(fileInputBuffer);
              if (read == -1) {
                throw new EOFException();
              }
            }
            fileInputBuffer.flip();
            nextChunkLength = CompressionService.uncompressedLength(fileInputBuffer);
          }

          /*
           * Now allocate space to store the chunk using the VoltTable serialization representation.
           * The chunk will contain an integer row count preceding it so it can
           * be sucked straight in. There is a little funny business to overwrite the
           * partition id that is not part of the serialization format
           */
          Container c = getOutputBuffer(nextChunkPartitionId);

          /*
           * If the length value is wrong or not all data made it to disk this read will
           * not complete correctly. There could be overflow, underflow etc.
           * so use a try finally block to indicate that all partitions are now corrupt.
           * The enclosing exception handlers will do the right thing WRT to
           * propagating the error and closing the file.
           */
          boolean completedRead = false;
          int checksumStartPosition = 0;
          int rowCount = 0;
          try {
            /*
             * Assemble a VoltTable out of the chunk of tuples.
             * Put in the header that was cached in the constructor,
             * then copy the tuple data. The row count is at the end
             * because it isn't known until serialization is complete.
             * It will have to be moved back to the beginning of the tuple data
             * after the header once the CRC has been calculated.
             */
            c.b.clear();
            // The length of the chunk already includes space for the 4-byte row count
            // even though it is at the end, but we need to also leave at the end for the CRC calc
            if (isCompressed()) {
              c.b.limit(nextChunkLength + m_tableHeader.capacity() + 4);
            } else {
              // Before compression the chunk length included the stuff added in the EE
              // like the 2 CRCs and partition id. It is only -8 because we still need the 4-bytes
              // of padding to move the row count in when constructing the volt table format.
              c.b.limit((nextChunkLength - 8) + m_tableHeader.capacity());
            }
            m_tableHeader.position(0);
            c.b.put(m_tableHeader);
            c.b.position(c.b.position() + 4); // Leave space for row count to be moved into
            checksumStartPosition = c.b.position();
            if (isCompressed()) {
              CompressionService.decompressBuffer(fileInputBuffer, c.b);
              c.b.position(c.b.limit());
            } else {
              while (c.b.hasRemaining()) {
                final int read = m_saveFile.read(c.b);
                if (read == -1) {
                  throw new EOFException();
                }
              }
            }
            c.b.position(c.b.position() - 4);
            rowCount = c.b.getInt();
            c.b.position(checksumStartPosition);
            completedRead = true;
          } finally {
            if (!completedRead) {
              for (int partitionId : m_partitionIds) {
                m_corruptedPartitions.add(partitionId);
              }
            }
          }

          /*
           * Validate the rest of the chunk. This can fail if the data is corrupted
           * or the length value was corrupted.
           */
          final int calculatedCRC =
              m_checksumType == ChecksumType.CRC32C
                  ? DBBPool.getCRC32C(c.address, c.b.position(), c.b.remaining())
                  : DBBPool.getCRC32(c.address, c.b.position(), c.b.remaining());
          if (calculatedCRC != nextChunkCRC) {
            m_corruptedPartitions.add(nextChunkPartitionId);
            if (m_continueOnCorruptedChunk) {
              c.discard();
              m_chunkReads.release();
              continue;
            } else {
              throw new IOException("CRC mismatch in saved table chunk");
            }
          }

          /*
           * Skip irrelevant chunks after the CRC is calculated. Always calculate the CRC
           * in case it is the length value that is corrupted
           */
          if (m_relevantPartitionIds != null) {
            if (!m_relevantPartitionIds.contains(nextChunkPartitionId)) {
              c.discard();
              m_chunkReads.release();
              continue;
            }
          }

          /*
           * The row count which was stored on disk at the end (and for the CRC calc)
           * is now moved to the appropriate place for the table serialization format.
           * Update the limit to reflect that.
           *
           * Surrounded in a try finally just in case there is overflow/underflow. Shouldn't
           * happen but I could be wrong.
           */
          boolean success = false;
          try {
            c.b.limit(c.b.limit() - 4);
            c.b.position(checksumStartPosition - 4);
            c.b.putInt(rowCount);
            c.b.position(0);
            success = true;
          } finally {
            if (!success) {
              for (int partitionId : m_partitionIds) {
                m_corruptedPartitions.add(partitionId);
              }
            }
          }

          synchronized (TableSaveFile.this) {
            m_availableChunks.offer(c);
            TableSaveFile.this.notifyAll();
          }
        } catch (EOFException eof) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            if (expectedAnotherChunk) {
              m_chunkReaderException =
                  new IOException("Expected to find another chunk but reached end of file instead");
            }
            TableSaveFile.this.notifyAll();
          }
        } catch (IOException e) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            m_chunkReaderException = e;
            TableSaveFile.this.notifyAll();
          }
        } catch (BufferUnderflowException e) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            m_chunkReaderException = new IOException(e);
            TableSaveFile.this.notifyAll();
          }
        } catch (BufferOverflowException e) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            m_chunkReaderException = new IOException(e);
            TableSaveFile.this.notifyAll();
          }
        } catch (IndexOutOfBoundsException e) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            m_chunkReaderException = new IOException(e);
            TableSaveFile.this.notifyAll();
          }
        }
      }
    }
Example no. 22
0
 /**
  * Calculates a CRC32 checksum of the specified data.
  *
  * @param data - data to be hashed.
  * @return - a CRC32 checksum (4 bytes).
  */
 private int crc(byte[] data) {
   checksum.reset();
   checksum.update(data, 0, data.length);
   return (int) checksum.getValue();
 }
Example no. 23
0
 /**
  * An almost-unique hash identifying this event to improve aggregation.
  *
  * @param message The message we are sending to sentry
  * @return CRC32 Checksum string
  */
 public static String calculateChecksum(String message) {
   byte bytes[] = message.getBytes();
   Checksum checksum = new CRC32();
   checksum.update(bytes, 0, bytes.length);
   return String.valueOf(checksum.getValue());
 }
 @Override
 public void readBytes(byte[] b, int offset, int len) throws IOException {
   main.readBytes(b, offset, len);
   digest.update(b, offset, len);
 }
Example no. 25
0
 public int hash(byte[] data, int len) {
   Checksum checksum = new CRC32();
   checksum.update(data, 0, len);
   return (int) (((checksum.getValue() >> 32) ^ checksum.getValue()) & INTEGER_MASK);
 }
  public static void recover(File[] clogs) throws IOException {
    Set<Table> tablesRecovered = new HashSet<Table>();
    List<Future<?>> futures = new ArrayList<Future<?>>();
    byte[] bytes = new byte[4096];
    Map<Integer, AtomicInteger> invalidMutations = new HashMap<Integer, AtomicInteger>();

    for (File file : clogs) {
      int bufferSize = (int) Math.min(file.length(), 32 * 1024 * 1024);
      BufferedRandomAccessFile reader =
          new BufferedRandomAccessFile(file.getAbsolutePath(), "r", bufferSize);

      try {
        CommitLogHeader clHeader = null;
        int replayPosition = 0;
        String headerPath = CommitLogHeader.getHeaderPathFromSegmentPath(file.getAbsolutePath());
        try {
          clHeader = CommitLogHeader.readCommitLogHeader(headerPath);
          replayPosition = clHeader.getReplayPosition();
        } catch (IOException ioe) {
          logger.info(
              headerPath
                  + " incomplete, missing or corrupt.  Everything is ok, don't panic.  CommitLog will be replayed from the beginning");
          logger.debug("exception was", ioe);
        }
        if (replayPosition < 0) {
          logger.debug("skipping replay of fully-flushed {}", file);
          continue;
        }
        reader.seek(replayPosition);

        if (logger.isDebugEnabled())
          logger.debug("Replaying " + file + " starting at " + reader.getFilePointer());

        /* read the logs populate RowMutation and apply */
        while (!reader.isEOF()) {
          if (logger.isDebugEnabled())
            logger.debug("Reading mutation at " + reader.getFilePointer());

          long claimedCRC32;

          Checksum checksum = new CRC32();
          int serializedSize;
          try {
            // any of the reads may hit EOF
            serializedSize = reader.readInt();
            long claimedSizeChecksum = reader.readLong();
            checksum.update(serializedSize);
            if (checksum.getValue() != claimedSizeChecksum || serializedSize <= 0)
              break; // entry wasn't synced correctly/fully.  that's ok.

            if (serializedSize > bytes.length) bytes = new byte[(int) (1.2 * serializedSize)];
            reader.readFully(bytes, 0, serializedSize);
            claimedCRC32 = reader.readLong();
          } catch (EOFException eof) {
            break; // last CL entry didn't get completely written.  that's ok.
          }

          checksum.update(bytes, 0, serializedSize);
          if (claimedCRC32 != checksum.getValue()) {
            // this entry must not have been fsynced.  probably the rest is bad too,
            // but just in case there is no harm in trying them (since we still read on an entry
            // boundary)
            continue;
          }

          /* deserialize the commit log entry */
          ByteArrayInputStream bufIn = new ByteArrayInputStream(bytes, 0, serializedSize);
          RowMutation rm = null;
          try {
            rm = RowMutation.serializer().deserialize(new DataInputStream(bufIn));
          } catch (UnserializableColumnFamilyException ex) {
            AtomicInteger i = invalidMutations.get(ex.cfId);
            if (i == null) {
              i = new AtomicInteger(1);
              invalidMutations.put(ex.cfId, i);
            } else i.incrementAndGet();
            continue;
          }

          if (logger.isDebugEnabled())
            logger.debug(
                String.format(
                    "replaying mutation for %s.%s: %s",
                    rm.getTable(),
                    rm.key(),
                    "{" + StringUtils.join(rm.getColumnFamilies(), ", ") + "}"));
          final Table table = Table.open(rm.getTable());
          tablesRecovered.add(table);
          final Collection<ColumnFamily> columnFamilies =
              new ArrayList<ColumnFamily>(rm.getColumnFamilies());
          final long entryLocation = reader.getFilePointer();
          final CommitLogHeader finalHeader = clHeader;
          final RowMutation frm = rm;
          Runnable runnable =
              new WrappedRunnable() {
                public void runMayThrow() throws IOException {
                  RowMutation newRm = new RowMutation(frm.getTable(), frm.key());

                  // Rebuild the row mutation, omitting column families that
                  // a) have already been flushed,
                  // b) are part of a cf that was dropped.
                  // Keep in mind that the cf.name() is suspect; do everything based on the cfid instead.
                  for (ColumnFamily columnFamily : columnFamilies) {
                    if (CFMetaData.getCF(columnFamily.id()) == null)
                      // null means the cf has been dropped
                      continue;

                    if (finalHeader == null
                        || (finalHeader.isDirty(columnFamily.id())
                            && entryLocation >= finalHeader.getPosition(columnFamily.id())))
                      newRm.add(columnFamily);
                  }
                  if (!newRm.isEmpty()) {
                    Table.open(newRm.getTable()).apply(newRm, null, false);
                  }
                }
              };
          futures.add(StageManager.getStage(Stage.MUTATION).submit(runnable));
          if (futures.size() > MAX_OUTSTANDING_REPLAY_COUNT) {
            FBUtilities.waitOnFutures(futures);
            futures.clear();
          }
        }
      } finally {
        reader.close();
        logger.info("Finished reading " + file);
      }
    }

    for (Map.Entry<Integer, AtomicInteger> entry : invalidMutations.entrySet())
      logger.info(
          String.format(
              "Skipped %d mutations from unknown (probably removed) CF with id %d",
              entry.getValue().intValue(), entry.getKey()));

    // wait for all the writes to finish on the mutation stage
    FBUtilities.waitOnFutures(futures);
    logger.debug("Finished waiting on mutations from recovery");

    // flush replayed tables
    futures.clear();
    for (Table table : tablesRecovered) futures.addAll(table.flush());
    FBUtilities.waitOnFutures(futures);
    logger.info("Recovery complete");
  }
Example no. 27
0
  /**
   * The async processing loop that writes to the data files and does the force calls. Since the
   * file sync() call is the slowest of all the operations, this algorithm tries to 'batch' or group
   * together several file sync() requests into a single file sync() call. The batching is
   * accomplished by attaching the same CountDownLatch instance to every force request in a group.
   */
  private void processQueue() {
    DataFile dataFile = null;
    RandomAccessFile file = null;
    try {
      DataByteArrayOutputStream buff =
          new DataByteArrayOutputStream(journal.getMaxWriteBatchSize());
      boolean last = false;
      while (true) {
        WriteBatch wb = batchQueue.take();

        if (shutdown) {
          last = true;
        }

        if (!wb.writes.isEmpty()) {
          boolean newOrRotated = dataFile != wb.dataFile;
          if (newOrRotated) {
            if (file != null) {
              dataFile.closeRandomAccessFile(file);
            }
            dataFile = wb.dataFile;
            file = dataFile.openRandomAccessFile();
          }

          // Write an empty batch control record.
          buff.reset();
          buff.writeInt(Journal.BATCH_CONTROL_RECORD_SIZE);
          buff.writeByte(Journal.BATCH_CONTROL_RECORD_TYPE);
          buff.write(Journal.BATCH_CONTROL_RECORD_MAGIC);
          buff.writeInt(0);
          buff.writeLong(0);

          boolean forceToDisk = false;

          WriteCommand control = wb.writes.poll();
          WriteCommand first = wb.writes.peek();
          WriteCommand latest = null;
          for (WriteCommand current : wb.writes) {
            forceToDisk |= current.sync;
            buff.writeInt(current.location.getSize());
            buff.writeByte(current.location.getType());
            buff.write(current.data.getData(), current.data.getOffset(), current.data.getLength());
            latest = current;
          }

          Buffer sequence = buff.toBuffer();

          // Now we can fill in the batch control record properly.
          buff.reset();
          buff.skip(Journal.HEADER_SIZE + Journal.BATCH_CONTROL_RECORD_MAGIC.length);
          buff.writeInt(sequence.getLength() - Journal.BATCH_CONTROL_RECORD_SIZE);
          if (journal.isChecksum()) {
            Checksum checksum = new Adler32();
            checksum.update(
                sequence.getData(),
                sequence.getOffset() + Journal.BATCH_CONTROL_RECORD_SIZE,
                sequence.getLength() - Journal.BATCH_CONTROL_RECORD_SIZE);
            buff.writeLong(checksum.getValue());
          }

          // Now do the 1 big write.
          file.seek(wb.offset);
          file.write(sequence.getData(), sequence.getOffset(), sequence.getLength());

          ReplicationTarget replicationTarget = journal.getReplicationTarget();
          if (replicationTarget != null) {
            replicationTarget.replicate(control.location, sequence, forceToDisk);
          }

          if (forceToDisk) {
            IOHelper.sync(file.getFD());
          }

          journal.setLastAppendLocation(latest.location);

          // Now that the data is on disk, remove the writes from the in
          // flight
          // cache.
          inflightWrites.remove(control.location);
          for (WriteCommand current : wb.writes) {
            if (!current.sync) {
              inflightWrites.remove(current.location);
            }
          }

          if (journal.getListener() != null) {
            try {
              journal.getListener().synced(wb.writes.toArray(new WriteCommand[wb.writes.size()]));
            } catch (Throwable ex) {
              warn(ex, ex.getMessage());
            }
          }

          // Clear unused data:
          wb.writes.clear();

          // Signal any waiting threads that the write is on disk.
          wb.latch.countDown();
        }

        if (last) {
          break;
        }
      }
    } catch (Exception e) {
      firstAsyncException.compareAndSet(null, e);
    } finally {
      try {
        if (file != null) {
          dataFile.closeRandomAccessFile(file);
        }
      } catch (Throwable ignore) {
      }
      shutdownDone.countDown();
    }
  }
Example no. 28
0
    public void run() {
      try {

        byte[] out_data = new byte[pkt_size];
        InetAddress dst_addr = InetAddress.getByName("127.0.0.1");

        // To register the recv_port at the UnreliNet first
        DatagramPacket out_pkt =
            new DatagramPacket(
                ("REG:" + recv_port).getBytes(),
                ("REG:" + recv_port).getBytes().length,
                dst_addr,
                dst_port);
        sk_out.send(out_pkt);

        int len = (int) (new File(inPath).length());
        FileInputStream fis = new FileInputStream(inPath);
        int currbytes = 0;
        int flag = 0;
        byte currentbyte[] = new byte[850];

        try {
          while (true) {

            out_data = outPath.getBytes();
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            outputStream.write("fNm".getBytes());
            outputStream.write(out_data);
            outputStream.write("fNm".getBytes());
            outputStream.write("dAtA".getBytes());
            if (curr == seq) {
              if (flag == 1) {
                // empty packet to signify finish
                DatagramPacket final_pkt =
                    new DatagramPacket(
                        "afIlEdAtAFINdAtAcRc975554582cRcsEq-1sEqafIlE".getBytes(),
                        "afIlEdAtAFINdAtAcRc975554582cRcsEq-1sEqafIlE".getBytes().length,
                        dst_addr,
                        dst_port);
                for (int i = 0; i < 20; i++) sk_out.send(final_pkt);
                System.exit(0);
              }
              if (currbytes + 850 <= len) {
                currentbyte = new byte[850];
                fis.read(currentbyte, 0, 850);
                currbytes += 850;
              } else {
                currentbyte = new byte[len - currbytes];
                fis.read(currentbyte, 0, len - currbytes);
                flag = 1;
              }
            }
            currentbyte = process(currentbyte);
            outputStream.write(currentbyte);
            outputStream.write("dAtA".getBytes());
            byte fin[] = outputStream.toByteArray();
            System.out.println();
            System.out.println("CRC pkt size:" + fin.length);

            // update checksum
            Checksum checksum = new CRC32();
            checksum.update(fin, 0, fin.length);

            long checksumValue = checksum.getValue();
            outputStream = new ByteArrayOutputStream();
            outputStream.write("afIlE".getBytes());
            outputStream.write(fin);
            outputStream.write("fIlE".getBytes());
            outputStream.write("cRc".getBytes());
            outputStream.write(String.valueOf(checksumValue).getBytes());
            outputStream.write("cRc".getBytes());
            outputStream.write("sEq".getBytes());
            outputStream.write(String.valueOf(seq).getBytes());
            outputStream.write("sEqa".getBytes());
            byte pkt[] = outputStream.toByteArray();
            curr = seq;
            seq++;
            System.out.println("total size: " + pkt.length);
            // send the packet
            out_pkt = new DatagramPacket(pkt, pkt.length, dst_addr, dst_port);
            sk_out.send(out_pkt);

            // print info
            for (int i = 0; i < currentbyte.length; ++i) System.out.print((char) currentbyte[i]);
            System.out.println();

            // wait for a while
            for (int i = 0; i < 5; i++) {
              sleep(send_interval);
              if (curr == seq) {
                break;
              } else if (i == 4) {
                curr--;
                seq--;
                break;
              }
            }
          }
        } catch (Exception e) {
          e.printStackTrace();
        } finally {
          sk_out.close();
        }
      } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
      }
    }
Example no. 29
0
 // Checksum Interface. Just a wrapper around member summer.
 public long getValue() {
   return summer.getValue();
 }