Example #1
  /** Resizes the internal byte buffer with a simple doubling policy, if needed. */
  private final void growIfNeeded(int minimumDesired) {
    if (buffer.b().remaining() < minimumDesired) {
      // Compute the size of the new buffer
      int newCapacity = buffer.b().capacity();
      int newRemaining = newCapacity - buffer.b().position();
      while (newRemaining < minimumDesired) {
        newRemaining += newCapacity;
        newCapacity *= 2;
      }

      // Allocate and copy
      BBContainer next;
      if (isDirect) {
        next = DBBPool.allocateDirect(newCapacity);
      } else {
        next = DBBPool.wrapBB(ByteBuffer.allocate(newCapacity));
      }
      buffer.b().flip();
      next.b().put(buffer.b());
      assert next.b().remaining() == newRemaining;
      buffer.discard();
      buffer = next;
      if (callback != null) callback.onBufferGrow(this);
      assert (buffer.b().order() == ByteOrder.BIG_ENDIAN);
    }
  }
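The doubling arithmetic above can be isolated into a small self-contained sketch (the method name and parameters are illustrative, not part of the original class): each pass through the loop adds the pre-doubling capacity to the tracked free space, so newRemaining always equals the prospective capacity minus the current write position.

  // Illustrative only: mirrors the doubling loop above without the buffer copy.
  static int nextCapacity(int currentCapacity, int currentPosition, int minimumDesired) {
    int newCapacity = currentCapacity;
    int newRemaining = currentCapacity - currentPosition;
    while (newRemaining < minimumDesired) {
      newRemaining += newCapacity; // free space gained by doubling equals the old capacity
      newCapacity *= 2;
    }
    return newCapacity;
  }
  // e.g. nextCapacity(1024, 1000, 500) returns 2048: 24 bytes free, one doubling adds 1024 more.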
Example #2
 /** Constructor that sets the callback object. */
 public FastSerializer(
     boolean bigEndian, boolean isDirect, BufferGrowCallback callback, int initialAllocation) {
   assert (initialAllocation > 0);
   this.isDirect = isDirect;
   if (isDirect) {
     buffer = DBBPool.allocateDirect(initialAllocation);
   } else {
     buffer = DBBPool.wrapBB(ByteBuffer.allocate(initialAllocation));
   }
   this.callback = callback;
   buffer.b().order(bigEndian ? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN);
 }
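A hypothetical usage sketch of this constructor; it assumes BufferGrowCallback is a single-method interface whose onBufferGrow receives the serializer, as suggested by the callback.onBufferGrow(this) call in Example #1.

  FastSerializer fs =
      new FastSerializer(
          true,   // bigEndian
          false,  // heap-backed rather than direct
          new BufferGrowCallback() {
            @Override
            public void onBufferGrow(FastSerializer source) {
              System.out.println("serializer buffer grew");
            }
          },
          1024);  // initial allocation in bytes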
Example #3
 BBContainer acquire() {
   final BBContainer cont = m_buffers.poll();
   if (cont == null) {
     final BBContainer originContainer = DBBPool.allocateDirect(1024 * 32);
     return new BBContainer(originContainer.b()) {
       @Override
       public void discard() {
         checkDoubleFree();
         // If we had to allocate over the desired limit, start discarding
         if (m_buffers.size() > m_numBuffers) {
           originContainer.discard();
           return;
         }
         m_buffers.push(originContainer);
       }
     };
   }
   return new BBContainer(cont.b()) {
     @Override
     public void discard() {
       checkDoubleFree();
       m_buffers.push(cont);
     }
   };
 }
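A hypothetical caller sketch for acquire(): every borrowed container must be discarded exactly once so the backing direct buffer is either returned to m_buffers or, if the pool is over its limit, freed; checkDoubleFree() guards against releasing twice.

  BBContainer borrowed = acquire();
  try {
    ByteBuffer workspace = borrowed.b();
    workspace.clear();
    // ... fill and drain the 32KB workspace ...
  } finally {
    borrowed.discard(); // pushes the origin container back onto m_buffers (or frees it)
  }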
Example #4
 private Container getOutputBuffer(final int nextChunkPartitionId) {
   Container c = m_buffers.poll();
   if (c == null) {
     final BBContainer originContainer = DBBPool.allocateDirect(DEFAULT_CHUNKSIZE);
     final ByteBuffer b = originContainer.b;
     final long pointer = org.voltcore.utils.DBBPool.getBufferAddress(b);
     c = new Container(b, pointer, originContainer, nextChunkPartitionId);
   }
   /*
    * Need to reconstruct the container with the partition id of the next
    * chunk so it can be a final public field. The buffer, address, and origin
    * container remain the same.
    */
   c = new Container(c.b, c.address, c.m_origin, nextChunkPartitionId);
   return c;
 }
  public void pushExportBuffer(
      int partitionId,
      String signature,
      long uso,
      long bufferPtr,
      ByteBuffer buffer,
      boolean sync,
      boolean endOfStream) {
     //        System.out.println("In generation " + m_timestamp + " partition " + partitionId
     //            + " signature " + signature
     //            + (buffer == null ? " null buffer " : (" buffer length " + buffer.remaining())));
     //        for (Integer i : m_dataSourcesByPartition.keySet()) {
     //            System.out.println("Have partition " + i);
     //        }
    assert (m_dataSourcesByPartition.containsKey(partitionId));
    assert (m_dataSourcesByPartition.get(partitionId).containsKey(signature));
    HashMap<String, ExportDataSource> sources = m_dataSourcesByPartition.get(partitionId);

    if (sources == null) {
      exportLog.error(
          "Could not find export data sources for partition "
              + partitionId
              + " generation "
              + m_timestamp
              + " the export data is being discarded");
      DBBPool.deleteCharArrayMemory(bufferPtr);
      return;
    }

    ExportDataSource source = sources.get(signature);
    if (source == null) {
      exportLog.error(
          "Could not find export data source for partition "
              + partitionId
              + " signature "
              + signature
              + " generation "
              + m_timestamp
              + " the export data is being discarded");
      DBBPool.deleteCharArrayMemory(bufferPtr);
      return;
    }

    source.pushExportBuffer(uso, bufferPtr, buffer, sync, endOfStream);
  }
Example #6
  public static BBContainer allocateDirectWithAddress(final int capacity) {
    final ByteBuffer retval = ByteBuffer.allocateDirect(capacity);
    bytesAllocatedGlobally.getAndAdd(capacity);

    return new BBContainer(retval, DBBPool.getBufferAddress(retval)) {

      @Override
      public void discard() {
        try {
          DirectMemoryUtils.destroyDirectByteBuffer(retval);
          bytesAllocatedGlobally.getAndAdd(-capacity);
        } catch (Throwable e) {
          VoltDB.crashLocalVoltDB("Failed to deallocate direct byte buffer", false, e);
        }
      }
    };
  }
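A hypothetical usage sketch: the returned container owns the native allocation, so discard() must be called exactly once to destroy the direct buffer and subtract the capacity from bytesAllocatedGlobally.

  BBContainer direct = DBBPool.allocateDirectWithAddress(4096);
  try {
    direct.b.putLong(42L); // b is the direct ByteBuffer, address its native pointer
  } finally {
    direct.discard();      // frees the native memory and decrements the global counter
  }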
Example #7
  /*
   * Allocate a DirectByteBuffer from a global lock-free pool
   */
  public static BBContainer allocateDirectAndPool(final Integer capacity) {
    ConcurrentLinkedQueue<BBContainer> pooledBuffers = m_pooledBuffers.get(capacity);
    if (pooledBuffers == null) {
      pooledBuffers = new ConcurrentLinkedQueue<BBContainer>();
      if (m_pooledBuffers.putIfAbsent(capacity, pooledBuffers) == null) {
        pooledBuffers = m_pooledBuffers.get(capacity);
      }
    }

    BBContainer cont = pooledBuffers.poll();
    if (cont == null) {
      // Create an origin container
      ByteBuffer b = ByteBuffer.allocateDirect(capacity);
      bytesAllocatedGlobally.getAndAdd(capacity);
      cont =
          new BBContainer(b, DBBPool.getBufferAddress(b)) {
            @Override
            public void discard() {
              try {
                DirectMemoryUtils.destroyDirectByteBuffer(b);
                bytesAllocatedGlobally.addAndGet(-capacity);
              } catch (Throwable e) {
                VoltDB.crashLocalVoltDB("Failed to deallocate direct byte buffer", false, e);
              }
            }
          };
    }
    final BBContainer origin = cont;
    cont =
        new BBContainer(origin.b, origin.address) {
          @Override
          public void discard() {
            m_pooledBuffers.get(b.capacity()).offer(origin);
          }
        };
    cont.b.clear();
    return cont;
  }
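A hypothetical round trip through the pool: discarding a container obtained from allocateDirectAndPool does not free memory, it re-offers the origin container to the per-capacity queue, so a subsequent request for the same size can reuse the backing buffer.

  BBContainer first = DBBPool.allocateDirectAndPool(1024 * 32);
  first.b.putInt(0, 7);
  first.discard();         // returns the origin container to the 32KB queue
  BBContainer second = DBBPool.allocateDirectAndPool(1024 * 32);
  assert second.b.capacity() == 1024 * 32; // likely the same backing buffer, already cleared
  second.discard();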
  public static synchronized void pushDRBuffer(int partitionId, ByteBuffer buf) {
    if (logDebug) {
      System.out.println("Received DR buffer size " + buf.remaining());
      AtomicLong haveOpenTransaction = haveOpenTransactionLocal.get();
      buf.order(ByteOrder.LITTLE_ENDIAN);
      // Magic header space for Java for implementing zero copy stuff
      buf.position(8);
      while (buf.hasRemaining()) {
        int startPosition = buf.position();
        byte version = buf.get();
        int type = buf.get();

        int checksum = 0;
        if (version != 0) System.out.println("Remaining is " + buf.remaining());

        switch (DRRecordType.valueOf(type)) {
          case INSERT:
            {
              // Insert
              if (haveOpenTransaction.get() == -1) {
                System.out.println("Have insert but no open transaction");
                System.exit(-1);
              }
              final long tableHandle = buf.getLong();
              final int lengthPrefix = buf.getInt();
              buf.position(buf.position() + lengthPrefix);
              checksum = buf.getInt();
              System.out.println(
                  "Version "
                      + version
                      + " type INSERT table handle "
                      + tableHandle
                      + " length "
                      + lengthPrefix
                      + " checksum "
                      + checksum);
              break;
            }
          case DELETE:
            {
              // Delete
              if (haveOpenTransaction.get() == -1) {
                System.out.println("Have insert but no open transaction");
                System.exit(-1);
              }
              final long tableHandle = buf.getLong();
              final int lengthPrefix = buf.getInt();
              buf.position(buf.position() + lengthPrefix);
              checksum = buf.getInt();
              System.out.println(
                  "Version "
                      + version
                      + " type DELETE table handle "
                      + tableHandle
                      + " length "
                      + lengthPrefix
                      + " checksum "
                      + checksum);
              break;
            }
          case UPDATE:
            // Update
            // System.out.println("Version " + version + " type UPDATE " + checksum " + checksum);
            break;
          case BEGIN_TXN:
            {
              // Begin txn
              final long txnId = buf.getLong();
              final long spHandle = buf.getLong();
              if (haveOpenTransaction.get() != -1) {
                System.out.println(
                    "Have open transaction txnid "
                        + txnId
                        + " spHandle "
                        + spHandle
                        + " but already open transaction");
                System.exit(-1);
              }
              haveOpenTransaction.set(spHandle);
              checksum = buf.getInt();
              System.out.println(
                  "Version "
                      + version
                      + " type BEGIN_TXN "
                      + " txnid "
                      + txnId
                      + " spHandle "
                      + spHandle
                      + " checksum "
                      + checksum);
              break;
            }
          case END_TXN:
            {
              // End txn
              final long spHandle = buf.getLong();
              if (haveOpenTransaction.get() == -1) {
                System.out.println(
                    "Have end transaction spHandle "
                        + spHandle
                        + " but no open transaction and its less then last committed "
                        + lastCommittedSpHandle.get().get());
                //                    checksum = buf.getInt();
                //                    break;
                System.exit(-1);
              }
              haveOpenTransaction.set(-1);
              lastCommittedSpHandle.get().set(spHandle);
              checksum = buf.getInt();
              System.out.println(
                  "Version "
                      + version
                      + " type END_TXN "
                      + " spHandle "
                      + spHandle
                      + " checksum "
                      + checksum);
              break;
            }
        }
        int calculatedChecksum =
            DBBPool.getBufferCRC32C(buf, startPosition, buf.position() - startPosition - 4);
        if (calculatedChecksum != checksum) {
          System.out.println("Checksum " + calculatedChecksum + " didn't match " + checksum);
          System.exit(-1);
        }
      }
    }
    final BBContainer cont = DBBPool.wrapBB(buf);
    DBBPool.registerUnsafeMemory(cont.address());
    cont.discard();
  }
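The per-record checksum convention used above (a CRC32C over everything from the record start up to, but not including, the trailing 4-byte checksum) can be restated with the JDK's java.util.zip.CRC32C (Java 9+) standing in for DBBPool.getBufferCRC32C; this is an illustration of the framing, not the original helper.

  // Illustrative check of one DR record occupying [startPosition, buf.position()),
  // where the last 4 bytes are the stored CRC32C of the preceding record bytes.
  static boolean recordChecksumMatches(ByteBuffer buf, int startPosition) {
    final int storedCrc = buf.getInt(buf.position() - 4);
    final java.util.zip.CRC32C crc = new java.util.zip.CRC32C();
    ByteBuffer body = buf.duplicate();
    body.position(startPosition).limit(buf.position() - 4);
    crc.update(body); // consumes only the record body, not the stored checksum
    return (int) crc.getValue() == storedCrc;
  }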
  /*
   * Prepend length is basically synonymous with writing actual tuple data and not
   * the header.
   */
  private ListenableFuture<?> write(
      final Callable<BBContainer> tupleDataC, final boolean prependLength) {
    /*
     * Unwrap the data to be written. For the traditional
     * snapshot data target this should be a noop.
     */
    BBContainer tupleDataTemp;
    try {
      tupleDataTemp = tupleDataC.call();
      /*
       * Can be null if the dedupe filter nulled out the buffer
       */
      if (tupleDataTemp == null) {
        return Futures.immediateFuture(null);
      }
    } catch (Throwable t) {
      return Futures.immediateFailedFuture(t);
    }
    final BBContainer tupleData = tupleDataTemp;

    if (m_writeFailed) {
      tupleData.discard();
      return null;
    }

    m_outstandingWriteTasks.incrementAndGet();

    Future<BBContainer> compressionTask = null;
    if (prependLength) {
      BBContainer cont =
          DBBPool.allocateDirectAndPool(SnapshotSiteProcessor.m_snapshotBufferCompressedLen);
      // Skip 4-bytes so the partition ID is not compressed
      // That way if we detect a corruption we know what partition is bad
      tupleData.b.position(tupleData.b.position() + 4);
      /*
       * Leave 12 bytes: a 4-byte length prefix, a 4-byte partition id, and a
       * 4-byte CRC32C of just those header bytes. Together with the compressed
       * payload CRC that makes 16 header bytes, but those last 4 are written by
       * CompressionService.
       */
      cont.b.position(12);
      compressionTask = CompressionService.compressAndCRC32cBufferAsync(tupleData.b, cont);
    }
    final Future<BBContainer> compressionTaskFinal = compressionTask;

    ListenableFuture<?> writeTask =
        m_es.submit(
            new Callable<Object>() {
              @Override
              public Object call() throws Exception {
                try {
                  if (m_acceptOneWrite) {
                    m_acceptOneWrite = false;
                  } else {
                    if (m_simulateBlockedWrite != null) {
                      m_simulateBlockedWrite.await();
                    }
                    if (m_simulateFullDiskWritingChunk) {
                      throw new IOException("Disk full");
                    }
                  }

                  int totalWritten = 0;
                  if (prependLength) {
                    BBContainer payloadContainer = compressionTaskFinal.get();
                    try {
                      final ByteBuffer payloadBuffer = payloadContainer.b;
                      payloadBuffer.position(0);

                      ByteBuffer lengthPrefix = ByteBuffer.allocate(12);
                      m_bytesAllowedBeforeSync.acquire(payloadBuffer.remaining());
                      // Length prefix does not include the 4 header items, just the compressed
                      // payload that follows
                      lengthPrefix.putInt(payloadBuffer.remaining() - 16); // length prefix
                      lengthPrefix.putInt(tupleData.b.getInt(0)); // partitionId

                      /*
                       * Checksum the header and put it in the payload buffer
                       */
                      PureJavaCrc32C crc = new PureJavaCrc32C();
                      crc.update(lengthPrefix.array(), 0, 8);
                      lengthPrefix.putInt((int) crc.getValue());
                      lengthPrefix.flip();
                      payloadBuffer.put(lengthPrefix);
                      payloadBuffer.position(0);

                      /*
                       * Write payload to file
                       */
                      while (payloadBuffer.hasRemaining()) {
                        totalWritten += m_channel.write(payloadBuffer);
                      }
                    } finally {
                      payloadContainer.discard();
                    }
                  } else {
                    while (tupleData.b.hasRemaining()) {
                      totalWritten += m_channel.write(tupleData.b);
                    }
                  }
                  m_bytesWritten += totalWritten;
                  m_bytesWrittenSinceLastSync.addAndGet(totalWritten);
                } catch (IOException e) {
                  m_writeException = e;
                  SNAP_LOG.error(
                      "Error while attempting to write snapshot data to file " + m_file, e);
                  m_writeFailed = true;
                  throw e;
                } finally {
                  try {
                    tupleData.discard();
                  } finally {
                    m_outstandingWriteTasksLock.lock();
                    try {
                      if (m_outstandingWriteTasks.decrementAndGet() == 0) {
                        m_noMoreOutstandingWriteTasksCondition.signalAll();
                      }
                    } finally {
                      m_outstandingWriteTasksLock.unlock();
                    }
                  }
                }
                return null;
              }
            });
    return writeTask;
  }
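The 12 bytes reserved at the front of the compression buffer form the chunk header described in the comments: a 4-byte length of the compressed payload that follows, a 4-byte partition id, and a 4-byte CRC32C covering just those first 8 bytes. A hedged sketch of that construction, with java.util.zip.CRC32C standing in for PureJavaCrc32C:

  static ByteBuffer buildChunkHeader(int compressedPayloadLength, int partitionId) {
    ByteBuffer header = ByteBuffer.allocate(12);
    header.putInt(compressedPayloadLength); // bytes the reader should expect after the header
    header.putInt(partitionId);             // left uncompressed so corruption can be attributed
    java.util.zip.CRC32C crc = new java.util.zip.CRC32C();
    crc.update(header.array(), 0, 8);       // checksum of the two fields above
    header.putInt((int) crc.getValue());
    header.flip();
    return header;
  }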
  public DefaultSnapshotDataTarget(
      final File file,
      final int hostId,
      final String clusterName,
      final String databaseName,
      final String tableName,
      final int numPartitions,
      final boolean isReplicated,
      final List<Integer> partitionIds,
      final VoltTable schemaTable,
      final long txnId,
      final long timestamp,
      int version[])
      throws IOException {
    String hostname = CoreUtils.getHostnameOrAddress();
    m_file = file;
    m_tableName = tableName;
    m_fos = new FileOutputStream(file);
    m_channel = m_fos.getChannel();
    m_needsFinalClose = !isReplicated;
    final FastSerializer fs = new FastSerializer();
    fs.writeInt(0); // CRC
    fs.writeInt(0); // Header length placeholder
     // Indicate the snapshot was not completed; written as true (1) for the CRC
     // calculation and overwritten with false (0) later.
     fs.writeByte(1);
    for (int ii = 0; ii < 4; ii++) {
      fs.writeInt(version[ii]); // version
    }
    JSONStringer stringer = new JSONStringer();
    byte jsonBytes[] = null;
    try {
      stringer.object();
      stringer.key("txnId").value(txnId);
      stringer.key("hostId").value(hostId);
      stringer.key("hostname").value(hostname);
      stringer.key("clusterName").value(clusterName);
      stringer.key("databaseName").value(databaseName);
      stringer.key("tableName").value(tableName.toUpperCase());
      stringer.key("isReplicated").value(isReplicated);
      stringer.key("isCompressed").value(true);
      stringer.key("checksumType").value("CRC32C");
      stringer.key("timestamp").value(timestamp);
      /*
       * The timestamp string is for human consumption, automated stuff should use
       * the actual timestamp
       */
      stringer.key("timestampString").value(SnapshotUtil.formatHumanReadableDate(timestamp));
      if (!isReplicated) {
        stringer.key("partitionIds").array();
        for (int partitionId : partitionIds) {
          stringer.value(partitionId);
        }
        stringer.endArray();

        stringer.key("numPartitions").value(numPartitions);
      }
      stringer.endObject();
      String jsonString = stringer.toString();
      JSONObject jsonObj = new JSONObject(jsonString);
      jsonString = jsonObj.toString(4);
      jsonBytes = jsonString.getBytes("UTF-8");
    } catch (Exception e) {
      throw new IOException(e);
    }
    fs.writeInt(jsonBytes.length);
    fs.write(jsonBytes);

    final BBContainer container = fs.getBBContainer();
    container.b.position(4);
    container.b.putInt(container.b.remaining() - 4);
    container.b.position(0);

    final byte schemaBytes[] = PrivateVoltTableFactory.getSchemaBytes(schemaTable);

    final PureJavaCrc32 crc = new PureJavaCrc32();
    ByteBuffer aggregateBuffer = ByteBuffer.allocate(container.b.remaining() + schemaBytes.length);
    aggregateBuffer.put(container.b);
    aggregateBuffer.put(schemaBytes);
    aggregateBuffer.flip();
    crc.update(aggregateBuffer.array(), 4, aggregateBuffer.capacity() - 4);

    final int crcValue = (int) crc.getValue();
    aggregateBuffer.putInt(crcValue).position(8);
    aggregateBuffer.put((byte) 0).position(0); // Haven't actually finished writing file

    if (m_simulateFullDiskWritingHeader) {
      m_writeException = new IOException("Disk full");
      m_writeFailed = true;
      m_fos.close();
      throw m_writeException;
    }

    /*
     * Be completely sure the write succeeded. If it didn't
     * the disk is probably full or the path is bunk etc.
     */
    m_acceptOneWrite = true;
    ListenableFuture<?> writeFuture =
        write(Callables.returning((BBContainer) DBBPool.wrapBB(aggregateBuffer)), false);
    try {
      writeFuture.get();
    } catch (InterruptedException e) {
      m_fos.close();
      throw new java.io.InterruptedIOException();
    } catch (ExecutionException e) {
      m_fos.close();
      throw m_writeException;
    }
    if (m_writeFailed) {
      m_fos.close();
      throw m_writeException;
    }

    ScheduledFuture<?> syncTask = null;
    syncTask =
        m_syncService.scheduleAtFixedRate(
            new Runnable() {
              @Override
              public void run() {
                // Only sync when at least 4 megabytes of data have accumulated, enough to
                // amortize the cost of seeking on ye olden platters. Since we are appending
                // to a file it's actually 2 seeks.
                while (m_bytesWrittenSinceLastSync.get() > (1024 * 1024 * 4)) {
                  final int bytesSinceLastSync = m_bytesWrittenSinceLastSync.getAndSet(0);
                  try {
                    m_channel.force(false);
                  } catch (IOException e) {
                    if (!(e instanceof java.nio.channels.AsynchronousCloseException)) {
                      SNAP_LOG.error("Error syncing snapshot", e);
                    } else {
                      SNAP_LOG.debug(
                          "Asynchronous close syncing snasphot data, presumably graceful", e);
                    }
                  }
                  m_bytesAllowedBeforeSync.release(bytesSinceLastSync);
                }
              }
            },
            SNAPSHOT_SYNC_FREQUENCY,
            SNAPSHOT_SYNC_FREQUENCY,
            TimeUnit.MILLISECONDS);
    m_syncTask = syncTask;
  }
Example #11
    private void readChunks() {
      // For reading the compressed input.
      ByteBuffer fileInputBuffer =
          ByteBuffer.allocateDirect(CompressionService.maxCompressedLength(DEFAULT_CHUNKSIZE));

      while (m_hasMoreChunks) {

        /*
         * Limit the number of chunks materialized into memory at one time
         */
        try {
          m_chunkReads.acquire();
        } catch (InterruptedException e) {
          return;
        }
        boolean expectedAnotherChunk = false;
        try {

          /*
           * Get the length of the next chunk, the partition id, the CRC of the partition id,
           * and the CRC of the chunk data
           */
          ByteBuffer chunkLengthB = ByteBuffer.allocate(16);
          while (chunkLengthB.hasRemaining()) {
            final int read = m_saveFile.read(chunkLengthB);
            if (read == -1) {
              throw new EOFException();
            }
          }
          chunkLengthB.flip();
          int nextChunkLength = chunkLengthB.getInt();
          expectedAnotherChunk = true;

          /*
           * Get the partition id and its CRC and validate it. Validating the
           * partition ID for the chunk separately makes it possible to
           * continue processing chunks from other partitions if only one partition
           * has corrupt chunks in the file.
           */
          final Checksum partitionIdCRC =
              m_checksumType == ChecksumType.CRC32C ? new PureJavaCrc32C() : new PureJavaCrc32();
          chunkLengthB.mark();
          final int nextChunkPartitionId = chunkLengthB.getInt();
          final int nextChunkPartitionIdCRC = chunkLengthB.getInt();
          chunkLengthB.reset();
          byte partitionIdBytes[] = new byte[4];
          chunkLengthB.get(partitionIdBytes);
          partitionIdCRC.update(partitionIdBytes, 0, partitionIdBytes.length);
          int generatedValue = (int) partitionIdCRC.getValue();
          if (generatedValue != nextChunkPartitionIdCRC) {
            chunkLengthB.position(0);
            for (int partitionId : m_partitionIds) {
              m_corruptedPartitions.add(partitionId);
            }
            throw new IOException(
                "Chunk partition ID CRC check failed. "
                    + "This corrupts all partitions in this file");
          }

          /*
           * CRC for the data portion of the chunk
           */
          chunkLengthB.position(chunkLengthB.position() + 4);
          final int nextChunkCRC = chunkLengthB.getInt();

          /*
           * Sanity check the length value to ensure there isn't
           * a runtime exception or OOM.
           */
          if (nextChunkLength < 0) {
            throw new IOException("Corrupted TableSaveFile chunk has negative chunk length");
          }

          if (isCompressed()) {
            if (nextChunkLength > fileInputBuffer.capacity()) {
              throw new IOException(
                  "Corrupted TableSaveFile chunk has unreasonable length "
                      + "> DEFAULT_CHUNKSIZE bytes");
            }
          } else {
            if (nextChunkLength > DEFAULT_CHUNKSIZE) {
              throw new IOException(
                  "Corrupted TableSaveFile chunk has unreasonable length "
                      + "> DEFAULT_CHUNKSIZE bytes");
            }
          }

          /*
           * Go fetch the compressed data so that the uncompressed size is known,
           * and use that to set nextChunkLength to the uncompressed length. The code
           * ahead that constructs the VoltTable expects the uncompressed size/data
           * since it is producing an uncompressed table.
           */
          if (isCompressed()) {
            fileInputBuffer.clear();
            fileInputBuffer.limit(nextChunkLength);
            while (fileInputBuffer.hasRemaining()) {
              final int read = m_saveFile.read(fileInputBuffer);
              if (read == -1) {
                throw new EOFException();
              }
            }
            fileInputBuffer.flip();
            nextChunkLength = CompressionService.uncompressedLength(fileInputBuffer);
          }

          /*
           * Now allocate space to store the chunk using the VoltTable serialization representation.
           * The chunk will contain an integer row count preceding it so it can
           * be sucked straight in. There is a little funny business to overwrite the
           * partition id that is not part of the serialization format
           */
          Container c = getOutputBuffer(nextChunkPartitionId);

          /*
           * If the length value is wrong or not all data made it to disk this read will
           * not complete correctly. There could be overflow, underflow etc.
           * so use a try finally block to indicate that all partitions are now corrupt.
           * The enclosing exception handlers will do the right thing WRT
           * propagating the error and closing the file.
           */
          boolean completedRead = false;
          int checksumStartPosition = 0;
          int rowCount = 0;
          try {
            /*
             * Assemble a VoltTable out of the chunk of tuples.
             * Put in the header that was cached in the constructor,
             * then copy the tuple data. The row count is at the end
             * because it isn't known until serialization is complete.
             * It will have to be moved back to the beginning of the tuple data
             * after the header once the CRC has been calculated.
             */
            c.b.clear();
            // The length of the chunk already includes space for the 4-byte row count
            // even though it is at the end, but we need to leave it there for the CRC calc
            if (isCompressed()) {
              c.b.limit(nextChunkLength + m_tableHeader.capacity() + 4);
            } else {
              // Before compression the chunk length included the stuff added in the EE
              // like the 2 CRCs and partition id. It is only -8 because we still need the 4-bytes
              // of padding to move the row count in when constructing the volt table format.
              c.b.limit((nextChunkLength - 8) + m_tableHeader.capacity());
            }
            m_tableHeader.position(0);
            c.b.put(m_tableHeader);
            c.b.position(c.b.position() + 4); // Leave space for row count to be moved into
            checksumStartPosition = c.b.position();
            if (isCompressed()) {
              CompressionService.decompressBuffer(fileInputBuffer, c.b);
              c.b.position(c.b.limit());
            } else {
              while (c.b.hasRemaining()) {
                final int read = m_saveFile.read(c.b);
                if (read == -1) {
                  throw new EOFException();
                }
              }
            }
            c.b.position(c.b.position() - 4);
            rowCount = c.b.getInt();
            c.b.position(checksumStartPosition);
            completedRead = true;
          } finally {
            if (!completedRead) {
              for (int partitionId : m_partitionIds) {
                m_corruptedPartitions.add(partitionId);
              }
            }
          }

          /*
           * Validate the rest of the chunk. This can fail if the data is corrupted
           * or the length value was corrupted.
           */
          final int calculatedCRC =
              m_checksumType == ChecksumType.CRC32C
                  ? DBBPool.getCRC32C(c.address, c.b.position(), c.b.remaining())
                  : DBBPool.getCRC32(c.address, c.b.position(), c.b.remaining());
          if (calculatedCRC != nextChunkCRC) {
            m_corruptedPartitions.add(nextChunkPartitionId);
            if (m_continueOnCorruptedChunk) {
              c.discard();
              m_chunkReads.release();
              continue;
            } else {
              throw new IOException("CRC mismatch in saved table chunk");
            }
          }

          /*
           * Skip irrelevant chunks after the CRC is calculated. Always calculate the CRC
           * in case it is the length value that is corrupted
           */
          if (m_relevantPartitionIds != null) {
            if (!m_relevantPartitionIds.contains(nextChunkPartitionId)) {
              c.discard();
              m_chunkReads.release();
              continue;
            }
          }

          /*
           * The row count which was stored on disk at the end (and for the CRC calc)
           * is now moved to the appropriate place for the table serialization format.
           * Update the limit to reflect that.
           *
           * Surrounded in a try finally just in case there is overflow/underflow. Shouldn't
           * happen but I could be wrong.
           */
          boolean success = false;
          try {
            c.b.limit(c.b.limit() - 4);
            c.b.position(checksumStartPosition - 4);
            c.b.putInt(rowCount);
            c.b.position(0);
            success = true;
          } finally {
            if (!success) {
              for (int partitionId : m_partitionIds) {
                m_corruptedPartitions.add(partitionId);
              }
            }
          }

          synchronized (TableSaveFile.this) {
            m_availableChunks.offer(c);
            TableSaveFile.this.notifyAll();
          }
        } catch (EOFException eof) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            if (expectedAnotherChunk) {
              m_chunkReaderException =
                  new IOException("Expected to find another chunk but reached end of file instead");
            }
            TableSaveFile.this.notifyAll();
          }
        } catch (IOException e) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            m_chunkReaderException = e;
            TableSaveFile.this.notifyAll();
          }
        } catch (BufferUnderflowException e) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            m_chunkReaderException = new IOException(e);
            TableSaveFile.this.notifyAll();
          }
        } catch (BufferOverflowException e) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            m_chunkReaderException = new IOException(e);
            TableSaveFile.this.notifyAll();
          }
        } catch (IndexOutOfBoundsException e) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            m_chunkReaderException = new IOException(e);
            TableSaveFile.this.notifyAll();
          }
        }
      }
    }
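For reference, the 16-byte chunk header parsed at the top of readChunks() has this layout (a hypothetical helper restating what the code above reads; the checksum type follows m_checksumType in the real code):

  static void describeChunkHeaderV1(ByteBuffer chunkLengthB) {
    chunkLengthB.rewind();
    int chunkLength = chunkLengthB.getInt();       // bytes 0-3: length of the chunk on disk
    int partitionId = chunkLengthB.getInt();       // bytes 4-7: partition id
    int partitionIdCrc = chunkLengthB.getInt();    // bytes 8-11: CRC over bytes 4-7 only
    int chunkDataCrc = chunkLengthB.getInt();      // bytes 12-15: CRC over the chunk payload
    System.out.printf(
        "len=%d partition=%d idCrc=%08x dataCrc=%08x%n",
        chunkLength, partitionId, partitionIdCrc, chunkDataCrc);
  }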
Example #12
    /*
     * The old method was out of hand. Going to start a new one with a different format
     * that should be easier to understand and validate.
     */
    private void readChunksV2() {
      // For reading the compressed input.
      ByteBuffer fileInputBuffer =
          ByteBuffer.allocateDirect(CompressionService.maxCompressedLength(DEFAULT_CHUNKSIZE));

      while (m_hasMoreChunks) {

        /*
         * Limit the number of chunks materialized into memory at one time
         */
        try {
          m_chunkReads.acquire();
        } catch (InterruptedException e) {
          return;
        }
        boolean expectedAnotherChunk = false;
        try {

          /*
           * Get the length of the next chunk, the partition id, the CRC of the length
           * prefix and partition id, and then the CRC of the compressed payload
           */
          ByteBuffer chunkLengthB = ByteBuffer.allocate(16);
          while (chunkLengthB.hasRemaining()) {
            final int read = m_saveFile.read(chunkLengthB);
            if (read == -1) {
              throw new EOFException();
            }
          }
          int nextChunkLength = chunkLengthB.getInt(0);
          expectedAnotherChunk = true;

          /*
           * Get the partition id and its CRC (CRC now covers length prefix) and validate it. Validating the
           * partition ID for the chunk separately makes it possible to
           * continue processing chunks from other partitions if only one partition
           * has corrupt chunks in the file.
           */
          assert (m_checksumType == ChecksumType.CRC32C);
          final Checksum partitionIdCRC = new PureJavaCrc32C();
          final int nextChunkPartitionId = chunkLengthB.getInt(4);
          final int nextChunkPartitionIdCRC = chunkLengthB.getInt(8);

          partitionIdCRC.update(chunkLengthB.array(), 0, 8);
          int generatedValue = (int) partitionIdCRC.getValue();
          if (generatedValue != nextChunkPartitionIdCRC) {
            chunkLengthB.position(0);
            for (int partitionId : m_partitionIds) {
              m_corruptedPartitions.add(partitionId);
            }
            throw new IOException(
                "Chunk partition ID CRC check failed. "
                    + "This corrupts all partitions in this file");
          }

          /*
           * CRC for the data portion of the chunk
           */
          final int nextChunkCRC = chunkLengthB.getInt(12);

          /*
           * Sanity check the length value to ensure there isn't
           * a runtime exception or OOM.
           */
          if (nextChunkLength < 0) {
            throw new IOException("Corrupted TableSaveFile chunk has negative chunk length");
          }

          if (nextChunkLength > fileInputBuffer.capacity()) {
            throw new IOException(
                "Corrupted TableSaveFile chunk has unreasonable length "
                    + "> DEFAULT_CHUNKSIZE bytes");
          }

          /*
           * Go fetch the compressed data so that the uncompressed size is known,
           * and use that to set nextChunkLength to the uncompressed length. The code
           * ahead that constructs the VoltTable expects the uncompressed size/data
           * since it is producing an uncompressed table.
           */
          fileInputBuffer.clear();
          fileInputBuffer.limit(nextChunkLength);
          while (fileInputBuffer.hasRemaining()) {
            final int read = m_saveFile.read(fileInputBuffer);
            if (read == -1) {
              throw new EOFException();
            }
          }
          fileInputBuffer.flip();
          nextChunkLength = CompressionService.uncompressedLength(fileInputBuffer);

          /*
           * Validate the rest of the chunk. This can fail if the data is corrupted
           * or the length value was corrupted.
           */
          final int calculatedCRC =
              DBBPool.getBufferCRC32C(fileInputBuffer, 0, fileInputBuffer.remaining());
          if (calculatedCRC != nextChunkCRC) {
            m_corruptedPartitions.add(nextChunkPartitionId);
            if (m_continueOnCorruptedChunk) {
              m_chunkReads.release();
              continue;
            } else {
              throw new IOException("CRC mismatch in saved table chunk");
            }
          }

          /*
           * Now allocate space to store the chunk using the VoltTable serialization representation.
           * The chunk will contain an integer row count preceding it so it can
           * be sucked straight in. There is a little funny business to overwrite the
           * partition id that is not part of the serialization format
           */
          Container c = getOutputBuffer(nextChunkPartitionId);

          /*
           * If the length value is wrong or not all data made it to disk this read will
           * not complete correctly. There could be overflow, underflow etc.
           * so use a try finally block to indicate that all partitions are now corrupt.
           * The enclosing exception handlers will do the right thing WRT
           * propagating the error and closing the file.
           */
          boolean completedRead = false;
          try {
            /*
             * Assemble a VoltTable out of the chunk of tuples.
             * Put in the header that was cached in the constructor,
             * then copy the tuple data.
             */
            c.b.clear();
            c.b.limit(nextChunkLength + m_tableHeader.capacity());
            m_tableHeader.position(0);
            c.b.put(m_tableHeader);
            // Doesn't move buffer position, does change the limit
            CompressionService.decompressBuffer(fileInputBuffer, c.b);
            completedRead = true;
          } finally {
            if (!completedRead) {
              for (int partitionId : m_partitionIds) {
                m_corruptedPartitions.add(partitionId);
              }
            }
          }

          /*
           * Skip irrelevant chunks after the CRC is calculated. Always calculate the CRC
           * in case it is the length value that is corrupted
           */
          if (m_relevantPartitionIds != null) {
            if (!m_relevantPartitionIds.contains(nextChunkPartitionId)) {
              c.discard();
              m_chunkReads.release();
              continue;
            }
          }

          /*
           * VoltTable wants the buffer at the home position 0
           */
          c.b.position(0);

          synchronized (TableSaveFile.this) {
            m_availableChunks.offer(c);
            TableSaveFile.this.notifyAll();
          }
        } catch (EOFException eof) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            if (expectedAnotherChunk) {
              m_chunkReaderException =
                  new IOException("Expected to find another chunk but reached end of file instead");
            }
            TableSaveFile.this.notifyAll();
          }
        } catch (IOException e) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            m_chunkReaderException = e;
            TableSaveFile.this.notifyAll();
          }
        } catch (BufferUnderflowException e) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            m_chunkReaderException = new IOException(e);
            TableSaveFile.this.notifyAll();
          }
        } catch (BufferOverflowException e) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            m_chunkReaderException = new IOException(e);
            TableSaveFile.this.notifyAll();
          }
        } catch (IndexOutOfBoundsException e) {
          synchronized (TableSaveFile.this) {
            m_hasMoreChunks = false;
            m_chunkReaderException = new IOException(e);
            TableSaveFile.this.notifyAll();
          }
        }
      }
    }
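The V2 header keeps the same 16-byte shape but changes two things: the partition-id CRC now covers the first 8 bytes (length prefix plus partition id), and the data CRC is computed over the compressed payload exactly as it sits on disk, before decompression. A minimal sketch of the V2 header check, again with the JDK's CRC32C standing in for PureJavaCrc32C:

  static boolean headerCrcMatchesV2(ByteBuffer chunkLengthB) {
    java.util.zip.CRC32C crc = new java.util.zip.CRC32C();
    crc.update(chunkLengthB.array(), 0, 8);        // length prefix + partition id
    return (int) crc.getValue() == chunkLengthB.getInt(8);
  }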
Example #13
  public void pushExportBuffer(
      long uso, final long bufferPtr, ByteBuffer buffer, boolean sync, boolean endOfStream) {
    final java.util.concurrent.atomic.AtomicBoolean deleted =
        new java.util.concurrent.atomic.AtomicBoolean(false);
    synchronized (m_committedBuffers) {
      if (endOfStream) {
        assert (!m_endOfStream);
        assert (bufferPtr == 0);
        assert (buffer == null);
        assert (!sync);
        m_endOfStream = endOfStream;

        if (m_committedBuffers.sizeInBytes() == 0) {
          exportLog.info("Pushed EOS buffer with 0 bytes remaining");
          try {
            m_onDrain.run();
          } finally {
            m_onDrain = null;
          }
        }
        return;
      }
      assert (!m_endOfStream);
      if (buffer != null) {
        if (buffer.capacity() > 0) {
          try {
            m_committedBuffers.offer(
                new StreamBlock(
                    new BBContainer(buffer, bufferPtr) {
                      @Override
                      public void discard() {
                        DBBPool.deleteCharArrayMemory(address);
                        deleted.set(true);
                      }
                    },
                    uso,
                    false));
          } catch (IOException e) {
            exportLog.error(e);
            if (!deleted.get()) {
              DBBPool.deleteCharArrayMemory(bufferPtr);
            }
          }
        } else {
          /*
           * TupleStreamWrapper::setBytesUsed propagates the USO by sending
           * over an empty stream block. The block will be deleted
           * on the native side when this method returns
           */
          exportLog.info(
              "Syncing first unpolled USO to "
                  + uso
                  + " for table "
                  + m_tableName
                  + " partition "
                  + m_partitionId);
          m_firstUnpolledUso = uso;
        }
      }
      if (sync) {
        try {
          // Don't do a real sync, just write the in memory buffers
          // to a file. @Quiesce or blocking snapshot will do the sync
          m_committedBuffers.sync(true);
        } catch (IOException e) {
          exportLog.error(e);
        }
      }
    }
  }