Exemplo n.º 1
0
  /**
   * Waits until the upload identified by {@code prefix} under {@code path} is marked complete,
   * then downloads every chunk node, in sorted node-name order, and reassembles the payload.
   *
   * <p>An upload counts as complete once a child named {@code prefix + "_complete"} exists under
   * {@code path}. Only children whose name starts with {@code prefix} are considered; the marker
   * node itself carries no payload and is skipped.
   *
   * <p>NOTE(review): the wait is a tight polling loop with no watch and no pause between
   * {@code getChildren} calls, exactly like the original. Consider a watch or a back-off if this
   * can wait for long.
   *
   * @param zk ZooKeeper client used to list and read the nodes
   * @param path parent node holding the chunk nodes
   * @param prefix name prefix shared by the chunk nodes and the completion marker
   * @param getCRC whether to compute a CRC over the raw chunk bytes, in download order
   * @return the result of {@code decompressBytes} on the downloaded chunks, paired with the CRC
   *     value, or with {@code null} when {@code getCRC} is false
   * @throws Exception if any ZooKeeper call or the decompression fails
   */
  public static Pair<byte[], Integer> retrieveChunksAsBytes(
      ZooKeeper zk, String path, String prefix, boolean getCRC) throws Exception {
    final String completeNode = path + "/" + prefix + "_complete";
    TreeSet<String> chunks = new TreeSet<String>();
    while (!chunks.contains(completeNode)) {
      chunks = new TreeSet<String>();
      // getChildren() returns bare child names, so filter on the prefix and rebuild the
      // absolute path that both the completion check and getData() need.
      for (String child : zk.getChildren(path, false)) {
        if (child.startsWith(prefix)) {
          chunks.add(path + "/" + child);
        }
      }
    }

    // Everything in the set except the completion marker is a payload chunk.
    byte resultBuffers[][] = new byte[chunks.size() - 1][];
    int ii = 0;
    PureJavaCrc32 crc = getCRC ? new PureJavaCrc32() : null;
    for (String chunk : chunks) {
      if (chunk.equals(completeNode)) continue;
      resultBuffers[ii] = zk.getData(chunk, false, null);
      if (crc != null) {
        crc.update(resultBuffers[ii]);
      }
      ii++;
    }

    return Pair.of(decompressBytes(resultBuffers), crc != null ? (int) crc.getValue() : null);
  }
  public DefaultSnapshotDataTarget(
      final File file,
      final int hostId,
      final String clusterName,
      final String databaseName,
      final String tableName,
      final int numPartitions,
      final boolean isReplicated,
      final List<Integer> partitionIds,
      final VoltTable schemaTable,
      final long txnId,
      final long timestamp,
      int version[])
      throws IOException {
    String hostname = CoreUtils.getHostnameOrAddress();
    m_file = file;
    m_tableName = tableName;
    m_fos = new FileOutputStream(file);
    m_channel = m_fos.getChannel();
    m_needsFinalClose = !isReplicated;
    final FastSerializer fs = new FastSerializer();
    fs.writeInt(0); // CRC
    fs.writeInt(0); // Header length placeholder
    fs.writeByte(
        1); // Indicate the snapshot was not completed, set to true for the CRC calculation, false
    // later
    for (int ii = 0; ii < 4; ii++) {
      fs.writeInt(version[ii]); // version
    }
    JSONStringer stringer = new JSONStringer();
    byte jsonBytes[] = null;
    try {
      stringer.object();
      stringer.key("txnId").value(txnId);
      stringer.key("hostId").value(hostId);
      stringer.key("hostname").value(hostname);
      stringer.key("clusterName").value(clusterName);
      stringer.key("databaseName").value(databaseName);
      stringer.key("tableName").value(tableName.toUpperCase());
      stringer.key("isReplicated").value(isReplicated);
      stringer.key("isCompressed").value(true);
      stringer.key("checksumType").value("CRC32C");
      stringer.key("timestamp").value(timestamp);
      /*
       * The timestamp string is for human consumption, automated stuff should use
       * the actual timestamp
       */
      stringer.key("timestampString").value(SnapshotUtil.formatHumanReadableDate(timestamp));
      if (!isReplicated) {
        stringer.key("partitionIds").array();
        for (int partitionId : partitionIds) {
          stringer.value(partitionId);
        }
        stringer.endArray();

        stringer.key("numPartitions").value(numPartitions);
      }
      stringer.endObject();
      String jsonString = stringer.toString();
      JSONObject jsonObj = new JSONObject(jsonString);
      jsonString = jsonObj.toString(4);
      jsonBytes = jsonString.getBytes("UTF-8");
    } catch (Exception e) {
      throw new IOException(e);
    }
    fs.writeInt(jsonBytes.length);
    fs.write(jsonBytes);

    final BBContainer container = fs.getBBContainer();
    container.b.position(4);
    container.b.putInt(container.b.remaining() - 4);
    container.b.position(0);

    final byte schemaBytes[] = PrivateVoltTableFactory.getSchemaBytes(schemaTable);

    final PureJavaCrc32 crc = new PureJavaCrc32();
    ByteBuffer aggregateBuffer = ByteBuffer.allocate(container.b.remaining() + schemaBytes.length);
    aggregateBuffer.put(container.b);
    aggregateBuffer.put(schemaBytes);
    aggregateBuffer.flip();
    crc.update(aggregateBuffer.array(), 4, aggregateBuffer.capacity() - 4);

    final int crcValue = (int) crc.getValue();
    aggregateBuffer.putInt(crcValue).position(8);
    aggregateBuffer.put((byte) 0).position(0); // Haven't actually finished writing file

    if (m_simulateFullDiskWritingHeader) {
      m_writeException = new IOException("Disk full");
      m_writeFailed = true;
      m_fos.close();
      throw m_writeException;
    }

    /*
     * Be completely sure the write succeeded. If it didn't
     * the disk is probably full or the path is bunk etc.
     */
    m_acceptOneWrite = true;
    ListenableFuture<?> writeFuture =
        write(Callables.returning((BBContainer) DBBPool.wrapBB(aggregateBuffer)), false);
    try {
      writeFuture.get();
    } catch (InterruptedException e) {
      m_fos.close();
      throw new java.io.InterruptedIOException();
    } catch (ExecutionException e) {
      m_fos.close();
      throw m_writeException;
    }
    if (m_writeFailed) {
      m_fos.close();
      throw m_writeException;
    }

    ScheduledFuture<?> syncTask = null;
    syncTask =
        m_syncService.scheduleAtFixedRate(
            new Runnable() {
              @Override
              public void run() {
                // Only sync for at least 4 megabyte of data, enough to amortize the cost of seeking
                // on ye olden platters. Since we are appending to a file it's actually 2 seeks.
                while (m_bytesWrittenSinceLastSync.get() > (1024 * 1024 * 4)) {
                  final int bytesSinceLastSync = m_bytesWrittenSinceLastSync.getAndSet(0);
                  try {
                    m_channel.force(false);
                  } catch (IOException e) {
                    if (!(e instanceof java.nio.channels.AsynchronousCloseException)) {
                      SNAP_LOG.error("Error syncing snapshot", e);
                    } else {
                      SNAP_LOG.debug(
                          "Asynchronous close syncing snasphot data, presumably graceful", e);
                    }
                  }
                  m_bytesAllowedBeforeSync.release(bytesSinceLastSync);
                }
              }
            },
            SNAPSHOT_SYNC_FREQUENCY,
            SNAPSHOT_SYNC_FREQUENCY,
            TimeUnit.MILLISECONDS);
    m_syncTask = syncTask;
  }
Exemplo n.º 3
0
  /**
   * Opens a table save file by reading and validating its header.
   *
   * <p>The constructor reads, in order: an int CRC and an int header length, the save/restore
   * header of that length, an int VoltTable-header length and the VoltTable header itself. The
   * stored CRC is checked against everything after the CRC field. If it does not match, a second
   * CRC computed with the first header byte (the completion flag) forced to 1 is tried; a match
   * there means the snapshot was never marked complete. Any other mismatch is reported as
   * corruption.
   *
   * <p>The metadata is then decoded, either from the original binary layout (fourth version
   * number 0) or from the JSON layout. When the file is not complete, every partition it
   * contains is recorded as corrupted.
   *
   * @param dataIn channel positioned at the start of the save file
   * @param readAheadChunks number of permits for the chunk-read semaphore
   * @param relevantPartitionIds partitions of interest, or {@code null} to leave the filter unset
   * @param continueOnCorruptedChunk stored for later chunk reads
   * @throws IOException if the file is truncated, fails the checksum, or has a malformed header
   */
  // XXX maybe consider an IOException subclass at some point
  public TableSaveFile(
      FileChannel dataIn,
      int readAheadChunks,
      Integer[] relevantPartitionIds,
      boolean continueOnCorruptedChunk)
      throws IOException {
    try {
      EELibraryLoader.loadExecutionEngineLibrary(true);
      if (relevantPartitionIds == null) {
        m_relevantPartitionIds = null;
      } else {
        m_relevantPartitionIds = new HashSet<Integer>();
        for (Integer i : relevantPartitionIds) {
          m_relevantPartitionIds.add(i);
        }
      }
      m_chunkReads = new Semaphore(readAheadChunks);
      m_saveFile = dataIn;
      m_continueOnCorruptedChunk = continueOnCorruptedChunk;

      final PureJavaCrc32 crc = new PureJavaCrc32();
      /*
       * If the CRC check fails because the file wasn't completed
       */
      final PureJavaCrc32 secondCRC = new PureJavaCrc32();

      /*
       * Get the header with the save restore specific information
       */
      final ByteBuffer lengthBuffer = ByteBuffer.allocate(8);
      readFullyOrThrowEOF(dataIn, lengthBuffer);
      lengthBuffer.flip();
      final int originalCRC = lengthBuffer.getInt();
      int length = lengthBuffer.getInt();
      // The stored CRC does not cover itself, so start at the length field.
      crc.update(lengthBuffer.array(), 4, 4);
      secondCRC.update(lengthBuffer.array(), 4, 4);

      if (length < 0) {
        throw new IOException("Corrupted save file has negative header length");
      }

      if (length > 2097152) {
        throw new IOException("Corrupted save file has unreasonable header length > 2 megs");
      }

      final ByteBuffer saveRestoreHeader = ByteBuffer.allocate(length);
      // A short read is legal and is simply retried; only a real end-of-stream is an error.
      readFullyOrThrowEOF(dataIn, saveRestoreHeader);
      saveRestoreHeader.flip();
      crc.update(saveRestoreHeader.array());
      // Second CRC: same bytes, but with the leading completion flag forced to 1.
      secondCRC.update(new byte[] {1});
      secondCRC.update(saveRestoreHeader.array(), 1, saveRestoreHeader.array().length - 1);

      /*
       *  Get the template for the VoltTable serialization header.
       *  It will have an extra length value preceded to it so that
       *  it can be sucked straight into a buffer. This will not
       *  contain a row count since that varies from chunk to chunk
       *  and is supplied by the chunk
       */
      lengthBuffer.clear();
      lengthBuffer.limit(4);
      readFullyOrThrowEOF(dataIn, lengthBuffer);
      crc.update(lengthBuffer.array(), 0, 4);
      secondCRC.update(lengthBuffer.array(), 0, 4);
      lengthBuffer.flip();
      length = lengthBuffer.getInt();

      if (length < 4) {
        throw new IOException(
            "Corrupted save file has negative length or too small length for VoltTable header");
      }

      if (length > 2097152) {
        throw new IOException(
            "Corrupted save file has unreasonable VoltTable header length > 2 megs");
      }

      m_tableHeader = ByteBuffer.allocate(length + 4);
      m_tableHeader.putInt(length);
      readFullyOrThrowEOF(dataIn, m_tableHeader);
      crc.update(m_tableHeader.array(), 4, length);
      secondCRC.update(m_tableHeader.array(), 4, length);

      boolean failedCRCDueToNotCompleted = false;

      final int actualCRC = (int) crc.getValue();
      if (originalCRC != actualCRC) {
        /*
         * Check if the CRC mismatch is due to the snapshot not being completed
         */
        final int secondCRCValue = (int) secondCRC.getValue();
        if (secondCRCValue == originalCRC) {
          failedCRCDueToNotCompleted = true;
        } else {
          throw new IOException("Checksum mismatch");
        }
      }

      FastDeserializer fd = new FastDeserializer(saveRestoreHeader);
      byte completedByte = fd.readByte();
      m_completed = !failedCRCDueToNotCompleted && completedByte == 1;
      for (int ii = 0; ii < 4; ii++) {
        m_versionNum[ii] = fd.readInt();
      }

      /*
       * Support the original pre 1.3 header format as well as a new JSON format.
       * JSON will make it possible to add info to a snapshot header without
       * breaking backwards compatibility.
       */
      if (m_versionNum[3] == 0) {
        m_txnId = fd.readLong();
        m_timestamp = TransactionIdManager.getTimestampFromTransactionId(m_txnId);
        m_hostId = fd.readInt();
        m_hostname = fd.readString();
        m_clusterName = fd.readString();
        m_databaseName = fd.readString();
        m_tableName = fd.readString();
        m_isReplicated = fd.readBoolean();
        m_isCompressed = false;
        m_checksumType = ChecksumType.CRC32;
        if (!m_isReplicated) {
          m_partitionIds = (int[]) fd.readArray(int.class);
          if (!m_completed) {
            for (Integer partitionId : m_partitionIds) {
              m_corruptedPartitions.add(partitionId);
            }
          }
          m_totalPartitions = fd.readInt();
        } else {
          m_partitionIds = new int[] {0};
          m_totalPartitions = 1;
          if (!m_completed) {
            m_corruptedPartitions.add(0);
          }
        }
        m_hasVersion2FormatChunks = false;
      } else {
        assert (m_versionNum[3] == 1 || m_versionNum[3] == 2);
        m_hasVersion2FormatChunks = m_versionNum[3] >= 2;
        int numJSONBytes = fd.readInt();
        // The JSON lives inside the save/restore header, so it can never be longer than it.
        // Rejecting bad lengths here reports corruption as an IOException instead of a
        // NegativeArraySizeException or an oversized allocation.
        if (numJSONBytes < 0 || numJSONBytes > saveRestoreHeader.capacity()) {
          throw new IOException(
              "Corrupted save file has invalid JSON header length " + numJSONBytes);
        }
        byte jsonBytes[] = new byte[numJSONBytes];
        fd.readFully(jsonBytes);
        String jsonString = new String(jsonBytes, "UTF-8");
        JSONObject obj = new JSONObject(jsonString);

        m_txnId = obj.getLong("txnId");
        // Timestamp field added for 3.0, might not be there
        if (obj.has("timestamp")) {
          m_timestamp = obj.getLong("timestamp");
        } else {
          // Pre 3.0/IV2 the timestamp was in the transactionid
          m_timestamp = TransactionIdManager.getTimestampFromTransactionId(m_txnId);
        }
        m_hostId = obj.getInt("hostId");
        m_hostname = obj.getString("hostname");
        m_clusterName = obj.getString("clusterName");
        m_databaseName = obj.getString("databaseName");
        m_tableName = obj.getString("tableName");
        m_isReplicated = obj.getBoolean("isReplicated");
        m_isCompressed = obj.optBoolean("isCompressed", false);
        final String checksumTypeName = obj.optString("checksumType", "CRC32");
        ChecksumType checksumType;
        try {
          checksumType = ChecksumType.valueOf(checksumTypeName);
        } catch (IllegalArgumentException e) {
          // An unrecognized name means a damaged or unsupported header, not a programming error.
          throw new IOException(
              "Corrupted save file has unknown checksum type " + checksumTypeName, e);
        }
        m_checksumType = checksumType;
        if (!m_isReplicated) {
          JSONArray partitionIds = obj.getJSONArray("partitionIds");
          m_partitionIds = new int[partitionIds.length()];
          for (int ii = 0; ii < m_partitionIds.length; ii++) {
            m_partitionIds[ii] = partitionIds.getInt(ii);
          }

          if (!m_completed) {
            for (Integer partitionId : m_partitionIds) {
              m_corruptedPartitions.add(partitionId);
            }
          }
          m_totalPartitions = obj.getInt("numPartitions");
        } else {
          m_partitionIds = new int[] {0};
          m_totalPartitions = 1;
          if (!m_completed) {
            m_corruptedPartitions.add(0);
          }
        }
      }
      /*
       * Several runtime exceptions can be thrown in valid failure cases where
       * a corrupt save file is being detected.
       */
    } catch (BufferUnderflowException e) {
      throw new IOException(e);
    } catch (BufferOverflowException e) {
      throw new IOException(e);
    } catch (IndexOutOfBoundsException e) {
      throw new IOException(e);
    } catch (JSONException e) {
      throw new IOException(e);
    }
  }

  /**
   * Reads from {@code channel} until {@code buffer} has no space remaining.
   *
   * @throws EOFException if the channel reaches end-of-stream before the buffer is full
   * @throws IOException if the underlying read fails
   */
  private static void readFullyOrThrowEOF(FileChannel channel, ByteBuffer buffer)
      throws IOException {
    while (buffer.hasRemaining()) {
      if (channel.read(buffer) == -1) {
        throw new EOFException();
      }
    }
  }