예제 #1
0
  /**
   * Finishes a replay pass: reports how many mutations were skipped per unknown column family id,
   * blocks until every outstanding replay future has completed, then flushes each recovered table
   * and blocks on those flushes as well.
   *
   * @return the current value of {@code replayedCount}
   * @throws IOException declared for callers; presumably raised while waiting on the futures or
   *     flushing (the helpers are not visible from here)
   */
  public int blockForWrites() throws IOException {
    // One summary line per column family id whose mutations could not be deserialized.
    for (Map.Entry<Integer, AtomicInteger> skipped : invalidMutations.entrySet()) {
      final int skippedCount = skipped.getValue().intValue();
      final Integer cfId = skipped.getKey();
      final String summary =
          String.format(
              "Skipped %d mutations from unknown (probably removed) CF with id %s",
              skippedCount, cfId);
      logger.info(summary);
    }

    // Block until every write already handed to the mutation stage is done.
    FBUtilities.waitOnFutures(futures);
    logger.debug("Finished waiting on mutations from recovery");

    // Reuse the same list to collect, and then wait on, the flushes of the replayed tables.
    futures.clear();
    for (Table recovered : tablesRecovered) {
      futures.addAll(recovered.flush());
    }
    FBUtilities.waitOnFutures(futures);

    return replayedCount.get();
  }
  /**
   * Replays the given commit log segment files and flushes every table that received a replayed
   * mutation.
   *
   * <p>For each segment the companion header is read to find the replay position; if the header
   * cannot be read the segment is replayed from offset 0, and a negative replay position skips the
   * segment entirely. Entries are then read sequentially. As read here, an entry is laid out as:
   * {@code int serializedSize}, {@code long} checksum of the size, {@code serializedSize} payload
   * bytes, {@code long} checksum accumulated over the size and the payload. Reading of a segment
   * stops at the first truncated entry or bad size checksum; an entry whose payload checksum does
   * not match is skipped and reading continues with the next entry.
   *
   * <p>Each valid mutation is rebuilt on the mutation stage, dropping column families that no
   * longer exist or that the header does not require replaying, and applied if anything remains.
   *
   * @param clogs commit log segment files to replay, processed in array order
   * @throws IOException if a segment cannot be read, or if waiting on replay/flush work fails
   */
  public static void recover(File[] clogs) throws IOException {
    Set<Table> tablesRecovered = new HashSet<Table>();
    List<Future<?>> futures = new ArrayList<Future<?>>();
    // Scratch buffer for entry payloads, shared across all segments and grown on demand.
    byte[] bytes = new byte[4096];
    // Count of undeserializable mutations, keyed by the unknown column family id.
    Map<Integer, AtomicInteger> invalidMutations = new HashMap<Integer, AtomicInteger>();

    for (File file : clogs) {
      // Cap the read buffer at 32MB, and never ask for a zero-length buffer when the segment
      // file is empty.
      // NOTE(review): BufferedRandomAccessFile's handling of a 0 size is not visible from here;
      // clamping to 1 is harmless either way.
      int bufferSize = (int) Math.min(Math.max(file.length(), 1), 32 * 1024 * 1024);
      BufferedRandomAccessFile reader =
          new BufferedRandomAccessFile(file.getAbsolutePath(), "r", bufferSize);

      try {
        CommitLogHeader clHeader = null;
        int replayPosition = 0;
        String headerPath = CommitLogHeader.getHeaderPathFromSegmentPath(file.getAbsolutePath());
        try {
          clHeader = CommitLogHeader.readCommitLogHeader(headerPath);
          replayPosition = clHeader.getReplayPosition();
        } catch (IOException ioe) {
          // Best effort: without a usable header, clHeader stays null and replay starts at 0.
          logger.info(
              headerPath
                  + " incomplete, missing or corrupt.  Everything is ok, don't panic.  CommitLog will be replayed from the beginning");
          logger.debug("exception was", ioe);
        }
        if (replayPosition < 0) {
          logger.debug("skipping replay of fully-flushed {}", file);
          continue; // the finally block below still closes the reader
        }
        reader.seek(replayPosition);

        if (logger.isDebugEnabled()) {
          logger.debug("Replaying " + file + " starting at " + reader.getFilePointer());
        }

        /* read the logs populate RowMutation and apply */
        while (!reader.isEOF()) {
          if (logger.isDebugEnabled()) {
            logger.debug("Reading mutation at " + reader.getFilePointer());
          }

          long claimedCRC32;

          Checksum checksum = new CRC32();
          int serializedSize;
          try {
            // any of the reads may hit EOF
            serializedSize = reader.readInt();
            long claimedSizeChecksum = reader.readLong();
            // Checksum.update(int) folds in only the low-order byte of its argument, so this
            // protects just the lowest byte of the size. It is left as-is because it presumably
            // has to match what the writer computed (not visible here).
            checksum.update(serializedSize);
            if (checksum.getValue() != claimedSizeChecksum || serializedSize <= 0) {
              break; // entry wasn't synced correctly/fully.  that's ok.
            }

            // Because the size check above is weak, a torn size field can claim far more payload
            // than the file holds. Such an entry cannot have been written completely, so stop
            // here instead of allocating a buffer for it and only then running into EOF.
            if (serializedSize > file.length() - reader.getFilePointer()) {
              break;
            }

            // Grow with 20% headroom so slightly larger entries do not force a reallocation.
            if (serializedSize > bytes.length) {
              bytes = new byte[(int) (1.2 * serializedSize)];
            }
            reader.readFully(bytes, 0, serializedSize);
            claimedCRC32 = reader.readLong();
          } catch (EOFException eof) {
            break; // last CL entry didn't get completely written.  that's ok.
          }

          // Same Checksum instance as above: the final value covers the size update plus payload.
          checksum.update(bytes, 0, serializedSize);
          if (claimedCRC32 != checksum.getValue()) {
            // this entry must not have been fsynced.  probably the rest is bad too,
            // but just in case there is no harm in trying them (since we still read on an entry
            // boundary)
            continue;
          }

          /* deserialize the commit log entry */
          ByteArrayInputStream bufIn = new ByteArrayInputStream(bytes, 0, serializedSize);
          RowMutation rm = null;
          try {
            rm = RowMutation.serializer().deserialize(new DataInputStream(bufIn));
          } catch (UnserializableColumnFamilyException ex) {
            // Tally the skip per column family id; the totals are logged once replay is done.
            AtomicInteger i = invalidMutations.get(ex.cfId);
            if (i == null) {
              i = new AtomicInteger(1);
              invalidMutations.put(ex.cfId, i);
            } else {
              i.incrementAndGet();
            }
            continue;
          }

          if (logger.isDebugEnabled()) {
            logger.debug(
                String.format(
                    "replaying mutation for %s.%s: %s",
                    rm.getTable(),
                    rm.key(),
                    "{" + StringUtils.join(rm.getColumnFamilies(), ", ") + "}"));
          }
          final Table table = Table.open(rm.getTable());
          tablesRecovered.add(table);
          // Snapshot everything the task needs: it runs later on another stage, while this loop
          // keeps advancing the reader.
          final Collection<ColumnFamily> columnFamilies =
              new ArrayList<ColumnFamily>(rm.getColumnFamilies());
          // File position just past this entry (captured after the trailing checksum was read).
          final long entryLocation = reader.getFilePointer();
          final CommitLogHeader finalHeader = clHeader;
          final RowMutation frm = rm;
          Runnable runnable =
              new WrappedRunnable() {
                public void runMayThrow() throws IOException {
                  RowMutation newRm = new RowMutation(frm.getTable(), frm.key());

                  // Rebuild the row mutation, omitting column families that a) have already been
                  // flushed, b) are part of a cf that was dropped. Keep in mind that the
                  // cf.name() is suspect. Do everything based on the cfid instead.
                  for (ColumnFamily columnFamily : columnFamilies) {
                    if (CFMetaData.getCF(columnFamily.id()) == null) {
                      // null means the cf has been dropped
                      continue;
                    }

                    // With no header, replay everything; otherwise only column families the
                    // header marks dirty, and only entries at or past that family's position.
                    if (finalHeader == null
                        || (finalHeader.isDirty(columnFamily.id())
                            && entryLocation >= finalHeader.getPosition(columnFamily.id()))) {
                      newRm.add(columnFamily);
                    }
                  }
                  if (!newRm.isEmpty()) {
                    Table.open(newRm.getTable()).apply(newRm, null, false);
                  }
                }
              };
          futures.add(StageManager.getStage(Stage.MUTATION).submit(runnable));
          // Bound the number of in-flight replay tasks by draining once the cap is exceeded.
          if (futures.size() > MAX_OUTSTANDING_REPLAY_COUNT) {
            FBUtilities.waitOnFutures(futures);
            futures.clear();
          }
        }
      } finally {
        reader.close();
        logger.info("Finished reading " + file);
      }
    }

    for (Map.Entry<Integer, AtomicInteger> entry : invalidMutations.entrySet()) {
      logger.info(
          String.format(
              "Skipped %d mutations from unknown (probably removed) CF with id %d",
              entry.getValue().intValue(), entry.getKey()));
    }

    // wait for all the writes to finish on the mutation stage
    FBUtilities.waitOnFutures(futures);
    logger.debug("Finished waiting on mutations from recovery");

    // flush replayed tables
    futures.clear();
    for (Table table : tablesRecovered) {
      futures.addAll(table.flush());
    }
    FBUtilities.waitOnFutures(futures);
    logger.info("Recovery complete");
  }