/**
 * Delete log segments whose contents have been turned into SSTables. NOT threadsafe.
 *
 * @param context the commit log context of the flush
 * @param id id of the ColumnFamily being flushed to disk
 */
private void discardCompletedSegmentsInternal(CommitLogSegment.CommitLogContext context, Integer id) throws IOException
{
    if (logger.isDebugEnabled())
        logger.debug("discard completed log segments for " + context + ", column family " + id + ".");

    /*
     * Log replay assumes that we only have to look at entries past the last
     * flush position, so verify that this flush happens after the last. See CASSANDRA-936.
     */
    assert context.position >= context.getSegment().getHeader().getPosition(id)
           : "discard at " + context + " is not after last flush at " + context.getSegment().getHeader().getPosition(id);

    /*
     * Loop through all the commit log files in the history and process every
     * file older than the one in the context: for each of these files the
     * header is modified by resetting the dirty bit corresponding to the
     * flushed CF.
     */
    Iterator<CommitLogSegment> iter = segments.iterator();
    while (iter.hasNext())
    {
        CommitLogSegment segment = iter.next();
        CommitLogHeader header = segment.getHeader();
        if (segment.equals(context.getSegment()))
        {
            // we can't just mark the segment where the flush happened clean,
            // since there may have been writes to it between when the flush
            // started and when it finished. so mark the flush position as
            // the replay point for this CF, instead.
            if (logger.isDebugEnabled())
                logger.debug("Marking replay position " + context.position + " on commit log " + segment);
            header.turnOn(id, context.position);
            segment.writeHeader();
            break;
        }

        header.turnOff(id);
        if (header.isSafeToDelete())
        {
            logger.info("Discarding obsolete commit log: " + segment);
            segment.close();
            DeletionService.submitDelete(segment.getHeaderPath());
            DeletionService.submitDelete(segment.getPath());
            // usually this will be the first (remaining) segment, but not always: segment A may
            // contain writes to a CF that is unflushed, while being followed by segment B whose
            // CFs are all flushed.
            iter.remove();
        }
        else
        {
            if (logger.isDebugEnabled())
                logger.debug("Not safe to delete commit log " + segment + "; dirty is " + header.dirtyString());
            segment.writeHeader();
        }
    }
}
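// Worked example (hypothetical segment layout, for illustration only): suppose column family
// id 5 finishes a flush while segments A, B and C exist, and the flush context points into
// segment C at position 42000. The loop above then does the following:
//   - A: turnOff(5); if no other CF is still dirty in A, the segment is deleted.
//   - B: turnOff(5); B survives if some other CF is still dirty in it, which is why the
//     deleted segment is not always the first remaining one.
//   - C: turnOn(5, 42000) records 42000 as CF 5's replay point, covering writes that raced
//     with the flush, and iteration stops.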
public static void recover() throws IOException
{
    String directory = DatabaseDescriptor.getCommitLogLocation();
    File[] files = new File(directory).listFiles(new FilenameFilter()
    {
        public boolean accept(File dir, String name)
        {
            // we used to try to avoid instantiating commitlog (thus creating an empty segment
            // ready for writes) until after recover was finished. this turns out to be fragile;
            // it is less error-prone to go ahead and allow writes before recover(), and just
            // skip active segments when we do.
            return CommitLogSegment.possibleCommitLogFile(name) && !instance.manages(name);
        }
    });
    if (files.length == 0)
    {
        logger.info("No commitlog files found; skipping replay");
        return;
    }

    Arrays.sort(files, new FileUtils.FileComparator());
    logger.info("Replaying " + StringUtils.join(files, ", "));
    recover(files);
    for (File f : files)
    {
        FileUtils.delete(CommitLogHeader.getHeaderPathFromSegmentPath(f.getAbsolutePath())); // may not actually exist
        if (!f.delete())
            logger.error("Unable to remove " + f + "; you should remove it manually or next restart will replay it again (harmless, but time-consuming)");
    }
    logger.info("Log replay complete");
}
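// Call-order sketch (hedged: the initialization behavior of `instance` is an assumption based
// on the manages() check above, not shown here). Because recover() skips segments the live
// commit log already manages, a caller may safely open the log for writes first and replay
// old segments second, e.g.:
//
//     CommitLog log = CommitLog.instance; // hypothetical: opens a fresh, active segment
//     CommitLog.recover();                // replays only pre-restart segments, then deletes them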
public static void recover(File[] clogs) throws IOException
{
    Set<Table> tablesRecovered = new HashSet<Table>();
    List<Future<?>> futures = new ArrayList<Future<?>>();
    byte[] bytes = new byte[4096];
    Map<Integer, AtomicInteger> invalidMutations = new HashMap<Integer, AtomicInteger>();

    for (File file : clogs)
    {
        int bufferSize = (int) Math.min(file.length(), 32 * 1024 * 1024);
        BufferedRandomAccessFile reader = new BufferedRandomAccessFile(file.getAbsolutePath(), "r", bufferSize);

        try
        {
            CommitLogHeader clHeader = null;
            int replayPosition = 0;
            String headerPath = CommitLogHeader.getHeaderPathFromSegmentPath(file.getAbsolutePath());
            try
            {
                clHeader = CommitLogHeader.readCommitLogHeader(headerPath);
                replayPosition = clHeader.getReplayPosition();
            }
            catch (IOException ioe)
            {
                logger.info(headerPath + " incomplete, missing or corrupt. Everything is ok, don't panic. CommitLog will be replayed from the beginning");
                logger.debug("exception was", ioe);
            }
            if (replayPosition < 0)
            {
                logger.debug("skipping replay of fully-flushed {}", file);
                continue;
            }
            reader.seek(replayPosition);

            if (logger.isDebugEnabled())
                logger.debug("Replaying " + file + " starting at " + reader.getFilePointer());

            /* read the log entries, populate RowMutations and apply them */
            while (!reader.isEOF())
            {
                if (logger.isDebugEnabled())
                    logger.debug("Reading mutation at " + reader.getFilePointer());

                long claimedCRC32;
                Checksum checksum = new CRC32();
                int serializedSize;
                try
                {
                    // any of the reads may hit EOF
                    serializedSize = reader.readInt();
                    long claimedSizeChecksum = reader.readLong();
                    checksum.update(serializedSize);
                    if (checksum.getValue() != claimedSizeChecksum || serializedSize <= 0)
                        break; // entry wasn't synced correctly/fully. that's ok.

                    if (serializedSize > bytes.length)
                        bytes = new byte[(int) (1.2 * serializedSize)];
                    reader.readFully(bytes, 0, serializedSize);
                    claimedCRC32 = reader.readLong();
                }
                catch (EOFException eof)
                {
                    break; // last CL entry didn't get completely written. that's ok.
                }

                checksum.update(bytes, 0, serializedSize);
                if (claimedCRC32 != checksum.getValue())
                {
                    // this entry must not have been fsynced. probably the rest is bad too,
                    // but just in case there is no harm in trying them (since we still read
                    // on an entry boundary)
                    continue;
                }

                /* deserialize the commit log entry */
                ByteArrayInputStream bufIn = new ByteArrayInputStream(bytes, 0, serializedSize);
                RowMutation rm = null;
                try
                {
                    rm = RowMutation.serializer().deserialize(new DataInputStream(bufIn));
                }
                catch (UnserializableColumnFamilyException ex)
                {
                    AtomicInteger i = invalidMutations.get(ex.cfId);
                    if (i == null)
                    {
                        i = new AtomicInteger(1);
                        invalidMutations.put(ex.cfId, i);
                    }
                    else
                        i.incrementAndGet();
                    continue;
                }

                if (logger.isDebugEnabled())
                    logger.debug(String.format("replaying mutation for %s.%s: %s",
                                               rm.getTable(),
                                               rm.key(),
                                               "{" + StringUtils.join(rm.getColumnFamilies(), ", ") + "}"));
                final Table table = Table.open(rm.getTable());
                tablesRecovered.add(table);
                final Collection<ColumnFamily> columnFamilies = new ArrayList<ColumnFamily>(rm.getColumnFamilies());
                final long entryLocation = reader.getFilePointer();
                final CommitLogHeader finalHeader = clHeader;
                final RowMutation frm = rm;
                Runnable runnable = new WrappedRunnable()
                {
                    public void runMayThrow() throws IOException
                    {
                        RowMutation newRm = new RowMutation(frm.getTable(), frm.key());

                        // Rebuild the row mutation, omitting column families that a) have already
                        // been flushed, or b) are part of a cf that was dropped. Keep in mind that
                        // cf.name() is suspect; do everything based on the cfid instead.
                        for (ColumnFamily columnFamily : columnFamilies)
                        {
                            if (CFMetaData.getCF(columnFamily.id()) == null)
                                // null means the cf has been dropped
                                continue;

                            if (finalHeader == null || (finalHeader.isDirty(columnFamily.id()) && entryLocation >= finalHeader.getPosition(columnFamily.id())))
                                newRm.add(columnFamily);
                        }
                        if (!newRm.isEmpty())
                        {
                            Table.open(newRm.getTable()).apply(newRm, null, false);
                        }
                    }
                };
                futures.add(StageManager.getStage(Stage.MUTATION).submit(runnable));
                if (futures.size() > MAX_OUTSTANDING_REPLAY_COUNT)
                {
                    FBUtilities.waitOnFutures(futures);
                    futures.clear();
                }
            }
        }
        finally
        {
            reader.close();
            logger.info("Finished reading " + file);
        }
    }

    for (Map.Entry<Integer, AtomicInteger> entry : invalidMutations.entrySet())
        logger.info(String.format("Skipped %d mutations from unknown (probably removed) CF with id %d", entry.getValue().intValue(), entry.getKey()));

    // wait for all the writes to finish on the mutation stage
    FBUtilities.waitOnFutures(futures);
    logger.debug("Finished waiting on mutations from recovery");

    // flush replayed tables
    futures.clear();
    for (Table table : tablesRecovered)
        futures.addAll(table.flush());
    FBUtilities.waitOnFutures(futures);
    logger.info("Recovery complete");
}
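/*
 * Framing sketch, for illustration only: the read loop above implies each log entry is laid
 * out as [int size][long CRC(size)][size bytes][long CRC(size + bytes)], with the body CRC
 * carrying the size update forward. The hypothetical writer below is one counterpart
 * consistent with that framing; it is NOT this class's actual write path.
 */
private static void writeEntrySketch(java.io.DataOutput out, byte[] serialized) throws IOException
{
    Checksum checksum = new CRC32();
    out.writeInt(serialized.length);                    // entry length
    checksum.update(serialized.length);                 // checksum the length field itself...
    out.writeLong(checksum.getValue());                 // ...so a torn length write is detectable
    out.write(serialized);                              // serialized RowMutation bytes
    checksum.update(serialized, 0, serialized.length);  // body CRC includes the length update above
    out.writeLong(checksum.getValue());
}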