public int blockForWrites() throws IOException
{
    for (Map.Entry<Integer, AtomicInteger> entry : invalidMutations.entrySet())
        logger.info(String.format("Skipped %d mutations from unknown (probably removed) CF with id %s",
                                  entry.getValue().intValue(), entry.getKey()));

    // wait for all the writes to finish on the mutation stage
    FBUtilities.waitOnFutures(futures);
    logger.debug("Finished waiting on mutations from recovery");

    // flush replayed tables
    futures.clear();
    for (Table table : tablesRecovered)
        futures.addAll(table.flush());
    FBUtilities.waitOnFutures(futures);
    return replayedCount.get();
}
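// A minimal JDK-only sketch of the "wait on futures" pattern used twice above.
// The class and method names are hypothetical; this assumes (as
// FBUtilities.waitOnFutures presumably does) that wrapping checked failures in
// a RuntimeException is acceptable to the caller.
import java.util.List;
import java.util.concurrent.Future;

public final class FutureBarrier
{
    private FutureBarrier() {}

    public static void waitOnAll(List<Future<?>> futures)
    {
        for (Future<?> future : futures)
        {
            try
            {
                future.get(); // blocks until this task completes, or rethrows its failure
            }
            catch (Exception e)
            {
                throw new RuntimeException(e);
            }
        }
    }
}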
/*
 * This exercises, in particular, the code of #4142
 */
@Test
public void testValidationMultipleSSTablePerLevel() throws Exception
{
    String ksname = "Keyspace1";
    String cfname = "StandardLeveled";
    Table table = Table.open(ksname);
    ColumnFamilyStore store = table.getColumnFamilyStore(cfname);

    ByteBuffer value = ByteBuffer.wrap(new byte[100 * 1024]); // 100 KB value, make it easy to have multiple files

    // Enough data to have a level 1 and 2
    int rows = 20;
    int columns = 10;

    // Add enough data to trigger multiple sstables per level
    for (int r = 0; r < rows; r++)
    {
        DecoratedKey key = Util.dk(String.valueOf(r));
        RowMutation rm = new RowMutation(ksname, key.key);
        for (int c = 0; c < columns; c++)
            rm.add(new QueryPath(cfname, null, ByteBufferUtil.bytes("column" + c)), value, 0);
        rm.apply();
        store.forceFlush();
    }

    LeveledCompactionStrategy strat = (LeveledCompactionStrategy) store.getCompactionStrategy();
    while (strat.getLevelSize(0) > 0)
    {
        store.forceMajorCompaction();
        Thread.sleep(200);
    }

    // Checking we're not completely bad at math
    assert strat.getLevelSize(1) > 0;
    assert strat.getLevelSize(2) > 0;

    AntiEntropyService.CFPair p = new AntiEntropyService.CFPair(ksname, cfname);
    Range<Token> range = new Range<Token>(Util.token(""), Util.token(""));
    AntiEntropyService.TreeRequest req =
        new AntiEntropyService.TreeRequest("1", FBUtilities.getLocalAddress(), range, p);
    AntiEntropyService.Validator validator = new AntiEntropyService.Validator(req);
    CompactionManager.instance.submitValidation(store, validator).get();
}
/**
 * Writes out a bunch of rows for a single column family.
 *
 * @param rms A group of row mutations for the same table and column family.
 * @return The ColumnFamilyStore that was used.
 */
public static ColumnFamilyStore writeColumnFamily(List<IMutation> rms)
    throws IOException, ExecutionException, InterruptedException
{
    IMutation first = rms.get(0);
    String tablename = first.getTable();
    UUID cfid = first.getColumnFamilyIds().iterator().next();

    for (IMutation rm : rms)
        rm.apply();

    ColumnFamilyStore store = Table.open(tablename).getColumnFamilyStore(cfid);
    store.forceBlockingFlush();
    return store;
}
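// A hedged usage sketch of writeColumnFamily, assuming the "Keyspace1"/"Standard1"
// test fixtures; the mutation-building calls mirror the ones in
// testValidationMultipleSSTablePerLevel above.
@Test
public void testWriteColumnFamilySketch() throws Exception
{
    List<IMutation> rms = new ArrayList<IMutation>();
    RowMutation rm = new RowMutation("Keyspace1", ByteBufferUtil.bytes("key1"));
    rm.add(new QueryPath("Standard1", null, ByteBufferUtil.bytes("col1")),
           ByteBufferUtil.bytes("val1"), 0);
    rms.add(rm);
    ColumnFamilyStore store = writeColumnFamily(rms); // applies the mutations, then blocks on flush
    assert store != null;
}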
@Override
public void applyModels() throws IOException
{
    KSMetaData ksm = DatabaseDescriptor.getTableDefinition(name);

    // remove the table from the static instances.
    Table table = Table.clear(ksm.name);
    if (table == null)
        throw new IOException("Table is not active. " + ksm.name);

    // remove all cfs from the table instance.
    for (CFMetaData cfm : ksm.cfMetaData().values())
    {
        CFMetaData.purge(cfm);
        table.dropCf(cfm.cfId);
        SystemTable.markForRemoval(cfm);
    }

    // reset defs.
    DatabaseDescriptor.clearTableDefinition(ksm, newVersion);
    CommitLog.instance().forceNewSegment();
    Migration.cleanupDeadFiles(blockOnFileDeletion);

    // clear up any local hinted data for this keyspace.
    HintedHandOffManager.renameHints(name, null);
}
public static void validateIndexClauses(String keyspace, String columnFamily, IndexClause index_clause)
    throws InvalidRequestException
{
    if (index_clause.expressions.isEmpty())
        throw new InvalidRequestException("index clause list may not be empty");

    Set<ByteBuffer> indexedColumns =
        Table.open(keyspace).getColumnFamilyStore(columnFamily).getIndexedColumns();
    for (IndexExpression expression : index_clause.expressions)
    {
        if (expression.op.equals(IndexOperator.EQ) && indexedColumns.contains(expression.column_name))
            return;
    }
    throw new InvalidRequestException("No indexed columns present in index clause with operator EQ");
}
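// A hedged usage sketch for validateIndexClauses. It assumes the Thrift-generated
// constructors IndexExpression(column_name, op, value) and
// IndexClause(expressions, start_key, count), plus an "Indexed1" column family
// with an index on "birthdate" -- all illustrative fixture names, not guaranteed
// by this file.
IndexExpression expr = new IndexExpression(ByteBufferUtil.bytes("birthdate"),
                                           IndexOperator.EQ,
                                           ByteBufferUtil.bytes("1980"));
IndexClause clause = new IndexClause(Arrays.asList(expr),
                                     ByteBufferUtil.EMPTY_BYTE_BUFFER,
                                     100);
// passes only because an EQ expression on an indexed column is present
validateIndexClauses("Keyspace1", "Indexed1", clause);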
public void applyModels() throws IOException
{
    ColumnFamilyStore cfs = Table.open(tableName).getColumnFamilyStore(cfName);

    // reinitialize the table.
    KSMetaData existing = DatabaseDescriptor.getTableDefinition(tableName);
    CFMetaData cfm = existing.cfMetaData().get(cfName);
    KSMetaData ksm = makeNewKeyspaceDefinition(existing);
    CFMetaData.purge(cfm);
    DatabaseDescriptor.setTableDefinition(ksm, newVersion);

    if (!clientMode)
    {
        cfs.snapshot(Table.getTimestampedSnapshotName(null));

        CompactionManager.instance.getCompactionLock().lock();
        cfs.flushLock.lock();
        try
        {
            Table.open(ksm.name).dropCf(cfm.cfId);
        }
        finally
        {
            cfs.flushLock.unlock();
            CompactionManager.instance.getCompactionLock().unlock();
        }
    }
}
/** Trigger a validation compaction which will return the tree upon completion. */
public void doVerb(Message message, String id)
{
    byte[] bytes = message.getMessageBody();
    DataInputStream buffer = new DataInputStream(new FastByteArrayInputStream(bytes));

    try
    {
        TreeRequest remotereq = this.deserialize(buffer, message.getVersion());
        TreeRequest request =
            new TreeRequest(remotereq.sessionid, message.getFrom(), remotereq.range, remotereq.cf);

        // trigger a read-only compaction
        ColumnFamilyStore store = Table.open(request.cf.left).getColumnFamilyStore(request.cf.right);
        Validator validator = new Validator(request);
        logger.debug("Queueing validation compaction for " + request);
        CompactionManager.instance.submitValidation(store, validator);
    }
    catch (IOException e)
    {
        throw new IOError(e);
    }
}
/**
 * Starts sending/receiving our list of differences to/from the remote endpoint: creates a
 * callback that will be called out of band once the streams complete.
 */
void performStreamingRepair() throws IOException
{
    logger.info("Performing streaming repair of " + differences.size() + " ranges with " + remote
                + " for " + range);
    ColumnFamilyStore cfstore = Table.open(tablename).getColumnFamilyStore(cfname);
    try
    {
        Collection<SSTableReader> sstables = cfstore.getSSTables();
        Callback callback = new Callback();

        // send ranges to the remote node
        StreamOutSession outsession = StreamOutSession.create(tablename, remote, callback);
        StreamOut.transferSSTables(outsession, sstables, differences, OperationType.AES);

        // request ranges from the remote node
        StreamIn.requestRanges(remote, tablename, differences, callback, OperationType.AES);
    }
    catch (Exception e)
    {
        throw new IOException("Streaming repair failed.", e);
    }
}
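// A JDK-only sketch of the out-of-band completion callback described in the
// javadoc above. It assumes (as registering the same callback with both the
// outbound and inbound sessions implies) that completion should only be
// reported after both streams finish; the class names are illustrative.
import java.util.concurrent.atomic.AtomicInteger;

public class StreamCallbackDemo
{
    static class Callback implements Runnable
    {
        private final AtomicInteger outstanding = new AtomicInteger(2); // one outbound, one inbound

        public void run()
        {
            if (outstanding.decrementAndGet() == 0)
                System.out.println("streaming repair complete; notify the coordinator here");
        }
    }

    public static void main(String[] args)
    {
        Callback callback = new Callback();
        callback.run(); // outbound session finished
        callback.run(); // inbound session finished -- completion fires now
    }
}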
public static void recover(File[] clogs) throws IOException
{
    Set<Table> tablesRecovered = new HashSet<Table>();
    List<Future<?>> futures = new ArrayList<Future<?>>();
    byte[] bytes = new byte[4096];
    Map<Integer, AtomicInteger> invalidMutations = new HashMap<Integer, AtomicInteger>();

    for (File file : clogs)
    {
        int bufferSize = (int) Math.min(file.length(), 32 * 1024 * 1024);
        BufferedRandomAccessFile reader =
            new BufferedRandomAccessFile(file.getAbsolutePath(), "r", bufferSize);

        try
        {
            CommitLogHeader clHeader = null;
            int replayPosition = 0;
            String headerPath = CommitLogHeader.getHeaderPathFromSegmentPath(file.getAbsolutePath());
            try
            {
                clHeader = CommitLogHeader.readCommitLogHeader(headerPath);
                replayPosition = clHeader.getReplayPosition();
            }
            catch (IOException ioe)
            {
                logger.info(headerPath + " incomplete, missing or corrupt. Everything is ok, don't panic. CommitLog will be replayed from the beginning");
                logger.debug("exception was", ioe);
            }

            if (replayPosition < 0)
            {
                logger.debug("skipping replay of fully-flushed {}", file);
                continue;
            }

            reader.seek(replayPosition);
            if (logger.isDebugEnabled())
                logger.debug("Replaying " + file + " starting at " + reader.getFilePointer());

            /* read the log, populate RowMutations, and apply them */
            while (!reader.isEOF())
            {
                if (logger.isDebugEnabled())
                    logger.debug("Reading mutation at " + reader.getFilePointer());

                long claimedCRC32;
                Checksum checksum = new CRC32();
                int serializedSize;
                try
                {
                    // any of the reads may hit EOF
                    serializedSize = reader.readInt();
                    long claimedSizeChecksum = reader.readLong();
                    checksum.update(serializedSize);
                    if (checksum.getValue() != claimedSizeChecksum || serializedSize <= 0)
                        break; // entry wasn't synced correctly/fully. that's ok.

                    if (serializedSize > bytes.length)
                        bytes = new byte[(int) (1.2 * serializedSize)];
                    reader.readFully(bytes, 0, serializedSize);
                    claimedCRC32 = reader.readLong();
                }
                catch (EOFException eof)
                {
                    break; // last CL entry didn't get completely written. that's ok.
                }

                checksum.update(bytes, 0, serializedSize);
                if (claimedCRC32 != checksum.getValue())
                {
                    // this entry must not have been fsynced. probably the rest is bad too,
                    // but just in case there is no harm in trying them (since we still read
                    // on an entry boundary)
                    continue;
                }

                /* deserialize the commit log entry */
                ByteArrayInputStream bufIn = new ByteArrayInputStream(bytes, 0, serializedSize);
                RowMutation rm = null;
                try
                {
                    rm = RowMutation.serializer().deserialize(new DataInputStream(bufIn));
                }
                catch (UnserializableColumnFamilyException ex)
                {
                    AtomicInteger i = invalidMutations.get(ex.cfId);
                    if (i == null)
                    {
                        i = new AtomicInteger(1);
                        invalidMutations.put(ex.cfId, i);
                    }
                    else
                        i.incrementAndGet();
                    continue;
                }

                if (logger.isDebugEnabled())
                    logger.debug(String.format("replaying mutation for %s.%s: %s",
                                               rm.getTable(), rm.key(),
                                               "{" + StringUtils.join(rm.getColumnFamilies(), ", ") + "}"));

                final Table table = Table.open(rm.getTable());
                tablesRecovered.add(table);
                final Collection<ColumnFamily> columnFamilies =
                    new ArrayList<ColumnFamily>(rm.getColumnFamilies());
                final long entryLocation = reader.getFilePointer();
                final CommitLogHeader finalHeader = clHeader;
                final RowMutation frm = rm;
                Runnable runnable = new WrappedRunnable()
                {
                    public void runMayThrow() throws IOException
                    {
                        RowMutation newRm = new RowMutation(frm.getTable(), frm.key());

                        // Rebuild the row mutation, omitting column families that a) have
                        // already been flushed, or b) are part of a cf that was dropped.
                        // Keep in mind that cf.name() is suspect: do everything based on
                        // the cfid instead.
                        for (ColumnFamily columnFamily : columnFamilies)
                        {
                            if (CFMetaData.getCF(columnFamily.id()) == null)
                                continue; // null means the cf has been dropped

                            if (finalHeader == null
                                || (finalHeader.isDirty(columnFamily.id())
                                    && entryLocation >= finalHeader.getPosition(columnFamily.id())))
                                newRm.add(columnFamily);
                        }
                        if (!newRm.isEmpty())
                            Table.open(newRm.getTable()).apply(newRm, null, false);
                    }
                };
                futures.add(StageManager.getStage(Stage.MUTATION).submit(runnable));
                if (futures.size() > MAX_OUTSTANDING_REPLAY_COUNT)
                {
                    FBUtilities.waitOnFutures(futures);
                    futures.clear();
                }
            }
        }
        finally
        {
            reader.close();
            logger.info("Finished reading " + file);
        }
    }

    for (Map.Entry<Integer, AtomicInteger> entry : invalidMutations.entrySet())
        logger.info(String.format("Skipped %d mutations from unknown (probably removed) CF with id %d",
                                  entry.getValue().intValue(), entry.getKey()));

    // wait for all the writes to finish on the mutation stage
    FBUtilities.waitOnFutures(futures);
    logger.debug("Finished waiting on mutations from recovery");

    // flush replayed tables
    futures.clear();
    for (Table table : tablesRecovered)
        futures.addAll(table.flush());
    FBUtilities.waitOnFutures(futures);
    logger.info("Recovery complete");
}
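// A self-contained JDK-only sketch of the on-disk entry framing that recover()
// validates above: [int size][long CRC32 so far][payload][long CRC32 so far].
// As in recover(), Checksum.update(int) consumes only the low byte of the size,
// and the same CRC accumulates across the size byte and the payload. Class and
// method names here are illustrative, not Cassandra's.
import java.io.*;
import java.util.zip.CRC32;
import java.util.zip.Checksum;

public class FramingDemo
{
    static void writeEntry(DataOutputStream out, byte[] payload) throws IOException
    {
        Checksum crc = new CRC32();
        crc.update(payload.length);              // feed the size into the running CRC
        out.writeInt(payload.length);
        out.writeLong(crc.getValue());           // checksum of the size field
        out.write(payload);
        crc.update(payload, 0, payload.length);  // then the payload itself
        out.writeLong(crc.getValue());           // cumulative checksum
    }

    /** Returns the payload, or null for a torn/unsynced entry (treated as end of log). */
    static byte[] readEntry(DataInputStream in) throws IOException
    {
        Checksum crc = new CRC32();
        int size = in.readInt();
        long claimedSizeCRC = in.readLong();
        crc.update(size);
        if (crc.getValue() != claimedSizeCRC || size <= 0)
            return null; // size header wasn't synced fully

        byte[] payload = new byte[size];
        in.readFully(payload);
        long claimedCRC = in.readLong();
        crc.update(payload, 0, size);
        return crc.getValue() == claimedCRC ? payload : null;
    }

    public static void main(String[] args) throws IOException
    {
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        writeEntry(new DataOutputStream(buf), "hello".getBytes("UTF-8"));
        byte[] back = readEntry(new DataInputStream(new ByteArrayInputStream(buf.toByteArray())));
        System.out.println(back == null ? "torn entry" : new String(back, "UTF-8"));
    }
}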
/**
 * Retrieves a local sub-block.
 *
 * @param subBlockCFName the column family that stores sub-blocks
 * @param blockId row key
 * @param sblockId SubBlock column name
 * @param offset offset inside the sub-block
 * @return a local sub-block
 * @throws TException
 */
private LocalBlock getLocalSubBlock(String subBlockCFName, ByteBuffer blockId, ByteBuffer sblockId, int offset)
    throws TException
{
    DecoratedKey<Token<?>> decoratedKey =
        new DecoratedKey<Token<?>>(StorageService.getPartitioner().getToken(blockId), blockId);

    Table table = Table.open(cfsKeyspace);
    ColumnFamilyStore sblockStore = table.getColumnFamilyStore(subBlockCFName);
    Collection<SSTableReader> sstables = sblockStore.getSSTables();

    for (SSTableReader sstable : sstables)
    {
        long position = sstable.getPosition(decoratedKey, Operator.EQ);
        if (position == -1)
            continue;

        String filename = sstable.descriptor.filenameFor(Component.DATA);
        RandomAccessFile raf = null;
        int mappedLength = -1;
        MappedByteBuffer mappedData = null;
        MappedFileDataInput file = null;
        try
        {
            raf = new RandomAccessFile(filename, "r");
            assert position < raf.length();

            mappedLength = (raf.length() - position) < Integer.MAX_VALUE
                         ? (int) (raf.length() - position)
                         : Integer.MAX_VALUE;
            mappedData = raf.getChannel().map(FileChannel.MapMode.READ_ONLY, position, mappedLength);
            file = new MappedFileDataInput(mappedData, filename, 0);

            // verify the key was found in the data file
            DecoratedKey keyInDisk = SSTableReader.decodeKey(sstable.partitioner,
                                                             sstable.descriptor,
                                                             ByteBufferUtil.readWithShortLength(file));
            assert keyInDisk.equals(decoratedKey)
                : String.format("%s != %s in %s", keyInDisk, decoratedKey, file.getPath());

            long rowSize = SSTableReader.readRowSize(file, sstable.descriptor);
            assert rowSize > 0;
            assert rowSize < mappedLength;

            Filter bf = IndexHelper.defreezeBloomFilter(file, sstable.descriptor.usesOldBloomFilter);

            // verify this column is in this version of the row.
            if (!bf.isPresent(sblockId))
                continue;

            List<IndexHelper.IndexInfo> indexList = IndexHelper.deserializeIndex(file);

            // we can stop early if the bloom filter says none of the columns actually
            // exist -- but we can't stop before initializing the cf below, in case
            // there's a relevant tombstone
            ColumnFamilySerializer serializer = ColumnFamily.serializer();
            try
            {
                ColumnFamily cf =
                    serializer.deserializeFromSSTableNoColumns(ColumnFamily.create(sstable.metadata), file);
                if (cf.isMarkedForDelete())
                    continue;
            }
            catch (Exception e)
            {
                e.printStackTrace();
                throw new IOException(serializer + " failed to deserialize " + sstable.getColumnFamilyName()
                                      + " with " + sstable.metadata + " from " + file, e);
            }

            Integer sblockLength = null;
            if (indexList == null)
                sblockLength = seekToSubColumn(sstable.metadata, file, sblockId);
            else
                sblockLength = seekToSubColumn(sstable.metadata, file, sblockId, indexList);

            if (sblockLength == null || sblockLength < 0)
                continue;

            int bytesReadFromStart = mappedLength - (int) file.bytesRemaining();
            if (logger.isDebugEnabled())
                logger.debug("BlockLength = " + sblockLength + " Available " + file.bytesRemaining());

            assert offset <= sblockLength : String.format("%d > %d", offset, sblockLength);

            long dataOffset = position + bytesReadFromStart;
            if (file.bytesRemaining() == 0 || sblockLength == 0)
                continue;

            return new LocalBlock(file.getPath(), dataOffset + offset, sblockLength - offset);
        }
        catch (IOException e)
        {
            throw new TException(e);
        }
        finally
        {
            FileUtils.closeQuietly(raf);
        }
    }
    return null;
}
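// A minimal JDK-only sketch of the read path above: memory-map a region of a
// data file starting at a known position and read from the mapping instead of
// issuing per-read syscalls. The file path comes from args[0] and the start
// position is illustrative.
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;

public class MmapReadDemo
{
    public static void main(String[] args) throws Exception
    {
        RandomAccessFile raf = new RandomAccessFile(args[0], "r");
        try
        {
            long position = 0; // where the row of interest starts
            // a single mapping is capped at Integer.MAX_VALUE bytes, as above
            int mappedLength = (int) Math.min(raf.length() - position, Integer.MAX_VALUE);
            MappedByteBuffer mapped =
                raf.getChannel().map(FileChannel.MapMode.READ_ONLY, position, mappedLength);

            // reads simply advance the buffer cursor over the mapped region
            while (mapped.remaining() >= 8)
                mapped.getLong();
            System.out.println("read " + mapped.position() + " bytes via the mapping");
        }
        finally
        {
            raf.close();
        }
    }
}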
public static void main(String args[]) throws IOException
{
    Options options = Options.parseArgs(args);
    try
    {
        // load keyspace descriptions.
        DatabaseDescriptor.loadSchemas();

        String ksName = null;
        String cfName = null;
        Map<Descriptor, Set<Component>> parsedFilenames = new HashMap<Descriptor, Set<Component>>();
        for (String filename : options.filenames)
        {
            File file = new File(filename);
            if (!file.exists())
            {
                System.out.println("Skipping non-existent file " + file);
                continue;
            }

            Pair<Descriptor, Component> pair =
                SSTable.tryComponentFromFilename(file.getParentFile(), file.getName());
            if (pair == null)
            {
                System.out.println("Skipping non-sstable file " + file);
                continue;
            }

            Descriptor desc = pair.left;
            if (ksName == null)
                ksName = desc.ksname;
            else if (!ksName.equals(desc.ksname))
                throw new IllegalArgumentException("All sstables must be part of the same keyspace");

            if (cfName == null)
                cfName = desc.cfname;
            else if (!cfName.equals(desc.cfname))
                throw new IllegalArgumentException("All sstables must be part of the same column family");

            Set<Component> components = new HashSet<Component>(Arrays.asList(new Component[]{
                Component.DATA,
                Component.PRIMARY_INDEX,
                Component.FILTER,
                Component.COMPRESSION_INFO,
                Component.STATS
            }));

            Iterator<Component> iter = components.iterator();
            while (iter.hasNext())
            {
                Component component = iter.next();
                if (!(new File(desc.filenameFor(component)).exists()))
                    iter.remove();
            }
            parsedFilenames.put(desc, components);
        }

        if (ksName == null || cfName == null)
        {
            System.err.println("No valid sstables to split");
            System.exit(1);
        }

        // Do not load sstables since they might be broken
        Table table = Table.openWithoutSSTables(ksName);
        ColumnFamilyStore cfs = table.getColumnFamilyStore(cfName);

        String snapshotName = "pre-split-" + System.currentTimeMillis();

        List<SSTableReader> sstables = new ArrayList<SSTableReader>();
        for (Map.Entry<Descriptor, Set<Component>> fn : parsedFilenames.entrySet())
        {
            try
            {
                SSTableReader sstable =
                    SSTableReader.openNoValidation(fn.getKey(), fn.getValue(), cfs.metadata);
                sstables.add(sstable);

                if (options.snapshot)
                {
                    File snapshotDirectory =
                        Directories.getSnapshotDirectory(sstable.descriptor, snapshotName);
                    sstable.createLinks(snapshotDirectory.getPath());
                }
            }
            catch (Exception e)
            {
                System.err.println(String.format("Error loading %s: %s", fn.getKey(), e.getMessage()));
                if (options.debug)
                    e.printStackTrace(System.err);
            }
        }
        if (options.snapshot)
            System.out.println(String.format("Pre-split sstables snapshotted into snapshot %s", snapshotName));

        cfs.getDataTracker().markCompacting(sstables);
        for (SSTableReader sstable : sstables)
        {
            try
            {
                new SSTableSplitter(cfs, sstable, options.sizeInMB).split();

                // Remove the sstable
                sstable.markCompacted();
                sstable.releaseReference();
            }
            catch (Exception e)
            {
                System.err.println(String.format("Error splitting %s: %s", sstable, e.getMessage()));
                if (options.debug)
                    e.printStackTrace(System.err);
            }
        }
        SSTableDeletingTask.waitForDeletions();
        System.exit(0); // We need that to stop non-daemonized threads
    }
    catch (Exception e)
    {
        System.err.println(e.getMessage());
        if (options.debug)
            e.printStackTrace(System.err);
        System.exit(1);
    }
}
public static ColumnFamily getColumnFamily(Table table, DecoratedKey key, String cfName) throws IOException
{
    ColumnFamilyStore cfStore = table.getColumnFamilyStore(cfName);
    assert cfStore != null : "Column family " + cfName + " has not been defined";
    return cfStore.getColumnFamily(QueryFilter.getIdentityFilter(key, new QueryPath(cfName)));
}
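// A minimal usage sketch, assuming the "Keyspace1"/"Standard1" fixtures and the
// Util.dk test helper seen earlier in this file set.
Table table = Table.open("Keyspace1");
ColumnFamily cf = getColumnFamily(table, Util.dk("key1"), "Standard1");
// cf holds the entire row via the identity filter, or is null if the key was never written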