예제 #1
0
 /**
  * Writes the sorted contents out to a data directory that can hold the estimated size.
  *
  * <p>The size is estimated once via {@code estimatedSize()}, a writeable location is requested
  * for that many bytes, and the resulting directory is handed to {@code writeSortedContents}.
  *
  * @return the sstable readers returned by {@code writeSortedContents}
  * @throws RuntimeException if no writeable location is available for the estimated size
  */
 public Collection<SSTableReader> flush() {
   final long bytesNeeded = estimatedSize();
   final Directories.DataDirectory targetDisk =
       cfs.getDirectories().getWriteableLocation(bytesNeeded);
   if (targetDisk == null) {
     throw new RuntimeException("Insufficient disk space to write " + bytesNeeded + " bytes");
   }

   // Resolve the chosen disk to the concrete directory the sstable will be written into.
   final File targetDirectory = cfs.getDirectories().getLocationForDisk(targetDisk);
   assert targetDirectory != null : "Flush task is not bound to any disk";
   return writeSortedContents(targetDirectory);
 }
예제 #2
0
  /**
   * Creates the transactional writer used to flush into the sstable named by {@code filename}.
   *
   * <p>The writer is bound to an offline {@code FLUSH} lifecycle transaction and to a metadata
   * collector carrying the commit log interval between {@code commitLogLowerBound} and {@code
   * commitLogUpperBound}. The expected key count passed to the multi-writer is the current number
   * of entries in {@code partitions}, and the sstable is created as unrepaired.
   *
   * <p>If building the writer fails after the transaction was opened, the transaction is closed
   * and the original failure is rethrown. A failure raised by that close is attached to the
   * original as a suppressed exception instead of replacing it, so the root cause is not lost.
   *
   * @param filename path of the sstable to write; parsed with {@code Descriptor.fromFilename}
   * @param columns columns recorded in the serialization header
   * @param stats encoding statistics recorded in the serialization header
   * @return a new {@code SSTableTxnWriter} wrapping the transaction and the multi-writer
   */
  @SuppressWarnings("resource") // log and writer closed by SSTableTxnWriter
  public SSTableTxnWriter createFlushWriter(
      String filename, PartitionColumns columns, EncodingStats stats) {
    // we operate "offline" here, as we expose the resulting reader consciously when done
    // (although we may want to modify this behaviour in future, to encapsulate full flush behaviour
    // in LifecycleTransaction)
    LifecycleTransaction txn = null;
    try {
      txn = LifecycleTransaction.offline(OperationType.FLUSH);
      MetadataCollector sstableMetadataCollector =
          new MetadataCollector(cfs.metadata.comparator)
              .commitLogIntervals(
                  new IntervalSet(commitLogLowerBound.get(), commitLogUpperBound.get()));

      return new SSTableTxnWriter(
          txn,
          cfs.createSSTableMultiWriter(
              Descriptor.fromFilename(filename),
              (long) partitions.size(),
              ActiveRepairService.UNREPAIRED_SSTABLE,
              sstableMetadataCollector,
              new SerializationHeader(true, cfs.metadata, columns, stats),
              txn));
    } catch (Throwable t) {
      if (txn != null) {
        try {
          txn.close();
        } catch (Throwable closeFailure) {
          // Keep the original failure as the primary exception; a throw from close() would
          // otherwise hide what actually went wrong while creating the writer.
          if (closeFailure != t) {
            t.addSuppressed(closeFailure);
          }
        }
      }
      throw t;
    }
  }
예제 #3
0
  /**
   * Builds an iterator over the partitions whose positions fall inside the key range of {@code
   * dataRange}.
   *
   * <p>An end of the key range whose position reports {@code isMinimum()} is treated as
   * unbounded on that side. Inclusiveness of each end is derived from the concrete bounds type.
   *
   * @param columnFilter forwarded unchanged to the returned iterator
   * @param dataRange supplies the key range and is forwarded to the returned iterator
   * @param isForThrift forwarded unchanged to the returned iterator
   * @return a {@code MemtableUnfilteredPartitionIterator} over the selected view of {@code
   *     partitions}
   */
  public MemtableUnfilteredPartitionIterator makePartitionIterator(
      final ColumnFilter columnFilter, final DataRange dataRange, final boolean isForThrift) {
    final AbstractBounds<PartitionPosition> bounds = dataRange.keyRange();

    final boolean unboundedStart = bounds.left.isMinimum();
    final boolean unboundedStop = bounds.right.isMinimum();

    // Bounds includes both ends; IncludingExcludingBounds only the start; Range only the stop.
    final boolean closedBothEnds = bounds instanceof Bounds;
    final boolean startInclusive = closedBothEnds || bounds instanceof IncludingExcludingBounds;
    final boolean stopInclusive = closedBothEnds || bounds instanceof Range;

    // Pick the narrowest view of the partition map that covers the requested range.
    final Map<PartitionPosition, AtomicBTreePartition> selected;
    if (unboundedStart && unboundedStop) {
      selected = partitions;
    } else if (unboundedStart) {
      selected = partitions.headMap(bounds.right, stopInclusive);
    } else if (unboundedStop) {
      selected = partitions.tailMap(bounds.left, startInclusive);
    } else {
      selected = partitions.subMap(bounds.left, startInclusive, bounds.right, stopInclusive);
    }

    // The extra pass over the selection is only made when the compaction strategy manager
    // reports that it only purges repaired tombstones; otherwise Integer.MAX_VALUE is used.
    final int minLocalDeletionTime =
        cfs.getCompactionStrategyManager().onlyPurgeRepairedTombstones()
            ? findMinLocalDeletionTime(selected.entrySet().iterator())
            : Integer.MAX_VALUE;

    final Iterator<Map.Entry<PartitionPosition, AtomicBTreePartition>> entries =
        selected.entrySet().iterator();

    return new MemtableUnfilteredPartitionIterator(
        cfs, entries, isForThrift, minLocalDeletionTime, columnFilter, dataRange);
  }
예제 #4
0
  /**
   * Appends every retained partition of {@code partitions} to a new flush writer created under
   * {@code sstableDirectory}, then finishes the writer or aborts it if nothing was written.
   *
   * @param sstableDirectory directory used to derive the sstable path for the flush writer
   * @return the readers returned by {@code writer.finish(true)}, or an empty list when the
   *     writer's file pointer is still zero after all partitions were processed
   */
  private Collection<SSTableReader> writeSortedContents(File sstableDirectory) {
    final boolean batchLog =
        cfs.name.equals(SystemKeyspace.BATCHES)
            && cfs.keyspace.getName().equals(SystemKeyspace.NAME);

    logger.debug("Writing {}", Memtable.this.toString());

    try (SSTableTxnWriter writer =
        createFlushWriter(
            cfs.getSSTablePath(sstableDirectory), columnsCollector.get(), statsCollector.get())) {
      // Contention is only counted when trace logging is on, since that is where it is reported.
      final boolean countContention = logger.isTraceEnabled();
      int contended = 0;

      // The map cannot be drained while iterating: the memtable still serves queries while it
      // is in the "pending flush" category.
      for (AtomicBTreePartition partition : partitions.values()) {
        // A batchlog partition is one log entry that is inserted once and deleted once, and the
        // data is strictly local, so tombstones need not be kept for repair. When both the
        // insert and the delete are in this memtable the entry is skipped (CASSANDRA-4667).
        if (batchLog && !partition.partitionLevelDeletion().isLive() && partition.hasRows()) {
          continue;
        }

        if (countContention && partition.usePessimisticLocking()) {
          contended++;
        }

        if (partition.isEmpty()) {
          continue;
        }
        try (UnfilteredRowIterator rows = partition.unfilteredIterator()) {
          writer.append(rows);
        }
      }

      final Collection<SSTableReader> flushed;
      if (writer.getFilePointer() > 0) {
        final String summary =
            String.format(
                "Completed flushing %s (%s) for commitlog position %s",
                writer.getFilename(),
                FBUtilities.prettyPrintMemory(writer.getFilePointer()),
                commitLogUpperBound);
        logger.debug(summary);

        // The flushed sstables are expected to hold non-repaired data.
        flushed = writer.finish(true);
      } else {
        logger.debug(
            "Completed flushing {}; nothing needed to be retained.  Commitlog position was {}",
            writer.getFilename(),
            commitLogUpperBound);
        writer.abort();
        flushed = Collections.emptyList();
      }

      if (contended > 0) {
        final String contentionReport =
            String.format(
                "High update contention in %d/%d partitions of %s ",
                contended, partitions.size(), Memtable.this.toString());
        logger.trace(contentionReport);
      }

      return flushed;
    }
  }