private void performMerge(List<StoreFileScanner> scanners, HStore store, StoreFile.Writer writer)
      throws IOException {
    InternalScanner scanner = null;
    try {
      Scan scan = new Scan();

      // Include deletes
      scanner =
          new StoreScanner(
              store,
              store.scanInfo,
              scan,
              scanners,
              ScanType.MAJOR_COMPACT,
              Long.MIN_VALUE,
              Long.MIN_VALUE);

      ArrayList<KeyValue> kvs = new ArrayList<KeyValue>();

      while (scanner.next(kvs) || kvs.size() != 0) {
        numKV.addAndGet(kvs.size());
        for (KeyValue kv : kvs) {
          totalBytes.addAndGet(kv.getLength());
          writer.append(kv);
        }
        kvs.clear();
      }
    } finally {
      if (scanner != null) scanner.close();
    }
  }
Пример #2
0
 /**
  * Creates the scanner for flushing snapshot. Also calls coprocessors.
  *
  * @param snapshotScanner
  * @param smallestReadPoint
  * @return The scanner; null if coprocessor is canceling the flush.
  */
 protected InternalScanner createScanner(KeyValueScanner snapshotScanner, long smallestReadPoint)
     throws IOException {
   InternalScanner scanner = null;
   if (store.getCoprocessorHost() != null) {
     scanner = store.getCoprocessorHost().preFlushScannerOpen(store, snapshotScanner);
   }
   if (scanner == null) {
     Scan scan = new Scan();
     scan.setMaxVersions(store.getScanInfo().getMaxVersions());
     scanner =
         new StoreScanner(
             store,
             store.getScanInfo(),
             scan,
             Collections.singletonList(snapshotScanner),
             ScanType.COMPACT_RETAIN_DELETES,
             smallestReadPoint,
             HConstants.OLDEST_TIMESTAMP);
   }
   assert scanner != null;
   if (store.getCoprocessorHost() != null) {
     try {
       return store.getCoprocessorHost().preFlush(store, scanner);
     } catch (IOException ioe) {
       scanner.close();
       throw ioe;
     }
   }
   return scanner;
 }
Пример #3
0
  public void testWideScanBatching() throws IOException {
    final int batch = 256;
    try {
      this.r = createNewHRegion(TESTTABLEDESC, null, null);
      int inserted = addWideContent(this.r);
      List<Cell> results = new ArrayList<Cell>();
      Scan scan = new Scan();
      scan.addFamily(A);
      scan.addFamily(B);
      scan.addFamily(C);
      scan.setMaxVersions(100);
      scan.setBatch(batch);
      InternalScanner s = r.getScanner(scan);
      int total = 0;
      int i = 0;
      boolean more;
      do {
        more = s.next(results);
        i++;
        LOG.info("iteration #" + i + ", results.size=" + results.size());

        // assert that the result set is no larger
        assertTrue(results.size() <= batch);

        total += results.size();

        if (results.size() > 0) {
          // assert that all results are from the same row
          byte[] row = CellUtil.cloneRow(results.get(0));
          for (Cell kv : results) {
            assertTrue(Bytes.equals(row, CellUtil.cloneRow(kv)));
          }
        }

        results.clear();

        // trigger ChangedReadersObservers
        Iterator<KeyValueScanner> scanners =
            ((HRegion.RegionScannerImpl) s).storeHeap.getHeap().iterator();
        while (scanners.hasNext()) {
          StoreScanner ss = (StoreScanner) scanners.next();
          ss.updateReaders();
        }
      } while (more);

      // assert that the scanner returned all values
      LOG.info("inserted " + inserted + ", scanned " + total);
      assertEquals(total, inserted);

      s.close();
    } finally {
      HRegion.closeHRegion(this.r);
    }
  }
Пример #4
0
 /**
  * Performs memstore flush, writing data from scanner into sink.
  *
  * @param scanner Scanner to get data from.
  * @param sink Sink to write data to. Could be StoreFile.Writer.
  * @param smallestReadPoint Smallest read point used for the flush.
  */
 protected void performFlush(
     InternalScanner scanner, Compactor.CellSink sink, long smallestReadPoint) throws IOException {
   int compactionKVMax =
       conf.getInt(HConstants.COMPACTION_KV_MAX, HConstants.COMPACTION_KV_MAX_DEFAULT);
   List<Cell> kvs = new ArrayList<Cell>();
   boolean hasMore;
   do {
     hasMore = scanner.next(kvs, compactionKVMax);
     if (!kvs.isEmpty()) {
       for (Cell c : kvs) {
         // If we know that this KV is going to be included always, then let us
         // set its memstoreTS to 0. This will help us save space when writing to
         // disk.
         sink.append(c);
       }
       kvs.clear();
     }
   } while (hasMore);
 }
  /**
   * Do a minor/major compaction on an explicit set of storefiles from a Store.
   *
   * @param request the requested compaction that contains all necessary information to complete the
   *     compaction (i.e. the store, the files, etc.)
   * @return Product of compaction or null if all cells expired or deleted and nothing made it
   *     through the compaction.
   * @throws IOException
   */
  StoreFile.Writer compact(CompactionRequest request, long maxId) throws IOException {
    // Calculate maximum key count after compaction (for blooms)
    // Also calculate earliest put timestamp if major compaction
    int maxKeyCount = 0;
    long earliestPutTs = HConstants.LATEST_TIMESTAMP;
    long maxMVCCReadpoint = 0;

    // pull out the interesting things from the CR for ease later
    final Store store = request.getStore();
    final boolean majorCompaction = request.isMajor();
    final List<StoreFile> filesToCompact = request.getFiles();

    for (StoreFile file : filesToCompact) {
      StoreFile.Reader r = file.getReader();
      if (r == null) {
        LOG.warn("Null reader for " + file.getPath());
        continue;
      }
      // NOTE: getFilterEntries could cause under-sized blooms if the user
      //       switches bloom type (e.g. from ROW to ROWCOL)
      long keyCount =
          (r.getBloomFilterType() == store.getFamily().getBloomFilterType())
              ? r.getFilterEntries()
              : r.getEntries();
      maxKeyCount += keyCount;
      // Calculate the maximum MVCC readpoint used in any of the involved files
      Map<byte[], byte[]> fileInfo = r.loadFileInfo();
      byte[] tmp = fileInfo.get(HFileWriterV2.MAX_MEMSTORE_TS_KEY);
      if (tmp != null) {
        maxMVCCReadpoint = Math.max(maxMVCCReadpoint, Bytes.toLong(tmp));
      }
      // For major compactions calculate the earliest put timestamp
      // of all involved storefiles. This is used to remove
      // family delete marker during the compaction.
      if (majorCompaction) {
        tmp = fileInfo.get(StoreFile.EARLIEST_PUT_TS);
        if (tmp == null) {
          // There's a file with no information, must be an old one
          // assume we have very old puts
          earliestPutTs = HConstants.OLDEST_TIMESTAMP;
        } else {
          earliestPutTs = Math.min(earliestPutTs, Bytes.toLong(tmp));
        }
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug(
            "Compacting "
                + file
                + ", keycount="
                + keyCount
                + ", bloomtype="
                + r.getBloomFilterType().toString()
                + ", size="
                + StringUtils.humanReadableInt(r.length())
                + ", encoding="
                + r.getHFileReader().getEncodingOnDisk()
                + (majorCompaction ? ", earliestPutTs=" + earliestPutTs : ""));
      }
    }

    // keep track of compaction progress
    this.progress = new CompactionProgress(maxKeyCount);
    // Get some configs
    int compactionKVMax = getConf().getInt("hbase.hstore.compaction.kv.max", 10);
    Compression.Algorithm compression = store.getFamily().getCompression();
    // Avoid overriding compression setting for major compactions if the user
    // has not specified it separately
    Compression.Algorithm compactionCompression =
        (store.getFamily().getCompactionCompression() != Compression.Algorithm.NONE)
            ? store.getFamily().getCompactionCompression()
            : compression;

    // For each file, obtain a scanner:
    List<StoreFileScanner> scanners =
        StoreFileScanner.getScannersForStoreFiles(filesToCompact, false, false, true);

    // Make the instantiation lazy in case compaction produces no product; i.e.
    // where all source cells are expired or deleted.
    StoreFile.Writer writer = null;
    // Find the smallest read point across all the Scanners.
    long smallestReadPoint = store.getHRegion().getSmallestReadPoint();
    MultiVersionConsistencyControl.setThreadReadPoint(smallestReadPoint);
    try {
      InternalScanner scanner = null;
      try {
        if (store.getHRegion().getCoprocessorHost() != null) {
          scanner =
              store
                  .getHRegion()
                  .getCoprocessorHost()
                  .preCompactScannerOpen(
                      store,
                      scanners,
                      majorCompaction ? ScanType.MAJOR_COMPACT : ScanType.MINOR_COMPACT,
                      earliestPutTs,
                      request);
        }
        if (scanner == null) {
          Scan scan = new Scan();
          scan.setMaxVersions(store.getFamily().getMaxVersions());
          /* Include deletes, unless we are doing a major compaction */
          scanner =
              new StoreScanner(
                  store,
                  store.getScanInfo(),
                  scan,
                  scanners,
                  majorCompaction ? ScanType.MAJOR_COMPACT : ScanType.MINOR_COMPACT,
                  smallestReadPoint,
                  earliestPutTs);
        }
        if (store.getHRegion().getCoprocessorHost() != null) {
          InternalScanner cpScanner =
              store.getHRegion().getCoprocessorHost().preCompact(store, scanner, request);
          // NULL scanner returned from coprocessor hooks means skip normal processing
          if (cpScanner == null) {
            return null;
          }
          scanner = cpScanner;
        }

        int bytesWritten = 0;
        // since scanner.next() can return 'false' but still be delivering data,
        // we have to use a do/while loop.
        List<KeyValue> kvs = new ArrayList<KeyValue>();
        // Limit to "hbase.hstore.compaction.kv.max" (default 10) to avoid OOME
        boolean hasMore;
        do {
          hasMore = scanner.next(kvs, compactionKVMax);
          if (writer == null && !kvs.isEmpty()) {
            writer =
                store.createWriterInTmp(
                    maxKeyCount,
                    compactionCompression,
                    true,
                    maxMVCCReadpoint >= smallestReadPoint);
          }
          if (writer != null) {
            // output to writer:
            for (KeyValue kv : kvs) {
              if (kv.getMemstoreTS() <= smallestReadPoint) {
                kv.setMemstoreTS(0);
              }
              writer.append(kv);
              // update progress per key
              ++progress.currentCompactedKVs;

              // check periodically to see if a system stop is requested
              if (Store.closeCheckInterval > 0) {
                bytesWritten += kv.getLength();
                if (bytesWritten > Store.closeCheckInterval) {
                  bytesWritten = 0;
                  isInterrupted(store, writer);
                }
              }
            }
          }
          kvs.clear();
        } while (hasMore);
      } finally {
        if (scanner != null) {
          scanner.close();
        }
      }
    } finally {
      if (writer != null) {
        writer.appendMetadata(maxId, majorCompaction);
        writer.close();
      }
    }
    return writer;
  }