예제 #1
0
 /**
  * Return an array of scanners corresponding to the given set of store files, And set the
  * ScanQueryMatcher for each store file scanner for further optimization
  */
 public static List<StoreFileScanner> getScannersForStoreFiles(
     Collection<StoreFile> files,
     boolean cacheBlocks,
     boolean isCompaction,
     ScanQueryMatcher matcher)
     throws IOException {
   List<StoreFileScanner> scanners = new ArrayList<StoreFileScanner>(files.size());
   for (StoreFile file : files) {
     StoreFile.Reader r = file.createReader();
     StoreFileScanner scanner = r.getStoreFileScanner(cacheBlocks, isCompaction);
     scanner.setScanQueryMatcher(matcher);
     scanners.add(scanner);
   }
   return scanners;
 }
    @Override
    public Boolean call() throws Exception {
      Thread.currentThread().setName("reader " + readerId);
      Random rand = new Random();
      StoreFileScanner scanner = reader.getStoreFileScanner(true, pread);

      while (System.currentTimeMillis() < endTime) {
        byte[] row = createRandomRow(rand, firstRow, lastRow);
        KeyValue kvToSeek = new KeyValue(row, family, createRandomQualifier(rand));
        if (rand.nextDouble() < 0.0001) {
          LOG.info("kvToSeek=" + kvToSeek);
        }
        boolean seekResult;
        try {
          seekResult = scanner.seek(kvToSeek);
        } catch (IOException ex) {
          throw new IOException("Seek failed for key " + kvToSeek + ", pread=" + pread, ex);
        }
        numSeeks.incrementAndGet();
        if (!seekResult) {
          error("Seek returned false for row " + Bytes.toStringBinary(row));
          return false;
        }
        for (int i = 0; i < rand.nextInt(10) + 1; ++i) {
          KeyValue kv = scanner.next();
          numKV.incrementAndGet();
          if (i == 0 && kv == null) {
            error(
                "scanner.next() returned null at the first iteration for "
                    + "row "
                    + Bytes.toStringBinary(row));
            return false;
          }
          if (kv == null) break;

          String keyHashStr = MD5Hash.getMD5AsHex(kv.getKey());
          keysRead.add(keyHashStr);
          totalBytes.addAndGet(kv.getLength());
        }
      }

      return true;
    }
  public void runMergeWorkload() throws IOException {
    long maxKeyCount = prepareForMerge();

    List<StoreFileScanner> scanners =
        StoreFileScanner.getScannersForStoreFiles(inputStoreFiles, false, false);

    HColumnDescriptor columnDescriptor =
        new HColumnDescriptor(HFileReadWriteTest.class.getSimpleName());
    columnDescriptor.setBlocksize(blockSize);
    columnDescriptor.setBloomFilterType(bloomType);
    columnDescriptor.setCompressionType(compression);
    columnDescriptor.setDataBlockEncoding(dataBlockEncoding);
    HRegionInfo regionInfo = new HRegionInfo();
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    HRegion region = new HRegion(outputDir, null, fs, conf, regionInfo, htd, null);
    HStore store = new HStore(outputDir, region, columnDescriptor, fs, conf);

    StoreFile.Writer writer =
        new StoreFile.WriterBuilder(conf, new CacheConfig(conf), fs, blockSize)
            .withOutputDir(outputDir)
            .withCompression(compression)
            .withDataBlockEncoder(dataBlockEncoder)
            .withBloomType(bloomType)
            .withMaxKeyCount(maxKeyCount)
            .withChecksumType(HFile.DEFAULT_CHECKSUM_TYPE)
            .withBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM)
            .build();

    StatisticsPrinter statsPrinter = new StatisticsPrinter();
    statsPrinter.startThread();

    try {
      performMerge(scanners, store, writer);
      writer.close();
    } finally {
      statsPrinter.requestStop();
    }

    Path resultPath = writer.getPath();

    resultPath = tryUsingSimpleOutputPath(resultPath);

    long fileSize = fs.getFileStatus(resultPath).getLen();
    LOG.info("Created " + resultPath + ", size " + fileSize);

    System.out.println();
    System.out.println("HFile information for " + resultPath);
    System.out.println();

    HFilePrettyPrinter hfpp = new HFilePrettyPrinter();
    hfpp.run(new String[] {"-m", "-f", resultPath.toString()});
  }
예제 #4
0
파일: Compactor.java 프로젝트: kfive/hbase
 /**
  * Creates file scanners for compaction.
  *
  * @param filesToCompact Files.
  * @return Scanners.
  */
 protected List<StoreFileScanner> createFileScanners(
     final Collection<StoreFile> filesToCompact, long smallestReadPoint) throws IOException {
   return StoreFileScanner.getScannersForStoreFiles(
       filesToCompact, false, false, true, smallestReadPoint);
 }
  /**
   * Do a minor/major compaction on an explicit set of storefiles from a Store.
   *
   * @param request the requested compaction that contains all necessary information to complete the
   *     compaction (i.e. the store, the files, etc.)
   * @return Product of compaction or null if all cells expired or deleted and nothing made it
   *     through the compaction.
   * @throws IOException
   */
  StoreFile.Writer compact(CompactionRequest request, long maxId) throws IOException {
    // Calculate maximum key count after compaction (for blooms)
    // Also calculate earliest put timestamp if major compaction
    int maxKeyCount = 0;
    long earliestPutTs = HConstants.LATEST_TIMESTAMP;
    long maxMVCCReadpoint = 0;

    // pull out the interesting things from the CR for ease later
    final Store store = request.getStore();
    final boolean majorCompaction = request.isMajor();
    final List<StoreFile> filesToCompact = request.getFiles();

    for (StoreFile file : filesToCompact) {
      StoreFile.Reader r = file.getReader();
      if (r == null) {
        LOG.warn("Null reader for " + file.getPath());
        continue;
      }
      // NOTE: getFilterEntries could cause under-sized blooms if the user
      //       switches bloom type (e.g. from ROW to ROWCOL)
      long keyCount =
          (r.getBloomFilterType() == store.getFamily().getBloomFilterType())
              ? r.getFilterEntries()
              : r.getEntries();
      maxKeyCount += keyCount;
      // Calculate the maximum MVCC readpoint used in any of the involved files
      Map<byte[], byte[]> fileInfo = r.loadFileInfo();
      byte[] tmp = fileInfo.get(HFileWriterV2.MAX_MEMSTORE_TS_KEY);
      if (tmp != null) {
        maxMVCCReadpoint = Math.max(maxMVCCReadpoint, Bytes.toLong(tmp));
      }
      // For major compactions calculate the earliest put timestamp
      // of all involved storefiles. This is used to remove
      // family delete marker during the compaction.
      if (majorCompaction) {
        tmp = fileInfo.get(StoreFile.EARLIEST_PUT_TS);
        if (tmp == null) {
          // There's a file with no information, must be an old one
          // assume we have very old puts
          earliestPutTs = HConstants.OLDEST_TIMESTAMP;
        } else {
          earliestPutTs = Math.min(earliestPutTs, Bytes.toLong(tmp));
        }
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug(
            "Compacting "
                + file
                + ", keycount="
                + keyCount
                + ", bloomtype="
                + r.getBloomFilterType().toString()
                + ", size="
                + StringUtils.humanReadableInt(r.length())
                + ", encoding="
                + r.getHFileReader().getEncodingOnDisk()
                + (majorCompaction ? ", earliestPutTs=" + earliestPutTs : ""));
      }
    }

    // keep track of compaction progress
    this.progress = new CompactionProgress(maxKeyCount);
    // Get some configs
    int compactionKVMax = getConf().getInt("hbase.hstore.compaction.kv.max", 10);
    Compression.Algorithm compression = store.getFamily().getCompression();
    // Avoid overriding compression setting for major compactions if the user
    // has not specified it separately
    Compression.Algorithm compactionCompression =
        (store.getFamily().getCompactionCompression() != Compression.Algorithm.NONE)
            ? store.getFamily().getCompactionCompression()
            : compression;

    // For each file, obtain a scanner:
    List<StoreFileScanner> scanners =
        StoreFileScanner.getScannersForStoreFiles(filesToCompact, false, false, true);

    // Make the instantiation lazy in case compaction produces no product; i.e.
    // where all source cells are expired or deleted.
    StoreFile.Writer writer = null;
    // Find the smallest read point across all the Scanners.
    long smallestReadPoint = store.getHRegion().getSmallestReadPoint();
    MultiVersionConsistencyControl.setThreadReadPoint(smallestReadPoint);
    try {
      InternalScanner scanner = null;
      try {
        if (store.getHRegion().getCoprocessorHost() != null) {
          scanner =
              store
                  .getHRegion()
                  .getCoprocessorHost()
                  .preCompactScannerOpen(
                      store,
                      scanners,
                      majorCompaction ? ScanType.MAJOR_COMPACT : ScanType.MINOR_COMPACT,
                      earliestPutTs,
                      request);
        }
        if (scanner == null) {
          Scan scan = new Scan();
          scan.setMaxVersions(store.getFamily().getMaxVersions());
          /* Include deletes, unless we are doing a major compaction */
          scanner =
              new StoreScanner(
                  store,
                  store.getScanInfo(),
                  scan,
                  scanners,
                  majorCompaction ? ScanType.MAJOR_COMPACT : ScanType.MINOR_COMPACT,
                  smallestReadPoint,
                  earliestPutTs);
        }
        if (store.getHRegion().getCoprocessorHost() != null) {
          InternalScanner cpScanner =
              store.getHRegion().getCoprocessorHost().preCompact(store, scanner, request);
          // NULL scanner returned from coprocessor hooks means skip normal processing
          if (cpScanner == null) {
            return null;
          }
          scanner = cpScanner;
        }

        int bytesWritten = 0;
        // since scanner.next() can return 'false' but still be delivering data,
        // we have to use a do/while loop.
        List<KeyValue> kvs = new ArrayList<KeyValue>();
        // Limit to "hbase.hstore.compaction.kv.max" (default 10) to avoid OOME
        boolean hasMore;
        do {
          hasMore = scanner.next(kvs, compactionKVMax);
          if (writer == null && !kvs.isEmpty()) {
            writer =
                store.createWriterInTmp(
                    maxKeyCount,
                    compactionCompression,
                    true,
                    maxMVCCReadpoint >= smallestReadPoint);
          }
          if (writer != null) {
            // output to writer:
            for (KeyValue kv : kvs) {
              if (kv.getMemstoreTS() <= smallestReadPoint) {
                kv.setMemstoreTS(0);
              }
              writer.append(kv);
              // update progress per key
              ++progress.currentCompactedKVs;

              // check periodically to see if a system stop is requested
              if (Store.closeCheckInterval > 0) {
                bytesWritten += kv.getLength();
                if (bytesWritten > Store.closeCheckInterval) {
                  bytesWritten = 0;
                  isInterrupted(store, writer);
                }
              }
            }
          }
          kvs.clear();
        } while (hasMore);
      } finally {
        if (scanner != null) {
          scanner.close();
        }
      }
    } finally {
      if (writer != null) {
        writer.appendMetadata(maxId, majorCompaction);
        writer.close();
      }
    }
    return writer;
  }