/**
   * @return the total key count in the files being merged
   * @throws IOException
   */
  private long prepareForMerge() throws IOException {
    LOG.info("Merging " + inputFileNames);
    LOG.info("Using block size: " + blockSize);
    inputStoreFiles = new ArrayList<StoreFile>();

    long maxKeyCount = 0;
    for (String fileName : inputFileNames) {
      Path filePath = new Path(fileName);

      // Open without caching.
      StoreFile sf = openStoreFile(filePath, false);
      sf.createReader();
      inputStoreFiles.add(sf);

      StoreFile.Reader r = sf.getReader();
      if (r != null) {
        long keyCount = r.getFilterEntries();
        maxKeyCount += keyCount;
        LOG.info(
            "Compacting: "
                + sf
                + "; keyCount = "
                + keyCount
                + "; Bloom Type = "
                + r.getBloomFilterType().toString()
                + "; Size = "
                + StringUtils.humanReadableInt(r.length()));
      }
    }
    return maxKeyCount;
  }
Пример #2
0
 private static CompactionRequest createDummyRequest() throws Exception {
   // "Files" are totally unused, it's Scanner class below that gives compactor fake KVs.
   // But compaction depends on everything under the sun, so stub everything with dummies.
   StoreFile sf = mock(StoreFile.class);
   StoreFile.Reader r = mock(StoreFile.Reader.class);
   when(r.length()).thenReturn(1L);
   when(r.getBloomFilterType()).thenReturn(BloomType.NONE);
   when(r.getHFileReader()).thenReturn(mock(HFile.Reader.class));
   when(r.getStoreFileScanner(anyBoolean(), anyBoolean(), anyBoolean(), anyLong()))
       .thenReturn(mock(StoreFileScanner.class));
   when(sf.getReader()).thenReturn(r);
   when(sf.createReader()).thenReturn(r);
   return new CompactionRequest(Arrays.asList(sf));
 }
Пример #3
0
 /**
  * Return an array of scanners corresponding to the given set of store files, And set the
  * ScanQueryMatcher for each store file scanner for further optimization
  */
 public static List<StoreFileScanner> getScannersForStoreFiles(
     Collection<StoreFile> files,
     boolean cacheBlocks,
     boolean isCompaction,
     ScanQueryMatcher matcher)
     throws IOException {
   List<StoreFileScanner> scanners = new ArrayList<StoreFileScanner>(files.size());
   for (StoreFile file : files) {
     StoreFile.Reader r = file.createReader();
     StoreFileScanner scanner = r.getStoreFileScanner(cacheBlocks, isCompaction);
     scanner.setScanQueryMatcher(matcher);
     scanners.add(scanner);
   }
   return scanners;
 }
  public boolean runRandomReadWorkload() throws IOException {
    if (inputFileNames.size() != 1) {
      throw new IOException("Need exactly one input file for random reads: " + inputFileNames);
    }

    Path inputPath = new Path(inputFileNames.get(0));

    // Make sure we are using caching.
    StoreFile storeFile = openStoreFile(inputPath, true);

    StoreFile.Reader reader = storeFile.createReader();

    LOG.info("First key: " + Bytes.toStringBinary(reader.getFirstKey()));
    LOG.info("Last key: " + Bytes.toStringBinary(reader.getLastKey()));

    KeyValue firstKV = KeyValue.createKeyValueFromKey(reader.getFirstKey());
    firstRow = firstKV.getRow();

    KeyValue lastKV = KeyValue.createKeyValueFromKey(reader.getLastKey());
    lastRow = lastKV.getRow();

    byte[] family = firstKV.getFamily();
    if (!Bytes.equals(family, lastKV.getFamily())) {
      LOG.error(
          "First and last key have different families: "
              + Bytes.toStringBinary(family)
              + " and "
              + Bytes.toStringBinary(lastKV.getFamily()));
      return false;
    }

    if (Bytes.equals(firstRow, lastRow)) {
      LOG.error(
          "First and last row are the same, cannot run read workload: "
              + "firstRow="
              + Bytes.toStringBinary(firstRow)
              + ", "
              + "lastRow="
              + Bytes.toStringBinary(lastRow));
      return false;
    }

    ExecutorService exec = Executors.newFixedThreadPool(numReadThreads + 1);
    int numCompleted = 0;
    int numFailed = 0;
    try {
      ExecutorCompletionService<Boolean> ecs = new ExecutorCompletionService<Boolean>(exec);
      endTime = System.currentTimeMillis() + 1000 * durationSec;
      boolean pread = true;
      for (int i = 0; i < numReadThreads; ++i) ecs.submit(new RandomReader(i, reader, pread));
      ecs.submit(new StatisticsPrinter());
      Future<Boolean> result;
      while (true) {
        try {
          result = ecs.poll(endTime + 1000 - System.currentTimeMillis(), TimeUnit.MILLISECONDS);
          if (result == null) break;
          try {
            if (result.get()) {
              ++numCompleted;
            } else {
              ++numFailed;
            }
          } catch (ExecutionException e) {
            LOG.error("Worker thread failure", e.getCause());
            ++numFailed;
          }
        } catch (InterruptedException ex) {
          LOG.error("Interrupted after " + numCompleted + " workers completed");
          Thread.currentThread().interrupt();
          continue;
        }
      }
    } finally {
      storeFile.closeReader(true);
      exec.shutdown();

      BlockCache c = cacheConf.getBlockCache();
      if (c != null) {
        c.shutdown();
      }
    }
    LOG.info("Worker threads completed: " + numCompleted);
    LOG.info("Worker threads failed: " + numFailed);
    return true;
  }
Пример #5
0
  /**
   * Write out a split reference. Package local so it doesnt leak out of regionserver.
   *
   * @param hri {@link HRegionInfo} of the destination
   * @param familyName Column Family Name
   * @param f File to split.
   * @param splitRow Split Row
   * @param top True if we are referring to the top half of the hfile.
   * @return Path to created reference.
   * @throws IOException
   */
  Path splitStoreFile(
      final HRegionInfo hri,
      final String familyName,
      final StoreFile f,
      final byte[] splitRow,
      final boolean top)
      throws IOException {

    // Check whether the split row lies in the range of the store file
    // If it is outside the range, return directly.
    if (!isIndexTable()) {
      if (top) {
        // check if larger than last key.
        KeyValue splitKey = KeyValue.createFirstOnRow(splitRow);
        byte[] lastKey = f.createReader().getLastKey();
        // If lastKey is null means storefile is empty.
        if (lastKey == null) return null;
        if (f.getReader()
                .getComparator()
                .compareFlatKey(
                    splitKey.getBuffer(),
                    splitKey.getKeyOffset(),
                    splitKey.getKeyLength(),
                    lastKey,
                    0,
                    lastKey.length)
            > 0) {
          return null;
        }
      } else {
        // check if smaller than first key
        KeyValue splitKey = KeyValue.createLastOnRow(splitRow);
        byte[] firstKey = f.createReader().getFirstKey();
        // If firstKey is null means storefile is empty.
        if (firstKey == null) return null;
        if (f.getReader()
                .getComparator()
                .compareFlatKey(
                    splitKey.getBuffer(),
                    splitKey.getKeyOffset(),
                    splitKey.getKeyLength(),
                    firstKey,
                    0,
                    firstKey.length)
            < 0) {
          return null;
        }
      }

      f.getReader().close(true);
    }

    Path splitDir = new Path(getSplitsDir(hri), familyName);
    // A reference to the bottom half of the hsf store file.
    Reference r =
        top ? Reference.createTopReference(splitRow) : Reference.createBottomReference(splitRow);
    // Add the referred-to regions name as a dot separated suffix.
    // See REF_NAME_REGEX regex above.  The referred-to regions name is
    // up in the path of the passed in <code>f</code> -- parentdir is family,
    // then the directory above is the region name.
    String parentRegionName = regionInfo.getEncodedName();
    // Write reference with same file id only with the other region name as
    // suffix and into the new region location (under same family).
    Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
    return r.write(fs, p);
  }