/**
 * @return the total key count in the files being merged
 * @throws IOException
 */
private long prepareForMerge() throws IOException {
  LOG.info("Merging " + inputFileNames);
  LOG.info("Using block size: " + blockSize);
  inputStoreFiles = new ArrayList<StoreFile>();

  long maxKeyCount = 0;
  for (String fileName : inputFileNames) {
    Path filePath = new Path(fileName);

    // Open without caching.
    StoreFile sf = openStoreFile(filePath, false);
    sf.createReader();
    inputStoreFiles.add(sf);

    StoreFile.Reader r = sf.getReader();
    if (r != null) {
      long keyCount = r.getFilterEntries();
      maxKeyCount += keyCount;
      LOG.info("Compacting: " + sf + "; keyCount = " + keyCount
          + "; Bloom Type = " + r.getBloomFilterType().toString()
          + "; Size = " + StringUtils.humanReadableInt(r.length()));
    }
  }
  return maxKeyCount;
}
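// A hedged sketch of what the key-count estimate above is for: pre-sizing the
// Bloom filter of the merged output file. The outputDir and bloomType fields
// are assumptions for illustration; the WriterBuilder calls follow the
// StoreFile API used in the same codebase era.
private StoreFile.Writer createMergeWriter(long maxKeyCount) throws IOException {
  return new StoreFile.WriterBuilder(conf, cacheConf, fs, blockSize)
      .withOutputDir(outputDir)       // assumed field: merge output directory
      .withBloomType(bloomType)       // assumed field: configured Bloom type
      .withMaxKeyCount(maxKeyCount)   // pre-sizes the Bloom filter
      .build();
}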
private static CompactionRequest createDummyRequest() throws Exception {
  // "Files" are totally unused; it's the Scanner class below that gives the
  // compactor fake KVs. But compaction depends on everything under the sun,
  // so stub everything with dummies.
  StoreFile sf = mock(StoreFile.class);
  StoreFile.Reader r = mock(StoreFile.Reader.class);
  when(r.length()).thenReturn(1L);
  when(r.getBloomFilterType()).thenReturn(BloomType.NONE);
  when(r.getHFileReader()).thenReturn(mock(HFile.Reader.class));
  when(r.getStoreFileScanner(anyBoolean(), anyBoolean(), anyBoolean(), anyLong()))
      .thenReturn(mock(StoreFileScanner.class));
  when(sf.getReader()).thenReturn(r);
  when(sf.createReader()).thenReturn(r);
  return new CompactionRequest(Arrays.asList(sf));
}
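// A minimal sketch of exercising createDummyRequest() in a JUnit test. The
// assertions simply verify the stubs configured above; the test name and
// structure are illustrative, not the project's actual test.
@Test
public void testDummyRequestUsesStubbedReader() throws Exception {
  CompactionRequest request = createDummyRequest();
  StoreFile sf = request.getFiles().iterator().next();
  StoreFile.Reader r = sf.getReader();
  assertEquals(1L, r.length());                         // stubbed length
  assertEquals(BloomType.NONE, r.getBloomFilterType()); // stubbed Bloom type
}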
/**
 * Return a list of scanners corresponding to the given set of store files,
 * and set the ScanQueryMatcher on each store file scanner for further
 * optimization.
 */
public static List<StoreFileScanner> getScannersForStoreFiles(
    Collection<StoreFile> files, boolean cacheBlocks, boolean isCompaction,
    ScanQueryMatcher matcher) throws IOException {
  List<StoreFileScanner> scanners = new ArrayList<StoreFileScanner>(files.size());
  for (StoreFile file : files) {
    StoreFile.Reader r = file.createReader();
    StoreFileScanner scanner = r.getStoreFileScanner(cacheBlocks, isCompaction);
    scanner.setScanQueryMatcher(matcher);
    scanners.add(scanner);
  }
  return scanners;
}
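// A minimal sketch of driving the scanners returned above: seek each one to
// the start of the key space and drain it. The surrounding storeFiles and
// matcher variables are assumptions for illustration.
List<StoreFileScanner> scanners = getScannersForStoreFiles(
    storeFiles, false /* cacheBlocks */, true /* isCompaction */, matcher);
try {
  for (StoreFileScanner scanner : scanners) {
    scanner.seek(KeyValue.LOWESTKEY);
    for (KeyValue kv; (kv = scanner.next()) != null; ) {
      // Consume the KeyValue, e.g. feed it to a compaction writer.
    }
  }
} finally {
  for (StoreFileScanner scanner : scanners) {
    scanner.close();
  }
}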
public boolean runRandomReadWorkload() throws IOException {
  if (inputFileNames.size() != 1) {
    throw new IOException("Need exactly one input file for random reads: "
        + inputFileNames);
  }

  Path inputPath = new Path(inputFileNames.get(0));

  // Make sure we are using caching.
  StoreFile storeFile = openStoreFile(inputPath, true);
  StoreFile.Reader reader = storeFile.createReader();

  LOG.info("First key: " + Bytes.toStringBinary(reader.getFirstKey()));
  LOG.info("Last key: " + Bytes.toStringBinary(reader.getLastKey()));

  KeyValue firstKV = KeyValue.createKeyValueFromKey(reader.getFirstKey());
  firstRow = firstKV.getRow();

  KeyValue lastKV = KeyValue.createKeyValueFromKey(reader.getLastKey());
  lastRow = lastKV.getRow();

  byte[] family = firstKV.getFamily();
  if (!Bytes.equals(family, lastKV.getFamily())) {
    LOG.error("First and last key have different families: "
        + Bytes.toStringBinary(family) + " and "
        + Bytes.toStringBinary(lastKV.getFamily()));
    return false;
  }

  if (Bytes.equals(firstRow, lastRow)) {
    LOG.error("First and last row are the same, cannot run read workload: "
        + "firstRow=" + Bytes.toStringBinary(firstRow) + ", "
        + "lastRow=" + Bytes.toStringBinary(lastRow));
    return false;
  }

  ExecutorService exec = Executors.newFixedThreadPool(numReadThreads + 1);
  int numCompleted = 0;
  int numFailed = 0;
  try {
    ExecutorCompletionService<Boolean> ecs =
        new ExecutorCompletionService<Boolean>(exec);
    endTime = System.currentTimeMillis() + 1000 * durationSec;
    boolean pread = true;
    for (int i = 0; i < numReadThreads; ++i) {
      ecs.submit(new RandomReader(i, reader, pread));
    }
    ecs.submit(new StatisticsPrinter());
    Future<Boolean> result;
    while (true) {
      try {
        result = ecs.poll(endTime + 1000 - System.currentTimeMillis(),
            TimeUnit.MILLISECONDS);
        if (result == null) {
          break;
        }
        try {
          if (result.get()) {
            ++numCompleted;
          } else {
            ++numFailed;
          }
        } catch (ExecutionException e) {
          LOG.error("Worker thread failure", e.getCause());
          ++numFailed;
        }
      } catch (InterruptedException ex) {
        LOG.error("Interrupted after " + numCompleted + " workers completed");
        Thread.currentThread().interrupt();
        continue;
      }
    }
  } finally {
    storeFile.closeReader(true);
    exec.shutdown();

    BlockCache c = cacheConf.getBlockCache();
    if (c != null) {
      c.shutdown();
    }
  }

  LOG.info("Worker threads completed: " + numCompleted);
  LOG.info("Worker threads failed: " + numFailed);
  return true;
}
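// A hedged sketch of the worker shape the pool above expects: RandomReader is
// a Callable<Boolean> that seeks to random rows between firstRow and lastRow
// until endTime. The createRandomRow() helper is hypothetical; the scanner
// calls follow the HFileScanner API.
private class RandomReader implements Callable<Boolean> {
  private final int readerId;
  private final StoreFile.Reader reader;
  private final boolean pread;

  RandomReader(int readerId, StoreFile.Reader reader, boolean pread) {
    this.readerId = readerId;
    this.reader = reader;
    this.pread = pread;
  }

  @Override
  public Boolean call() throws Exception {
    HFileScanner scanner = reader.getScanner(true /* cacheBlocks */, pread);
    while (System.currentTimeMillis() < endTime) {
      byte[] row = createRandomRow(); // hypothetical: random row in [firstRow, lastRow)
      KeyValue kv = KeyValue.createFirstOnRow(row);
      if (scanner.seekTo(kv.getKey()) >= 0) {
        scanner.getKeyValue(); // touch the KV so the block is actually read
      }
    }
    return true; // success
  }
}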
/**
 * Write out a split reference. Package local so it doesn't leak out of
 * regionserver.
 *
 * @param hri {@link HRegionInfo} of the destination
 * @param familyName Column Family Name
 * @param f File to split.
 * @param splitRow Split Row
 * @param top True if we are referring to the top half of the hfile.
 * @return Path to created reference.
 * @throws IOException
 */
Path splitStoreFile(final HRegionInfo hri, final String familyName,
    final StoreFile f, final byte[] splitRow, final boolean top)
    throws IOException {
  // Check whether the split row lies in the range of the store file.
  // If it is outside the range, return directly.
  if (!isIndexTable()) {
    if (top) {
      // Check if the split row is larger than the last key.
      KeyValue splitKey = KeyValue.createFirstOnRow(splitRow);
      byte[] lastKey = f.createReader().getLastKey();
      // A null lastKey means the store file is empty.
      if (lastKey == null) {
        return null;
      }
      if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(),
          splitKey.getKeyOffset(), splitKey.getKeyLength(), lastKey, 0,
          lastKey.length) > 0) {
        return null;
      }
    } else {
      // Check if the split row is smaller than the first key.
      KeyValue splitKey = KeyValue.createLastOnRow(splitRow);
      byte[] firstKey = f.createReader().getFirstKey();
      // A null firstKey means the store file is empty.
      if (firstKey == null) {
        return null;
      }
      if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(),
          splitKey.getKeyOffset(), splitKey.getKeyLength(), firstKey, 0,
          firstKey.length) < 0) {
        return null;
      }
    }
    f.getReader().close(true);
  }
  Path splitDir = new Path(getSplitsDir(hri), familyName);
  // A reference to the top or bottom half of the store file, depending on
  // the requested half.
  Reference r = top ? Reference.createTopReference(splitRow)
      : Reference.createBottomReference(splitRow);
  // Add the referred-to region's name as a dot-separated suffix.
  // See REF_NAME_REGEX regex above. The referred-to region's name is
  // up in the path of the passed in <code>f</code> -- parentdir is family,
  // then the directory above is the region name.
  String parentRegionName = regionInfo.getEncodedName();
  // Write reference with same file id only with the other region name as
  // suffix and into the new region location (under same family).
  Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
  return r.write(fs, p);
}
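// A minimal sketch of how splitStoreFile() is typically driven during a
// region split: a bottom reference for the first daughter and a top reference
// for the second. The hriA/hriB daughters and the storeFilesToSplit
// collection are assumptions for illustration.
for (StoreFile sf : storeFilesToSplit) {
  // A null return means the split row fell outside the file's key range, so
  // no reference file is needed for that half.
  Path bottomRef = splitStoreFile(hriA, familyName, sf, splitRow, false);
  Path topRef = splitStoreFile(hriB, familyName, sf, splitRow, true);
}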