/**
 * Checks that {@link ScanQueryMatcher} steps over expired KeyValue instances individually
 * rather than leaving the row at the first expired one, which would hide younger KeyValues
 * that sort later in the same row. The matcher is built with the explicit column set of the
 * shared Get, so this mirrors a Get that names its qualifiers.
 *
 * @throws IOException if a ScanQueryMatcher call made by the test throws it
 */
@Test
public void testMatch_ExpiredExplicit() throws IOException {
  final long testTTL = 1000;
  final long now = EnvironmentEdgeManager.currentTime();

  // One expected code per input KeyValue, in input order.
  MatchCode[] expected = new MatchCode[] {
    MatchCode.SEEK_NEXT_COL,
    MatchCode.INCLUDE_AND_SEEK_NEXT_COL,
    MatchCode.SEEK_NEXT_COL,
    MatchCode.INCLUDE_AND_SEEK_NEXT_COL,
    MatchCode.SEEK_NEXT_ROW,
    MatchCode.DONE
  };

  ScanInfo scanInfo =
      new ScanInfo(this.conf, fam2, 0, 1, testTTL, KeepDeletedCells.FALSE, 0, rowComparator);
  ScanQueryMatcher qm =
      new ScanQueryMatcher(scan, scanInfo, get.getFamilyMap().get(fam2), now - testTTL, now);

  KeyValue[] kvs = new KeyValue[] {
    new KeyValue(row1, fam2, col1, now - 100, data),   // age 100, younger than testTTL
    new KeyValue(row1, fam2, col2, now - 50, data),    // age 50, younger than testTTL
    new KeyValue(row1, fam2, col3, now - 5000, data),  // age 5000, older than testTTL
    new KeyValue(row1, fam2, col4, now - 500, data),   // age 500, younger than testTTL
    new KeyValue(row1, fam2, col5, now - 10000, data), // age 10000, older than testTTL
    new KeyValue(row2, fam1, col1, now - 10, data)     // different row
  };

  // Position the matcher on the row of the first KeyValue, then feed every KeyValue through.
  qm.setToNewRow(kvs[0]);
  List<MatchCode> actual = new ArrayList<MatchCode>(kvs.length);
  for (int idx = 0; idx < kvs.length; idx++) {
    actual.add(qm.match(kvs[idx]));
  }

  assertEquals(expected.length, actual.size());
  for (int idx = 0; idx < expected.length; idx++) {
    MatchCode want = expected[idx];
    MatchCode got = actual.get(idx);
    if (PRINT) {
      System.out.println("expected " + want + ", actual " + got);
    }
    assertEquals(want, got);
  }
}
@Test public void testMatch_Wildcard() throws IOException { // Moving up from the Tracker by using Gets and List<KeyValue> instead // of just byte [] // Expected result List<MatchCode> expected = new ArrayList<ScanQueryMatcher.MatchCode>(); expected.add(ScanQueryMatcher.MatchCode.INCLUDE); expected.add(ScanQueryMatcher.MatchCode.INCLUDE); expected.add(ScanQueryMatcher.MatchCode.INCLUDE); expected.add(ScanQueryMatcher.MatchCode.INCLUDE); expected.add(ScanQueryMatcher.MatchCode.INCLUDE); expected.add(ScanQueryMatcher.MatchCode.DONE); long now = EnvironmentEdgeManager.currentTime(); ScanQueryMatcher qm = new ScanQueryMatcher( scan, new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, 0, rowComparator), null, now - ttl, now); List<KeyValue> memstore = new ArrayList<KeyValue>(); memstore.add(new KeyValue(row1, fam2, col1, 1, data)); memstore.add(new KeyValue(row1, fam2, col2, 1, data)); memstore.add(new KeyValue(row1, fam2, col3, 1, data)); memstore.add(new KeyValue(row1, fam2, col4, 1, data)); memstore.add(new KeyValue(row1, fam2, col5, 1, data)); memstore.add(new KeyValue(row2, fam1, col1, 1, data)); List<ScanQueryMatcher.MatchCode> actual = new ArrayList<ScanQueryMatcher.MatchCode>(); KeyValue k = memstore.get(0); qm.setToNewRow(k); for (KeyValue kv : memstore) { actual.add(qm.match(kv)); } assertEquals(expected.size(), actual.size()); for (int i = 0; i < expected.size(); i++) { assertEquals(expected.get(i), actual.get(i)); if (PRINT) { System.out.println("expected " + expected.get(i) + ", actual " + actual.get(i)); } } }
private void testDropDeletes(byte[] from, byte[] to, byte[][] rows, MatchCode... expected) throws IOException { long now = EnvironmentEdgeManager.currentTime(); // Set time to purge deletes to negative value to avoid it ever happening. ScanInfo scanInfo = new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, -1L, rowComparator); NavigableSet<byte[]> cols = get.getFamilyMap().get(fam2); ScanQueryMatcher qm = new ScanQueryMatcher( scan, scanInfo, cols, Long.MAX_VALUE, HConstants.OLDEST_TIMESTAMP, HConstants.OLDEST_TIMESTAMP, now, from, to, null); List<ScanQueryMatcher.MatchCode> actual = new ArrayList<ScanQueryMatcher.MatchCode>(rows.length); byte[] prevRow = null; for (byte[] row : rows) { if (prevRow == null || !Bytes.equals(prevRow, row)) { qm.setToNewRow(KeyValueUtil.createFirstOnRow(row)); prevRow = row; } actual.add(qm.match(new KeyValue(row, fam2, null, now, Type.Delete))); } assertEquals(expected.length, actual.size()); for (int i = 0; i < expected.length; i++) { if (PRINT) System.out.println("expected " + expected[i] + ", actual " + actual.get(i)); assertEquals(expected[i], actual.get(i)); } }
private void _testMatch_ExplicitColumns(Scan scan, List<MatchCode> expected) throws IOException { long now = EnvironmentEdgeManager.currentTime(); // 2,4,5 ScanQueryMatcher qm = new ScanQueryMatcher( scan, new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, 0, rowComparator), get.getFamilyMap().get(fam2), now - ttl, now); List<KeyValue> memstore = new ArrayList<KeyValue>(); memstore.add(new KeyValue(row1, fam2, col1, 1, data)); memstore.add(new KeyValue(row1, fam2, col2, 1, data)); memstore.add(new KeyValue(row1, fam2, col3, 1, data)); memstore.add(new KeyValue(row1, fam2, col4, 1, data)); memstore.add(new KeyValue(row1, fam2, col5, 1, data)); memstore.add(new KeyValue(row2, fam1, col1, data)); List<ScanQueryMatcher.MatchCode> actual = new ArrayList<ScanQueryMatcher.MatchCode>(); KeyValue k = memstore.get(0); qm.setToNewRow(k); for (KeyValue kv : memstore) { actual.add(qm.match(kv)); } assertEquals(expected.size(), actual.size()); for (int i = 0; i < expected.size(); i++) { assertEquals(expected.get(i), actual.get(i)); if (PRINT) { System.out.println("expected " + expected.get(i) + ", actual " + actual.get(i)); } } }
/** * Pretend we have done a seek but don't do it yet, if possible. The hope is that we find * requested columns in more recent files and won't have to seek in older files. Creates a fake * key/value with the given row/column and the highest (most recent) possible timestamp we might * get from this file. When users of such "lazy scanner" need to know the next KV precisely (e.g. * when this scanner is at the top of the heap), they run {@link #enforceSeek()}. * * <p>Note that this function does guarantee that the current KV of this scanner will be advanced * to at least the given KV. Because of this, it does have to do a real seek in cases when the * seek timestamp is older than the highest timestamp of the file, e.g. when we are trying to seek * to the next row/column and use OLDEST_TIMESTAMP in the seek key. */ @Override public boolean requestSeek(KeyValue kv, boolean forward, boolean useBloom) throws IOException { if (kv.getFamilyLength() == 0) { useBloom = false; } boolean haveToSeek = true; if (useBloom) { // check ROWCOL Bloom filter first. if (reader.getBloomFilterType() == StoreFile.BloomType.ROWCOL) { haveToSeek = reader.passesGeneralBloomFilter( kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(), kv.getBuffer(), kv.getQualifierOffset(), kv.getQualifierLength()); } else if (this.matcher != null && !matcher.hasNullColumnInQuery() && kv.isDeleteFamily()) { // if there is no such delete family kv in the store file, // then no need to seek. haveToSeek = reader.passesDeleteFamilyBloomFilter( kv.getBuffer(), kv.getRowOffset(), kv.getRowLength()); } } delayedReseek = forward; delayedSeekKV = kv; if (haveToSeek) { // This row/column might be in this store file (or we did not use the // Bloom filter), so we still need to seek. realSeekDone = false; long maxTimestampInFile = reader.getMaxTimestamp(); long seekTimestamp = kv.getTimestamp(); if (seekTimestamp > maxTimestampInFile) { // Create a fake key that is not greater than the real next key. 
// (Lower timestamps correspond to higher KVs.) // To understand this better, consider that we are asked to seek to // a higher timestamp than the max timestamp in this file. We know that // the next point when we have to consider this file again is when we // pass the max timestamp of this file (with the same row/column). cur = kv.createFirstOnRowColTS(maxTimestampInFile); } else { // This will be the case e.g. when we need to seek to the next // row/column, and we don't know exactly what they are, so we set the // seek key's timestamp to OLDEST_TIMESTAMP to skip the rest of this // row/column. enforceSeek(); } return cur != null; } // Multi-column Bloom filter optimization. // Create a fake key/value, so that this scanner only bubbles up to the top // of the KeyValueHeap in StoreScanner after we scanned this row/column in // all other store files. The query matcher will then just skip this fake // key/value and the store scanner will progress to the next column. This // is obviously not a "real real" seek, but unlike the fake KV earlier in // this method, we want this to be propagated to ScanQueryMatcher. cur = kv.createLastOnRowCol(); realSeekDone = true; return true; }