public Filter group(Filter filter) { if (filter instanceof FilterList) { FilterList fList = (FilterList) filter; // We need to create a new FL here taking up only the filters of our interest FilterList newFList = new FilterList(fList.getOperator()); List<Filter> filters = fList.getFilters(); if (fList.getOperator() == Operator.MUST_PASS_ONE) { for (Filter subFilter : filters) { Filter resultFilter = handleFilterWithinOR(subFilter); // If result filter is not SingleColumnValueFilter or filter list that means OR branch is // having different type of filter other than SCVF. In that case we should not consider // the OR branch for scanning. if (resultFilter instanceof FilterList) { newFList.addFilter(resultFilter); } else if (resultFilter != null) { // This means OR filter list have at least one filter other than SCVF(may be other // child OR branches). return null; } } addORColsToFinalList(newFList); if (newFList.getFilters().isEmpty()) { return null; } return newFList; } else { // AND condition as long as the condition is AND in one sub tree all those can be // grouped under one AND parent(new one). for (Filter subFilter : filters) { Filter group = handleFilterWithinAND(subFilter); // group is null means, all are AND conditions and will be handled at once with the // below createFinalFilter if (group != null) { newFList.addFilter(group); } } addANDColsToFinalList(newFList); if (newFList.getFilters().isEmpty()) { return null; } return newFList; } } else if (filter instanceof SingleColumnValueFilter || filter instanceof SingleColumnRangeFilter) { return filter; } return null; }
/** * Since you can use Filter Lists as children of Filter Lists, you can create a hierarchy of * filters to be evaluated. In the hierarchy if OR branch having any filter type other than SCVF * as child then we should not consider the branch for scanning because we cannot fetch seek * points from other type of filters without column and value details. Ex: AND AND * __________|_______ | | | --> SCVF OR SCVF _______|______ | | ROWFILTER SVCF If the OR is root * then we should skip index table scanning for this filter. OR _______|______ --> null | | * ROWFILTER SVCF If the OR is child of another OR branch then parent OR branch will be excluded * for scanning. Ex: AND AND __________|_______ | | | --> SCVF OR SCVF _______|______ | | OR SVCF * _______|______ | | ROWFILTER SVCF * * @param filter * @return if filter is filter list with AND condition then we will return AND branch after * grouping. if filter is filter list with OR condition return null if no children is of type * other than SCVF or filter list else return different filter. if filter is SCVF then return * null. returning null means we are combining the filter(s) with children of parent OR filter * to perform optimizations. */ private Filter handleFilterWithinOR(Filter filter) { if (filter instanceof FilterList) { FilterList fList = (FilterList) filter; if (fList.getOperator() == Operator.MUST_PASS_ONE) { List<Filter> filters = fList.getFilters(); Filter resultFilter = null; for (Filter subFilter : filters) { // If this OR branch in the filter list have filter type other than SCVF we should report // it to parent by returning the other type of filter in such a way that the branch will // be skipped from index scan. 
resultFilter = handleFilterWithinOR(subFilter); if (resultFilter == null || (resultFilter instanceof FilterList)) { continue; } else { return resultFilter; } } return null; } else { return new FilterGroupingWorker().group(fList); } } else if (filter instanceof SingleColumnValueFilter) { handleScvfOfOR((SingleColumnValueFilter) filter); return null; } // TODO when we expose SingleColumnRangeFilter to handle that also here. // filter other than SingleColumnValueFilter. return filter; }
/**
 * ANDs {@code andWithFilter} onto the scan's current filter, appending it last.
 * A null argument is a no-op; an existing MUST_PASS_ALL list is extended (by
 * rebuilding, not mutating); any other existing filter is wrapped together with the
 * new one in a MUST_PASS_ALL list.
 */
public static void andFilterAtEnd(Scan scan, Filter andWithFilter) {
  if (andWithFilter == null) {
    return;
  }
  Filter current = scan.getFilter();
  if (current == null) {
    scan.setFilter(andWithFilter);
    return;
  }
  if (current instanceof FilterList
      && ((FilterList) current).getOperator() == FilterList.Operator.MUST_PASS_ALL) {
    FilterList currentList = (FilterList) current;
    // Rebuild the AND list with the extra filter appended at the end rather than
    // mutating the existing list in place.
    List<Filter> combined = new ArrayList<Filter>(currentList.getFilters().size() + 1);
    combined.addAll(currentList.getFilters());
    combined.add(andWithFilter);
    scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, combined));
  } else {
    scan.setFilter(
        new FilterList(FilterList.Operator.MUST_PASS_ALL, Arrays.asList(current, andWithFilter)));
  }
}
/**
 * Process a {@link org.plasma.query.model.LogicalOperator logical operator} query traversal start
 * event. If the {@link FilterList filter list} on the top of the filter stack is not an 'OR'
 * filter, since it's immutable and we cannot modify its operator, create an 'OR' filter and swaps
 * out the existing filters into the new 'OR' {@link FilterList filter list}.
 */
public void start(LogicalOperator operator) {
  switch (operator.getValue()) {
  case AND:
    break; // default filter list oper is must-pass-all (AND)
  case OR:
    FilterList top = this.filterStack.peek();
    // Only rebuild when the top of the stack is not already an OR list.
    if (top.getOperator().ordinal() != FilterList.Operator.MUST_PASS_ONE.ordinal()) {
      // Copy the existing filters into a fresh MUST_PASS_ONE list, since the
      // operator of an existing FilterList cannot be changed.
      FilterList orList = new FilterList(FilterList.Operator.MUST_PASS_ONE);
      for (Filter filter : top.getFilters())
        orList.addFilter(filter);
      // NOTE(review): relies on getFilters() returning a mutable live list —
      // clear() and remove() below would throw on an unmodifiable view; confirm
      // against the FilterList version in use.
      top.getFilters().clear();
      // Replace 'top' with 'orList' inside its parent, then make 'orList' the
      // new top of the stack. Order matters: pop first so peek() yields the parent.
      this.filterStack.pop();
      FilterList previous = this.filterStack.peek();
      if (!previous.getFilters().remove(top))
        throw new IllegalStateException("could not remove filter list");
      previous.addFilter(orList);
      this.filterStack.push(orList);
    }
    break;
  }
  super.start(operator);
}
@Test public void testParsedFilter() { String q1 = "@cluster = \"cluster1\" and @datacenter = \"dc1\" and @field3 = 100000"; try { FilterList filterList = (FilterList) buildFilter(q1); Assert.assertEquals(FilterList.Operator.MUST_PASS_ONE, filterList.getOperator()); Assert.assertEquals(1, filterList.getFilters().size()); Assert.assertEquals(2, ((FilterList) filterList.getFilters().get(0)).getFilters().size()); } catch (EagleQueryParseException e) { Assert.fail(e.getMessage()); } String q2 = "@cluster = \"cluster1\" and @datacenter = \"dc1\" and ( @field3 = 100000 or @field3 < 100000)"; try { FilterList filterList = (FilterList) buildFilter(q2); Assert.assertEquals(FilterList.Operator.MUST_PASS_ONE, filterList.getOperator()); Assert.assertEquals(2, filterList.getFilters().size()); Assert.assertEquals(2, ((FilterList) filterList.getFilters().get(0)).getFilters().size()); } catch (EagleQueryParseException e) { Assert.fail(e.getMessage()); } // Test parse success but bad type of value String q3 = "@cluster = \"cluster1\" and @datacenter = \"dc1\" and ( @field3 = 100000 or @field3 < \"bad_int_100000\")"; boolean q3Ex = false; try { Assert.assertNull(buildFilter(q3)); } catch (EagleQueryParseException e) { Assert.fail(e.getMessage()); } catch (IllegalArgumentException e) { LOG.debug("Expect: ", e); Assert.assertTrue(e.getCause() instanceof NumberFormatException); q3Ex = true; } Assert.assertTrue(q3Ex); }
/**
 * Applies the given row-key offset to the scan's filter. A FilterList is unpacked
 * so the offset reaches each child; any other filter is handed straight to the
 * per-filter overload. A scan without a filter is left untouched.
 */
public static void setRowKeyOffset(Scan scan, int offset) {
  Filter current = scan.getFilter();
  if (current == null) {
    return;
  }
  if (current instanceof FilterList) {
    // Push the offset down to every child of the list.
    for (Filter child : ((FilterList) current).getFilters()) {
      setRowKeyOffset(child, offset);
    }
  } else {
    setRowKeyOffset(current, offset);
  }
}
private Filter handleFilterWithinAND(Filter filter) { if (filter instanceof FilterList) { FilterList fList = (FilterList) filter; if (fList.getOperator() == Operator.MUST_PASS_ONE) { return new FilterGroupingWorker().group(fList); } else { List<Filter> filters = fList.getFilters(); for (Filter subFilter : filters) { handleFilterWithinAND(subFilter); } } } else if (filter instanceof SingleColumnValueFilter) { handleScvf((SingleColumnValueFilter) filter); } // TODO when we expose SingleColumnRangeFilter to handle that also here. return null; }
/** FilterListAdapter should handle the fact that PageFilterAdapter returns null. */
@Test
public void testPageFilter() throws IOException {
  byte[] qualA = Bytes.toBytes("qualA");
  PageFilter pageFilter = new PageFilter(20);
  FilterList filterList =
      new FilterList(
          Operator.MUST_PASS_ALL,
          new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(qualA)),
          pageFilter);
  FilterAdapter adapter = FilterAdapter.buildAdapter();
  // Adapt the whole list; the page filter contributes nothing to the row filter.
  Optional<RowFilter> adaptedList =
      adapter.adaptFilter(
          new FilterAdapterContext(new Scan(), new DefaultReadHooks()), filterList);
  Assert.assertTrue(adaptedList.isPresent());
  // Adapting just the qualifier filter (with a fresh context) must yield the same result.
  Optional<RowFilter> adaptedQualifier =
      adapter.adaptFilter(
          new FilterAdapterContext(new Scan(), new DefaultReadHooks()),
          filterList.getFilters().get(0));
  Assert.assertEquals(adaptedQualifier.get(), adaptedList.get());
}
/**
 * Intersects the given [startKey, stopKey) range with the scan's existing row range,
 * narrowing the scan's start/stop rows in place, and — when {@code useSkipScan} is
 * set — also intersects any SkipScanFilter on the scan with the narrowed range.
 *
 * @param scan scan to narrow; its start row, stop row and filter may be replaced
 * @param startKey lower bound to intersect with (empty array means unbounded)
 * @param stopKey upper bound to intersect with (empty array means unbounded)
 * @param useSkipScan whether to intersect and replace an existing SkipScanFilter
 * @return true if the narrowed scan may still return rows; false if the
 *         intersection is provably empty
 */
public static boolean intersectScanRange(
    Scan scan, byte[] startKey, byte[] stopKey, boolean useSkipScan) {
  boolean mayHaveRows = false;
  // For local indexes the keys carry a region prefix; remember its length so it can
  // be stripped before intersecting the skip scan filter below.
  int offset = 0;
  if (ScanUtil.isLocalIndex(scan)) {
    offset = startKey.length != 0 ? startKey.length : stopKey.length;
  }
  byte[] existingStartKey = scan.getStartRow();
  byte[] existingStopKey = scan.getStopRow();
  // Take the tighter (greater) of the two start keys.
  if (existingStartKey.length > 0) {
    if (startKey.length == 0 || Bytes.compareTo(existingStartKey, startKey) > 0) {
      startKey = existingStartKey;
    }
  } else {
    // Scan was unbounded on this side, so the intersection cannot be empty here.
    mayHaveRows = true;
  }
  // Take the tighter (smaller) of the two stop keys.
  if (existingStopKey.length > 0) {
    if (stopKey.length == 0 || Bytes.compareTo(existingStopKey, stopKey) < 0) {
      stopKey = existingStopKey;
    }
  } else {
    mayHaveRows = true;
  }
  scan.setStartRow(startKey);
  scan.setStopRow(stopKey);
  if (offset > 0 && useSkipScan) {
    // Strip the local-index region prefix so the keys line up with the skip scan
    // filter's slot positions.
    byte[] temp = null;
    if (startKey.length != 0) {
      temp = new byte[startKey.length - offset];
      System.arraycopy(startKey, offset, temp, 0, startKey.length - offset);
      startKey = temp;
    }
    if (stopKey.length != 0) {
      temp = new byte[stopKey.length - offset];
      System.arraycopy(stopKey, offset, temp, 0, stopKey.length - offset);
      stopKey = temp;
    }
  }
  mayHaveRows = mayHaveRows || Bytes.compareTo(scan.getStartRow(), scan.getStopRow()) < 0;
  // If the scan is using skip scan filter, intersect and replace the filter.
  if (mayHaveRows && useSkipScan) {
    Filter filter = scan.getFilter();
    if (filter instanceof SkipScanFilter) {
      SkipScanFilter oldFilter = (SkipScanFilter) filter;
      SkipScanFilter newFilter = oldFilter.intersect(startKey, stopKey);
      if (newFilter == null) {
        // Empty intersection: the skip scan cannot match anything in range.
        return false;
      }
      // Intersect found: replace skip scan with intersected one
      scan.setFilter(newFilter);
    } else if (filter instanceof FilterList) {
      // Rebuild the list, intersecting each SkipScanFilter child and copying the rest.
      // NOTE(review): the rebuilt list is always MUST_PASS_ALL regardless of the
      // original list's operator — presumably callers only attach AND lists here;
      // confirm before relying on this for OR lists.
      FilterList oldList = (FilterList) filter;
      FilterList newList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
      for (Filter f : oldList.getFilters()) {
        if (f instanceof SkipScanFilter) {
          SkipScanFilter newFilter = ((SkipScanFilter) f).intersect(startKey, stopKey);
          if (newFilter == null) {
            return false;
          }
          newList.addFilter(newFilter);
        } else {
          newList.addFilter(f);
        }
      }
      scan.setFilter(newList);
    }
  }
  return mayHaveRows;
}
@Test public void testWithUnescapedString() { /////////////////////////////////// // Tag filter with IN or EQUAL // Should use RowKeyFilter only /////////////////////////////////// String query = "@cluster = \"cluster1\" and @datacenter = \"dc1\" and @jobID = \"job.1234\""; try { FilterList filter = (FilterList) buildFilter(query); Assert.assertEquals( RowFilter.class, ((FilterList) filter.getFilters().get(0)).getFilters().get(0).getClass()); Assert.assertFalse( "Should use rowkey filter only", filter.toString().matches(".*job.1234.*")); } catch (EagleQueryParseException e) { Assert.fail(e.getMessage()); } catch (Exception ex) { Assert.fail(ex.getMessage()); } query = "@cluster = \"cluster1\" and @datacenter = \"dc1\" and @jobID in (\"job_1234\")"; try { FilterList filter = (FilterList) buildFilter(query); Assert.assertEquals( RowFilter.class, ((FilterList) filter.getFilters().get(0)).getFilters().get(0).getClass()); Assert.assertFalse( "Should use rowkey filter only", filter.toString().matches(".*job_1234.*")); } catch (EagleQueryParseException e) { Assert.fail(e.getMessage()); } catch (Exception ex) { Assert.fail(ex.getMessage()); } query = "@cluster = \"cluster1\" and @datacenter = \"dc1\" and @jobID in (\"job.1234\")"; try { FilterList filter = (FilterList) buildFilter(query); Assert.assertEquals( RowFilter.class, ((FilterList) filter.getFilters().get(0)).getFilters().get(0).getClass()); Assert.assertFalse( "Should use rowkey filter only", filter.toString().matches(".*job.*1234.*")); } catch (EagleQueryParseException e) { Assert.fail(e.getMessage()); } catch (Exception ex) { Assert.fail(ex.getMessage()); } /////////////////////////////// // Tag with other operators /////////////////////////////// query = "@cluster = \"cluster1\" and @datacenter = \"dc1\" and @jobID =~ \"job_1234\""; try { FilterList filter = (FilterList) buildFilter(query); Assert.assertEquals( RowFilter.class, ((FilterList) filter.getFilters().get(0)).getFilters().get(0).getClass()); 
Assert.assertTrue(filter.toString().matches(".*job_1234.*")); } catch (EagleQueryParseException e) { Assert.fail(e.getMessage()); } catch (Exception ex) { Assert.fail(ex.getMessage()); } query = "@cluster = \"cluster1\" and @datacenter = \"dc1\" and @jobID =~ \"job.1234\""; try { FilterList filter = (FilterList) buildFilter(query); Assert.assertEquals( RowFilter.class, ((FilterList) filter.getFilters().get(0)).getFilters().get(0).getClass()); Assert.assertTrue(filter.toString().matches(".*job.1234.*")); } catch (EagleQueryParseException e) { Assert.fail(e.getMessage()); } catch (Exception ex) { Assert.fail(ex.getMessage()); } /////////////////////////////// // Tag with IN // Should escape regexp chars /////////////////////////////// query = "@cluster = \"cluster1\" and @datacenter = \"dc1\" and @field7 = \"job_1234\""; try { FilterList filter = (FilterList) buildFilter(query); Assert.assertEquals( RowFilter.class, ((FilterList) filter.getFilters().get(0)).getFilters().get(0).getClass()); Assert.assertTrue(filter.toString().matches(".*job_1234.*")); } catch (EagleQueryParseException e) { Assert.fail(e.getMessage()); } catch (Exception ex) { ex.printStackTrace(); Assert.fail(ex.getMessage()); } query = "@cluster = \"cluster1\" and @datacenter = \"dc1\" and @field7 in (\"job.1234\",\"others\")"; try { FilterList filter = (FilterList) buildFilter(query); Assert.assertEquals( RowFilter.class, ((FilterList) filter.getFilters().get(0)).getFilters().get(0).getClass()); Assert.assertTrue(filter.toString().matches(".*job\\.1234.*")); } catch (EagleQueryParseException e) { Assert.fail(e.getMessage()); } catch (Exception ex) { Assert.fail(ex.getMessage()); } }
/**
 * Intersects the given scan with the [originalStartKey, originalStopKey) range,
 * returning a new narrowed scan (with an intersected skip scan filter when
 * applicable) or null when the intersection is provably empty. When {@code scan}
 * is null, only an intersection check is performed and HAS_INTERSECTION (or null)
 * is returned.
 *
 * @param scan scan to intersect; null means "just test for intersection"
 * @param originalStartKey lower bound of the range (empty means unbounded)
 * @param originalStopKey upper bound of the range (empty means unbounded)
 * @param keyOffset prefix length for local indexes (0 otherwise)
 * @param crossesRegionBoundary whether the range crosses a region boundary
 * @return the narrowed scan, HAS_INTERSECTION when scan is null and the ranges
 *         intersect, or null when no rows are possible
 */
public Scan intersectScan(
    Scan scan,
    final byte[] originalStartKey,
    final byte[] originalStopKey,
    final int keyOffset,
    boolean crossesRegionBoundary) {
  byte[] startKey = originalStartKey;
  byte[] stopKey = originalStopKey;
  // Degenerate range: empty before we even start.
  if (stopKey.length > 0 && Bytes.compareTo(startKey, stopKey) >= 0) {
    return null;
  }
  boolean mayHaveRows = false;
  // Keep the keys as they are if we have a point lookup, as we've already resolved the
  // salt bytes in that case.
  final int scanKeyOffset =
      this.isSalted && !this.isPointLookup ? SaltingUtil.NUM_SALTING_BYTES : 0;
  assert (scanKeyOffset == 0 || keyOffset == 0);
  // Total offset for startKey/stopKey. Either 1 for salted tables or the prefix length
  // of the current region for local indexes. We'll never have a case where a table is
  // both salted and local.
  final int totalKeyOffset = scanKeyOffset + keyOffset;
  byte[] prefixBytes = ByteUtil.EMPTY_BYTE_ARRAY;
  if (totalKeyOffset > 0) {
    prefixBytes = ScanUtil.getPrefix(startKey, totalKeyOffset);
    /*
     * If our startKey to stopKey crosses a region boundary consider everything after the startKey as our scan
     * is always done within a single region. This prevents us from having to prefix the key prior to knowing
     * whether or not there may be an intersection. We can't calculate whether or not we've crossed a region
     * boundary for local indexes, because we don't know the key offset of the next region, but only for the
     * current one (which is the one passed in). If the next prefix happened to be a subset of the previous
     * prefix, then this wouldn't detect that we crossed a region boundary.
     */
    if (crossesRegionBoundary) {
      stopKey = ByteUtil.EMPTY_BYTE_ARRAY;
    }
  }
  int scanStartKeyOffset = scanKeyOffset;
  byte[] scanStartKey = scan == null ? ByteUtil.EMPTY_BYTE_ARRAY : scan.getStartRow();
  // Compare ignoring key prefix and salt byte
  if (scanStartKey.length > 0) {
    // Keep the tighter (greater) start key; track which offset applies to it.
    if (startKey.length > 0
        && Bytes.compareTo(
                scanStartKey,
                scanKeyOffset,
                scanStartKey.length - scanKeyOffset,
                startKey,
                totalKeyOffset,
                startKey.length - totalKeyOffset)
            < 0) {
      scanStartKey = startKey;
      scanStartKeyOffset = totalKeyOffset;
    }
  } else {
    scanStartKey = startKey;
    scanStartKeyOffset = totalKeyOffset;
    mayHaveRows = true;
  }
  int scanStopKeyOffset = scanKeyOffset;
  byte[] scanStopKey = scan == null ? ByteUtil.EMPTY_BYTE_ARRAY : scan.getStopRow();
  if (scanStopKey.length > 0) {
    // Keep the tighter (smaller) stop key.
    if (stopKey.length > 0
        && Bytes.compareTo(
                scanStopKey,
                scanKeyOffset,
                scanStopKey.length - scanKeyOffset,
                stopKey,
                totalKeyOffset,
                stopKey.length - totalKeyOffset)
            > 0) {
      scanStopKey = stopKey;
      scanStopKeyOffset = totalKeyOffset;
    }
  } else {
    scanStopKey = stopKey;
    scanStopKeyOffset = totalKeyOffset;
    mayHaveRows = true;
  }
  mayHaveRows =
      mayHaveRows
          || Bytes.compareTo(
                  scanStartKey,
                  scanStartKeyOffset,
                  scanStartKey.length - scanStartKeyOffset,
                  scanStopKey,
                  scanStopKeyOffset,
                  scanStopKey.length - scanStopKeyOffset)
              < 0;
  if (!mayHaveRows) {
    return null;
  }
  if (originalStopKey.length != 0 && scanStopKey.length == 0) {
    scanStopKey = originalStopKey;
  }
  Filter newFilter = null;
  // If the scan is using skip scan filter, intersect and replace the filter.
  if (scan == null || this.useSkipScanFilter()) {
    byte[] skipScanStartKey = scanStartKey;
    byte[] skipScanStopKey = scanStopKey;
    // If we have a keyOffset and we've used the startKey/stopKey that
    // were passed in (which have the prefix) for the above range check,
    // we need to remove the prefix before running our intersect method.
    // TODO: we could use skipScanFilter.setOffset(keyOffset) if both
    // the startKey and stopKey were used above *and* our intersect
    // method honored the skipScanFilter.offset variable.
    // NOTE: the != / == comparisons below are intentional reference checks —
    // they detect whether the assignments above aliased the original arrays.
    if (scanKeyOffset > 0) {
      if (skipScanStartKey != originalStartKey) { // original already has correct salt byte
        skipScanStartKey = replaceSaltByte(skipScanStartKey, prefixBytes);
      }
      if (skipScanStopKey != originalStopKey) {
        skipScanStopKey = replaceSaltByte(skipScanStopKey, prefixBytes);
      }
    } else if (keyOffset > 0) {
      if (skipScanStartKey == originalStartKey) {
        skipScanStartKey = stripPrefix(skipScanStartKey, keyOffset);
      }
      if (skipScanStopKey == originalStopKey) {
        skipScanStopKey = stripPrefix(skipScanStopKey, keyOffset);
      }
    }
    if (scan == null) {
      // 'filter' here is the enclosing object's field (the local below is not yet
      // declared at this point).
      return filter.hasIntersect(skipScanStartKey, skipScanStopKey) ? HAS_INTERSECTION : null;
    }
    Filter filter = scan.getFilter();
    SkipScanFilter newSkipScanFilter = null;
    if (filter instanceof SkipScanFilter) {
      SkipScanFilter oldSkipScanFilter = (SkipScanFilter) filter;
      newFilter = newSkipScanFilter = oldSkipScanFilter.intersect(skipScanStartKey, skipScanStopKey);
      if (newFilter == null) {
        return null; // Filter will always evaluate to false
      }
    } else if (filter instanceof FilterList) {
      FilterList oldList = (FilterList) filter;
      FilterList newList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
      newFilter = newList;
      for (Filter f : oldList.getFilters()) {
        if (f instanceof SkipScanFilter) {
          newSkipScanFilter = ((SkipScanFilter) f).intersect(skipScanStartKey, skipScanStopKey);
          if (newSkipScanFilter == null) {
            return null;
          }
          newList.addFilter(newSkipScanFilter);
        } else {
          newList.addFilter(f);
        }
      }
    }
    // TODO: it seems that our SkipScanFilter or HBase runs into problems if we don't
    // have an enclosing range when we do a point lookup.
    if (isPointLookup) {
      // NOTE(review): newSkipScanFilter can still be null here if the scan's filter
      // was neither a SkipScanFilter nor a FilterList containing one — presumably
      // point lookups always carry a skip scan filter; confirm, else this NPEs.
      scanStartKey = ScanUtil.getMinKey(schema, newSkipScanFilter.getSlots(), slotSpan);
      scanStopKey = ScanUtil.getMaxKey(schema, newSkipScanFilter.getSlots(), slotSpan);
    }
  }
  if (newFilter == null) {
    newFilter = scan.getFilter();
  }
  Scan newScan = ScanUtil.newScan(scan);
  newScan.setFilter(newFilter);
  // If we have an offset (salted table or local index), we need to make sure to
  // prefix our scan start/stop row by the prefix of the startKey or stopKey that
  // were passed in. Our scan either doesn't have the prefix or has a placeholder
  // for it.
  if (totalKeyOffset > 0) {
    if (scanStartKey != originalStartKey) {
      scanStartKey = prefixKey(scanStartKey, scanKeyOffset, prefixBytes, keyOffset);
    }
    if (scanStopKey != originalStopKey) {
      scanStopKey = prefixKey(scanStopKey, scanKeyOffset, prefixBytes, keyOffset);
    }
  }
  // Don't let the stopRow of the scan go beyond the originalStopKey
  if (originalStopKey.length > 0 && Bytes.compareTo(scanStopKey, originalStopKey) > 0) {
    scanStopKey = originalStopKey;
  }
  if (scanStopKey.length > 0 && Bytes.compareTo(scanStartKey, scanStopKey) >= 0) {
    return null;
  }
  newScan.setAttribute(SCAN_ACTUAL_START_ROW, scanStartKey);
  newScan.setStartRow(scanStartKey);
  newScan.setStopRow(scanStopKey);
  if (keyOffset > 0) {
    newScan.setAttribute(STARTKEY_OFFSET, Bytes.toBytes(keyOffset));
  }
  return newScan;
}