public static void main(String[] args) throws Exception {
  Configuration con = new Configuration();
  String[] otherArgs = new GenericOptionsParser(con, args).getRemainingArgs();
  // Layer the HBase settings on top of the parsed Hadoop configuration instead of
  // discarding it (the deprecated new HBaseConfiguration() ignored 'con' entirely).
  Configuration conf = HBaseConfiguration.create(con);
  Job job = new Job(conf, "AverageCalc");
  job.setJarByClass(AverageCalculator.class);

  Scan scan = new Scan();
  scan.setCaching(500);        // larger caching cuts RPC round trips for MR scans
  scan.setCacheBlocks(false);  // don't pollute the block cache with a full-table scan
  scan.addFamily(Bytes.toBytes("Post"));

  // Only rows whose Post:PostTypeId equals "1" reach the mapper.
  FilterList li = new FilterList(FilterList.Operator.MUST_PASS_ALL);
  SingleColumnValueFilter filter = new SingleColumnValueFilter(
      Bytes.toBytes("Post"), Bytes.toBytes("PostTypeId"),
      CompareOp.EQUAL, Bytes.toBytes("1"));
  li.addFilter(filter);
  scan.setFilter(li);

  FileOutputFormat.setOutputPath(job, new Path(otherArgs[0]));
  job.setOutputKeyClass(Text.class);
  TableMapReduceUtil.initTableMapperJob(
      "bigd24-hbase-sample", scan, Mapper1.class, Text.class, IntWritable.class, job);
  job.setReducerClass(Reducer1.class);
  job.setOutputValueClass(FloatWritable.class);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
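Mapper1 and Reducer1 are referenced but not shown above. A minimal sketch of what the table-mapper side might look like, assuming a hypothetical Post:Score column feeds the average; the "Score" qualifier and the single reduce key are illustrative, not part of the original job:

// Hypothetical mapper for the job above: emits one (key, score) pair per matching row.
public static class Mapper1 extends TableMapper<Text, IntWritable> {
  private static final Text KEY = new Text("avg"); // single reduce key, assumed

  @Override
  protected void map(ImmutableBytesWritable row, Result value, Context context)
      throws IOException, InterruptedException {
    byte[] score = value.getValue(Bytes.toBytes("Post"), Bytes.toBytes("Score")); // assumed column
    if (score != null) {
      context.write(KEY, new IntWritable(Integer.parseInt(Bytes.toString(score))));
    }
  }
}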
public static List<String> getBooksbyPrice(String min, String max) throws IOException {
  List<String> list = new ArrayList<String>();
  Scan scan = new Scan();
  scan.setMaxVersions();

  // Default operator is MUST_PASS_ALL, so both bounds must hold (price BETWEEN min AND max).
  FilterList filterList = new FilterList();
  // Lower bound: price >= min. (The original code confusingly named this "maxFilter".)
  Filter lowerBound = new SingleColumnValueFilter(
      Bytes.toBytes("statics"), Bytes.toBytes("price"),
      CompareOp.GREATER_OR_EQUAL, Bytes.toBytes(min));
  // Upper bound: price <= max.
  Filter upperBound = new SingleColumnValueFilter(
      Bytes.toBytes("statics"), Bytes.toBytes("price"),
      CompareOp.LESS_OR_EQUAL, Bytes.toBytes(max));
  filterList.addFilter(lowerBound);
  filterList.addFilter(upperBound);
  scan.setFilter(filterList);

  ResultScanner rs = table.getScanner(scan);
  for (Result r : rs) {
    String str = "title: " + Bytes.toString(r.getRow());
    for (KeyValue kv : r.raw()) {
      str += " " + Bytes.toString(kv.getQualifier()) + ": " + Bytes.toString(kv.getValue());
    }
    System.out.println(str);
    list.add(str);
  }
  return list;
}
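One caveat worth flagging here: with the default BinaryComparator, SingleColumnValueFilter compares raw bytes lexicographically, so string-encoded numbers sort incorrectly ("9" sorts after "12"). A sketch of a safer variant, assuming the price cells were written with the same fixed-width numeric encoding:

// Fixed-width big-endian longs keep byte order aligned with numeric order
// (for non-negative values), so the range filter behaves as expected.
byte[] lower = Bytes.toBytes(100L);
byte[] upper = Bytes.toBytes(500L);
Filter ge = new SingleColumnValueFilter(
    Bytes.toBytes("statics"), Bytes.toBytes("price"), CompareOp.GREATER_OR_EQUAL, lower);
Filter le = new SingleColumnValueFilter(
    Bytes.toBytes("statics"), Bytes.toBytes("price"), CompareOp.LESS_OR_EQUAL, upper);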
public String[] getObjectIDs(String objectType, String... tags) throws IOException {
  List<String> ret = new ArrayList<String>();
  FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ALL);

  SingleColumnValueFilter filter1 = new SingleColumnValueFilter(
      "tags".getBytes(), "OBJECTTYPE".getBytes(), CompareOp.EQUAL, Bytes.toBytes(objectType));
  list.addFilter(filter1);

  for (String tag : tags) {
    SingleColumnValueFilter filter2 = new SingleColumnValueFilter(
        "tags".getBytes(), tag.toUpperCase().getBytes(), CompareOp.EQUAL, Bytes.toBytes(1));
    // Without this, rows that lack the tag column would pass the filter by default.
    filter2.setFilterIfMissing(true);
    list.addFilter(filter2);
  }

  Scan s = new Scan();
  s.setFilter(list);
  s.setMaxVersions(1);

  ResultScanner scanner = htable.getScanner(s);
  try {
    for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
      String localObjectId = new String(rr.getValue("tags".getBytes(), "OBJECTID".getBytes()));
      ret.add(localObjectId);
    }
  } finally {
    scanner.close();
  }
  return ret.toArray(new String[0]);
}
public static ResultScanner getList(
    String startRowRange, String stopRowRange,
    byte[] cf1, byte[] cf2, // note: cf2 is accepted but never added to the scan
    long limit, FilterList filterList, String ctableName) {
  Scan scan = new Scan();
  scan.addFamily(cf1);
  scan.setStartRow(Bytes.toBytes(startRowRange));
  if (stopRowRange != null) {
    scan.setStopRow(Bytes.toBytes(stopRowRange));
  }
  // Cap the number of rows per region server; fall back to 100 when no limit is given.
  filterList.addFilter(new PageFilter(limit != 0 ? limit : 100));
  scan.setFilter(filterList);

  ResultScanner resultScanner = null;
  try {
    resultScanner = tblMngr.getTable(ctableName).getScanner(scan);
  } catch (Exception e) {
    // Don't swallow the failure silently; callers otherwise get an unexplained null.
    e.printStackTrace();
  }
  return resultScanner;
}
/**
 * Builds the HBase filter that selects values on the y-axis (response time) when picking
 * transactions in the scatter chart. The first 4 bytes of the column qualifier must hold the
 * elapsed time for this filter to work.
 *
 * @param area
 * @param offsetTransactionId
 * @param offsetTransactionElapsed
 * @return
 */
private Filter makeResponseTimeFilter(
    final SelectedScatterArea area,
    final TransactionId offsetTransactionId,
    int offsetTransactionElapsed) {
  // filter by response time
  ResponseTimeRange responseTimeRange = area.getResponseTimeRange();
  byte[] responseFrom = Bytes.toBytes(responseTimeRange.getFrom());
  byte[] responseTo = Bytes.toBytes(responseTimeRange.getTo());
  FilterList filterList = new FilterList(Operator.MUST_PASS_ALL);
  filterList.addFilter(
      new QualifierFilter(CompareOp.GREATER_OR_EQUAL, new BinaryPrefixComparator(responseFrom)));
  filterList.addFilter(
      new QualifierFilter(CompareOp.LESS_OR_EQUAL, new BinaryPrefixComparator(responseTo)));

  // add offset
  if (offsetTransactionId != null) {
    final Buffer buffer = new AutomaticBuffer(32);
    buffer.put(offsetTransactionElapsed);
    buffer.putPrefixedString(offsetTransactionId.getAgentId());
    buffer.putSVar(offsetTransactionId.getAgentStartTime());
    buffer.putVar(offsetTransactionId.getTransactionSequence());
    byte[] qualifierOffset = buffer.getBuffer();
    filterList.addFilter(
        new QualifierFilter(CompareOp.GREATER, new BinaryPrefixComparator(qualifierOffset)));
  }
  return filterList;
}
private void addANDColsToFinalList(FilterList filterList) {
  for (Entry<Column, Pair<Value, Value>> entry : colWithOperators.entrySet()) {
    Pair<Value, Value> value = entry.getValue();
    if (value.getFirst() != null && value.getSecond() != null) {
      // Both bounds present: introduce a new range filter covering the interval.
      SingleColumnRangeFilter rangeFltr = new SingleColumnRangeFilter(
          entry.getKey().getFamily(), entry.getKey().getQualifier(),
          entry.getKey().getValuePartition(), value.getFirst().getValue(),
          value.getFirst().getOperator(), value.getSecond().getValue(),
          value.getSecond().getOperator());
      filterList.addFilter(rangeFltr);
    } else if (value.getFirst() != null) {
      if (value.getFirst().getOperator() == CompareOp.EQUAL) {
        filterList.addFilter(value.getFirst().getFilter());
      } else {
        // One-sided condition: open-ended range filter.
        SingleColumnRangeFilter rangeFltr = new SingleColumnRangeFilter(
            entry.getKey().getFamily(), entry.getKey().getQualifier(),
            entry.getKey().getValuePartition(), value.getFirst().getValue(),
            value.getFirst().getOperator(), null, null);
        filterList.addFilter(rangeFltr);
      }
    }
  }
}
/**
 * Since you can use filter lists as children of filter lists, you can create a hierarchy of
 * filters to be evaluated. In that hierarchy, if an OR branch has any filter type other than
 * SCVF as a child, the branch must not be considered for scanning, because seek points cannot
 * be fetched from other filter types without column and value details. Ex:
 *
 * <pre>
 *             AND                                AND
 *       _______|_______                       ____|____
 *      |       |       |          ==>        |         |
 *     SCVF     OR     SCVF                  SCVF      SCVF
 *           ___|___
 *          |       |
 *      ROWFILTER  SCVF
 * </pre>
 *
 * If the OR is the root, index table scanning is skipped for this filter:
 *
 * <pre>
 *          OR
 *       ____|____             ==>  null
 *      |         |
 *  ROWFILTER   SCVF
 * </pre>
 *
 * If the OR is a child of another OR branch, the parent OR branch is excluded from scanning.
 * Ex:
 *
 * <pre>
 *             AND                                AND
 *       _______|_______                       ____|____
 *      |       |       |          ==>        |         |
 *     SCVF     OR     SCVF                  SCVF      SCVF
 *           ___|___
 *          |       |
 *         OR      SCVF
 *      ____|____
 *     |         |
 * ROWFILTER   SCVF
 * </pre>
 *
 * @param filter
 * @return if the filter is a filter list with AND condition, the AND branch after grouping; if
 *     the filter is a filter list with OR condition, null when every child is an SCVF or a
 *     filter list, otherwise the offending filter; if the filter is an SCVF, null. Returning
 *     null means the filter(s) are combined with the children of the parent OR filter to
 *     perform optimizations.
 */
private Filter handleFilterWithinOR(Filter filter) {
  if (filter instanceof FilterList) {
    FilterList fList = (FilterList) filter;
    if (fList.getOperator() == Operator.MUST_PASS_ONE) {
      List<Filter> filters = fList.getFilters();
      Filter resultFilter = null;
      for (Filter subFilter : filters) {
        // If this OR branch in the filter list has a filter type other than SCVF, report it
        // to the parent by returning that filter, so the branch is skipped from index scan.
        resultFilter = handleFilterWithinOR(subFilter);
        if (resultFilter == null || (resultFilter instanceof FilterList)) {
          continue;
        } else {
          return resultFilter;
        }
      }
      return null;
    } else {
      return new FilterGroupingWorker().group(fList);
    }
  } else if (filter instanceof SingleColumnValueFilter) {
    handleScvfOfOR((SingleColumnValueFilter) filter);
    return null;
  }
  // TODO: when SingleColumnRangeFilter is exposed, handle it here as well.
  // Filter other than SingleColumnValueFilter.
  return filter;
}
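For reference, the first tree in the javadoc above can be assembled with stock HBase filters; a sketch with made-up family, qualifier, and value names:

// AND(SCVF, OR(ROWFILTER, SCVF), SCVF) — handleFilterWithinOR would report the
// ROWFILTER up, so the OR branch is skipped for index scanning and only the two
// outer SCVFs contribute seek points.
FilterList orBranch = new FilterList(FilterList.Operator.MUST_PASS_ONE,
    new RowFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("row1"))),
    new SingleColumnValueFilter(
        Bytes.toBytes("cf"), Bytes.toBytes("q2"), CompareOp.EQUAL, Bytes.toBytes("v2")));
FilterList root = new FilterList(FilterList.Operator.MUST_PASS_ALL,
    new SingleColumnValueFilter(
        Bytes.toBytes("cf"), Bytes.toBytes("q1"), CompareOp.EQUAL, Bytes.toBytes("v1")),
    orBranch,
    new SingleColumnValueFilter(
        Bytes.toBytes("cf"), Bytes.toBytes("q3"), CompareOp.EQUAL, Bytes.toBytes("v3")));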
public static void setRowKeyOffset(Scan scan, int offset) {
  Filter filter = scan.getFilter();
  if (filter == null) {
    return;
  }
  if (filter instanceof FilterList) {
    FilterList filterList = (FilterList) filter;
    for (Filter childFilter : filterList.getFilters()) {
      setRowKeyOffset(childFilter, offset);
    }
  } else {
    setRowKeyOffset(filter, offset);
  }
}
private Filter handleFilterWithinAND(Filter filter) {
  if (filter instanceof FilterList) {
    FilterList fList = (FilterList) filter;
    if (fList.getOperator() == Operator.MUST_PASS_ONE) {
      return new FilterGroupingWorker().group(fList);
    } else {
      List<Filter> filters = fList.getFilters();
      for (Filter subFilter : filters) {
        handleFilterWithinAND(subFilter);
      }
    }
  } else if (filter instanceof SingleColumnValueFilter) {
    handleScvf((SingleColumnValueFilter) filter);
  }
  // TODO: when SingleColumnRangeFilter is exposed, handle it here as well.
  return null;
}
public Filter group(Filter filter) {
  if (filter instanceof FilterList) {
    FilterList fList = (FilterList) filter;
    // We need to create a new FilterList here, taking up only the filters of our interest.
    FilterList newFList = new FilterList(fList.getOperator());
    List<Filter> filters = fList.getFilters();
    if (fList.getOperator() == Operator.MUST_PASS_ONE) {
      for (Filter subFilter : filters) {
        Filter resultFilter = handleFilterWithinOR(subFilter);
        // A FilterList result is added back as-is. Any other non-null result means the OR
        // branch holds a filter type other than SCVF (possibly via a child OR branch), so
        // the whole branch cannot be considered for scanning.
        if (resultFilter instanceof FilterList) {
          newFList.addFilter(resultFilter);
        } else if (resultFilter != null) {
          return null;
        }
      }
      addORColsToFinalList(newFList);
      if (newFList.getFilters().isEmpty()) {
        return null;
      }
      return newFList;
    } else {
      // AND condition: as long as the condition is AND in one subtree, all of those can be
      // grouped under one new AND parent.
      for (Filter subFilter : filters) {
        Filter group = handleFilterWithinAND(subFilter);
        // A null group means all children are AND conditions; they are handled at once by
        // addANDColsToFinalList below.
        if (group != null) {
          newFList.addFilter(group);
        }
      }
      addANDColsToFinalList(newFList);
      if (newFList.getFilters().isEmpty()) {
        return null;
      }
      return newFList;
    }
  } else if (filter instanceof SingleColumnValueFilter
      || filter instanceof SingleColumnRangeFilter) {
    return filter;
  }
  return null;
}
public static void andFilterAtEnd(Scan scan, Filter andWithFilter) {
  if (andWithFilter == null) {
    return;
  }
  Filter filter = scan.getFilter();
  if (filter == null) {
    scan.setFilter(andWithFilter);
  } else if (filter instanceof FilterList
      && ((FilterList) filter).getOperator() == FilterList.Operator.MUST_PASS_ALL) {
    FilterList filterList = (FilterList) filter;
    List<Filter> allFilters = new ArrayList<Filter>(filterList.getFilters().size() + 1);
    allFilters.addAll(filterList.getFilters());
    allFilters.add(andWithFilter);
    scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, allFilters));
  } else {
    scan.setFilter(
        new FilterList(FilterList.Operator.MUST_PASS_ALL, Arrays.asList(filter, andWithFilter)));
  }
}
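A quick usage sketch for the helper above (the three filters are arbitrary examples): each call ANDs one more filter onto the scan, and an existing top-level MUST_PASS_ALL list is extended flat rather than nested:

Scan scan = new Scan();
andFilterAtEnd(scan, new FirstKeyOnlyFilter());   // no filter yet: set directly
andFilterAtEnd(scan, new PageFilter(100));        // wrapped into a MUST_PASS_ALL list
andFilterAtEnd(scan, new KeyOnlyFilter());        // appended to the existing AND list
FilterList all = (FilterList) scan.getFilter();   // one flat list with three filters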
/** Assembles the query filter conditions. */
private FilterList packageFilters(SearchParam searchParam, boolean isPage) {
  FilterList filterList = null;
  // MUST_PASS_ALL = AND semantics, MUST_PASS_ONE = OR semantics
  filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
  if (isPage) {
    filterList.addFilter(new FirstKeyOnlyFilter());
  }
  if (StringUtils.isNotEmpty(searchParam.getHbaseFilter())) {
    // Convert the FEL expression into an HBase filter.
    Filter userInputFilter = HbaseFilterUtil.createFilterByFelExpr(
        searchParam.getFamily(), searchParam.getHbaseFilter());
    logger.info("[Filter for export " + searchParam.getTableName() + "] " + userInputFilter);
    filterList.addFilter(userInputFilter);
  }
  return filterList;
}
/** FilterListAdapter should handle the fact that PageFilterAdapter returns null. */
@Test
public void testPageFilter() throws IOException {
  byte[] qualA = Bytes.toBytes("qualA");
  PageFilter pageFilter = new PageFilter(20);
  FilterList filterList = new FilterList(
      Operator.MUST_PASS_ALL,
      new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(qualA)),
      pageFilter);
  FilterAdapter adapter = FilterAdapter.buildAdapter();
  Optional<RowFilter> adapted = adapter.adaptFilter(
      new FilterAdapterContext(new Scan(), new DefaultReadHooks()), filterList);
  Assert.assertTrue(adapted.isPresent());
  Optional<RowFilter> qualifierAdapted = adapter.adaptFilter(
      new FilterAdapterContext(new Scan(), new DefaultReadHooks()),
      filterList.getFilters().get(0));
  Assert.assertEquals(qualifierAdapted.get(), adapted.get());
}
private void addORColsToFinalList(FilterList filterList) {
  for (Entry<Column, List<Value>> entry : colWithOperatorsOfOR.entrySet()) {
    List<Value> valueList = entry.getValue();
    for (Value value : valueList) {
      if (value.getOperator() == CompareOp.EQUAL) {
        filterList.addFilter(value.getFilter());
      } else {
        SingleColumnRangeFilter rangeFltr = new SingleColumnRangeFilter(
            entry.getKey().getFamily(), entry.getKey().getQualifier(),
            entry.getKey().getValuePartition(), value.getValue(), value.getOperator(),
            null, null);
        filterList.addFilter(rangeFltr);
      }
    }
  }
}
/**
 * Processes the traversal start event for a query {@link org.plasma.query.model.Literal literal}
 * within an {@link org.plasma.query.model.Expression expression}, creating an HBase {@link
 * org.apache.hadoop.hbase.filter.RowFilter row filter} and adding it to the filter hierarchy.
 * Looks at the context under which the literal is encountered and, if a user-defined row key
 * token configuration is found, creates a regular-expression-based HBase row filter.
 *
 * @param literal the expression literal
 * @throws GraphFilterException if no user-defined row-key token is configured for the current
 *     literal context.
 */
@Override
public void start(Literal literal) {
  String content = literal.getValue();
  if (this.contextProperty == null)
    throw new IllegalStateException("expected context property for literal");
  if (this.contextType == null)
    throw new IllegalStateException("expected context type for literal");
  if (this.rootType == null)
    throw new IllegalStateException("expected root type for literal");
  if (this.contextHBaseCompareOp == null)
    throw new IllegalStateException("expected context operator for literal");

  // Match the current property to a user-defined
  // row key token; on a match we can add a row filter.
  if (this.rowKeyFac.hasUserDefinedRowKeyToken(this.rootType, this.contextPropertyPath)) {
    KeyValue pair = new KeyValue(this.contextProperty, content);
    pair.setPropertyPath(this.contextPropertyPath);
    if (this.contextOpWildcard) pair.setIsWildcard(true);

    // FIXME: can't several of these be lumped together if in the same AND expression parent??
    List<KeyValue> pairs = new ArrayList<KeyValue>();
    pairs.add(pair);

    String rowKeyExpr = this.rowKeyFac.createRowKeyExpr(pairs);
    WritableByteArrayComparable exprComp = new RegexStringComparator(rowKeyExpr);
    Filter rowFilter = new RowFilter(this.contextHBaseCompareOp, exprComp);
    FilterList top = this.filterStack.peek();
    top.addFilter(rowFilter);
    if (log.isDebugEnabled())
      log.debug("created row filter: " + rowKeyExpr + " operator: " + this.contextHBaseCompareOp);
  } else
    throw new GraphFilterException(
        "no user defined row-key token for query path '" + this.contextPropertyPath + "'");
  super.start(literal);
}
@Test
public void scan_by_prefix_date() throws IOException {
  FilterList fl = new FilterList();
  Filter filter =
      new RowFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("20150903"));
  fl.addFilter(filter);
  Scan scan = new Scan();
  scan.setFilter(fl);
  long t1 = System.currentTimeMillis();
  ResultScanner rs = errorTable.getScanner(scan);
  Result result;
  int count = 0;
  while ((result = rs.next()) != null) {
    System.out.println("rowkey=" + new String(result.getRow()));
    System.out.println(
        "value=" + new String(result.getValue(fBytes, Bytes.toBytes("stacktrace"))));
    System.out.println();
    count++;
  }
  long t2 = System.currentTimeMillis();
  System.out.println("count=" + count + ", elapsedSeconds=" + ((t2 - t1) / 1000));
}
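A substring match like the one above cannot narrow the scanned key range, so every row is read. When the date is actually a rowkey prefix, bounding the scan is usually far cheaper; a sketch, assuming the rowkey starts with yyyyMMdd:

Scan scan = new Scan();
scan.setRowPrefixFilter(Bytes.toBytes("20150903")); // derives start/stop row from the prefix
// equivalent on older HBase versions:
// scan.setStartRow(Bytes.toBytes("20150903"));
// scan.setStopRow(Bytes.toBytes("20150904"));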
/**
 * Processes a {@link org.plasma.query.model.LogicalOperator logical operator} query traversal
 * start event. If the {@link FilterList filter list} on top of the filter stack is not an 'OR'
 * filter, then, since its operator is immutable and cannot be modified, creates an 'OR' {@link
 * FilterList filter list} and swaps the existing filters into it.
 */
public void start(LogicalOperator operator) {
  switch (operator.getValue()) {
    case AND:
      break; // the default filter list operator is must-pass-all (AND)
    case OR:
      FilterList top = this.filterStack.peek();
      if (top.getOperator().ordinal() != FilterList.Operator.MUST_PASS_ONE.ordinal()) {
        FilterList orList = new FilterList(FilterList.Operator.MUST_PASS_ONE);
        for (Filter filter : top.getFilters()) orList.addFilter(filter);
        top.getFilters().clear();
        this.filterStack.pop();
        FilterList previous = this.filterStack.peek();
        if (!previous.getFilters().remove(top))
          throw new IllegalStateException("could not remove filter list");
        previous.addFilter(orList);
        this.filterStack.push(orList);
      }
      break;
  }
  super.start(operator);
}
@Test
public void testParsedFilter() {
  String q1 = "@cluster = \"cluster1\" and @datacenter = \"dc1\" and @field3 = 100000";
  try {
    FilterList filterList = (FilterList) buildFilter(q1);
    Assert.assertEquals(FilterList.Operator.MUST_PASS_ONE, filterList.getOperator());
    Assert.assertEquals(1, filterList.getFilters().size());
    Assert.assertEquals(2, ((FilterList) filterList.getFilters().get(0)).getFilters().size());
  } catch (EagleQueryParseException e) {
    Assert.fail(e.getMessage());
  }

  String q2 =
      "@cluster = \"cluster1\" and @datacenter = \"dc1\" and ( @field3 = 100000 or @field3 < 100000)";
  try {
    FilterList filterList = (FilterList) buildFilter(q2);
    Assert.assertEquals(FilterList.Operator.MUST_PASS_ONE, filterList.getOperator());
    Assert.assertEquals(2, filterList.getFilters().size());
    Assert.assertEquals(2, ((FilterList) filterList.getFilters().get(0)).getFilters().size());
  } catch (EagleQueryParseException e) {
    Assert.fail(e.getMessage());
  }

  // Test parse success but bad type of value
  String q3 =
      "@cluster = \"cluster1\" and @datacenter = \"dc1\" and ( @field3 = 100000 or @field3 < \"bad_int_100000\")";
  boolean q3Ex = false;
  try {
    Assert.assertNull(buildFilter(q3));
  } catch (EagleQueryParseException e) {
    Assert.fail(e.getMessage());
  } catch (IllegalArgumentException e) {
    LOG.debug("Expect: ", e);
    Assert.assertTrue(e.getCause() instanceof NumberFormatException);
    q3Ex = true;
  }
  Assert.assertTrue(q3Ex);
}
public Scan intersectScan(
    Scan scan,
    final byte[] originalStartKey,
    final byte[] originalStopKey,
    final int keyOffset,
    boolean crossesRegionBoundary) {
  byte[] startKey = originalStartKey;
  byte[] stopKey = originalStopKey;
  if (stopKey.length > 0 && Bytes.compareTo(startKey, stopKey) >= 0) {
    return null;
  }
  boolean mayHaveRows = false;
  // Keep the keys as they are if we have a point lookup, as we've already resolved the
  // salt bytes in that case.
  final int scanKeyOffset =
      this.isSalted && !this.isPointLookup ? SaltingUtil.NUM_SALTING_BYTES : 0;
  assert (scanKeyOffset == 0 || keyOffset == 0);
  // Total offset for startKey/stopKey. Either 1 for salted tables or the prefix length
  // of the current region for local indexes. We'll never have a case where a table is
  // both salted and local.
  final int totalKeyOffset = scanKeyOffset + keyOffset;
  byte[] prefixBytes = ByteUtil.EMPTY_BYTE_ARRAY;
  if (totalKeyOffset > 0) {
    prefixBytes = ScanUtil.getPrefix(startKey, totalKeyOffset);
    /*
     * If our startKey to stopKey crosses a region boundary, consider everything after the
     * startKey, as our scan is always done within a single region. This prevents us from
     * having to prefix the key prior to knowing whether or not there may be an intersection.
     * We can't calculate whether or not we've crossed a region boundary for local indexes,
     * because we don't know the key offset of the next region, but only for the current one
     * (which is the one passed in). If the next prefix happened to be a subset of the previous
     * prefix, then this wouldn't detect that we crossed a region boundary.
     */
    if (crossesRegionBoundary) {
      stopKey = ByteUtil.EMPTY_BYTE_ARRAY;
    }
  }
  int scanStartKeyOffset = scanKeyOffset;
  byte[] scanStartKey = scan == null ? ByteUtil.EMPTY_BYTE_ARRAY : scan.getStartRow();
  // Compare ignoring key prefix and salt byte
  if (scanStartKey.length > 0) {
    if (startKey.length > 0
        && Bytes.compareTo(
                scanStartKey, scanKeyOffset, scanStartKey.length - scanKeyOffset,
                startKey, totalKeyOffset, startKey.length - totalKeyOffset)
            < 0) {
      scanStartKey = startKey;
      scanStartKeyOffset = totalKeyOffset;
    }
  } else {
    scanStartKey = startKey;
    scanStartKeyOffset = totalKeyOffset;
    mayHaveRows = true;
  }
  int scanStopKeyOffset = scanKeyOffset;
  byte[] scanStopKey = scan == null ? ByteUtil.EMPTY_BYTE_ARRAY : scan.getStopRow();
  if (scanStopKey.length > 0) {
    if (stopKey.length > 0
        && Bytes.compareTo(
                scanStopKey, scanKeyOffset, scanStopKey.length - scanKeyOffset,
                stopKey, totalKeyOffset, stopKey.length - totalKeyOffset)
            > 0) {
      scanStopKey = stopKey;
      scanStopKeyOffset = totalKeyOffset;
    }
  } else {
    scanStopKey = stopKey;
    scanStopKeyOffset = totalKeyOffset;
    mayHaveRows = true;
  }
  mayHaveRows =
      mayHaveRows
          || Bytes.compareTo(
                  scanStartKey, scanStartKeyOffset, scanStartKey.length - scanStartKeyOffset,
                  scanStopKey, scanStopKeyOffset, scanStopKey.length - scanStopKeyOffset)
              < 0;
  if (!mayHaveRows) {
    return null;
  }
  if (originalStopKey.length != 0 && scanStopKey.length == 0) {
    scanStopKey = originalStopKey;
  }
  Filter newFilter = null;
  // If the scan is using skip scan filter, intersect and replace the filter.
  if (scan == null || this.useSkipScanFilter()) {
    byte[] skipScanStartKey = scanStartKey;
    byte[] skipScanStopKey = scanStopKey;
    // If we have a keyOffset and we've used the startKey/stopKey that
    // were passed in (which have the prefix) for the above range check,
    // we need to remove the prefix before running our intersect method.
    // TODO: we could use skipScanFilter.setOffset(keyOffset) if both
    // the startKey and stopKey were used above *and* our intersect
    // method honored the skipScanFilter.offset variable.
    if (scanKeyOffset > 0) {
      if (skipScanStartKey != originalStartKey) { // original already has correct salt byte
        skipScanStartKey = replaceSaltByte(skipScanStartKey, prefixBytes);
      }
      if (skipScanStopKey != originalStopKey) {
        skipScanStopKey = replaceSaltByte(skipScanStopKey, prefixBytes);
      }
    } else if (keyOffset > 0) {
      if (skipScanStartKey == originalStartKey) {
        skipScanStartKey = stripPrefix(skipScanStartKey, keyOffset);
      }
      if (skipScanStopKey == originalStopKey) {
        skipScanStopKey = stripPrefix(skipScanStopKey, keyOffset);
      }
    }
    if (scan == null) {
      // 'filter' here resolves to the class field (the scan-local variable is declared below).
      return filter.hasIntersect(skipScanStartKey, skipScanStopKey) ? HAS_INTERSECTION : null;
    }
    Filter filter = scan.getFilter();
    SkipScanFilter newSkipScanFilter = null;
    if (filter instanceof SkipScanFilter) {
      SkipScanFilter oldSkipScanFilter = (SkipScanFilter) filter;
      newFilter =
          newSkipScanFilter = oldSkipScanFilter.intersect(skipScanStartKey, skipScanStopKey);
      if (newFilter == null) {
        return null;
      }
    } else if (filter instanceof FilterList) {
      FilterList oldList = (FilterList) filter;
      FilterList newList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
      newFilter = newList;
      for (Filter f : oldList.getFilters()) {
        if (f instanceof SkipScanFilter) {
          newSkipScanFilter = ((SkipScanFilter) f).intersect(skipScanStartKey, skipScanStopKey);
          if (newSkipScanFilter == null) {
            return null;
          }
          newList.addFilter(newSkipScanFilter);
        } else {
          newList.addFilter(f);
        }
      }
    }
    // TODO: it seems that our SkipScanFilter or HBase runs into problems if we don't
    // have an enclosing range when we do a point lookup.
    if (isPointLookup) {
      scanStartKey = ScanUtil.getMinKey(schema, newSkipScanFilter.getSlots(), slotSpan);
      scanStopKey = ScanUtil.getMaxKey(schema, newSkipScanFilter.getSlots(), slotSpan);
    }
  }
  if (newFilter == null) {
    newFilter = scan.getFilter();
  }
  Scan newScan = ScanUtil.newScan(scan);
  newScan.setFilter(newFilter);
  // If we have an offset (salted table or local index), we need to make sure to
  // prefix our scan start/stop row by the prefix of the startKey or stopKey that
  // were passed in. Our scan either doesn't have the prefix or has a placeholder
  // for it.
  if (totalKeyOffset > 0) {
    if (scanStartKey != originalStartKey) {
      scanStartKey = prefixKey(scanStartKey, scanKeyOffset, prefixBytes, keyOffset);
    }
    if (scanStopKey != originalStopKey) {
      scanStopKey = prefixKey(scanStopKey, scanKeyOffset, prefixBytes, keyOffset);
    }
  }
  // Don't let the stopRow of the scan go beyond the originalStopKey
  if (originalStopKey.length > 0 && Bytes.compareTo(scanStopKey, originalStopKey) > 0) {
    scanStopKey = originalStopKey;
  }
  if (scanStopKey.length > 0 && Bytes.compareTo(scanStartKey, scanStopKey) >= 0) {
    return null;
  }
  newScan.setAttribute(SCAN_ACTUAL_START_ROW, scanStartKey);
  newScan.setStartRow(scanStartKey);
  newScan.setStopRow(scanStopKey);
  if (keyOffset > 0) {
    newScan.setAttribute(STARTKEY_OFFSET, Bytes.toBytes(keyOffset));
  }
  return newScan;
}
private ResultScanner createHBaseResultScanner(RecordScan scan)
    throws RepositoryException, InterruptedException {
  Scan hbaseScan = new Scan();
  hbaseScan.setMaxVersions(1);

  if (scan.getRawStartRecordId() != null) {
    hbaseScan.setStartRow(scan.getRawStartRecordId());
  } else if (scan.getStartRecordId() != null) {
    hbaseScan.setStartRow(scan.getStartRecordId().toBytes());
  }

  if (scan.getRawStopRecordId() != null) {
    hbaseScan.setStopRow(scan.getRawStopRecordId());
  } else if (scan.getStopRecordId() != null) {
    hbaseScan.setStopRow(scan.getStopRecordId().toBytes());
  }

  // Filters
  FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);

  // filter out deleted records
  filterList.addFilter(REAL_RECORDS_FILTER);

  // add user's filter
  if (scan.getRecordFilter() != null) {
    Filter filter = filterFactory.createHBaseFilter(scan.getRecordFilter(), this, filterFactory);
    filterList.addFilter(filter);
  }

  hbaseScan.setFilter(filterList);
  hbaseScan.setCaching(scan.getCaching());
  hbaseScan.setCacheBlocks(scan.getCacheBlocks());

  ReturnFields returnFields = scan.getReturnFields();
  if (returnFields != null && returnFields.getType() != ReturnFields.Type.ALL) {
    RecordDecoder.addSystemColumnsToScan(hbaseScan);
    switch (returnFields.getType()) {
      case ENUM:
        for (QName field : returnFields.getFields()) {
          FieldTypeImpl fieldType = (FieldTypeImpl) typeManager.getFieldTypeByName(field);
          hbaseScan.addColumn(RecordCf.DATA.bytes, fieldType.getQualifier());
        }
        break;
      case NONE:
        // nothing to add
        break;
      default:
        throw new RuntimeException("Unrecognized ReturnFields type: " + returnFields.getType());
    }
  } else {
    hbaseScan.addFamily(RecordCf.DATA.bytes);
  }

  ResultScanner hbaseScanner;
  try {
    hbaseScanner = recordTable.getScanner(hbaseScan);
  } catch (IOException e) {
    throw new RecordException("Error creating scanner", e);
  }
  return hbaseScanner;
}
@Test
public void testWithUnescapedString() {
  ///////////////////////////////////
  // Tag filter with IN or EQUAL
  // Should use RowKeyFilter only
  ///////////////////////////////////
  String query = "@cluster = \"cluster1\" and @datacenter = \"dc1\" and @jobID = \"job.1234\"";
  try {
    FilterList filter = (FilterList) buildFilter(query);
    Assert.assertEquals(
        RowFilter.class,
        ((FilterList) filter.getFilters().get(0)).getFilters().get(0).getClass());
    Assert.assertFalse(
        "Should use rowkey filter only", filter.toString().matches(".*job.1234.*"));
  } catch (EagleQueryParseException e) {
    Assert.fail(e.getMessage());
  } catch (Exception ex) {
    Assert.fail(ex.getMessage());
  }

  query = "@cluster = \"cluster1\" and @datacenter = \"dc1\" and @jobID in (\"job_1234\")";
  try {
    FilterList filter = (FilterList) buildFilter(query);
    Assert.assertEquals(
        RowFilter.class,
        ((FilterList) filter.getFilters().get(0)).getFilters().get(0).getClass());
    Assert.assertFalse(
        "Should use rowkey filter only", filter.toString().matches(".*job_1234.*"));
  } catch (EagleQueryParseException e) {
    Assert.fail(e.getMessage());
  } catch (Exception ex) {
    Assert.fail(ex.getMessage());
  }

  query = "@cluster = \"cluster1\" and @datacenter = \"dc1\" and @jobID in (\"job.1234\")";
  try {
    FilterList filter = (FilterList) buildFilter(query);
    Assert.assertEquals(
        RowFilter.class,
        ((FilterList) filter.getFilters().get(0)).getFilters().get(0).getClass());
    Assert.assertFalse(
        "Should use rowkey filter only", filter.toString().matches(".*job.*1234.*"));
  } catch (EagleQueryParseException e) {
    Assert.fail(e.getMessage());
  } catch (Exception ex) {
    Assert.fail(ex.getMessage());
  }

  ///////////////////////////////
  // Tag with other operators
  ///////////////////////////////
  query = "@cluster = \"cluster1\" and @datacenter = \"dc1\" and @jobID =~ \"job_1234\"";
  try {
    FilterList filter = (FilterList) buildFilter(query);
    Assert.assertEquals(
        RowFilter.class,
        ((FilterList) filter.getFilters().get(0)).getFilters().get(0).getClass());
    Assert.assertTrue(filter.toString().matches(".*job_1234.*"));
  } catch (EagleQueryParseException e) {
    Assert.fail(e.getMessage());
  } catch (Exception ex) {
    Assert.fail(ex.getMessage());
  }

  query = "@cluster = \"cluster1\" and @datacenter = \"dc1\" and @jobID =~ \"job.1234\"";
  try {
    FilterList filter = (FilterList) buildFilter(query);
    Assert.assertEquals(
        RowFilter.class,
        ((FilterList) filter.getFilters().get(0)).getFilters().get(0).getClass());
    Assert.assertTrue(filter.toString().matches(".*job.1234.*"));
  } catch (EagleQueryParseException e) {
    Assert.fail(e.getMessage());
  } catch (Exception ex) {
    Assert.fail(ex.getMessage());
  }

  ///////////////////////////////
  // Field with EQUAL or IN
  // Should escape regexp chars
  ///////////////////////////////
  query = "@cluster = \"cluster1\" and @datacenter = \"dc1\" and @field7 = \"job_1234\"";
  try {
    FilterList filter = (FilterList) buildFilter(query);
    Assert.assertEquals(
        RowFilter.class,
        ((FilterList) filter.getFilters().get(0)).getFilters().get(0).getClass());
    Assert.assertTrue(filter.toString().matches(".*job_1234.*"));
  } catch (EagleQueryParseException e) {
    Assert.fail(e.getMessage());
  } catch (Exception ex) {
    ex.printStackTrace();
    Assert.fail(ex.getMessage());
  }

  query =
      "@cluster = \"cluster1\" and @datacenter = \"dc1\" and @field7 in (\"job.1234\",\"others\")";
  try {
    FilterList filter = (FilterList) buildFilter(query);
    Assert.assertEquals(
        RowFilter.class,
        ((FilterList) filter.getFilters().get(0)).getFilters().get(0).getClass());
    Assert.assertTrue(filter.toString().matches(".*job\\.1234.*"));
  } catch (EagleQueryParseException e) {
    Assert.fail(e.getMessage());
  } catch (Exception ex) {
    Assert.fail(ex.getMessage());
  }
}
public GCResult getBinFor(String chr, int gcContent) throws IOException {
  log.debug("Get GC bin: " + chr + " " + gcContent);

  FilterList filters = new FilterList(FilterList.Operator.MUST_PASS_ALL);
  filters.addFilter(
      new SingleColumnValueFilter(
          Bytes.toBytes("chr"), Bytes.toBytes("name"),
          CompareFilter.CompareOp.EQUAL, Bytes.toBytes(chr)));
  filters.addFilter(
      new SingleColumnValueFilter(
          Bytes.toBytes("gc"), Bytes.toBytes("min"),
          CompareFilter.CompareOp.LESS_OR_EQUAL, Bytes.toBytes(gcContent)));
  filters.addFilter(
      new SingleColumnValueFilter(
          Bytes.toBytes("gc"), Bytes.toBytes("max"),
          CompareFilter.CompareOp.GREATER_OR_EQUAL, Bytes.toBytes(gcContent)));

  Scan scan = new Scan();
  scan.setFilter(filters);
  ResultScanner scanner = this.getScanner(scan);
  Iterator<Result> rI = scanner.iterator();
  if (!rI.hasNext() && gcContent > 0) {
    // it's possible that I've hit the max; there might be a smarter way to do this with
    // filters but I can't think of one right now
    filters = new FilterList(FilterList.Operator.MUST_PASS_ALL);
    filters.addFilter(
        new SingleColumnValueFilter(
            Bytes.toBytes("chr"), Bytes.toBytes("name"),
            CompareFilter.CompareOp.EQUAL, Bytes.toBytes(chr)));
    filters.addFilter(
        new SingleColumnValueFilter(
            Bytes.toBytes("gc"), Bytes.toBytes("max"),
            CompareFilter.CompareOp.GREATER_OR_EQUAL, Bytes.toBytes(gcContent)));
    scan = new Scan();
    scan.setFilter(filters);
    scanner = this.getScanner(scan);
    rI = scanner.iterator();
    if (!rI.hasNext()) log.warn("No GC bin for " + chr + " " + gcContent);
  }
  if (!rI.hasNext()) {
    throw new IOException("Failed to retrieve any GC bins for chr " + chr + " GC=" + gcContent);
  }

  // only expect one result
  GCResult gcResult = createResult(rI.next());
  if (rI.hasNext())
    log.warn(
        "Found multiple matches for " + chr + " " + gcContent + " returning only the first.");
  scanner.close();
  return gcResult;
}
/**
 * Queries all records in a table, one page at a time.
 *
 * @param tableName
 * @return
 * @throws ParamIsNullException if a required parameter is null
 */
public static PaginationResult<List<Result>> queryTableByPagination(
    String tableName, Pagination pagination) throws ParamIsNullException {
  if (null == tableName) {
    throw new ParamIsNullException("tableName must not be null");
  }
  Connection conn = null;
  HTable table = null;
  ResultScanner scann = null;
  PaginationResult<List<Result>> result = new PaginationResult<List<Result>>();
  List<Result> resultList = new ArrayList<Result>();
  int currentPage = 1;
  try {
    conn = ConnectionFactory.createConnection(conf);
    table = (HTable) conn.getTable(TableName.valueOf(tableName));
    int pagesize = 10;
    Scan scan = new Scan();
    if (null != pagination) {
      pagesize = pagination.getPagesize();
      if (null != pagination.getNextPageRowKey() && pagination.getNextPageRowKey().length > 0) {
        scan.setStartRow(pagination.getNextPageRowKey());
      }
      currentPage = pagination.getCurrentPage();
    }
    // MUST_PASS_ALL = AND semantics, MUST_PASS_ONE = OR semantics
    FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
    Filter filter = new PageFilter(pagesize);
    filterList.addFilter(filter);
    scan.setFilter(filterList);
    scann = table.getScanner(scan);
    for (Result rs : scann) {
      resultList.add(rs);
    }

    // Populate the pagination metadata; an empty page has no next-page row key.
    Pagination page = new Pagination();
    if (!resultList.isEmpty()) {
      Result row = resultList.get(resultList.size() - 1);
      page.setNextPageRowKey(row.getRow());
    }
    page.setPagesize(pagesize);
    page.setTotalRows(
        null == HBaseUtil.getRowCount(tableName)
            ? 0
            : HBaseUtil.getRowCount(tableName).intValue());
    page.setCurrentPage(currentPage);
    result.setR(resultList);
    result.setPagination(page);
  } catch (Exception e) {
    logger.error("HBase pagination query failed, errMsg:{}", e.getMessage());
  } finally {
    if (null != scann) {
      scann.close();
    }
    if (null != table) {
      try {
        table.close();
      } catch (IOException e) {
        logger.error("HTable close exception, errMsg:{}", e.getMessage());
      }
    }
    if (null != conn) {
      try {
        conn.close();
      } catch (IOException e) {
        logger.error("Connection close exception, errMsg:{}", e.getMessage());
      }
    }
  }
  return result;
}
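Note on the snippet above: PageFilter is evaluated independently on each region server, so the scanner can hand back more than pagesize rows in total; a client-side cap is the usual guard. A minimal sketch, reusing the names from the method above:

// PageFilter limits rows per region; also stop reading once the page is full.
int fetched = 0;
for (Result rs : scann) {
  resultList.add(rs);
  if (++fetched >= pagesize) {
    break;
  }
}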
@Override
public Scan getRawScan(ItemData d, Map<String, HCompareOp> ops) {
  int startShopID = 0;
  int startItemID = 0;
  int endShopID = MAX_SHOP_ID;
  int endItemID = MAX_ITEM_ID;

  // Narrow the row range up front as a performance improvement.
  // shop_id given explicitly
  HCompareOp shopIDOp = ops.get("shop_id");
  if (shopIDOp == HCompareOp.EQUAL) {
    startShopID = d.shopID;
    endShopID = startShopID;
  }
  // item_id given explicitly as well
  HCompareOp itemIDOp = ops.get("item_id");
  if (itemIDOp == HCompareOp.EQUAL) {
    startItemID = d.itemID;
    endItemID = startItemID;
  }
  log.info(String.format("scan start row, shop_id=%d, item_id=%d", startShopID, startItemID));
  log.info(String.format("scan stop row, shop_id=%d, item_id=%d", endShopID, endItemID));

  byte[] startRow = encodeRowkey(startShopID, startItemID);
  byte[] endRow = encodeRowkey(endShopID, endItemID);
  Scan s = new Scan(startRow, endRow);
  s.addFamily(DATA_FAMILY);
  s.addFamily(META_FAMILY);
  s.setCacheBlocks(false);
  s.setMaxVersions();
  s.setCaching(DEFAULT_SCAN_CACHE);

  FilterList fl = new FilterList();
  for (String column : ops.keySet()) {
    byte[] value;
    byte[] family = DATA_FAMILY;
    if ("ctime".equals(column)) {
      value = Bytes.toBytes(d.ctime);
      family = META_FAMILY;
    } else if ("shop_id".equals(column)) {
      value = Bytes.toBytes(d.shopID);
    } else if ("item_id".equals(column)) {
      value = Bytes.toBytes(d.itemID);
    } else if ("genre_id".equals(column)) {
      value = Bytes.toBytes(d.genreID);
    } else if ("price".equals(column)) {
      value = Bytes.toBytes(d.price);
    } else if ("full_item_url".equals(column)) {
      value = Bytes.toBytes(d.fullItemUrl);
    } else if ("item_name".equals(column)) {
      value = Bytes.toBytes(d.itemName);
    } else {
      // ignore unknown columns
      continue;
    }
    byte[] qualifier = Bytes.toBytes(column);
    HCompareOp hop = ops.get(column);
    CompareOp op = HClient.toCompareOp(hop);
    SingleColumnValueFilter filter = new SingleColumnValueFilter(family, qualifier, op, value);
    filter.setFilterIfMissing(true);
    fl.addFilter(filter);
  }
  s.setFilter(fl);
  return s;
}
public List<Point2D.Double> debugColumnVersion(
    String timestamp, double latitude, double longitude, double radius) {
  this.getStatLog(this.STAT_FILE_NAME);
  long sTime = System.currentTimeMillis();

  // build up a raster
  XRaster raster = new XRaster(this.space, this.min_size_of_height, this.max_num_of_column);
  Point2D.Double point = new Point2D.Double(latitude, longitude);
  ResultScanner rScanner = null;

  // return result
  HashMap<String, String> results = new HashMap<String, String>();
  ArrayList<Point2D.Double> returnPoints = new ArrayList<Point2D.Double>();
  try {
    // match rect to find the subspace it belongs to
    XBox[] match_boxes = raster.match(latitude, longitude, radius);
    String[] rowRange = new String[2];
    rowRange[0] = match_boxes[0].getRow();
    rowRange[1] = match_boxes[1].getRow() + "0";
    String[] c = raster.getColumns(match_boxes[0], match_boxes[1]);

    // The version count is hard-coded here because there is currently no way to
    // know how many objects one cell holds.
    FilterList fList = new FilterList();
    fList.addFilter(this.hbaseUtil.getInclusiveFilter(rowRange[1]));
    rScanner = this.hbaseUtil.getResultSet(rowRange, fList, this.familyName, c, 1000000);

    BixiReader reader = new BixiReader();
    int count = 0;
    int accepted = 0;
    int max_column = 0;
    int min_column = 10000;
    int max_version = 0;
    int min_version = 10000;
    int row_count = 0;
    int byte_length = 0;

    for (Result r : rScanner) {
      byte_length = r.getBytes().getLength();
      row_count++;
      NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> resultMap =
          r.getMap();
      int count_column = 0;
      for (byte[] family : resultMap.keySet()) {
        NavigableMap<byte[], NavigableMap<Long, byte[]>> columns = resultMap.get(family);
        count_column = 0;
        for (byte[] col : columns.keySet()) {
          NavigableMap<Long, byte[]> values = columns.get(col);
          count_column++;
          if (values.values().size() > max_version) {
            max_version = values.values().size();
          }
          if (values.values().size() < min_version) {
            min_version = values.values().size();
          }
          for (Long version : values.keySet()) {
            count++;
            // get the distance between this point and the given point
            XStation station = reader.getStationFromJson(Bytes.toString(values.get(version)));
            Point2D.Double resPoint =
                new Point2D.Double(station.getLatitude(), station.getlongitude());
            double distance = resPoint.distance(point);
            if (Bytes.toString(col).equals("0011")) {
              /*
              System.out.println("!!!! key=>" + Bytes.toString(r.getRow())
                  + ";column=>" + Bytes.toString(col)
                  + ";version=>" + version
                  + ";point=>" + resPoint.toString());
              */
            }
            if (distance <= radius) {
              returnPoints.add(resPoint);
              // System.out.println("row=>" + Bytes.toString(r.getRow())
              //     + ";colum=>" + Bytes.toString(col) + ";version=>" + version
              //     + ";station=>" + station.getId() + ";distance=>" + distance);
              accepted++;
              results.put(station.getId(), String.valueOf(distance));
            }
          }
        }
        if (count_column > max_column) max_column = count_column;
        if (count_column < min_column) min_column = count_column;
      }
    }
    System.out.println("byte_length=>" + byte_length + ";row_count=>" + row_count);
    System.out.println(
        "max_column=>" + max_column + ";min_column=>" + min_column
            + ";max_version=>" + max_version + ";min_version=>" + min_version);
    long eTime = System.currentTimeMillis();
    System.out.println(
        "count=>" + count + ";accepted=>" + accepted + ";time=>" + (eTime - sTime));
    String outStr = "radius=>" + radius + ";count=>" + count + ";accepted=>" + accepted
        + ";time=>" + (eTime - sTime) + ";row_stride=>" + this.min_size_of_height
        + ";columns=>" + this.max_num_of_column;
    this.writeStat(outStr);
  } catch (Exception e) {
    e.printStackTrace();
  } finally {
    this.hbaseUtil.closeTableHandler();
    this.closeStatLog();
  }
  return returnPoints;
}
public static boolean intersectScanRange(
    Scan scan, byte[] startKey, byte[] stopKey, boolean useSkipScan) {
  boolean mayHaveRows = false;
  int offset = 0;
  if (ScanUtil.isLocalIndex(scan)) {
    offset = startKey.length != 0 ? startKey.length : stopKey.length;
  }
  byte[] existingStartKey = scan.getStartRow();
  byte[] existingStopKey = scan.getStopRow();
  if (existingStartKey.length > 0) {
    if (startKey.length == 0 || Bytes.compareTo(existingStartKey, startKey) > 0) {
      startKey = existingStartKey;
    }
  } else {
    mayHaveRows = true;
  }
  if (existingStopKey.length > 0) {
    if (stopKey.length == 0 || Bytes.compareTo(existingStopKey, stopKey) < 0) {
      stopKey = existingStopKey;
    }
  } else {
    mayHaveRows = true;
  }
  scan.setStartRow(startKey);
  scan.setStopRow(stopKey);
  if (offset > 0 && useSkipScan) {
    byte[] temp = null;
    if (startKey.length != 0) {
      temp = new byte[startKey.length - offset];
      System.arraycopy(startKey, offset, temp, 0, startKey.length - offset);
      startKey = temp;
    }
    if (stopKey.length != 0) {
      temp = new byte[stopKey.length - offset];
      System.arraycopy(stopKey, offset, temp, 0, stopKey.length - offset);
      stopKey = temp;
    }
  }
  mayHaveRows = mayHaveRows || Bytes.compareTo(scan.getStartRow(), scan.getStopRow()) < 0;

  // If the scan is using skip scan filter, intersect and replace the filter.
  if (mayHaveRows && useSkipScan) {
    Filter filter = scan.getFilter();
    if (filter instanceof SkipScanFilter) {
      SkipScanFilter oldFilter = (SkipScanFilter) filter;
      SkipScanFilter newFilter = oldFilter.intersect(startKey, stopKey);
      if (newFilter == null) {
        return false;
      }
      // Intersect found: replace skip scan with intersected one
      scan.setFilter(newFilter);
    } else if (filter instanceof FilterList) {
      FilterList oldList = (FilterList) filter;
      FilterList newList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
      for (Filter f : oldList.getFilters()) {
        if (f instanceof SkipScanFilter) {
          SkipScanFilter newFilter = ((SkipScanFilter) f).intersect(startKey, stopKey);
          if (newFilter == null) {
            return false;
          }
          newList.addFilter(newFilter);
        } else {
          newList.addFilter(f);
        }
      }
      scan.setFilter(newList);
    }
  }
  return mayHaveRows;
}