/** * Core impl. * * @param n The expected #of index entries (this value is ignored for {@link IndexSegment}s). * @param p The desired error rate for the bloom filter at that #of index entries (or at the * actual #of index entries for an {@link IndexSegment}). * @param maxP The maximum error rate for the bloom filter for a {@link BTree} (it will be * disabled for a {@link BTree} once the bloom filter can be expected to realize this error * rate). * @throws IllegalArgumentException if <i>n</i> is non-positive. * @throws IllegalArgumentException unless <i>p</i> lies in (0:1]. * @throws IllegalArgumentException * @throws IllegalArgumentException unless <i>maxP</i> lies in (<i>p</i>:1]. */ public BloomFilterFactory(final int n, final double p, final double maxP) { if (n <= 0) throw new IllegalArgumentException(); if (p <= 0d || p > 1d) throw new IllegalArgumentException(); if (maxP <= p || maxP > 1d) throw new IllegalArgumentException(); this.n = n; this.p = p; this.maxP = maxP; // #of hash functions. final int k = BloomFilter.getHashFunctionCount(p); // bit length of the filter. final long m = BloomFilter.getBitLength(k, n); /* * The maximum #of index entries before we disable the filter because * the expected performance will be worse than the specified maximum * error rate. */ this.maxN = BloomFilter.getEntryCountForErrorRate(k, m, maxP); }
/**
 * Reports the expected-insertions figure of the Bloom filter obtained for the given slice.
 *
 * <p>The filter is resolved through {@code getOrLoadBloomFilter}; the value it reports is cast
 * to {@code long} and returned boxed.
 *
 * <p>NOTE(review): the parameter is annotated nullable but no null check happens here; how a
 * null slice is treated depends on {@code getOrLoadBloomFilter} - confirm.
 *
 * @param bloomFilterSlice serialized Bloom filter value
 * @return the expected number of insertions reported by the filter
 */
@SqlType(StandardTypes.BIGINT)
@Nullable
@SqlNullable
public static Long bloomFilterExpectedInsertions(
    @SqlNullable @SqlType(BloomFilterType.TYPE) Slice bloomFilterSlice) {
  final BloomFilter filter = getOrLoadBloomFilter(bloomFilterSlice);
  final long expectedInsertions = (long) filter.getExpectedInsertions();
  return expectedInsertions;
}
/** * Build the index on the given file. * * @param file , RegionDataFile, should be sorted on key in ascend order * @param blockSize * @param blockCount the approximate blocks in each index entry.This factor is useful to balance * the key/values in each index entry,and also have a influence on how many blocks the system * load when the system try to load data from file system. * @return * @throws IOException */ public static int build( List<IndexEntry> list, BloomFilter filter, String file, int blockSize, int blockCount) throws IOException { if (filter != null) { filter.clear(); } IBlockInputStream in = new KVInputStream(DFSManager.getDFS().open(new Path(file)), blockSize, 0, 0); int keyNum = 0; byte[] prevKey = null, curKey = null; int prevBlock = 0, curBlock = 0; int offset = 0; KeyValue kv = null; try { while (true) { int len = in.readInt(); if (len == 0) { // prevKey == null means the index entry has been flushed // before if (prevKey != null) { list.add(new IndexEntry(prevKey, curKey, prevBlock, curBlock, offset)); } in.close(); break; } else { in.skipBytes(-4); } kv = KeyValueIOUtil.readFromExternal(in); keyNum++; curKey = kv.getKey(); if (filter != null) { filter.set(curKey); } if (prevKey == null) { prevKey = curKey; } curBlock = in.getCurrentBlock(); int count = curBlock - prevBlock; if (count >= blockCount || (count == blockCount - 1 && in.getBlockAvailable() < 4)) { list.add(new IndexEntry(prevKey, curKey, prevBlock, curBlock, offset)); offset = in.getBlockPos() % blockSize; prevBlock = curBlock; if (count == blockCount - 1) { prevBlock++; offset = 0; } prevKey = null; } } } catch (EOFException e) { if (prevKey != null && curKey != null) { list.add(new IndexEntry(prevKey, curKey, prevBlock, curBlock, offset)); } in.close(); } return keyNum; }
/**
 * Writes the current Bloom filter to {@code DIRECTORY_FILE} via
 * {@link Context#openFileOutput(String, int)} (mode 0).
 *
 * <p>The filter bytes are Base64-encoded and stored together with the hash count in a
 * {@code NumberFilterStorage}. Nothing is written when no filter is loaded. I/O failures are
 * logged as warnings and not propagated.
 *
 * @param context context used to open the output file
 */
public void serializeToFile(Context context) {
  if (this.bloomFilter == null) {
    return;
  }

  try {
    FileOutputStream fout = context.openFileOutput(DIRECTORY_FILE, 0);
    try {
      String numberFilter = Base64.encodeBytes(bloomFilter.getFilter());
      NumberFilterStorage storage =
          new NumberFilterStorage(numberFilter, bloomFilter.getHashCount());

      storage.serializeToStream(fout);
    } finally {
      // Release the file handle even if encoding or serialization fails.
      fout.close();
    }
  } catch (IOException ioe) {
    Log.w("NumberFilter", ioe);
  }
}
/** * Builds a FilterLoad message * * @param peer Destination peer * @param filter Bloom filter * @return 'filterload' message */ public static Message buildFilterLoadMessage(Peer peer, BloomFilter filter) { // // Build the message // ByteBuffer buffer = MessageHeader.buildMessage("filterload", filter.getBytes()); return new Message(buffer, peer, MessageHeader.MessageCommand.FILTERLOAD); }
public void put(WriteFieldAccess write) { // Add to bloom filter bloomFilter.add(write.hashCode()); // Add to write set writeSet.put(write, write); }
@Test(dataProvider = "strategies") public void shouldUseSpecifiedCapacityAndFalsePositiveProbability(ConcurrencyStrategy strategy) { // Given Funnel<Integer> funnel = Funnels.integerFunnel(); long capacity = 100; double fpp = 0.01d; // When BloomFilter<Integer> result = strategy.<Integer>getFactory(funnel).create(capacity, fpp); // Then assertThat(result.getStatistics().getCapacity()).as("capacity").isEqualTo(capacity); assertThat(result.getStatistics().getConfiguredFalsePositiveProbability()) .as("falsePositiveProbability") .isEqualTo(fpp); }
public static void main(String[] args) throws IOException { // Set the path to the initial text-file. Path path = Paths.get("assets/", "words.txt"); // Set the path to the "test-text-file". Path pathDeutsch = Paths.get("assets/", "deutsch.txt"); // Construct a new Bloom-Filter with 58111-elements and an // error-probability of 5%. BloomFilter bf = new BloomFilter(58111, 0.05); // Hash every line of the initial file and add the word to the data- // structure of the bloom-filter. Files.lines(path).forEach(s -> bf.addToFilter(s)); int counter = 0; int linesDeutsch = 71700; /** TEST-SECTION Tests the bloom-filter. */ // Calculate for every line (aka word) of the test-file if // it is already a member of the data-structure. Object[] strings = Files.lines(pathDeutsch).toArray(); for (Object s : strings) { if (bf.checkIfExists((String) s)) { counter++; } } System.out.println( counter + " von " + linesDeutsch + " wurden falsch erkannt: " + (double) counter / linesDeutsch + "%"); counter = 0; for (boolean b : bf.booleans) { if (b) counter++; } System.out.println(counter + "/" + bf.booleans.length); }
@Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.activity_main); tvSet = (TextView) findViewById(R.id.tv_set); tvTestSet = (TextView) findViewById(R.id.tv_test_set); tvOutput = (TextView) findViewById(R.id.tv_output); tvSet.append("["); // creating set for adding in adding in array for (int i = 0; i < set.length; i++) { // if(set.length == i) // break; int no = random.nextInt(High - Low) + Low; set[i] = no; bloomFilter.add(no + ""); tvSet.append(no + ""); if (set.length - 1 != i) tvSet.append(","); } tvSet.append("]"); tvTestSet.append("["); // creating set for testing for (int i = 0; i < testSet.length; i++) { // if(testSet.length == i) // break; int no = random.nextInt(High - Low) + Low; testSet[i] = no; tvTestSet.append(no + ""); if (testSet.length - 1 != i) tvTestSet.append(","); } tvTestSet.append("]"); for (int i = 0; i < testSet.length; i++) { if (bloomFilter.contains(testSet[i] + "")) { tvOutput.append("\n" + testSet[i] + ", " + bloomFilter.expectedFalsePositiveProbability()); Log.d("test", "\n" + testSet[i] + ", " + bloomFilter.expectedFalsePositiveProbability()); } else { Log.d("test", "MainActivity:onCreate: bloom filter does not contain " + testSet[i]); } } }
/** * Creates the Bloom filter * * @param msg Message * @param inBuffer Input buffer * @param msgListener Message listener * @throws EOFException End-of-data processing input stream * @throws VerificationException Verification error */ public static void processFilterLoadMessage( Message msg, SerializedBuffer inBuffer, MessageListener msgListener) throws EOFException, VerificationException { // // Load the new bloom filter // Peer peer = msg.getPeer(); BloomFilter newFilter = new BloomFilter(inBuffer); BloomFilter oldFilter; synchronized (peer) { oldFilter = peer.getBloomFilter(); newFilter.setPeer(peer); peer.setBloomFilter(newFilter); } // // Notify the message listener // msgListener.processFilterLoad(msg, oldFilter, newFilter); }
/**
 * Writes a Bloom filter to the given output.
 *
 * <p>Layout: the hash count (int), the number of words in the bitset (int), then that many
 * words (long each), taken page by page from the bitset.
 *
 * @param bf Bloom filter to write
 * @param dos destination
 * @throws IOException if writing fails
 */
public void serialize(BloomFilter bf, DataOutput dos) throws IOException {
  final int wordCount = bf.bitset.getNumWords();
  final int wordsPerPage = bf.bitset.getPageSize();
  final int pages = bf.bitset.getPageCount();

  dos.writeInt(bf.getHashCount());
  dos.writeInt(wordCount);

  // Words still to be written; the last page may be only partially used.
  int remaining = wordCount;
  for (int page = 0; page < pages; page++) {
    final long[] words = bf.bitset.getPage(page);
    final int limit = Math.min(wordsPerPage, remaining);
    for (int i = 0; i < limit; i++) {
      dos.writeLong(words[i]);
    }
    remaining -= limit;
  }
}
/** * Calculates a serialized size of the given Bloom Filter * * @see BloomFilterSerializer#serialize(BloomFilter, DataOutput) * @param bf Bloom filter to calculate serialized size * @return serialized size of the given bloom filter */ public long serializedSize(BloomFilter bf, TypeSizes typeSizes) { int bitLength = bf.bitset.getNumWords(); int pageSize = bf.bitset.getPageSize(); int pageCount = bf.bitset.getPageCount(); int size = 0; size += typeSizes.sizeof(bf.getHashCount()); // hash count size += typeSizes.sizeof(bitLength); // length for (int p = 0; p < pageCount; p++) { long[] bits = bf.bitset.getPage(p); for (int i = 0; i < pageSize && bitLength-- > 0; i++) size += typeSizes.sizeof(bits[i]); // bucket } return size; }
/** Empties both the Bloom filter and the write set, in that order. */
public void clear() {
  this.bloomFilter.clear();
  this.writeSet.clear();
}
public WriteFieldAccess contains(ReadFieldAccess read) { // Check if it is already included in the write set return bloomFilter.contains(read.hashCode()) ? writeSet.get(read) : null; }
/** An element that was added to the filter must afterwards be reported as contained. */
public void test_add() {
  final String element = "quickpoint";

  filter.add(element);

  final boolean present = filter.contains(element);
  assertTrue(present);
}
/**
 * Tells whether the given number, after formatting with {@code PhoneNumberFormatter}, is
 * reported as present by the Bloom filter.
 *
 * @param context context passed to the number formatter
 * @param number the number to look up
 * @return {@code false} when no filter is loaded, otherwise the filter's answer for the
 *     formatted number
 */
public boolean containsNumber(Context context, String number) {
  return bloomFilter != null
      && bloomFilter.contains(PhoneNumberFormatter.formatNumber(context, number));
}
/**
 * After adding a lower-case string, the filter is expected to report the upper-case spelling of
 * the same string as contained.
 */
public void test_case_in_sensitive() {
  final String lowerCase = "quickpoint";
  filter.add(lowerCase);

  final String upperCase = "QUICKPOINT";
  assertTrue(filter.contains(upperCase));
}