/**
   * Core impl. Validates the arguments, records them, and derives the maximum #of index entries
   * the filter may hold before it is expected to exceed <i>maxP</i>.
   *
   * @param n The expected #of index entries (this value is ignored for {@link IndexSegment}s).
   * @param p The desired error rate for the bloom filter at that #of index entries (or at the
   *     actual #of index entries for an {@link IndexSegment}).
   * @param maxP The maximum error rate for the bloom filter for a {@link BTree} (it will be
   *     disabled for a {@link BTree} once the bloom filter can be expected to realize this error
   *     rate).
   * @throws IllegalArgumentException if <i>n</i> is non-positive.
   * @throws IllegalArgumentException unless <i>p</i> lies in (0:1] (NaN is rejected).
   * @throws IllegalArgumentException unless <i>maxP</i> lies in (<i>p</i>:1] (NaN is rejected).
   */
  public BloomFilterFactory(final int n, final double p, final double maxP) {

    if (n <= 0) throw new IllegalArgumentException("n must be positive: " + n);

    // The range checks are written as negated "in range" tests: every comparison
    // with NaN is false, so "p <= 0d || p > 1d" would silently accept NaN.
    if (!(p > 0d && p <= 1d)) throw new IllegalArgumentException("p must lie in (0:1]: " + p);
    if (!(maxP > p && maxP <= 1d))
      throw new IllegalArgumentException("maxP must lie in (p:1]: p=" + p + ", maxP=" + maxP);

    this.n = n;

    this.p = p;

    this.maxP = maxP;

    // #of hash functions.
    final int k = BloomFilter.getHashFunctionCount(p);

    // bit length of the filter.
    final long m = BloomFilter.getBitLength(k, n);

    /*
     * The maximum #of index entries before we disable the filter because
     * the expected performance will be worse than the specified maximum
     * error rate.
     */
    this.maxN = BloomFilter.getEntryCountForErrorRate(k, m, maxP);
  }
 /**
  * Reports the expected insertion count of the bloom filter obtained from the given slice.
  *
  * @param bloomFilterSlice slice handed to {@code getOrLoadBloomFilter} to obtain the filter
  * @return the filter's expected insertions, converted to {@code long}
  */
 @SqlType(StandardTypes.BIGINT)
 @Nullable
 @SqlNullable
 public static Long bloomFilterExpectedInsertions(
     @SqlNullable @SqlType(BloomFilterType.TYPE) Slice bloomFilterSlice) {
   final BloomFilter filter = getOrLoadBloomFilter(bloomFilterSlice);
   final long expectedInsertions = (long) filter.getExpectedInsertions();
   return expectedInsertions;
 }
Esempio n. 3
0
 /**
  * Builds the index for the given file and, when a filter is supplied, records every key read in
  * that filter.
  *
  * <p>Key/value records are read sequentially. An index entry is appended to {@code list} each
  * time the records read since the previous entry span roughly {@code blockCount} blocks, and once
  * more for any pending range when the end of the data is reached (either a zero length marker or
  * an {@link EOFException}).
  *
  * @param list receives the generated index entries, in file order
  * @param filter bloom filter to populate; it is cleared first. May be {@code null}, in which case
  *     no filter is maintained
  * @param file path of the RegionDataFile; should be sorted on key in ascending order
  * @param blockSize block size passed to the input stream and used to compute in-block offsets
  * @param blockCount the approximate number of blocks covered by each index entry. This factor
  *     balances the number of key/values per index entry and influences how many blocks are
  *     loaded when data is read back from the file system
  * @return the number of key/value records read
  * @throws IOException if opening, reading or closing the file fails
  */
 public static int build(
     List<IndexEntry> list, BloomFilter filter, String file, int blockSize, int blockCount)
     throws IOException {
   if (filter != null) {
     filter.clear();
   }
   IBlockInputStream in =
       new KVInputStream(DFSManager.getDFS().open(new Path(file)), blockSize, 0, 0);
   int keyNum = 0;
   byte[] prevKey = null, curKey = null;
   int prevBlock = 0, curBlock = 0;
   int offset = 0;
   try {
     while (true) {
       int len = in.readInt();
       if (len == 0) {
         // prevKey == null means the index entry has been flushed
         // before
         if (prevKey != null) {
           list.add(new IndexEntry(prevKey, curKey, prevBlock, curBlock, offset));
         }
         break;
       }
       // Presumably steps back over the length just read so that the record
       // reader below sees it again -- confirm against IBlockInputStream.
       in.skipBytes(-4);

       KeyValue kv = KeyValueIOUtil.readFromExternal(in);
       keyNum++;
       curKey = kv.getKey();
       if (filter != null) {
         filter.set(curKey);
       }
       if (prevKey == null) {
         // First key of a new index entry.
         prevKey = curKey;
       }
       curBlock = in.getCurrentBlock();
       int count = curBlock - prevBlock;
       if (count >= blockCount || (count == blockCount - 1 && in.getBlockAvailable() < 4)) {
         list.add(new IndexEntry(prevKey, curKey, prevBlock, curBlock, offset));
         offset = in.getBlockPos() % blockSize;
         prevBlock = curBlock;
         if (count == blockCount - 1) {
           // Fewer than 4 bytes remain in the current block, so the next
           // entry starts at the beginning of the following block.
           prevBlock++;
           offset = 0;
         }
         prevKey = null;
       }
     }
   } catch (EOFException e) {
     // End of data without a zero length marker: flush the pending entry.
     if (prevKey != null && curKey != null) {
       list.add(new IndexEntry(prevKey, curKey, prevBlock, curBlock, offset));
     }
   } finally {
     // Close on every path, including non-EOF I/O failures, so the
     // underlying stream is never leaked.
     in.close();
   }
   return keyNum;
 }
Esempio n. 4
0
  /**
   * Writes the current bloom filter to the private file {@code DIRECTORY_FILE}.
   *
   * <p>Does nothing when no filter is present. The filter bytes are Base64 encoded and stored
   * together with the hash count. I/O failures are logged as warnings and not propagated.
   *
   * @param context context used to open the output file
   */
  public void serializeToFile(Context context) {
    if (this.bloomFilter == null) return;

    // try-with-resources guarantees the stream is closed even when encoding
    // or serialization throws.
    try (FileOutputStream fout = context.openFileOutput(DIRECTORY_FILE, 0)) {
      String numberFilter = Base64.encodeBytes(bloomFilter.getFilter());
      NumberFilterStorage storage =
          new NumberFilterStorage(numberFilter, bloomFilter.getHashCount());

      storage.serializeToStream(fout);
    } catch (IOException ioe) {
      Log.w("NumberFilter", ioe);
    }
  }
 /**
  * Creates a 'filterload' message carrying the bytes of the given Bloom filter.
  *
  * @param peer Destination peer
  * @param filter Bloom filter whose bytes form the message payload
  * @return 'filterload' message addressed to the peer
  */
 public static Message buildFilterLoadMessage(Peer peer, BloomFilter filter) {
   // The payload is wrapped by the message header builder and handed
   // straight to the message.
   return new Message(
       MessageHeader.buildMessage("filterload", filter.getBytes()),
       peer,
       MessageHeader.MessageCommand.FILTERLOAD);
 }
Esempio n. 6
0
  /**
   * Records a write access: its hash code goes into the bloom filter and the access itself is
   * stored in the write set, keyed by itself.
   *
   * @param write the write access to record
   */
  public void put(WriteFieldAccess write) {
    final int hash = write.hashCode();

    // Bloom filter first, then the write set.
    this.bloomFilter.add(hash);
    this.writeSet.put(write, write);
  }
  /**
   * Verifies that a filter created through the strategy's factory reports the capacity and false
   * positive probability it was created with.
   */
  @Test(dataProvider = "strategies")
  public void shouldUseSpecifiedCapacityAndFalsePositiveProbability(ConcurrencyStrategy strategy) {
    // Arrange: requested configuration.
    final long expectedCapacity = 100;
    final double expectedFpp = 0.01d;
    final Funnel<Integer> integerFunnel = Funnels.integerFunnel();

    // Act: create the filter.
    final BloomFilter<Integer> filter =
        strategy.<Integer>getFactory(integerFunnel).create(expectedCapacity, expectedFpp);

    // Assert: the statistics echo the requested configuration.
    assertThat(filter.getStatistics().getCapacity()).as("capacity").isEqualTo(expectedCapacity);
    assertThat(filter.getStatistics().getConfiguredFalsePositiveProbability())
        .as("falsePositiveProbability")
        .isEqualTo(expectedFpp);
  }
  /**
   * Fills a bloom filter with the words of {@code assets/words.txt}, probes it with every line of
   * {@code assets/deutsch.txt}, and prints the hit rate followed by the number of set bits.
   *
   * @param args unused
   * @throws IOException if either word list cannot be read
   */
  public static void main(String[] args) throws IOException {

    // Set the path to the initial text-file.
    Path path = Paths.get("assets/", "words.txt");

    // Set the path to the "test-text-file".
    Path pathDeutsch = Paths.get("assets/", "deutsch.txt");

    // Construct a new Bloom-Filter with 58111-elements and an
    // error-probability of 5%.
    BloomFilter bf = new BloomFilter(58111, 0.05);

    // Hash every line of the initial file and add the word to the data-
    // structure of the bloom-filter. readAllLines closes the file itself,
    // unlike an unclosed Files.lines stream.
    for (String word : Files.readAllLines(path)) {
      bf.addToFilter(word);
    }

    int counter = 0;
    // Counted from the file instead of being hard-coded, so the reported
    // rate stays correct when the test file changes.
    int linesDeutsch = 0;

    // TEST-SECTION: tests the bloom-filter.

    // Calculate for every line (aka word) of the test-file if
    // it is already a member of the data-structure.
    for (String word : Files.readAllLines(pathDeutsch)) {
      linesDeutsch++;
      if (bf.checkIfExists(word)) {
        counter++;
      }
    }

    // The ratio is scaled by 100 because it is printed with a '%' sign.
    System.out.println(
        counter
            + " von "
            + linesDeutsch
            + " wurden falsch erkannt: "
            + (100.0 * counter / linesDeutsch)
            + "%");

    // Count how many bits of the filter are set.
    counter = 0;
    for (boolean b : bf.booleans) {
      if (b) counter++;
    }

    System.out.println(counter + "/" + bf.booleans.length);
  }
  /**
   * Fills the member set and the test set with random numbers, shows both in their text views, and
   * reports every test value the bloom filter claims to contain.
   *
   * @param savedInstanceState forwarded to the superclass
   */
  @Override
  protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_main);

    tvSet = (TextView) findViewById(R.id.tv_set);
    tvTestSet = (TextView) findViewById(R.id.tv_test_set);
    tvOutput = (TextView) findViewById(R.id.tv_output);

    // Member set: every value is stored, added to the filter and displayed.
    tvSet.append("[");
    for (int index = 0; index < set.length; index++) {
      final int value = random.nextInt(High - Low) + Low;
      set[index] = value;

      final String text = String.valueOf(value);
      bloomFilter.add(text);
      tvSet.append(text);

      // Separator after every element except the last one.
      if (index < set.length - 1) {
        tvSet.append(",");
      }
    }
    tvSet.append("]");

    // Test set: values are stored and displayed but not added to the filter.
    tvTestSet.append("[");
    for (int index = 0; index < testSet.length; index++) {
      final int value = random.nextInt(High - Low) + Low;
      testSet[index] = value;
      tvTestSet.append(String.valueOf(value));
      if (index < testSet.length - 1) {
        tvTestSet.append(",");
      }
    }
    tvTestSet.append("]");

    // Probe the filter with every test value.
    for (int index = 0; index < testSet.length; index++) {
      if (!bloomFilter.contains(testSet[index] + "")) {
        Log.d("test", "MainActivity:onCreate: bloom filter does not contain " + testSet[index]);
        continue;
      }
      tvOutput.append(
          "\n" + testSet[index] + ", " + bloomFilter.expectedFalsePositiveProbability());
      Log.d("test", "\n" + testSet[index] + ", " + bloomFilter.expectedFalsePositiveProbability());
    }
  }
 /**
  * Handles a 'filterload' message: builds a Bloom filter from the input buffer, installs it on
  * the sending peer, and notifies the listener with both the previous and the new filter.
  *
  * @param msg Message
  * @param inBuffer Input buffer the new filter is constructed from
  * @param msgListener Message listener to notify
  * @throws EOFException End-of-data processing input stream
  * @throws VerificationException Verification error
  */
 public static void processFilterLoadMessage(
     Message msg, SerializedBuffer inBuffer, MessageListener msgListener)
     throws EOFException, VerificationException {
   final Peer sender = msg.getPeer();
   final BloomFilter replacement = new BloomFilter(inBuffer);
   final BloomFilter previous;

   // Swap the filters while holding the peer's monitor.
   synchronized (sender) {
     previous = sender.getBloomFilter();
     replacement.setPeer(sender);
     sender.setBloomFilter(replacement);
   }

   // The listener is notified outside the synchronized block.
   msgListener.processFilterLoad(msg, previous, replacement);
 }
  /**
   * Writes the given Bloom filter to the output: the hash count, the number of words in the
   * bitset, and then that many words taken page by page.
   *
   * @param bf Bloom filter to write
   * @param dos destination of the serialized form
   * @throws IOException if writing to {@code dos} fails
   */
  public void serialize(BloomFilter bf, DataOutput dos) throws IOException {
    final int wordCount = bf.bitset.getNumWords();
    final int wordsPerPage = bf.bitset.getPageSize();
    final int pages = bf.bitset.getPageCount();

    dos.writeInt(bf.getHashCount());
    dos.writeInt(wordCount);

    // Each page contributes at most a full page of words; the final page is
    // cut short once the announced word count has been written.
    int wordsLeft = wordCount;
    for (int page = 0; page < pages; page++) {
      final long[] words = bf.bitset.getPage(page);
      final int limit = Math.min(wordsPerPage, wordsLeft);
      for (int i = 0; i < limit; i++) {
        dos.writeLong(words[i]);
      }
      if (limit > 0) {
        wordsLeft -= limit;
      }
    }
  }
  /**
   * Calculates a serialized size of the given Bloom Filter
   *
   * <p>The size is the sum of the sizes reported by {@code typeSizes} for the hash count, the
   * word count and every word of the bitset, visited page by page.
   *
   * @see BloomFilterSerializer#serialize(BloomFilter, DataOutput)
   * @param bf Bloom filter to calculate serialized size
   * @param typeSizes provides the encoded size of each written value
   * @return serialized size of the given bloom filter
   */
  public long serializedSize(BloomFilter bf, TypeSizes typeSizes) {
    int bitLength = bf.bitset.getNumWords();
    int pageSize = bf.bitset.getPageSize();
    int pageCount = bf.bitset.getPageCount();

    // Accumulate in a long: the method returns long, and an int total can
    // overflow for large filters.
    long size = 0;
    size += typeSizes.sizeof(bf.getHashCount()); // hash count
    size += typeSizes.sizeof(bitLength); // length

    for (int p = 0; p < pageCount; p++) {
      long[] bits = bf.bitset.getPage(p);
      // bitLength counts down the words still to account for, so the last
      // page is only partially visited.
      for (int i = 0; i < pageSize && bitLength-- > 0; i++)
        size += typeSizes.sizeof(bits[i]); // bucket
    }
    return size;
  }
Esempio n. 13
0
 /** Empties the bloom filter and then the write set. */
 public void clear() {
   this.bloomFilter.clear();
   this.writeSet.clear();
 }
Esempio n. 14
0
 /**
  * Looks up the write matching the given read access.
  *
  * @param read the read access to look up
  * @return the write set's entry for {@code read}, or {@code null} when the bloom filter does not
  *     contain the read's hash code
  */
 public WriteFieldAccess contains(ReadFieldAccess read) {
   // The bloom filter is consulted first; the write set is only queried
   // when the filter reports a hit.
   if (!bloomFilter.contains(read.hashCode())) {
     return null;
   }
   return writeSet.get(read);
 }
 /** Checks that a string added to the filter is subsequently reported as contained. */
 public void test_add() {
   final String word = "quickpoint";

   filter.add(word);

   assertTrue(filter.contains(word));
 }
Esempio n. 16
0
 /**
  * Tests whether the formatted form of the given number is reported by the bloom filter.
  *
  * @param context context passed to the number formatter
  * @param number the number to look up; it is formatted before the lookup
  * @return {@code false} when no filter is present, otherwise the filter's answer
  */
 public boolean containsNumber(Context context, String number) {
   return bloomFilter != null
       && bloomFilter.contains(PhoneNumberFormatter.formatNumber(context, number));
 }
 /** Checks that a lower-case string added to the filter is found under its upper-case form. */
 public void test_case_in_sensitive() {
   final String lowerCase = "quickpoint";

   filter.add(lowerCase);

   assertTrue(filter.contains("QUICKPOINT"));
 }