Example #1
 public SpillMap(SpillFile file, int thresholdBytes) throws IOException {
   this.thresholdBytes = thresholdBytes;
   this.spillFile = file;
   byteMap = MappedByteBufferMap.newMappedByteBufferMap(thresholdBytes, spillFile);
   bFilters = Lists.newArrayList();
   bFilters.add(BloomFilter.create(Funnels.byteArrayFunnel(), 1000));
 }
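The constructor pairs each spill-file page with a Bloom filter keyed on raw bytes. A minimal sketch of how such a filter is typically exercised, assuming the same construction as above (the lookup key is hypothetical):

 // imports: com.google.common.hash.BloomFilter, com.google.common.hash.Funnels,
 //          java.nio.charset.StandardCharsets
 BloomFilter<byte[]> filter = BloomFilter.create(Funnels.byteArrayFunnel(), 1000);
 byte[] key = "row-42".getBytes(StandardCharsets.UTF_8);
 filter.put(key);                 // record the key on write
 if (filter.mightContain(key)) {  // never a false negative, occasionally a false positive
   // only now pay for the page read
 }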
Example #2
 // Function checks whether the current page has enough space to fit the new serialized tuple.
 // If not, it flushes the current buffer and gets a new page.
 // TODO: The code does not ensure that pages are optimally packed.
 //       It only tries to fill up the current page as much as possible; if it's
 //       exhausted, it requests a new page. Instead it would be nice to load the next page
 //       that could fit the new value.
 private void ensureSpace(byte[] value) {
   if (!byteMap.canFit(value)) {
     // Flush current buffer
     byteMap.flushBuffer();
     // Get next page
     byteMap = MappedByteBufferMap.newMappedByteBufferMap(thresholdBytes, spillFile);
     // Create new bloomfilter
     bFilters.add(BloomFilter.create(Funnels.byteArrayFunnel(), 1000));
   }
 }
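Since ensureSpace adds one Bloom filter per page, a read can use the filter list to skip pages that cannot hold a key. A sketch of that lookup path; findCandidatePage is a hypothetical helper, not part of the original class:

 // Hypothetical helper: locate the first page that may contain the key.
 private int findCandidatePage(byte[] key) {
   for (int page = 0; page < bFilters.size(); page++) {
     if (bFilters.get(page).mightContain(key)) {
       return page; // this page *may* hold the key; false positives are possible
     }
   }
   return -1; // no page holds the key: Bloom filters give no false negatives
 }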
Example #3
  private TitleNameNormalizer(String pathToEvaluationRedirectsData) throws IOException {

    if (useBloomFilter) {
      // The no-arg stringFunnel() overload is deprecated; pass a charset explicitly.
      redirectFilter =
          BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), ESTIMATED_REDIRECTS);
      redirects =
          new LRUCache<String, String>(5000) {
            protected String loadValue(String src) {
              String normalized = TitleNameIndexer.normalize(src);
              if (normalized == null) return src;
              return normalized;
            }
          };
    } else redirects = new StringMap<String>();
    if (showInitProgress)
      System.out.println(
          "Loading the most recent redirect pages from Wikipedia to normalize the output links to the latest version");
    if (pathToEvaluationRedirectsData != null) {
      InputStream is = CompressionUtils.readSnappyCompressed(pathToEvaluationRedirectsData);
      LineIterator iterator = IOUtils.lineIterator(is, StandardCharsets.UTF_8);

      long linecount = 0;
      while (iterator.hasNext()) {
        String line = iterator.nextLine();
        if (showInitProgress && linecount++ % 100000 == 0)
          System.out.println("loading the latest redirects; linecount=" + linecount);
        String[] parts = StringUtils.split(line, '\t');

        String src = parts[0].trim().replace(' ', '_');
        String trg = parts[1].trim().replace(' ', '_');
        if (useBloomFilter) redirectFilter.put(src);
        else redirects.put(src, trg);
      }
      iterator.close();
    }
    redirects = Collections.unmodifiableMap(redirects);
    if (showInitProgress)
      System.out.println(
          "Done  - Loading the most recent redirect pages from Wikipedia to normalize the output links to the latest version");
  }
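Note that the Bloom-filter branch records only membership (put with no target), so a lookup can only ask whether a title is a redirect and must recompute the target. A hedged sketch of that read path; the normalize method below is assumed, not taken from the original class:

 // Sketch, assuming the fields initialized above.
 String normalize(String title) {
   String key = title.trim().replace(' ', '_');
   if (useBloomFilter) {
     // The filter answers only "possibly a redirect"; resolve via the indexer.
     return redirectFilter.mightContain(key) ? TitleNameIndexer.normalize(key) : title;
   }
   String trg = redirects.get(key);
   return trg != null ? trg : title;
 }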
Example #4
/** For comparing the load differences between consistent hash and HRW */
public class Compare {
  private static final HashFunction hfunc = Hashing.murmur3_128();
  private static final Funnel<CharSequence> strFunnel =
      Funnels.stringFunnel(Charset.defaultCharset());

  public static void main(String[] args) {
    Map<String, AtomicInteger> distribution = Maps.newHashMap();

    System.out.println("======: ConsistentHash :========");
    ConsistentHash<String, String> c =
        new ConsistentHash<>(hfunc, strFunnel, strFunnel, getNodes(distribution));
    for (int i = 0; i < 10000; i++) {
      distribution.get(c.get("" + i)).incrementAndGet();
    }
    for (Entry<String, AtomicInteger> e : distribution.entrySet()) {
      System.out.println(e.getKey() + "," + e.getValue().get());
      e.getValue().set(0);
    }
    System.out.println("====== remove 2 ========");
    for (int i = 0; i < 2; i++) {
      c.remove("Node" + i);
      distribution.remove("Node" + i);
    }
    for (int i = 0; i < 10000; i++) {
      distribution.get(c.get("" + i)).incrementAndGet();
    }
    for (Entry<String, AtomicInteger> e : distribution.entrySet()) {
      System.out.println(e.getKey() + "," + e.getValue().get());
    }

    System.out.println("======: RendezvousHash :========");
    distribution = Maps.newHashMap();
    RendezvousHash<String, String> r =
        new RendezvousHash<>(hfunc, strFunnel, strFunnel, getNodes(distribution));

    for (int i = 0; i < 10000; i++) {
      distribution.get(r.get("" + i)).incrementAndGet();
    }
    for (Entry<String, AtomicInteger> e : distribution.entrySet()) {
      System.out.println(e.getKey() + "," + e.getValue().get());
      e.getValue().set(0);
    }
    System.out.println("====== remove 2 ========");
    for (int i = 0; i < 2; i++) {
      r.remove("Node" + i);
      distribution.remove("Node" + i);
    }
    for (int i = 0; i < 10000; i++) {
      distribution.get(r.get("" + i)).incrementAndGet();
    }
    for (Entry<String, AtomicInteger> e : distribution.entrySet()) {
      System.out.println(e.getKey() + "," + e.getValue().get());
    }
  }

  private static List<String> getNodes(Map<String, AtomicInteger> distribution) {
    List<String> nodes = Lists.newArrayList();
    for (int i = 0; i < 5; i++) {
      nodes.add("Node" + i);
      distribution.put("Node" + i, new AtomicInteger());
    }
    return nodes;
  }
}
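Both ring implementations take a HashFunction plus Funnels for keys and nodes; presumably they combine them roughly as below. A sketch of that pairing (the node and key values are illustrative):

 // One-shot: hash a key through a Funnel.
 HashFunction hf = Hashing.murmur3_128();
 Funnel<CharSequence> fn = Funnels.stringFunnel(Charset.defaultCharset());
 long keyHash = hf.hashObject("42", fn).asLong();
 // Incremental: feed node then key into one Hasher, roughly as rendezvous
 // hashing does when scoring each (node, key) pair.
 long score = hf.newHasher().putObject("Node3", fn).putObject("42", fn).hash().asLong();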
Example #5
 /**
  * Hashes the contents of this byte source using the given hash function.
  *
  * @throws IOException if an I/O error occurs in the process of reading from this source
  */
 public HashCode hash(HashFunction hashFunction) throws IOException {
   Hasher hasher = hashFunction.newHasher();
   copyTo(Funnels.asOutputStream(hasher));
   return hasher.hash();
 }
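A typical call site for this method, using Guava's Files to obtain a ByteSource; the file name is illustrative:

 // Sketch: checksum a file via ByteSource.hash.
 HashCode checksum = Files.asByteSource(new File("data.bin")).hash(Hashing.sha256());
 System.out.println(checksum); // hex digest of the file contents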
Example #6
 private void createNewBloomFilter(int expectedInsertions) throws IOException {
   bloomFilter =
       BloomFilter.create(
           Funnels.byteArrayFunnel(), expectedInsertions, FALSE_POSITIVE_PROBABILITY);
   this.persistBloomFilter();
 }
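The persist step is project-specific, but Guava can serialize the filter directly; a sketch of a round trip under that assumption (the file path is illustrative, and the funnel must match on both sides):

 // Hypothetical persistBloomFilter() body using Guava's built-in serialization.
 try (OutputStream out = java.nio.file.Files.newOutputStream(Paths.get("filter.bin"))) {
   bloomFilter.writeTo(out);
 }
 // The matching load; readFrom must be given the same funnel type.
 try (InputStream in = java.nio.file.Files.newInputStream(Paths.get("filter.bin"))) {
   bloomFilter = BloomFilter.readFrom(in, Funnels.byteArrayFunnel());
 }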