/**
 * Builds the normalizer and, when a path is given, loads the redirect table from it.
 *
 * <p>The file is opened through {@code CompressionUtils.readSnappyCompressed} and read line by
 * line; each line is split on tabs, the first field being the redirect source and the second its
 * target, with spaces in both replaced by underscores. With {@code useBloomFilter} set, only the
 * source titles are recorded in {@code redirectFilter}; otherwise every source/target pair is
 * stored in {@code redirects}. Once loading is finished {@code redirects} is wrapped in an
 * unmodifiable view.
 *
 * @param pathToEvaluationRedirectsData location of the redirect data, or {@code null} to skip
 *     loading entirely
 * @throws IOException declared for the file-reading calls made while loading
 */
private TitleNameNormalizer(String pathToEvaluationRedirectsData) throws IOException {

    if (useBloomFilter) {
      redirectFilter = BloomFilter.create(Funnels.stringFunnel(), ESTIMATED_REDIRECTS);
      redirects =
          new LRUCache<String, String>(5000) {
            protected String loadValue(String src) {
              // Normalize once and fall back to the raw title when no normalized form is
              // returned (assumes normalize() is deterministic, so one call suffices).
              String normalized = TitleNameIndexer.normalize(src);
              return normalized == null ? src : normalized;
            }
          };
    } else {
      redirects = new StringMap<String>();
    }
    if (showInitProgress) {
      System.out.println(
          "Loading the most recent redirect pages from Wikipedia to normalize the output links to the latest version");
    }
    if (pathToEvaluationRedirectsData != null) {
      InputStream is = CompressionUtils.readSnappyCompressed(pathToEvaluationRedirectsData);
      LineIterator iterator = IOUtils.lineIterator(is, StandardCharsets.UTF_8);
      try {
        // The counter only advances while progress reporting is on (short-circuit &&) and is
        // used for nothing but the progress message.
        long linecount = 0;
        while (iterator.hasNext()) {
          String line = iterator.nextLine();
          if (showInitProgress && linecount++ % 100000 == 0) {
            System.out.println("loading the latest redirects; linecount=" + linecount);
          }
          // NOTE(review): a line with fewer than two tab-separated fields makes parts[1] throw
          // ArrayIndexOutOfBoundsException - confirm the data file never contains such lines.
          String[] parts = StringUtils.split(line, '\t');

          String src = parts[0].trim().replace(' ', '_');
          String trg = parts[1].trim().replace(' ', '_');
          if (useBloomFilter) {
            redirectFilter.put(src);
          } else {
            redirects.put(src, trg);
          }
        }
      } finally {
        // Close on every exit path so a failure while parsing a line does not leave the
        // input open.
        iterator.close();
      }
    }
    redirects = Collections.unmodifiableMap(redirects);
    if (showInitProgress) {
      System.out.println(
          "Done  - Loading the most recent redirect pages from Wikipedia to normalize the output links to the latest version");
    }
  }
Beispiel #2
0
/**
 * Command-line demo that prints how many keys land on each node under {@code ConsistentHash} and
 * under {@code RendezvousHash} (HRW), first with all nodes present and then again after two nodes
 * have been removed.
 */
public class Compare {
  private static final HashFunction HASH_FUNCTION = Hashing.murmur3_128();
  private static final Funnel<CharSequence> STRING_FUNNEL =
      Funnels.stringFunnel(Charset.defaultCharset());

  /** Number of keys ("0", "1", ...) looked up in every round. */
  private static final int KEY_COUNT = 10000;

  /** Number of nodes ("Node0", "Node1", ...) registered at the start of each run. */
  private static final int NODE_COUNT = 5;

  /** Number of leading nodes dropped before the second round. */
  private static final int REMOVED_NODE_COUNT = 2;

  public static void main(String[] args) {
    Map<String, AtomicInteger> ringCounts = Maps.newHashMap();

    System.out.println("======: ConsistentHash :========");
    // Raw constructor call kept as in the original code.
    ConsistentHash<String, String> ring =
        new ConsistentHash(HASH_FUNCTION, STRING_FUNNEL, STRING_FUNNEL, getNodes(ringCounts));
    tally(ring, ringCounts);
    report(ringCounts, true);
    System.out.println("====== remove 2 ========");
    for (int n = 0; n < REMOVED_NODE_COUNT; n++) {
      String node = "Node" + n;
      ring.remove(node);
      ringCounts.remove(node);
    }
    tally(ring, ringCounts);
    report(ringCounts, false);

    System.out.println("======: RendezvousHash :========");
    Map<String, AtomicInteger> hrwCounts = Maps.newHashMap();
    // Raw constructor call kept as in the original code.
    RendezvousHash<String, String> hrw =
        new RendezvousHash(HASH_FUNCTION, STRING_FUNNEL, STRING_FUNNEL, getNodes(hrwCounts));

    tally(hrw, hrwCounts);
    report(hrwCounts, true);
    System.out.println("====== remove 2 ========");
    for (int n = 0; n < REMOVED_NODE_COUNT; n++) {
      String node = "Node" + n;
      hrw.remove(node);
      hrwCounts.remove(node);
    }
    tally(hrw, hrwCounts);
    report(hrwCounts, false);
  }

  /** Looks up every key in the consistent hash and bumps the counter of the node it maps to. */
  private static void tally(ConsistentHash<String, String> ring, Map<String, AtomicInteger> counts) {
    for (int key = 0; key < KEY_COUNT; key++) {
      counts.get(ring.get(String.valueOf(key))).incrementAndGet();
    }
  }

  /** Looks up every key in the rendezvous hash and bumps the counter of the node it maps to. */
  private static void tally(RendezvousHash<String, String> hrw, Map<String, AtomicInteger> counts) {
    for (int key = 0; key < KEY_COUNT; key++) {
      counts.get(hrw.get(String.valueOf(key))).incrementAndGet();
    }
  }

  /**
   * Prints one "node,count" line per entry; when {@code reset} is true each counter is zeroed
   * right after it has been printed so the map can be reused for the next round.
   */
  private static void report(Map<String, AtomicInteger> counts, boolean reset) {
    for (Entry<String, AtomicInteger> entry : counts.entrySet()) {
      AtomicInteger counter = entry.getValue();
      System.out.println(entry.getKey() + "," + counter.get());
      if (reset) {
        counter.set(0);
      }
    }
  }

  /**
   * Creates the node names "Node0" .. "Node4" and registers a zeroed counter for each of them in
   * {@code distribution}.
   *
   * @param distribution map that receives one fresh counter per node name
   * @return the node names in creation order
   */
  private static List<String> getNodes(Map<String, AtomicInteger> distribution) {
    List<String> names = Lists.newArrayList();
    for (int n = 0; n < NODE_COUNT; n++) {
      String node = "Node" + n;
      names.add(node);
      distribution.put(node, new AtomicInteger());
    }
    return names;
  }
}