private TitleNameNormalizer(String pathToEvaluationRedirectsData) throws IOException { if (useBloomFilter) { redirectFilter = BloomFilter.create(Funnels.stringFunnel(), ESTIMATED_REDIRECTS); redirects = new LRUCache<String, String>(5000) { protected String loadValue(String src) { String normalized = TitleNameIndexer.normalize(src); if (normalized == null) return src; return TitleNameIndexer.normalize(src); } }; } else redirects = new StringMap<String>(); if (showInitProgress) System.out.println( "Loading the most recent redirect pages from Wikipedia to normalize the output links to the latest version"); if (pathToEvaluationRedirectsData != null) { InputStream is = CompressionUtils.readSnappyCompressed(pathToEvaluationRedirectsData); LineIterator iterator = IOUtils.lineIterator(is, StandardCharsets.UTF_8); long linecount = 0; while (iterator.hasNext()) { String line = iterator.nextLine(); if (showInitProgress && linecount++ % 100000 == 0) System.out.println("loading the latest redirects; linecount=" + linecount); String[] parts = StringUtils.split(line, '\t'); String src = parts[0].trim().replace(' ', '_'); String trg = parts[1].trim().replace(' ', '_'); if (useBloomFilter) redirectFilter.put(src); else redirects.put(src, trg); } iterator.close(); } redirects = Collections.unmodifiableMap(redirects); if (showInitProgress) System.out.println( "Done - Loading the most recent redirect pages from Wikipedia to normalize the output links to the latest version"); }
/**
 * Compares the key-distribution load (and the disruption caused by node
 * removal) between consistent hashing and rendezvous (HRW) hashing.
 *
 * <p>For each scheme: hashes {@link #KEY_COUNT} keys across {@link #NODE_COUNT}
 * nodes, prints the per-node counts, removes two nodes, re-hashes, and prints
 * the counts again.
 */
public class Compare {
  private static final HashFunction hfunc = Hashing.murmur3_128();
  private static final Funnel<CharSequence> strFunnel =
      Funnels.stringFunnel(Charset.defaultCharset());

  /** Number of keys hashed per measurement pass. */
  private static final int KEY_COUNT = 10000;
  /** Number of nodes in the initial ring. */
  private static final int NODE_COUNT = 5;
  /** Number of nodes removed for the disruption measurement. */
  private static final int REMOVE_COUNT = 2;

  public static void main(String[] args) {
    Map<String, AtomicInteger> distribution = Maps.newHashMap();

    System.out.println("======: ConsistentHash :========");
    // Fixed raw-type construction: use the diamond operator.
    ConsistentHash<String, String> c =
        new ConsistentHash<>(hfunc, strFunnel, strFunnel, getNodes(distribution));
    tally(distribution, c::get);
    printAndReset(distribution);

    System.out.println("====== remove 2 ========");
    removeNodes(c::remove, distribution);
    tally(distribution, c::get);
    print(distribution);

    System.out.println("======: RendezvousHash :========");
    distribution = Maps.newHashMap();
    RendezvousHash<String, String> r =
        new RendezvousHash<>(hfunc, strFunnel, strFunnel, getNodes(distribution));
    tally(distribution, r::get);
    printAndReset(distribution);

    System.out.println("====== remove 2 ========");
    removeNodes(r::remove, distribution);
    tally(distribution, r::get);
    print(distribution);
  }

  /** Hashes KEY_COUNT keys and increments the counter of each key's node. */
  private static void tally(
      Map<String, AtomicInteger> distribution,
      java.util.function.Function<String, String> hash) {
    for (int i = 0; i < KEY_COUNT; i++) {
      distribution.get(hash.apply("" + i)).incrementAndGet();
    }
  }

  /** Prints "node,count" for every node. */
  private static void print(Map<String, AtomicInteger> distribution) {
    for (Entry<String, AtomicInteger> e : distribution.entrySet()) {
      System.out.println(e.getKey() + "," + e.getValue().get());
    }
  }

  /** Prints "node,count" for every node, then zeroes the counters. */
  private static void printAndReset(Map<String, AtomicInteger> distribution) {
    for (Entry<String, AtomicInteger> e : distribution.entrySet()) {
      System.out.println(e.getKey() + "," + e.getValue().get());
      e.getValue().set(0);
    }
  }

  /** Removes the first REMOVE_COUNT nodes from the hasher and the tally map. */
  private static void removeNodes(
      java.util.function.Consumer<String> remover,
      Map<String, AtomicInteger> distribution) {
    for (int i = 0; i < REMOVE_COUNT; i++) {
      remover.accept("Node" + i);
      distribution.remove("Node" + i);
    }
  }

  /** Builds the node list and seeds {@code distribution} with zeroed counters. */
  private static List<String> getNodes(Map<String, AtomicInteger> distribution) {
    List<String> nodes = Lists.newArrayList();
    for (int i = 0; i < NODE_COUNT; i++) {
      nodes.add("Node" + i);
      distribution.put("Node" + i, new AtomicInteger());
    }
    return nodes;
  }
}