/**
 * Stores the stats object for the current domain, derives the domain rank and the skip flags
 * from it, and clears the emitted-URL filter once it has reached its flush threshold.
 *
 * <p>Keys read from {@code domainStats}:
 * <ul>
 *   <li>{@code "dR"} - domain rank; treated as {@code 0.0} when absent.</li>
 *   <li>{@code "urls"} - URL count; when absent no skip decision is made at all.</li>
 *   <li>{@code "crawled"} - crawled count; treated as {@code 0} when absent.</li>
 *   <li>{@code "200"} - HTTP 200 count; treated as {@code 0} when absent.</li>
 * </ul>
 *
 * <p>Skip rules, evaluated in order (first match wins):
 * <ol>
 *   <li>URLs known, some crawled, but none returned HTTP 200: sets
 *       {@code _skipEverythingButHomepage}.</li>
 *   <li>Otherwise, a domain with more than 25,000 URLs sets {@code _skipDomain} when its rank is
 *       below the minimum for its size: 3.0 above 25,000 URLs, 4.0 above 250,000 and 5.0 above
 *       1,000,000.</li>
 * </ol>
 *
 * <p>This method only ever sets the skip flags to {@code true}; it never resets them.
 * NOTE(review): presumably they are reset elsewhere when a new domain starts - confirm.
 *
 * @param domainStats JSON stats for the domain; must not be null
 * @param reporter    receives the counter increments for skip decisions and filter flushes
 * @throws IOException declared by the signature; no statement in this body throws it directly
 */
void setDomainStats(JsonObject domainStats, Reporter reporter) throws IOException {
  _domainStats = domainStats;

  // A missing rank is recorded as 0.0, which is below every rank threshold used further down.
  _domainRank = _domainStats.has("dR") ? _domainStats.get("dR").getAsDouble() : 0.0;

  if (_domainStats.has("urls")) {
    int urlCount = _domainStats.get("urls").getAsInt();
    // JsonObject.get returns null for an absent member, so every optional key is guarded with
    // has() before it is dereferenced. An absent count is read as 0.
    int crawledCount = _domainStats.has("crawled") ? _domainStats.get("crawled").getAsInt() : 0;
    int http200Count = _domainStats.has("200") ? _domainStats.get("200").getAsInt() : 0;

    if (urlCount != 0 && crawledCount != 0 && http200Count == 0) {
      // Pages were crawled for this domain but not a single one came back with HTTP 200.
      reporter.incrCounter(Counters.SKIPPING_BAD_DOMAIN_BASED_ON_CRAWL_HISTORY, 1);
      LOG.info(
          "Skipping Everything But Homepage for Domain:" + _newDomainBytes.toString()
              + " CrawledCount:" + crawledCount
              + " HTTP200Count:" + http200Count
              + " URLCount:" + urlCount);
      _skipEverythingButHomepage = true;
    } else if (urlCount > 25000) {
      // The size tiers are contiguous, so every URL count above 25,000 maps to exactly one
      // minimum rank and no band of large, low-ranked domains slips through unchecked.
      double requiredRank;
      if (urlCount > 1000000) {
        requiredRank = 5.0;
      } else if (urlCount > 250000) {
        requiredRank = 4.0;
      } else {
        requiredRank = 3.0;
      }

      // _domainRank is 0.0 when "dR" is absent, so "no rank" and "rank too low" are one check.
      if (_domainRank < requiredRank) {
        LOG.info("Skipping Domain:" + _newDomainBytes.toString());
        reporter.incrCounter(Counters.SKIPPING_DOMAIN_EXCEEDED_URL_COUNT_AND_LOW_DR, 1);
        _skipDomain = true;
      }
    }
  }

  // Independent of the skip decision: empty the emitted-URL filter and restart its counter once
  // the number of tracked entries has reached the flush threshold.
  if (_emittedURLSInFilter >= FLUSH_THRESHOLD) {
    _emittedURLSFilter.clear();
    _emittedURLSInFilter = 0;
    reporter.incrCounter(Counters.FLUSHED_BLOOMFILTER, 1);
  }
}