/**
 * Runs a {@link DomainURLFilter} built from the {@code hosts.txt} sample file
 * over a fixed list of URLs and checks, per URL, whether
 * {@code filter(...)} hands back a non-null or a null result.
 *
 * NOTE(review): a non-null result presumably means the URL was accepted by
 * the filter and null means it was rejected; which domains {@code hosts.txt}
 * lists is not visible from this method.
 *
 * @throws Exception if setting up or invoking the filter fails
 */
public void testFilter() throws Exception {
  Configuration configuration = NutchConfiguration.create();
  DomainURLFilter filter = new DomainURLFilter(SAMPLES + SEPARATOR + "hosts.txt");
  filter.setConf(configuration);

  // URLs are probed in exactly this order; expectNonNull[i] belongs to urls[i].
  String[] urls = {
      "http://lucene.apache.org",
      "http://hadoop.apache.org",
      "http://www.apache.org",
      "http://www.google.com",
      "http://mail.yahoo.com",
      "http://www.foobar.net",
      "http://www.foobas.net",
      "http://www.yahoo.com",
      "http://www.foobar.be",
      "http://www.adobe.com"
  };
  boolean[] expectNonNull = {
      true, true, true, false, false, true, true, true, true, false
  };

  for (int i = 0; i < urls.length; i++) {
    if (expectNonNull[i]) {
      assertNotNull(filter.filter(urls[i]));
    } else {
      assertNull(filter.filter(urls[i]));
    }
  }
}
@Test public void testNoFilter() throws Exception { // https://issues.apache.org/jira/browse/NUTCH-2189 String domainFile = SAMPLES + SEPARATOR + "this-file-does-not-exist.txt"; Configuration conf = NutchConfiguration.create(); DomainURLFilter domainFilter = new DomainURLFilter(domainFile); domainFilter.setConf(conf); Assert.assertNotNull(domainFilter.filter("http://lucene.apache.org")); Assert.assertNotNull(domainFilter.filter("http://hadoop.apache.org")); Assert.assertNotNull(domainFilter.filter("http://www.apache.org")); Assert.assertNotNull(domainFilter.filter("http://www.google.com")); Assert.assertNotNull(domainFilter.filter("http://mail.yahoo.com")); Assert.assertNotNull(domainFilter.filter("http://www.foobar.net")); Assert.assertNotNull(domainFilter.filter("http://www.foobas.net")); Assert.assertNotNull(domainFilter.filter("http://www.yahoo.com")); Assert.assertNotNull(domainFilter.filter("http://www.foobar.be")); Assert.assertNotNull(domainFilter.filter("http://www.adobe.com")); }