@Test public void testGetDomainSuffix() throws Exception { URL url = null; url = new URL("http://lucene.apache.org/nutch"); assertEquals("org", URLUtil.getDomainSuffix(url).getDomain()); url = new URL("http://140.211.11.130/foundation/contributing.html"); assertNull(URLUtil.getDomainSuffix(url)); url = new URL("http://www.example.co.uk:8080/index.html"); assertEquals("co.uk", URLUtil.getDomainSuffix(url).getDomain()); url = new URL("http://com"); assertEquals("com", URLUtil.getDomainSuffix(url).getDomain()); url = new URL("http://www.example.co.uk.com"); assertEquals("com", URLUtil.getDomainSuffix(url).getDomain()); // "nn" is not a tld url = new URL("http://example.com.nn"); assertNull(URLUtil.getDomainSuffix(url)); url = new URL("http://"); assertNull(URLUtil.getDomainSuffix(url)); url = new URL("http://www.edu.tr.xyz"); assertNull(URLUtil.getDomainSuffix(url)); url = new URL("http://subdomain.example.edu.tr"); assertEquals("edu.tr", URLUtil.getDomainSuffix(url).getDomain()); url = new URL("http://subdomain.example.presse.fr"); assertEquals("presse.fr", URLUtil.getDomainSuffix(url).getDomain()); url = new URL("http://subdomain.example.presse.tr"); assertEquals("tr", URLUtil.getDomainSuffix(url).getDomain()); // plc.co.im is listed as a domain suffix url = new URL("http://www.example.plc.co.im"); assertEquals("plc.co.im", URLUtil.getDomainSuffix(url).getDomain()); // 2000.hu is listed as a domain suffix url = new URL("http://www.example.2000.hu"); assertEquals("2000.hu", URLUtil.getDomainSuffix(url).getDomain()); // test non-ascii url = new URL("http://www.example.商業.tw"); assertEquals("商業.tw", URLUtil.getDomainSuffix(url).getDomain()); }