/**
 * Extracts the outlinks of every test page and checks them against the
 * expected outlinks for that page.
 *
 * <p>Calls {@code setup()} first when {@code testDOMs[0]} is still
 * {@code null}. For each page the {@code parser.html.form.use_action}
 * setting is enabled, except for the page at index {@code SKIP}, where it is
 * disabled; the configuration is re-applied to {@code utils} before every
 * extraction.
 *
 * @throws Exception if setup or outlink extraction fails
 */
@Test
public void testGetOutlinks() throws Exception {
  if (testDOMs[0] == null) {
    setup();
  }

  for (int page = 0; page < testPages.length; page++) {
    // The flag is on for every page except the one at index SKIP.
    boolean useFormAction = (page != SKIP);
    conf.setBoolean("parser.html.form.use_action", useFormAction);
    utils.setConf(conf);

    ArrayList<Outlink> found = new ArrayList<Outlink>();
    utils.getOutlinks(testBaseHrefURLs[page], found, testDOMs[page]);

    Outlink[] actual = found.toArray(new Outlink[found.size()]);
    compareOutlinks(page, answerOutlinks[page], actual);
  }
}
// won't work with Tika - the title is stored in the metadata but // not put in the XHTML representation @Test public void testGetTitle() throws Exception { if (testDOMs[0] == null) setup(); for (int i = 0; i < testPages.length; i++) { StringBuffer sb = new StringBuffer(); utils.getTitle(sb, testDOMs[i]); String title = sb.toString(); assertTrue( "example " + i + " : expecting title: " + answerTitle[i] + System.getProperty("line.separator") + System.getProperty("line.separator") + "got title: " + title, equalsIgnoreWhitespace(answerTitle[i], title)); } }