public Chapter createChapter(int id, String page) { Chapter chapter = new Chapter(id); chapter.setUrl(Constants.BASE_URL + getVersion() + page); String cache = getCachePath() + page; try { String html = client.requestWithCache(chapter.getUrl(), cache, client.METHOD_GET, null); Document chapterDoc = Jsoup.parse(html); // 取出内容 Elements tables = chapterDoc.select("table"); int tableIndexOfMainBody = 1; if (tables.size() == 1) { tableIndexOfMainBody = 0; } Element table = chapterDoc.select("table").get(tableIndexOfMainBody); Elements sectionElements = table.select("td[class=v]"); logger.debug(sectionElements.size()); for (Element tdIndex : sectionElements) { Element tdContent = tdIndex.nextElementSibling(); String section = tdContent.text(); logger.debug(section); chapter.addSection(section); } } catch (IOException e) { logger.error(e.getMessage()); } catch (IndexOutOfBoundsException e) { logger.error(e.getMessage()); } return chapter; }
/**
 * Creates a crawler whose {@code client} is the instance returned by
 * {@code UndHttpClient.getInstance()}.
 */
public BibleCrawler() {
    this.client = UndHttpClient.getInstance();
}