Пример #1
0
 /**
  * Builds the {@link Chapter} with the given id from the HTML of {@code page}.
  *
  * <p>The chapter URL is {@code Constants.BASE_URL + getVersion() + page}; the HTML is obtained
  * through {@code client.requestWithCache} using {@code getCachePath() + page} as the cache
  * location. Every {@code td} whose class attribute is exactly {@code v} inside the main table
  * marks a section; the text of the element that follows it is added to the chapter.
  *
  * <p>Failures are logged and not rethrown: the chapter is returned with whatever sections were
  * collected before the failure (possibly none).
  *
  * @param id   identifier passed to the {@link Chapter} constructor
  * @param page page name appended to both the base URL and the cache path
  * @return the chapter, never {@code null}
  */
 public Chapter createChapter(int id, String page) {
   Chapter chapter = new Chapter(id);
   chapter.setUrl(Constants.BASE_URL + getVersion() + page);
   String cache = getCachePath() + page;
   try {
     String html = client.requestWithCache(chapter.getUrl(), cache, client.METHOD_GET, null);
     Document chapterDoc = Jsoup.parse(html);
     // Extract the content.
     Elements tables = chapterDoc.select("table");
     if (tables.isEmpty()) {
       // Nothing to parse; report it explicitly instead of relying on an index error.
       logger.error("No table found in " + chapter.getUrl());
       return chapter;
     }
     // The main body is the second table, unless the page has only one.
     int tableIndexOfMainBody = (tables.size() == 1) ? 0 : 1;
     Element table = tables.get(tableIndexOfMainBody);
     Elements sectionElements = table.select("td[class=v]");
     logger.debug(sectionElements.size());
     for (Element tdIndex : sectionElements) {
       Element tdContent = tdIndex.nextElementSibling();
       if (tdContent == null) {
         // A marker cell without a following cell carries no section text; skip it
         // rather than fail the whole chapter with a NullPointerException.
         continue;
       }
       String section = tdContent.text();
       logger.debug(section);
       chapter.addSection(section);
     }
   } catch (IOException e) {
     // Pass the exception itself so the stack trace is not lost.
     logger.error(e.getMessage(), e);
   } catch (IndexOutOfBoundsException e) {
     logger.error(e.getMessage(), e);
   }
   return chapter;
 }
Пример #2
0
 /**
  * Creates a crawler whose {@code client} is the instance returned by
  * {@code UndHttpClient.getInstance()}.
  */
 public BibleCrawler() {
   this.client = UndHttpClient.getInstance();
 }