/**
 * Fetches the XML document at {@code RESULT_LOCALITY_URL} and converts it into a
 * {@link LotteryDraw}.
 *
 * <p>Only direct children of the root element named {@code po7oprize} are inspected.
 * Inside each of them the child elements are mapped as follows:
 * <ul>
 *   <li>{@code code} - draw result</li>
 *   <li>{@code term} - phase (the lottery type is set at the same time)</li>
 *   <li>{@code drawOpenDate} - draw time, with {@code " 00:00:00"} appended</li>
 *   <li>{@code drawSaleCount} - volume of sales, commas removed</li>
 *   <li>{@code drawPrizePoolCount} - jackpot, commas removed</li>
 *   <li>{@code name} / {@code count2} / {@code bonus} - one prize item, emitted as soon
 *       as all three values have been seen</li>
 * </ul>
 *
 * @return the populated draw (possibly without a phase when the document contains no
 *         {@code po7oprize} element), or {@code null} when the download fails, the
 *         body is empty or a 404 page, or the XML cannot be parsed
 */
private LotteryDraw nowPhaseResult() {
    String url = RESULT_LOCALITY_URL;
    String encoding = "utf-8";
    String pageInfo = "结果页面" + url;
    String logHeader = "==" + lotteryScope + "==" + siteName + "==" + pageInfo
            + "==抓取==" + getLotteryType().getName() + "==";

    String data;
    try {
        data = CoreFetcherUtils.URLGet(url, null, encoding);
    } catch (Exception e) {
        // Keep the throwable so the stack trace is not lost.
        logger.error("获取xml数据失败" + e.getMessage(), e);
        return null;
    }
    // ">= 0" so that a body which *starts* with the 404 marker is rejected as well.
    if (data == null || data.isEmpty() || data.indexOf("404 Not Found") >= 0) {
        logger.error(logHeader + "data is null or 404 Not Found");
        return null;
    }

    SAXReader saxReader = new SAXReader();
    try {
        // The XML comes from a remote site: do not resolve external entities (XXE).
        saxReader.setFeature("http://xml.org/sax/features/external-general-entities", false);
        saxReader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    } catch (Exception e) {
        // Hardening is best-effort; an unsupported feature must not stop the fetch.
        logger.error(logHeader + "unable to disable external XML entities", e);
    }

    LotteryDraw lotteryDraw = new LotteryDraw();
    List<LotteryDrawPrizeItem> prizeItems = new ArrayList<LotteryDrawPrizeItem>();
    try {
        Document document = saxReader.read(new ByteArrayInputStream(data.getBytes(encoding)));
        Iterator<?> rootChildren = document.getRootElement().elementIterator();
        while (rootChildren.hasNext()) {
            Element section = (Element) rootChildren.next();
            if (!"po7oprize".equals(section.getName())) {
                continue;
            }

            // Pending values of the prize item currently being assembled.
            String name = "";
            String count = "";
            String bonus = "";

            Iterator<?> fields = section.elementIterator();
            while (fields.hasNext()) {
                Element field = (Element) fields.next();
                String tag = field.getName();
                String text = field.getTextTrim();

                if ("code".equals(tag)) {
                    lotteryDraw.setResult(text);
                } else if ("term".equals(tag)) {
                    lotteryDraw.setPhase(text);
                    lotteryDraw.setLotteryType(getLotteryType());
                } else if ("drawOpenDate".equals(tag)) {
                    lotteryDraw.setTimeDraw(text + " 00:00:00");
                } else if ("drawSaleCount".equals(tag)) {
                    lotteryDraw.setVolumeOfSales(text.replace(",", ""));
                } else if ("drawPrizePoolCount".equals(tag)) {
                    lotteryDraw.setJackpot(text.replace(",", ""));
                } else if ("name".equals(tag)) {
                    name = text;
                } else if ("count2".equals(tag)) {
                    count = text.replace(",", "");
                } else if ("bonus".equals(tag)) {
                    bonus = text.replace(",", "");
                }

                // A prize item is complete once name, count and bonus are all present;
                // emit it and start collecting the next one.
                boolean complete = name != null && !"".equals(name)
                        && bonus != null && !"".equals(bonus)
                        && count != null && !"".equals(count);
                if (complete) {
                    LotteryDrawPrizeItem prizeItem = new LotteryDrawPrizeItem();
                    prizeItem.setName(name);
                    prizeItem.setWinningCount(count);
                    prizeItem.setPrizeAmount(bonus);
                    prizeItems.add(prizeItem);
                    name = "";
                    bonus = "";
                    count = "";
                }
            }
        }
        lotteryDraw.setResultDetail(prizeItems);
    } catch (Exception e) {
        logger.error("数据解析错误==" + e.getMessage(), e);
        return null;
    }
    return lotteryDraw;
}
@Override public LotteryDraw fetchResultDetail(String phase) { LotteryDraw lotteryDraw = null; lotteryDraw = nowPhaseResult(); if (phase == null || "".equals(phase) || lotteryDraw.getPhase().equals(phase)) { return lotteryDraw; } else { lotteryDraw = null; } String url = RESULT_MORE_LOCALITY_URL; String data = null; String pageInfo = "结果页面" + url; String encoding = "utf-8"; String logHeader = "==" + lotteryScope + "==" + siteName + "==" + pageInfo + "==抓取==" + getLotteryType().getName() + "=="; try { data = CoreFetcherUtils.URLGet(url, null, encoding); } catch (Exception e) { logger.error("获取html数据失败" + e.getMessage()); return null; } if (data == null || data.indexOf("404 Not Found") > 0 || data.isEmpty()) { logger.error(logHeader + "data is null or 404 Not Found"); return null; } Parser parser = null; try { parser = Parser.createParser(data, encoding); } catch (Exception e) { logger.error("解析html页面失败" + e.getMessage()); return null; } NodeFilter filter = new HasAttributeFilter("class", "mytable"); NodeList nodeList = null; try { nodeList = parser.extractAllNodesThatMatch(filter); TableTag tableTag = (TableTag) nodeList.elementAt(0); TableRow[] tableRows = tableTag.getRows(); for (int i = 1; i < tableRows.length; i++) { TableColumn[] tableColumns = tableRows[i].getColumns(); String phaseTmp = tableColumns[0].toPlainTextString(); if (phaseTmp != null && !"".equals(phaseTmp) && phase.equals(phaseTmp)) { lotteryDraw = new LotteryDraw(); // 彩期 lotteryDraw.setPhase(phaseTmp); // 开奖结果 String strResult = tableColumns[1].toPlainTextString(); strResult = strResult.trim().replace(" ", ","); lotteryDraw.setResult(strResult); // 彩种 lotteryDraw.setLotteryType(super.getLotteryType()); break; } } } catch (ParserException e) { logger.error("数据解析错误==" + e.getMessage(), e); return null; } return lotteryDraw; }
@Override protected LotteryDraw parseLotteryDrawResult(String html) { LotteryType lotteryType = this.getLotteryType(); Parser parser = null; try { parser = Parser.createParser(html, CharsetConstant.CHARSET_UTF8); } catch (Exception e) { logger.error("解析html内容出错: {}", html, e); return null; } LotteryDraw lotteryDraw = new LotteryDraw(); lotteryDraw.setLotteryType(lotteryType); // 解析基本信息 try { NodeFilter tInfoFilter = new HasAttributeFilter("class", "tInfo"); NodeList tInfoNodeList = parser.extractAllNodesThatMatch(tInfoFilter); if (tInfoNodeList.size() == 0) { return null; } parser.setInputHTML(tInfoNodeList.elementAt(0).toHtml()); // 取四个红色部分,依次为彩期、销售总额、开奖日期、开奖号码 NodeFilter redFilter = new HasAttributeFilter("class", "fc-red"); NodeList redNodeList = parser.extractAllNodesThatMatch(redFilter); if (redNodeList.size() < 4) { logger.error("解析的内容不符合要求: {}", tInfoNodeList.elementAt(0).toHtml()); return null; } lotteryDraw.setPhase(redNodeList.elementAt(0).toPlainTextString().trim()); lotteryDraw.setVolumeOfSales( StringUtils.replace(redNodeList.elementAt(1).toPlainTextString().trim(), ",", "")); Date drawDate = CoreDateUtils.parseDate( redNodeList.elementAt(2).toPlainTextString().trim(), "yyyy年MM月dd日"); if (drawDate != null) { lotteryDraw.setTimeDraw(CoreDateUtils.formatDateTime(drawDate)); } lotteryDraw.setResult( StringUtils.replace(redNodeList.elementAt(3).toPlainTextString().trim(), " ", ",")); } catch (ParserException e) { logger.error(e.getMessage(), e); return null; } // 解析详情信息 try { parser.setInputHTML(html); NodeFilter dInfoFilter = new HasAttributeFilter("class", "dInfo"); NodeList dInfoNodeList = parser.extractAllNodesThatMatch(dInfoFilter); if (dInfoNodeList.size() == 0) { return null; } parser.setInputHTML(dInfoNodeList.elementAt(0).toHtml()); NodeFilter prizeFilter = new TagNameFilter("p"); NodeList prizeNodeList = parser.extractAllNodesThatMatch(prizeFilter); if (prizeNodeList.size() == 0) { return null; } String[] splitted = prizeNodeList .elementAt(0) 
.toPlainTextString() .split("--------------------------------------------------"); if (splitted.length < 2) { logger.error("未解析到{}开奖详情: {}", lotteryType.getName()); return lotteryDraw; } splitted = StringUtils.split(splitted[1].trim(), " "); List<LotteryDrawPrizeItem> resultDetail = new ArrayList<LotteryDrawPrizeItem>(); int index = -1; LotteryDrawPrizeItem prizeItem = null; for (int i = 0; i < splitted.length; i++) { String s = splitted[i].trim(); if (s.length() == 0) { continue; } index++; if (index % 4 == 0) { // 一行有4列 index = 0; prizeItem = new LotteryDrawPrizeItem(); resultDetail.add(prizeItem); } switch (index) { case 0: prizeItem.setName(s); break; case 1: prizeItem.setWinningCount(StringUtils.replace(s, "注", "")); break; case 2: prizeItem.setPrizeAmount( CoreStringUtils.replaceAll( s, new String[][] { {"元", ""}, {",", ""} })); break; default: break; } } lotteryDraw.setResultDetail(resultDetail); } catch (ParserException e) { logger.error(e.getMessage(), e); } return lotteryDraw; }