/**
 * Fills in the brief (summary) of the entries already present in {@code list}.
 *
 * <p>The raw text to scan is taken from {@code args[0]}. Each fragment returned by
 * {@code StringUtil.regMatches} for the {@code content168} markers is passed through
 * {@code StringUtil.regMatcher} with the {@code CDATA[} / {@code ]} delimiters
 * (presumably extracting the CDATA payload; confirm against {@code StringUtil}), and the
 * result is stored on the entry at the same index.
 *
 * <p>Nothing happens when {@code args} is missing or {@code args[0]} is null or empty.
 * Fragments beyond the current size of {@code list} are ignored.
 *
 * @param list entries to update, matched to fragments by position
 * @param dom not used by this method
 * @param component not used by this method
 * @param args {@code args[0]} is the raw text to scan
 */
@Override
public void parseBrief(List<WeixinData> list, Node dom, Component component, String... args) {
    // Compare string content, not references: `== ""` is only true for the interned literal.
    if (args == null || args.length == 0 || args[0] == null || args[0].isEmpty()) {
        return;
    }

    List<String> fragments = StringUtil.regMatches(args[0], "content168>", "/content168", true);

    // Never index past the entries that exist; a surplus fragment has nowhere to go.
    int count = Math.min(fragments.size(), list.size());
    for (int i = 0; i < count; i++) {
        String brief = StringUtil.regMatcher(fragments.get(i), "CDATA\\[", "\\]");
        list.get(i).setBrief(brief);
    }
}
/**
 * Appends one new {@code WeixinData} to {@code list} for every title fragment found.
 *
 * <p>The raw text to scan is taken from {@code args[0]}. Each fragment returned by
 * {@code StringUtil.regMatches} for the {@code title} markers is passed through
 * {@code StringUtil.regMatcher} with the {@code CDATA[} / {@code ]} delimiters
 * (presumably extracting the CDATA payload; confirm against {@code StringUtil}), and the
 * result becomes the title of a freshly created entry.
 *
 * <p>Nothing happens when {@code args} is missing or {@code args[0]} is null or empty.
 *
 * @param list receives the newly created entries, in match order
 * @param dom not used by this method
 * @param component not used by this method
 * @param args {@code args[0]} is the raw text to scan
 */
@Override
public void parseTitle(List<WeixinData> list, Node dom, Component component, String... args) {
    // Compare string content, not references: `== ""` is only true for the interned literal.
    if (args == null || args.length == 0 || args[0] == null || args[0].isEmpty()) {
        return;
    }

    List<String> fragments = StringUtil.regMatches(args[0], "title>", "/title", true);
    for (String fragment : fragments) {
        WeixinData entry = new WeixinData();
        entry.setTitle(StringUtil.regMatcher(fragment, "CDATA\\[", "\\]"));
        list.add(entry);
    }
}
@Override public void parseUrl(List<WeixinData> list, Node dom, Component component, String... args) { if (args[0] == null || args[0] == "" || args[1] == null || args[1] == "") return; String cookie = args[1]; // String referer = args[1]; List<String> results = StringUtil.regMatches(args[0], "<url>", "/url", true); for (int i = 0; i < results.size(); i++) { String tmpUrl = results.get(i); tmpUrl = "http://weixin.sogou.com" + tmpUrl.substring(tmpUrl.indexOf("CDATA[") + 6, tmpUrl.lastIndexOf("]]>")); String loc = null; try { HttpURLConnection conn = (HttpURLConnection) new URL(tmpUrl).openConnection(); conn.addRequestProperty( "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:38.0) Gecko/20100101 Firefox/38.0"); conn.setRequestProperty("Cookie", cookie); // conn.setRequestProperty("Referer", referer); HttpURLConnection.setFollowRedirects(false); conn.setFollowRedirects(false); conn.connect(); loc = conn.getHeaderField("Location"); if (loc != null) Systemconfig.sysLog.log(conn.getResponseMessage()); Systemconfig.sysLog.log("real url: " + loc); int sleepTime = 30 + (int) (Math.random() * 20); Systemconfig.sysLog.log("sleep..." + sleepTime); TimeUtil.rest(sleepTime); } catch (MalformedURLException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } list.get(i).setUrl(loc == null ? "err." : loc); } }