/**
 * Sends a single HTTP request through Jsoup and returns the raw response.
 *
 * <p>The connection is configured with a timeout of 10000, content-type checking disabled and a
 * max body size of 0 before the optional cookies and form data are attached.
 *
 * @param url target URL
 * @param method HTTP method to use
 * @param cookies cookies to send with the request; skipped when {@code null}
 * @param data key/value pairs added as request data; skipped when {@code null}
 * @return the response, or {@code null} when the request failed with an {@link IOException}
 *     (the stack trace is printed)
 */
private static Response execute(
        String url, Method method, Map<String, String> cookies, Map<String, String> data) {
    Connection request = Jsoup.connect(url)
            .method(method)
            .timeout(10000)
            .ignoreContentType(true)
            .maxBodySize(0);

    if (cookies != null) {
        request.cookies(cookies);
    }
    if (data != null) {
        for (Entry<String, String> field : data.entrySet()) {
            request.data(field.getKey(), field.getValue());
        }
    }

    try {
        return request.execute();
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
/**
 * Downloads a login captcha image, writes it to {@code EzraPoundUtil.CAPTCHA_DIR} and stores the
 * cookies returned with it in {@code captchaCookies}.
 *
 * <p>{@code captchaCookies} is cleared on every call, including the call that gives up. When the
 * HTTP request fails with an {@link IOException} the method calls itself again with
 * {@code times + 1} until {@code times} exceeds {@code maxRecursiveTimes}.
 *
 * <p>A failure while writing the image file is only printed; the cookies are still stored and
 * the method still returns {@code true} in that case.
 *
 * @param times number of the current attempt
 * @return {@code true} once a captcha response was received, {@code false} when the attempt
 *     limit was exceeded
 */
public boolean getCaptchaImgAndCookies(int times) {
    captchaCookies.clear();
    if (times > maxRecursiveTimes) return false;
    Connection con = JsoupUtil.getResourceCon(
            "https://www.zhihu.com/captcha.gif?r=" + System.currentTimeMillis() + "&type=login");
    Response rs = null;
    try {
        rs = con.execute();
    } catch (IOException e) {
        e.printStackTrace();
        log.info("获取验证码第" + times + "次失败");
        return getCaptchaImgAndCookies(++times);
    }
    File file = new File(EzraPoundUtil.CAPTCHA_DIR);
    // try-with-resources closes (and thereby flushes) the stream even when write() throws;
    // previously the stream was never closed and the file handle leaked.
    try (FileOutputStream out = new FileOutputStream(file)) {
        out.write(rs.bodyAsBytes());
    } catch (IOException e) {
        e.printStackTrace();
    }
    captchaCookies.putAll(rs.cookies());
    log.info("验证码已保存" + ",路径为:" + file.getAbsolutePath());
    log.info("验证码对应cookie为:" + captchaCookies);
    return true;
}
/**
 * Fetches and parses the page for the given date and earnings type.
 *
 * <p>The method sleeps 2000 ms before building the URL via {@code getYahooURL}. The request is
 * then attempted until a response is obtained or the attempt counter exceeds the limit (5, or 2
 * when {@code isWeekend(date)} is true); because the counter starts at 0 and is compared with
 * {@code <=}, up to {@code maxAttempt + 1} requests are made. Each failed attempt is followed by
 * a 4000 ms sleep.
 *
 * <p>The document is parsed from the response that was validated by {@code isValidConnection},
 * so no second HTTP request is issued.
 *
 * @param date date passed to {@code getYahooURL} and {@code isWeekend}
 * @param earningType earnings type passed to {@code getYahooURL}
 * @return the parsed document of the successful response
 * @throws Exception if no response could be obtained (the last request failure, if any, is
 *     attached as the cause), if {@code isValidConnection} rejects the response, if parsing
 *     fails, or if the thread is interrupted while sleeping
 */
public static Document getDocument(LocalDate date, Earning.EARNINGS_TYPE earningType)
        throws Exception {
    Thread.sleep(2000);
    String actualURLRequest = getYahooURL(earningType, date);
    Response calResponse = null;
    Exception lastFailure = null;
    int calRespAttempts = 0;
    int maxAttempt = 5;
    if (isWeekend(date)) {
        maxAttempt = 2;
    }
    while (calRespAttempts <= maxAttempt && calResponse == null) {
        calRespAttempts++;
        try {
            calResponse = Jsoup.connect(actualURLRequest).execute();
        } catch (Exception e1) {
            // Remember why the attempt failed so the final error is diagnosable.
            lastFailure = e1;
            Thread.sleep(4000);
        }
    }
    if (calResponse == null) {
        throw new Exception(
                "The connection response was null;" + actualURLRequest, lastFailure);
    }
    if (!isValidConnection(calResponse)) {
        throw new Exception(
                "Invalid HTTP Status Code:" + calResponse.statusCode() + ";" + actualURLRequest);
    }
    // Parse the response we already have instead of calling Connection.get(), which would
    // send the request a second time outside the retry loop and without validation.
    return calResponse.parse();
}
/**
 * Downloads the body of the given URL.
 *
 * <p>The request uses {@code USER_AGENT}, follows redirects and applies {@code GET_TIMEOUT}. The
 * time spent in the request is measured and logged together with the body length.
 *
 * @param url address to fetch
 * @return the response body when the status code equals {@code OK}; an empty string when the
 *     status differs or the request fails with an {@link IOException} (both cases are logged)
 */
public static String extractContent(String url) {
    try {
        Connection request = Jsoup.connect(url)
                .userAgent(USER_AGENT)
                .followRedirects(true)
                .timeout(GET_TIMEOUT);

        long startedAt = System.currentTimeMillis();
        Connection.Response response = request.execute();
        long elapsedMillis = System.currentTimeMillis() - startedAt;

        int status = response.statusCode();
        if (status != OK) {
            Logger.error("%s returned %d", url, status);
            return "";
        }

        String content = response.body();
        Logger.info(
                "%s retrieved, content length %d, time %s sec.",
                url, content.length(), FormatUtil.millis2Seconds(elapsedMillis));
        return content;
    } catch (IOException e) {
        Logger.error(e, "%s cannot be read.", url);
        return "";
    }
}
/** * Parse cn page and write in hbase * * @param symbol */ public static void parseCNSymbols(String symbol) { if (!Hbase.getData(symbol).equals("")) { // System.out.println(symbol + " Exists!"); return; } String url = "http://xueqiu.com/S/" + symbol + "/historical.csv"; Response rs = null; // System.out.println(url); try { Connection con = getConnection(url, "historyHttp"); con.header("Referer", " http://xueqiu.com/S/" + symbol); rs = con.execute(); // System.out.println(rs.body()); } catch (IOException e1) { if (handleError) { System.out.println(symbol + " http error"); errors.add(symbol); } else { WriteError(symbol); System.out.println(symbol + " http error"); } return; } try { BufferedReader reader = new BufferedReader(new StringReader(rs.body())); // 换成你的文件名 reader.readLine(); // 第一行信息,为标题信息,不用,如果需要,注释掉 String line = null; JSONArray HistoricalData = new JSONArray(); List<JSONArray> jsonLists = new ArrayList<JSONArray>(); while ((line = reader.readLine()) != null) { String item[] = line.split(","); // CSV格式文件为逗号分隔符文件,这里根据逗号切分 // System.out.println(item[0]); JSONArray DailyData = new JSONArray(); for (int i = 1; i < item.length; i++) { item[i] = item[i].replace("\"", ""); DailyData.put(item[i]); } if (Double.valueOf(DailyData.getString(2)) != 0) { jsonLists.add(DailyData); } } for (int i = (jsonLists.size() - 1); i >= 0; i--) { HistoricalData.put(jsonLists.get(i)); } Hbase.addData(symbol, type, HistoricalData.toString()); // System.out.println(symbol + " done"); // System.out.println(jsonLists); } catch (Exception e) { if (handleError) { System.out.println(symbol + " parsing error"); errors.add(symbol); } else { WriteError(symbol); System.out.println(symbol + " parsing error"); } } }
public static Map<String, String> login( HttpServletRequest req, HttpServletResponse res, String username, String password) throws Exception { String url = ReadProperties.getByName("login.ip") + "/login"; Map<String, String> datas = new HashMap<String, String>(); Map<String, String> cookies = new HashMap<String, String>(); Connection con = Jsoup.connect(url).timeout(120000); // 获取连接 con.header( "User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20100101 Firefox/29.0"); // 配置模拟浏览器 Response rs; rs = con.execute(); cookies = rs.cookies(); Document doc = Jsoup.parse(rs.body()); // 转换为Dom树 List<Element> et = doc.select("form"); // 获取form表单,可以通过查看页面源码代码得知 for (Element e : et.get(0).getAllElements()) { if (e.attr("name").equals("username")) { e.attr("value", username); // 设置用户名 } if (e.attr("name").equals("password")) { e.attr("value", password); // 设置用户密码 } if (e.attr("name").length() > 0) { // 排除空值表单属性 datas.put(e.attr("name"), e.attr("value")); } } // 设置cookie和post上面的map数据 Response login = null; login = con.data(datas).cookies(cookies).method(Method.POST).execute(); url = ReadProperties.getByName("common.ip") + req.getContextPath() + "/user/getUser"; con = Jsoup.connect(url) .cookies(login.cookies()) .ignoreContentType(true) .method(Method.GET); // 获取连接 rs = con.execute(); for (Entry<String, String> entry : rs.cookies().entrySet()) { Cookie cookie = new Cookie(entry.getKey(), entry.getValue()); cookie.setPath(req.getContextPath() + "/"); res.addCookie(cookie); } return JsonUtil.jsonToObject(rs.body(), Map.class); }
/**
 * Fetches a fixed article page, prints its HTML, then prints the {@code src} attribute of every
 * element whose {@code src} ends with {@code .gif}.
 */
@Test
public void test03() throws Exception {
    String html = Jsoup.connect("http://www.jb51.net/article/16829.htm").execute().body();
    System.out.println(html);

    Elements gifSources = Jsoup.parse(html).select("[src$=.gif]");
    for (Element node : gifSources) {
        System.out.println(node.attr("src"));
    }
}
/** * 点赞 * * @param statusId * @param like * @param cookie * @return * @throws TaskException */ public LikeResultBean doLike(String statusId, boolean like, String cookie) throws TaskException { try { String url = like ? "http://m.weibo.cn/attitudesDeal/add" : "http://m.weibo.cn/attitudesDeal/delete"; Map<String, String> cookieMap = new HashMap<String, String>(); String[] cookieValues = cookie.split(";"); for (String cookieValue : cookieValues) { String key = cookieValue.split("=")[0]; String value = cookieValue.split("=")[1]; cookieMap.put(key, value); } // Logger.d(WeiboClientActivity.TAG, cookieMap); Connection connection = Jsoup.connect(url); connection .userAgent( "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:33.0) Gecko/20100101 Firefox/33.0") .referrer("http://m.weibo.cn/") .cookies(cookieMap) .data("id", statusId) .method(Connection.Method.POST); if (like) connection.data("attitude", "heart"); String body = connection.execute().body(); if (!TextUtils.isEmpty(body)) { Logger.d(TAG, body); if (body.indexOf("http://passport.weibo.cn/sso/crossdomain") != -1) throw new TaskException("-100", "未登录"); else if (body.indexOf("<html") != -1) throw new TaskException("-100", "未登录"); LikeResultBean likeBean = JSON.parseObject(body, LikeResultBean.class); if (likeBean.getOk() == 1) { return likeBean; } else if (likeBean.getOk() == -100) { throw new TaskException("-100", "未登录"); } else { throw new TaskException("", likeBean.getMsg()); } } } catch (Exception e) { if (e instanceof TaskException) throw (TaskException) e; e.printStackTrace(); } throw new TaskException(TaskException.TaskError.timeout.toString()); }
/**
 * Loads {@code http://www.zhihu.com} and stores the value of the {@code _xsrf} input found
 * inside {@code .view.view-signin} in the {@code xsrf} field.
 *
 * <p>When the request fails with an {@link IOException} the method calls itself with
 * {@code times + 1}; it gives up once {@code times} exceeds {@code maxRecursiveTimes}.
 *
 * @param times number of the current attempt
 * @return {@code true} once the page was fetched and {@code xsrf} assigned, {@code false} when
 *     the attempt limit was exceeded
 */
public boolean getXsrf(int times) {
    if (times > maxRecursiveTimes) {
        return false;
    }

    Connection homeRequest = JsoupUtil.getGetCon("http://www.zhihu.com");
    Response homePage;
    try {
        homePage = homeRequest.execute();
    } catch (IOException e) {
        e.printStackTrace();
        log.info("获取_xsrf第" + times + "次失败");
        return getXsrf(times + 1);
    }

    xsrf = Jsoup.parse(homePage.body())
            .select(".view.view-signin [name=\"_xsrf\"]")
            .attr("value");
    log.info("已获得xsrf:" + xsrf);
    return true;
}
/**
 * Executes the given connection and merges the response cookies into {@code this.cookies}.
 *
 * <p>The target URL is printed before anything is sent. When {@code props.getMode()} is
 * {@code Mode.TEST} no request is made.
 *
 * @param connection Jsoup connection object
 * @param method HTTP method to set on the connection; left untouched when {@code null}
 * @return Jsoup Connection.Response object, or {@code null} in test mode or when the request
 *     failed with an {@link IOException} (the stack trace is printed)
 */
public Connection.Response execute(Connection connection, Connection.Method method) {
    if (method != null) {
        connection.method(method);
    }

    try {
        System.out.println("Calling " + connection.request().url());
        if (props.getMode() == Mode.TEST) {
            return null;
        }
        Connection.Response result = connection.execute();
        this.cookies.putAll(result.cookies());
        return result;
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}