/**
 * Posts the activation form to {@code BASE_URL} and parses the generated line out of the
 * response body.
 *
 * <p>The line is expected between the markers {@code "C: "} and {@code " :|: and it will "} as
 * four whitespace-separated fields: host, port, user and password.
 *
 * @return a list holding the single parsed entry, or an empty list when the request fails or the
 *     response does not contain a well-formed line
 */
public List<CCCAMEntity> getLines() {
    List<CCCAMEntity> clines = new ArrayList<CCCAMEntity>();
    try {
        Response res =
            Jsoup.connect(BASE_URL)
                // The user name carries a timestamp suffix, so it differs between requests.
                .data("user", "RDS580" + System.currentTimeMillis())
                .data("pass", "RDS580")
                .data("submit", "Activate!")
                .userAgent(
                    "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0")
                .referrer(BASE_URL)
                .method(Method.POST)
                .execute();
        final String linesweb = res.body();
        final String startMarker = "C: ";
        final String endMarker = " :|: and it will ";

        // Without this check a missing start marker (-1) would silently turn into offset 2 and
        // arbitrary page content would be parsed as a line.
        final int markerPos = linesweb.indexOf(startMarker);
        if (markerPos < 0) {
            System.out.println("Error en " + BASE_URL + ". Start marker not found in response");
            return clines;
        }
        final int start = markerPos + startMarker.length();
        final int end = linesweb.indexOf(endMarker, start);
        if (end < 0) {
            System.out.println("Error en " + BASE_URL + ". End marker not found in response");
            return clines;
        }

        // Split on any run of whitespace so repeated spaces do not produce empty fields.
        final String[] tokens = linesweb.substring(start, end).trim().split("\\s+");
        if (tokens.length < 4) {
            System.out.println(
                "Error en " + BASE_URL + ". Expected 4 fields but found " + tokens.length);
            return clines;
        }
        clines.add(new CCCAMEntity(tokens[0], tokens[1], tokens[2], tokens[3], default_hops));
    } catch (Exception e) {
        // Best effort: any failure is reported and an empty list is returned.
        System.out.println("Error en " + BASE_URL + ". " + e.getMessage());
    }
    return clines;
}
/** * Parse cn page and write in hbase * * @param symbol */ public static void parseCNSymbols(String symbol) { if (!Hbase.getData(symbol).equals("")) { // System.out.println(symbol + " Exists!"); return; } String url = "http://xueqiu.com/S/" + symbol + "/historical.csv"; Response rs = null; // System.out.println(url); try { Connection con = getConnection(url, "historyHttp"); con.header("Referer", " http://xueqiu.com/S/" + symbol); rs = con.execute(); // System.out.println(rs.body()); } catch (IOException e1) { if (handleError) { System.out.println(symbol + " http error"); errors.add(symbol); } else { WriteError(symbol); System.out.println(symbol + " http error"); } return; } try { BufferedReader reader = new BufferedReader(new StringReader(rs.body())); // 换成你的文件名 reader.readLine(); // 第一行信息,为标题信息,不用,如果需要,注释掉 String line = null; JSONArray HistoricalData = new JSONArray(); List<JSONArray> jsonLists = new ArrayList<JSONArray>(); while ((line = reader.readLine()) != null) { String item[] = line.split(","); // CSV格式文件为逗号分隔符文件,这里根据逗号切分 // System.out.println(item[0]); JSONArray DailyData = new JSONArray(); for (int i = 1; i < item.length; i++) { item[i] = item[i].replace("\"", ""); DailyData.put(item[i]); } if (Double.valueOf(DailyData.getString(2)) != 0) { jsonLists.add(DailyData); } } for (int i = (jsonLists.size() - 1); i >= 0; i--) { HistoricalData.put(jsonLists.get(i)); } Hbase.addData(symbol, type, HistoricalData.toString()); // System.out.println(symbol + " done"); // System.out.println(jsonLists); } catch (Exception e) { if (handleError) { System.out.println(symbol + " parsing error"); errors.add(symbol); } else { WriteError(symbol); System.out.println(symbol + " parsing error"); } } }
public static Map<String, String> login( HttpServletRequest req, HttpServletResponse res, String username, String password) throws Exception { String url = ReadProperties.getByName("login.ip") + "/login"; Map<String, String> datas = new HashMap<String, String>(); Map<String, String> cookies = new HashMap<String, String>(); Connection con = Jsoup.connect(url).timeout(120000); // 获取连接 con.header( "User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20100101 Firefox/29.0"); // 配置模拟浏览器 Response rs; rs = con.execute(); cookies = rs.cookies(); Document doc = Jsoup.parse(rs.body()); // 转换为Dom树 List<Element> et = doc.select("form"); // 获取form表单,可以通过查看页面源码代码得知 for (Element e : et.get(0).getAllElements()) { if (e.attr("name").equals("username")) { e.attr("value", username); // 设置用户名 } if (e.attr("name").equals("password")) { e.attr("value", password); // 设置用户密码 } if (e.attr("name").length() > 0) { // 排除空值表单属性 datas.put(e.attr("name"), e.attr("value")); } } // 设置cookie和post上面的map数据 Response login = null; login = con.data(datas).cookies(cookies).method(Method.POST).execute(); url = ReadProperties.getByName("common.ip") + req.getContextPath() + "/user/getUser"; con = Jsoup.connect(url) .cookies(login.cookies()) .ignoreContentType(true) .method(Method.GET); // 获取连接 rs = con.execute(); for (Entry<String, String> entry : rs.cookies().entrySet()) { Cookie cookie = new Cookie(entry.getKey(), entry.getValue()); cookie.setPath(req.getContextPath() + "/"); res.addCookie(cookie); } return JsonUtil.jsonToObject(rs.body(), Map.class); }
/**
 * Logs in with an e-mail address, password and captcha read interactively from standard input.
 *
 * <p>After posting the credentials, the home page is requested with the cookies of the login
 * response. When that page passes {@code checkLogin}, the cookies are kept in
 * {@code loginCookies} and saved via {@code saveCookies}.
 *
 * @return {@code true} when the home page passes {@code checkLogin}; {@code false} when either
 *     request fails with an {@link IOException} or the check fails
 */
public boolean loginByEmailAndPwd() {
    loginCookies.clear();
    // Deliberately not closed: closing the Scanner would also close System.in.
    Scanner sc = new Scanner(System.in);
    getCaptchaImgAndCookies(0);
    log.info("请输入账号:");
    email = sc.nextLine();
    log.info("请输入密码");
    password = sc.nextLine();
    log.info("查看验证码并输入");
    captcha = sc.nextLine();

    Connection con = JsoupUtil.getPostCon("https://www.zhihu.com/login/email");
    Response rs = null;
    try {
        rs =
            con.data("_xsrf", xsrf)
                .data("email", email)
                .data("password", password)
                .data("remember_me", remeberMe)
                .data("captcha", captcha)
                .cookies(captchaCookies)
                .ignoreContentType(true)
                .execute();
    } catch (IOException e) {
        e.printStackTrace();
        log.info("通过账号密码登录发生异常");
        return false;
    }

    JSONObject jsonObject = new JSONObject(rs.body());
    // NOTE(review): "r" is read but not used afterwards; success is decided by checkLogin below.
    String result = jsonObject.get("r").toString();
    log.info(EzraPoundUtil.unicode2Character(jsonObject.get("msg").toString()));

    Response rs2 = null;
    try {
        rs2 = JsoupUtil.getGetCon("https://www.zhihu.com").cookies(rs.cookies()).execute();
    } catch (IOException e) {
        // Without a home page the login cannot be verified; previously execution continued and
        // dereferenced the null response below.
        e.printStackTrace();
        log.info("登录后访问首页校验登录状态发生异常");
        return false;
    }

    if (checkLogin(Jsoup.parse(rs2.body()))) {
        loginCookies.putAll(rs.cookies());
        saveCookies(EzraPoundUtil.LOGIN_COOKIES_DIR, loginCookies);
        return true;
    }
    return false;
}
/**
 * Requests {@code GET_URL} and deserializes the JSON response body into an {@code AccessToken}.
 *
 * @return the parsed token, or {@code null} when the request or the deserialization throws
 */
private AccessToken getAccessToken() {
    try {
        String json =
            Jsoup.connect(GET_URL)
                // The body is read as text whatever content type is returned.
                .ignoreContentType(true)
                .method(Method.GET)
                .execute()
                .body();
        return new Gson().fromJson(json, new TypeToken<AccessToken>() {}.getType());
    } catch (Exception e) {
        log.error("[GetAccessTokenTask] getAccessToken Error:", e);
        return null;
    }
}
/**
 * Polls the yhd.com price endpoint in a loop, logging each response and saving its body to
 * {@code ./data/<lower-cased class name>/<start timestamp>/<index>.html}.
 *
 * <p>Each iteration requests the URL through {@code doRequest}, checks the response with
 * {@code doValidateResponse}, and sleeps for {@code timeout} milliseconds. The loop ends after
 * 10 consecutive failed validations; one successful validation resets the counter.
 *
 * @param args not used
 * @throws Exception if writing the file fails or the sleep is interrupted
 */
public static void main(String[] args) throws Exception {
    int index = 0;
    long timeout = 5000;
    String pattern = "MMdd_HH_mm_ss";
    String dataDir = "./data/" + YhdPriceMonitor.class.getSimpleName().toLowerCase();
    disableSSLCertCheck();
    // Formatted once, so every file of this run lands in the same directory.
    String dateChars = DateFormatUtils.format(new Date(), pattern);
    int lastErrorCount = 0;
    while (true) {
        index++;
        // The trailing "_" parameter changes on every request.
        String url =
            "http://gps.yhd.com/restful/detail?mcsite=1&provinceId=1&pmId=41909728&callback=jQuery111304328004347221549_1447325832073&_="
                + System.currentTimeMillis();
        long start = System.currentTimeMillis();
        Response resp = doRequest(url, 3);
        long cost = System.currentTimeMillis() - start;
        if (resp == null) {
            log.warn("error,index:" + index + ",status:null,cost:" + cost + ",url:" + url);
        } else {
            log.info(
                "done,index:"
                    + index
                    + ",status:"
                    + resp.statusCode()
                    + ",cost:"
                    + cost
                    + ",url:"
                    + url);
        }

        boolean success = doValidateResponse(resp);
        if (success) {
            lastErrorCount = 0;
            log.info("validate=true.index:" + index + ",cookies:" + JSON.toJSONString(resp.cookies()));
            log.info("validate=true.index:" + index + ",headers:" + JSON.toJSONString(resp.headers()));
        } else {
            lastErrorCount++;
            if (resp != null) {
                log.warn(
                    "validate=false.index:" + index + ",cookies:" + JSON.toJSONString(resp.cookies()));
                // Label fixed: this line logs the headers, not the cookies.
                log.warn(
                    "validate=false.index:" + index + ",headers:" + JSON.toJSONString(resp.headers()));
            }
        }

        FileUtils.writeStringToFile(
            new File(dataDir, dateChars + File.separator + index + ".html"),
            resp == null ? "no response" : resp.body());

        if (lastErrorCount >= 10) {
            break;
        }
        TimeUnit.MILLISECONDS.sleep(timeout);
    }
}
/**
 * Tries to log in with the cookies previously saved under
 * {@code EzraPoundUtil.LOGIN_COOKIES_DIR}.
 *
 * <p>{@code loginCookies} is cleared and reloaded via {@code readCookies}, then the home page is
 * requested with those cookies and handed to {@code checkLogin}.
 *
 * @return the result of {@code checkLogin} on the home page, or {@code false} when the request
 *     fails with an {@link IOException}
 */
public boolean loginBySavedCookies() {
    loginCookies.clear();
    readCookies(EzraPoundUtil.LOGIN_COOKIES_DIR, loginCookies);

    Connection homeConnection = JsoupUtil.getGetCon("https://www.zhihu.com");
    Response homeResponse;
    try {
        homeResponse = homeConnection.cookies(loginCookies).execute();
    } catch (IOException e) {
        e.printStackTrace();
        log.info("携带cookie登录测试失败");
        return false;
    }

    Document homePage = Jsoup.parse(homeResponse.body());
    return checkLogin(homePage);
}
/**
 * Posts the given key/value pairs to {@code url} and returns the response body after
 * {@code changeEncoding}.
 *
 * <p>The request is repeated until it succeeds; every {@link IOException} is reported on standard
 * output and followed by another attempt.
 *
 * @param response class whose name is used in the progress messages
 * @param strings form data as alternating keys and values ({@code key1, value1, key2, ...})
 * @return the response body converted by {@code changeEncoding}
 * @throws IllegalArgumentException if {@code strings} has an odd number of elements
 * @throws Exception if {@code changeEncoding} fails
 */
private String getContent(Class<?> response, String... strings) throws Exception {
    // Fail fast with a clear message instead of an ArrayIndexOutOfBoundsException mid-loop.
    if (strings.length % 2 != 0) {
        throw new IllegalArgumentException(
            "strings must hold key/value pairs but has " + strings.length + " element(s)");
    }
    Map<String, String> data = new HashMap<String, String>();
    for (int i = 0; i < strings.length; i += 2) {
        data.put(strings[i], strings[i + 1]);
    }

    Response res = null;
    System.out.println("Start connect: " + response.getName() + ", " + new Date());
    // NOTE(review): retries forever and without any delay between attempts; confirm that this is
    // intended before relying on it against an unreachable host.
    while (res == null) {
        try {
            // timeout(0) disables the Jsoup request timeout.
            res = Jsoup.connect(url).data(data).timeout(0).method(Method.POST).execute();
        } catch (IOException e) {
            // Report the cause instead of discarding it, then try again.
            System.out.println("exception: " + response.getName() + ", " + new Date() + ", " + e);
        }
    }
    System.out.println("Finish connect: " + response.getName() + ", " + new Date());
    return changeEncoding(res.body());
}
/**
 * Checks that a response body contains every expected fragment.
 *
 * @param resp response to inspect; may be {@code null}
 * @return {@code false} when {@code resp} is {@code null} or its body lacks any of
 *     {@code "marketPrice"}, {@code "89"} or {@code "41909728"}; {@code true} otherwise
 */
private static boolean doValidateResponse(Response resp) {
    if (resp == null) {
        return false;
    }
    String body = resp.body();
    String[] requiredFragments = {"marketPrice", "89", "41909728"};
    for (String fragment : requiredFragments) {
        if (!body.contains(fragment)) {
            return false;
        }
    }
    return true;
}
/**
 * Fetches the zhihu.com home page and stores the value of its {@code _xsrf} form field in
 * {@code xsrf}.
 *
 * <p>When the request fails with an {@link IOException}, the method calls itself with
 * {@code times + 1} until {@code times} exceeds {@code maxRecursiveTimes}.
 *
 * @param times number of the current attempt
 * @return {@code true} once a page has been fetched and {@code xsrf} assigned; {@code false} when
 *     {@code times} exceeds {@code maxRecursiveTimes}
 */
public boolean getXsrf(int times) {
    if (times > maxRecursiveTimes) {
        return false;
    }

    Connection homeConnection = JsoupUtil.getGetCon("http://www.zhihu.com");
    Response homeResponse;
    try {
        homeResponse = homeConnection.execute();
    } catch (IOException e) {
        e.printStackTrace();
        log.info("获取_xsrf第" + times + "次失败");
        return getXsrf(times + 1);
    }

    Document homePage = Jsoup.parse(homeResponse.body());
    xsrf = homePage.select(".view.view-signin [name=\"_xsrf\"]").attr("value");
    log.info("已获得xsrf:" + xsrf);
    return true;
}
/**
 * Fetches {@code BASE_URL} and builds one entry from the text of the first {@code h1} element.
 *
 * <p>The heading text is split on single spaces; the tokens at positions 1 to 4 are used as
 * host, port, user and password.
 *
 * @return a list holding the single parsed entry, or an empty list when anything throws
 */
public List<CCCAMEntity> getLines() {
    List<CCCAMEntity> parsedLines = new ArrayList<CCCAMEntity>();
    try {
        Response page =
            Jsoup.connect(BASE_URL)
                .timeout(7500)
                .userAgent(
                    "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0")
                .method(Method.GET)
                .execute();
        Document document = Jsoup.parse(page.body());
        String heading = document.getElementsByTag("h1").get(0).text();
        final String[] parts = heading.split(" ");
        // Token 0 is skipped; the line data starts at position 1.
        parsedLines.add(
            new CCCAMEntity(
                parts[1].trim(), parts[2].trim(), parts[3].trim(), parts[4].trim(), default_hops));
    } catch (Exception e) {
        // Best effort: any failure is reported and an empty list is returned.
        System.out.println("Error en " + BASE_URL);
        System.out.println("Error: " + e.getMessage());
    }
    return parsedLines;
}