/** * Parse cn page and write in hbase * * @param symbol */ public static void parseCNSymbols(String symbol) { if (!Hbase.getData(symbol).equals("")) { // System.out.println(symbol + " Exists!"); return; } String url = "http://xueqiu.com/S/" + symbol + "/historical.csv"; Response rs = null; // System.out.println(url); try { Connection con = getConnection(url, "historyHttp"); con.header("Referer", " http://xueqiu.com/S/" + symbol); rs = con.execute(); // System.out.println(rs.body()); } catch (IOException e1) { if (handleError) { System.out.println(symbol + " http error"); errors.add(symbol); } else { WriteError(symbol); System.out.println(symbol + " http error"); } return; } try { BufferedReader reader = new BufferedReader(new StringReader(rs.body())); // 换成你的文件名 reader.readLine(); // 第一行信息,为标题信息,不用,如果需要,注释掉 String line = null; JSONArray HistoricalData = new JSONArray(); List<JSONArray> jsonLists = new ArrayList<JSONArray>(); while ((line = reader.readLine()) != null) { String item[] = line.split(","); // CSV格式文件为逗号分隔符文件,这里根据逗号切分 // System.out.println(item[0]); JSONArray DailyData = new JSONArray(); for (int i = 1; i < item.length; i++) { item[i] = item[i].replace("\"", ""); DailyData.put(item[i]); } if (Double.valueOf(DailyData.getString(2)) != 0) { jsonLists.add(DailyData); } } for (int i = (jsonLists.size() - 1); i >= 0; i--) { HistoricalData.put(jsonLists.get(i)); } Hbase.addData(symbol, type, HistoricalData.toString()); // System.out.println(symbol + " done"); // System.out.println(jsonLists); } catch (Exception e) { if (handleError) { System.out.println(symbol + " parsing error"); errors.add(symbol); } else { WriteError(symbol); System.out.println(symbol + " parsing error"); } } }
public static Map<String, String> login( HttpServletRequest req, HttpServletResponse res, String username, String password) throws Exception { String url = ReadProperties.getByName("login.ip") + "/login"; Map<String, String> datas = new HashMap<String, String>(); Map<String, String> cookies = new HashMap<String, String>(); Connection con = Jsoup.connect(url).timeout(120000); // 获取连接 con.header( "User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20100101 Firefox/29.0"); // 配置模拟浏览器 Response rs; rs = con.execute(); cookies = rs.cookies(); Document doc = Jsoup.parse(rs.body()); // 转换为Dom树 List<Element> et = doc.select("form"); // 获取form表单,可以通过查看页面源码代码得知 for (Element e : et.get(0).getAllElements()) { if (e.attr("name").equals("username")) { e.attr("value", username); // 设置用户名 } if (e.attr("name").equals("password")) { e.attr("value", password); // 设置用户密码 } if (e.attr("name").length() > 0) { // 排除空值表单属性 datas.put(e.attr("name"), e.attr("value")); } } // 设置cookie和post上面的map数据 Response login = null; login = con.data(datas).cookies(cookies).method(Method.POST).execute(); url = ReadProperties.getByName("common.ip") + req.getContextPath() + "/user/getUser"; con = Jsoup.connect(url) .cookies(login.cookies()) .ignoreContentType(true) .method(Method.GET); // 获取连接 rs = con.execute(); for (Entry<String, String> entry : rs.cookies().entrySet()) { Cookie cookie = new Cookie(entry.getKey(), entry.getValue()); cookie.setPath(req.getContextPath() + "/"); res.addCookie(cookie); } return JsonUtil.jsonToObject(rs.body(), Map.class); }