/** * Parse cn page and write in hbase * * @param symbol */ public static void parseCNSymbols(String symbol) { if (!Hbase.getData(symbol).equals("")) { // System.out.println(symbol + " Exists!"); return; } String url = "http://xueqiu.com/S/" + symbol + "/historical.csv"; Response rs = null; // System.out.println(url); try { Connection con = getConnection(url, "historyHttp"); con.header("Referer", " http://xueqiu.com/S/" + symbol); rs = con.execute(); // System.out.println(rs.body()); } catch (IOException e1) { if (handleError) { System.out.println(symbol + " http error"); errors.add(symbol); } else { WriteError(symbol); System.out.println(symbol + " http error"); } return; } try { BufferedReader reader = new BufferedReader(new StringReader(rs.body())); // 换成你的文件名 reader.readLine(); // 第一行信息,为标题信息,不用,如果需要,注释掉 String line = null; JSONArray HistoricalData = new JSONArray(); List<JSONArray> jsonLists = new ArrayList<JSONArray>(); while ((line = reader.readLine()) != null) { String item[] = line.split(","); // CSV格式文件为逗号分隔符文件,这里根据逗号切分 // System.out.println(item[0]); JSONArray DailyData = new JSONArray(); for (int i = 1; i < item.length; i++) { item[i] = item[i].replace("\"", ""); DailyData.put(item[i]); } if (Double.valueOf(DailyData.getString(2)) != 0) { jsonLists.add(DailyData); } } for (int i = (jsonLists.size() - 1); i >= 0; i--) { HistoricalData.put(jsonLists.get(i)); } Hbase.addData(symbol, type, HistoricalData.toString()); // System.out.println(symbol + " done"); // System.out.println(jsonLists); } catch (Exception e) { if (handleError) { System.out.println(symbol + " parsing error"); errors.add(symbol); } else { WriteError(symbol); System.out.println(symbol + " parsing error"); } } }
/** * Parse nasdq page and write in hbase * * @param symbol */ public static void parseUSSymbols(String symbol) { if (!Hbase.getData(symbol).equals("")) { // System.out.println(symbol + " Exists!"); return; } String result = HttpRequest.sendPost( "http://www.nasdaq.com/symbol/" + symbol.toLowerCase() + "/historical", length + "|false|" + symbol); if (result.equals("")) { WriteError(symbol); System.out.println(symbol + " result error"); return; } // System.out.println(result); Document doc = Jsoup.parse(result); JSONArray HistoricalData = new JSONArray(); try { Element body = doc.getElementsByTag("tbody").get(0); // System.out.println(body.toString()); Elements nodes = body.getElementsByTag("tr"); if (nodes.size() == 0) { WriteError(symbol); System.out.println(symbol + " size 0"); return; } // System.out.println(nodes.size()); for (Element node : nodes) { JSONArray DailyData = new JSONArray(); Elements units = node.getElementsByTag("td"); for (Element unit : units) { if (!unit.text().equals("")) { DailyData.put(unit.text()); } } if (DailyData.length() > 0) { HistoricalData.put(DailyData); } } Hbase.addData(symbol, type, HistoricalData.toString()); // System.out.println(symbol + " done"); } catch (Exception e) { if (handleError) { errors.add(symbol); } else { WriteError(symbol); System.out.println(symbol + " parsing error"); } // TODO: handle exception } }