/**
 * Matches an opening parenthesis that is never closed before the end of the text: either the
 * full-width pair (U+FF08 / U+FF09) or the ASCII pair.
 *
 * <p>NOTE(review): the previous literal began with an unescaped ASCII "(", which is an unclosed
 * group and cannot compile. Presumably it was the full-width parenthesis before an encoding
 * round-trip; it is written with Unicode escapes here so it survives re-encoding. Confirm.
 */
private static final Pattern PROJECT_NAME_UNCLOSED_PAREN =
    Pattern.compile("\\uFF08[^\\uFF09]*$|\\([^)]*$");

/** Matches optional whitespace, the marker "编号", and everything after it. */
private static final Pattern PROJECT_NAME_NUMBER_SUFFIX = Pattern.compile("\\s*编号.*");

// NOTE(review): the next three patterns are kept exactly as they appear in the file. As written,
// the quote and space replacements map a character to itself, and the dash alternation lists the
// same hyphen twice; presumably the originals were full-width / non-breaking variants that were
// normalised by an encoding conversion. Verify against the file's real encoding.
private static final Pattern PROJECT_NAME_OPEN_QUOTE = Pattern.compile("“");
private static final Pattern PROJECT_NAME_CLOSE_QUOTE = Pattern.compile("”");
private static final Pattern PROJECT_NAME_SPACE = Pattern.compile(" ");
private static final Pattern PROJECT_NAME_DASH_RUN = Pattern.compile("(—|-|-)+");

/**
 * Extracts the project name from a notice page, stores it on {@code model}, and normalises it.
 *
 * <p>Extraction first tries {@code getPurProjectC}; if that throws and {@code isCorrectTabel}
 * accepts the raw HTML, {@code getPurProjectT} is tried instead. If no extractor succeeds the
 * message "名称获取异常" is added to the model and the name is left as it was.
 *
 * <p>When a name is present it is cleaned up in this order: a trailing unclosed parenthesis is
 * removed, anything from "编号" onwards is removed, quote/space characters are replaced, runs of
 * dashes collapse to a single "-", and one trailing ";" is dropped. An empty name after cleanup is
 * stored as-is instead of raising {@code StringIndexOutOfBoundsException}.
 *
 * @param model the record being filled in
 * @param bidHtml raw HTML of the notice, used only to decide whether the table fallback applies
 * @param bidHtmlNoLabel notice text with markup removed, passed to the extractors
 */
private static void setPurProject(Model model, String bidHtml, String bidHtmlNoLabel) {
  try {
    model.setPurProject(getPurProjectC(bidHtmlNoLabel));
  } catch (Exception e) {
    if (isCorrectTabel(bidHtml)) {
      try {
        model.setPurProject(getPurProjectT(bidHtmlNoLabel));
      } catch (Exception ex) {
        model.addMessage("名称获取异常");
      }
    } else {
      model.addMessage("名称获取异常");
    }
  }

  String name = model.getPurProject();
  if (name == null) {
    // Nothing was extracted; there is nothing to normalise.
    return;
  }

  name = PROJECT_NAME_UNCLOSED_PAREN.matcher(name).replaceAll("");
  name = PROJECT_NAME_NUMBER_SUFFIX.matcher(name).replaceAll("");
  name = PROJECT_NAME_OPEN_QUOTE.matcher(name).replaceAll("“");
  name = PROJECT_NAME_CLOSE_QUOTE.matcher(name).replaceAll("”");
  name = PROJECT_NAME_SPACE.matcher(name).replaceAll(" ");
  name = PROJECT_NAME_DASH_RUN.matcher(name).replaceAll("-");

  // endsWith is safe on an empty string, unlike substring(length() - 1).
  if (name.endsWith(";")) {
    name = name.substring(0, name.length() - 1);
  }
  model.setPurProject(name);
}
/**
 * Fills in the bid amount unit and the bid amount on {@code model}.
 *
 * <p>The unit is derived from the currency sign returned by {@code getMoneyUnit} and the unit
 * text returned by {@code getAmountUnit} (treated as "元" when null). A missing currency sign
 * yields "元". Otherwise the sign selects a base unit, which is rejected when the unit text
 * mentions a conflicting currency; a unit text containing "万" prefixes the base unit with "万".
 * When no unit can be chosen the message "获取金额单位异常" is added.
 *
 * <p>The amount is the result of {@code getPurAmount} with every "," removed. Any exception while
 * reading the amount, or anywhere else in this method, adds the message "获取金额异常".
 *
 * @param model the record being filled in
 * @param bidHtml raw HTML of the notice (not used by this method)
 * @param bidHtmlNoLabel notice text with markup removed, passed to the extractors
 */
private static void setPurAmount(Model model, String bidHtml, String bidHtmlNoLabel) {
  try {
    String unitText = getAmountUnit(bidHtmlNoLabel);
    String currencySign = getMoneyUnit(bidHtmlNoLabel);
    if (unitText == null) {
      unitText = "元";
    }

    if (currencySign == null) {
      model.setBidAmountUnit("元");
    } else {
      boolean mentionsRmb = unitText.contains("人民币");
      boolean mentionsEuro = unitText.contains("欧");
      boolean mentionsDollar = unitText.contains("美");
      boolean inTenThousands = unitText.contains("万");

      // Pick the base unit for the sign, unless the unit text names a different currency.
      String baseUnit = null;
      if (currencySign.equals("$")) {
        if (!mentionsRmb && !mentionsEuro) {
          baseUnit = "美元";
        }
      } else if (currencySign.equals("€")) {
        if (!mentionsRmb && !mentionsDollar) {
          baseUnit = "欧元";
        }
      } else if (currencySign.equals("¥")) {
        if (!mentionsEuro && !mentionsDollar) {
          baseUnit = "元";
        }
      }

      if (baseUnit == null) {
        model.addMessage("获取金额单位异常");
      } else if (inTenThousands) {
        model.setBidAmountUnit("万" + baseUnit);
      } else {
        model.setBidAmountUnit(baseUnit);
      }
    }

    try {
      Pattern thousandsSeparator = Pattern.compile(",");
      String rawAmount = getPurAmount(bidHtmlNoLabel);
      model.setBidAmount(thousandsSeparator.matcher(rawAmount).replaceAll(""));
    } catch (Exception e) {
      model.addMessage("获取金额异常");
    }
  } catch (Exception e) {
    model.addMessage("获取金额异常");
  }
}
/**
 * Extracts the winning bidder's name and stores it on {@code model}.
 *
 * <p>Does nothing when the model's canceled marker is already set. Otherwise
 * {@code getBidCompanyC} is tried first; if it throws and {@code isCorrectTabel} accepts the raw
 * HTML, {@code getBidCompanyT} is tried. If neither succeeds the message "中标公司获取异常" is added.
 *
 * <p>A non-null name then has every match of {@code DataPattern.BID_COMPANY_FILTER} removed; if
 * the filter still matches the result, the name is cleared to {@code null}.
 *
 * @param model the record being filled in
 * @param bidHtml raw HTML of the notice, used only to decide whether the table fallback applies
 * @param bidHtmlNoLabel notice text with markup removed, passed to the extractors
 */
private static void setBidCompany(Model model, String bidHtml, String bidHtmlNoLabel) {
  if (model.getCanceled() != null) {
    return;
  }

  try {
    model.setBidCompay(getBidCompanyC(bidHtmlNoLabel));
  } catch (Exception primaryFailure) {
    boolean recovered = false;
    if (isCorrectTabel(bidHtml)) {
      try {
        model.setBidCompay(getBidCompanyT(bidHtmlNoLabel));
        recovered = true;
      } catch (Exception fallbackFailure) {
        // Reported below together with the "no usable table" case.
      }
    }
    if (!recovered) {
      model.addMessage("中标公司获取异常");
    }
  }

  String company = model.getBidCompay();
  if (company == null) {
    return;
  }

  model.setBidCompay(DataPattern.BID_COMPANY_FILTER.matcher(company).replaceAll(""));
  boolean stillFiltered = DataPattern.BID_COMPANY_FILTER.matcher(model.getBidCompay()).find();
  if (stillFiltered) {
    model.setBidCompay(null);
  }
}
public static void getDataByDateOld(Date date) throws IOException { DateFormat df = new SimpleDateFormat("yyyyMMdd"); DateFormat dfUrl = new SimpleDateFormat("yyyy-MM-dd"); String fileName = "data/res-" + df.format(date) + ".csv"; OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(fileName), "GB2312"); osw.write(Model.TITLE); // List<Model> result = new ArrayList<Model>(); Integer maxPage = 10; Boolean flag = true; for (int page = 1; page <= maxPage; page++) { // System.out.println(URLPattern.getBidURL(page, dfUrl.format(date), dfUrl.format(date))); String resHtml = HtmlContent.getHtmlContect( URLPattern.getBidURL(page, dfUrl.format(date), dfUrl.format(date))); if (flag) { maxPage = (int) Math.ceil( Double.valueOf(getMatchers(resHtml, DataPattern.RES_AMOUNT).get(0)) / 20.0); if (maxPage == 0) { System.out.println("Warning: 所在日期没有结果 " + df.format(date)); osw.close(); return; } System.out.println("总计" + maxPage + "页"); flag = false; } System.out.println(page + "/" + maxPage); List<String> purInc = getMatchers(resHtml, DataPattern.PUR_INC); List<String> urls = getMatchers(resHtml, DataPattern.BID_URL_OLD); List<String> purTime = getMatchers(resHtml, DataPattern.PUR_TIME); List<String> purProxy = getMatchers(resHtml, DataPattern.PUR_PROXY); List<String> purTitle = getMatchers(resHtml, DataPattern.PUR_TITLE_OLD); for (int i = 0; i < urls.size(); ++i) { try { Model model = new Model(); model.setUrl(urls.get(i)); // model.setUrl("http://www.ccgp.gov.cn/cggg/zygg/zbgg/201507/t20150713_5553307.htm"); String bidHtml = HtmlContent.getHtmlContect( "http://search.ccgp.gov.cn/bidDetailShow.jsp?bidDoc=" + model.getUrl()); String bidHtmlNoLabel = removeHtmlLabel(bidHtml); model.setPurInc(purInc.get(i)); setTime(model, purTime.get(i)); setAddr(model, purTitle.get(i)); if (model.getPurProvince() == null) { String temp = getPurAddrSpecial(bidHtml); if (temp != null) { setAddr(model, temp); } } model.setPurProxy(purProxy.get(i)); if (hasTabel(bidHtml)) { 
model.addMessage("包含表格"); if (!isCorrectTabel(bidHtml)) { model.addMessage("非正常表格"); } } if (getMatchers(bidHtml, DataPattern.PUR_CANCEL).size() != 0) { model.addMessage("废标"); model.setCanceled("Y"); } for (String mode : PUR_MODE) { if (bidHtmlNoLabel.indexOf(mode) != -1) { model.setPurMode(mode); break; } } setPurProject(model, bidHtml, bidHtmlNoLabel); setPurAmount(model, bidHtml, bidHtmlNoLabel); setBidCompany(model, bidHtml, bidHtmlNoLabel); // result.add(model); osw.write(model.toString()); } catch (Exception e) { e.printStackTrace(); } } } System.out.println("Finish"); osw.close(); // for (Model model : result) { // if(true){ // System.out.println(model.toString()); // } // } }