Пример #1
0
 /**
  * Reads the purchase-project name from the page text, stores it on {@code model} and then
  * normalises it.
  *
  * <p>Extraction first calls {@code getPurProjectC}; if that throws and {@code bidHtml} passes
  * {@code isCorrectTabel}, {@code getPurProjectT} is tried instead. If no extractor succeeds the
  * message "名称获取异常" is added to the model. When a name is present afterwards it is cleaned:
  * an unclosed trailing parenthetical is dropped, everything from "编号" onward is dropped, dash
  * runs are collapsed to a single "-", and one trailing ";" is removed.
  *
  * @param model record being populated; receives the name and any failure message
  * @param bidHtml page HTML, used here only for the {@code isCorrectTabel} check
  * @param bidHtmlNoLabel text handed to the name extractors
  */
 private static void setPurProject(Model model, String bidHtml, String bidHtmlNoLabel) {
   try {
     model.setPurProject(getPurProjectC(bidHtmlNoLabel));
   } catch (Exception e) {
     if (isCorrectTabel(bidHtml)) {
       try {
         model.setPurProject(getPurProjectT(bidHtmlNoLabel));
       } catch (Exception ex) {
         model.addMessage("名称获取异常");
       }
     } else {
       model.addMessage("名称获取异常");
     }
   }

   // Explicit null check instead of catching NullPointerException around the cleanup.
   String name = model.getPurProject();
   if (name == null) {
     return;
   }

   // Drop a trailing parenthetical that was opened but never closed. The previous pattern had an
   // unbalanced "(" and failed to compile, so it threw on every call.
   // NOTE(review): the full-width alternative (U+FF08 / U+FF09) is presumed to be the original
   // intent of the first branch - confirm against the upstream source.
   name = Pattern.compile("\uFF08[^\uFF09]*$|\\([^)]*$").matcher(name).replaceAll("");
   // Drop everything from the "编号" marker onward.
   name = Pattern.compile("\\s*编号.*").matcher(name).replaceAll("");
   // NOTE(review): the next three replacements appear to map a character to itself as written;
   // they are kept unchanged in case the literals differ in a way not visible here - confirm.
   name = Pattern.compile("“").matcher(name).replaceAll("“");
   name = Pattern.compile("”").matcher(name).replaceAll("”");
   name = Pattern.compile(" ").matcher(name).replaceAll(" ");
   // Collapse any run of dash characters into a single "-".
   name = Pattern.compile("(—|-|-)+").matcher(name).replaceAll("-");

   // endsWith is safe on an empty string, unlike substring(length() - 1).
   if (name.endsWith(";")) {
     name = name.substring(0, name.length() - 1);
   }
   model.setPurProject(name);
 }
Пример #2
0
 /**
  * Fills in the amount unit and the amount on {@code model} from the page text.
  *
  * <p>The unit is chosen from the currency symbol returned by {@code getMoneyUnit} together with
  * the unit text returned by {@code getAmountUnit} (treated as "元" when null). A missing symbol
  * yields "元"; a symbol whose unit text mentions a conflicting currency, or an unrecognised
  * symbol, adds the message "获取金额单位异常". The amount is the result of
  * {@code getPurAmount} with commas removed; any failure adds the message "获取金额异常".
  *
  * @param model record being populated
  * @param bidHtml not used by this method
  * @param bidHtmlNoLabel text handed to the unit and amount extractors
  */
 private static void setPurAmount(Model model, String bidHtml, String bidHtmlNoLabel) {
   try {
     String amountUnit = getAmountUnit(bidHtmlNoLabel);
     String moneySymbol = getMoneyUnit(bidHtmlNoLabel);
     if (amountUnit == null) {
       amountUnit = "元";
     }

     // Stays null when the symbol/unit combination is not one of the accepted ones.
     String resolvedUnit = null;
     if (moneySymbol == null) {
       resolvedUnit = "元";
     } else {
       boolean hasRmb = amountUnit.contains("人民币");
       boolean hasEuro = amountUnit.contains("欧");
       boolean hasDollar = amountUnit.contains("美");
       boolean hasWan = amountUnit.contains("万");

       if ("$".equals(moneySymbol)) {
         if (!hasRmb && !hasEuro) {
           resolvedUnit = hasWan ? "万美元" : "美元";
         }
       } else if ("€".equals(moneySymbol)) {
         if (!hasRmb && !hasDollar) {
           resolvedUnit = hasWan ? "万欧元" : "欧元";
         }
       } else if ("¥".equals(moneySymbol)) {
         if (!hasEuro && !hasDollar) {
           resolvedUnit = hasWan ? "万元" : "元";
         }
       }
     }

     if (resolvedUnit != null) {
       model.setBidAmountUnit(resolvedUnit);
     } else {
       model.addMessage("获取金额单位异常");
     }

     try {
       String rawAmount = getPurAmount(bidHtmlNoLabel);
       // Strip thousands separators before storing the amount.
       model.setBidAmount(Pattern.compile(",").matcher(rawAmount).replaceAll(""));
     } catch (Exception amountFailure) {
       model.addMessage("获取金额异常");
     }
   } catch (Exception unitFailure) {
     model.addMessage("获取金额异常");
   }
 }
Пример #3
0
 /**
  * Reads the winning bidder's name from the page text and stores it on {@code model}.
  *
  * <p>Nothing is done when {@code model.getCanceled()} is non-null. Otherwise
  * {@code getBidCompanyC} is tried first; if it throws and {@code bidHtml} passes
  * {@code isCorrectTabel}, {@code getBidCompanyT} is tried. If neither succeeds the message
  * "中标公司获取异常" is added. A non-null name then has every match of
  * {@code DataPattern.BID_COMPANY_FILTER} removed, and is reset to null if the filter still
  * matches the result.
  *
  * @param model record being populated
  * @param bidHtml page HTML, used here only for the {@code isCorrectTabel} check
  * @param bidHtmlNoLabel text handed to the company extractors
  */
 private static void setBidCompany(Model model, String bidHtml, String bidHtmlNoLabel) {
   if (model.getCanceled() != null) {
     return;
   }

   try {
     model.setBidCompay(getBidCompanyC(bidHtmlNoLabel));
   } catch (Exception primaryFailure) {
     boolean fallbackSucceeded = false;
     if (isCorrectTabel(bidHtml)) {
       try {
         model.setBidCompay(getBidCompanyT(bidHtmlNoLabel));
         fallbackSucceeded = true;
       } catch (Exception fallbackFailure) {
         // Reported through the shared failure message below.
       }
     }
     if (!fallbackSucceeded) {
       model.addMessage("中标公司获取异常");
     }
   }

   String extracted = model.getBidCompay();
   if (extracted == null) {
     return;
   }

   String filtered = DataPattern.BID_COMPANY_FILTER.matcher(extracted).replaceAll("");
   model.setBidCompay(filtered);
   // Discard the name entirely if the filter still matches after the removal pass.
   boolean stillMatches = DataPattern.BID_COMPANY_FILTER.matcher(model.getBidCompay()).find();
   if (stillMatches) {
     model.setBidCompay(null);
   }
 }
Пример #4
0
  /**
   * Fetches every result page for {@code date}, builds one {@code Model} per listed
   * announcement and writes the models to {@code data/res-<yyyyMMdd>.csv} in GB2312.
   *
   * <p>The page count is derived from the result total found on the first page. For each
   * listed URL the detail page is fetched and the individual field setters are applied; a
   * failure on a single entry is printed and that entry is skipped. The output writer is closed
   * on every exit path, including when an exception propagates.
   *
   * @param date day to query; used as both the start and the end date of the search
   * @throws IOException if the output file cannot be opened or written, or if an I/O error
   *     propagates from fetching a result page
   */
  public static void getDataByDateOld(Date date) throws IOException {
    DateFormat df = new SimpleDateFormat("yyyyMMdd");
    DateFormat dfUrl = new SimpleDateFormat("yyyy-MM-dd");
    // Format once; neither value changes inside the paging loop.
    String fileStamp = df.format(date);
    String queryDate = dfUrl.format(date);
    String fileName = "data/res-" + fileStamp + ".csv";

    OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(fileName), "GB2312");
    try {
      osw.write(Model.TITLE);
      // Placeholder upper bound; replaced by the real page count after the first fetch.
      int maxPage = 10;
      boolean firstPage = true;
      for (int page = 1; page <= maxPage; page++) {
        String resHtml =
            HtmlContent.getHtmlContect(URLPattern.getBidURL(page, queryDate, queryDate));
        if (firstPage) {
          // NOTE(review): 20.0 is presumably the number of results per page - confirm.
          maxPage =
              (int)
                  Math.ceil(
                      Double.valueOf(getMatchers(resHtml, DataPattern.RES_AMOUNT).get(0)) / 20.0);
          if (maxPage == 0) {
            System.out.println("Warning: 所在日期没有结果 " + fileStamp);
            return;
          }
          System.out.println("总计" + maxPage + "页");
          firstPage = false;
        }
        System.out.println(page + "/" + maxPage);

        // Parallel lists: index i in each list is read for the same announcement.
        List<String> purInc = getMatchers(resHtml, DataPattern.PUR_INC);
        List<String> urls = getMatchers(resHtml, DataPattern.BID_URL_OLD);
        List<String> purTime = getMatchers(resHtml, DataPattern.PUR_TIME);
        List<String> purProxy = getMatchers(resHtml, DataPattern.PUR_PROXY);
        List<String> purTitle = getMatchers(resHtml, DataPattern.PUR_TITLE_OLD);

        for (int i = 0; i < urls.size(); ++i) {
          try {
            Model model = new Model();
            model.setUrl(urls.get(i));
            String bidHtml =
                HtmlContent.getHtmlContect(
                    "http://search.ccgp.gov.cn/bidDetailShow.jsp?bidDoc=" + model.getUrl());
            String bidHtmlNoLabel = removeHtmlLabel(bidHtml);

            model.setPurInc(purInc.get(i));
            setTime(model, purTime.get(i));
            setAddr(model, purTitle.get(i));
            // Fall back to the detail page when the title did not yield a province.
            if (model.getPurProvince() == null) {
              String temp = getPurAddrSpecial(bidHtml);
              if (temp != null) {
                setAddr(model, temp);
              }
            }
            model.setPurProxy(purProxy.get(i));
            if (hasTabel(bidHtml)) {
              model.addMessage("包含表格");
              if (!isCorrectTabel(bidHtml)) {
                model.addMessage("非正常表格");
              }
            }
            if (getMatchers(bidHtml, DataPattern.PUR_CANCEL).size() != 0) {
              model.addMessage("废标");
              model.setCanceled("Y");
            }
            // The first entry of PUR_MODE found in the text is used.
            for (String mode : PUR_MODE) {
              if (bidHtmlNoLabel.indexOf(mode) != -1) {
                model.setPurMode(mode);
                break;
              }
            }
            setPurProject(model, bidHtml, bidHtmlNoLabel);
            setPurAmount(model, bidHtml, bidHtmlNoLabel);
            setBidCompany(model, bidHtml, bidHtmlNoLabel);
            osw.write(model.toString());
          } catch (Exception e) {
            // Best effort: report the failure and continue with the next entry.
            e.printStackTrace();
          }
        }
      }
      System.out.println("Finish");
    } finally {
      // Close (and thereby flush) the writer even when a fetch or parse error propagates.
      osw.close();
    }
  }