コード例 #1
0
  /**
   * Crawls the paginated listing that starts at {@code getRealUrl(siteinfo, gloaburl)} and
   * persists everything that was collected.
   *
   * <p>For each page the content is downloaded through {@code http}, handed to the extractor's
   * {@code templateRelation} (which returns the URL of the following page), passed through
   * {@code Systemconfig.dbService.getNorepeatData} (presumably a de-duplication step; confirm
   * against the service) and appended to the overall result. The walk stops when there is no
   * following URL, when a page yields no items, or when any exception occurs while handling a
   * page; in the last case the failure is recorded via {@code saveLog}. Whatever has been
   * accumulated up to that point is then written with {@code saveDatas}.
   */
  @Override
  public void process() {
    List<UserData> collected = new ArrayList<UserData>();
    List<UserData> pageItems = new ArrayList<UserData>();
    String url = getRealUrl(siteinfo, gloaburl);
    HtmlInfo html = htmlInfo(CollectDataType.FANS.name());
    String pending = url;
    try {
      // pageNo only advances after a fully processed page; every other path leaves via break.
      for (int pageNo = 1; pending != null && !pending.isEmpty(); pageNo++) {
        pageItems.clear();
        html.setOrignUrl(pending);

        try {
          http.getContent(html, user);

          WeiboMonitorXpathExtractor extractor =
              (WeiboMonitorXpathExtractor) ((XpathExtractor) xpath);
          pending = extractor.templateRelation(pageItems, html, pageNo, id + "", pending);

          if (pageItems.isEmpty()) {
            Systemconfig.sysLog.log(url + "元数据页面解析为空!!");
            break;
          }
          Systemconfig.sysLog.log(url + "元数据页面解析完成。");

          Systemconfig.dbService.getNorepeatData(pageItems, "");
          collected.addAll(pageItems);

          // From here on, log messages and failure records refer to the upcoming page.
          url = pending;
          if (pending != null) {
            TimeUtil.rest(siteinfo.getDownInterval());
          }
        } catch (Exception failure) {
          failure.printStackTrace();
          try {
            Systemconfig.dbService.saveLog(
                siteFlag, key, 3, url + "\r\n" + failure.getMessage());
          } catch (IOException logFailure) {
            logFailure.printStackTrace();
          }
          break;
        }
      }
      Systemconfig.dbService.saveDatas(collected);
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      collected.clear();
      pageItems.clear();
    }
  }
コード例 #2
0
  /**
   * Resolves the redirect target of each {@code <url>} entry found in {@code args[0]} and stores
   * it on the element of {@code list} at the same index.
   *
   * <p>Every entry is expected to wrap a path in a {@code CDATA[ ... ]]>} section; the path is
   * appended to {@code http://weixin.sogou.com}, requested without following redirects, and the
   * {@code Location} response header is taken as the real URL. Entries that are malformed, fail
   * with an I/O error, or come back without a {@code Location} header get the URL {@code "err."}.
   * After each successfully completed request the method rests for a randomized interval via
   * {@code TimeUtil.rest}.
   *
   * @param list items whose URL is updated; must hold at least as many elements as there are
   *     matched entries
   * @param dom not used by this method
   * @param component not used by this method
   * @param args {@code args[0]} is the text to scan for entries, {@code args[1]} is the value
   *     sent as the {@code Cookie} request header; the method returns without doing anything if
   *     either is missing, {@code null} or empty
   */
  @Override
  public void parseUrl(List<WeixinData> list, Node dom, Component component, String... args) {
    if (args == null || args.length < 2) return;
    // Compare by content: "==" on strings only tests reference identity.
    if (args[0] == null || args[0].isEmpty() || args[1] == null || args[1].isEmpty()) return;
    String cookie = args[1];

    List<String> results = StringUtil.regMatches(args[0], "<url>", "/url", true);
    for (int i = 0; i < results.size(); i++) {
      String entry = results.get(i);
      String loc = null;

      int cdataStart = entry.indexOf("CDATA[");
      int cdataEnd = entry.lastIndexOf("]]>");
      if (cdataStart < 0 || cdataEnd < cdataStart + 6) {
        // A malformed entry must not abort the whole batch; it is marked "err." below.
        Systemconfig.sysLog.log("malformed url entry: " + entry);
      } else {
        String tmpUrl = "http://weixin.sogou.com" + entry.substring(cdataStart + 6, cdataEnd);
        try {
          HttpURLConnection conn = (HttpURLConnection) new URL(tmpUrl).openConnection();
          try {
            conn.addRequestProperty(
                "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:38.0) Gecko/20100101 Firefox/38.0");
            conn.setRequestProperty("Cookie", cookie);
            // NOTE(review): this static call changes the default for the whole JVM. It is kept
            // only because other code may have come to rely on it; consider removing it.
            HttpURLConnection.setFollowRedirects(false);
            // The static default is only consulted when a connection is created, so it does not
            // affect this already-opened connection; the per-instance flag does.
            conn.setInstanceFollowRedirects(false);
            conn.connect();
            loc = conn.getHeaderField("Location");
            if (loc != null) Systemconfig.sysLog.log(conn.getResponseMessage());
          } finally {
            // Release the connection before resting instead of holding it during the pause.
            conn.disconnect();
          }

          Systemconfig.sysLog.log("real url: " + loc);
          int sleepTime = 30 + (int) (Math.random() * 20);
          Systemconfig.sysLog.log("sleep..." + sleepTime);
          TimeUtil.rest(sleepTime);
        } catch (IOException e) {
          // Also covers MalformedURLException, which is a subclass of IOException.
          e.printStackTrace();
        }
      }

      list.get(i).setUrl(loc == null ? "err." : loc);
    }
  }