コード例 #1
0
  /**
   * Downloads the page for this task's {@code data} item, extracts its fields and stores it.
   *
   * <p>Flow: resolve the URL with {@code getRealUrl(data)}; fetch it through {@code http}; stop
   * if the fetch produced no content; run {@code xpath.templateContentPage}; save the item via
   * {@code Systemconfig.dbService.saveData}; bump the saved counter on {@code key} while holding
   * its monitor.
   *
   * <p>{@code count.countDown()} is invoked on every exit path (when {@code count} is non-null),
   * including the "no URL" path. NOTE(review): {@code count} is presumably a latch the
   * dispatcher waits on; previously the null-URL early return skipped the count-down, which
   * would leave such a waiter blocked.
   *
   * <p>Exceptions raised while fetching, parsing or saving are logged and swallowed; exceptions
   * from URL resolution still propagate to the caller (after the count-down).
   */
  public void process() {
    try {
      String url = getRealUrl(data);
      // Nothing to fetch without a usable URL; the finally block still counts down.
      if (url == null || url.isEmpty()) {
        return;
      }

      HtmlInfo html = htmlInfo("DATA");
      try {
        html.setOrignUrl(url);
        html.setAgent(false);
        http.getContent(html);
        if (html.getContent() == null) {
          // Download produced no body; there is nothing to parse or save.
          return;
        }

        // Extract the item's fields from the downloaded page.
        xpath.templateContentPage(data, html);
        Systemconfig.sysLog.log(data.getTitle() + "解析完成。。。");

        Systemconfig.dbService.saveData(data);
        synchronized (key) {
          key.savedCountIncrease();
        }
        Systemconfig.sysLog.log(data.getTitle() + "保存完成。。。");
      } catch (Exception e) {
        Systemconfig.sysLog.log("采集出现异常" + url, e);
      }
    } finally {
      if (count != null) {
        count.countDown();
      }
    }
  }
コード例 #2
0
  /**
   * Crawls the paginated relation (fans) listing and stores every entry that was collected.
   *
   * <p>Starting from {@code getRealUrl(siteinfo, gloaburl)}, each page is downloaded, handed to
   * the extractor's {@code templateRelation} (which fills the page list and returns the next
   * page URL), passed through {@code dbService.getNorepeatData}, and appended to the running
   * result. The loop ends when there is no next URL, when a page yields no entries, or when a
   * page fails; in the failure case the error is recorded with {@code dbService.saveLog}.
   * Whatever has been gathered by then is written with {@code dbService.saveDatas}.
   */
  @Override
  public void process() {
    List<UserData> collected = new ArrayList<UserData>();
    List<UserData> pageItems = new ArrayList<UserData>();
    String currentUrl = getRealUrl(siteinfo, gloaburl);
    String pendingUrl = currentUrl;
    HtmlInfo html = htmlInfo(CollectDataType.FANS.name());
    int pageNo = 1;
    try {
      while (true) {
        if (pendingUrl == null || pendingUrl.isEmpty()) {
          break;
        }

        pageItems.clear();
        html.setOrignUrl(pendingUrl);

        try {
          http.getContent(html, user);

          WeiboMonitorXpathExtractor extractor =
              (WeiboMonitorXpathExtractor) ((XpathExtractor) xpath);
          pendingUrl = extractor.templateRelation(pageItems, html, pageNo, id + "", pendingUrl);

          if (pageItems.isEmpty()) {
            Systemconfig.sysLog.log(currentUrl + "元数据页面解析为空!!");
            break;
          }
          Systemconfig.sysLog.log(currentUrl + "元数据页面解析完成。");

          Systemconfig.dbService.getNorepeatData(pageItems, "");
          collected.addAll(pageItems);

          // Advance to the next page; pause between requests only when one will follow.
          currentUrl = pendingUrl;
          pageNo++;
          if (pendingUrl != null) {
            TimeUtil.rest(siteinfo.getDownInterval());
          }
        } catch (Exception e) {
          // A failing page ends the crawl; entries gathered so far are still saved below.
          e.printStackTrace();
          try {
            Systemconfig.dbService.saveLog(
                siteFlag, key, 3, currentUrl + "\r\n" + e.getMessage());
          } catch (IOException e1) {
            e1.printStackTrace();
          }
          break;
        }
      }
      Systemconfig.dbService.saveDatas(collected);
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      collected.clear();
      pageItems.clear();
    }
  }
コード例 #3
0
  /**
   * Looks up the read count and like count of a WeChat article and stores them on {@code data}.
   *
   * <p>The {@code __biz} and {@code mid} values are taken from {@code data.getUrl()}, the
   * {@code uin}/{@code key} values from {@code config/WeixinKey/WeixinKey.txt}, and the
   * {@code /mp/getappmsgext} endpoint is queried with them. While the response does not contain
   * {@code read_num}, the operator is prompted on the console to refresh the key file and the
   * request is repeated (the prompt is shown at most four times); entering {@code c} or
   * {@code C}, or a closed standard input, ends the retries.
   *
   * <p>If no usable response is obtained, {@code data} is left unchanged. The two numbers are
   * parsed independently, so a malformed value for one does not prevent the other from being
   * stored.
   *
   * @param data article whose URL is read and whose read/praise numbers are set
   * @param dom unused by this method
   * @param component unused by this method
   * @param args unused by this method
   */
  public void parseNumber(WeixinData data, Node dom, Component component, String... args) {
    // Example article URL:
    // http://mp.weixin.qq.com/s?__biz=MjM5ODE1NTMxMQ==&mid=201653867&idx=1&sn=6f3445a3640eb09ce7cfa5a49509f165&3rd=MzA3MDU4NTYzMw==&scene=6#rd

    String biz = "";
    String mid = "";
    try {
      biz = StringUtil.regMatcher(data.getUrl(), "__biz=", "&");
      mid = StringUtil.regMatcher(data.getUrl(), "mid=", "&");
    } catch (Exception e) {
      Systemconfig.sysLog.log("Failed to extract __biz/mid from the article url", e);
    }

    String[] uinAndKey = readUinAndKey("", "");
    String content = fetchReadStats(biz, mid, uinAndKey[0], uinAndKey[1]);

    // One Scanner for all retries: a fresh Scanner per prompt can lose input that a previous
    // one already buffered. It is deliberately not closed, since that would close System.in.
    Scanner input = null;
    int retry = 0;
    while (content == null || !content.contains("read_num")) {
      if (retry++ > 3) break;
      Systemconfig.sysLog.log("请获取key后输入任意内容回车继续...输入c忽略(很可能无法继续采集,不推荐)");
      System.err.println("请获取key后输入任意内容回车继续...输入c忽略(很可能无法继续采集,不推荐)");
      if (input == null) {
        input = new Scanner(System.in);
      }
      if (!input.hasNext()) {
        // Standard input is closed (e.g. non-interactive run); nobody can refresh the key.
        break;
      }
      String answer = input.next();
      if (answer.equals("c") || answer.equals("C")) break;

      // Pick up the refreshed key file; values it no longer provides keep their old setting.
      uinAndKey = readUinAndKey(uinAndKey[0], uinAndKey[1]);
      content = fetchReadStats(biz, mid, uinAndKey[0], uinAndKey[1]);
    }

    if (content == null) {
      // The download never produced a body, so there is nothing to extract.
      return;
    }

    Integer readNum = parseCount(StringUtil.regMatcher(content, "\"read_num\":", ","), "read_num");
    if (readNum != null) {
      data.setReadNum(readNum.intValue());
    }

    Integer praiseNum =
        parseCount(StringUtil.regMatcher(content, "\"like_num\":", ","), "like_num");
    if (praiseNum != null) {
      data.setPraiseNum(praiseNum.intValue());
    }
  }

  /**
   * Reads the {@code uin} and {@code key} values from {@code config/WeixinKey/WeixinKey.txt}.
   *
   * <p>The file content is split on {@code &}; for every segment containing {@code "uin"} or
   * {@code "key"} the text between the first and second {@code =} is taken (trimmed). Segments
   * without a value are skipped.
   *
   * @param currentUin value to keep when the file provides no uin
   * @param currentKey value to keep when the file provides no key
   * @return a two-element array: {@code [uin, key]}
   */
  private String[] readUinAndKey(String currentUin, String currentKey) {
    String uin = currentUin;
    String key = currentKey;
    String fromFile = StringUtil.getContent("config/WeixinKey/WeixinKey.txt");
    if (fromFile != null) {
      for (String segment : fromFile.split("&")) {
        String[] parts = segment.split("=");
        if (parts.length < 2) {
          continue;
        }
        if (segment.contains("uin")) uin = parts[1].trim();
        if (segment.contains("key")) key = parts[1].trim();
      }
    }
    return new String[] {uin, key};
  }

  /**
   * Requests {@code /mp/getappmsgext} for the given article identifiers and credentials.
   *
   * @return the downloaded content as reported by {@code HtmlInfo.getContent()}, possibly
   *     {@code null}
   */
  private String fetchReadStats(String biz, String mid, String uin, String key) {
    String url =
        "http://mp.weixin.qq.com"
            + "/mp/getappmsgext?"
            + "__biz="
            + biz
            + "&mid="
            + mid
            + "&uin="
            + uin
            + "&key="
            + key;

    HtmlInfo html = new HtmlInfo();
    html.setType("DATA");
    html.setEncode("UTF-8");
    html.setOrignUrl(url);
    html.setCookie("Set-Cookie: wxuin=20156425; Path=/; Expires=Fri, 02-Jan-1970 00:00:00 GMT");
    html.setUa(
        "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0 Mobile/12A4345d Safari/600.1.4");
    new SimpleHttpProcess().getContent(html);
    return html.getContent();
  }

  /**
   * Converts an extracted counter to a number.
   *
   * @param raw text extracted from the response, may be {@code null}
   * @param field name of the counter, used only in the log message
   * @return the parsed value, or {@code null} when {@code raw} is {@code null} or not an integer
   */
  private Integer parseCount(String raw, String field) {
    if (raw == null) {
      return null;
    }
    try {
      return Integer.valueOf(Integer.parseInt(raw.trim()));
    } catch (NumberFormatException e) {
      Systemconfig.sysLog.log("Unparseable " + field + " value: " + raw, e);
      return null;
    }
  }