コード例 #1
0
  /**
   * Fills in the brief (summary) of already-created entries from raw response text.
   *
   * <p>The fragments returned by {@code StringUtil.regMatches(args[0], "content168>",
   * "/content168", true)} are each passed through {@code StringUtil.regMatcher(fragment,
   * "CDATA\\[", "\\]")}, and the i-th result is stored on the i-th entry of {@code list}.
   * Fragments beyond the end of {@code list} are ignored.
   *
   * @param list entries to update; must already contain one entry per expected match
   * @param dom not used by this method
   * @param component not used by this method
   * @param args {@code args[0]} is the raw text to scan; the method does nothing when it is
   *     missing, {@code null} or empty
   */
  @Override
  public void parseBrief(List<WeixinData> list, Node dom, Component component, String... args) {
    // Compare by content: "==" on strings only matches the interned literal.
    if (args == null || args.length == 0 || args[0] == null || args[0].isEmpty()) return;
    List<String> results = StringUtil.regMatches(args[0], "content168>", "/content168", true);
    if (results == null) return;

    // Never index past the entries that exist in the list.
    int count = Math.min(results.size(), list.size());
    for (int i = 0; i < count; i++) {
      String fragment = results.get(i);
      String brief = StringUtil.regMatcher(fragment, "CDATA\\[", "\\]");
      list.get(i).setBrief(brief);
    }
  }
コード例 #2
0
  /**
   * Creates one entry per title found in the raw response text and appends it to {@code list}.
   *
   * <p>The fragments returned by {@code StringUtil.regMatches(args[0], "title>", "/title", true)}
   * are each passed through {@code StringUtil.regMatcher(fragment, "CDATA\\[", "\\]")}; the
   * result becomes the title of a new {@code WeixinData}.
   *
   * @param list receives the newly created entries, in match order
   * @param dom not used by this method
   * @param component not used by this method
   * @param args {@code args[0]} is the raw text to scan; the method does nothing when it is
   *     missing, {@code null} or empty
   */
  @Override
  public void parseTitle(List<WeixinData> list, Node dom, Component component, String... args) {
    // Compare by content: "==" on strings only matches the interned literal.
    if (args == null || args.length == 0 || args[0] == null || args[0].isEmpty()) return;
    List<String> results = StringUtil.regMatches(args[0], "title>", "/title", true);
    if (results == null) return;

    for (String fragment : results) {
      String title = StringUtil.regMatcher(fragment, "CDATA\\[", "\\]");
      WeixinData vd = new WeixinData();
      vd.setTitle(title);
      list.add(vd);
    }
  }
コード例 #3
0
 /**
  * Sets the source of a single entry from the first node that
  * {@code commonList(component.getXpath(), dom)} returns.
  *
  * <p>Nothing is written when {@code component} is {@code null}, when no node list is returned,
  * or when the list has no first item.
  *
  * @param data entry whose source is set
  * @param dom node handed to {@code commonList} together with the component's XPath
  * @param component supplies the XPath; may be {@code null}
  * @param strings not used by this method
  */
 @Override
 public void parseSource(WeixinData data, Node dom, Component component, String... strings) {
   if (component != null) {
     NodeList matches = commonList(component.getXpath(), dom);
     if (matches != null) {
       Node first = matches.item(0);
       if (first != null) {
         String text = first.getTextContent();
         data.setSource(StringUtil.format(text));
       }
     }
   }
 }
コード例 #4
0
 /**
  * Source: sets the source of each entry from the node at the same index.
  *
  * <p>The nodes come from {@code head(component.getXpath(), dom, list.size(),
  * component.getName())}; node {@code i} is formatted with {@code StringUtil.format} and stored
  * on {@code list.get(i)}.
  *
  * @param list entries to update
  * @param dom node handed to {@code head} together with the component's XPath
  * @param component supplies the XPath and name; the method does nothing when it is {@code null}
  * @param strings not used by this method
  */
 @Override
 public void parseSource(List<WeixinData> list, Node dom, Component component, String... strings) {
   if (component != null) {
     NodeList nodes = head(component.getXpath(), dom, list.size(), component.getName());
     if (nodes != null) {
       int index = 0;
       while (index < nodes.getLength()) {
         String text = nodes.item(index).getTextContent();
         WeixinData entry = list.get(index);
         entry.setSource(StringUtil.format(text));
         index++;
       }
     }
   }
 }
コード例 #5
0
 /**
  * Collects the text of every node returned by {@code commonList(component.getXpath(), dom)}
  * into a single image-URL string.
  *
  * <p>Each node's text is passed through {@code StringUtil.format} and followed by {@code ";"},
  * so the stored value is a semicolon-terminated list (empty when there are no nodes).
  *
  * @param data entry whose image URL string is set
  * @param dom node handed to {@code commonList} together with the component's XPath
  * @param component supplies the XPath; the method does nothing when it is {@code null}
  * @param args not used by this method
  */
 @Override
 public void parseImgUrl(WeixinData data, Node dom, Component component, String... args) {
   if (component == null) return;
   NodeList nl = commonList(component.getXpath(), dom);
   if (nl == null) return;

   // A builder avoids re-copying the accumulated string on every iteration.
   StringBuilder imgs = new StringBuilder();
   for (int i = 0; i < nl.getLength(); i++) {
     imgs.append(StringUtil.format(nl.item(i).getTextContent())).append(';');
   }
   data.setImgUrl(imgs.toString());
 }
コード例 #6
0
  /**
   * Sets the news source from the first node returned by
   * {@code commonList(component.getXpath(), dom)}.
   *
   * <p>The node text is formatted with {@code StringUtil.format} and the {@code "来源:"} label
   * is removed. When the remaining text is empty or longer than 20 characters, the fixed site
   * name is stored instead.
   *
   * @param data entry whose source is set
   * @param dom node handed to {@code commonList} together with the component's XPath
   * @param component supplies the XPath; the method does nothing when it is {@code null}
   * @param args not used by this method
   */
  @Override
  public void parseSource(NewsData data, Node dom, Component component, String... args) {
    if (component == null) {
      return;
    }
    NodeList matches = commonList(component.getXpath(), dom);
    if (matches == null) {
      return;
    }

    final String fallback = "慧聪橡胶网";
    String source = "";
    Node first = matches.item(0);
    if (first != null) {
      source = StringUtil.format(first.getTextContent());
    }
    source = source.replace("来源:", "");

    // Empty or implausibly long text is replaced by the fixed site name.
    if (source.equals("") || source.length() > 20) {
      source = fallback;
    }
    data.setSource(source);
  }
コード例 #7
0
  /**
   * Extracts the text of the first node returned by
   * {@code commonList(component.getXpath(), dom)}, strips the {@code "●"} bullet and surrounding
   * whitespace, and stores the result on {@code data}.
   *
   * <p>When the node list has no first item an empty string is stored.
   *
   * @param data entry that receives the extracted text
   * @param dom node handed to {@code commonList} together with the component's XPath
   * @param component supplies the XPath; the method does nothing when it is {@code null}
   * @param args not used by this method
   */
  @Override
  public void parseAuthor(NewsData data, Node dom, Component component, String... args) {
    if (component == null) return;
    NodeList matches = commonList(component.getXpath(), dom);
    if (matches == null) return;

    Node first = matches.item(0);
    String text = first == null ? "" : StringUtil.format(first.getTextContent());
    String cleaned = text.replace("●", "").trim();

    // NOTE(review): this method is named parseAuthor but writes to the source field; this looks
    // like a copy-paste slip — confirm whether an author setter on NewsData was intended.
    data.setSource(cleaned);
  }
コード例 #8
0
  /**
   * Resolves the final URL of each entry by requesting its sogou link and reading the
   * {@code Location} header of the (unfollowed) redirect response.
   *
   * <p>The fragments returned by {@code StringUtil.regMatches(args[0], "<url>", "/url", true)}
   * are expected to wrap a path in {@code CDATA[ ... ]]>}; that path is appended to
   * {@code http://weixin.sogou.com} and requested with the supplied cookie. The i-th result is
   * stored on the i-th entry of {@code list}; {@code "err."} is stored when the fragment is
   * malformed, the request fails, or no {@code Location} header is returned. After each
   * successful request the method pauses via {@code TimeUtil.rest} with a random value in
   * {@code [30, 50)}.
   *
   * @param list entries to update; fragments beyond the end of the list are ignored
   * @param dom not used by this method
   * @param component not used by this method
   * @param args {@code args[0]} is the raw text to scan and {@code args[1]} the cookie header
   *     value; the method does nothing when either is missing, {@code null} or empty
   */
  @Override
  public void parseUrl(List<WeixinData> list, Node dom, Component component, String... args) {
    if (args == null || args.length < 2) return;
    // Compare by content: "==" on strings only matches the interned literal.
    if (args[0] == null || args[0].isEmpty() || args[1] == null || args[1].isEmpty()) return;
    String cookie = args[1];

    List<String> results = StringUtil.regMatches(args[0], "<url>", "/url", true);
    if (results == null) return;

    // Never index past the entries that exist in the list.
    int count = Math.min(results.size(), list.size());
    for (int i = 0; i < count; i++) {
      String fragment = results.get(i);
      int start = fragment.indexOf("CDATA[");
      int end = fragment.lastIndexOf("]]>");
      if (start < 0 || end < start + 6) {
        // Markers missing or out of order: substring would throw, so record the failure instead.
        list.get(i).setUrl("err.");
        continue;
      }
      String tmpUrl = "http://weixin.sogou.com" + fragment.substring(start + 6, end);

      String loc = null;
      HttpURLConnection conn = null;
      try {
        conn = (HttpURLConnection) new URL(tmpUrl).openConnection();
        conn.addRequestProperty(
            "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:38.0) Gecko/20100101 Firefox/38.0");
        conn.setRequestProperty("Cookie", cookie);
        // Retained from the original: changes the JVM-wide default for connections created
        // later. It has no effect on conn, which has already been constructed.
        HttpURLConnection.setFollowRedirects(false);
        // This is what actually stops THIS connection from following the redirect, so the
        // Location header of the redirect response can be read.
        conn.setInstanceFollowRedirects(false);
        conn.connect();
        loc = conn.getHeaderField("Location");
        if (loc != null) Systemconfig.sysLog.log(conn.getResponseMessage());

        Systemconfig.sysLog.log("real url: " + loc);
        int sleepTime = 30 + (int) (Math.random() * 20);
        Systemconfig.sysLog.log("sleep..." + sleepTime);
        TimeUtil.rest(sleepTime);
      } catch (IOException e) {
        // Covers MalformedURLException as well; the entry is marked "err." below.
        Systemconfig.sysLog.log("failed to resolve " + tmpUrl + ": " + e);
      } finally {
        if (conn != null) conn.disconnect();
      }

      list.get(i).setUrl(loc == null ? "err." : loc);
    }
  }
コード例 #9
0
  /**
   * Sets the news source from the first node returned by
   * {@code commonList(component.getXpath(), dom)}.
   *
   * <p>The node text is formatted with {@code StringUtil.format}, cut to what follows the
   * {@code "来源:"} label, and cut again before {@code "发布日期"} (with spaces removed). The
   * fixed association name is stored instead when the formatted text is {@code null}, when it
   * contains {@code "本站"}, or when it is longer than 20 characters.
   *
   * @param data entry whose source is set
   * @param dom node handed to {@code commonList} together with the component's XPath
   * @param component supplies the XPath; the method does nothing when it is {@code null}
   * @param args not used by this method
   */
  @Override
  public void parseSource(NewsData data, Node dom, Component component, String... args) {
    if (component == null) return;

    NodeList nl = commonList(component.getXpath(), dom);
    if (nl == null) return;

    final String defaultSource = "中国橡胶工业协会-轮胎分会";
    final String sourceLabel = "来源:";
    final String dateLabel = "发布日期";

    String str = "";
    if (nl.item(0) != null) {
      str = StringUtil.format(nl.item(0).getTextContent());
    }
    // The null check has to come before the first dereference; previously it sat after
    // str.contains(...) and could never take effect.
    if (str == null) {
      data.setSource(defaultSource);
      return;
    }

    if (str.contains(sourceLabel)) {
      str = str.substring(str.indexOf(sourceLabel) + sourceLabel.length());
    }
    if (str.contains(dateLabel)) {
      str = str.substring(0, str.indexOf(dateLabel)).replace(" ", "").trim();
    }
    if (str.contains("本站") || str.length() > 20) {
      str = defaultSource;
    }
    data.setSource(str);
  }
コード例 #10
0
  /**
   * Fetches the read and like counters of an article and stores them on {@code data}.
   *
   * <p>{@code __biz} and {@code mid} are taken from {@code data.getUrl()}, {@code uin} and
   * {@code key} from {@code config/WeixinKey/WeixinKey.txt}. The four values are sent to the
   * {@code /mp/getappmsgext} endpoint. While the response does not contain {@code read_num}, the
   * operator is asked on the console to refresh the key file; the file is then re-read and the
   * request repeated. Entering {@code c}/{@code C}, end of console input, or exhausting the retry
   * budget stops the loop. Counters that cannot be extracted or parsed are left untouched.
   *
   * @param data entry providing the article URL and receiving the counters
   * @param dom not used by this method
   * @param component not used by this method
   * @param args not used by this method
   */
  public void parseNumber(WeixinData data, Node dom, Component component, String... args) {
    // Example article URL:
    // http://mp.weixin.qq.com/s?__biz=MjM5ODE1NTMxMQ==&mid=201653867&idx=1&sn=6f3445a3640eb09ce7cfa5a49509f165&3rd=MzA3MDU4NTYzMw==&scene=6#rd

    String biz = "";
    String mid = "";
    try {
      biz = StringUtil.regMatcher(data.getUrl(), "__biz=", "&");
      mid = StringUtil.regMatcher(data.getUrl(), "mid=", "&");
    } catch (Exception e) {
      Systemconfig.sysLog.log("failed to extract __biz/mid from article url: " + e);
    }

    // credentials[0] = uin, credentials[1] = key; values survive a re-read that lacks them.
    String[] credentials = {"", ""};
    readWeixinKey(credentials);

    String content = fetchAppMsgExt(biz, mid, credentials[0], credentials[1]);

    // One Scanner for all prompts: a fresh Scanner per retry can drop already-buffered input.
    // It is deliberately not closed, because closing it would close System.in.
    Scanner input = null;
    int retry = 0;
    while (content == null || !content.contains("read_num")) {
      if (retry++ > 3) break;
      Systemconfig.sysLog.log("请获取key后输入任意内容回车继续...输入c忽略(很可能无法继续采集,不推荐)");
      System.err.println("请获取key后输入任意内容回车继续...输入c忽略(很可能无法继续采集,不推荐)");
      if (input == null) input = new Scanner(System.in);
      if (!input.hasNext()) break; // console input exhausted; nobody can refresh the key
      String s = input.next();
      if (s.equals("c") || s.equals("C")) break;

      readWeixinKey(credentials);
      content = fetchAppMsgExt(biz, mid, credentials[0], credentials[1]);
    }
    if (content == null) return;

    Integer readNum = toCount(StringUtil.regMatcher(content, "\"read_num\":", ","));
    Integer praiseNum = toCount(StringUtil.regMatcher(content, "\"like_num\":", ","));

    // Each counter is applied independently so one bad value does not block the other.
    if (readNum != null) data.setReadNum(readNum);
    if (praiseNum != null) data.setPraiseNum(praiseNum);
  }

  /** Re-reads the key file and updates {@code credentials} ([0] = uin, [1] = key) in place. */
  private void readWeixinKey(String[] credentials) {
    String fromFile = StringUtil.getContent("config/WeixinKey/WeixinKey.txt");
    if (fromFile == null) return;
    for (String pair : fromFile.split("&")) {
      boolean isUin = pair.contains("uin");
      boolean isKey = pair.contains("key");
      if (!isUin && !isKey) continue;
      String[] parts = pair.split("=");
      if (parts.length < 2) continue; // no value present; keep the previous one
      String value = parts[1].trim();
      if (isUin) credentials[0] = value;
      if (isKey) credentials[1] = value;
    }
  }

  /** Requests the getappmsgext endpoint for the given identifiers and returns the body. */
  private String fetchAppMsgExt(String biz, String mid, String uin, String key) {
    String url =
        "http://mp.weixin.qq.com"
            + "/mp/getappmsgext?"
            + "__biz="
            + biz
            + "&mid="
            + mid
            + "&uin="
            + uin
            + "&key="
            + key;

    HtmlInfo html = new HtmlInfo();
    html.setType("DATA");
    html.setEncode("UTF-8");
    html.setOrignUrl(url);
    html.setCookie("Set-Cookie: wxuin=20156425; Path=/; Expires=Fri, 02-Jan-1970 00:00:00 GMT");
    html.setUa(
        "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0 Mobile/12A4345d Safari/600.1.4");
    SimpleHttpProcess shp = new SimpleHttpProcess();
    shp.getContent(html);
    return html.getContent();
  }

  /** Parses a counter string; returns {@code null} when it is absent or not an integer. */
  private static Integer toCount(String raw) {
    if (raw == null) return null;
    try {
      return Integer.valueOf(raw.trim());
    } catch (NumberFormatException e) {
      Systemconfig.sysLog.log("unparseable counter value: " + raw);
      return null;
    }
  }