Ejemplo n.º 1
0
  /**
   * Depth-limited crawl step for a single URL.
   *
   * <p>If the URL's depth does not exceed {@code ConDepth}, the page is probed with an HTTP GET;
   * on a 200 response it is fetched and parsed with Jsoup, its title / time / paragraph text are
   * stored through {@code DBOperation.MyInsert}, and every outgoing link that lies under
   * {@code http://green.sohu.com/} is queued in {@code MyLinkDB} with depth + 1.
   *
   * <p>Failures while fetching, storing or queueing are reported on {@code System.err} and do not
   * propagate to the caller.
   *
   * @param Url the URL (with its crawl depth) to process
   * @param ConDepth maximum depth to crawl; deeper URLs are ignored
   * @throws IOException declared for callers' compatibility; I/O failures raised while processing
   *     the page are caught and reported inside this method
   */
  public static void getLinks(MyUrl Url, int ConDepth) throws IOException {
    System.out.print(Url);
    int depth = Url.getDepth();
    if (depth > ConDepth) {
      // Too deep: nothing to fetch, and no HTTP client needs to be created.
      return;
    }

    // Only links strictly below this prefix are queued (the bare prefix itself is not).
    final String sitePrefix = "http://green.sohu.com/";

    HttpClient httpclient = new DefaultHttpClient();
    try {
      String pageUrl = Url.getUrl();

      // Probe the URL first; anything other than 200 OK is skipped.
      HttpResponse response = httpclient.execute(new HttpGet(pageUrl));
      if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
        return;
      }

      Document doc = Jsoup.connect(pageUrl).get();
      Elements links = doc.select("a[href]");
      // first() yields null when the page has no matching element.
      Element titleElement = doc.select("title").first();
      Element timeElement = doc.select("div.time").first();
      String content = doc.select("p").text();
      System.out.println(titleElement + content);

      // "k" is the placeholder stored when title/time are not both available.
      String title = "k";
      String time = "k";
      if (titleElement != null && timeElement != null) {
        title = titleElement.text();
        time = timeElement.text();
      }
      System.out.println(pageUrl);
      System.out.println(title + time + content + pageUrl);

      // Persist the page; a storage failure must not prevent link harvesting.
      try {
        DBOperation.MyInsert(new WebnewBean(title, time, content, pageUrl));
      } catch (Exception e) {
        System.err.println(e);
      }

      // Queue same-site links one level deeper.
      try {
        for (Element link : links) {
          MyUrl candidate = new MyUrl();
          candidate.setUrl(link.attr("abs:href"));
          candidate.setDepth(depth + 1);
          String candidateUrl = candidate.getUrl();
          if (candidateUrl.length() > sitePrefix.length() && candidateUrl.startsWith(sitePrefix)) {
            MyLinkDB.addUnVisitedUrl(candidate);
          }
        }
      } catch (Exception e) {
        System.err.println(e);
      }
    } catch (Exception e) {
      System.err.println(e);
    } finally {
      // Release the connection held by the probe request; the client is not reused.
      httpclient.getConnectionManager().shutdown();
    }
  }