// 深度控制的getLinks方法 public static void getLinks(MyUrl Url, int ConDepth) throws IOException { Document doc; // String codeUrl = "http://" // + URLEncoder.encode(Url.getUrl().substring(7), "UTF-8"); HttpClient httpclient = new DefaultHttpClient(); System.out.print(Url); int Depth = Url.getDepth(); String title = "k"; String time = "k"; String content = "k"; if (Url.getDepth() > ConDepth) { } else { try { HttpGet httpget = new HttpGet(Url.getUrl()); HttpResponse response = httpclient.execute(httpget); int resStatus = response.getStatusLine().getStatusCode(); if (resStatus == HttpStatus.SC_OK) { doc = Jsoup.connect(Url.getUrl()).get(); Elements links = doc.select("a[href]"); Element Title = doc.select("title").first(); Elements context = doc.select("p"); Element Time = doc.select("div.time").first(); // List<String> Url2 = new LinkedList<String>(); content = context.text(); System.out.println(Title + content); if (Title.text() != null && Time != null) { title = Title.text(); time = Time.text(); } System.out.println(Url.getUrl()); System.out.println(title + time + content + Url.getUrl()); // 组装Beans try { WebnewBean news = new WebnewBean(title, time, content, Url.getUrl()); boolean result = DBOperation.MyInsert(news); } catch (Exception e) { System.err.println(e); } try { for (Element link : links) { String linkHref = link.attr("abs:href"); MyUrl Assmble = new MyUrl(); Assmble.setUrl(linkHref); Assmble.setDepth(Depth + 1); // Url2.add(linkHref); // System.out.println(linkHref); if (Assmble.getUrl().length() > 22 && Assmble.getUrl().substring(0, 22).equals("http://green.sohu.com/")) { MyLinkDB.addUnVisitedUrl(Assmble); } } } catch (Exception e) { System.err.println(e); } } } catch (Exception e) { System.err.println(e); } } }