Beispiel #1
0
  // Calling Robot
  /**
   * Crawls every page returned by {@code Page.findAll}.
   *
   * <p>For each page the content is fetched with {@code Robot.get(url)}; every
   * {@code href="http://..."} or {@code href="https://..."} attribute found in it is
   * printed to standard output and its URL is handed to {@code Page.newUrl}. The fetched
   * content is then stored on the page with {@code setBody} followed by {@code update}.
   *
   * <p>The database handle is closed in a {@code finally} block so it is released even
   * when fetching, matching or updating fails part-way through.
   *
   * @throws Exception if any of the database, fetch or update calls fails
   */
  public static void robot() throws Exception {
    // Loop-invariant: the expression is constant, so compile it once instead of per page.
    Pattern linkPattern = Pattern.compile("href=\"https{0,1}:\\/\\/([^\"]*)\"");

    Database db = new Database();
    db.connect();
    try {
      ResultSet rs = Page.findAll(db);

      Page p;
      while ((p = Page.next(rs)) != null) {
        String body = Robot.get(p.getUrl());

        // Look for absolute URLs inside the body and register each of them.
        Matcher m = linkPattern.matcher(body);
        while (m.find()) {
          String match = m.group();
          System.out.println(match);
          // The match has the form href="<url>"; splitting on the quote character
          // leaves the URL (scheme included) at index 1.
          String[] parts = match.split("\"");
          Page.newUrl(parts[1]);
        }

        p.setBody(body);
        p.update();
      }
    } finally {
      db.close();
    }
  }
Beispiel #2
0
  /**
   * Exercises the {@code Page} insert / update / list / remove calls end to end.
   *
   * <p>Two pages are inserted (the second one is also updated), every page returned by
   * {@code Page.findAll} is printed to standard output (timestamp, URL and body, one
   * value per line), and finally both inserted pages are removed again.
   *
   * <p>Cleanup runs in {@code finally} blocks: the database handle is closed and the
   * pages that were inserted are removed even when a step in between throws, so a failed
   * run does not leave the handle open or the sample rows behind.
   *
   * @throws Exception if any of the page or database calls fails
   */
  public static void test01() throws Exception {
    Page p = new Page();
    p.setTmsp("2011-03-04 10:30:00");
    p.setUrl("http://www.google.com");
    p.setBody("my body");
    p.insert();

    // Stays null until the second page has actually been inserted, so the cleanup
    // below never tries to remove a row that was not created.
    Page e = null;
    try {
      Page second = new Page("2011-03-04 10:31:00", "http://www.usp.br", "o corpo");
      second.insert();
      e = second;
      e.setBody("usp usp usp");
      e.update();

      Database db = new Database();
      db.connect();
      try {
        ResultSet rs = Page.findAll(db);

        Page p0;
        while ((p0 = Page.next(rs)) != null) {
          System.out.println(p0.getTmsp());
          System.out.println(p0.getUrl());
          System.out.println(p0.getBody());
        }
      } finally {
        db.close();
      }
    } finally {
      // Same removal order as before (p first, then e); the nested finally makes sure
      // a failure while removing p does not skip removing e.
      try {
        p.remove();
      } finally {
        if (e != null) {
          e.remove();
        }
      }
    }
  }