// Chamando Robot public static void robot() throws Exception { Database db = new Database(); db.connect(); ResultSet rs = Page.findAll(db); Page p = null; while ((p = Page.next(rs)) != null) { String body = Robot.get(p.getUrl()); // procurar por urls dentro do body // buscar por essas paginas // String expr = "href=\"([^\"]*)"; String ereg = "href=\"https{0,1}:\\/\\/([^\"]*)\""; Pattern pt = Pattern.compile(ereg); Matcher m = pt.matcher(body); while (m.find()) { System.out.println(m.group()); String[] _url = m.group().split("\""); Page.newUrl(_url[1]); } p.setBody(body); p.update(); } db.close(); }
/**
 * Manual smoke test for the page persistence methods.
 *
 * <p>Inserts two pages, updates the second one, prints the tmsp, url and body
 * of every page returned by {@code Page.findAll}, and finally removes the two
 * pages again.
 *
 * <p>Cleanup runs in {@code finally} blocks so that a failure half-way through
 * neither leaks the database handle nor leaves the rows that were already
 * inserted behind. A row is only removed if its insert succeeded.
 *
 * @throws Exception propagated from the database or page calls
 */
public static void test01() throws Exception {
    Page p = new Page();
    p.setTmsp("2011-03-04 10:30:00");
    p.setUrl("http://www.google.com");
    p.setBody("my body");
    p.insert();
    try {
        Page e = new Page("2011-03-04 10:31:00", "http://www.usp.br", "o corpo");
        e.insert();
        try {
            e.setBody("usp usp usp");
            e.update();

            Database db = new Database();
            db.connect();
            try {
                ResultSet rs = Page.findAll(db);
                Page p0;
                while ((p0 = Page.next(rs)) != null) {
                    System.out.println(p0.getTmsp());
                    System.out.println(p0.getUrl());
                    System.out.println(p0.getBody());
                }
            } finally {
                db.close();
            }
        } finally {
            e.remove();
        }
    } finally {
        p.remove();
    }
}
/**
 * Deletes from the {@code page} table every row whose {@code url} column
 * equals this object's url.
 *
 * @throws Exception propagated from the database calls
 */
public void remove() throws Exception {
    // Double embedded single quotes so a url containing ' cannot terminate the
    // SQL string literal early (broken statement / SQL injection).
    // NOTE(review): this is standard-SQL quoting only; engines that also treat
    // backslash as an escape need more. Prefer a parameterized statement if
    // Database can expose one.
    String safeUrl = String.valueOf(url).replace("'", "''");

    Database db = new Database();
    db.connect();
    try {
        db.update("delete from page where url = '" + safeUrl + "'");
    } finally {
        // Release the connection even when the statement fails.
        db.close();
    }
}
public void update() throws Exception { Database db = new Database(); db.connect(); db.update( "update page set tmsp = '" + tmsp + "', body = '" + body + "' where url = '" + url + "'"); // URLEncoder.encode(url, "UTF-8")+"'"); // URLEncoder.encode(url, "ISO-8859-1")+"'"); db.close(); }
// sqllers public void insert() throws Exception { Database db = new Database(); db.connect(); db.update( "insert into page (tmsp, url, body) values ('" + tmsp + "', '" + url + "', '" + body + "')"); db.close(); }