/**
 * Marks the {@code spider_resource} row matching {@code url} as saved
 * ({@code saved = 1}) and stores the HTTP status taken from {@code event}
 * together with the current time rendered by {@link Date#toLocaleString()}.
 *
 * @param url   URL used in the {@code where} clause
 * @param event event whose {@code getHttpStatus()} value is written to the row
 */
public void setSpidered(String url, URLSpideredProxyOkEvent event) {
    StringBuilder update = new StringBuilder("update spider_resource set saved = 1, httpStatus = ");
    update.append(daoHelp.format(event.getHttpStatus()));
    // The double space before dTime is kept so the statement text is unchanged.
    update.append(",  dTime = ");
    update.append(daoHelp.format(new Date().toLocaleString()));
    update.append(" where url = ");
    update.append(daoHelp.format(url));
    daoHelp.execSql(update.toString());
}
/**
 * Runs {@code sql}, converts every returned row into a {@link Resource} via
 * {@code createResourceFromRecord} and sets {@code crawl = 1} on the row's
 * {@code rid} in {@code spider_resource}.
 *
 * <p>The result set must expose a {@code rid} column. A {@link SQLException}
 * is logged and the resources collected up to that point are returned.
 *
 * @param sql select statement to execute as given
 * @return the resources built from the rows read; never {@code null}
 */
protected List<Resource> getResourceByList(String sql) {
    List<Resource> resList = new ArrayList<Resource>();
    Statement stmt = null;
    ResultSet rs = null;
    try {
        stmt = daoHelp.getConnection().createStatement();
        rs = stmt.executeQuery(sql);
        while (rs.next()) {
            int rid = rs.getInt("rid");
            Resource res = this.createResourceFromRecord(rs);
            daoHelp.execSql("update spider_resource set crawl = 1 where rid = " + rid);
            resList.add(res);
        }
    } catch (SQLException e) {
        log.error("SQLException -> ", e);
    } finally {
        // finally runs on both the normal and the exceptional path, so one
        // close here replaces the copies that were repeated in try and catch.
        daoHelp.safeClose(rs);
        daoHelp.safeClose(stmt);
    }
    return resList;
}
/**
 * Inserts a seed row ({@code url}, {@code type}) into {@code spider_resource}
 * and then calls {@code updResourceInfo} for the same URL.
 *
 * <p>Both string values are rendered through {@code daoHelp.format}, the same
 * helper {@code setSpidered} and {@code create(Resource)} use for URL values,
 * instead of being spliced between hand-written quotes.
 * NOTE(review): confirm {@code daoHelp.format(String)} escapes embedded quotes.
 *
 * @param url      seed URL
 * @param site     site value forwarded to {@code updResourceInfo}
 * @param city     city value forwarded to {@code updResourceInfo}
 * @param type     info type forwarded to {@code updResourceInfo}
 * @param seedType value stored in the {@code type} column of the new row
 */
public void createSeed(String url, String site, int city, String type, String seedType) {
    String sql = "insert into spider_resource(url, type) values ("
            + daoHelp.format(url) + ", " + daoHelp.format(seedType) + ")";
    daoHelp.execSql(sql);
    this.updResourceInfo(url, site, type, city);
}
protected synchronized boolean isHavingUrl(String url) { boolean isHaving = false; // 判断该URL是否这次任务中被抓取 if (URLCache.findUrlInCache(url)) // 存在为true return true; // 判断该URL是否曾已被抓取 if (storage.getSinfoDAO().isCrawlUrl(url)) return true; Statement stmt = null; String sql = "select top 1 url from spider_resource where url = '" + url + "'"; ResultSet rs = null; try { stmt = daoHelp.getConnection().createStatement(); rs = stmt.executeQuery(sql); while (rs.next()) { isHaving = true; } daoHelp.safeClose(rs); daoHelp.safeClose(stmt); } catch (SQLException e) { log.error("SQLException -> ", e); daoHelp.safeClose(rs); daoHelp.safeClose(stmt); } finally { daoHelp.safeClose(rs); daoHelp.safeClose(stmt); } return isHaving; }
/**
 * Loads every {@code spider_check} row whose {@code cType} is not 5, ordered
 * by {@code sid}, and maps each row to a {@link CheckInfo}.
 *
 * <p>Columns are read by position (1..8) from a {@code select *}, so the
 * mapping depends on the table's column order. A {@link SQLException} is
 * logged and the rows read so far are returned.
 *
 * @return the mapped rows; never {@code null}
 */
public List<CheckInfo> get() {
    List<CheckInfo> list = new ArrayList<CheckInfo>();
    String sql = "select * from spider_check where cType <> 5 order by sid ";
    Statement stmt = null;
    ResultSet rs = null;
    try {
        stmt = daoHelp.getConnection().createStatement();
        rs = stmt.executeQuery(sql);
        while (rs.next()) {
            CheckInfo checkInfo = new CheckInfo();
            checkInfo.setSid(rs.getString(1));
            checkInfo.setPid(rs.getString(2));
            checkInfo.setField(rs.getString(3));
            checkInfo.setUrl(rs.getString(4));
            checkInfo.setCType(rs.getInt(5));
            checkInfo.setSite(rs.getString(6));
            checkInfo.setSType(rs.getString(7));
            checkInfo.setContent(rs.getString(8));
            list.add(checkInfo);
        }
    } catch (SQLException e) {
        log.error("SQLException -> ", e);
    } finally {
        // finally alone guarantees cleanup; the duplicate closes in try and
        // catch were redundant.
        daoHelp.safeClose(rs);
        daoHelp.safeClose(stmt);
    }
    return list;
}
/**
 * Inserts {@code resource} into {@code spider_resource} unless
 * {@code isHavingUrl} reports its URL as already known.
 *
 * <p>The new row gets the resource's {@code paId}, {@code url} and
 * {@code type}, with {@code httpStatus} set to 0 and {@code dTime} set to an
 * empty string.
 *
 * @param resource resource to store
 * @return {@code false} if the URL was already known and nothing was inserted,
 *         {@code true} once the insert statement has been handed to
 *         {@code daoHelp.execSql}
 */
public boolean create(Resource resource) {
    boolean alreadyKnown = this.isHavingUrl(resource.getUrl());
    if (alreadyKnown) {
        return false;
    }
    String insert = "insert into spider_resource( paId,url,type,httpStatus,dTime ) values ( "
            + daoHelp.format(resource.getPaId()) + ","
            + daoHelp.format(resource.getUrl()) + ","
            + daoHelp.format(resource.getType()) + ","
            + "0,'' ) ";
    daoHelp.execSql(insert);
    return true;
}
/**
 * Marks the {@code spider_resource} row for the event's URL as not saved
 * ({@code saved = 0}) and stores the event's HTTP status together with the
 * current time rendered by {@link Date#toLocaleString()}.
 *
 * <p>The URL is rendered through {@code daoHelp.format}, as
 * {@code setSpidered} does, instead of being spliced between hand-written
 * quotes. NOTE(review): confirm {@code daoHelp.format(String)} escapes
 * embedded quotes.
 *
 * @param event event supplying the URL and the HTTP status
 */
public void setSpiderError(URLSpideredProxyErrorEvent event) {
    // String.valueOf yields the same text the former concatenation produced,
    // whatever type getUrl() is declared to return.
    String urlLiteral = daoHelp.format(String.valueOf(event.getUrl()));
    String sql = "update spider_resource set saved = 0, httpStatus = " + event.getHttpStatus() + ", "
            + " dTime = '" + new Date().toLocaleString() + "'"
            + " where url = " + urlLiteral;
    daoHelp.execSql(sql);
}
/**
 * Inserts {@code info} into {@code spider_check}, writing the columns
 * {@code sid, pid, field, url, cType, site, sType, content} from the
 * corresponding getters, each rendered through {@code daoHelp.format}.
 *
 * @param info check record to store
 */
public void create(CheckInfo info) {
    String insert = "insert into spider_check (sid, pid, field, url, cType, site, sType, content"
            + " ) values ( "
            + daoHelp.format(info.getSid()) + ","
            + daoHelp.format(info.getPid()) + ","
            + daoHelp.format(info.getField()) + ","
            + daoHelp.format(info.getUrl()) + ","
            + daoHelp.format(info.getCType()) + ","
            + daoHelp.format(info.getSite()) + ","
            + daoHelp.format(info.getSType()) + ","
            + daoHelp.format(info.getContent()) + ")";
    daoHelp.execSql(insert);
}
/**
 * Inserts a seed row ({@code url}, {@code type}, {@code kid}) into
 * {@code spider_resource} from {@code seedInfo} and then calls
 * {@code updResourceInfo} with the seed's URL, site, type and city.
 *
 * <p>The URL is rendered through {@code daoHelp.format}, the helper
 * {@code setSpidered} and {@code create(Resource)} use for URL values, instead
 * of being spliced between hand-written quotes. The seed type and kid are
 * concatenated as before because their declared types are not visible here.
 * NOTE(review): confirm {@code daoHelp.format(String)} escapes embedded quotes.
 *
 * @param seedInfo seed description to store
 */
public void createSeed(SeedInfo seedInfo) {
    String sql = "insert into spider_resource(url, type, kid) values ("
            + daoHelp.format(seedInfo.getUrl())
            + ", '" + seedInfo.getSeedType() + "', "
            + seedInfo.getKid() + ")";
    daoHelp.execSql(sql);
    this.updResourceInfo(
            seedInfo.getUrl(), seedInfo.getSite(), seedInfo.getType(), seedInfo.getCity());
}
/**
 * Reads the first two columns of {@code spider_info} as dates.
 *
 * <p>If the table holds several rows the values of the last row read win; if
 * it holds none, or a {@link SQLException} occurs (it is logged), the
 * corresponding elements stay {@code null}.
 *
 * @return a two-element array: column 1 at index 0, column 2 at index 1
 */
public Date[] getCrawlDate() {
    Date[] date = new Date[2];
    String sql = "select * from spider_info ";
    Statement stmt = null;
    ResultSet rs = null;
    try {
        stmt = daoHelp.getConnection().createStatement();
        rs = stmt.executeQuery(sql);
        while (rs.next()) {
            date[0] = rs.getDate(1);
            date[1] = rs.getDate(2);
        }
    } catch (SQLException e) {
        log.error("SQLException -> ", e);
    } finally {
        // One close in finally covers every exit path.
        daoHelp.safeClose(rs);
        daoHelp.safeClose(stmt);
    }
    return date;
}
/**
 * Checks whether {@code spider_resource} contains at least one row with
 * {@code crawl = 1} whose {@code httpStatus} is 0 or {@code NULL}.
 *
 * <p>A {@link SQLException} is logged and reported as {@code false}.
 *
 * @return {@code true} if such a row exists
 */
private boolean isAdd() {
    boolean isGet = false;
    String sql = "select top 1 rid rid from spider_resource where crawl = 1 and ( httpStatus = 0 or httpStatus is NULL)";
    Statement stmt = null;
    ResultSet rs = null;
    try {
        stmt = daoHelp.getConnection().createStatement();
        rs = stmt.executeQuery(sql);
        // The query returns at most one row, so a single next() answers it.
        isGet = rs.next();
    } catch (SQLException e) {
        log.error("SQLException -> ", e);
    } finally {
        // One close in finally covers every exit path.
        daoHelp.safeClose(rs);
        daoHelp.safeClose(stmt);
    }
    return isGet;
}
/**
 * Stores {@code reqCount + 1} in the {@code reqCount} column of the
 * {@code spider_resource} row identified by {@code rid}.
 *
 * @param rid      primary key of the row to update
 * @param reqCount request count before this call; the value written is one higher
 */
public void addRequsetCount(int rid, int reqCount) {
    int nextCount = reqCount + 1;
    daoHelp.execSql("update spider_resource set reqCount = " + nextCount + " where rid = " + rid);
}
/**
 * Sets {@code parser = 1} on the {@code spider_resource} row whose URL matches
 * the one carried by {@code event}.
 *
 * <p>The URL is rendered through {@code daoHelp.format}, the helper
 * {@code setSpidered} and {@code create(Resource)} use for URL values, instead
 * of being spliced between hand-written quotes.
 * NOTE(review): confirm {@code daoHelp.format(String)} escapes embedded quotes.
 *
 * @param event event supplying the URL of the parsed resource
 */
public void setParsered(ResourceParsedOkEvent event) {
    // String.valueOf yields the same text the former concatenation produced,
    // whatever type getUrl() is declared to return.
    String urlLiteral = daoHelp.format(String.valueOf(event.getUrl()));
    daoHelp.execSql("update spider_resource set parser = 1 where url = " + urlLiteral);
}
/**
 * Resets {@code saved}, {@code crawl}, {@code HttpStatus} and {@code reqCount}
 * to 0 on every {@code spider_resource} row of type {@code 'monitor'} or
 * {@code 'seed'} whose {@code httpStatus} is at most 301.
 */
public void setSeedNotSpider() {
    daoHelp.execSql(
            "update spider_resource set saved = 0, crawl = 0, HttpStatus = 0, reqCount = 0 where (type = 'monitor' or type = 'seed') and httpStatus <= 301");
}
/**
 * Writes the current time, formatted as {@code yyyy-MM-dd HH:mm:ss}, to the
 * {@code stime} column of {@code spider_info}. The update has no
 * {@code where} clause.
 */
public void setStartTime() {
    String now = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
    daoHelp.execSql("update spider_info set stime = '" + now + "'");
}
/**
 * Issues an unconditional {@code delete} against {@code spider_resource}.
 */
public void deleteAll() {
    daoHelp.execSql("delete from spider_resource");
}
/**
 * Sets {@code parser = 1} on the {@code spider_resource} row whose
 * {@code rid} equals {@code res.getRId()}.
 *
 * @param res resource whose row is to be flagged
 */
public void setParsered(Resource res) {
    daoHelp.execSql("update spider_resource set parser = 1 where rid = " + res.getRId());
}
/**
 * Resets rows that are flagged {@code crawl = 1} but are not saved and have no
 * HTTP status (or one below 100) back to {@code crawl = 0}, then returns
 * whatever {@code getSomeSeedResource()} yields.
 *
 * @return the result of {@code getSomeSeedResource()}
 */
public List<Resource> setSpider() {
    daoHelp.execSql(
            "update spider_resource set crawl = 0 , saved = 0, httpStatus = NULL , dTime = 0 where saved = 0 and crawl = 1 and (HttpStatus is NULL or HttpStatus < 100 ) ");
    List<Resource> seeds = this.getSomeSeedResource();
    return seeds;
}
/**
 * Looks up the {@code rid} of the {@code spider_resource} row for {@code url},
 * sets that row's {@code paid} to its own {@code rid}, and inserts a matching
 * row into {@code spider_resource_info}.
 *
 * <p>The URL is bound as a statement parameter rather than concatenated into
 * the lookup query. If several rows match, the last {@code rid} read is used.
 * NOTE(review): if no row matches, or the lookup fails (the
 * {@link SQLException} is logged), {@code rid} stays 0 and both statements
 * below still run with 0. Confirm whether that is intended.
 *
 * @param url  URL of the resource row
 * @param site value for the {@code site} column
 * @param type value for the {@code iType} column
 * @param city value for the {@code city} column
 */
private void updResourceInfo(String url, String site, String type, int city) {
    int rid = 0;
    // Fully qualified so no additional import is needed in this file.
    java.sql.PreparedStatement stmt = null;
    ResultSet rs = null;
    try {
        stmt = daoHelp.getConnection().prepareStatement(
                "select rid from spider_resource where url = ?");
        stmt.setString(1, url);
        rs = stmt.executeQuery();
        while (rs.next()) {
            rid = rs.getInt("rid");
        }
    } catch (SQLException e) {
        log.error("SQLException -> ", e);
    } finally {
        // One close in finally covers every exit path.
        daoHelp.safeClose(rs);
        daoHelp.safeClose(stmt);
    }

    String uSql = "update spider_resource set paid = " + rid + " where rid = " + rid;
    daoHelp.execSql(uSql);

    String infoSql = "insert into spider_resource_info(rid, site, iType, city ) "
            + " values (" + rid + ","
            + daoHelp.format(site) + ", "
            + daoHelp.format(type) + ", "
            + daoHelp.format(city) + ")";
    daoHelp.execSql(infoSql);
}