/**
 * Returns the directory for the given host below {@code baseDir}, creating it
 * (including missing parents) when it does not exist yet.
 *
 * <p>The path is {@code <hostHash(host)>/<Misc.formatFileName(host, '-')>}, computed
 * from the lower-cased, trimmed host name.
 *
 * @param host host name; must not be {@code null}
 * @return the host directory; creation is best effort, so the returned file is not
 *         guaranteed to exist if {@code mkdirs()} failed
 */
public File getHostBase(String host) {
    // Locale.ROOT keeps the mapping independent of the JVM default locale; with a
    // Turkish default locale, for example, 'I' would lower-case to a dotless 'ı'
    // and the same host would resolve to a different directory.
    host = host.toLowerCase(java.util.Locale.ROOT).trim();
    File f = new File(this.baseDir, hostHash(host) + "/" + Misc.formatFileName(host, '-'));
    if (!f.exists()) {
        // Result intentionally not checked: callers receive the path either way.
        f.mkdirs();
    }
    return f;
}
public void run(int topNum, int minEntrane) throws Exception { lstHost = getHostInfos(); HostInjectTool tool = new HostInjectTool(); for (HostInfo info : lstHost) { final File dir = ScanConfig.instance.getSaveDir(info); System.out.println("checking " + dir); if (!dir.exists()) { System.out.println("skip empty " + dir); continue; } try { List<PageInfo> top = this.getTop(dir, info, topNum); for (PageInfo pi : top) { System.out.println(pi); } if (top.size() < minEntrane) { System.out.println("skip " + info + ", too few entrance " + top.size()); } else { // Consoler.readString("-----press any key to continue>"); for (PageInfo pi : top) { try { tool.injectAlwaysAllow(info, pi.getUrl()); } catch (Exception e) { e.printStackTrace(); } } setted.add(info); } HostInitorTool initor = new HostInitorTool(info.getHostName(), 1024 * 32); try { initor.markAllUrlToCrawled(); } finally { initor.close(); } Connectioner.instance().setHostStatus(info.getHostName(), HostFilter.Status_Incr_watch); if ("delOrg".equalsIgnoreCase(System.getProperty("isDelOrg"))) { System.out.println("deleteing ................... " + dir); Misc.del(dir); } } catch (Exception e) { e.printStackTrace(); } } tool.close(); }