Example #1
0
 /**
  * Returns the directory used for the given host, attempting to create it when it does not
  * exist yet.
  *
  * <p>The host name is lower-cased and trimmed, and the directory is resolved as
  * {@code <baseDir>/<hostHash(host)>/<Misc.formatFileName(host, '-')>}.
  *
  * @param host the host name; must not be {@code null}
  * @return the directory for the host; creation is attempted but its success is not checked
  */
 public File getHostBase(String host) {
   // Use Locale.ROOT so the directory a host maps to does not depend on the JVM's default
   // locale (e.g. under a Turkish locale "I" would otherwise lower-case to a dotless "ı").
   final String normalizedHost = host.toLowerCase(java.util.Locale.ROOT).trim();
   final String relativePath =
       hostHash(normalizedHost) + "/" + Misc.formatFileName(normalizedHost, '-');
   final File hostDir = new File(this.baseDir, relativePath);
   if (!hostDir.exists()) {
     // Best effort: the result of mkdirs() is intentionally not checked here, so callers
     // must be prepared for a directory that could not be created.
     hostDir.mkdirs();
   }
   return hostDir;
 }
  /**
   * Processes every host returned by {@code getHostInfos()}.
   *
   * <p>For each host whose save directory exists:
   * <ol>
   *   <li>the top pages are fetched via {@code getTop(dir, info, topNum)} and printed;</li>
   *   <li>if at least {@code minEntrane} pages were found, each page URL is passed to
   *       {@code HostInjectTool.injectAlwaysAllow} and the host is added to {@code setted};
   *       otherwise the injection step is skipped;</li>
   *   <li>regardless of the previous step, all URLs of the host are marked as crawled and the
   *       host status is set to {@code HostFilter.Status_Incr_watch};</li>
   *   <li>if the system property {@code isDelOrg} equals {@code "delOrg"} (case-insensitive),
   *       the save directory is deleted via {@code Misc.del}.</li>
   * </ol>
   *
   * <p>Failures while handling a single host or a single page are printed and do not stop
   * the processing of the remaining hosts.
   *
   * @param topNum number of top pages requested from {@code getTop}
   * @param minEntrane minimum number of top pages required before injection is performed
   * @throws Exception if loading the host list or closing the inject tool fails
   */
  public void run(int topNum, int minEntrane) throws Exception {
    lstHost = getHostInfos();
    HostInjectTool tool = new HostInjectTool();
    // try/finally guarantees the inject tool is closed even when something escapes the loop
    // (getSaveDir is outside the per-host try, and Errors are not caught by catch Exception).
    try {
      for (HostInfo info : lstHost) {
        final File dir = ScanConfig.instance.getSaveDir(info);
        System.out.println("checking " + dir);
        if (!dir.exists()) {
          System.out.println("skip empty " + dir);
          continue;
        }
        try {
          List<PageInfo> top = this.getTop(dir, info, topNum);
          for (PageInfo pi : top) {
            System.out.println(pi);
          }
          if (top.size() < minEntrane) {
            System.out.println("skip " + info + ", too few entrance " + top.size());
          } else {
            for (PageInfo pi : top) {
              try {
                tool.injectAlwaysAllow(info, pi.getUrl());
              } catch (Exception e) {
                // Best effort per page: report and keep injecting the remaining pages.
                e.printStackTrace();
              }
            }

            setted.add(info);
          }

          // The steps below run for every existing host directory, including hosts that
          // were skipped above for having too few entrance pages.
          HostInitorTool initor = new HostInitorTool(info.getHostName(), 1024 * 32);
          try {
            initor.markAllUrlToCrawled();
          } finally {
            initor.close();
          }
          Connectioner.instance().setHostStatus(info.getHostName(), HostFilter.Status_Incr_watch);
          if ("delOrg".equalsIgnoreCase(System.getProperty("isDelOrg"))) {
            System.out.println("deleteing ................... " + dir);
            Misc.del(dir);
          }
        } catch (Exception e) {
          // Best effort per host: report and continue with the next host.
          e.printStackTrace();
        }
      }
    } finally {
      tool.close();
    }
  }