コード例 #1
0
ファイル: HarvesterThread.java プロジェクト: k9m/jharvester
 /**
  * Sleeps the calling thread for the interval reported by {@code crawlerCTRL.getInterval()}.
  *
  * <p>The interval is multiplied by 1000 before being passed to {@link Thread#sleep(long)}, i.e.
  * it is treated as a number of seconds. The multiplication uses a {@code long} factor so that a
  * large interval cannot overflow {@code int} arithmetic before the value reaches {@code sleep}.
  *
  * <p>If the sleep is interrupted, the stack trace is printed, the thread's interrupt status is
  * restored, and the method returns early.
  */
 private void waitFor() {
   try {
     Thread.sleep(crawlerCTRL.getInterval() * 1000L);
   } catch (InterruptedException e) {
     e.printStackTrace();
     // Thread.sleep clears the interrupt status when it throws; set it again so the
     // interruption is not silently lost for whoever inspects the thread next.
     Thread.currentThread().interrupt();
   }
 }
コード例 #2
0
ファイル: HarvesterThread.java プロジェクト: k9m/jharvester
 /**
  * Parks the calling thread on this object's monitor until another thread notifies it.
  *
  * <p>A "paused" line containing this thread's name and the current timestamp is written through
  * {@code crawlerCTRL.println_Crawler} before waiting. Both the log call and the wait happen
  * while holding the monitor of {@code this}.
  *
  * <p>If the wait is interrupted, the stack trace is printed, the thread's interrupt status is
  * restored, and the method returns.
  */
 private void waitForever() {
   try {
     synchronized (this) {
       crawlerCTRL.println_Crawler(this.getName() + " paused @ " + Timer.getTimeStamp());
       // NOTE(review): wait() is not wrapped in a condition loop, so a spurious wakeup would end
       // the pause early. Confirm which flag the resume path clears before adding a guard.
       this.wait();
     }
   } catch (InterruptedException e) {
     e.printStackTrace();
     // wait() clears the interrupt status when it throws; set it again so the interruption
     // is not silently lost for whoever inspects the thread next.
     Thread.currentThread().interrupt();
   }
 }
コード例 #3
0
ファイル: HarvesterThread.java プロジェクト: k9m/jharvester
  /**
   * Worker loop of the harvester thread.
   *
   * <p>Each iteration looks at the crawler's current input job (advancing to the next job when the
   * current one reports {@code isDone()}), pops one task from it and downloads the task's URL into
   * {@code <folderPath><separator><filename>}. Progress and errors are written to the crawler
   * console and to per-folder HTML logs via {@code JLogger}.
   *
   * <p>After every download attempt, successful or not, the streams and the log are closed, the
   * task's type is set from its extension ({@code .html}, {@code .pdf} or {@code .xml}), the task
   * is handed to the job's output bus and the crawler's progress counter is incremented. The thread
   * then either pauses (when {@code crawlerCTRL.isPause()} is true) or sleeps for the crawl
   * interval, waking the process thread in both cases.
   *
   * <p>The loop ends when no input job is left or when this thread's interrupt status is set;
   * {@code crawlerCTRL.killRecord(this)} is called on the way out.
   */
  public void run() {

    // Stop picking up new work once the thread has been interrupted.
    while (!Thread.currentThread().isInterrupted()) {

      // Peek once and keep the result in a local: calling peekInputJob() repeatedly and
      // dereferencing the result directly risks a NullPointerException if the queue changes
      // between the calls.
      Job job = crawlerCTRL.peekInputJob();
      if (job == null) break;

      if (job.isDone()) {
        crawlerCTRL.nextJob();
        job = crawlerCTRL.peekInputJob();
        if (job == null) break;
      }

      Task task = job.popInputStack();
      if (task == null) {
        // Nothing to fetch right now for this job: back off for one interval and look again.
        this.waitFor();
        continue;
      }

      String urlStr = task.getUrl();
      String schedDir = task.getFolderPath();
      String filename = task.getFilename();

      BufferedInputStream bin = null;
      BufferedOutputStream bout = null;
      JLogger jLogger = new JLogger();

      try {
        crawlerCTRL.println_Crawler("++>" + filename + " Started @" + Timer.getTimeStamp());

        crawlerCTRL.clrTxt();
        new File(schedDir).mkdir();

        jLogger.openHTMLLog(schedDir + org.Setup._S_ + org.Setup.CFG_LOGNAME);

        URL url = new URL(urlStr);
        URLConnection urlConn = url.openConnection();
        // NOTE(review): only the connect timeout is set; a server that stops sending data
        // mid-transfer will block the read below indefinitely. Consider setReadTimeout.
        urlConn.setConnectTimeout(CONNECTION_TIMEOUT * 1000);

        bin = new BufferedInputStream(urlConn.getInputStream(), 256);
        bout =
            new BufferedOutputStream(
                new FileOutputStream(schedDir + org.Setup._S_ + filename), 256);

        // Copy in chunks instead of one byte per call.
        byte[] chunk = new byte[8192];
        int count;
        while ((count = bin.read(chunk)) != -1) {
          bout.write(chunk, 0, count);
        }
        bout.flush();

        jLogger.burnHTMLLog("==>" + filename + " Done @ " + Timer.getTimeStamp());
        crawlerCTRL.println_Crawler("==>" + filename + " Done @" + Timer.getTimeStamp());

      } catch (Exception e) {
        try {
          crawlerCTRL.println_Crawler("% Error: " + e.getMessage() + " @ " + Timer.getTimeStamp());
          e.printStackTrace();

          jLogger.openHTMLLog_Error(schedDir + org.Setup._S_ + org.Setup.CFG_ERRORLOGNAME);
          jLogger.burnHTMLLog(filename + " % Error @ " + Timer.getTimeStamp());
          jLogger.burnHTMLLog_Error(filename + " % Error @ " + Timer.getTimeStamp());
          jLogger.burnHTMLLog_ErrorStack(e);
          jLogger.closeHTMLLog_Error();

        } catch (IOException e1) {
          System.err.println("Exception 1-2");
          e1.printStackTrace();
        }
      } finally {
        System.err.println("Finally 1");
        try {
          // Each cleanup step is isolated so that one failure cannot skip the others and leak
          // a stream or leave the log open. Streams are closed before the task is published so
          // the consumer of the output bus never sees a file that is still open for writing.
          try {
            if (bin != null) bin.close();
          } catch (Exception e) {
            reportCleanupFailure(e);
          }
          try {
            if (bout != null) bout.close();
          } catch (Exception e) {
            reportCleanupFailure(e);
          }
          try {
            jLogger.closeHTMLLog();
          } catch (Exception e) {
            reportCleanupFailure(e);
          }

          try {
            // Constant-first equals: a task without an extension must not abort the hand-over.
            if (".html".equals(task.getExtension())) {
              task.setType(AtlasJob.J_HTML);
            } else if (".pdf".equals(task.getExtension())) {
              task.setType(AtlasJob.J_PDF);
            } else if (".xml".equals(task.getExtension())) {
              task.setType(AtlasJob.J_XML);
            }
            // The task is forwarded and counted whether or not the download succeeded.
            job.toOutputBus(task);
            crawlerCTRL.incProgress();
          } catch (Exception e) {
            reportCleanupFailure(e);
          }

        } finally {

          if (crawlerCTRL.isPause()) {
            // Paused: block until notified, then wake the process thread.
            this.waitForever();
            crawlerCTRL.wakeProcessThread();
          } else {
            // Running: wake the process thread first, then throttle for one interval.
            crawlerCTRL.wakeProcessThread();
            this.waitFor();
          }
        }
      }
    } // ENDWHILE
    crawlerCTRL.killRecord(this);
  }

  /** Reports a failure that happened during per-task cleanup without interrupting the cleanup. */
  private void reportCleanupFailure(Exception e) {
    System.err.println("Exception 2");
    e.printStackTrace();
  }