예제 #1
0
  /**
   * Tests the name of the given link against the file name rule.
   *
   * @param link the crawled link to test
   * @return {@code true} when no file name rule is set; {@code false} when a rule is set but the
   *     link state is not {@code LinkState.ONLINE}; otherwise the result of matching the rule
   *     against the link's name
   */
  public boolean checkFileName(CrawledLink link) {
    if (getFileNameRule() == null) {
      // No rule set: every link passes this check.
      return true;
    }
    // The name is only matched for links that are online.
    final boolean online = link.getLinkState() == LinkState.ONLINE;
    return online && getFileNameRule().matches(link.getName());
  }
예제 #2
0
 /**
  * Tests the URL of the given link against the hoster rule.
  *
  * @param link the crawled link to test
  * @return {@code true} when no hoster rule is set, otherwise the result of matching the rule
  *     against the link's URL
  * @throws NoDownloadLinkException if a hoster rule is set and the link has no download link
  */
 public boolean checkHoster(CrawledLink link) throws NoDownloadLinkException {
   if (getHosterRule() == null) {
     return true;
   }
   if (link.getDownloadLink() != null) {
     return getHosterRule().matches(link.getURL());
   }
   // A rule exists but there is no download link to evaluate it for.
   throw new NoDownloadLinkException();
 }
예제 #3
0
 /**
  * Tests the file extension of the given link against the file type filter.
  *
  * @param link the crawled link to test
  * @return {@code true} when no file type filter is set or the link's name has no extension;
  *     {@code false} when a filter is set but the link state is not {@code LinkState.ONLINE};
  *     otherwise the result of matching the filter against the extension
  */
 public boolean checkFileType(CrawledLink link) {
   if (getFiletypeFilter() == null) {
     return true;
   }
   if (link.getLinkState() != LinkState.ONLINE) {
     return false;
   }
   final String extension = Files.getExtension(link.getName());
   // A missing extension passes; otherwise the filter decides.
   return extension == null || getFiletypeFilter().matches(extension);
 }
예제 #4
0
 /**
  * Tests the source rule against the URL of the given link and of every link in its source chain.
  *
  * <p>The chain is walked through {@code getSourceLink()} until it returns {@code null}.
  *
  * @param link the crawled link at which the walk starts
  * @return {@code true} when no source rule is set or when the rule matches the URL of any link
  *     in the chain; {@code false} when the chain is exhausted without a match
  */
 public boolean checkSource(CrawledLink link) {
   if (getSourceRule() == null) {
     return true;
   }
   CrawledLink current = link;
   while (true) {
     if (getSourceRule().matches(current.getURL())) {
       return true;
     }
     current = current.getSourceLink();
     if (current == null) {
       // Reached the end of the source chain without a match.
       return false;
     }
   }
 }
예제 #5
0
 /**
  * Crawls the given file with a new {@code LinkCrawler} and collects one download link per
  * crawled link.
  *
  * <p>A crawled link that already carries a download link contributes that link. Otherwise, if
  * it has a URL, a new {@code DownloadLink} is created from that URL. Crawled links with neither
  * are skipped.
  *
  * @param file the file whose URI is handed to the crawler
  * @return the collected download links, never {@code null}
  */
 protected List<DownloadLink> loadContainerFile(File file) {
   final LinkCrawler crawler = LinkCrawler.newInstance();
   crawler.crawl(file.toURI().toString());
   crawler.waitForCrawling();
   final ArrayList<DownloadLink> collected =
       new ArrayList<DownloadLink>(crawler.getCrawledLinks().size());
   for (final CrawledLink crawled : crawler.getCrawledLinks()) {
     final DownloadLink existing = crawled.getDownloadLink();
     if (existing != null) {
       collected.add(existing);
       continue;
     }
     // No download link attached: fall back to building one from the URL, if there is one.
     final String url = crawled.getURL();
     if (url != null) {
       collected.add(new DownloadLink(null, null, null, url, true));
     }
   }
   return collected;
 }
예제 #6
0
 /**
  * Writes a crawler failure to the "PluginErrors" logger and closes that logger afterwards.
  *
  * @param e the failure to log, or {@code null} to log only the two header lines
  * @param br not used by this method
  * @param link the link whose URL is written to the log
  */
 public void errLog(Throwable e, Browser br, CrawledLink link) {
   final LogSource pluginErrors = LogController.getInstance().getLogger("PluginErrors");
   try {
     pluginErrors.severe("CrawlerPlugin out of date: " + this + " :" + getVersion());
     pluginErrors.severe("URL was: " + link.getURL());
     if (e == null) {
       // Nothing more to record; the finally block still closes the logger.
       return;
     }
     pluginErrors.log(e);
   } finally {
     pluginErrors.close();
   }
 }
예제 #7
0
 /**
  * Decides whether the given archive factory is supported for this instance's {@code splitType}.
  *
  * <p>The factory's file path must match {@code splitType}. In addition, no download link of the
  * factory may carry a mime hint that resolves to an extension group other than
  * {@code CompiledFiletypeFilter.ArchiveExtensions.NUM}. With deep inspection enabled the result
  * depends on whether {@code SplitType.createArchive} yields an archive.
  *
  * @param factory the archive factory to inspect
  * @param allowDeepInspection whether an archive should actually be created for the check
  * @return {@code true} if supported; {@code false} otherwise, including when archive creation
  *     throws an {@code ArchiveException} (which is logged)
  */
 @Override
 public Boolean isSupported(final ArchiveFactory factory, final boolean allowDeepInspection) {
   if (!splitType.matches(factory.getFilePath())) {
     return false;
   }
   if (factory instanceof DownloadLinkArchiveFactory) {
     for (final DownloadLink candidate :
         ((DownloadLinkArchiveFactory) factory).getDownloadLinks()) {
       if (hasForeignMimeHint(candidate)) {
         return false;
       }
     }
   } else if (factory instanceof CrawledLinkFactory) {
     for (final CrawledLink crawled : ((CrawledLinkFactory) factory).getLinks()) {
       final DownloadLink candidate = crawled.getDownloadLink();
       // Crawled links without a download link carry no hint to check.
       if (candidate != null && hasForeignMimeHint(candidate)) {
         return false;
       }
     }
   }
   if (!allowDeepInspection) {
     return true;
   }
   try {
     return SplitType.createArchive(factory, splitType, allowDeepInspection) != null;
   } catch (ArchiveException e) {
     getLogger().log(e);
     return false;
   }
 }

 /**
  * Tells whether the link's mime hint resolves to an extension filter that is not in the same
  * extension group as {@code CompiledFiletypeFilter.ArchiveExtensions.NUM}.
  */
 private boolean hasForeignMimeHint(final DownloadLink link) {
   final ExtensionsFilterInterface hint =
       CompiledFiletypeFilter.getExtensionsFilterInterface(link.getMimeHint());
   return hint != null
       && !hint.isSameExtensionGroup(CompiledFiletypeFilter.ArchiveExtensions.NUM);
 }
예제 #8
0
 /**
  * Reports whether the download controller has a download link with the link ID of the given
  * crawled link.
  *
  * @param link the crawled link to look up; may be {@code null}
  * @return {@code false} for a {@code null} link, otherwise the result of
  *     {@code hasDownloadLinkByID} for the link's ID
  */
 public boolean matches(CrawledLink link) {
   if (link == null) {
     return false;
   }
   return DownloadController.getInstance().hasDownloadLinkByID(link.getLinkID());
 }
예제 #9
0
  /**
   * Decrypts a single crawled link and post-processes any failure raised while doing so.
   *
   * <p>The actual work is delegated to {@code decryptIt(link)}. Anything thrown during that call
   * is classified in the catch block: aborts, captcha failures, password failures, account
   * errors, other {@code DecrypterException}s and the offline link status are treated as handled.
   * Everything else (including {@code DecrypterException.PLUGIN_DEFECT}) is kept and later
   * reported through {@code errLog}.
   *
   * @param link the crawled link whose crypted link should be decrypted
   * @return the links produced by the decrypter; {@code null} if the link has no crypted link, or
   *     if decryption yielded no result and no offline placeholder link was created
   */
  public ArrayList<DownloadLink> decryptLink(CrawledLink link) {
    if (link.getCryptedLink() == null) {
      return null;
    }
    ArrayList<DownloadLink> tmpLinks = null;
    // Stays non-null only for failures that must be reported as a plugin error.
    Throwable throwable = null;
    boolean linkstatusOffline = false;
    boolean pwfailed = false;
    boolean captchafailed = false;
    try {
      challenges = null;
      setCurrentLink(link);
      /*
       * from here on, log into the plugin specific logger with verbose/debug enabled
       */
      br.setLogger(logger);
      br.setVerbose(true);
      br.setDebug(true);
      /* now we let the decrypter do its magic */
      tmpLinks = decryptIt(link);
      validateLastChallengeResponse();
    } catch (final Throwable e) {
      throwable = e;
      if (isAbort()) {
        throwable = null;
      } else if (processCaptchaException(e)) {
        /* User entered wrong captcha (too many times) */
        throwable = null;
        captchafailed = true;
      } else if (DecrypterException.PLUGIN_DEFECT.equals(e.getMessage())) {
        // Leave throwable set so the defect is reported below.
      } else if (DecrypterException.PASSWORD.equals(e.getMessage())) {
        /* User entered wrong password (too many times) */
        throwable = null;
        pwfailed = true;
      } else if (DecrypterException.ACCOUNT.equals(e.getMessage())) {
        throwable = null;
      } else if (e instanceof DecrypterException || e.getCause() instanceof DecrypterException) {
        throwable = null;
      } else if (e instanceof PluginException) {
        // offline file linkstatus exception, this should not be treated as crawler error..
        // NOTE(review): 32 is presumably the "file not found" link status value - confirm and
        // replace with the named constant if one exists.
        if (((PluginException) e).getLinkStatus() == 32) {
          throwable = null;
          linkstatusOffline = true;
          if (tmpLinks == null
              && LinkCrawler.getConfig().isAddDefectiveCrawlerTasksAsOfflineInLinkgrabber()) {
            tmpLinks = new ArrayList<DownloadLink>();
            tmpLinks.add(createOfflinelink(link.getURL()));
          }
        }
      }
      if (throwable == null && logger instanceof LogSource) {
        /*
         * Handled failure: drop what was logged so far and keep only the exception. The condition
         * above already guarantees a LogSource, so no further type check is needed.
         */
        ((LogSource) logger).clear();
        ((LogSource) logger).log(e);
      }
    } finally {
      clean();
      challenges = null;
    }
    if ((tmpLinks == null || throwable != null)
        && !isAbort()
        && !pwfailed
        && !captchafailed
        && !linkstatusOffline) {
      /*
       * null as return value? something must have happened, do not clear log
       */
      errLog(throwable, br, link);
      logger.severe("CrawlerPlugin out of date: " + this + " :" + getVersion());
      logger.severe("URL was: " + link.getURL());
      /*
       * we can effectively create generic offline link here. For custom message/comments this must be done within the plugin.
       * -raztoki
       */
      if (tmpLinks == null
          && LinkCrawler.getConfig().isAddDefectiveCrawlerTasksAsOfflineInLinkgrabber()) {
        tmpLinks = new ArrayList<DownloadLink>();
        tmpLinks.add(createOfflinelink(link.getURL()));
      }

      /* lets forward the log */
      if (logger instanceof LogSource) {
        /* make sure we use the right logger */
        ((LogSource) logger).flush();
      }
    }
    if (logger instanceof LogSource) {
      /* make sure we use the right logger */
      ((LogSource) logger).clear();
    }
    return tmpLinks;
  }
예제 #10
0
 /**
  * Convenience overload that decrypts the crypted link of the given crawled link.
  *
  * <p>Delegates to the two-argument {@code decryptIt}, passing {@code link.getCryptedLink()} and
  * {@code dummyProgressController}.
  *
  * @param link the crawled link whose crypted link is passed on
  * @return whatever the delegated {@code decryptIt} call returns
  * @throws Exception if the delegated call throws
  */
 public ArrayList<DownloadLink> decryptIt(CrawledLink link) throws Exception {
   return decryptIt(link.getCryptedLink(), dummyProgressController);
 }
 /**
  * Returns the ID held by the unique-ID object of {@code link}.
  *
  * @return the value of {@code link.getUniqueID().getID()}
  */
 public Long getUniqueID() {
   return link.getUniqueID().getID();
 }