/**
 * Tests the name of the given link against the file name rule.
 *
 * @param link
 *            the crawled link to test
 * @return {@code true} when no file name rule is set, or when the link state is {@code ONLINE} and the rule matches the link name;
 *         {@code false} otherwise
 */
public boolean checkFileName(CrawledLink link) {
    if (getFileNameRule() == null) {
        /* no rule set, nothing to restrict */
        return true;
    }
    /* links that are not ONLINE never pass a file name rule */
    final boolean online = link.getLinkState() == LinkState.ONLINE;
    return online && getFileNameRule().matches(link.getName());
}
/**
 * Tests the URL of the given link against the hoster rule.
 *
 * @param link
 *            the crawled link to test
 * @return {@code true} when no hoster rule is set, otherwise the result of matching the rule against the link URL
 * @throws NoDownloadLinkException
 *             if a hoster rule is set but the link carries no download link
 */
public boolean checkHoster(CrawledLink link) throws NoDownloadLinkException {
    if (getHosterRule() == null) {
        return true;
    }
    if (link.getDownloadLink() == null) {
        /* a hoster rule cannot be evaluated without a download link */
        throw new NoDownloadLinkException();
    }
    return getHosterRule().matches(link.getURL());
}
/**
 * Tests the file extension of the given link against the file type filter.
 *
 * @param link
 *            the crawled link to test
 * @return {@code true} when no file type filter is set; {@code false} when a filter is set and the link state is not {@code ONLINE};
 *         {@code true} when no extension can be extracted from the link name; otherwise the result of matching the filter against the
 *         extension
 */
public boolean checkFileType(CrawledLink link) {
    if (getFiletypeFilter() == null) {
        return true;
    }
    if (link.getLinkState() != LinkState.ONLINE) {
        return false;
    }
    final String extension = Files.getExtension(link.getName());
    /* a name without extension passes the filter */
    return extension == null || getFiletypeFilter().matches(extension);
}
/**
 * Tests the source rule against the URL of the given link and of every link in its source chain.
 *
 * @param link
 *            the crawled link whose source chain is walked via {@code getSourceLink()}
 * @return {@code true} when no source rule is set or when the rule matches the URL of the link or of any of its source links;
 *         {@code false} when the end of the chain is reached without a match
 */
public boolean checkSource(CrawledLink link) {
    if (getSourceRule() == null) {
        return true;
    }
    CrawledLink current = link;
    while (true) {
        /* the starting link itself is always tested first */
        if (getSourceRule().matches(current.getURL())) {
            return true;
        }
        current = current.getSourceLink();
        if (current == null) {
            /* end of the source chain, nothing matched */
            return false;
        }
    }
}
/**
 * Crawls the given file with a new {@code LinkCrawler} instance and collects the results as download links.
 * <p>
 * Blocks until the crawler reports that crawling has finished. For every crawled link, its own download link is used when present;
 * otherwise a new download link is created from the crawled link's URL. Crawled links with neither are skipped.
 *
 * @param file
 *            the file to crawl; it is handed to the crawler as a URI string
 * @return the collected download links, possibly empty
 */
protected List<DownloadLink> loadContainerFile(File file) {
    final LinkCrawler crawler = LinkCrawler.newInstance();
    crawler.crawl(file.toURI().toString());
    crawler.waitForCrawling();
    final ArrayList<DownloadLink> result = new ArrayList<DownloadLink>(crawler.getCrawledLinks().size());
    for (final CrawledLink crawled : crawler.getCrawledLinks()) {
        final DownloadLink existing = crawled.getDownloadLink();
        if (existing != null) {
            result.add(existing);
            continue;
        }
        /* no download link attached, fall back to the plain URL if there is one */
        final String url = crawled.getURL();
        if (url != null) {
            result.add(new DownloadLink(null, null, null, url, true));
        }
    }
    return result;
}
/**
 * Writes an "out of date" report for this plugin to the "PluginErrors" logger.
 * <p>
 * The report consists of the plugin and its version, the URL of the given link and, if present, the throwable. The logger is closed
 * afterwards, even if writing the report fails.
 *
 * @param e
 *            the throwable to report, may be {@code null}
 * @param br
 *            not used by this method
 * @param link
 *            the link whose URL is reported
 */
public void errLog(Throwable e, Browser br, CrawledLink link) {
    final LogSource pluginErrors = LogController.getInstance().getLogger("PluginErrors");
    try {
        final String outdatedMessage = "CrawlerPlugin out of date: " + this + " :" + getVersion();
        pluginErrors.severe(outdatedMessage);
        final String urlMessage = "URL was: " + link.getURL();
        pluginErrors.severe(urlMessage);
        if (e != null) {
            pluginErrors.log(e);
        }
    } finally {
        pluginErrors.close();
    }
}
/**
 * Decides whether the given archive factory is supported by this split type.
 * <p>
 * The factory's file path must match {@code splitType}. In addition, no link of the factory may carry a mime hint that resolves to an
 * extension group other than {@code ArchiveExtensions.NUM}; links without a resolvable hint are ignored. With deep inspection enabled
 * the archive is actually created and the result depends on whether that yields an archive.
 *
 * @param factory
 *            the archive factory to check
 * @param allowDeepInspection
 *            whether the archive may be created to verify support
 * @return {@code false} when the path does not match, a mime hint points to a different extension group, archive creation yields
 *         {@code null} or fails with an {@code ArchiveException} (which is logged); {@code true} otherwise
 */
@Override
public Boolean isSupported(final ArchiveFactory factory, final boolean allowDeepInspection) {
    if (!splitType.matches(factory.getFilePath())) {
        return false;
    }
    if (factory instanceof DownloadLinkArchiveFactory) {
        final DownloadLinkArchiveFactory downloadFactory = (DownloadLinkArchiveFactory) factory;
        for (final DownloadLink downloadLink : downloadFactory.getDownloadLinks()) {
            final ExtensionsFilterInterface mimeGroup = CompiledFiletypeFilter.getExtensionsFilterInterface(downloadLink.getMimeHint());
            if (mimeGroup == null) {
                continue;
            }
            if (!mimeGroup.isSameExtensionGroup(CompiledFiletypeFilter.ArchiveExtensions.NUM)) {
                return false;
            }
        }
    } else if (factory instanceof CrawledLinkFactory) {
        final CrawledLinkFactory crawledFactory = (CrawledLinkFactory) factory;
        for (final CrawledLink crawledLink : crawledFactory.getLinks()) {
            final DownloadLink downloadLink = crawledLink.getDownloadLink();
            if (downloadLink == null) {
                continue;
            }
            final ExtensionsFilterInterface mimeGroup = CompiledFiletypeFilter.getExtensionsFilterInterface(downloadLink.getMimeHint());
            if (mimeGroup == null) {
                continue;
            }
            if (!mimeGroup.isSameExtensionGroup(CompiledFiletypeFilter.ArchiveExtensions.NUM)) {
                return false;
            }
        }
    }
    if (!allowDeepInspection) {
        /* path and mime hints are all we may look at */
        return true;
    }
    try {
        return SplitType.createArchive(factory, splitType, allowDeepInspection) != null;
    } catch (ArchiveException e) {
        getLogger().log(e);
        return false;
    }
}
/**
 * Checks whether the download controller already holds a download link with the link ID of the given crawled link.
 *
 * @param link
 *            the crawled link to look up, may be {@code null}
 * @return {@code false} for a {@code null} link, otherwise the result of the lookup by link ID
 */
public boolean matches(CrawledLink link) {
    if (link == null) {
        return false;
    }
    final DownloadController controller = DownloadController.getInstance();
    return controller.hasDownloadLinkByID(link.getLinkID());
}
/**
 * Decrypts a single link by delegating to {@link #decryptIt(CrawledLink)} and classifies any failure.
 * <p>
 * Failures caused by an abort, a failed captcha, a wrong password, a missing account, a {@code DecrypterException} or an offline file
 * are considered handled and are not reported as a plugin defect. Every other failure, as well as a {@code null} result, is reported
 * through {@link #errLog(Throwable, Browser, CrawledLink)} and the plugin logger.
 *
 * @param link
 *            the crawled link to decrypt
 * @return {@code null} if the link has no crypted link; otherwise the links returned by the decrypter, or a list holding a single
 *         generic offline link if decryption yielded nothing and the crawler config asks for defective tasks to be added as offline;
 *         may still be {@code null} (e.g. after an abort or when that config option is off)
 */
public ArrayList<DownloadLink> decryptLink(CrawledLink link) {
    if (link.getCryptedLink() == null) {
        return null;
    }
    ArrayList<DownloadLink> tmpLinks = null;
    /* stays non-null only for failures that have to be reported as a plugin defect */
    Throwable throwable = null;
    boolean linkstatusOffline = false;
    boolean pwfailed = false;
    boolean captchafailed = false;
    try {
        challenges = null;
        setCurrentLink(link);
        /*
         * from now on log into the plugin specific logger with all verbose/debug on
         */
        br.setLogger(logger);
        br.setVerbose(true);
        br.setDebug(true);
        /* now we let the decrypter do its magic */
        tmpLinks = decryptIt(link);
        validateLastChallengeResponse();
    } catch (final Throwable e) {
        throwable = e;
        if (isAbort()) {
            throwable = null;
        } else if (processCaptchaException(e)) {
            /* User entered wrong captcha (too many times) */
            throwable = null;
            captchafailed = true;
        } else if (DecrypterException.PLUGIN_DEFECT.equals(e.getMessage())) {
            // leave alone, this one must be reported as a defect
        } else if (DecrypterException.PASSWORD.equals(e.getMessage())) {
            /* User entered wrong password (too many times) */
            throwable = null;
            pwfailed = true;
        } else if (DecrypterException.ACCOUNT.equals(e.getMessage())) {
            throwable = null;
        } else if (e instanceof DecrypterException || e.getCause() instanceof DecrypterException) {
            throwable = null;
        } else if (e instanceof PluginException) {
            // offline file linkstatus exception, this should not be treated as crawler error..
            if (((PluginException) e).getLinkStatus() == 32) {
                throwable = null;
                linkstatusOffline = true;
                if (tmpLinks == null && LinkCrawler.getConfig().isAddDefectiveCrawlerTasksAsOfflineInLinkgrabber()) {
                    tmpLinks = new ArrayList<DownloadLink>();
                    tmpLinks.add(createOfflinelink(link.getURL()));
                }
            }
        }
        if (throwable == null) {
            /*
             * handled failure: keep only the exception in the log. The fallback for loggers that are not a LogSource used to be
             * unreachable because the surrounding condition already required a LogSource.
             */
            if (logger instanceof LogSource) {
                /* make sure we use the right logger */
                ((LogSource) logger).clear();
                ((LogSource) logger).log(e);
            } else if (logger != null) {
                LogSource.exception(logger, e);
            }
        }
    } finally {
        clean();
        challenges = null;
    }
    if ((tmpLinks == null || throwable != null) && !isAbort() && !pwfailed && !captchafailed && !linkstatusOffline) {
        /*
         * null as return value? something must have happened, do not clear log
         */
        errLog(throwable, br, link);
        logger.severe("CrawlerPlugin out of date: " + this + " :" + getVersion());
        logger.severe("URL was: " + link.getURL());
        /*
         * we can effectively create generic offline link here. For custom message/comments this must be done within the plugin.
         * -raztoki
         */
        if (tmpLinks == null && LinkCrawler.getConfig().isAddDefectiveCrawlerTasksAsOfflineInLinkgrabber()) {
            tmpLinks = new ArrayList<DownloadLink>();
            tmpLinks.add(createOfflinelink(link.getURL()));
        }
        /* lets forward the log */
        if (logger instanceof LogSource) {
            /* make sure we use the right logger */
            ((LogSource) logger).flush();
        }
    }
    if (logger instanceof LogSource) {
        /* make sure we use the right logger */
        ((LogSource) logger).clear();
    }
    return tmpLinks;
}
/**
 * Decrypts the crypted link of the given crawled link, using {@code dummyProgressController} as progress controller.
 *
 * @param link
 *            the crawled link whose crypted link is decrypted
 * @return whatever the two-argument {@code decryptIt} returns for the crypted link
 * @throws Exception
 *             if the delegate throws
 */
public ArrayList<DownloadLink> decryptIt(CrawledLink link) throws Exception {
    final ArrayList<DownloadLink> decrypted = decryptIt(link.getCryptedLink(), dummyProgressController);
    return decrypted;
}
/**
 * Returns the ID of the unique ID object of the wrapped {@code link}.
 *
 * @return the ID obtained via {@code link.getUniqueID().getID()}
 */
public Long getUniqueID() {
    final Long id = link.getUniqueID().getID();
    return id;
}