Java UURI Exemples

Langage de programmation: Java

Espace de noms/Paquet : org.archive.net

Class/Type: UURI

Exemples sur hotexamples.com : 5

Java UURI - 5 exemples trouvés. Ce sont les exemples réels les mieux notés de org.archive.net.UURI extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

toString(4)

getPath(1)

hasQuery(1)

Méthodes fréquemment utilisées

toString (4)

getPath (1)

hasQuery (1)

Associées

UtilAngle

SparseObjectFacet3D

ObjectRole

CashflowPeriod

SecurityContextRepository

ValueLabelItem

javax.servlet.jsp.JspFactory

System

Associés dans d'autres langages

cryptPassword (PHP)

Query_PDO_Driver (PHP)

ProblemDetail (C#)

AboutBox (C#)

send_msg (C++)

destroy (C++)

New (Go)

Protocol (Go)

getUrlByVid (Python)

write_settings (Python)

Exemple #1

0

Afficher le fichier

Fichier : SelfTestBase.java Projet : ukwa/bl-heritrix-modules

protected Set<String> filesInArcs() throws IOException { List<ArchiveRecordHeader> headers = headersInArcs(); HashSet<String> result = new HashSet<String>(); for (ArchiveRecordHeader arh : headers) { // ignore 'filedesc:' record if (arh.getUrl().startsWith("filedesc:")) { continue; } UURI uuri = UURIFactory.getInstance(arh.getUrl()); String path = uuri.getPath(); if (path.startsWith("/")) { path = path.substring(1); } if (arh.getUrl().startsWith("http:")) { result.add(path); } } LOGGER.finest(result.toString()); return result; }

Exemple #2

0

Afficher le fichier

Fichier : HtmlFormCredential.java Projet : abhay123lp/web-mining-2007

/**
 * Tells whether the given URI is the prerequisite (login) URI.
 *
 * <p>The value returned by {@code getPrerequisite(curi)} is resolved against
 * the URI of {@code curi}; if the resolved URI's string form equals the string
 * form of {@code curi}'s own URI, the URI is a prerequisite. In that case
 * {@code curi} is also flagged as a prerequisite if it was not already.
 *
 * @param curi the crawl URI to test
 * @return {@code true} if {@code curi} matches the resolved prerequisite URI;
 *         {@code false} otherwise, including when there is no prerequisite
 *         or it cannot be resolved
 */
public boolean isPrerequisite(final CrawlURI curi) {
    final String currentUri = curi.getUURI().toString();
    final String loginUri = getPrerequisite(curi);
    if (loginUri == null) {
        return false;
    }

    boolean matched = false;
    try {
        final UURI resolvedLogin = UURIFactory.getInstance(curi.getUURI(), loginUri);
        matched = resolvedLogin != null
                && currentUri != null
                && resolvedLogin.toString().equals(currentUri);
        if (matched && !curi.isPrerequisite()) {
            curi.setPrerequisite(true);
            logger.fine(curi + " is prereq.");
        }
    } catch (URIException e) {
        // A login URI that cannot be resolved is reported and treated as "no match".
        logger.severe("Failed to uuri: " + curi + ", " + e.getMessage());
    }
    return matched;
}

Exemple #3

0

Afficher le fichier

Fichier : ParseHTTP.java Projet : yangdayuan/spider-2

protected void addHeaderLink(CrawlURI curi, Header loc) { if (loc == null) { // If null, return without adding anything. return; } // TODO: consider possibility of multiple headers try { /** * 302重定向使用自定义的方法存储link * * @modify: wuliufu * @since : 2012-05-11 */ curi.createAndAddLocationLink( curi.getVia(), loc.getValue(), loc.getName() + ":", Link.REFER_HOP); if (curi.getObject(URLInfo.ATTACH) != null) { UURI outUURI = UURIFactory.getInstance(curi.getUURI(), loc.getValue()); logger.debug( "ParseHTTP: curi = " + curi.getUURI().toString() + "&& " + loc.getName() + "=" + outUURI.toString()); curi.putObject(outUURI.toString(), curi.getObject(URLInfo.ATTACH)); } numberOfLinksExtracted++; } catch (URIException e) { // There may not be a controller (e.g. If we're being run // by the extractor tool). if (getController() != null) { getController().logUriError(e, curi.getUURI(), loc.getValue()); } else { logger.info(curi + ", " + loc.getValue() + ": " + e.getMessage()); } } }

Exemple #4

0

Afficher le fichier

Fichier : WagCostAssignmentPolicy.java Projet : dljjj/heritrix3

/** * Add constant penalties for certain features of URI (and its 'via') that make it more * delayable/skippable. * * @param curi CrawlURI to be assigned a cost * @see org.archive.crawler.frontier.CostAssignmentPolicy#costOf(org.archive.modules.CrawlURI) */ public int costOf(CrawlURI curi) { int cost = 1; UURI uuri = curi.getUURI(); if (uuri.hasQuery()) { // has query string cost++; int qIndex = uuri.toString().indexOf('?'); if (curi.flattenVia().startsWith(uuri.toString().substring(0, qIndex))) { // non-query-string portion of URI is same as previous cost++; } // TODO: other potential query-related cost penalties: // - more than X query-string attributes // - calendarish terms // - query-string over certain size } // TODO: other potential path-based penalties // - new path is simply extension of via path // - many path segments // TODO: other potential hops-based penalties // - more than X hops // - each speculative hop return cost; }

Exemple #5

0

Afficher le fichier

Fichier : RegularExpressionCriteria.java Projet : focusdone/seacrawler

/*
 * (non-Javadoc)
 * @see org.archive.crawler.settings.refinements.Criteria#isWithinRefinementBounds(org.archive.crawler.datamodel.UURI, int)
 *
 * Returns false for a null URI; otherwise returns the result of matching the
 * URI's string form against this criteria's regular expression.
 *
 * NOTE(review): the @see target above names a (UURI, int) signature, while
 * this method takes only a UURI -- confirm against the Criteria interface.
 */
public boolean isWithinRefinementBounds(UURI uri) {
    // The original tested "uri == null" twice; a single null guard is equivalent.
    return uri != null && TextUtils.matches(regexp, uri.toString());
}