// Private Methods for Dereferenceability Process private boolean isDereferenceable(CachedHTTPResource httpResource) { if (httpResource.getDereferencabilityStatusCode() == null) { List<Integer> statusCode = this.getStatusCodes(httpResource.getStatusLines()); if (httpResource.getUri().contains("#") && statusCode.contains(200)) httpResource.setDereferencabilityStatusCode(StatusCode.HASH); else if (statusCode.contains(200)) { httpResource.setDereferencabilityStatusCode(StatusCode.SC200); if (statusCode.contains(303)) httpResource.setDereferencabilityStatusCode(StatusCode.SC303); else { if (statusCode.contains(301)) httpResource.setDereferencabilityStatusCode(StatusCode.SC301); else if (statusCode.contains(302)) httpResource.setDereferencabilityStatusCode(StatusCode.SC302); else if (statusCode.contains(307)) httpResource.setDereferencabilityStatusCode(StatusCode.SC307); } } if (has4xxCode(statusCode)) { httpResource.setDereferencabilityStatusCode(StatusCode.SC4XX); } if (has5xxCode(statusCode)) { httpResource.setDereferencabilityStatusCode(StatusCode.SC5XX); } } StatusCode scode = httpResource.getDereferencabilityStatusCode(); return this.mapDerefStatusCode(scode); }
/** * Tries to dereference all the URIs contained in the parameter, by retrieving them from the * cache. URIs not found in the cache are added to the queue containing the URIs to be fetched by * the async HTTP retrieval process * * @param uriSet Set of URIs to be dereferenced * @return list with the results of the dereferenceability operations, for those URIs that were * found in the cache */ private List<DerefResult> deReferenceUris(List<String> uriSet) { // Start the dereferencing process, which will be run in parallel httpRetriever.addListOfResourceToQueue(uriSet); httpRetriever.start(); List<DerefResult> lstDerefUris = new ArrayList<DerefResult>(); List<String> lstToDerefUris = new ArrayList<String>(uriSet); // Dereference each and every one of the URIs contained in the specified set while (lstToDerefUris.size() > 0) { // Remove the URI at the head of the queue of URIs to be dereferenced String headUri = lstToDerefUris.remove(0); // First, search for the URI in the cache CachedHTTPResource httpResource = (CachedHTTPResource) dcmgr.getFromCache(DiachronCacheManager.HTTP_RESOURCE_CACHE, headUri); if (httpResource == null || httpResource.getStatusLines() == null) { // URIs not found in the cache, is still to be fetched via HTTP, add it to the end of the // list lstToDerefUris.add(headUri); } else { // URI found in the cache (which means that was fetched at some point), check if // successfully dereferenced DerefResult curUrlResult = new DerefResult(headUri, false, false); lstDerefUris.add(curUrlResult); if (this.isDereferenceable(httpResource)) { curUrlResult.isDeref = true; if (this.is200AnRDF(httpResource)) { curUrlResult.isRdfXml = true; } else this.createProblemQuad(httpResource.getUri(), DQM.NotMeaningful); } else if (httpResource.getDereferencabilityStatusCode() == StatusCode.SC200) { curUrlResult.isDeref = true; // Check if the resource contains RDF on XML if (this.is200AnRDF(httpResource)) { curUrlResult.isRdfXml = true; } } logger.trace( "Resource 
fetched: {}. Deref. status: {}. Is RDF: {}", headUri, httpResource.getDereferencabilityStatusCode(), curUrlResult.isRdfXml); } } return lstDerefUris; }
/**
 * Reports whether the given cached resource counts as dereferenceable, as decided by
 * {@code mapDerefStatusCode} for the resource's dereferenceability status code.
 *
 * <p>When the resource has no status code assigned yet, one is derived from the HTTP codes
 * extracted from its status lines and stored on the resource. Problem quads are created for
 * the resource URI along the way:
 *
 * <ul>
 *   <li>a 200 on a URI containing {@code #} yields {@code HASH};
 *   <li>otherwise a 200 yields {@code SC200}, replaced by {@code SC303} if a 303 is present,
 *       or else by the first of {@code SC301}, {@code SC302}, {@code SC307} whose code is
 *       present (each of these three also creates its matching problem quad);
 *   <li>if none of those redirect codes is present but {@code hasBad3xxCode} is true, a
 *       {@code DQM.SC3XXRedirection} problem quad is created and the status stays
 *       {@code SC200};
 *   <li>{@code SC4XX} overrides the above when {@code has4xxCode} is true, and {@code SC5XX}
 *       overrides everything when {@code has5xxCode} is true; each creates its problem quad.
 * </ul>
 *
 * <p>NOTE(review): if none of these conditions holds, the stored status code stays
 * {@code null} and {@code null} is handed to {@code mapDerefStatusCode} - confirm that this
 * is handled there.
 *
 * @param httpResource cached resource whose status lines are inspected
 * @return the value returned by {@code mapDerefStatusCode} for the resource's status code
 */
private boolean isDereferenceable(CachedHTTPResource httpResource) {
  if (httpResource.getDereferencabilityStatusCode() == null) {
    List<Integer> codes = this.getStatusCodes(httpResource.getStatusLines());
    boolean hashUri = httpResource.getUri().contains("#");
    boolean sawOk = codes.contains(200);

    if (sawOk) {
      if (hashUri) {
        httpResource.setDereferencabilityStatusCode(StatusCode.HASH);
      } else {
        // Start from a plain 200, then refine with the redirect seen on the way (if any)
        httpResource.setDereferencabilityStatusCode(StatusCode.SC200);
        if (codes.contains(303)) {
          httpResource.setDereferencabilityStatusCode(StatusCode.SC303);
        } else if (codes.contains(301)) {
          httpResource.setDereferencabilityStatusCode(StatusCode.SC301);
          this.createProblemQuad(httpResource.getUri(), DQM.SC301MovedPermanently);
        } else if (codes.contains(302)) {
          httpResource.setDereferencabilityStatusCode(StatusCode.SC302);
          this.createProblemQuad(httpResource.getUri(), DQM.SC302Found);
        } else if (codes.contains(307)) {
          httpResource.setDereferencabilityStatusCode(StatusCode.SC307);
          this.createProblemQuad(httpResource.getUri(), DQM.SC307TemporaryRedirectory);
        } else if (hasBad3xxCode(codes)) {
          // Status stays SC200 here; only the problem is reported
          this.createProblemQuad(httpResource.getUri(), DQM.SC3XXRedirection);
        }
      }
    }

    // Error classes are applied last, so they take precedence; 5xx wins over 4xx
    if (has4xxCode(codes)) {
      httpResource.setDereferencabilityStatusCode(StatusCode.SC4XX);
      this.createProblemQuad(httpResource.getUri(), DQM.SC4XXClientError);
    }
    if (has5xxCode(codes)) {
      httpResource.setDereferencabilityStatusCode(StatusCode.SC5XX);
      this.createProblemQuad(httpResource.getUri(), DQM.SC5XXServerError);
    }
  }

  return this.mapDerefStatusCode(httpResource.getDereferencabilityStatusCode());
}