/**
 * Checks that percent-encoded unreserved characters ({@code A-Z a-z 0-9 - . _ ~}) are decoded
 * by {@link DecodeUnreservedCanonicalizer} in every URL component exercised below.
 *
 * <p>Each pair is {@code {input, expected}}; the canonicalized input must serialize to the same
 * string as the parsed expected URL.
 *
 * @throws GalimatiasParseException if any of the fixture URLs fails to parse
 */
@Test
public void test() throws GalimatiasParseException {
    final URLCanonicalizer canon = new DecodeUnreservedCanonicalizer();
    for (final String[] pair : new String[][] {
        // userinfo: username
        new String[] {
            "http://%41%5A%61%7A%30%39%2D%2E%5F%7E@example.com/",
            "http://AZaz09-._~@example.com/"
        },
        // userinfo: password (empty username)
        new String[] {
            "http://:%41%5A%61%7A%30%39%2D%2E%5F%7E@example.com/",
            "http://:AZaz09-._~@example.com/"
        },
        // path
        new String[] {
            "http://example.com/%41%5A%61%7A%30%39%2D%2E%5F%7E",
            "http://example.com/AZaz09-._~"
        },
        // query
        new String[] {
            "http://example.com/?%41%5A%61%7A%30%39%2D%2E%5F%7E",
            "http://example.com/?AZaz09-._~"
        },
        // fragment
        new String[] {
            "http://example.com/#%41%5A%61%7A%30%39%2D%2E%5F%7E",
            "http://example.com/#AZaz09-._~"
        }
    }) {
        assertThat(canon.canonicalize(URL.parse(pair[0])).toString())
                .isEqualTo(URL.parse(pair[1]).toString());
    }
}
/**
 * Verifies that applying {@link DecodeUnreservedCanonicalizer} a second time does not change
 * the result of the first application, and that serializing and re-parsing the canonical URL
 * yields an equal URL.
 *
 * <p>Data points whose {@code parsedURL} is {@code null} are skipped via {@code assumeNotNull}.
 *
 * @param testURL a data point from the WHATWG dataset
 * @throws GalimatiasParseException if the serialized canonical URL cannot be parsed again
 */
@Theory
public void idempotence(
        final @TestURL.TestURLs(dataset = TestURL.DATASETS.WHATWG) TestURL testURL)
        throws GalimatiasParseException {
    assumeNotNull(testURL.parsedURL);

    final URLCanonicalizer canonicalizer = new DecodeUnreservedCanonicalizer();

    // Canonicalizing an already canonical URL must be a no-op.
    final URL firstPass = canonicalizer.canonicalize(testURL.parsedURL);
    final URL secondPass = canonicalizer.canonicalize(firstPass);
    assertThat(firstPass).isEqualTo(secondPass);

    // The canonical form must survive a serialize/parse round trip.
    final String serialized = secondPass.toString();
    final URL parsedAgain = URL.parse(serialized);
    assertThat(parsedAgain).isEqualTo(secondPass);
}
/** @see org.xml.sax.EntityResolver#resolveEntity(java.lang.String, java.lang.String) */ @Override public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { if (requestsLeft > -1) { if (requestsLeft == 0) { throw new IOException("Number of permitted HTTP requests exceeded."); } else { requestsLeft--; } } HttpGet m = null; try { URL url; try { url = URL.parse(systemId); } catch (GalimatiasParseException e) { IOException ioe = (IOException) new IOException(e.getMessage()).initCause(e); SAXParseException spe = new SAXParseException(e.getMessage(), publicId, systemId, -1, -1, ioe); if (errorHandler != null) { errorHandler.fatalError(spe); } throw spe; } String scheme = url.scheme(); if (!("http".equals(scheme) || "https".equals(scheme))) { String msg = "Unsupported URI scheme: \u201C" + scheme + "\u201D."; SAXParseException spe = new SAXParseException(msg, publicId, systemId, -1, -1, new IOException(msg)); if (errorHandler != null) { errorHandler.fatalError(spe); } throw spe; } systemId = url.toString(); try { m = new HttpGet(systemId); } catch (IllegalArgumentException e) { SAXParseException spe = new SAXParseException( e.getMessage(), publicId, systemId, -1, -1, (IOException) new IOException(e.getMessage()).initCause(e)); if (errorHandler != null) { errorHandler.fatalError(spe); } throw spe; } m.setHeader("User-Agent", userAgent); m.setHeader("Accept", buildAccept()); m.setHeader("Accept-Encoding", "gzip"); log4j.info(systemId); HttpResponse response = client.execute(m); int statusCode = response.getStatusLine().getStatusCode(); if (statusCode != 200) { String msg = "HTTP resource not retrievable. 
The HTTP status from the remote server was: " + statusCode + "."; SAXParseException spe = new SAXParseException( msg, publicId, m.getURI().toString(), -1, -1, new IOException(msg)); if (errorHandler != null) { errorHandler.fatalError(spe); } throw spe; } HttpEntity entity = response.getEntity(); long len = entity.getContentLength(); if (sizeLimit > -1 && len > sizeLimit) { SAXParseException spe = new SAXParseException( "Resource size exceeds limit.", publicId, m.getURI().toString(), -1, -1, new StreamBoundException("Resource size exceeds limit.")); if (errorHandler != null) { errorHandler.fatalError(spe); } throw spe; } TypedInputSource is; org.apache.http.Header ct = response.getFirstHeader("Content-Type"); String contentType = null; final String baseUri = m.getURI().toString(); if (ct != null) { contentType = ct.getValue(); } is = contentTypeParser.buildTypedInputSource(baseUri, publicId, contentType); Header cl = response.getFirstHeader("Content-Language"); if (cl != null) { is.setLanguage(cl.getValue().trim()); } Header xuac = response.getFirstHeader("X-UA-Compatible"); if (xuac != null) { String val = xuac.getValue().trim(); if (!"ie=edge".equalsIgnoreCase(val)) { SAXParseException spe = new SAXParseException( "X-UA-Compatible HTTP header must have the value \u201CIE=edge\u201D," + " was \u201C" + val + "\u201D.", publicId, systemId, -1, -1); errorHandler.error(spe); } } Header csp = response.getFirstHeader("Content-Security-Policy"); if (csp != null) { try { ContentSecurityPolicy.THE_INSTANCE.checkValid(csp.getValue().trim()); } catch (DatatypeException e) { SAXParseException spe = new SAXParseException( "Content-Security-Policy HTTP header: " + e.getMessage(), publicId, systemId, -1, -1); Html5DatatypeException ex5 = (Html5DatatypeException) e; if (ex5.isWarning()) { errorHandler.warning(spe); } else { errorHandler.error(spe); } } } final HttpGet meth = m; InputStream stream = entity.getContent(); if (sizeLimit > -1) { stream = new BoundedInputStream(stream, 
sizeLimit, baseUri); } Header ce = response.getFirstHeader("Content-Encoding"); if (ce != null) { String val = ce.getValue().trim(); if ("gzip".equalsIgnoreCase(val) || "x-gzip".equalsIgnoreCase(val)) { stream = new GZIPInputStream(stream); if (sizeLimit > -1) { stream = new BoundedInputStream(stream, sizeLimit, baseUri); } } } is.setByteStream( new ObservableInputStream( stream, new StreamObserver() { private final Logger log4j = Logger.getLogger("nu.validator.xml.PrudentEntityResolver.StreamObserver"); private boolean released = false; @Override public void closeCalled() { log4j.debug("closeCalled"); if (!released) { log4j.debug("closeCalled, not yet released"); released = true; try { meth.releaseConnection(); } catch (Exception e) { log4j.debug("closeCalled, releaseConnection", e); } } } @Override public void exceptionOccurred(Exception ex) throws IOException { if (!released) { released = true; try { meth.abort(); } catch (Exception e) { log4j.debug("exceptionOccurred, abort", e); } finally { try { meth.releaseConnection(); } catch (Exception e) { log4j.debug("exceptionOccurred, releaseConnection", e); } } } if (ex instanceof SystemIdIOException) { throw (SystemIdIOException) ex; } else if (ex instanceof IOException) { IOException ioe = (IOException) ex; throw new SystemIdIOException(baseUri, ioe.getMessage(), ioe); } else if (ex instanceof RuntimeException) { throw (RuntimeException) ex; } else { throw new RuntimeException("API contract violation. 
Wrong exception type.", ex); } } @Override public void finalizerCalled() { if (!released) { released = true; try { meth.abort(); } catch (Exception e) { log4j.debug("finalizerCalled, abort", e); } finally { try { meth.releaseConnection(); } catch (Exception e) { log4j.debug("finalizerCalled, releaseConnection", e); } } } } })); return is; } catch (IOException | RuntimeException | SAXException e) { if (m != null) { try { m.abort(); } catch (Exception ex) { log4j.debug("abort", ex); } finally { try { m.releaseConnection(); } catch (Exception ex) { log4j.debug("releaseConnection", ex); } } } throw e; } }