/**
 * Create a new DocumentParser and parse the style information of the
 * document found at the given URL.
 *
 * <p>When the URL protocol is neither {@code http} nor {@code https}, the
 * parser is chosen from the URL suffix: {@code .css}; {@code .html},
 * {@code .shtml} or a trailing {@code /}; {@code .xhtml} or {@code .xml}.
 * For {@code http} and {@code https} URLs a connection is opened and the
 * parser is chosen from the Content-Type reported by that connection.
 *
 * @param ac        the application context; supplies the medium and is
 *                  passed on to the parsers and to the connection factory
 * @param urlString the URL of the document to parse
 * @exception Exception if the URL suffix is not recognised
 *            ("Unknown file"), if the content type is malformed or not
 *            supported (an {@link IOException}), or if any of the
 *            underlying URL, connection or parser calls fails
 */
public DocumentParser(ApplContext ac, String urlString) throws Exception {
    this.htmlURL = HTTPURL.getURL(urlString);
    this.ac = ac;

    // From here on work with the string form of the URL object.
    urlString = htmlURL.toString();
    String urlLower = urlString.toLowerCase();
    String media = ac.getMedium();
    String urlProtocol = htmlURL.getProtocol();

    if (!"http".equals(urlProtocol) && !"https".equals(urlProtocol)) {
        // No HTTP headers to look at: decide from the URL suffix.
        if (urlLower.endsWith(".css")) {
            StyleSheetParser parser = new StyleSheetParser();
            parser.parseURL(ac, htmlURL, null, null, media, StyleSheetOrigin.AUTHOR);
            style = parser.getStyleSheet();
        } else if (urlLower.endsWith(".html") || urlLower.endsWith(".shtml")
                || urlLower.endsWith("/")) {
            TagSoupStyleSheetHandler handler = new TagSoupStyleSheetHandler(htmlURL, ac);
            handler.parse(htmlURL);
            style = handler.getStyleSheet();
            if (style != null) {
                style.setType("text/html");
            }
        } else if (urlLower.endsWith(".xhtml") || urlLower.endsWith(".xml")) {
            // Seems like we need to use TagSoup in this case as well
            XMLStyleSheetHandler handler = new XMLStyleSheetHandler(htmlURL, ac);
            handler.parse(htmlURL);
            style = handler.getStyleSheet();
            if (style != null) {
                style.setType("text/xml");
            }
        } else {
            throw new Exception("Unknown file");
        }
    } else {
        URLConnection connection = null;
        try {
            connection = HTTPURL.getConnection(htmlURL, ac);
            // Pick up the URL reported by the connection itself.
            htmlURL = connection.getURL();

            // A Content-Location header, when present, is combined with the
            // current URL to form the URL handed to the parsers.
            String httpCL = connection.getHeaderField("Content-Location");
            if (httpCL != null) {
                htmlURL = HTTPURL.getURL(htmlURL, httpCL);
            }

            String cType = connection.getContentType();
            if (cType == null) {
                cType = "unknown/unknown";
            }

            // A malformed Content-Type used to be swallowed here, leaving a
            // null MimeType that blew up with a NullPointerException on the
            // first match() below. Report it like an unsupported type instead.
            MimeType contentType;
            try {
                contentType = new MimeType(cType);
            } catch (MimeTypeFormatException ex) {
                throw new IOException("Unknown mime type : " + cType, ex);
            }

            if (Util.onDebug) {
                System.err.println("[DEBUG] content type is [" + contentType + ']');
            }

            if (contentType.match(MimeType.TEXT_HTML) == MimeType.MATCH_SPECIFIC_SUBTYPE) {
                TagSoupStyleSheetHandler handler = new TagSoupStyleSheetHandler(htmlURL, ac);
                handler.parse(urlString, connection);
                style = handler.getStyleSheet();
                if (style != null) {
                    style.setType("text/html");
                }
            } else if (contentType.match(MimeType.TEXT_CSS) == MimeType.MATCH_SPECIFIC_SUBTYPE) {
                StyleSheetParser parser = new StyleSheetParser();
                parser.parseURL(ac, htmlURL, null, null, media, StyleSheetOrigin.AUTHOR);
                style = parser.getStyleSheet();
            } else if ((contentType.match(MimeType.TEXT_XML) == MimeType.MATCH_SPECIFIC_SUBTYPE)
                    || (contentType.match(MimeType.APPLICATION_XHTML_XML) == MimeType.MATCH_SPECIFIC_SUBTYPE)
                    || (contentType.match(wap) == MimeType.MATCH_SPECIFIC_SUBTYPE)) {
                // TagSoup ?
                XMLStyleSheetHandler handler = new XMLStyleSheetHandler(htmlURL, ac);
                handler.parse(urlString, connection);
                style = handler.getStyleSheet();
                if (style != null) {
                    style.setType("text/xml");
                }
            } else {
                throw new IOException("Unknown mime type : " + contentType);
            }
        } finally {
            // connection is still null when HTTPURL.getConnection itself failed.
            if (connection != null) {
                try {
                    connection.getInputStream().close();
                } catch (Exception ignored) {
                    // Best-effort cleanup: a failure to close must not mask
                    // the exception (if any) that is already propagating.
                }
            }
        }
    }
}