/** * <b>SAX:</b> Resolve the given entity into an input source. If the name can't be mapped to a * preferred form of the entity, the URI is used. To resolve the entity, first a local catalog * mapping names to URIs is consulted. If no mapping is found there, a catalog mapping names to * java resources is consulted. Finally, if neither mapping found a copy of the entity, the * specified URI is used. * * <p> * * <p>When a URI is used, <a href="#createInputSource">createInputSource</a> is used to correctly * deduce the character encoding used by this entity. No MIME type checking is done. * * @param name Used to find alternate copies of the entity, when this value is non-null; this is * the XML "public ID". * @param uri Used when no alternate copy of the entity is found; this is the XML "system ID", * normally a URI. */ public InputSource resolveEntity(String name, String uri) throws IOException { InputSource retval; String mappedURI = name2uri(name); InputStream stream; // prefer explicit URI mappings, then bundled resources... if (mappedURI == null && (stream = mapResource(name)) != null) { uri = "java:resource:" + (String) id2resource.get(name); retval = new InputSource(XmlReader.createReader(stream)); // ...and treat all URIs the same (as URLs for now). } else { URL url; URLConnection conn; if (mappedURI != null) uri = mappedURI; else if (uri == null) return null; url = new URL(uri); conn = url.openConnection(); uri = conn.getURL().toString(); // System.out.println ("++ URI: " + url); if (ignoringMIME) retval = new InputSource(XmlReader.createReader(conn.getInputStream())); else { String contentType = conn.getContentType(); retval = createInputSource(contentType, conn.getInputStream(), false, url.getProtocol()); } } retval.setSystemId(uri); retval.setPublicId(name); return retval; }
/**
 * Creates an input source by opening a connection to the given URL.
 *
 * <p>The system ID of the result is the URL reported by the opened connection.
 *
 * @param uri the URI (system ID) for the entity
 * @param checkType if true, the connection's MIME content type and the URL scheme are passed to
 *     {@link #createInputSource(String, InputStream, boolean, String)} so that they can
 *     determine the character encoding; if false, the stream is handed directly to
 *     {@code XmlReader.createReader}. Note that the nested call is made with its own
 *     {@code checkType} argument set to {@code false}, so the content type is not verified to
 *     be an XML type here.
 * @return an input source for the entity, with its system ID set
 * @throws IOException if the connection cannot be opened or read
 */
public static InputSource createInputSource(URL uri, boolean checkType) throws IOException {
  final URLConnection connection = uri.openConnection();

  final InputSource source =
      checkType
          ? createInputSource(
              connection.getContentType(), connection.getInputStream(), false, uri.getProtocol())
          : new InputSource(XmlReader.createReader(connection.getInputStream()));

  source.setSystemId(connection.getURL().toString());
  return source;
}
/** Creates an input source from a given file, autodetecting the character encoding. */ public static InputSource createInputSource(File file) throws IOException { InputSource retval; String path; retval = new InputSource(XmlReader.createReader(new FileInputStream(file))); // On JDK 1.2 and later, simplify this: // "path = file.toURL ().toString ()". path = file.getAbsolutePath(); if (File.separatorChar != '/') path = path.replace(File.separatorChar, '/'); if (!path.startsWith("/")) path = "/" + path; if (!path.endsWith("/") && file.isDirectory()) path = path + "/"; retval.setSystemId("file:" + path); return retval; }
/** * Returns an input source, using the MIME type information and URL scheme to statically determine * the correct character encoding if possible and otherwise autodetecting it. MIME carefully * specifies the character encoding defaults, and how attributes of the content type can change * it. XML further specifies two mandatory encodings (UTF-8 and UTF-16), and includes an XML * declaration which can be used to internally label most documents encoded using US-ASCII * supersets (such as Shift_JIS, EUC-JP, ISO-2022-*, ISO-8859-*, and more). * * <p> * * <p>This method can be used to access XML documents which do not have URIs (such as servlet * input streams, or most JavaMail message entities) and to support access methods such as HTTP * POST or PUT. (URLs normally return content using the GET method.) * * <p> * * <p><em> The caller should set the system ID in order for relative URIs found in this document * to be interpreted correctly.</em> In some cases, a custom resolver will need to be used; for * example, documents may be grouped in a single MIME "multipart/related" bundle, and relative * URLs would refer to other documents in that bundle. * * @param contentType The MIME content type for the source for which an InputSource is desired, * such as <em>text/xml;charset=utf-8</em>. * @param stream The input byte stream for the input source. * @param checkType If true, this verifies that the content type is known to support XML * documents, such as <em>application/xml</em>. * @param scheme Unless this is "file", unspecified MIME types default to US-ASCII. Files are * always autodetected since most file systems discard character encoding information. 
*/ public static InputSource createInputSource( String contentType, InputStream stream, boolean checkType, String scheme) throws IOException { InputSource retval; String charset = null; if (contentType != null) { int index; contentType = contentType.toLowerCase(); index = contentType.indexOf(';'); if (index != -1) { String attributes; attributes = contentType.substring(index + 1); contentType = contentType.substring(0, index); // use "charset=..." if it's available index = attributes.indexOf("charset"); if (index != -1) { attributes = attributes.substring(index + 7); // strip out subsequent attributes if ((index = attributes.indexOf(';')) != -1) attributes = attributes.substring(0, index); // find start of value if ((index = attributes.indexOf('=')) != -1) { attributes = attributes.substring(index + 1); // strip out rfc822 comments if ((index = attributes.indexOf('(')) != -1) attributes = attributes.substring(0, index); // double quotes are optional if ((index = attributes.indexOf('"')) != -1) { attributes = attributes.substring(index + 1); attributes = attributes.substring(0, attributes.indexOf('"')); } charset = attributes.trim(); // XXX "\;", "\)" etc were mishandled above } } } // // Check MIME type. // if (checkType) { boolean isOK = false; for (int i = 0; i < types.length; i++) if (types[i].equals(contentType)) { isOK = true; break; } if (!isOK) throw new IOException("Not XML: " + contentType); } // // "text/*" MIME types have hard-wired character set // defaults, as specified in the RFCs. For XML, we // ignore the system "file.encoding" property since // autodetection is more correct. // if (charset == null) { contentType = contentType.trim(); if (contentType.startsWith("text/")) { if (!"file".equalsIgnoreCase(scheme)) charset = "US-ASCII"; } // "application/*" has no default } } retval = new InputSource(XmlReader.createReader(stream, charset)); retval.setByteStream(stream); retval.setEncoding(charset); return retval; }