예제 #1
  /**
   * <b>SAX:</b> Resolve the given entity into an input source. If the name can't be mapped to a
   * preferred form of the entity, the URI is used. To resolve the entity, first a local catalog
   * mapping names to URIs is consulted. If no mapping is found there, a catalog mapping names to
   * java resources is consulted. Finally, if neither mapping found a copy of the entity, the
   * specified URI is used.
   *
   * <p>When a URI is used, <a href="#createInputSource">createInputSource</a> is used to correctly
   * deduce the character encoding used by this entity. No MIME type checking is done.
   *
   * @param name Used to find alternate copies of the entity, when this value is non-null; this is
   *     the XML "public ID".
   * @param uri Used when no alternate copy of the entity is found; this is the XML "system ID",
   *     normally a URI.
   */
  public InputSource resolveEntity(String name, String uri) throws IOException {
    // Resolves an entity to an InputSource, trying in order: the name-to-URI mapping (name2uri),
    // a resource stream for the name (mapResource), and finally the caller-supplied uri.
    // Returns null when name2uri yields no mapping, mapResource yields no stream, and uri is null.
    // NOTE(review): this listing has no closing braces for the method or its inner blocks, and
    // the reassigned `uri` is never read again in the visible lines -- statements appear to be
    // missing; confirm against the complete source before relying on it.
    InputSource retval;
    String mappedURI = name2uri(name);
    InputStream stream;

    // prefer explicit URI mappings, then bundled resources...
    // (mapResource is only consulted when name2uri produced no mapping)
    if (mappedURI == null && (stream = mapResource(name)) != null) {
      uri = "java:resource:" + (String) id2resource.get(name);
      retval = new InputSource(XmlReader.createReader(stream));

      // ...and treat all URIs the same (as URLs for now).
    } else {
      URL url;
      URLConnection conn;

      // A mapped URI takes precedence over the caller's uri; with neither there is nothing to
      // open, so the method returns null.
      if (mappedURI != null) uri = mappedURI;
      else if (uri == null) return null;

      url = new URL(uri);
      conn = url.openConnection();
      // Keep the URL as reported by the connection rather than the string that was passed in.
      uri = conn.getURL().toString();
      // System.out.println ("++ URI: " + url);
      // With ignoringMIME set, the content type is not consulted at all; otherwise it is handed
      // to createInputSource (with checkType == false) together with the URL's protocol.
      if (ignoringMIME) retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
      else {
        String contentType = conn.getContentType();
        retval = createInputSource(contentType, conn.getInputStream(), false, url.getProtocol());
    return retval;
예제 #2
  /**
   * Creates an input source from a given URI.
   *
   * @param uri the URI (system ID) for the entity
   * @param checkType if true, the MIME content type for the entity is checked for document type and
   *     character set encoding.
   */
  public static InputSource createInputSource(URL uri, boolean checkType) throws IOException {
    // Opens a connection to the given URL and wraps its input stream in an InputSource.
    // NOTE(review): the closing braces of the else branch and of the method are absent from this
    // listing, so statements may be missing -- confirm against the complete source.

    URLConnection conn = uri.openConnection();
    InputSource retval;

    if (checkType) {
      // Hand the connection's content type to the four-argument overload. That call is made with
      // checkType == false, so the MIME type itself is not validated on this path.
      String contentType = conn.getContentType();
      retval = createInputSource(contentType, conn.getInputStream(), false, uri.getProtocol());
    } else {
      // Ignore the content type; the reader is created from the stream alone.
      retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
    return retval;
예제 #3
  /**
   * Creates an input source from a given file, autodetecting the character encoding.
   *
   * <p>The system ID is set to a {@code file:} identifier built from the file's absolute path, with
   * platform separators converted to {@code /}, a leading {@code /} ensured, and a trailing
   * {@code /} appended for directories.
   *
   * @param file the file to read
   * @return an input source reading {@code file}, with its system ID set
   * @throws IOException if the file cannot be opened by {@code FileInputStream}; reader creation
   *     may presumably fail this way as well
   */
  public static InputSource createInputSource(File file) throws IOException {
    // NOTE(review): the method's closing brace is absent from this listing.
    InputSource retval;
    String path;

    // NOTE(review): the FileInputStream is created inline with no try/finally in the visible
    // code; if createReader throws, nothing here closes it.
    retval = new InputSource(XmlReader.createReader(new FileInputStream(file)));

    // On JDK 1.2 and later, simplify this:
    //    "path = file.toURL ().toString ()".
    // Hand-build a URL-style path: forward slashes only, rooted at "/", directories end in "/".
    path = file.getAbsolutePath();
    if (File.separatorChar != '/') path = path.replace(File.separatorChar, '/');
    if (!path.startsWith("/")) path = "/" + path;
    if (!path.endsWith("/") && file.isDirectory()) path = path + "/";

    retval.setSystemId("file:" + path);
    return retval;
예제 #4
  /**
   * Returns an input source, using the MIME type information and URL scheme to statically determine
   * the correct character encoding if possible and otherwise autodetecting it. MIME carefully
   * specifies the character encoding defaults, and how attributes of the content type can change
   * it. XML further specifies two mandatory encodings (UTF-8 and UTF-16), and includes an XML
   * declaration which can be used to internally label most documents encoded using US-ASCII
   * supersets (such as Shift_JIS, EUC-JP, ISO-2022-*, ISO-8859-*, and more).
   *
   * <p>This method can be used to access XML documents which do not have URIs (such as servlet
   * input streams, or most JavaMail message entities) and to support access methods such as HTTP
   * POST or PUT. (URLs normally return content using the GET method.)
   *
   * <p><em>The caller should set the system ID in order for relative URIs found in this document
   * to be interpreted correctly.</em> In some cases, a custom resolver will need to be used; for
   * example, documents may be grouped in a single MIME "multipart/related" bundle, and relative
   * URLs would refer to other documents in that bundle.
   *
   * @param contentType The MIME content type for the source for which an InputSource is desired,
   *     such as <em>text/xml;charset=utf-8</em>.
   * @param stream The input byte stream for the input source.
   * @param checkType If true, this verifies that the content type is known to support XML
   *     documents, such as <em>application/xml</em>.
   * @param scheme Unless this is "file", unspecified MIME types default to US-ASCII. Files are
   *     always autodetected since most file systems discard character encoding information.
   */
  public static InputSource createInputSource(
      String contentType, InputStream stream, boolean checkType, String scheme) throws IOException {
    // Picks a charset from the MIME content type where possible, then builds the reader with
    // XmlReader.createReader(stream, charset). charset stays null when nothing was determined
    // (presumably createReader then autodetects -- confirm against XmlReader).
    // NOTE(review): the closing braces of the nested blocks and of the method are absent from
    // this listing, so statements may be missing -- confirm against the complete source.
    InputSource retval;
    String charset = null;

    if (contentType != null) {
      int index;

      // NOTE(review): toLowerCase() with no argument uses the default locale, so the lowered
      // MIME type / charset name can differ under locales with special casing rules
      // (e.g. Turkish 'I'); a fixed locale would make this deterministic.
      contentType = contentType.toLowerCase();
      // Split the bare "type/subtype" from any ';'-separated attributes.
      index = contentType.indexOf(';');
      if (index != -1) {
        String attributes;

        attributes = contentType.substring(index + 1);
        contentType = contentType.substring(0, index);

        // use "charset=..." if it's available
        index = attributes.indexOf("charset");
        if (index != -1) {
          // Skip past the 7 characters of the word "charset" itself.
          attributes = attributes.substring(index + 7);
          // strip out subsequent attributes
          if ((index = attributes.indexOf(';')) != -1) attributes = attributes.substring(0, index);
          // find start of value
          if ((index = attributes.indexOf('=')) != -1) {
            attributes = attributes.substring(index + 1);
            // strip out rfc822 comments
            if ((index = attributes.indexOf('(')) != -1)
              attributes = attributes.substring(0, index);
            // double quotes are optional
            if ((index = attributes.indexOf('"')) != -1) {
              attributes = attributes.substring(index + 1);
              // NOTE(review): if there is no closing quote, indexOf returns -1 and
              // substring(0, -1) throws StringIndexOutOfBoundsException.
              attributes = attributes.substring(0, attributes.indexOf('"'));
            charset = attributes.trim();
            // XXX "\;", "\)" etc were mishandled above

      // Check MIME type.
      // The comparison is against the `types` array defined elsewhere in the class.
      // NOTE(review): contentType has not been trimmed yet at this point (trim happens further
      // down), so surrounding whitespace would make equals() fail.
      if (checkType) {
        boolean isOK = false;
        for (int i = 0; i < types.length; i++)
          if (types[i].equals(contentType)) {
            isOK = true;
        if (!isOK) throw new IOException("Not XML: " + contentType);

      // "text/*" MIME types have hard-wired character set
      // defaults, as specified in the RFCs.  For XML, we
      // ignore the system "file.encoding" property since
      // autodetection is more correct.
      if (charset == null) {
        contentType = contentType.trim();
        if (contentType.startsWith("text/")) {
          // The US-ASCII default applies to every scheme except "file" (compared ignoring case).
          if (!"file".equalsIgnoreCase(scheme)) charset = "US-ASCII";
        // "application/*" has no default

    retval = new InputSource(XmlReader.createReader(stream, charset));
    return retval;