Пример #1
1
  /**
   * Pipes an XML input document through a chain of one or more transformations and serializes the
   * final result to a file. Called by {@link #pipeDocument}.
   *
   * <p>The first handler receives the parser's content and lexical events; each handler's result
   * is wired as SAX events into the next handler, and the last handler feeds a serializer
   * configured from its transformer's output properties.
   *
   * @param vTHandler Vector of TransformerHandlers (1 per stylesheet); must contain at least one
   *     element, otherwise {@code firstElement()} fails with {@code NoSuchElementException}.
   * @param source absolute URI to XML input
   * @param target absolute path to transformation output.
   */
  public void usePipe(Vector vTHandler, String source, String target)
      throws TransformerException, TransformerConfigurationException, FileNotFoundException,
          IOException, SAXException, SAXNotRecognizedException {
    XMLReader reader = XMLReaderFactory.createXMLReader();
    TransformerHandler tHFirst = (TransformerHandler) vTHandler.firstElement();
    reader.setContentHandler(tHFirst);
    reader.setProperty("http://xml.org/sax/properties/lexical-handler", tHFirst);

    // Chain the handlers: the output of each one becomes the SAX input of its successor.
    TransformerHandler tHLast = tHFirst;
    for (int i = 1; i < vTHandler.size(); i++) {
      TransformerHandler tHNext = (TransformerHandler) vTHandler.elementAt(i);
      tHLast.setResult(new SAXResult(tHNext));
      tHLast = tHNext;
    }

    // The serializer honours the output properties of the final stylesheet in the chain.
    Properties outputProps = tHLast.getTransformer().getOutputProperties();
    Serializer serializer = SerializerFactory.getSerializer(outputProps);

    // try-with-resources always closes the file and, unlike a manual finally/close(),
    // does not let a failure in close() hide an exception thrown while parsing.
    try (FileOutputStream out = new FileOutputStream(target)) {
      serializer.setOutputStream(out);
      tHLast.setResult(new SAXResult(serializer.asContentHandler()));
      reader.parse(source);
    }
  }
Пример #2
0
  /**
   * Parses the binary value of the input property as XML, sending the SAX events to an
   * {@link XmlSequencerHandler} created for the output node.
   *
   * @param inputProperty property whose binary value holds the XML document
   * @param outputNode node the handler is created for; if it is not new, a child node named
   *     {@code XmlLexicon.DOCUMENT} is added and used instead
   * @param context sequencing context (not used by this method)
   * @return always {@code true}
   * @throws Exception if the reader cannot be configured or the document cannot be parsed
   */
  @Override
  public boolean execute(Property inputProperty, Node outputNode, Context context)
      throws Exception {
    Binary xmlContent = inputProperty.getBinary();
    CheckArg.isNotNull(xmlContent, "binary");

    // A node that is not new gets a dedicated child node for this document.
    Node documentNode = outputNode;
    if (!documentNode.isNew()) {
      documentNode = documentNode.addNode(XmlLexicon.DOCUMENT);
    }

    XmlSequencerHandler handler = new XmlSequencerHandler(documentNode, scoping);

    XMLReader xmlReader = XMLReaderFactory.createXMLReader();
    xmlReader.setContentHandler(handler);
    xmlReader.setErrorHandler(handler);
    // The same handler also receives DTD declaration events ...
    xmlReader.setProperty(DECL_HANDLER_FEATURE, handler);
    // ... and lexical events
    xmlReader.setProperty(LEXICAL_HANDLER_FEATURE, handler);
    // Allow the handler to act as an EntityResolver2
    setFeature(xmlReader, ENTITY_RESOLVER_2_FEATURE, true);
    // Do not load external DTDs
    setFeature(xmlReader, LOAD_EXTERNAL_DTDS_FEATURE, false);
    // Do not resolve DTD URIs into fully-qualified URIs
    setFeature(xmlReader, RESOLVE_DTD_URIS_FEATURE, false);

    // The stream is closed as soon as parsing finishes or fails.
    try (InputStream xmlStream = xmlContent.getStream()) {
      xmlReader.parse(new InputSource(xmlStream));
    }
    return true;
  }
  /**
   * Re-registers the content and lexical handlers on the wrapped reader.
   *
   * <p>For each kind of handler: when both the regular handler and the wiretap handler are set, a
   * combining handler wrapping both is installed; when only one is set, that one is installed
   * directly; when neither is set, the (null) wiretap handler is passed through.
   */
  private void updateWiretap() {
    if (contentHandler == null) {
      wrappedReader.setContentHandler(wiretapContentHander);
    } else if (wiretapContentHander == null) {
      wrappedReader.setContentHandler(contentHandler);
    } else {
      wrappedReader.setContentHandler(
          new CombineContentHandler(wiretapContentHander, contentHandler));
    }

    final String lexicalProperty = "http://xml.org/sax/properties/lexical-handler";
    try {
      if (lexicalHandler == null) {
        wrappedReader.setProperty(lexicalProperty, wiretapLexicalHandler);
      } else if (wiretapLexicalHandler == null) {
        wrappedReader.setProperty(lexicalProperty, lexicalHandler);
      } else {
        wrappedReader.setProperty(
            lexicalProperty, new CombineLexicalHandler(wiretapLexicalHandler, lexicalHandler));
      }
    } catch (SAXNotRecognizedException | SAXNotSupportedException ignored) {
      // Best effort: a reader without lexical-handler support simply gets no lexical events.
    }
  }
Пример #4
0
  /**
   * Reads malformed XML (or HTML) from the given stream with the TagSoup parser and returns a new
   * stream from which a well-formed version of the input can be read.
   *
   * <p>The well-formed document is buffered in memory and always encoded as UTF-8, independent of
   * the platform default charset.
   *
   * @param original original input
   * @return an {@link InputStream} which can be used to read a well-formed version of the input XML
   * @throws ParseException if an exception occurs while parsing the input
   */
  public static InputStream xmlizeInputStream(InputStream original) throws ParseException {
    try {
      ByteArrayOutputStream out = new ByteArrayOutputStream();

      XMLReader reader = new Parser();

      // TODO walk through the javadoc and tune more settings
      // see tagsoup javadoc for details
      reader.setProperty(Parser.schemaProperty, new HTMLSchema());
      reader.setFeature(Parser.bogonsEmptyFeature, false);
      reader.setFeature(Parser.ignorableWhitespaceFeature, true);
      reader.setFeature(Parser.ignoreBogonsFeature, false);

      // Use an explicit charset: with the platform default the produced bytes would vary between
      // JVMs and might not match what a downstream XML parser assumes.
      Writer writer = new OutputStreamWriter(out, "UTF-8");
      reader.setContentHandler(new XMLWriter(writer));

      reader.parse(new InputSource(original));

      // Make sure nothing is left in the writer's buffer before taking the snapshot.
      writer.flush();
      return new ByteArrayInputStream(out.toByteArray());
    } catch (SAXException e) {
      throw new ParseException(R("PBadXML"), e);
    } catch (IOException e) {
      throw new ParseException(R("PBadXML"), e);
    }
  }
Пример #5
0
  /**
   * Validates the given XML with a Xerces SAX parser that has validation, schema validation and
   * full schema checking switched on, and reports the collected messages.
   *
   * <p>This object is registered as content handler, error handler and entity resolver. When
   * {@code strSchema} is not empty it is passed to the parser as the external no-namespace schema
   * location.
   *
   * @param xml the document to validate
   * @return a struct with the keys {@code warnings}, {@code errors}, {@code fatalerrors} and
   *     {@code status} (the negation of the {@code hasErrors} flag)
   * @throws PageException if reading the input fails with an {@link IOException}
   */
  public Struct validate(InputSource xml) throws PageException {
    final CFMLEngine cfml = CFMLEngineFactory.getInstance();

    // Start every run with fresh message collections.
    warnings = cfml.getCreationUtil().createArray();
    errors = cfml.getCreationUtil().createArray();
    fatals = cfml.getCreationUtil().createArray();

    final String schemaLocationProperty =
        "http://apache.org/xml/properties/schema/external-noNamespaceSchemaLocation";

    try {
      XMLReader reader = new XMLUtilImpl().createXMLReader("org.apache.xerces.parsers.SAXParser");
      reader.setContentHandler(this);
      reader.setErrorHandler(this);
      reader.setEntityResolver(this);
      reader.setFeature("http://xml.org/sax/features/validation", true);
      reader.setFeature("http://apache.org/xml/features/validation/schema", true);
      reader.setFeature("http://apache.org/xml/features/validation/schema-full-checking", true);
      if (!Util.isEmpty(strSchema)) {
        reader.setProperty(schemaLocationProperty, strSchema);
      }
      reader.parse(xml);
    } catch (SAXException ignored) {
      // Deliberately not rethrown; presumably the problem was already recorded through the
      // error-handler callbacks on this object — TODO confirm.
    } catch (IOException ioe) {
      throw cfml.getExceptionUtil().createXMLException(ioe.getMessage());
    }

    // Assemble the report before release() is called.
    Struct report = cfml.getCreationUtil().createStruct();
    report.setEL("warnings", warnings);
    report.setEL("errors", errors);
    report.setEL("fatalerrors", fatals);
    report.setEL("status", cfml.getCastUtil().toBoolean(!hasErrors));
    release();
    return report;
  }
Пример #6
0
  /**
   * Builds a validating, namespace-aware {@link XMLReader} that shares the grammar pool of this
   * object and reports to the given handlers.
   *
   * @param contentHandler receiver of the document events
   * @param errorHandler receiver of warnings and errors
   * @return the configured reader
   * @throws ParserConfigurationException if the factory cannot create a parser
   * @throws SAXException if a property or feature cannot be set
   */
  private XMLReader getXMLReader(ContentHandler contentHandler, ErrorHandler errorHandler)
      throws ParserConfigurationException, SAXException {

    // A fresh factory per call, configured for validation and namespaces
    final SAXParserFactory factory = SAXParserFactory.newInstance();
    factory.setValidating(true);
    factory.setNamespaceAware(true);

    final XMLReader reader = factory.newSAXParser().getXMLReader();

    // Share the grammar pool
    reader.setProperty(XMLReaderObjectFactory.APACHE_PROPERTIES_INTERNAL_GRAMMARPOOL, grammarPool);

    // Validation-related switches
    reader.setFeature(Namespaces.SAX_VALIDATION, true);
    reader.setFeature(Namespaces.SAX_VALIDATION_DYNAMIC, false);
    reader.setFeature(XMLReaderObjectFactory.APACHE_FEATURES_VALIDATION_SCHEMA, true);
    reader.setFeature(XMLReaderObjectFactory.APACHE_PROPERTIES_LOAD_EXT_DTD, true);
    reader.setFeature(Namespaces.SAX_NAMESPACES_PREFIXES, true);

    // Event receivers
    reader.setContentHandler(contentHandler);
    reader.setErrorHandler(errorHandler);

    return reader;
  }
Пример #7
0
 /**
  * Parses the given file with a fresh {@code CommentHandler} that is registered both as the
  * lexical handler of the reader and as the handler of the parse; the handler is stored in the
  * {@code commentHandler} field.
  *
  * @param file the XML file to read
  * @throws SAXException if the reader rejects the lexical-handler property or parsing fails
  * @throws IOException if the file cannot be opened or read
  */
 private void processCommentLines(File file) throws SAXException, IOException {
   SAXParser parser = newSaxParser(false);
   XMLReader xmlReader = parser.getXMLReader();
   commentHandler = new CommentHandler();
   xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", commentHandler);
   // Close the file even when parsing fails; previously the stream was never closed here.
   try (java.io.InputStream in = FileUtils.openInputStream(file)) {
     parser.parse(in, commentHandler);
   }
 }
Пример #8
0
  /**
   * Default constructor of MapLinksReader class.
   *
   * <p>Resets all parsing state and prepares the XML reader with this object as content and
   * lexical handler. A failure while preparing the reader is logged, not propagated.
   */
  public MapLinksReader() {
    super();

    // Collections and buffers
    map = new HashMap<String, Map<String, String>>();
    ancestorList = new ArrayList<String>(INT_16);
    matchList = new ArrayList<String>(INT_16);
    lastMatchElement = new HashSet<String>();
    indexEntries = new StringBuffer(INT_1024);

    // Matching state
    firstMatchElement = null;
    match = false;
    level = 0;

    // Flags
    validHref = true;
    needResolveEntity = false;

    // Nothing is being processed yet
    inputFile = null;
    topicPath = null;

    try {
      reader = StringUtils.getXMLReader();
      reader.setContentHandler(this);
      reader.setProperty(LEXICAL_HANDLER_PROPERTY, this);
      // ampbug:2893664: ask the scanner to report character and built-in entity references
      reader.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
      reader.setFeature("http://apache.org/xml/features/scanner/notify-builtin-refs", true);
      // Namespace processing is switched off
      reader.setFeature("http://xml.org/sax/features/namespaces", false);
    } catch (final Exception e) {
      logger.logException(e);
    }
  }
Пример #9
0
  /**
   * Creates a SAX XMLReader for DTD parsing and routes its DTD-related events to the handlers
   * registered on this SAXOutputter.
   *
   * <p>Only handlers that are actually set are registered. The lexical and declaration handlers
   * are installed as parser properties; if the primary property name is rejected the alternate
   * name is tried, and if that fails too the handler is silently left out.
   *
   * @return <code>XMLReader</code> a SAX2 parser.
   * @throws JDOMException if no parser can be created.
   */
  private XMLReader createDTDParser() throws JDOMException {
    final XMLReader dtdParser;
    try {
      dtdParser = createParser();
    } catch (Exception allocationFailure) {
      throw new JDOMException("Error in SAX parser allocation", allocationFailure);
    }

    // Plain SAX handlers
    if (this.getDTDHandler() != null) {
      dtdParser.setDTDHandler(this.getDTDHandler());
    }
    if (this.getEntityResolver() != null) {
      dtdParser.setEntityResolver(this.getEntityResolver());
    }

    // Extension handlers, registered through parser properties
    if (this.getLexicalHandler() != null) {
      setDtdParserPropertyWithFallback(
          dtdParser,
          SAX_PROPERTY_LEXICAL_HANDLER,
          SAX_PROPERTY_LEXICAL_HANDLER_ALT,
          this.getLexicalHandler());
    }
    if (this.getDeclHandler() != null) {
      setDtdParserPropertyWithFallback(
          dtdParser,
          SAX_PROPERTY_DECLARATION_HANDLER,
          SAX_PROPERTY_DECLARATION_HANDLER_ALT,
          this.getDeclHandler());
    }

    // Absorb errors as much as possible, per Laurent
    dtdParser.setErrorHandler(new DefaultHandler());

    return dtdParser;
  }

  /** Sets a parser property, retrying once under the alternate name; a second failure is ignored. */
  private void setDtdParserPropertyWithFallback(
      XMLReader dtdParser, String primaryName, String alternateName, Object handler) {
    try {
      dtdParser.setProperty(primaryName, handler);
    } catch (SAXException primaryRejected) {
      try {
        dtdParser.setProperty(alternateName, handler);
      } catch (SAXException alternateRejected) {
        // Forget it!
      }
    }
  }
 /**
  * Sets a property on this reader.
  *
  * <p>The lexical-handler property is intercepted: the handler is stored in this object and the
  * wiretap wiring is refreshed. Every other property is forwarded to the wrapped reader.
  *
  * @param name the property name (a fully-qualified URI)
  * @param value the requested value; for the lexical-handler property it must be {@code null} or
  *     a {@link LexicalHandler}
  * @throws SAXNotRecognizedException if the wrapped reader does not recognize the property
  * @throws SAXNotSupportedException if the wrapped reader cannot set the requested value, or if
  *     the value given for the lexical-handler property is not a {@link LexicalHandler}
  * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object)
  */
 public void setProperty(String name, Object value)
     throws SAXNotRecognizedException, SAXNotSupportedException {
   if ("http://xml.org/sax/properties/lexical-handler".equals(name)) {
     // Report a wrong value type the way the SAX contract prescribes instead of letting the
     // cast below fail with a ClassCastException.
     if (value != null && !(value instanceof LexicalHandler)) {
       throw new SAXNotSupportedException(
           "Value of property " + name + " is not a LexicalHandler: " + value.getClass().getName());
     }
     lexicalHandler = (LexicalHandler) value;
     updateWiretap();
   } else {
     wrappedReader.setProperty(name, value);
   }
 }
Пример #11
0
  public SimpleDocTypeParser() throws SAXException {

    xmlReader = XMLReaderFactory.createXMLReader();

    xmlReader.setContentHandler(this);

    // LexicalHandler
    xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", this);

    // DeclHandler
    xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", this);

    // DTD
    xmlReader.setFeature("http://xml.org/sax/features/resolve-dtd-uris", false);
    // *skip* resolving entities like DTDs
    xmlReader.setEntityResolver(new NoEntityResolver());

    //		xmlReader.setProperty(
    //				"http://xml.org/sax/properties/declaration-handler", dh);

  }
Пример #12
0
  /**
   * Parses the XML stream and sends the content and lexical events to a document serializer
   * created for the given output stream.
   *
   * @param xml the XML input
   * @param finf the stream the serializer is created for
   * @param workingDirectory when not {@code null}, an entity resolver created for this directory
   *     is installed on the reader
   * @throws Exception if the parser or serializer cannot be set up or parsing fails
   */
  public void parse(InputStream xml, OutputStream finf, String workingDirectory) throws Exception {
    final SAXParser parser = getParser();
    final SAXDocumentSerializer serializer = getSerializer(finf);

    final XMLReader xmlReader = parser.getXMLReader();
    // The serializer receives lexical events as well as content events.
    xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", serializer);
    xmlReader.setContentHandler(serializer);

    final boolean hasWorkingDirectory = workingDirectory != null;
    if (hasWorkingDirectory) {
      xmlReader.setEntityResolver(createRelativePathResolver(workingDirectory));
    }

    xmlReader.parse(new InputSource(xml));
  }
 /**
  * Registers the lexical handler directly with the incremental parser.
  *
  * <p>A parser that does not recognize or support the lexical-handler property is left without
  * a lexical handler; no error is reported.
  *
  * @param handler the lexical handler to register
  */
 public void setLexicalHandler(org.xml.sax.ext.LexicalHandler handler) {
   final String lexicalHandlerProperty = "http://xml.org/sax/properties/lexical-handler";
   // Not supported by all SAX2 parsers but should work in Xerces
   try {
     // Typecast required in Xerces2; SAXParser doesn't inherit XMLReader
     final XMLReader incrementalReader = (XMLReader) fIncrementalParser;
     incrementalReader.setProperty(lexicalHandlerProperty, handler);
   } catch (org.xml.sax.SAXNotRecognizedException | org.xml.sax.SAXNotSupportedException ignored) {
     // Nothing we can do about it
   }
 }
Пример #14
0
  /**
   * Parses the input source into a set of modifications.
   *
   * <p>An XMLReader is borrowed from the broker pool's parser pool for the duration of the parse
   * and is always returned to the pool, even when parsing fails. This object acts as both the
   * lexical handler and the content handler.
   *
   * @param is the source to parse
   * @return the collected modifications as an array of type Modification
   * @throws ParserConfigurationException declared by the signature
   * @throws IOException if the source cannot be read
   * @throws SAXException if the reader cannot be configured or parsing fails
   */
  public Modification[] parse(InputSource is)
      throws ParserConfigurationException, IOException, SAXException {
    final XMLReader pooledReader = broker.getBrokerPool().getParserPool().borrowXMLReader();
    try {
      // Namespace-aware parsing without namespace-prefix reporting
      pooledReader.setProperty(Namespaces.SAX_LEXICAL_HANDLER, this);
      pooledReader.setFeature(Namespaces.SAX_NAMESPACES, true);
      pooledReader.setFeature(Namespaces.SAX_NAMESPACES_PREFIXES, false);
      pooledReader.setContentHandler(this);

      pooledReader.parse(is);

      return modifications.toArray(new Modification[modifications.size()]);
    } finally {
      broker.getBrokerPool().getParserPool().returnXMLReader(pooledReader);
    }
  }
Пример #15
0
  /**
   * Parses the file referenced by {@code xdoc}, sending content and lexical events to a new
   * {@code SaxHandler}.
   *
   * @throws SAXException if the reader cannot be created or configured, or parsing fails
   * @throws RuntimeException wrapping the {@link IOException} if the file cannot be opened, read
   *     or closed
   */
  public void writeContent() throws SAXException {
    SaxHandler handler = new SaxHandler();

    XMLReader reader = XMLReaderFactory.createXMLReader();

    reader.setContentHandler(handler);
    reader.setProperty("http://xml.org/sax/properties/lexical-handler", handler);

    // try-with-resources closes the file in every case; it used to stay open after the parse.
    try (InputStream is = new BufferedInputStream(new FileInputStream(xdoc))) {
      reader.parse(new InputSource(is));
    } catch (IOException ex) {
      throw new RuntimeException(ex);
    }
  }
  /**
   * Fetches the page addressed by the request URI, runs it through the TagSoup parser and writes
   * the resulting XML to the response.
   *
   * <p>The request URI must look like {@code /host/path}; it is turned into the URL {@code
   * http://host/path} (plus the query string, if any). Any other URI is answered with status 500.
   *
   * <p>NOTE(review): the target host is taken straight from the request path, so this endpoint
   * fetches arbitrary URLs on behalf of the caller — confirm that access to it is restricted.
   *
   * @param request the incoming request
   * @param response the response the converted page is written to
   * @throws IllegalStateException wrapping any exception raised while fetching or converting
   */
  @Override
  protected void doGet(final HttpServletRequest request, final HttpServletResponse response)
      throws ServletException, IOException {
    LOG.trace("{@method} request = {}", request);

    final String requestUri = request.getRequestURI();
    if (!requestUri.matches("^/[^/]+/.+")) {
      response.sendError(500);
      return;
    }

    try {
      // "http:/" + "/host/path" yields "http://host/path"
      final StringBuilder pageUrlBuilder = new StringBuilder();
      pageUrlBuilder.append("http:/").append(requestUri);
      if (request.getQueryString() != null) {
        pageUrlBuilder.append("?").append(request.getQueryString());
      }
      final URL pageUrl = new URL(pageUrlBuilder.toString());
      LOG.debug("GET: {}", pageUrl);

      final XMLReader reader = new Parser();
      reader.setProperty(Parser.schemaProperty, new HTMLSchema());
      final ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
      Writer writer = null;
      InputStream pageIn = null;
      try {
        writer = new OutputStreamWriter(bytesOut);
        reader.setContentHandler(new XMLWriter(writer));
        pageIn = pageUrl.openStream();
        final InputSource source = new InputSource();
        source.setByteStream(pageIn);
        reader.parse(source);
      } finally {
        // Closing the writer flushes the converted document into bytesOut.
        Closeables.closeQuietly(writer);
        // Release the network connection; this stream used to be left open.
        Closeables.closeQuietly(pageIn);
      }

      InputStream bytesIn = null;
      try {
        bytesIn = new ByteArrayInputStream(bytesOut.toByteArray());
        ByteStreams.copy(bytesIn, response.getOutputStream());
      } finally {
        Closeables.closeQuietly(bytesIn);
        Closeables.closeQuietly(response.getOutputStream());
      }
    } catch (final Exception e) {
      throw new IllegalStateException(request.getRequestURI(), e);
    }
  }
Пример #17
0
  public Xv4htContentHandler(XMLReader xmlReader, String catalog) {
    super();
    this.catalog = catalog;
    lexicalHandler = new Xv4htLexicalHandler();
    try {
      xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", lexicalHandler);
    } catch (SAXNotRecognizedException e) {
      System.err.println("err 1");
      return;
    } catch (SAXNotSupportedException e) {
      System.err.println("err 2");
      return;
    }

    errHandler = new Xv4htErrorHandler();
    xmlReader.setErrorHandler(errHandler);

    xmlReader.setEntityResolver(new Xv4htEntityResolver());
  }
 /**
  * Parses the characters supplied by the given reader, with this object acting as content
  * handler, entity resolver and (when the parser supports it) lexical handler.
  *
  * @param src source of the XML text
  * @throws IOException if reading fails, or wrapping the {@link SAXException} raised while
  *     creating the parser or parsing
  */
 public void parse(java.io.Reader src) throws IOException {
   try {
     final org.xml.sax.XMLReader xmlReader = org.openide.xml.XMLUtil.createXMLReader(false, false);
     xmlReader.setContentHandler(this);
     xmlReader.setEntityResolver(this);

     // Lexical events are optional: a parser without support only triggers a warning.
     try {
       xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", this); // NOI18N
     } catch (SAXException unsupported) {
       XMLSettingsSupport.err.warning(
           "Warning: XML parser does not support lexical-handler feature."); // NOI18N
     }

     xmlReader.parse(new org.xml.sax.InputSource(src));
   } catch (SAXException parseFailure) {
     // Callers only see IOException; the SAX failure travels along as its cause.
     throw (IOException) new IOException().initCause(parseFailure);
   }
 }
Пример #19
0
  /**
   * Registers this filter as the lexical handler of its parent reader.
   *
   * <p>The property names in {@code LEXICAL_HANDLER_NAMES} are tried in order; the first one the
   * parent accepts wins. If the parent accepts none of them, nothing is registered and no error
   * is raised.
   *
   * @throws NullPointerException if the filter has no parent
   */
  protected void installLexicalHandler() {
    final XMLReader parentReader = getParent();
    if (parentReader == null) {
      throw new NullPointerException("No parent for filter");
    }

    for (String propertyName : LEXICAL_HANDLER_NAMES) {
      try {
        parentReader.setProperty(propertyName, this);
        // Registered successfully; the remaining names need not be tried.
        return;
      } catch (SAXNotRecognizedException | SAXNotSupportedException ignored) {
        // Try the next property name.
      }
    }
  }
Пример #20
0
 /**
  * Creates a new SAX parser whose reader reports lexical events to this instance and, where the
  * parser allows it, has validation and loading of external DTDs switched off.
  *
  * @param parserFactory factory used to create the parser
  * @return The newly created parser.
  * @throws ParserConfigurationException If a parser of the given configuration cannot be created.
  * @throws SAXException If something in general goes wrong when creating the parser.
  * @throws SAXNotRecognizedException If the <code>XMLReader</code> does not recognize the lexical
  *     handler configuration option.
  * @throws SAXNotSupportedException If the <code>XMLReader</code> does not support the lexical
  *     handler configuration option.
  */
 private final SAXParser createParser(SAXParserFactory parserFactory)
     throws ParserConfigurationException, SAXException, SAXNotRecognizedException,
         SAXNotSupportedException {
   final SAXParser saxParser = parserFactory.newSAXParser();
   final XMLReader xmlReader = saxParser.getXMLReader();

   // The lexical handler is mandatory: a failure here propagates to the caller.
   xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", this); // $NON-NLS-1$

   // disable DTD validation (bug 63625)
   try {
     // validation has to be "off" or the feature to ignore DTDs will not apply
     xmlReader.setFeature("http://xml.org/sax/features/validation", false); // $NON-NLS-1$
     xmlReader.setFeature(
         "http://apache.org/xml/features/nonvalidating/load-external-dtd", false); // $NON-NLS-1$
   } catch (SAXNotRecognizedException | SAXNotSupportedException ignored) {
     // not a big deal if the parser does not recognize or support the features
   }
   return saxParser;
 }
  /**
   * Extracts the full text from an HTML page with the TagSoup parser and a
   * {@code RejuvenationResearchParserHandler}.
   *
   * @param html the HTML source of the page
   * @return the content collected by the handler, or {@code null} when {@code judge(html)}
   *     rejects the page, the parser cannot be configured, or parsing fails
   */
  @Override
  public String parseFulltext(String html) {
    if (!judge(html)) {
      return null;
    }

    XMLReader reader = new Parser();
    try {
      reader.setProperty(Parser.schemaProperty, new HTMLSchema());
      reader.setFeature(Parser.namespacesFeature, false);
    } catch (SAXNotRecognizedException | SAXNotSupportedException e) {
      logger.fatal(e.getMessage());
      return null;
    }

    RejuvenationResearchParserHandler handler = new RejuvenationResearchParserHandler();
    reader.setContentHandler(handler);

    // The bytes must be produced in the same charset the parser is told to decode; the platform
    // default charset used before broke non-ASCII text on JVMs that do not default to UTF-8.
    final String charsetName = "UTF-8";
    try {
      InputSource source = new InputSource(new ByteArrayInputStream(html.getBytes(charsetName)));
      source.setEncoding(charsetName);
      reader.parse(source);
      return handler.getContent();
    } catch (IOException | SAXException e) {
      logger.fatal(e.getMessage());
    }

    return null;
  }
Пример #22
0
  /**
   * Parses the input source and sends the SAX events to the given receiver.
   *
   * <p>When the configuration asks for XInclude handling, the parser itself is created with that
   * flag switched off and an {@code XIncludeReceiver} is placed in front of the receiver instead;
   * the URI resolver created for it is destroyed once parsing ends.
   *
   * @param inputSource the document to parse
   * @param xmlReceiver final receiver of the events
   * @param parserConfiguration parser settings
   * @param handleLexical whether the receiver is also registered as lexical handler
   * @throws ValidationException if parsing fails with a {@link SAXParseException}
   * @throws OXFException wrapping any other exception
   */
  private static void inputSourceToSAX(
      InputSource inputSource,
      XMLReceiver xmlReceiver,
      XMLUtils.ParserConfiguration parserConfiguration,
      boolean handleLexical) {

    XMLUtils.ParserConfiguration effectiveConfig = parserConfiguration;
    XMLReceiver receiver = xmlReceiver;
    TransformerURIResolver resolver = null;

    // Insert XInclude processor if needed
    if (parserConfiguration.handleXInclude) {
      effectiveConfig =
          new XMLUtils.ParserConfiguration(
              parserConfiguration.validating,
              false,
              parserConfiguration.externalEntities,
              parserConfiguration.uriReferences);
      resolver = new TransformerURIResolver(XMLUtils.ParserConfiguration.PLAIN);
      receiver = new XIncludeReceiver(null, xmlReceiver, effectiveConfig.uriReferences, resolver);
    }

    try {
      final XMLReader reader = newSAXParser(effectiveConfig).getXMLReader();
      reader.setContentHandler(receiver);
      if (handleLexical) {
        reader.setProperty(XMLConstants.SAX_LEXICAL_HANDLER, receiver);
      }

      reader.setEntityResolver(ENTITY_RESOLVER);
      reader.setErrorHandler(ERROR_HANDLER);
      reader.parse(inputSource);
    } catch (SAXParseException parseError) {
      throw new ValidationException(parseError.getMessage(), new LocationData(parseError));
    } catch (Exception other) {
      throw new OXFException(other);
    } finally {
      if (resolver != null) {
        resolver.destroy();
      }
    }
  }
Пример #23
0
  /**
   * Retrieves a cached XMLReader for this thread, or creates a new XMLReader, if the existing
   * reader is in use. When the caller no longer needs the reader, it must release it with a call to
   * {@link #releaseXMLReader}.
   *
   * <p>A new reader is also created when the cached reader's class name differs from the value
   * of the system property read below. Before the reader is returned, the external-DTD access
   * property and, if a security manager is set, its limits are applied on every call; failures
   * to apply them are only reported on {@code System.err}.
   *
   * @return the reader to use
   * @throws SAXException if a reader cannot be created through either factory
   */
  public synchronized XMLReader getXMLReader() throws SAXException {
    XMLReader reader;

    if (m_readers == null) {
      // When the m_readers.get() method is called for the first time
      // on a thread, a new XMLReader will automatically be created.
      m_readers = new ThreadLocal();
    }

    if (m_inUse == null) {
      m_inUse = new HashMap();
    }

    // If the cached reader for this thread is in use, construct a new
    // one; otherwise, return the cached reader unless it isn't an
    // instance of the class set in the 'org.xml.sax.driver' property
    reader = (XMLReader) m_readers.get();
    boolean threadHasReader = (reader != null);
    String factory = SecuritySupport.getSystemProperty(property);
    if (threadHasReader
        && m_inUse.get(reader) != Boolean.TRUE
        && (factory == null || reader.getClass().getName().equals(factory))) {
      // Reuse the cached reader and mark it as in use.
      m_inUse.put(reader, Boolean.TRUE);
    } else {
      try {
        try {
          // According to JAXP 1.2 specification, if a SAXSource
          // is created using a SAX InputSource the Transformer or
          // TransformerFactory creates a reader via the
          // XMLReaderFactory if setXMLReader is not used
          reader = XMLReaderFactory.createXMLReader();
          try {
            reader.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, _secureProcessing);
          } catch (SAXNotRecognizedException e) {
            System.err.println("Warning:  " + reader.getClass().getName() + ": " + e.getMessage());
          }
        } catch (Exception e) {
          try {
            // If unable to create an instance, let's try to use
            // the XMLReader from JAXP
            if (m_parserFactory == null) {
              m_parserFactory = FactoryImpl.getSAXFactory(m_useServicesMechanism);
              m_parserFactory.setNamespaceAware(true);
            }

            reader = m_parserFactory.newSAXParser().getXMLReader();
          } catch (ParserConfigurationException pce) {
            throw pce; // pass along pce
          }
        }
        try {
          reader.setFeature(NAMESPACES_FEATURE, true);
          reader.setFeature(NAMESPACE_PREFIXES_FEATURE, false);
        } catch (SAXException se) {
          // Try to carry on if we've got a parser that
          // doesn't know about namespace prefixes.
        }
      } catch (ParserConfigurationException ex) {
        throw new SAXException(ex);
      } catch (FactoryConfigurationError ex1) {
        throw new SAXException(ex1.toString());
      } catch (NoSuchMethodError ex2) {
        // NOTE(review): swallowed silently; if this happens before a reader was assigned,
        // 'reader' still holds its previous value (possibly null) below — confirm intended.
      } catch (AbstractMethodError ame) {
        // NOTE(review): swallowed silently, same concern as above — confirm intended.
      }

      // Cache the XMLReader if this is the first time we've created
      // a reader for this thread.
      if (!threadHasReader) {
        m_readers.set(reader);
        m_inUse.put(reader, Boolean.TRUE);
      }
    }

    try {
      // reader is cached, but this property might have been reset
      reader.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, _accessExternalDTD);
    } catch (SAXException se) {
      System.err.println("Warning:  " + reader.getClass().getName() + ": " + se.getMessage());
    }

    // Apply every limit of the security manager, if one is set; the first failure stops the
    // loop and is reported as a warning only.
    try {
      if (_xmlSecurityManager != null) {
        for (XMLSecurityManager.Limit limit : XMLSecurityManager.Limit.values()) {
          reader.setProperty(limit.apiProperty(), _xmlSecurityManager.getLimitValueAsString(limit));
        }
        if (_xmlSecurityManager.printEntityCountInfo()) {
          reader.setProperty(XalanConstants.JDK_ENTITY_COUNT_INFO, XalanConstants.JDK_YES);
        }
      }
    } catch (SAXException se) {
      System.err.println("Warning:  " + reader.getClass().getName() + ": " + se.getMessage());
    }

    return reader;
  }
Пример #24
0
  /**
   * Converts an HTML document to XML.
   *
   * <p>If no reader is available, or the conversion fails with a {@link SAXException} (which is
   * printed via {@code Util.errln}), the input reference is returned unchanged.
   *
   * @param io io reference
   * @param opts html options
   * @return reference to the converted content, or {@code io} itself as a fallback
   * @throws IOException I/O exception
   */
  private static IO toXML(final IO io, final HtmlOptions opts) throws IOException {
    // reader could not be initialized; fall back to XML
    if (READER == null) {
      return io;
    }

    try {
      // start with the encoding reported for the input
      final TextInput input = new TextInput(io);
      String encName = input.encoding();
      final byte[] content = input.content();

      // a charset definition inside the content takes precedence
      final byte[] marker = token("charset=");
      final int markerPos = indexOf(content, marker);
      if (markerPos > 0) {
        // the encoding string runs from the end of the marker up to the first byte <= 0x28
        final int start = markerPos + marker.length;
        int end = start + 1;
        while (end < content.length && content[end] > 0x28) {
          end++;
        }
        encName = string(substring(content, start, end));
      }

      // define input
      final InputSource source = new InputSource(new ArrayInput(content));
      source.setEncoding(supported(encName) ? normEncoding(encName) : UTF8);
      // define output
      final StringWriter out = new StringWriter();
      final XMLReader reader = (XMLReader) Reflect.get(READER);
      final Object writer = Reflect.get(WRITER, out);

      // set TagSoup options
      final String namespaces = "http://xml.org/sax/features/namespaces";
      if (opts.get(HtmlOptions.HTML)) {
        reader.setFeature(namespaces, false);
        opt("method", "html");
        opt("omit-xml-declaration", "yes");
      }
      if (opts.get(HtmlOptions.NONS)) {
        reader.setFeature(namespaces, false);
      }
      if (opts.get(HtmlOptions.OMITXML)) {
        opt("omit-xml-declaration", "yes");
      }
      if (opts.get(HtmlOptions.NOBOGONS)) {
        reader.setFeature(FEATURES + "ignore-bogons", true);
      }
      if (opts.get(HtmlOptions.NODEFAULTS)) {
        reader.setFeature(FEATURES + "default-attributes", false);
      }
      if (opts.get(HtmlOptions.NOCOLONS)) {
        reader.setFeature(FEATURES + "translate-colons", true);
      }
      if (opts.get(HtmlOptions.NORESTART)) {
        reader.setFeature(FEATURES + "restart-elements", false);
      }
      if (opts.get(HtmlOptions.IGNORABLE)) {
        reader.setFeature(FEATURES + "ignorable-whitespace", true);
      }
      if (opts.get(HtmlOptions.EMPTYBOGONS)) {
        reader.setFeature(FEATURES + "bogons-empty", true);
      }
      if (opts.get(HtmlOptions.ANY)) {
        reader.setFeature(FEATURES + "bogons-empty", false);
      }
      if (opts.get(HtmlOptions.NOROOTBOGONS)) {
        reader.setFeature(FEATURES + "root-bogons", false);
      }
      if (opts.get(HtmlOptions.NOCDATA)) {
        reader.setFeature(FEATURES + "cdata-elements", false);
      }
      if (opts.get(HtmlOptions.LEXICAL)) {
        reader.setProperty("http://xml.org/sax/properties/lexical-handler", writer);
      }
      if (opts.contains(HtmlOptions.METHOD)) {
        opt("method", opts.get(HtmlOptions.METHOD));
      }
      if (opts.contains(HtmlOptions.DOCTYPESYS)) {
        opt("doctype-system", opts.get(HtmlOptions.DOCTYPESYS));
      }
      if (opts.contains(HtmlOptions.DOCTYPEPUB)) {
        opt("doctype-public", opts.get(HtmlOptions.DOCTYPEPUB));
      }
      if (opts.contains(HtmlOptions.ENCODING)) {
        // an explicitly requested encoding overrides the detected one
        source.setEncoding(opts.get(HtmlOptions.ENCODING));
      }
      // end TagSoup options

      reader.setContentHandler((ContentHandler) writer);
      reader.parse(source);
      return new IOContent(token(out.toString()), io.name());

    } catch (final SAXException ex) {
      Util.errln(ex);
      return io;
    }
  }
Пример #25
0
  /**
   * Validates XML data from a stream, choosing the entity resolver from the grammar argument.
   *
   * <ul>
   *   <li>{@code null}: the system catalog resolver is used;
   *   <li>ends with {@code ".xml"}: treated as a user-specified catalog;
   *   <li>ends with {@code "/"}: treated as a collection in which the grammar is searched;
   *   <li>anything else: treated as the path of the grammar itself.
   * </ul>
   *
   * <p>Exceptions are not propagated; they are logged and stored in the report.
   *
   * @param stream XML input.
   * @param grammarUrl User supplied path to grammar.
   * @return Validation report containing all validation info.
   */
  public ValidationReport validateParse(InputStream stream, String grammarUrl) {

    logger.debug("Start validation.");

    final ValidationReport report = new ValidationReport();
    final ValidationContentHandler contentCollector = new ValidationContentHandler();

    try {
      final XMLReader xmlReader = getXMLReader(contentCollector, report);

      // Pick the resolver that matches the kind of grammar reference.
      final Object entityResolver;
      if (grammarUrl == null) {
        // Scenario 1 : no params - use system catalog
        logger.debug("Validation using system catalog.");
        entityResolver = systemCatalogResolver;

      } else if (grammarUrl.endsWith(".xml")) {
        // Scenario 2 : path to catalog (xml)
        logger.debug("Validation using user specified catalog '" + grammarUrl + "'.");
        final eXistXMLCatalogResolver catalogResolver = new eXistXMLCatalogResolver();
        catalogResolver.setCatalogList(new String[] {grammarUrl});
        entityResolver = catalogResolver;

      } else if (grammarUrl.endsWith("/")) {
        // Scenario 3 : path to collection ("/"): search.
        logger.debug("Validation using searched grammar, start from '" + grammarUrl + "'.");
        entityResolver = new SearchResourceResolver(grammarUrl, brokerPool);

      } else {
        // Scenario 4 : path to grammar (xsd, dtd) specified.
        logger.debug("Validation using specified grammar '" + grammarUrl + "'.");
        entityResolver = new AnyUriResolver(grammarUrl);
      }
      xmlReader.setProperty(
          XMLReaderObjectFactory.APACHE_PROPERTIES_ENTITYRESOLVER, entityResolver);

      logger.debug("Validation started.");
      report.start();
      xmlReader.parse(new InputSource(stream));
      logger.debug("Validation stopped.");

      report.stop();

      report.setNamespaceUri(contentCollector.getNamespaceUri());

      if (!report.isValid()) {
        logger.debug("Document is not valid.");
      }

    } catch (ExistIOException ex) {
      // Report the underlying cause rather than the wrapper.
      logger.error(ex.getCause());
      report.setThrowable(ex.getCause());

    } catch (Exception ex) {
      logger.error(ex);
      report.setThrowable(ex);

    } finally {
      // Also stops the report when parsing was aborted by an exception.
      report.stop();

      logger.debug("Validation performed in " + report.getValidationDuration() + " msec.");
    }

    return report;
  }
Пример #26
0
 /**
  * Sets {@code handler} on {@code xmlReader} as the value of the property named by {@code
  * LEXICAL_HANDLER_PROPERTY_NAME}, after logging both arguments at debug level.
  *
  * @param xmlReader reader on which the property is set
  * @param handler lexical handler passed as the property value
  * @throws SAXNotRecognizedException propagated from {@link XMLReader#setProperty}
  * @throws SAXNotSupportedException propagated from {@link XMLReader#setProperty}
  */
 public static void setLexicalHandler(XMLReader xmlReader, LexicalHandler handler)
     throws SAXNotRecognizedException, SAXNotSupportedException {
   logger.debug("setLexicalHandler(xmlReader={}, handler={}) - start", xmlReader, handler);
   xmlReader.setProperty(LEXICAL_HANDLER_PROPERTY_NAME, handler);
 }
Пример #27
0
  /**
   * Command-line entry point: parses an XML document with a namespace-aware SAX parser, sending
   * the events to an {@code OLEImportHandler} that writes to standard output and standard error.
   *
   * <p>Input is read from the file named by the single optional argument, or from standard input
   * when no argument is given. The process always terminates through {@link System#exit(int)}
   * with one of these statuses:
   *
   * <ul>
   *   <li>0 - the document was parsed without an exception;
   *   <li>1 - more than one argument was supplied;
   *   <li>2 - the input file could not be found;
   *   <li>3 - reading the input, configuring the parser or parsing the document failed.
   * </ul>
   *
   * @param args optionally, the path of the input file to read
   */
  public static void main(String[] args) {
    // Printing arguments just to show them
    boolean debug = false;
    if (debug) {
      for (int i = 0; i < args.length; i++) {
        System.err.printf("arg[%d]: %s\n", i, args[i]);
      }
    }

    // Now the real program
    InputStream in = System.in;
    PrintStream out = System.out;
    PrintStream err = System.err;

    String usage =
        "Usage: "
            + OLEWrapper.class.getName()
            + " [file]\n"
            + "\tfile = input file to read, default is stdin";

    if (args.length > 1) {
      out.println("Error: more than 1 argument");
      out.println(usage);
      System.exit(1);
    } else if (args.length == 1) {
      try {
        in = new BufferedInputStream(new FileInputStream(args[0]));
      } catch (FileNotFoundException e) {
        err.println("file " + args[0] + " was not found.");
        System.exit(2);
      }
    }
    // Then do the work...

    // Stays 0 only when parsing completes; any failure below must be visible to the caller
    // through a non-zero exit status.
    int exitStatus = 0;

    SAXParserFactory factory = SAXParserFactory.newInstance();
    factory.setNamespaceAware(true);
    try {
      SAXParser saxParser = factory.newSAXParser();
      DefaultHandler handler = new OLEImportHandler(out, err);
      XMLReader reader = saxParser.getXMLReader();
      if (debug) {
        out.println("factory.isNamespaceAware()=" + factory.isNamespaceAware());
        out.println("saxParser.isNamespaceAware()=" + saxParser.isNamespaceAware());
        out.println(SaxNSURI + "=" + reader.getFeature(SaxNSURI));
        System.exit(0);
      }
      reader.setContentHandler(handler);
      // Register lexical handler for comment processing
      reader.setProperty(SaxLexHandlerURI, handler);
      reader.parse(new InputSource(in));
    } catch (IOException e) {
      e.printStackTrace();
      exitStatus = 3;
    } catch (ParserConfigurationException e) {
      e.printStackTrace();
      exitStatus = 3;
    } catch (SAXException e) {
      e.printStackTrace();
      exitStatus = 3;
    }

    System.exit(exitStatus);
  }
Пример #28
0
 /**
  * Forwards the property assignment unchanged to {@code xmlReader}.
  *
  * @param name name of the property to set
  * @param value value to assign to the property
  * @throws SAXNotRecognizedException propagated from the delegate's {@code setProperty}
  * @throws SAXNotSupportedException propagated from the delegate's {@code setProperty}
  */
 @Override
 public void setProperty(String name, Object value)
     throws SAXNotRecognizedException, SAXNotSupportedException {
   xmlReader.setProperty(name, value);
 }
Пример #29
0
  /**
   * Parses the SAX {@code source} with this object installed as the reader's content and DTD
   * handler, optionally forwarding events to the handler of {@code result}.
   *
   * <p>If the source carries no {@code XMLReader}, a namespace-aware one is created. The content
   * handler set from {@code result} is always cleared again before this method returns or throws.
   *
   * @param source the input; it is cast to {@code SAXSource}
   * @param result a {@code SAXResult} whose handler receives the events, or {@code null}
   * @throws SAXException propagated from parsing
   * @throws IOException propagated from parsing
   * @throws IllegalArgumentException if {@code result} is non-null and not a {@code SAXResult}
   */
  public void validate(Source source, Result result) throws SAXException, IOException {
    // Only a SAXResult, or no result at all, can be combined with the SAX source.
    if (result != null && !(result instanceof SAXResult)) {
      throw new IllegalArgumentException(
          JAXPValidationMessageFormatter.formatMessage(
              Locale.getDefault(),
              "SourceResultMismatch",
              new Object[] {source.getClass().getName(), result.getClass().getName()}));
    }

    final SAXSource input = (SAXSource) source;
    final SAXResult output = (SAXResult) result;
    if (output != null) {
      setContentHandler(output.getHandler());
    }

    try {
      XMLReader parser = input.getXMLReader();
      if (parser == null) {
        // The source did not bring its own reader, so build one now.
        final SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        parserFactory.setNamespaceAware(true);
        try {
          parser = parserFactory.newSAXParser().getXMLReader();
          // A Xerces SAX parser gets the security manager, when one is configured.
          if (parser instanceof com.sun.org.apache.xerces.internal.parsers.SAXParser) {
            final SecurityManager manager =
                (SecurityManager) fComponentManager.getProperty(SECURITY_MANAGER);
            if (manager != null) {
              try {
                parser.setProperty(SECURITY_MANAGER, manager);
              } catch (SAXException ignored) {
                // Ignore the exception if the security manager cannot be set.
              }
            }
          }
        } catch (Exception e) {
          // this is impossible, but better safe than sorry
          throw new FactoryConfigurationError(e);
        }
      }

      // When XML names and namespace URIs are already interned, the SymbolTable
      // round trip can be skipped.
      try {
        fStringsInternalized = parser.getFeature(STRING_INTERNING);
      } catch (SAXException unsupported) {
        // Feature unknown or not readable: assume strings are not interned.
        fStringsInternalized = false;
      }

      ErrorHandler handler = fComponentManager.getErrorHandler();
      if (handler == null) {
        handler = DraconianErrorHandler.getInstance();
      }
      parser.setErrorHandler(handler);
      parser.setEntityResolver(fResolutionForwarder);
      fResolutionForwarder.setEntityResolver(fComponentManager.getResourceResolver());
      parser.setContentHandler(this);
      parser.setDTDHandler(this);

      final InputSource document = input.getInputSource();
      parser.parse(document);
    } finally {
      // release the reference to user's handler ASAP
      setContentHandler(null);
    }
  }
Пример #30
0
  /**
   * Builds a {@code SAXImpl} DTM for the given source, registers it with this manager and, where
   * a parser or event bridge is available, fills it by parsing the source.
   *
   * <p>StAX and DOM sources are replayed as SAX events into the new DTM. SAX and stream sources
   * are parsed with an {@code XMLReader}. A {@code null} source yields an empty, registered DTM
   * that the caller is expected to populate. Any other source type is rejected.
   *
   * @param source the specification of the source object; may be null.
   * @param unique true if the returned DTM must be unique; in this method it only affects the
   *     debug trace.
   * @param whiteSpaceFilter Enables filtering of whitespace nodes, and may be null.
   * @param incremental true if the DTM should be built incrementally, if possible; not read by
   *     this method.
   * @param doIndexing true if the caller considers it worth it to use indexing schemes.
   * @param hasUserReader true if <code>source</code> is a <code>SAXSource</code> object that has an
   *     <code>XMLReader</code>, that was specified by the user.
   * @param size Specifies initial size of tables that represent the DTM; a value of zero or less
   *     selects {@code DTMDefaultBase.DEFAULT_BLOCKSIZE}.
   * @param buildIdIndex true if the id index table should be built.
   * @param newNameTable true if we want to use a separate ExpandedNameTable for this DTM.
   * @return a non-null DTM reference.
   */
  public DTM getDTM(
      Source source,
      boolean unique,
      DTMWSFilter whiteSpaceFilter,
      boolean incremental,
      boolean doIndexing,
      boolean hasUserReader,
      int size,
      boolean buildIdIndex,
      boolean newNameTable) {
    if (DEBUG && source != null) {
      final String mode = unique ? "UNIQUE" : "shared";
      System.out.println("Starting " + mode + " source: " + source.getSystemId());
    }

    final int slot = getFirstFreeDTMID();
    final int documentID = slot << IDENT_DTM_NODE_BITS;
    // A non-positive size request falls back to the default block size.
    final int blockSize = (size <= 0) ? DTMDefaultBase.DEFAULT_BLOCKSIZE : size;

    // --- StAX input: replay the event/stream reader as SAX events into the DTM.
    if (source instanceof StAXSource) {
      final StAXSource stax = (StAXSource) source;
      StAXEvent2SAX eventBridge = null;
      StAXStream2SAX streamBridge = null;
      final XMLEventReader eventReader = stax.getXMLEventReader();
      if (eventReader != null) {
        eventBridge = new StAXEvent2SAX(eventReader);
      } else {
        final XMLStreamReader streamReader = stax.getXMLStreamReader();
        if (streamReader != null) {
          streamBridge = new StAXStream2SAX(streamReader);
        }
      }

      final SAXImpl staxDtm =
          new SAXImpl(
              this,
              source,
              documentID,
              whiteSpaceFilter,
              null,
              doIndexing,
              blockSize,
              buildIdIndex,
              newNameTable);
      staxDtm.setDocumentURI(source.getSystemId());
      addDTM(staxDtm, slot, 0);

      try {
        if (eventBridge != null) {
          eventBridge.setContentHandler(staxDtm);
          eventBridge.parse();
        } else if (streamBridge != null) {
          streamBridge.setContentHandler(staxDtm);
          streamBridge.parse();
        }
      } catch (RuntimeException re) {
        throw re;
      } catch (Exception e) {
        throw new com.sun.org.apache.xml.internal.utils.WrappedRuntimeException(e);
      }

      return staxDtm;
    }

    // --- DOM input: walk the node and feed the resulting SAX events into the DTM.
    if (source instanceof DOMSource) {
      final org.w3c.dom.Node root = ((DOMSource) source).getNode();
      final DOM2SAX walker = new DOM2SAX(root);

      final SAXImpl domDtm =
          new SAXImpl(
              this,
              source,
              documentID,
              whiteSpaceFilter,
              null,
              doIndexing,
              blockSize,
              buildIdIndex,
              newNameTable);
      domDtm.setDocumentURI(source.getSystemId());
      addDTM(domDtm, slot, 0);

      walker.setContentHandler(domDtm);
      try {
        walker.parse();
      } catch (RuntimeException re) {
        throw re;
      } catch (Exception e) {
        throw new com.sun.org.apache.xml.internal.utils.WrappedRuntimeException(e);
      }

      return domDtm;
    }

    // --- Everything else must be a SAX source, a stream source, or no source at all.
    final boolean saxOrAbsent = (source == null) || (source instanceof SAXSource);
    if (!saxOrAbsent && !(source instanceof StreamSource)) {
      // It should have been handled by a derived class or the caller
      // made a mistake.
      throw new DTMException(
          XMLMessages.createXMLMessage(
              XMLErrorResources.ER_NOT_SUPPORTED, new Object[] {source}));
    }

    XMLReader reader = null;
    InputSource xmlSource = null;
    if (source == null) {
      hasUserReader = false; // Make sure the user didn't lie
    } else {
      reader = getXMLReader(source);
      xmlSource = SAXSource.sourceToInputSource(source);

      String systemId = xmlSource.getSystemId();
      if (systemId != null) {
        try {
          systemId = SystemIDResolver.getAbsoluteURI(systemId);
        } catch (Exception e) {
          // %REVIEW% Is there a better way to send a warning?
          System.err.println("Can not absolutize URL: " + systemId);
        }
        xmlSource.setSystemId(systemId);
      }
    }

    // Create the basic SAX2DTM.
    final SAXImpl dtm =
        new SAXImpl(
            this,
            source,
            documentID,
            whiteSpaceFilter,
            null,
            doIndexing,
            blockSize,
            buildIdIndex,
            newNameTable);

    // Register the DTM in the lookup table before any parsing happens.
    // Offset 0, since this DTM has only just been created.
    addDTM(dtm, slot, 0);

    if (reader == null) {
      // Then the user will construct it themselves.
      return dtm;
    }

    reader.setContentHandler(dtm.getBuilder());

    // Handlers already installed on a user-supplied reader are left in place.
    if (!hasUserReader || reader.getDTDHandler() == null) {
      reader.setDTDHandler(dtm);
    }
    if (!hasUserReader || reader.getErrorHandler() == null) {
      reader.setErrorHandler(dtm);
    }

    try {
      reader.setProperty("http://xml.org/sax/properties/lexical-handler", dtm);
    } catch (SAXNotRecognizedException e) {
      // The reader does not know the property; carry on without a lexical handler.
    } catch (SAXNotSupportedException e) {
      // The reader cannot accept the property; carry on without a lexical handler.
    }

    try {
      reader.parse(xmlSource);
    } catch (RuntimeException re) {
      throw re;
    } catch (Exception e) {
      throw new com.sun.org.apache.xml.internal.utils.WrappedRuntimeException(e);
    } finally {
      // Only readers obtained by this manager are handed back.
      if (!hasUserReader) {
        releaseXMLReader(reader);
      }
    }

    if (DUMPTREE) {
      System.out.println("Dumping SAX2DOM");
      dtm.dumpDTM(System.err);
    }

    return dtm;
  }