/** * Uses a Vector of TransformerHandlers to pipe XML input document through a series of 1 or more * transformations. Called by {@link #pipeDocument}. * * @param vTHandler Vector of Transformation Handlers (1 per stylesheet). * @param source absolute URI to XML input * @param target absolute path to transformation output. */ public void usePipe(Vector vTHandler, String source, String target) throws TransformerException, TransformerConfigurationException, FileNotFoundException, IOException, SAXException, SAXNotRecognizedException { XMLReader reader = XMLReaderFactory.createXMLReader(); TransformerHandler tHFirst = (TransformerHandler) vTHandler.firstElement(); reader.setContentHandler(tHFirst); reader.setProperty("http://xml.org/sax/properties/lexical-handler", tHFirst); for (int i = 1; i < vTHandler.size(); i++) { TransformerHandler tHFrom = (TransformerHandler) vTHandler.elementAt(i - 1); TransformerHandler tHTo = (TransformerHandler) vTHandler.elementAt(i); tHFrom.setResult(new SAXResult(tHTo)); } TransformerHandler tHLast = (TransformerHandler) vTHandler.lastElement(); Transformer trans = tHLast.getTransformer(); Properties outputProps = trans.getOutputProperties(); Serializer serializer = SerializerFactory.getSerializer(outputProps); FileOutputStream out = new FileOutputStream(target); try { serializer.setOutputStream(out); tHLast.setResult(new SAXResult(serializer.asContentHandler())); reader.parse(source); } finally { // Always clean up the FileOutputStream, // even if an exception was thrown in the try block if (out != null) out.close(); } }
@Override public boolean execute(Property inputProperty, Node outputNode, Context context) throws Exception { Binary binaryValue = inputProperty.getBinary(); CheckArg.isNotNull(binaryValue, "binary"); if (!outputNode.isNew()) { outputNode = outputNode.addNode(XmlLexicon.DOCUMENT); } XmlSequencerHandler sequencingHandler = new XmlSequencerHandler(outputNode, scoping); // Create the reader ... XMLReader reader = XMLReaderFactory.createXMLReader(); reader.setContentHandler(sequencingHandler); reader.setErrorHandler(sequencingHandler); // Ensure handler acting as entity resolver 2 reader.setProperty(DECL_HANDLER_FEATURE, sequencingHandler); // Ensure handler acting as lexical handler reader.setProperty(LEXICAL_HANDLER_FEATURE, sequencingHandler); // Ensure handler acting as entity resolver 2 setFeature(reader, ENTITY_RESOLVER_2_FEATURE, true); // Prevent loading of external DTDs setFeature(reader, LOAD_EXTERNAL_DTDS_FEATURE, false); // Prevent the resolving of DTD entities into fully-qualified URIS setFeature(reader, RESOLVE_DTD_URIS_FEATURE, false); // Parse XML document try (InputStream stream = binaryValue.getStream()) { reader.parse(new InputSource(stream)); } return true; }
/**
 * Re-installs the content handler and the lexical handler on the wrapped reader. When both a
 * client handler and a wiretap handler are present they are combined; otherwise whichever one
 * is set (possibly {@code null}) is installed.
 */
private void updateWiretap() {
  if (contentHandler == null) {
    wrappedReader.setContentHandler(wiretapContentHander);
  } else if (wiretapContentHander == null) {
    wrappedReader.setContentHandler(contentHandler);
  } else {
    wrappedReader.setContentHandler(
        new CombineContentHandler(wiretapContentHander, contentHandler));
  }

  final String lexicalProperty = "http://xml.org/sax/properties/lexical-handler";
  try {
    final Object effectiveLexicalHandler;
    if (lexicalHandler == null) {
      effectiveLexicalHandler = wiretapLexicalHandler;
    } else if (wiretapLexicalHandler == null) {
      effectiveLexicalHandler = lexicalHandler;
    } else {
      effectiveLexicalHandler = new CombineLexicalHandler(wiretapLexicalHandler, lexicalHandler);
    }
    wrappedReader.setProperty(lexicalProperty, effectiveLexicalHandler);
  } catch (SAXNotRecognizedException e) {
    // The wrapped reader does not know the lexical-handler property; nothing is installed.
  } catch (SAXNotSupportedException e) {
    // The wrapped reader cannot accept the lexical handler right now; nothing is installed.
  }
}
/** * Reads malformed XML from the InputStream original and returns a new InputStream which can be * used to read a well-formed version of the input * * @param original original input * @return an {@link InputStream} which can be used to read a well-formed version of the input XML * @throws ParseException if an exception occurs while parsing the input */ public static InputStream xmlizeInputStream(InputStream original) throws ParseException { try { ByteArrayOutputStream out = new ByteArrayOutputStream(); HTMLSchema schema = new HTMLSchema(); XMLReader reader = new Parser(); // TODO walk through the javadoc and tune more settings // see tagsoup javadoc for details reader.setProperty(Parser.schemaProperty, schema); reader.setFeature(Parser.bogonsEmptyFeature, false); reader.setFeature(Parser.ignorableWhitespaceFeature, true); reader.setFeature(Parser.ignoreBogonsFeature, false); Writer writeger = new OutputStreamWriter(out); XMLWriter x = new XMLWriter(writeger); reader.setContentHandler(x); InputSource s = new InputSource(original); reader.parse(s); return new ByteArrayInputStream(out.toByteArray()); } catch (SAXException e) { throw new ParseException(R("PBadXML"), e); } catch (IOException e) { throw new ParseException(R("PBadXML"), e); } }
public Struct validate(InputSource xml) throws PageException { CFMLEngine engine = CFMLEngineFactory.getInstance(); warnings = engine.getCreationUtil().createArray(); errors = engine.getCreationUtil().createArray(); fatals = engine.getCreationUtil().createArray(); try { XMLReader parser = new XMLUtilImpl().createXMLReader("org.apache.xerces.parsers.SAXParser"); parser.setContentHandler(this); parser.setErrorHandler(this); parser.setEntityResolver(this); parser.setFeature("http://xml.org/sax/features/validation", true); parser.setFeature("http://apache.org/xml/features/validation/schema", true); parser.setFeature("http://apache.org/xml/features/validation/schema-full-checking", true); // if(!validateNamespace) if (!Util.isEmpty(strSchema)) parser.setProperty( "http://apache.org/xml/properties/schema/external-noNamespaceSchemaLocation", strSchema); parser.parse(xml); } catch (SAXException e) { } catch (IOException e) { throw engine.getExceptionUtil().createXMLException(e.getMessage()); } // result Struct result = engine.getCreationUtil().createStruct(); result.setEL("warnings", warnings); result.setEL("errors", errors); result.setEL("fatalerrors", fatals); result.setEL("status", engine.getCastUtil().toBoolean(!hasErrors)); release(); return result; }
private XMLReader getXMLReader(ContentHandler contentHandler, ErrorHandler errorHandler) throws ParserConfigurationException, SAXException { // setup sax factory ; be sure just one instance! SAXParserFactory saxFactory = SAXParserFactory.newInstance(); // Enable validation stuff saxFactory.setValidating(true); saxFactory.setNamespaceAware(true); // Create xml reader SAXParser saxParser = saxFactory.newSAXParser(); XMLReader xmlReader = saxParser.getXMLReader(); // Setup xmlreader xmlReader.setProperty( XMLReaderObjectFactory.APACHE_PROPERTIES_INTERNAL_GRAMMARPOOL, grammarPool); xmlReader.setFeature(Namespaces.SAX_VALIDATION, true); xmlReader.setFeature(Namespaces.SAX_VALIDATION_DYNAMIC, false); xmlReader.setFeature(XMLReaderObjectFactory.APACHE_FEATURES_VALIDATION_SCHEMA, true); xmlReader.setFeature(XMLReaderObjectFactory.APACHE_PROPERTIES_LOAD_EXT_DTD, true); xmlReader.setFeature(Namespaces.SAX_NAMESPACES_PREFIXES, true); xmlReader.setContentHandler(contentHandler); xmlReader.setErrorHandler(errorHandler); return xmlReader; }
/**
 * Parses the given file with a fresh {@code CommentHandler} registered both as the SAX
 * lexical handler and as the parse handler; the handler is stored in {@code commentHandler}.
 *
 * @param file the XML file to read
 * @throws SAXException if the parser cannot be configured or the document cannot be parsed
 * @throws IOException if the file cannot be opened, read or closed
 */
private void processCommentLines(File file) throws SAXException, IOException {
  SAXParser parser = newSaxParser(false);
  XMLReader xmlReader = parser.getXMLReader();
  commentHandler = new CommentHandler();
  xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", commentHandler);
  // Close the file stream ourselves; the parser is not required to do so, and on a parse
  // failure the handle would otherwise leak.
  try (java.io.InputStream in = FileUtils.openInputStream(file)) {
    parser.parse(in, commentHandler);
  }
}
/**
 * Creates a MapLinksReader with empty collections and default state, then obtains and
 * configures the XML reader. Any failure during reader setup is logged, not rethrown.
 */
public MapLinksReader() {
  super();

  // Collections and buffers.
  map = new HashMap<String, Map<String, String>>();
  ancestorList = new ArrayList<String>(INT_16);
  matchList = new ArrayList<String>(INT_16);
  indexEntries = new StringBuffer(INT_1024);
  lastMatchElement = new HashSet<String>();

  // Scalar state.
  firstMatchElement = null;
  level = 0;
  match = false;
  validHref = true;
  needResolveEntity = false;
  topicPath = null;
  inputFile = null;

  try {
    reader = StringUtils.getXMLReader();
    reader.setContentHandler(this);
    reader.setProperty(LEXICAL_HANDLER_PROPERTY, this);
    // Character and built-in reference notification, added 2009-11-8 for ampbug:2893664.
    reader.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
    reader.setFeature("http://apache.org/xml/features/scanner/notify-builtin-refs", true);
    reader.setFeature("http://xml.org/sax/features/namespaces", false);
  } catch (final Exception e) {
    logger.logException(e);
  }
}
/** * This will create a SAX XMLReader capable of parsing a DTD and configure it so that the DTD * parsing events are routed to the handlers registered onto this SAXOutputter. * * @return <code>XMLReader</code> a SAX2 parser. * @throws JDOMException if no parser can be created. */ private XMLReader createDTDParser() throws JDOMException { XMLReader parser = null; // Get a parser instance try { parser = createParser(); } catch (Exception ex1) { throw new JDOMException("Error in SAX parser allocation", ex1); } // Register handlers if (this.getDTDHandler() != null) { parser.setDTDHandler(this.getDTDHandler()); } if (this.getEntityResolver() != null) { parser.setEntityResolver(this.getEntityResolver()); } if (this.getLexicalHandler() != null) { try { parser.setProperty(SAX_PROPERTY_LEXICAL_HANDLER, this.getLexicalHandler()); } catch (SAXException ex1) { try { parser.setProperty(SAX_PROPERTY_LEXICAL_HANDLER_ALT, this.getLexicalHandler()); } catch (SAXException ex2) { // Forget it! } } } if (this.getDeclHandler() != null) { try { parser.setProperty(SAX_PROPERTY_DECLARATION_HANDLER, this.getDeclHandler()); } catch (SAXException ex1) { try { parser.setProperty(SAX_PROPERTY_DECLARATION_HANDLER_ALT, this.getDeclHandler()); } catch (SAXException ex2) { // Forget it! } } } // Absorb errors as much as possible, per Laurent parser.setErrorHandler(new DefaultHandler()); return parser; }
/**
 * Sets a property. The SAX lexical-handler property is intercepted: the handler is stored and
 * the wiretap is refreshed. Every other property is passed on to the wrapped reader.
 *
 * @param name the property name
 * @param value the property value
 * @throws SAXNotRecognizedException
 * @throws SAXNotSupportedException
 * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object)
 */
public void setProperty(String name, Object value)
    throws SAXNotRecognizedException, SAXNotSupportedException {
  final boolean isLexicalHandler =
      "http://xml.org/sax/properties/lexical-handler".equals(name);
  if (!isLexicalHandler) {
    wrappedReader.setProperty(name, value);
    return;
  }
  lexicalHandler = (LexicalHandler) value;
  updateWiretap();
}
public SimpleDocTypeParser() throws SAXException { xmlReader = XMLReaderFactory.createXMLReader(); xmlReader.setContentHandler(this); // LexicalHandler xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", this); // DeclHandler xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", this); // DTD xmlReader.setFeature("http://xml.org/sax/features/resolve-dtd-uris", false); // *skip* resolving entities like DTDs xmlReader.setEntityResolver(new NoEntityResolver()); // xmlReader.setProperty( // "http://xml.org/sax/properties/declaration-handler", dh); }
/**
 * Parses the XML input and forwards its content and lexical events to a document serializer
 * created for {@code finf}.
 *
 * @param xml the XML input
 * @param finf the stream handed to {@code getSerializer}
 * @param workingDirectory if not {@code null}, a resolver created by
 *     {@code createRelativePathResolver} for it is installed as the entity resolver
 * @throws Exception if setup or parsing fails
 */
public void parse(InputStream xml, OutputStream finf, String workingDirectory)
    throws Exception {
  final SAXParser parser = getParser();
  final SAXDocumentSerializer serializer = getSerializer(finf);

  final XMLReader xmlReader = parser.getXMLReader();
  xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", serializer);
  xmlReader.setContentHandler(serializer);

  final boolean hasWorkingDirectory = workingDirectory != null;
  if (hasWorkingDirectory) {
    xmlReader.setEntityResolver(createRelativePathResolver(workingDirectory));
  }

  final InputSource input = new InputSource(xml);
  xmlReader.parse(input);
}
// Register handler directly with the incremental parser public void setLexicalHandler(org.xml.sax.ext.LexicalHandler handler) { // Not supported by all SAX2 parsers but should work in Xerces: try { // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader // %OPT% Cast at asignment? ((XMLReader) fIncrementalParser) .setProperty("http://xml.org/sax/properties/lexical-handler", handler); } catch (org.xml.sax.SAXNotRecognizedException e) { // Nothing we can do about it } catch (org.xml.sax.SAXNotSupportedException e) { // Nothing we can do about it } }
/**
 * Parses the input source into a set of modifications, using an XMLReader borrowed from the
 * broker pool's parser pool. The reader is always handed back to the pool.
 *
 * @param is the input to parse
 * @return an array of type Modification
 * @throws ParserConfigurationException
 * @throws IOException
 * @throws SAXException
 */
public Modification[] parse(InputSource is)
    throws ParserConfigurationException, IOException, SAXException {
  final XMLReader pooledReader = broker.getBrokerPool().getParserPool().borrowXMLReader();
  try {
    // This object receives both lexical and content events.
    pooledReader.setProperty(Namespaces.SAX_LEXICAL_HANDLER, this);
    pooledReader.setFeature(Namespaces.SAX_NAMESPACES, true);
    pooledReader.setFeature(Namespaces.SAX_NAMESPACES_PREFIXES, false);
    pooledReader.setContentHandler(this);

    pooledReader.parse(is);

    return modifications.toArray(new Modification[modifications.size()]);
  } finally {
    broker.getBrokerPool().getParserPool().returnXMLReader(pooledReader);
  }
}
/**
 * Parse the file and write its transformed content to the Writer.
 *
 * <p>The file {@code xdoc} is read through a buffered stream that is closed when parsing
 * ends, whether it succeeded or failed.
 *
 * @throws SAXException if the reader cannot be created or configured, or the document cannot
 *     be parsed
 * @throws RuntimeException wrapping any {@link IOException} raised while opening, reading or
 *     closing the file
 */
public void writeContent() throws SAXException {
  SaxHandler handler = new SaxHandler();
  XMLReader reader = XMLReaderFactory.createXMLReader();
  reader.setContentHandler(handler);
  reader.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
  // try-with-resources: previously the file stream was never closed.
  try (InputStream is = new BufferedInputStream(new FileInputStream(xdoc))) {
    reader.parse(new InputSource(is));
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}
/**
 * Fetches the page addressed by the request URI (interpreted as {@code /host/path}, with the
 * query string appended), runs it through the TagSoup parser and writes the resulting XML to
 * the response.
 *
 * <p>Requests whose URI does not match {@code ^/[^/]+/.+} are answered with status 500.
 *
 * @param request the incoming request
 * @param response the response that receives the converted page
 * @throws ServletException declared by the servlet contract
 * @throws IOException if sending the error status fails
 * @throws IllegalStateException wrapping any failure while fetching, parsing or copying
 */
@Override
protected void doGet(final HttpServletRequest request, final HttpServletResponse response)
    throws ServletException, IOException {
  LOG.trace("{@method} request = {}", request);
  final String requestUri = request.getRequestURI();
  if (!requestUri.matches("^/[^/]+/.+")) {
    response.sendError(500);
    return;
  }
  try {
    // "http:/" + "/host/path" yields "http://host/path".
    final StringBuilder pageUrlBuilder = new StringBuilder();
    pageUrlBuilder.append("http:/").append(requestUri);
    if (request.getQueryString() != null) {
      pageUrlBuilder.append("?").append(request.getQueryString());
    }
    final URL pageUrl = new URL(pageUrlBuilder.toString());
    LOG.debug("GET: {}", pageUrl);

    final XMLReader reader = new Parser();
    reader.setProperty(Parser.schemaProperty, new HTMLSchema());
    final ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();

    InputStream pageStream = null;
    Writer writer = null;
    try {
      writer = new OutputStreamWriter(bytesOut);
      reader.setContentHandler(new XMLWriter(writer));
      pageStream = pageUrl.openStream();
      final InputSource source = new InputSource();
      source.setByteStream(pageStream);
      reader.parse(source);
    } finally {
      // The remote stream was previously never closed, leaking a connection per request.
      Closeables.closeQuietly(pageStream);
      // Closing the writer also flushes it into bytesOut before the bytes are copied below.
      Closeables.closeQuietly(writer);
    }

    InputStream bytesIn = null;
    try {
      bytesIn = new ByteArrayInputStream(bytesOut.toByteArray());
      ByteStreams.copy(bytesIn, response.getOutputStream());
    } finally {
      Closeables.closeQuietly(bytesIn);
      Closeables.closeQuietly(response.getOutputStream());
    }
  } catch (final Exception e) {
    throw new IllegalStateException(request.getRequestURI(), e);
  }
}
public Xv4htContentHandler(XMLReader xmlReader, String catalog) { super(); this.catalog = catalog; lexicalHandler = new Xv4htLexicalHandler(); try { xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", lexicalHandler); } catch (SAXNotRecognizedException e) { System.err.println("err 1"); return; } catch (SAXNotSupportedException e) { System.err.println("err 2"); return; } errHandler = new Xv4htErrorHandler(); xmlReader.setErrorHandler(errHandler); xmlReader.setEntityResolver(new Xv4htEntityResolver()); }
public void parse(java.io.Reader src) throws IOException { try { org.xml.sax.XMLReader reader = org.openide.xml.XMLUtil.createXMLReader(false, false); reader.setContentHandler(this); reader.setEntityResolver(this); org.xml.sax.InputSource is = new org.xml.sax.InputSource(src); try { reader.setProperty("http://xml.org/sax/properties/lexical-handler", this); // NOI18N } catch (SAXException sex) { XMLSettingsSupport.err.warning( "Warning: XML parser does not support lexical-handler feature."); // NOI18N } reader.parse(is); } catch (SAXException ex) { IOException ioe = new IOException(); ioe.initCause(ex); throw ioe; } }
protected void installLexicalHandler() { XMLReader parent = getParent(); if (parent == null) { throw new NullPointerException("No parent for filter"); } // try to register for lexical events for (String lexicalHandlerName : LEXICAL_HANDLER_NAMES) { try { parent.setProperty(lexicalHandlerName, this); break; } catch (SAXNotRecognizedException ex) { // ignore } catch (SAXNotSupportedException ex) { // ignore } } }
/** * Creates a new SAX parser for use within this instance. * * @return The newly created parser. * @throws ParserConfigurationException If a parser of the given configuration cannot be created. * @throws SAXException If something in general goes wrong when creating the parser. * @throws SAXNotRecognizedException If the <code>XMLReader</code> does not recognize the lexical * handler configuration option. * @throws SAXNotSupportedException If the <code>XMLReader</code> does not support the lexical * handler configuration option. */ private final SAXParser createParser(SAXParserFactory parserFactory) throws ParserConfigurationException, SAXException, SAXNotRecognizedException, SAXNotSupportedException { // Initialize the parser. final SAXParser parser = parserFactory.newSAXParser(); final XMLReader reader = parser.getXMLReader(); reader.setProperty("http://xml.org/sax/properties/lexical-handler", this); // $NON-NLS-1$ // disable DTD validation (bug 63625) try { // be sure validation is "off" or the feature to ignore DTD's will not apply reader.setFeature("http://xml.org/sax/features/validation", false); // $NON-NLS-1$ reader.setFeature( "http://apache.org/xml/features/nonvalidating/load-external-dtd", false); // $NON-NLS-1$ } catch (SAXNotRecognizedException e) { // not a big deal if the parser does not recognize the features } catch (SAXNotSupportedException e) { // not a big deal if the parser does not support the features } return parser; }
/**
 * Extracts the full text from an HTML page using the TagSoup parser and a
 * {@code RejuvenationResearchParserHandler}.
 *
 * <p>The HTML is handed to the parser as a character stream, so no byte encoding step is
 * involved and the result does not depend on the platform default charset.
 *
 * @param html the HTML source of the page
 * @return the content collected by the handler, or {@code null} if {@code judge(html)} rejects
 *     the page, the parser cannot be configured, or parsing fails
 */
@Override
public String parseFulltext(String html) {
  if (!judge(html)) {
    return null;
  }

  XMLReader r = new Parser();
  try {
    r.setProperty(Parser.schemaProperty, new HTMLSchema());
    r.setFeature(Parser.namespacesFeature, false);
  } catch (SAXNotRecognizedException e) {
    logger.fatal(e.getMessage());
    return null;
  } catch (SAXNotSupportedException e) {
    logger.fatal(e.getMessage());
    return null;
  }

  RejuvenationResearchParserHandler h = new RejuvenationResearchParserHandler();
  r.setContentHandler(h);

  // Previously the string was encoded with the platform default charset but labelled
  // "utf8", which garbled non-ASCII text on non-UTF-8 platforms. A character stream
  // sidesteps encoding altogether.
  InputSource s = new InputSource(new java.io.StringReader(html));
  try {
    r.parse(s);
    return h.getContent();
  } catch (IOException e) {
    logger.fatal(e.getMessage());
  } catch (SAXException e) {
    logger.fatal(e.getMessage());
  }
  return null;
}
private static void inputSourceToSAX( InputSource inputSource, XMLReceiver xmlReceiver, XMLUtils.ParserConfiguration parserConfiguration, boolean handleLexical) { // Insert XInclude processor if needed final TransformerURIResolver resolver; if (parserConfiguration.handleXInclude) { parserConfiguration = new XMLUtils.ParserConfiguration( parserConfiguration.validating, false, parserConfiguration.externalEntities, parserConfiguration.uriReferences); resolver = new TransformerURIResolver(XMLUtils.ParserConfiguration.PLAIN); xmlReceiver = new XIncludeReceiver(null, xmlReceiver, parserConfiguration.uriReferences, resolver); } else { resolver = null; } try { final XMLReader xmlReader = newSAXParser(parserConfiguration).getXMLReader(); xmlReader.setContentHandler(xmlReceiver); if (handleLexical) xmlReader.setProperty(XMLConstants.SAX_LEXICAL_HANDLER, xmlReceiver); xmlReader.setEntityResolver(ENTITY_RESOLVER); xmlReader.setErrorHandler(ERROR_HANDLER); xmlReader.parse(inputSource); } catch (SAXParseException e) { throw new ValidationException(e.getMessage(), new LocationData(e)); } catch (Exception e) { throw new OXFException(e); } finally { if (resolver != null) resolver.destroy(); } }
/**
 * Retrieves a cached XMLReader for this thread, or creates a new XMLReader, if the existing
 * reader is in use. When the caller no longer needs the reader, it must release it with a call to
 * {@link #releaseXMLReader}.
 *
 * <p>A newly created reader is cached and marked as in use only when the thread had no cached
 * reader before; a reader created because the cached one is busy (or of the wrong class) is
 * returned without being cached. Before returning, the external-DTD access property and, if a
 * security manager is configured, its limits are applied to the reader; failures there are
 * reported on {@code System.err} and otherwise ignored.
 *
 * @return the cached or newly created reader
 * @throws SAXException if the JAXP fallback fails with a {@code ParserConfigurationException}
 *     or a {@code FactoryConfigurationError}
 */
public synchronized XMLReader getXMLReader() throws SAXException {
  XMLReader reader;

  // Both caches are created lazily on first use.
  if (m_readers == null) {
    // A plain ThreadLocal: get() yields null on a thread until a reader has been stored
    // for it further down.
    m_readers = new ThreadLocal();
  }

  if (m_inUse == null) {
    m_inUse = new HashMap();
  }

  // If the cached reader for this thread is in use, construct a new
  // one; otherwise, return the cached reader unless it isn't an
  // instance of the class set in the 'org.xml.sax.driver' property
  reader = (XMLReader) m_readers.get();
  boolean threadHasReader = (reader != null);
  String factory = SecuritySupport.getSystemProperty(property);
  if (threadHasReader
      && m_inUse.get(reader) != Boolean.TRUE
      && (factory == null || reader.getClass().getName().equals(factory))) {
    // Reuse the cached reader and mark it as taken.
    m_inUse.put(reader, Boolean.TRUE);
  } else {
    try {
      try {
        // According to JAXP 1.2 specification, if a SAXSource
        // is created using a SAX InputSource the Transformer or
        // TransformerFactory creates a reader via the
        // XMLReaderFactory if setXMLReader is not used
        reader = XMLReaderFactory.createXMLReader();
        try {
          reader.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, _secureProcessing);
        } catch (SAXNotRecognizedException e) {
          // Only a warning: the reader is still used without the feature.
          System.err.println("Warning: " + reader.getClass().getName() + ": " + e.getMessage());
        }
      } catch (Exception e) {
        try {
          // If unable to create an instance, let's try to use
          // the XMLReader from JAXP
          if (m_parserFactory == null) {
            m_parserFactory = FactoryImpl.getSAXFactory(m_useServicesMechanism);
            m_parserFactory.setNamespaceAware(true);
          }

          reader = m_parserFactory.newSAXParser().getXMLReader();
        } catch (ParserConfigurationException pce) {
          throw pce; // pass along pce
        }
      }
      try {
        reader.setFeature(NAMESPACES_FEATURE, true);
        reader.setFeature(NAMESPACE_PREFIXES_FEATURE, false);
      } catch (SAXException se) {
        // Try to carry on if we've got a parser that
        // doesn't know about namespace prefixes.
      }
    } catch (ParserConfigurationException ex) {
      throw new SAXException(ex);
    } catch (FactoryConfigurationError ex1) {
      throw new SAXException(ex1.toString());
    } catch (NoSuchMethodError ex2) {
      // NOTE(review): swallowed; 'reader' then keeps whatever value it had when the error
      // was raised, which may be null or the busy cached reader. Confirm this is intended.
    } catch (AbstractMethodError ame) {
      // NOTE(review): swallowed in the same way as NoSuchMethodError above.
    }

    // Cache the XMLReader if this is the first time we've created
    // a reader for this thread.
    if (!threadHasReader) {
      m_readers.set(reader);
      m_inUse.put(reader, Boolean.TRUE);
    }
  }

  try {
    // reader is cached, but this property might have been reset
    reader.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, _accessExternalDTD);
  } catch (SAXException se) {
    System.err.println("Warning: " + reader.getClass().getName() + ": " + se.getMessage());
  }

  try {
    // Apply every limit known to the security manager, if one is configured.
    if (_xmlSecurityManager != null) {
      for (XMLSecurityManager.Limit limit : XMLSecurityManager.Limit.values()) {
        reader.setProperty(limit.apiProperty(), _xmlSecurityManager.getLimitValueAsString(limit));
      }
      if (_xmlSecurityManager.printEntityCountInfo()) {
        reader.setProperty(XalanConstants.JDK_ENTITY_COUNT_INFO, XalanConstants.JDK_YES);
      }
    }
  } catch (SAXException se) {
    System.err.println("Warning: " + reader.getClass().getName() + ": " + se.getMessage());
  }

  return reader;
}
/** * Converts an HTML document to XML. * * @param io io reference * @param opts html options * @return parser * @throws IOException I/O exception */ private static IO toXML(final IO io, final HtmlOptions opts) throws IOException { // reader could not be initialized; fall back to XML if (READER == null) return io; try { // tries to extract the encoding from the input final TextInput ti = new TextInput(io); String enc = ti.encoding(); final byte[] content = ti.content(); // looks for a charset definition final byte[] encoding = token("charset="); int cs = indexOf(content, encoding); if (cs > 0) { // extracts the encoding string cs += encoding.length; int ce = cs; final int cl = content.length; while (++ce < cl && content[ce] > 0x28) ; enc = string(substring(content, cs, ce)); } // define input final InputSource is = new InputSource(new ArrayInput(content)); is.setEncoding(supported(enc) ? normEncoding(enc) : UTF8); // define output final StringWriter sw = new StringWriter(); final XMLReader reader = (XMLReader) Reflect.get(READER); final Object writer = Reflect.get(WRITER, sw); // set TagSoup options if (opts.get(HtmlOptions.HTML)) { reader.setFeature("http://xml.org/sax/features/namespaces", false); opt("method", "html"); opt("omit-xml-declaration", "yes"); } if (opts.get(HtmlOptions.NONS)) reader.setFeature("http://xml.org/sax/features/namespaces", false); if (opts.get(HtmlOptions.OMITXML)) opt("omit-xml-declaration", "yes"); if (opts.get(HtmlOptions.NOBOGONS)) reader.setFeature(FEATURES + "ignore-bogons", true); if (opts.get(HtmlOptions.NODEFAULTS)) reader.setFeature(FEATURES + "default-attributes", false); if (opts.get(HtmlOptions.NOCOLONS)) reader.setFeature(FEATURES + "translate-colons", true); if (opts.get(HtmlOptions.NORESTART)) reader.setFeature(FEATURES + "restart-elements", false); if (opts.get(HtmlOptions.IGNORABLE)) reader.setFeature(FEATURES + "ignorable-whitespace", true); if (opts.get(HtmlOptions.EMPTYBOGONS)) reader.setFeature(FEATURES + 
"bogons-empty", true); if (opts.get(HtmlOptions.ANY)) reader.setFeature(FEATURES + "bogons-empty", false); if (opts.get(HtmlOptions.NOROOTBOGONS)) reader.setFeature(FEATURES + "root-bogons", false); if (opts.get(HtmlOptions.NOCDATA)) reader.setFeature(FEATURES + "cdata-elements", false); if (opts.get(HtmlOptions.LEXICAL)) reader.setProperty("http://xml.org/sax/properties/lexical-handler", writer); if (opts.contains(HtmlOptions.METHOD)) opt("method", opts.get(HtmlOptions.METHOD)); if (opts.contains(HtmlOptions.DOCTYPESYS)) opt("doctype-system", opts.get(HtmlOptions.DOCTYPESYS)); if (opts.contains(HtmlOptions.DOCTYPEPUB)) opt("doctype-public", opts.get(HtmlOptions.DOCTYPEPUB)); if (opts.contains(HtmlOptions.ENCODING)) is.setEncoding(opts.get(HtmlOptions.ENCODING)); // end TagSoup options reader.setContentHandler((ContentHandler) writer); reader.parse(is); return new IOContent(token(sw.toString()), io.name()); } catch (final SAXException ex) { Util.errln(ex); return io; } }
/**
 * Validates XML data from a stream. The entity resolver installed on the reader is chosen
 * from the form of {@code grammarUrl}.
 *
 * @param stream XML input.
 * @param grammarUrl User supplied path to grammar: {@code null} selects the system catalog, a
 *     path ending in ".xml" a catalog file, a path ending in "/" a collection to search, and
 *     anything else a single grammar.
 * @return Validation report containing all validation info.
 */
public ValidationReport validateParse(InputStream stream, String grammarUrl) {
  logger.debug("Start validation.");

  final ValidationReport report = new ValidationReport();
  final ValidationContentHandler contentHandler = new ValidationContentHandler();

  try {
    final XMLReader xmlReader = getXMLReader(contentHandler, report);

    // Pick the entity resolver that matches the kind of grammar reference.
    final Object entityResolver;
    if (grammarUrl == null) {
      // Scenario 1 : no params - use system catalog
      logger.debug("Validation using system catalog.");
      entityResolver = systemCatalogResolver;

    } else if (grammarUrl.endsWith(".xml")) {
      // Scenario 2 : path to catalog (xml)
      logger.debug("Validation using user specified catalog '" + grammarUrl + "'.");
      final eXistXMLCatalogResolver catalogResolver = new eXistXMLCatalogResolver();
      catalogResolver.setCatalogList(new String[] {grammarUrl});
      entityResolver = catalogResolver;

    } else if (grammarUrl.endsWith("/")) {
      // Scenario 3 : path to collection ("/"): search.
      logger.debug("Validation using searched grammar, start from '" + grammarUrl + "'.");
      entityResolver = new SearchResourceResolver(grammarUrl, brokerPool);

    } else {
      // Scenario 4 : path to grammar (xsd, dtd) specified.
      logger.debug("Validation using specified grammar '" + grammarUrl + "'.");
      entityResolver = new AnyUriResolver(grammarUrl);
    }
    xmlReader.setProperty(
        XMLReaderObjectFactory.APACHE_PROPERTIES_ENTITYRESOLVER, entityResolver);

    logger.debug("Validation started.");
    report.start();
    xmlReader.parse(new InputSource(stream));
    logger.debug("Validation stopped.");
    report.stop();

    report.setNamespaceUri(contentHandler.getNamespaceUri());

    if (!report.isValid()) {
      logger.debug("Document is not valid.");
    }

  } catch (ExistIOException ex) {
    // Report the underlying cause, not the wrapper.
    logger.error(ex.getCause());
    report.setThrowable(ex.getCause());

  } catch (Exception ex) {
    logger.error(ex);
    report.setThrowable(ex);

  } finally {
    // Also reached after a successful run, where stop() has already been called once.
    report.stop();
    logger.debug("Validation performed in " + report.getValidationDuration() + " msec.");
  }

  return report;
}
/**
 * Registers a lexical handler on the given reader by setting the property named by
 * {@code LEXICAL_HANDLER_PROPERTY_NAME}. The call is logged at debug level first.
 *
 * @param xmlReader the reader to configure
 * @param handler the lexical handler to register
 * @throws SAXNotRecognizedException if raised by {@code XMLReader.setProperty}
 * @throws SAXNotSupportedException if raised by {@code XMLReader.setProperty}
 */
public static void setLexicalHandler(XMLReader xmlReader, LexicalHandler handler)
    throws SAXNotRecognizedException, SAXNotSupportedException {
  logger.debug("setLexicalHandler(xmlReader={}, handler={}) - start", xmlReader, handler);
  xmlReader.setProperty(LEXICAL_HANDLER_PROPERTY_NAME, handler);
}
public static void main(String args[]) { // Printing arguments just to show them boolean debug = false; if (debug) { for (int i = 0; i < args.length; i++) { System.err.printf("arg[%d]: %s\n", i, args[i]); } } // Now the real program InputStream in = System.in; PrintStream out = System.out; PrintStream err = System.err; String usage = "Usage: " + OLEWrapper.class.getName() + " [file]\n" + "\tfile = input file to read, default is stdin"; if (args.length > 1) { out.println("Error: more than 1 argument"); out.println(usage); System.exit(1); } else if (args.length == 1) { try { in = new BufferedInputStream(new FileInputStream(args[0])); } catch (FileNotFoundException e) { err.println("file " + args[0] + "was not found."); System.exit(2); } } // Then do the work... SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); try { SAXParser saxParser = factory.newSAXParser(); DefaultHandler handler = new OLEImportHandler(out, err); XMLReader reader = saxParser.getXMLReader(); if (debug) { out.println("factory.isNamespaceAware()=" + factory.isNamespaceAware()); out.println("saxParser.isNamespaceAware()=" + saxParser.isNamespaceAware()); out.println(SaxNSURI + "=" + reader.getFeature(SaxNSURI)); System.exit(0); } reader.setContentHandler(handler); // Register lexical handler for comment processing reader.setProperty(SaxLexHandlerURI, handler); reader.parse(new InputSource(in)); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ParserConfigurationException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (SAXException e) { // TODO Auto-generated catch block e.printStackTrace(); } System.exit(0); }
/**
 * Passes the property unchanged to the underlying {@code xmlReader}.
 *
 * @param name the property name
 * @param value the property value
 * @throws SAXNotRecognizedException if raised by the underlying reader
 * @throws SAXNotSupportedException if raised by the underlying reader
 */
@Override
public void setProperty(String name, Object value)
    throws SAXNotRecognizedException, SAXNotSupportedException {
  xmlReader.setProperty(name, value);
}
public void validate(Source source, Result result) throws SAXException, IOException { if (result instanceof SAXResult || result == null) { final SAXSource saxSource = (SAXSource) source; final SAXResult saxResult = (SAXResult) result; if (result != null) { setContentHandler(saxResult.getHandler()); } try { XMLReader reader = saxSource.getXMLReader(); if (reader == null) { // create one now SAXParserFactory spf = SAXParserFactory.newInstance(); spf.setNamespaceAware(true); try { reader = spf.newSAXParser().getXMLReader(); // If this is a Xerces SAX parser, set the security manager if there is one if (reader instanceof com.sun.org.apache.xerces.internal.parsers.SAXParser) { SecurityManager securityManager = (SecurityManager) fComponentManager.getProperty(SECURITY_MANAGER); if (securityManager != null) { try { reader.setProperty(SECURITY_MANAGER, securityManager); } // Ignore the exception if the security manager cannot be set. catch (SAXException exc) { } } } } catch (Exception e) { // this is impossible, but better safe than sorry throw new FactoryConfigurationError(e); } } // If XML names and Namespace URIs are already internalized we // can avoid running them through the SymbolTable. try { fStringsInternalized = reader.getFeature(STRING_INTERNING); } catch (SAXException exc) { // The feature isn't recognized or getting it is not supported. // In either case, assume that strings are not internalized. fStringsInternalized = false; } ErrorHandler errorHandler = fComponentManager.getErrorHandler(); reader.setErrorHandler( errorHandler != null ? 
errorHandler : DraconianErrorHandler.getInstance()); reader.setEntityResolver(fResolutionForwarder); fResolutionForwarder.setEntityResolver(fComponentManager.getResourceResolver()); reader.setContentHandler(this); reader.setDTDHandler(this); InputSource is = saxSource.getInputSource(); reader.parse(is); } finally { // release the reference to user's handler ASAP setContentHandler(null); } return; } throw new IllegalArgumentException( JAXPValidationMessageFormatter.formatMessage( Locale.getDefault(), "SourceResultMismatch", new Object[] {source.getClass().getName(), result.getClass().getName()})); }
/** * Get an instance of a DTM, loaded with the content from the specified source. If the unique flag * is true, a new instance will always be returned. Otherwise it is up to the DTMManager to return * a new instance or an instance that it already created and may be being used by someone else. (I * think more parameters will need to be added for error handling, and entity resolution). * * @param source the specification of the source object. * @param unique true if the returned DTM must be unique, probably because it is going to be * mutated. * @param whiteSpaceFilter Enables filtering of whitespace nodes, and may be null. * @param incremental true if the DTM should be built incrementally, if possible. * @param doIndexing true if the caller considers it worth it to use indexing schemes. * @param hasUserReader true if <code>source</code> is a <code>SAXSource</code> object that has an * <code>XMLReader</code>, that was specified by the user. * @param size Specifies initial size of tables that represent the DTM * @param buildIdIndex true if the id index table should be built. * @param newNameTable true if we want to use a separate ExpandedNameTable for this DTM. * @return a non-null DTM reference. */ public DTM getDTM( Source source, boolean unique, DTMWSFilter whiteSpaceFilter, boolean incremental, boolean doIndexing, boolean hasUserReader, int size, boolean buildIdIndex, boolean newNameTable) { if (DEBUG && null != source) { System.out.println( "Starting " + (unique ? 
"UNIQUE" : "shared") + " source: " + source.getSystemId()); } int dtmPos = getFirstFreeDTMID(); int documentID = dtmPos << IDENT_DTM_NODE_BITS; if ((null != source) && source instanceof StAXSource) { final StAXSource staxSource = (StAXSource) source; StAXEvent2SAX staxevent2sax = null; StAXStream2SAX staxStream2SAX = null; if (staxSource.getXMLEventReader() != null) { final XMLEventReader xmlEventReader = staxSource.getXMLEventReader(); staxevent2sax = new StAXEvent2SAX(xmlEventReader); } else if (staxSource.getXMLStreamReader() != null) { final XMLStreamReader xmlStreamReader = staxSource.getXMLStreamReader(); staxStream2SAX = new StAXStream2SAX(xmlStreamReader); } SAXImpl dtm; if (size <= 0) { dtm = new SAXImpl( this, source, documentID, whiteSpaceFilter, null, doIndexing, DTMDefaultBase.DEFAULT_BLOCKSIZE, buildIdIndex, newNameTable); } else { dtm = new SAXImpl( this, source, documentID, whiteSpaceFilter, null, doIndexing, size, buildIdIndex, newNameTable); } dtm.setDocumentURI(source.getSystemId()); addDTM(dtm, dtmPos, 0); try { if (staxevent2sax != null) { staxevent2sax.setContentHandler(dtm); staxevent2sax.parse(); } else if (staxStream2SAX != null) { staxStream2SAX.setContentHandler(dtm); staxStream2SAX.parse(); } } catch (RuntimeException re) { throw re; } catch (Exception e) { throw new com.sun.org.apache.xml.internal.utils.WrappedRuntimeException(e); } return dtm; } else if ((null != source) && source instanceof DOMSource) { final DOMSource domsrc = (DOMSource) source; final org.w3c.dom.Node node = domsrc.getNode(); final DOM2SAX dom2sax = new DOM2SAX(node); SAXImpl dtm; if (size <= 0) { dtm = new SAXImpl( this, source, documentID, whiteSpaceFilter, null, doIndexing, DTMDefaultBase.DEFAULT_BLOCKSIZE, buildIdIndex, newNameTable); } else { dtm = new SAXImpl( this, source, documentID, whiteSpaceFilter, null, doIndexing, size, buildIdIndex, newNameTable); } dtm.setDocumentURI(source.getSystemId()); addDTM(dtm, dtmPos, 0); dom2sax.setContentHandler(dtm); try { 
dom2sax.parse(); } catch (RuntimeException re) { throw re; } catch (Exception e) { throw new com.sun.org.apache.xml.internal.utils.WrappedRuntimeException(e); } return dtm; } else { boolean isSAXSource = (null != source) ? (source instanceof SAXSource) : true; boolean isStreamSource = (null != source) ? (source instanceof StreamSource) : false; if (isSAXSource || isStreamSource) { XMLReader reader; InputSource xmlSource; if (null == source) { xmlSource = null; reader = null; hasUserReader = false; // Make sure the user didn't lie } else { reader = getXMLReader(source); xmlSource = SAXSource.sourceToInputSource(source); String urlOfSource = xmlSource.getSystemId(); if (null != urlOfSource) { try { urlOfSource = SystemIDResolver.getAbsoluteURI(urlOfSource); } catch (Exception e) { // %REVIEW% Is there a better way to send a warning? System.err.println("Can not absolutize URL: " + urlOfSource); } xmlSource.setSystemId(urlOfSource); } } // Create the basic SAX2DTM. SAXImpl dtm; if (size <= 0) { dtm = new SAXImpl( this, source, documentID, whiteSpaceFilter, null, doIndexing, DTMDefaultBase.DEFAULT_BLOCKSIZE, buildIdIndex, newNameTable); } else { dtm = new SAXImpl( this, source, documentID, whiteSpaceFilter, null, doIndexing, size, buildIdIndex, newNameTable); } // Go ahead and add the DTM to the lookup table. This needs to be // done before any parsing occurs. Note offset 0, since we've just // created a new DTM. addDTM(dtm, dtmPos, 0); if (null == reader) { // Then the user will construct it themselves. 
return dtm; } reader.setContentHandler(dtm.getBuilder()); if (!hasUserReader || null == reader.getDTDHandler()) { reader.setDTDHandler(dtm); } if (!hasUserReader || null == reader.getErrorHandler()) { reader.setErrorHandler(dtm); } try { reader.setProperty("http://xml.org/sax/properties/lexical-handler", dtm); } catch (SAXNotRecognizedException e) { } catch (SAXNotSupportedException e) { } try { reader.parse(xmlSource); } catch (RuntimeException re) { throw re; } catch (Exception e) { throw new com.sun.org.apache.xml.internal.utils.WrappedRuntimeException(e); } finally { if (!hasUserReader) { releaseXMLReader(reader); } } if (DUMPTREE) { System.out.println("Dumping SAX2DOM"); dtm.dumpDTM(System.err); } return dtm; } else { // It should have been handled by a derived class or the caller // made a mistake. throw new DTMException( XMLMessages.createXMLMessage( XMLErrorResources.ER_NOT_SUPPORTED, new Object[] {source})); } } }