/**
 * Hands the document over to the component parser selected for it.
 *
 * <p>A {@link RuntimeException}, {@link IOException} or {@link SAXException} raised by the
 * component parser that is unrelated to the supplied input stream and content handler is
 * rethrown wrapped in a {@link TikaException}, so that the {@link Parser} contract is honored.
 *
 * @param stream the document stream to parse
 * @param handler receiver of the SAX events; may be {@code null}, in which case {@code null} is
 *     passed on to the component parser
 * @param metadata document metadata; an {@code X-Parsed-By} value naming the chosen parser
 *     class is added to it
 * @param context the parse context
 * @throws IOException if rethrown by the tagged input stream's {@code throwIfCauseOf} check
 * @throws SAXException if rethrown by the tagged content handler's {@code throwIfCauseOf} check
 * @throws TikaException for any other failure of the component parser
 */
public void parse(
        InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    final Parser delegate = getParser(metadata, context);
    final TemporaryResources resources = new TemporaryResources();
    try {
        final TikaInputStream input = TikaInputStream.get(stream, resources);

        // Only wrap a handler that actually exists; a null handler is forwarded as null.
        TaggedContentHandler output = null;
        if (handler != null) {
            output = new TaggedContentHandler(handler);
        }

        // Report the concrete parser class, looking through a decorator when there is one.
        final Class<?> parserType =
                delegate instanceof ParserDecorator
                        ? ((ParserDecorator) delegate).getWrappedParser().getClass()
                        : delegate.getClass();
        metadata.add("X-Parsed-By", parserType.getName());

        try {
            delegate.parse(input, output, metadata, context);
        } catch (IOException e) {
            // Give the tagged stream the chance to rethrow its own failure unchanged.
            input.throwIfCauseOf(e);
            throw new TikaException("TIKA-198: Illegal IOException from " + delegate, e);
        } catch (SAXException e) {
            // Same for the tagged handler, when one was supplied.
            if (output != null) {
                output.throwIfCauseOf(e);
            }
            throw new TikaException("TIKA-237: Illegal SAXException from " + delegate, e);
        } catch (RuntimeException e) {
            throw new TikaException("Unexpected RuntimeException from " + delegate, e);
        }
    } finally {
        // Always release whatever temporary resources were registered during the parse.
        resources.dispose();
    }
}
/**
 * Runs the input stream through a SAX parser.
 *
 * <p>The content handler is wrapped in an {@link org.apache.tika.sax.OfflineContentHandler} so
 * that no namespace lookups are made. Further wrappers can be added by overriding {@link
 * #getContentHandler(ContentHandler, Metadata, ParseContext)}.
 *
 * <p>When the metadata carries no content type yet, it is set to {@code application/xml}
 * before parsing starts.
 *
 * @param stream that should be parsed
 * @param handler that will receive the SAX events
 * @param metadata of current document stream
 * @param context of current parse
 * @throws IOException if the stream cannot be read
 * @throws SAXException if the SAX parsing fails.
 * @throws TikaException if the XML parsing fails.
 */
public void parse(
        InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    // Tag the caller's handler so its own failures can be recognised in the catch below.
    final TaggedContentHandler callerHandler = new TaggedContentHandler(handler);

    if (metadata.get(HttpHeaders.CONTENT_TYPE) == null) {
        metadata.set(HttpHeaders.CONTENT_TYPE, "application/xml");
    }

    try {
        // The stream is handed over behind a CloseShieldInputStream wrapper.
        context.getSAXParser()
                .parse(
                        new CloseShieldInputStream(stream),
                        new OfflineContentHandler(
                                getContentHandler(callerHandler, metadata, context)));
    } catch (SAXException e) {
        // Let the tagged handler rethrow its own exception; anything else is wrapped.
        callerHandler.throwIfCauseOf(e);
        throw new TikaException("XML parse error", e);
    }
}
/**
 * Parses the stream with a SAX parser and reports the result through an {@link
 * XHTMLContentHandler} wrapped around the caller's handler.
 *
 * <p>The content type is applied to the metadata first. The XHTML document is then started,
 * and {@code endDocument()} is always called on it afterwards, whether or not parsing
 * succeeded.
 *
 * @param stream that should be parsed
 * @param handler that will receive the SAX events
 * @param metadata of current document stream
 * @param context of current parse
 * @throws IOException if the stream cannot be read
 * @throws SAXException if rethrown by the tagged handler's {@code throwIfCauseOf} check, or
 *     raised while starting or ending the XHTML document
 * @throws TikaException if the XML parsing fails
 */
@Override
public void parse(
        InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    setContentType(metadata);

    final XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
    document.startDocument();

    // Tag the XHTML handler so its own failures can be recognised in the catch below.
    final TaggedContentHandler taggedDocument = new TaggedContentHandler(document);
    try {
        // The stream is handed over behind a CloseShieldInputStream wrapper.
        context.getSAXParser()
                .parse(
                        new CloseShieldInputStream(stream),
                        new OfflineContentHandler(
                                new EmbeddedContentHandler(
                                        getContentHandler(taggedDocument, metadata, context))));
    } catch (SAXException e) {
        // Let the tagged handler rethrow its own exception; anything else is wrapped.
        taggedDocument.throwIfCauseOf(e);
        throw new TikaException("XML parse error", e);
    } finally {
        document.endDocument();
    }
}