public static void main(String[] args) throws Exception { String sourceUrlString = "data/test.html"; if (args.length == 0) System.err.println("Using default argument of \"" + sourceUrlString + '"'); else sourceUrlString = args[0]; if (sourceUrlString.indexOf(':') == -1) sourceUrlString = "file:" + sourceUrlString; StreamedSource streamedSource = new StreamedSource(new URL(sourceUrlString)); // streamedSource.setBuffer(new char[65000]); // uncomment this to use a fixed buffer size Writer writer = null; try { writer = new OutputStreamWriter( new FileOutputStream("StreamedSourceCopyOutput.html"), streamedSource.getEncoding()); System.out.println("Processing segments:"); int lastSegmentEnd = 0; for (Segment segment : streamedSource) { System.out.println(segment.getDebugInfo()); if (segment.getEnd() <= lastSegmentEnd) continue; // if this tag is inside the previous tag (e.g. a server tag) then ignore it as // it was already output along with the previous tag. lastSegmentEnd = segment.getEnd(); if (segment instanceof Tag) { Tag tag = (Tag) segment; // HANDLE TAG // Uncomment the following line to ensure each tag is valid XML: // writer.write(tag.tidy()); continue; } else if (segment instanceof CharacterReference) { CharacterReference characterReference = (CharacterReference) segment; // HANDLE CHARACTER REFERENCE // Uncomment the following line to decode all character references instead of copying them // verbatim: // characterReference.appendCharTo(writer); continue; } else { // HANDLE PLAIN TEXT } // unless specific handling has prevented getting to here, simply output the segment as is: writer.write(segment.toString()); } writer.close(); System.err.println( "\nA copy of the source document has been output to StreamedSourceCopyOuput.html"); } catch (Exception ex) { if (writer != null) try { writer.close(); } catch (IOException ex2) { } throw ex; } }
/**
 * Closes the parser, the underlying reader and the internal writer, then empties the
 * element stack.
 *
 * <p>The steps are chained with {@code try/finally} so that a failure in an earlier close
 * does not prevent the later resources from being released; the exception still
 * propagates to the caller once all steps have been attempted.
 *
 * @throws IOException if closing the parser, the reader or the writer fails
 */
@Override
public void close() throws IOException {
  try {
    parser.close();
  } finally {
    try {
      reader.close();
    } finally {
      try {
        writer.close();
      } finally {
        stack.clear();
      }
    }
  }
}
protected HtmlFilterReaderBase(Reader reader) throws IOException, ParserConfigurationException { this.reader = reader; document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); stack = new Stack<Level>(); parser = new StreamedSource(reader); iterator = parser.iterator(); writer = new StringWriter(); buffer = writer.getBuffer(); offset = 0; }
private void processCurrentSegment() { Segment segment = parser.getCurrentSegment(); // If this tag is inside the previous tag (e.g. a server tag) then // ignore it as it was already output along with the previous tag. if (segment.getEnd() <= lastSegEnd) { return; } lastSegEnd = segment.getEnd(); if (segment instanceof Tag) { if (segment instanceof StartTag) { processStartTag((StartTag) segment); } else if (segment instanceof EndTag) { processEndTag((EndTag) segment); } else { writer.write(segment.toString()); } } else { processText(segment); } }