public static void main(String[] args) throws Exception { String sourceUrlString = "data/test.html"; if (args.length == 0) System.err.println("Using default argument of \"" + sourceUrlString + '"'); else sourceUrlString = args[0]; if (sourceUrlString.indexOf(':') == -1) sourceUrlString = "file:" + sourceUrlString; StreamedSource streamedSource = new StreamedSource(new URL(sourceUrlString)); // streamedSource.setBuffer(new char[65000]); // uncomment this to use a fixed buffer size Writer writer = null; try { writer = new OutputStreamWriter( new FileOutputStream("StreamedSourceCopyOutput.html"), streamedSource.getEncoding()); System.out.println("Processing segments:"); int lastSegmentEnd = 0; for (Segment segment : streamedSource) { System.out.println(segment.getDebugInfo()); if (segment.getEnd() <= lastSegmentEnd) continue; // if this tag is inside the previous tag (e.g. a server tag) then ignore it as // it was already output along with the previous tag. lastSegmentEnd = segment.getEnd(); if (segment instanceof Tag) { Tag tag = (Tag) segment; // HANDLE TAG // Uncomment the following line to ensure each tag is valid XML: // writer.write(tag.tidy()); continue; } else if (segment instanceof CharacterReference) { CharacterReference characterReference = (CharacterReference) segment; // HANDLE CHARACTER REFERENCE // Uncomment the following line to decode all character references instead of copying them // verbatim: // characterReference.appendCharTo(writer); continue; } else { // HANDLE PLAIN TEXT } // unless specific handling has prevented getting to here, simply output the segment as is: writer.write(segment.toString()); } writer.close(); System.err.println( "\nA copy of the source document has been output to StreamedSourceCopyOuput.html"); } catch (Exception ex) { if (writer != null) try { writer.close(); } catch (IOException ex2) { } throw ex; } }
/** * Replaces the specified {@linkplain Segment segment} in this output document with the specified * text. * * <p>Specifying a <code>null</code> argument to the <code>text</code> parameter is exactly * equivalent to specifying an empty string, and results in the segment being completely removed * from the output document. * * @param segment the segment to replace. * @param text the replacement text, or <code>null</code> to remove the segment. */ public void replace(final Segment segment, final CharSequence text) { replace(segment.getBegin(), segment.getEnd(), text); }