/**
 * Copies an HTML document, segment by segment, into the file
 * {@code StreamedSourceCopyOutput.html} in the working directory.
 *
 * <p>The document is read through a {@code StreamedSource}, and each segment's debug info is
 * printed to standard output as it is processed. The tag / character reference / plain text
 * branches are deliberate extension points: by default every segment is written out verbatim.
 *
 * @param args optional; {@code args[0]} is the source URL or file path. When absent,
 *     {@code data/test.html} is used. A value containing no {@code ':'} is prefixed with
 *     {@code file:}.
 * @throws Exception if the source cannot be opened or read, or the output cannot be written
 */
public static void main(String[] args) throws Exception {
   String sourceUrlString = "data/test.html";
   if (args.length == 0) {
     System.err.println("Using default argument of \"" + sourceUrlString + '"');
   } else {
     sourceUrlString = args[0];
   }
   // No ':' means no URL scheme was given, so treat the argument as a local file path.
   if (sourceUrlString.indexOf(':') == -1) {
     sourceUrlString = "file:" + sourceUrlString;
   }
   StreamedSource streamedSource = new StreamedSource(new URL(sourceUrlString));
   // streamedSource.setBuffer(new char[65000]); // uncomment this to use a fixed buffer size
   Writer writer = null;
   try {
     writer =
         new OutputStreamWriter(
             new FileOutputStream("StreamedSourceCopyOutput.html"), streamedSource.getEncoding());
     System.out.println("Processing segments:");
     int lastSegmentEnd = 0;
     for (Segment segment : streamedSource) {
       System.out.println(segment.getDebugInfo());
       if (segment.getEnd() <= lastSegmentEnd) {
         // This tag is inside the previous tag (e.g. a server tag), so it was already output
         // along with the previous tag.
         continue;
       }
       lastSegmentEnd = segment.getEnd();
       if (segment instanceof Tag) {
         Tag tag = (Tag) segment;
         // HANDLE TAG
         // Uncomment the following line to ensure each tag is valid XML:
         // writer.write(tag.tidy()); continue;
       } else if (segment instanceof CharacterReference) {
         CharacterReference characterReference = (CharacterReference) segment;
         // HANDLE CHARACTER REFERENCE
         // Uncomment the following line to decode all character references instead of copying them
         // verbatim:
         // characterReference.appendCharTo(writer); continue;
       } else {
         // HANDLE PLAIN TEXT
       }
       // unless specific handling has prevented getting to here, simply output the segment as is:
       writer.write(segment.toString());
     }
     // Close on the success path so that a failure to flush the output is reported to the caller.
     writer.close();
     writer = null; // closed cleanly; nothing left for the finally block to do
     System.err.println(
         "\nA copy of the source document has been output to StreamedSourceCopyOutput.html");
   } finally {
     // Runs for every Throwable, not only Exception, so the output file handle is never leaked.
     if (writer != null) {
       try {
         writer.close();
       } catch (IOException ignored) {
         // A failure is already propagating; a secondary close failure must not replace it.
       }
     }
     try {
       streamedSource.close();
     } catch (IOException ignored) {
       // The source was only read from; failing to close it cannot affect the written copy.
     }
   }
 }
示例#2
0
 /**
  * Closes the parser, the source reader and the internal writer, then clears the level stack.
  *
  * <p>Every step is attempted even when an earlier one throws, so one failing resource cannot
  * leave the others open. If more than one step throws, the exception from the last failing
  * step is the one that propagates.
  *
  * @throws IOException if closing the parser, the reader or the writer fails
  */
 @Override
 public void close() throws IOException {
   try {
     parser.close();
   } finally {
     try {
       reader.close();
     } finally {
       try {
         writer.close();
       } finally {
         stack.clear();
       }
     }
   }
 }
示例#3
0
 /**
  * Sets up the filter state around the given character source.
  *
  * <p>Creates a new DOM document, an empty level stack, a {@code StreamedSource} parser over
  * {@code reader} together with its segment iterator, and a {@code StringWriter} whose backing
  * buffer is kept in {@code buffer}. The read offset starts at zero.
  *
  * @param reader the character stream handed to the parser
  * @throws IOException declared by the signature; presumably raised while the parser is being
  *     created over {@code reader} (confirm against the parser library)
  * @throws ParserConfigurationException if no DOM document builder can be created
  */
 protected HtmlFilterReaderBase(Reader reader) throws IOException, ParserConfigurationException {
   // Input side.
   this.reader = reader;

   // DOM document and the stack of currently open levels.
   this.document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
   this.stack = new Stack<Level>();

   // Streaming parser over the reader, and the iterator that walks its segments.
   this.parser = new StreamedSource(reader);
   this.iterator = this.parser.iterator();

   // Output side: an in-memory writer, its backing buffer, and the current offset.
   this.writer = new StringWriter();
   this.buffer = this.writer.getBuffer();
   this.offset = 0;
 }
示例#4
0
 /**
  * Dispatches the parser's current segment to the matching handler.
  *
  * <p>Start tags go to {@code processStartTag}, end tags to {@code processEndTag}, any other
  * tag is written out unchanged, and every non-tag segment goes to {@code processText}. A
  * segment that ends at or before the end of the last handled segment is skipped.
  */
 private void processCurrentSegment() {
   final Segment current = parser.getCurrentSegment();

   // A segment nested inside the previous one (e.g. within a server tag) was already
   // emitted together with that previous segment, so there is nothing left to do.
   if (current.getEnd() <= lastSegEnd) {
     return;
   }
   lastSegEnd = current.getEnd();

   if (!(current instanceof Tag)) {
     processText(current);
     return;
   }
   if (current instanceof StartTag) {
     processStartTag((StartTag) current);
     return;
   }
   if (current instanceof EndTag) {
     processEndTag((EndTag) current);
     return;
   }
   // Any other kind of tag is copied to the output verbatim.
   writer.write(current.toString());
 }