/**
 * Reads the character that starts a doctype name and hands off to {@code DoctypeName}.
 *
 * <p>Whitespace is skipped. Every path that starts a name (letter, NUL, or any other character)
 * first creates a fresh pending doctype, so the name is never appended to a missing or stale
 * token. End of input emits a force-quirks doctype and returns to {@code Data}.
 *
 * @param t tokeniser whose pending doctype and state are updated
 * @param r reader supplying the input characters
 */
void read(Tokeniser t, CharacterReader r) {
   if (r.matchesLetter()) {
     t.createDoctypePending();
     t.transition(DoctypeName);
     return;
   }
   char c = r.consume();
   switch (c) {
     case '\t':
     case '\n':
     case '\r':
     case '\f':
     case ' ':
       break; // ignore whitespace
     case nullChar:
       t.error(this);
       // A pending doctype must exist before its name is written to; the other
       // name-starting branches already do this.
       t.createDoctypePending();
       t.doctypePending.name.append(replacementChar);
       t.transition(DoctypeName);
       break;
     case eof:
       t.eofError(this);
       t.createDoctypePending();
       t.doctypePending.forceQuirks = true;
       t.emitDoctypePending();
       t.transition(Data);
       break;
     default:
       t.createDoctypePending();
       t.doctypePending.name.append(c);
       t.transition(DoctypeName);
   }
 }
 /**
  * Processes the first character consumed after a comment has been opened.
  *
  * <p>A dash moves to {@code CommentStartDash}. A {@code >} or end of input reports an error,
  * emits the pending comment and returns to {@code Data}. Anything else is added to the comment
  * data (NUL is reported and replaced by {@code replacementChar}) before moving to
  * {@code Comment}.
  *
  * @param t tokeniser holding the pending comment
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   final char ch = r.consume();
   if (ch == '-') {
     t.transition(CommentStartDash);
   } else if (ch == '>' || ch == eof) {
     // both endings finish the comment; only the kind of error reported differs
     if (ch == eof) {
       t.eofError(this);
     } else {
       t.error(this);
     }
     t.emitCommentPending();
     t.transition(Data);
   } else {
     if (ch == nullChar) {
       t.error(this);
       t.commentPending.data.append(replacementChar);
     } else {
       t.commentPending.data.append(ch);
     }
     t.transition(Comment);
   }
 }
 /**
  * Looks for the opening quote of a doctype system identifier.
  *
  * <p>Whitespace is skipped. A double or single quote moves to the matching quoted
  * system-identifier state. {@code >} or end of input emits the pending doctype with
  * {@code forceQuirks} set and returns to {@code Data}. Any other character sets
  * {@code forceQuirks} and moves to {@code BogusDoctype}.
  *
  * @param t tokeniser holding the pending doctype
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   final char ch = r.consume();
   if (ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f' || ch == ' ') {
     return; // whitespace: stay in this state
   }

   if (ch == '"') {
     // original note: system id should be set to the empty string (not done explicitly here)
     t.transition(DoctypeSystemIdentifier_doubleQuoted);
   } else if (ch == '\'') {
     // original note: system id should be set to the empty string (not done explicitly here)
     t.transition(DoctypeSystemIdentifier_singleQuoted);
   } else if (ch == '>' || ch == eof) {
     if (ch == eof) {
       t.eofError(this);
     } else {
       t.error(this);
     }
     t.doctypePending.forceQuirks = true;
     t.emitDoctypePending();
     t.transition(Data);
   } else {
     t.error(this);
     t.doctypePending.forceQuirks = true;
     t.transition(BogusDoctype);
   }
 }
    /**
     * Handles the character following a single dash in escaped script data.
     *
     * <p>An exhausted reader is an EOF error that returns to {@code Data}. A second dash is
     * emitted and moves to {@code ScriptDataEscapedDashDash}; {@code <} moves to
     * {@code ScriptDataEscapedLessthanSign}; everything else is emitted (NUL as
     * {@code replacementChar}, with an error) and returns to {@code ScriptDataEscaped}.
     *
     * @param t tokeniser receiving emitted characters and state changes
     * @param r reader supplying the input characters
     */
    void read(Tokeniser t, CharacterReader r) {
      if (r.isEmpty()) {
        t.eofError(this);
        t.transition(Data);
        return;
      }

      final char ch = r.consume();
      if (ch == '<') {
        t.transition(ScriptDataEscapedLessthanSign);
        return;
      }
      if (ch == '-') {
        t.emit(ch);
        t.transition(ScriptDataEscapedDashDash);
        return;
      }

      if (ch == nullChar) {
        t.error(this);
        t.emit(replacementChar);
      } else {
        t.emit(ch);
      }
      t.transition(ScriptDataEscaped);
    }
    /**
     * Reads the body of a single-quoted attribute value.
     *
     * <p>First appends the run of characters up to the next {@code '}, {@code &} or NUL, then
     * handles that delimiter: the quote ends the value, an ampersand is resolved through
     * {@code consumeCharacterReference} (a literal {@code &} is kept when it resolves to
     * {@code null}), NUL is reported and replaced, and end of input returns to {@code Data}.
     *
     * @param t tokeniser holding the pending tag
     * @param r reader supplying the input characters
     */
    void read(Tokeniser t, CharacterReader r) {
      final String run = r.consumeToAny('\'', '&', nullChar);
      if (!run.isEmpty()) {
        t.tagPending.appendAttributeValue(run);
      }

      final char ch = r.consume();
      if (ch == '\'') {
        t.transition(AfterAttributeValue_quoted);
      } else if (ch == '&') {
        final Character decoded = t.consumeCharacterReference('\'', true);
        if (decoded == null) {
          t.tagPending.appendAttributeValue('&');
        } else {
          t.tagPending.appendAttributeValue(decoded);
        }
      } else if (ch == nullChar) {
        t.error(this);
        t.tagPending.appendAttributeValue(replacementChar);
      } else if (ch == eof) {
        t.eofError(this);
        t.transition(Data);
      }
      // no final else: other characters were already taken by consumeToAny above
    }
    /**
     * Collects the name of a candidate end tag and decides whether it closes the current element.
     *
     * <p>Letter runs are appended lower-cased to the pending tag name and verbatim to
     * {@code dataBuffer}. Once a non-letter is reached, the tag is finished only if
     * {@code isAppropriateEndTagToken()} holds and input remains; otherwise the work is passed to
     * {@code anythingElse}.
     *
     * @param t tokeniser holding the pending tag and data buffer
     * @param r reader supplying the input characters
     */
    void read(Tokeniser t, CharacterReader r) {
      if (r.matchesLetter()) {
        final String letters = r.consumeLetterSequence();
        t.tagPending.appendTagName(letters.toLowerCase());
        t.dataBuffer.append(letters);
        return;
      }

      // not a matching end tag, or nothing left to read: fall back
      if (!t.isAppropriateEndTagToken() || r.isEmpty()) {
        anythingElse(t, r);
        return;
      }

      final char ch = r.consume();
      if (ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f' || ch == ' ') {
        t.transition(BeforeAttributeName);
      } else if (ch == '/') {
        t.transition(SelfClosingStartTag);
      } else if (ch == '>') {
        t.emitTagPending();
        t.transition(Data);
      } else {
        // keep the consumed character in the buffer before falling back
        t.dataBuffer.append(ch);
        anythingElse(t, r);
      }
    }
    /**
     * Collects a tag-like name inside double-escaped script data and, at its end, decides which
     * script state to continue in.
     *
     * <p>Letter runs are emitted unchanged and appended lower-cased to {@code dataBuffer}. When
     * the name ends with whitespace, {@code /} or {@code >}, a buffer equal to {@code "script"}
     * selects {@code ScriptDataEscaped}; any other content selects
     * {@code ScriptDataDoubleEscaped}. The terminating character is emitted as well. Any other
     * character is pushed back and handled by {@code ScriptDataDoubleEscaped}.
     *
     * @param t tokeniser receiving emitted characters and state changes
     * @param r reader supplying the input characters
     */
    void read(Tokeniser t, CharacterReader r) {
      if (r.matchesLetter()) {
        String name = r.consumeLetterSequence();
        // Lower-case independently of the JVM default locale: with e.g. a Turkish default,
        // "SCRIPT" would become "scrıpt" and never equal "script" in the comparison below.
        t.dataBuffer.append(name.toLowerCase(java.util.Locale.ROOT));
        t.emit(name);
        return;
      }

      char c = r.consume();
      switch (c) {
        case '\t':
        case '\n':
        case '\r':
        case '\f':
        case ' ':
        case '/':
        case '>':
          if (t.dataBuffer.toString().equals("script")) {
            t.transition(ScriptDataEscaped);
          } else {
            t.transition(ScriptDataDoubleEscaped);
          }
          t.emit(c);
          break;
        default:
          // not part of a tag name: let the double-escaped state re-read this character
          r.unconsume();
          t.transition(ScriptDataDoubleEscaped);
      }
    }
 /**
  * Checks whether the reader is at a dash: if so the dash is emitted and the tokeniser advances
  * into {@code ScriptDataEscapedDashDash}; if not, it goes back to {@code ScriptData}.
  *
  * @param t tokeniser receiving emitted characters and state changes
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   if (!r.matches('-')) {
     t.transition(ScriptData);
     return;
   }
   t.emit('-');
   t.advanceTransition(ScriptDataEscapedDashDash);
 }
 /**
  * Decides whether the input at this point begins an end tag name: a letter creates a pending
  * tag via {@code createTagPending(false)} and moves to {@code ScriptDataEndTagName}; otherwise
  * the text {@code "</"} is emitted and the tokeniser returns to {@code ScriptData}.
  *
  * @param t tokeniser receiving emitted characters and state changes
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   if (!r.matchesLetter()) {
     t.emit("</");
     t.transition(ScriptData);
     return;
   }
   t.createTagPending(false);
   t.transition(ScriptDataEndTagName);
 }
 /**
  * Checks for a slash at this point: a {@code /} creates the temp buffer and advances into
  * {@code RawtextEndTagOpen}; anything else emits {@code <} as text and returns to
  * {@code Rawtext}.
  *
  * @param t tokeniser receiving emitted characters and state changes
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   if (!r.matches('/')) {
     t.emit('<');
     t.transition(Rawtext);
     return;
   }
   t.createTempBuffer();
   t.advanceTransition(RawtextEndTagOpen);
 }
 /**
  * Checks for a slash at this point: a {@code /} is emitted, the temp buffer is created and the
  * tokeniser advances into {@code ScriptDataDoubleEscapeEnd}; anything else returns to
  * {@code ScriptDataDoubleEscaped}.
  *
  * @param t tokeniser receiving emitted characters and state changes
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   if (!r.matches('/')) {
     t.transition(ScriptDataDoubleEscaped);
     return;
   }
   t.emit('/');
   t.createTempBuffer();
   t.advanceTransition(ScriptDataDoubleEscapeEnd);
 }
 /**
  * Emits a comment token holding everything from the character that led into this state up to
  * the next {@code >}, then advances into {@code Data}.
  *
  * @param t tokeniser that receives the comment token
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   // todo: handle bogus comment starting from eof. when does that trigger?
   // step back one character so the one that brought us here is part of the comment
   r.unconsume();
   final Token.Comment bogus = new Token.Comment();
   final String body = r.consumeTo('>');
   bogus.data.append(body);
   // todo: replace nullChar with replaceChar
   t.emit(bogus);
   t.advanceTransition(Data);
 }
 /**
  * Emits double-escaped script data, watching for {@code -} and {@code <}.
  *
  * <p>A dash or less-than sign is emitted and advances to the corresponding
  * {@code ScriptDataDoubleEscaped…} state. NUL is reported, skipped and emitted as
  * {@code replacementChar}. End of input is an EOF error returning to {@code Data}. Any other
  * text is emitted as one run up to the next dash, less-than sign or NUL.
  *
  * @param t tokeniser receiving emitted characters and state changes
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   final char next = r.current();
   if (next == '-') {
     t.emit(next);
     t.advanceTransition(ScriptDataDoubleEscapedDash);
   } else if (next == '<') {
     t.emit(next);
     t.advanceTransition(ScriptDataDoubleEscapedLessthanSign);
   } else if (next == nullChar) {
     t.error(this);
     r.advance();
     t.emit(replacementChar);
   } else if (next == eof) {
     t.eofError(this);
     t.transition(Data);
   } else {
     final String run = r.consumeToAny('-', '<', nullChar);
     t.emit(run);
   }
 }
 /**
  * Decides whether the input at this point begins an end tag name inside escaped script data.
  *
  * <p>For a letter, a pending tag is created, the current character is added to its name
  * (lower-cased) and to {@code dataBuffer} (as-is), and the tokeniser advances into
  * {@code ScriptDataEscapedEndTagName}. Otherwise {@code "</"} is emitted and the tokeniser
  * returns to {@code ScriptDataEscaped}.
  *
  * @param t tokeniser holding the pending tag and data buffer
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   if (!r.matchesLetter()) {
     t.emit("</");
     t.transition(ScriptDataEscaped);
     return;
   }
   t.createTagPending(false);
   t.tagPending.appendTagName(Character.toLowerCase(r.current()));
   t.dataBuffer.append(r.current());
   t.advanceTransition(ScriptDataEscapedEndTagName);
 }
    /**
     * Emits escaped script data, watching for {@code -} and {@code <}.
     *
     * <p>An exhausted reader is an EOF error returning to {@code Data}. A dash is emitted and
     * advances to {@code ScriptDataEscapedDash}; a less-than sign advances to
     * {@code ScriptDataEscapedLessthanSign} without being emitted; NUL is reported, skipped and
     * emitted as {@code replacementChar}; other text is emitted as one run up to the next dash,
     * less-than sign or NUL.
     *
     * @param t tokeniser receiving emitted characters and state changes
     * @param r reader supplying the input characters
     */
    void read(Tokeniser t, CharacterReader r) {
      if (r.isEmpty()) {
        t.eofError(this);
        t.transition(Data);
        return;
      }

      final char next = r.current();
      if (next == '-') {
        t.emit('-');
        t.advanceTransition(ScriptDataEscapedDash);
      } else if (next == '<') {
        t.advanceTransition(ScriptDataEscapedLessthanSign);
      } else if (next == nullChar) {
        t.error(this);
        r.advance();
        t.emit(replacementChar);
      } else {
        final String run = r.consumeToAny('-', '<', nullChar);
        t.emit(run);
      }
    }
 /**
  * Handles the character that follows a closed, quoted attribute value.
  *
  * <p>Whitespace moves to {@code BeforeAttributeName}, {@code /} to {@code SelfClosingStartTag},
  * {@code >} emits the pending tag and returns to {@code Data}, and end of input is an EOF error
  * returning to {@code Data}. Any other character is an error: it is pushed back and re-read in
  * {@code BeforeAttributeName}.
  *
  * @param t tokeniser holding the pending tag
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   final char ch = r.consume();
   if (ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f' || ch == ' ') {
     t.transition(BeforeAttributeName);
   } else if (ch == '/') {
     t.transition(SelfClosingStartTag);
   } else if (ch == '>') {
     t.emitTagPending();
     t.transition(Data);
   } else if (ch == eof) {
     t.eofError(this);
     t.transition(Data);
   } else {
     t.error(this);
     r.unconsume();
     t.transition(BeforeAttributeName);
   }
 }
 /**
  * Reads one character of a single-quoted doctype system identifier.
  *
  * <p>The closing quote moves to {@code AfterDoctypeSystemIdentifier}. NUL is reported and
  * replaced by {@code replacementChar}. {@code >} or end of input reports an error, emits the
  * pending doctype with {@code forceQuirks} set and returns to {@code Data}. Every other
  * character is appended to the identifier.
  *
  * @param t tokeniser holding the pending doctype
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   final char ch = r.consume();
   if (ch == '\'') {
     t.transition(AfterDoctypeSystemIdentifier);
   } else if (ch == nullChar) {
     t.error(this);
     t.doctypePending.systemIdentifier.append(replacementChar);
   } else if (ch == '>' || ch == eof) {
     if (ch == eof) {
       t.eofError(this);
     } else {
       t.error(this);
     }
     t.doctypePending.forceQuirks = true;
     t.emitDoctypePending();
     t.transition(Data);
   } else {
     t.doctypePending.systemIdentifier.append(ch);
   }
 }
 /**
  * Emits input as plain character data: NUL is reported, skipped and emitted as
  * {@code replacementChar}; end of input emits a {@code Token.EOF}; all other text is emitted as
  * one run up to the next NUL. No state transition is made here.
  *
  * @param t tokeniser receiving the emitted data
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   final char next = r.current();
   if (next == eof) {
     t.emit(new Token.EOF());
   } else if (next == nullChar) {
     t.error(this);
     r.advance();
     t.emit(replacementChar);
   } else {
     final String run = r.consumeTo(nullChar);
     t.emit(run);
   }
 }
 /**
  * Discards characters until {@code >} or end of input, at which point the pending doctype is
  * emitted and the tokeniser returns to {@code Data}.
  *
  * @param t tokeniser holding the pending doctype
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   final char ch = r.consume();
   if (ch == '>' || ch == eof) {
     t.emitDoctypePending();
     t.transition(Data);
   }
   // every other character is ignored
 }
Example #20
0
 /**
  * Expects {@code Tokeniser.tokenise} to throw {@code TokeniserException} for this input; the
  * test fails if the call returns normally.
  */
 @Test
 public void testTokeniser() {
   boolean rejected = false;
   try {
     Tokeniser.tokenise("_sand _box   ;   ");
   } catch (TokeniserException ignored) {
     // expected: this is the outcome the test is looking for
     rejected = true;
   }
   if (!rejected) {
     fail();
   }
 }
 /**
  * Handles input after a doctype system identifier has been closed.
  *
  * <p>Whitespace is skipped. {@code >} emits the pending doctype and returns to {@code Data}.
  * End of input does the same after reporting an EOF error and setting {@code forceQuirks}.
  * Any other character is an error that moves to {@code BogusDoctype}, deliberately without
  * setting {@code forceQuirks}.
  *
  * @param t tokeniser holding the pending doctype
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   final char ch = r.consume();
   if (ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f' || ch == ' ') {
     return; // whitespace: stay in this state
   }

   if (ch == '>') {
     t.emitDoctypePending();
     t.transition(Data);
   } else if (ch == eof) {
     t.eofError(this);
     t.doctypePending.forceQuirks = true;
     t.emitDoctypePending();
     t.transition(Data);
   } else {
     // NOT force quirks
     t.error(this);
     t.transition(BogusDoctype);
   }
 }
 /**
  * Accumulates the doctype name.
  *
  * <p>Letter runs are appended lower-cased. {@code >} emits the pending doctype and returns to
  * {@code Data}; whitespace moves to {@code AfterDoctypeName}; NUL is reported and replaced by
  * {@code replacementChar}; end of input emits the doctype with {@code forceQuirks} set after an
  * EOF error. Any other character is appended to the name unchanged.
  *
  * @param t tokeniser holding the pending doctype
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   if (r.matchesLetter()) {
     final String letters = r.consumeLetterSequence();
     t.doctypePending.name.append(letters.toLowerCase());
     return;
   }

   final char ch = r.consume();
   if (ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f' || ch == ' ') {
     t.transition(AfterDoctypeName);
   } else if (ch == '>') {
     t.emitDoctypePending();
     t.transition(Data);
   } else if (ch == nullChar) {
     t.error(this);
     t.doctypePending.name.append(replacementChar);
   } else if (ch == eof) {
     t.eofError(this);
     t.doctypePending.forceQuirks = true;
     t.emitDoctypePending();
     t.transition(Data);
   } else {
     t.doctypePending.name.append(ch);
   }
 }
 /**
  * Emits script data, watching for {@code <}.
  *
  * <p>A less-than sign advances to {@code ScriptDataLessthanSign}; NUL is reported, skipped and
  * emitted as {@code replacementChar}; end of input emits a {@code Token.EOF}; other text is
  * emitted as one run up to the next less-than sign or NUL.
  *
  * @param t tokeniser receiving emitted data and state changes
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   final char next = r.current();
   if (next == '<') {
     t.advanceTransition(ScriptDataLessthanSign);
   } else if (next == nullChar) {
     t.error(this);
     r.advance();
     t.emit(replacementChar);
   } else if (next == eof) {
     t.emit(new Token.EOF());
   } else {
     final String run = r.consumeToAny('<', nullChar);
     t.emit(run);
   }
 }
Example #24
0
 /**
  * Accumulates comment data.
  *
  * <p>This state peeks at the current character rather than consuming it, so every branch that
  * stays in this state must itself move the reader forward. A dash advances to
  * {@code CommentEndDash}; NUL is reported, skipped and stored as {@code replacementChar}; end
  * of input emits the pending comment after an EOF error and returns to {@code Data}; other
  * text is appended as one run up to the next dash or NUL.
  *
  * @param t tokeniser holding the pending comment
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   char c = r.current();
   switch (c) {
     case '-':
       t.advanceTransition(CommentEndDash);
       break;
     case nullChar:
       t.error(this);
       // Step past the NUL: without this the same character is seen again on the next call
       // and the state never makes progress.
       r.advance();
       t.commentPending.data.append(replacementChar);
       break;
     case eof:
       t.eofError(this);
       t.emitCommentPending();
       t.transition(Data);
       break;
     default:
       t.commentPending.data.append(r.consumeToAny('-', nullChar));
   }
 }
Example #25
0
  /**
   * Drives the parse: reads tokens from {@code tokeniser} and passes each one to
   * {@code process}, stopping after the EOF token has been processed.
   */
  protected void runParser() {
    Token current;
    do {
      current = tokeniser.read();
      process(current);
    } while (current.type != Token.TokenType.EOF);
  }
 /**
  * Decides what follows the opening of an end tag.
  *
  * <p>An exhausted reader is an EOF error: {@code "</"} is emitted as text and the tokeniser
  * returns to {@code Data}. A letter creates a pending tag via {@code createTagPending(false)}
  * and moves to {@code TagName}. Anything else is an error: {@code >} advances to {@code Data},
  * any other character advances to {@code BogusComment}.
  *
  * @param t tokeniser receiving emitted data and state changes
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   if (r.isEmpty()) {
     t.eofError(this);
     t.emit("</");
     t.transition(Data);
     return;
   }
   if (r.matchesLetter()) {
     t.createTagPending(false);
     t.transition(TagName);
     return;
   }

   final boolean emptyEndTag = r.matches('>');
   t.error(this);
   t.advanceTransition(emptyEndTag ? Data : BogusComment);
 }
 // from tagname <xxx
 /**
  * Finds the start of the next attribute name inside a tag.
  *
  * <p>Whitespace is skipped; {@code /} moves to {@code SelfClosingStartTag}; {@code >} emits the
  * pending tag and returns to {@code Data}; end of input is an EOF error returning to
  * {@code Data}. A quote, {@code <} or {@code =} is an error but still starts a new attribute
  * whose name begins with that character. Everything else starts a new attribute and is pushed
  * back for {@code AttributeName} to read (NUL is also reported as an error first).
  *
  * @param t tokeniser holding the pending tag
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   final char ch = r.consume();
   if (ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f' || ch == ' ') {
     return; // ignore whitespace
   }

   if (ch == '/') {
     t.transition(SelfClosingStartTag);
   } else if (ch == '>') {
     t.emitTagPending();
     t.transition(Data);
   } else if (ch == eof) {
     t.eofError(this);
     t.transition(Data);
   } else if (ch == '"' || ch == '\'' || ch == '<' || ch == '=') {
     t.error(this);
     t.tagPending.newAttribute();
     t.tagPending.appendAttributeName(ch);
     t.transition(AttributeName);
   } else {
     // A-Z, NUL, anything else: the character is re-read by AttributeName
     if (ch == nullChar) {
       t.error(this);
     }
     t.tagPending.newAttribute();
     r.unconsume();
     t.transition(AttributeName);
   }
 }
 // from < in rcdata
 /**
  * Handles the character after a {@code <} seen in RCDATA.
  *
  * <p>A slash creates the temp buffer and advances to {@code RCDATAEndTagOpen}. If a letter
  * follows and the remaining input does not contain the appropriate end tag (compared ignoring
  * case), an end tag for the current element is emitted, the reader is stepped back and the
  * tokeniser returns to {@code Data}. Otherwise {@code <} is emitted as text and the tokeniser
  * returns to {@code Rcdata}.
  *
  * @param t tokeniser holding the pending tag
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   if (r.matches('/')) {
     t.createTempBuffer();
     t.advanceTransition(RCDATAEndTagOpen);
     return;
   }

   final boolean startTagWithoutClose =
       r.matchesLetter() && !r.containsIgnoreCase("</" + t.appropriateEndTagName());
   if (!startTagWithoutClose) {
     t.emit("<");
     t.transition(Rcdata);
     return;
   }

   // diverge from spec: a start tag was found but no appropriate end tag (e.g. </title>)
   // exists, so close the element here instead of consuming to EOF
   t.tagPending = new Token.EndTag(t.appropriateEndTagName());
   t.emitTagPending();
   r.unconsume(); // undo "<"
   t.transition(Data);
 }
Example #29
0
  /**
   * Drives the parse: reads tokens from {@code tokeniser} and passes each one to
   * {@code process} until the EOF token has been processed, then calls {@code updateDoc()}.
   */
  protected void runParser() {
    Token current;
    do {
      current = tokeniser.read();
      process(current);
    } while (current.type != Token.TokenType.EOF);
    updateDoc();
  }
    // from < or </ in data, will have start or end tag pending
    /**
     * Accumulates the name of the pending start or end tag.
     *
     * <p>The run of characters up to the next whitespace, {@code /}, {@code >} or NUL is
     * lower-cased and appended to the tag name; the delimiter then decides what happens:
     * whitespace moves to {@code BeforeAttributeName}, {@code /} to {@code SelfClosingStartTag},
     * {@code >} emits the tag and returns to {@code Data}, NUL is reported and replaced by
     * {@code replacementStr}, and end of input is an EOF error returning to {@code Data}.
     *
     * @param t tokeniser holding the pending tag
     * @param r reader supplying the input characters
     */
    void read(Tokeniser t, CharacterReader r) {
      // previous TagOpen state did NOT consume, will have a letter char in current
      String tagName =
          r.consumeToAny('\t', '\n', '\r', '\f', ' ', '/', '>', nullChar).toLowerCase();
      t.tagPending.appendTagName(tagName);

      switch (r.consume()) {
        case '\t':
        case '\n':
        case '\r':
        case '\f':
        case ' ':
          t.transition(BeforeAttributeName);
          break;
        case '/':
          t.transition(SelfClosingStartTag);
          break;
        case '>':
          t.emitTagPending();
          t.transition(Data);
          break;
        case nullChar:
          // NUL in a tag name is a parse error: report it, as the other states do for NUL,
          // before substituting the replacement
          t.error(this);
          t.tagPending.appendTagName(replacementStr);
          break;
        case eof: // should emit pending tag?
          t.eofError(this);
          t.transition(Data);
          break;
          // no default, as covered with above consumeToAny
      }
    }