/**
 * Starts a DOCTYPE name: skips leading whitespace, creates the pending doctype token on the
 * first name character and moves to {@code DoctypeName}.
 *
 * <p>The letter branch does not call {@code consume()}, so the letter is left for
 * {@code DoctypeName}. A NUL is reported as an error and recorded as the replacement character.
 * EOF reports an EOF error, emits a doctype with {@code forceQuirks} set and returns to
 * {@code Data}.
 *
 * @param t tokeniser that owns the pending doctype and the current state
 * @param r reader supplying the input characters
 */
void read(Tokeniser t, CharacterReader r) {
   if (r.matchesLetter()) {
     t.createDoctypePending();
     t.transition(DoctypeName);
     return;
   }
   char c = r.consume();
   switch (c) {
     case '\t':
     case '\n':
     case '\r':
     case '\f':
     case ' ':
       break; // ignore whitespace
     case nullChar:
       t.error(this);
       // A NUL may be the very first character of the name, so the pending doctype has to be
       // created here too, exactly as the letter, EOF and default branches do. Without this the
       // append below would hit a stale (or not yet created) doctype token.
       t.createDoctypePending();
       t.doctypePending.name.append(replacementChar);
       t.transition(DoctypeName);
       break;
     case eof:
       t.eofError(this);
       t.createDoctypePending();
       t.doctypePending.forceQuirks = true;
       t.emitDoctypePending();
       t.transition(Data);
       break;
     default:
       t.createDoctypePending();
       t.doctypePending.name.append(c);
       t.transition(DoctypeName);
   }
 }
 /**
  * Consumes one character while waiting for a DOCTYPE system identifier to open.
  *
  * <p>Whitespace is skipped. A double or single quote moves to the matching quoted
  * system-identifier state. Anything else is an error that sets {@code forceQuirks}: {@code >}
  * and EOF emit the pending doctype and return to {@code Data}; any other character moves to
  * {@code BogusDoctype} without emitting.
  */
 void read(Tokeniser t, CharacterReader r) {
   final char ch = r.consume();
   if (ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f' || ch == ' ') {
     return; // whitespace: nothing to do, remain in this state
   }
   if (ch == '"') {
     // nothing is appended here, so the system identifier starts out empty
     t.transition(DoctypeSystemIdentifier_doubleQuoted);
     return;
   }
   if (ch == '\'') {
     // same as above, for the single-quoted form of the system identifier
     t.transition(DoctypeSystemIdentifier_singleQuoted);
     return;
   }

   // Every remaining input is an error and sets the force-quirks flag.
   if (ch == eof) {
     t.eofError(this);
   } else {
     t.error(this);
   }
   t.doctypePending.forceQuirks = true;
   if (ch == '>' || ch == eof) {
     t.emitDoctypePending();
     t.transition(Data);
   } else {
     t.transition(BogusDoctype);
   }
 }
    /**
     * Accumulates a single-quoted attribute value on the pending tag.
     *
     * <p>First appends everything up to the next {@code '}, {@code &} or NUL, then handles the
     * character that stopped the run: a quote moves to {@code AfterAttributeValue_quoted}, an
     * ampersand is resolved as a character reference (or kept literally when it is not one), a
     * NUL is reported and replaced, and EOF reports an EOF error and returns to {@code Data}.
     */
    void read(Tokeniser t, CharacterReader r) {
      final String run = r.consumeToAny('\'', '&', nullChar);
      if (run.length() > 0) {
        t.tagPending.appendAttributeValue(run);
      }

      // consumeToAny only stops at one of the characters handled below (or at end of input),
      // so no catch-all branch is needed.
      final char stop = r.consume();
      if (stop == '\'') {
        t.transition(AfterAttributeValue_quoted);
      } else if (stop == '&') {
        final Character decoded = t.consumeCharacterReference('\'', true);
        if (decoded == null) {
          t.tagPending.appendAttributeValue('&');
        } else {
          t.tagPending.appendAttributeValue(decoded);
        }
      } else if (stop == nullChar) {
        t.error(this);
        t.tagPending.appendAttributeValue(replacementChar);
      } else if (stop == eof) {
        t.eofError(this);
        t.transition(Data);
      }
    }
 /**
  * Decides what follows a closed, quoted attribute value.
  *
  * <p>Whitespace moves to {@code BeforeAttributeName}, {@code /} to {@code SelfClosingStartTag},
  * {@code >} emits the pending tag and returns to {@code Data}, and EOF reports an EOF error
  * and returns to {@code Data}. Any other character is an error; it is pushed back and
  * re-read in {@code BeforeAttributeName}.
  */
 void read(Tokeniser t, CharacterReader r) {
   final char ch = r.consume();
   if (ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f' || ch == ' ') {
     t.transition(BeforeAttributeName);
   } else if (ch == '/') {
     t.transition(SelfClosingStartTag);
   } else if (ch == '>') {
     t.emitTagPending();
     t.transition(Data);
   } else if (ch == eof) {
     t.eofError(this);
     t.transition(Data);
   } else {
     t.error(this);
     r.unconsume(); // let the next state see this character again
     t.transition(BeforeAttributeName);
   }
 }
    /**
     * Reads the name of a candidate end tag and decides whether it really closes the element.
     *
     * <p>Letters are appended lower-cased to the pending tag name and verbatim to the data
     * buffer. Once the letters stop, a terminator (whitespace, {@code /} or {@code >}) is only
     * honoured when the tokeniser reports an appropriate end tag and input remains; every other
     * situation is delegated to {@code anythingElse}.
     */
    void read(Tokeniser t, CharacterReader r) {
      if (r.matchesLetter()) {
        final String letters = r.consumeLetterSequence();
        t.tagPending.appendTagName(letters.toLowerCase());
        t.dataBuffer.append(letters);
        return;
      }

      // Same short-circuit order as checking "appropriate end tag AND input not empty".
      if (!t.isAppropriateEndTagToken() || r.isEmpty()) {
        anythingElse(t, r);
        return;
      }

      final char ch = r.consume();
      if (ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f' || ch == ' ') {
        t.transition(BeforeAttributeName);
      } else if (ch == '/') {
        t.transition(SelfClosingStartTag);
      } else if (ch == '>') {
        t.emitTagPending();
        t.transition(Data);
      } else {
        // not a terminator: keep the character in the buffer before falling back
        t.dataBuffer.append(ch);
        anythingElse(t, r);
      }
    }
    /**
     * Handles the character after two dashes inside escaped script data.
     *
     * <p>Another {@code -} is emitted and the state is kept; {@code <} moves to
     * {@code ScriptDataEscapedLessthanSign}; {@code >} is emitted and moves to
     * {@code ScriptData}. A NUL is reported and emitted as the replacement character, any other
     * character is emitted as is, and both return to {@code ScriptDataEscaped}. Empty input
     * reports an EOF error and returns to {@code Data}.
     */
    void read(Tokeniser t, CharacterReader r) {
      if (r.isEmpty()) {
        t.eofError(this);
        t.transition(Data);
        return;
      }

      final char ch = r.consume();
      if (ch == '-') {
        t.emit(ch);
        return;
      }
      if (ch == '<') {
        t.transition(ScriptDataEscapedLessthanSign);
        return;
      }
      if (ch == '>') {
        t.emit(ch);
        t.transition(ScriptData);
        return;
      }

      if (ch == nullChar) {
        t.error(this);
        t.emit(replacementChar);
      } else {
        t.emit(ch);
      }
      t.transition(ScriptDataEscaped);
    }
    // from < or </ in data, will have start or end tag pending
    /**
     * Reads a tag name into the pending tag and dispatches on the character that ends it.
     *
     * <p>Whitespace moves to {@code BeforeAttributeName}, {@code /} to
     * {@code SelfClosingStartTag}, and {@code >} emits the pending tag and returns to
     * {@code Data}. A NUL is added to the name as the replacement string, keeping this state.
     * EOF reports an EOF error and returns to {@code Data} without emitting the pending tag.
     */
    void read(Tokeniser t, CharacterReader r) {
      // The previous TagOpen state did NOT consume, so the reader is still on a letter here.
      final String rawName = r.consumeToAny('\t', '\n', '\r', '\f', ' ', '/', '>', nullChar);
      t.tagPending.appendTagName(rawName.toLowerCase());

      // consumeToAny stops only at the characters handled below (or at end of input).
      final char stop = r.consume();
      if (stop == '\t' || stop == '\n' || stop == '\r' || stop == '\f' || stop == ' ') {
        t.transition(BeforeAttributeName);
      } else if (stop == '/') {
        t.transition(SelfClosingStartTag);
      } else if (stop == '>') {
        t.emitTagPending();
        t.transition(Data);
      } else if (stop == nullChar) {
        t.tagPending.appendTagName(replacementStr); // replacement
      } else if (stop == eof) {
        // should emit pending tag?
        t.eofError(this);
        t.transition(Data);
      }
    }
 /**
  * Emits double-escaped script data, watching for the characters that change state.
  *
  * <p>{@code -} and {@code <} are emitted and advance to the dash / less-than-sign states.
  * A NUL is reported, skipped and emitted as the replacement character. EOF reports an EOF
  * error and returns to {@code Data}. Any other input is emitted in one run up to the next
  * {@code -}, {@code <} or NUL.
  */
 void read(Tokeniser t, CharacterReader r) {
   final char next = r.current();
   if (next == '-') {
     t.emit(next);
     t.advanceTransition(ScriptDataDoubleEscapedDash);
   } else if (next == '<') {
     t.emit(next);
     t.advanceTransition(ScriptDataDoubleEscapedLessthanSign);
   } else if (next == nullChar) {
     t.error(this);
     r.advance();
     t.emit(replacementChar);
   } else if (next == eof) {
     t.eofError(this);
     t.transition(Data);
   } else {
     final String run = r.consumeToAny('-', '<', nullChar);
     t.emit(run);
   }
 }
    /**
     * Reads a tag-like name inside double-escaped script data and picks the next state.
     *
     * <p>Letters are emitted verbatim and also buffered lower-cased. When whitespace, {@code /}
     * or {@code >} ends the name, the state becomes {@code ScriptDataEscaped} if the buffer
     * equals {@code "script"} and {@code ScriptDataDoubleEscaped} otherwise; the terminator is
     * then emitted. Any other character is pushed back and {@code ScriptDataDoubleEscaped}
     * resumes.
     */
    void read(Tokeniser t, CharacterReader r) {
      if (r.matchesLetter()) {
        final String letters = r.consumeLetterSequence();
        t.dataBuffer.append(letters.toLowerCase());
        t.emit(letters);
        return;
      }

      final char ch = r.consume();
      final boolean endsName =
          ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f' || ch == ' ' || ch == '/'
              || ch == '>';
      if (!endsName) {
        r.unconsume();
        t.transition(ScriptDataDoubleEscaped);
        return;
      }

      if (t.dataBuffer.toString().equals("script")) {
        t.transition(ScriptDataEscaped);
      } else {
        t.transition(ScriptDataDoubleEscaped);
      }
      t.emit(ch); // the terminator is emitted after the state switch
    }
 /**
  * Accumulates the DOCTYPE name on the pending doctype.
  *
  * <p>Letter runs are appended lower-cased. {@code >} emits the doctype and returns to
  * {@code Data}; whitespace moves to {@code AfterDoctypeName}. A NUL is reported and appended
  * as the replacement character. EOF reports an EOF error, sets {@code forceQuirks}, emits and
  * returns to {@code Data}. Any other character is appended unchanged.
  */
 void read(Tokeniser t, CharacterReader r) {
   if (r.matchesLetter()) {
     final String letters = r.consumeLetterSequence();
     t.doctypePending.name.append(letters.toLowerCase());
     return;
   }

   final char ch = r.consume();
   if (ch == '>') {
     t.emitDoctypePending();
     t.transition(Data);
   } else if (ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f' || ch == ' ') {
     t.transition(AfterDoctypeName);
   } else if (ch == nullChar) {
     t.error(this);
     t.doctypePending.name.append(replacementChar);
   } else if (ch == eof) {
     t.eofError(this);
     t.doctypePending.forceQuirks = true;
     t.emitDoctypePending();
     t.transition(Data);
   } else {
     t.doctypePending.name.append(ch);
   }
 }
    /**
     * Emits escaped script data, watching for the characters that change state.
     *
     * <p>{@code -} is emitted and advances to {@code ScriptDataEscapedDash}; {@code <} advances
     * to {@code ScriptDataEscapedLessthanSign} without emitting. A NUL is reported, skipped and
     * emitted as the replacement character. Other input is emitted in one run up to the next
     * {@code -}, {@code <} or NUL. Empty input reports an EOF error and returns to {@code Data}.
     */
    void read(Tokeniser t, CharacterReader r) {
      if (r.isEmpty()) {
        t.eofError(this);
        t.transition(Data);
        return;
      }

      final char next = r.current();
      if (next == '-') {
        t.emit('-');
        t.advanceTransition(ScriptDataEscapedDash);
      } else if (next == '<') {
        t.advanceTransition(ScriptDataEscapedLessthanSign);
      } else if (next == nullChar) {
        t.error(this);
        r.advance();
        t.emit(replacementChar);
      } else {
        final String run = r.consumeToAny('-', '<', nullChar);
        t.emit(run);
      }
    }
 /**
  * Turns malformed markup into a comment token: everything from the character that led here
  * up to (not including) the next {@code >} becomes the comment data. The comment is emitted
  * and the tokeniser advances into {@code Data}.
  */
 void read(Tokeniser t, CharacterReader r) {
   // todo: handle bogus comment starting from eof. when does that trigger?
   r.unconsume(); // step back so the character that brought us here is captured too
   final Token.Comment bogus = new Token.Comment();
   final String text = r.consumeTo('>');
   bogus.data.append(text);
   // todo: replace nullChar with replaceChar
   t.emit(bogus);
   t.advanceTransition(Data);
 }
 /**
  * Handles the input after {@code </} inside escaped script data.
  *
  * <p>If a letter follows, a pending end tag is created and seeded with that letter
  * (lower-cased in the tag name, verbatim in the data buffer) before advancing to
  * {@code ScriptDataEscapedEndTagName}. Otherwise {@code </} is emitted as text and
  * {@code ScriptDataEscaped} resumes.
  */
 void read(Tokeniser t, CharacterReader r) {
   if (!r.matchesLetter()) {
     t.emit("</");
     t.transition(ScriptDataEscaped);
     return;
   }

   t.createTagPending(false);
   t.tagPending.appendTagName(Character.toLowerCase(r.current()));
   t.dataBuffer.append(r.current());
   t.advanceTransition(ScriptDataEscapedEndTagName);
 }
 /**
  * Handles the input after {@code <} inside escaped script data.
  *
  * <p>A letter starts a fresh temp buffer holding the lower-cased letter, emits {@code <} plus
  * the letter, and advances to {@code ScriptDataDoubleEscapeStart}. A {@code /} starts a fresh
  * temp buffer and advances to {@code ScriptDataEscapedEndTagOpen}. Anything else emits
  * {@code <} and resumes {@code ScriptDataEscaped}.
  */
 void read(Tokeniser t, CharacterReader r) {
   if (r.matchesLetter()) {
     t.createTempBuffer();
     t.dataBuffer.append(Character.toLowerCase(r.current()));
     t.emit("<" + r.current());
     t.advanceTransition(ScriptDataDoubleEscapeStart);
     return;
   }

   if (r.matches('/')) {
     t.createTempBuffer();
     t.advanceTransition(ScriptDataEscapedEndTagOpen);
     return;
   }

   t.emit('<');
   t.transition(ScriptDataEscaped);
 }
 /**
  * Emits raw text with no markup recognition: only NUL and EOF are special.
  *
  * <p>A NUL is reported, skipped and emitted as the replacement character; EOF emits an EOF
  * token; everything else is emitted in one run up to the next NUL.
  */
 void read(Tokeniser t, CharacterReader r) {
   final char next = r.current();
   if (next == nullChar) {
     t.error(this);
     r.advance();
     t.emit(replacementChar);
   } else if (next == eof) {
     t.emit(new Token.EOF());
   } else {
     final String run = r.consumeTo(nullChar);
     t.emit(run);
   }
 }
 /**
  * Handles the character after {@code <} in script data.
  *
  * <p>{@code /} starts a fresh temp buffer and moves to {@code ScriptDataEndTagOpen};
  * {@code !} emits {@code <!} and moves to {@code ScriptDataEscapeStart}. Any other character
  * emits {@code <}, is pushed back, and {@code ScriptData} resumes.
  */
 void read(Tokeniser t, CharacterReader r) {
   final char next = r.consume();
   if (next == '/') {
     t.createTempBuffer();
     t.transition(ScriptDataEndTagOpen);
   } else if (next == '!') {
     t.emit("<!");
     t.transition(ScriptDataEscapeStart);
   } else {
     t.emit("<");
     r.unconsume(); // the character belongs to the script data, not to a tag
     t.transition(ScriptData);
   }
 }
 /**
  * Handles the input that follows a DOCTYPE system identifier.
  *
  * <p>Whitespace is skipped. {@code >} emits the pending doctype and returns to {@code Data}.
  * EOF reports an EOF error, sets {@code forceQuirks}, emits and returns to {@code Data}. Any
  * other character is an error that moves to {@code BogusDoctype} without setting
  * {@code forceQuirks}.
  */
 void read(Tokeniser t, CharacterReader r) {
   final char ch = r.consume();
   if (ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f' || ch == ' ') {
     return; // whitespace is skipped
   }

   if (ch == '>') {
     t.emitDoctypePending();
     t.transition(Data);
   } else if (ch == eof) {
     t.eofError(this);
     t.doctypePending.forceQuirks = true;
     t.emitDoctypePending();
     t.transition(Data);
   } else {
     t.error(this);
     t.transition(BogusDoctype); // deliberately NOT force quirks
   }
 }
 /**
  * Accumulates a single-quoted DOCTYPE system identifier.
  *
  * <p>The closing quote moves to {@code AfterDoctypeSystemIdentifier}. A NUL is reported and
  * appended as the replacement character. {@code >} (an error) and EOF (an EOF error) both set
  * {@code forceQuirks}, emit the doctype and return to {@code Data}. Any other character is
  * appended to the identifier.
  */
 void read(Tokeniser t, CharacterReader r) {
   final char ch = r.consume();
   if (ch == '\'') {
     t.transition(AfterDoctypeSystemIdentifier);
     return;
   }
   if (ch == nullChar) {
     t.error(this);
     t.doctypePending.systemIdentifier.append(replacementChar);
     return;
   }
   if (ch != '>' && ch != eof) {
     t.doctypePending.systemIdentifier.append(ch);
     return;
   }

   // Premature end of the doctype: report, force quirks, emit what we have.
   if (ch == '>') {
     t.error(this);
   } else {
     t.eofError(this);
   }
   t.doctypePending.forceQuirks = true;
   t.emitDoctypePending();
   t.transition(Data);
 }
 /**
  * Handles the input after {@code </}.
  *
  * <p>Empty input reports an EOF error, emits {@code </} as text and returns to {@code Data}.
  * A letter creates a pending end tag and moves to {@code TagName} without consuming it.
  * {@code >} is an error that advances into {@code Data}; anything else is an error that
  * advances into {@code BogusComment}.
  */
 void read(Tokeniser t, CharacterReader r) {
   if (r.isEmpty()) {
     t.eofError(this);
     t.emit("</");
     t.transition(Data);
     return;
   }
   if (r.matchesLetter()) {
     t.createTagPending(false);
     t.transition(TagName);
     return;
   }

   // Both remaining outcomes are errors; only the target state differs.
   if (r.matches('>')) {
     t.error(this);
     t.advanceTransition(Data);
   } else {
     t.error(this);
     t.advanceTransition(BogusComment);
   }
 }
 /**
  * Handles the character after {@code --!} inside a comment.
  *
  * <p>{@code -} appends {@code --!} to the comment and moves to {@code CommentEndDash}.
  * {@code >} and EOF (the latter with an EOF error) emit the comment and return to
  * {@code Data}. A NUL is reported and appended, after {@code --!}, as the replacement
  * character; any other character is appended after {@code --!} unchanged. Both return to
  * {@code Comment}.
  */
 void read(Tokeniser t, CharacterReader r) {
   final char ch = r.consume();
   if (ch == '-') {
     t.commentPending.data.append("--!");
     t.transition(CommentEndDash);
   } else if (ch == '>') {
     t.emitCommentPending();
     t.transition(Data);
   } else if (ch == eof) {
     t.eofError(this);
     t.emitCommentPending();
     t.transition(Data);
   } else if (ch == nullChar) {
     t.error(this);
     t.commentPending.data.append("--!").append(replacementChar);
     t.transition(Comment);
   } else {
     t.commentPending.data.append("--!").append(ch);
     t.transition(Comment);
   }
 }
 /**
  * Handles the character after a single dash in double-escaped script data.
  *
  * <p>{@code -} is emitted and moves to {@code ScriptDataDoubleEscapedDashDash}; {@code <} is
  * emitted and moves to {@code ScriptDataDoubleEscapedLessthanSign}. EOF reports an EOF error
  * and returns to {@code Data}. A NUL is reported and emitted as the replacement character,
  * any other character is emitted as is, and both return to {@code ScriptDataDoubleEscaped}.
  */
 void read(Tokeniser t, CharacterReader r) {
   final char ch = r.consume();
   if (ch == '-') {
     t.emit(ch);
     t.transition(ScriptDataDoubleEscapedDashDash);
     return;
   }
   if (ch == '<') {
     t.emit(ch);
     t.transition(ScriptDataDoubleEscapedLessthanSign);
     return;
   }
   if (ch == eof) {
     t.eofError(this);
     t.transition(Data);
     return;
   }

   if (ch == nullChar) {
     t.error(this);
     t.emit(replacementChar);
   } else {
     t.emit(ch);
   }
   t.transition(ScriptDataDoubleEscaped);
 }
 /**
  * Decides how an attribute value begins.
  *
  * <p>Whitespace is skipped. A quote selects the double- or single-quoted value state. EOF
  * reports an EOF error and returns to {@code Data}. {@code >} is an error that emits the
  * pending tag and returns to {@code Data}. NUL, {@code <}, {@code =} and a backtick are
  * errors whose character (the replacement character for NUL) becomes the start of an
  * unquoted value. Every other character, including {@code &}, is pushed back and re-read in
  * {@code AttributeValue_unquoted}.
  */
 void read(Tokeniser t, CharacterReader r) {
   final char ch = r.consume();
   if (ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f' || ch == ' ') {
     return; // ignore
   }
   if (ch == '"') {
     t.transition(AttributeValue_doubleQuoted);
     return;
   }
   if (ch == '\'') {
     t.transition(AttributeValue_singleQuoted);
     return;
   }
   if (ch == eof) {
     t.eofError(this);
     t.transition(Data);
     return;
   }
   if (ch == '>') {
     t.error(this);
     t.emitTagPending();
     t.transition(Data);
     return;
   }
   if (ch == nullChar || ch == '<' || ch == '=' || ch == '`') {
     // Erroneous first character: it is consumed and recorded here instead of being re-read.
     t.error(this);
     if (ch == nullChar) {
       t.tagPending.appendAttributeValue(replacementChar);
     } else {
       t.tagPending.appendAttributeValue(ch);
     }
     t.transition(AttributeValue_unquoted);
     return;
   }

   // '&' and all ordinary characters: hand the character to the unquoted state untouched.
   r.unconsume();
   t.transition(AttributeValue_unquoted);
 }
 // from < in rcdata
 /**
  * Handles the input after {@code <}.
  *
  * <p>{@code /} starts a fresh temp buffer and advances to {@code RCDATAEndTagOpen}. If a
  * letter follows and the remaining input contains no {@code </} plus the appropriate end tag
  * name (ignoring case), an end tag with that name is emitted, the {@code <} is pushed back
  * and tokenising continues in {@code Data}. Otherwise {@code <} is emitted as text and
  * {@code Rcdata} resumes.
  */
 void read(Tokeniser t, CharacterReader r) {
   if (r.matches('/')) {
     t.createTempBuffer();
     t.advanceTransition(RCDATAEndTagOpen);
     return;
   }

   final boolean startTagWithoutCloser =
       r.matchesLetter() && !r.containsIgnoreCase("</" + t.appropriateEndTagName());
   if (!startTagWithoutCloser) {
     t.emit("<");
     t.transition(Rcdata);
     return;
   }

   // Diverges from the spec: a start tag was found but no appropriate end tag (e.g. </title>)
   // exists further on, so rather than consuming to EOF, close the element and break out here.
   t.tagPending = new Token.EndTag(t.appropriateEndTagName());
   t.emitTagPending();
   r.unconsume(); // undo "<"
   t.transition(Data);
 }
 /**
  * Checks for a dash: if one follows it is emitted and the tokeniser advances to
  * {@code ScriptDataEscapedDashDash}; otherwise {@code ScriptData} resumes with nothing
  * consumed.
  */
 void read(Tokeniser t, CharacterReader r) {
   if (!r.matches('-')) {
     t.transition(ScriptData);
     return;
   }

   t.emit('-');
   t.advanceTransition(ScriptDataEscapedDashDash);
 }
 /**
  * Handles the input after {@code </} in script data: a letter creates a pending end tag and
  * moves to {@code ScriptDataEndTagName}; anything else emits {@code </} as text and resumes
  * {@code ScriptData}.
  */
 void read(Tokeniser t, CharacterReader r) {
   if (!r.matchesLetter()) {
     t.emit("</");
     t.transition(ScriptData);
     return;
   }

   t.createTagPending(false);
   t.transition(ScriptDataEndTagName);
 }
 /**
  * Recognises what kind of markup declaration is opening.
  *
  * <p>{@code --} starts a pending comment and moves to {@code CommentStart}; {@code DOCTYPE}
  * (any case) moves to {@code Doctype}; {@code [CDATA[} moves to {@code CdataSection}.
  * Anything else is an error that advances into {@code BogusComment}.
  */
 void read(Tokeniser t, CharacterReader r) {
   if (r.matchConsume("--")) {
     t.createCommentPending();
     t.transition(CommentStart);
     return;
   }
   if (r.matchConsumeIgnoreCase("DOCTYPE")) {
     t.transition(Doctype);
     return;
   }
   // todo: should actually check the current namespace, since only non-html allows cdata.
   // Until namespaces are implemented properly, keep handling this as cdata, i.e. instead of:
   //   if (!t.currentNodeInHtmlNS() && r.matchConsume("[CDATA["))
   if (r.matchConsume("[CDATA[")) {
     t.transition(CdataSection);
     return;
   }

   t.error(this);
   // advance so this character gets in bogus comment data's rewind
   t.advanceTransition(BogusComment);
 }
 /**
  * Checks for a slash: if one follows it is emitted, a fresh temp buffer is started and the
  * tokeniser advances to {@code ScriptDataDoubleEscapeEnd}; otherwise
  * {@code ScriptDataDoubleEscaped} resumes with nothing consumed.
  */
 void read(Tokeniser t, CharacterReader r) {
   if (!r.matches('/')) {
     t.transition(ScriptDataDoubleEscaped);
     return;
   }

   t.emit('/');
   t.createTempBuffer();
   t.advanceTransition(ScriptDataDoubleEscapeEnd);
 }
 /**
  * Handles the input after {@code <}: a slash starts a fresh temp buffer and advances to
  * {@code RawtextEndTagOpen}; anything else emits {@code <} as text and resumes
  * {@code Rawtext}.
  */
 void read(Tokeniser t, CharacterReader r) {
   if (!r.matches('/')) {
     t.emit('<');
     t.transition(Rawtext);
     return;
   }

   t.createTempBuffer();
   t.advanceTransition(RawtextEndTagOpen);
 }
 /**
  * Emits script data, watching for the characters that change state.
  *
  * <p>{@code <} advances to {@code ScriptDataLessthanSign} without emitting. A NUL is
  * reported, skipped and emitted as the replacement character. EOF emits an EOF token. Any
  * other input is emitted in one run up to the next {@code <} or NUL.
  */
 void read(Tokeniser t, CharacterReader r) {
   final char next = r.current();
   if (next == '<') {
     t.advanceTransition(ScriptDataLessthanSign);
   } else if (next == nullChar) {
     t.error(this);
     r.advance();
     t.emit(replacementChar);
   } else if (next == eof) {
     t.emit(new Token.EOF());
   } else {
     final String run = r.consumeToAny('<', nullChar);
     t.emit(run);
   }
 }
 /**
  * Handles the input that follows a completed attribute name.
  *
  * <p>Whitespace is skipped. {@code /} moves to {@code SelfClosingStartTag}, {@code =} to
  * {@code BeforeAttributeValue}, and {@code >} emits the pending tag and returns to
  * {@code Data}. EOF reports an EOF error and returns to {@code Data}. Every other character
  * begins a <em>new</em> attribute and moves to {@code AttributeName}: NUL (recorded as the
  * replacement character) and the quote / {@code <} characters are reported as errors and
  * consumed here, while any remaining character is pushed back for {@code AttributeName}.
  *
  * @param t tokeniser that owns the pending tag and the current state
  * @param r reader supplying the input characters
  */
 void read(Tokeniser t, CharacterReader r) {
   char c = r.consume();
   switch (c) {
     case '\t':
     case '\n':
     case '\r':
     case '\f':
     case ' ':
       // ignore
       break;
     case '/':
       t.transition(SelfClosingStartTag);
       break;
     case '=':
       t.transition(BeforeAttributeValue);
       break;
     case '>':
       t.emitTagPending();
       t.transition(Data);
       break;
     case nullChar:
       t.error(this);
       // The previous attribute name is already complete, so a NUL here starts a new
       // attribute, just like the other name-starting branches below. Without newAttribute()
       // the replacement character would be glued onto the previous attribute's name.
       t.tagPending.newAttribute();
       t.tagPending.appendAttributeName(replacementChar);
       t.transition(AttributeName);
       break;
     case eof:
       t.eofError(this);
       t.transition(Data);
       break;
     case '"':
     case '\'':
     case '<':
       t.error(this);
       t.tagPending.newAttribute();
       t.tagPending.appendAttributeName(c);
       t.transition(AttributeName);
       break;
     default: // A-Z, anything else
       t.tagPending.newAttribute();
       r.unconsume();
       t.transition(AttributeName);
   }
 }