void read(Tokeniser t, CharacterReader r) { if (r.matchesLetter()) { String name = r.consumeLetterSequence(); t.dataBuffer.append(name.toLowerCase()); t.emit(name); return; } char c = r.consume(); switch (c) { case '\t': case '\n': case '\r': case '\f': case ' ': case '/': case '>': if (t.dataBuffer.toString().equals("script")) t.transition(ScriptDataEscaped); else t.transition(ScriptDataDoubleEscaped); t.emit(c); break; default: r.unconsume(); t.transition(ScriptDataDoubleEscaped); } }
void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '\t': case '\n': case '\r': case '\f': case ' ': t.transition(BeforeAttributeName); break; case '/': t.transition(SelfClosingStartTag); break; case '>': t.emitTagPending(); t.transition(Data); break; case eof: t.eofError(this); t.transition(Data); break; default: t.error(this); r.unconsume(); t.transition(BeforeAttributeName); } }
void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '\t': case '\n': case '\r': case '\f': case ' ': // ignore break; case '"': t.transition(AttributeValue_doubleQuoted); break; case '&': r.unconsume(); t.transition(AttributeValue_unquoted); break; case '\'': t.transition(AttributeValue_singleQuoted); break; case nullChar: t.error(this); t.tagPending.appendAttributeValue(replacementChar); t.transition(AttributeValue_unquoted); break; case eof: t.eofError(this); t.transition(Data); break; case '>': t.error(this); t.emitTagPending(); t.transition(Data); break; case '<': case '=': case '`': t.error(this); t.tagPending.appendAttributeValue(c); t.transition(AttributeValue_unquoted); break; default: r.unconsume(); t.transition(AttributeValue_unquoted); } }
void read(Tokeniser t, CharacterReader r) { // todo: handle bogus comment starting from eof. when does that trigger? // rewind to capture character that lead us here r.unconsume(); Token.Comment comment = new Token.Comment(); comment.data.append(r.consumeTo('>')); // todo: replace nullChar with replaceChar t.emit(comment); t.advanceTransition(Data); }
// from tagname <xxx void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '\t': case '\n': case '\r': case '\f': case ' ': break; // ignore whitespace case '/': t.transition(SelfClosingStartTag); break; case '>': t.emitTagPending(); t.transition(Data); break; case nullChar: t.error(this); t.tagPending.newAttribute(); r.unconsume(); t.transition(AttributeName); break; case eof: t.eofError(this); t.transition(Data); break; case '"': case '\'': case '<': case '=': t.error(this); t.tagPending.newAttribute(); t.tagPending.appendAttributeName(c); t.transition(AttributeName); break; default: // A-Z, anything else t.tagPending.newAttribute(); r.unconsume(); t.transition(AttributeName); } }
void read(Tokeniser t, CharacterReader r) { switch (r.consume()) { case '/': t.createTempBuffer(); t.transition(ScriptDataEndTagOpen); break; case '!': t.emit("<!"); t.transition(ScriptDataEscapeStart); break; default: t.emit("<"); r.unconsume(); t.transition(ScriptData); } }
// from < in rcdata void read(Tokeniser t, CharacterReader r) { if (r.matches('/')) { t.createTempBuffer(); t.advanceTransition(RCDATAEndTagOpen); } else if (r.matchesLetter() && !r.containsIgnoreCase("</" + t.appropriateEndTagName())) { // diverge from spec: got a start tag, but there's no appropriate end tag (</title>), so // rather than // consuming to EOF; break out here t.tagPending = new Token.EndTag(t.appropriateEndTagName()); t.emitTagPending(); r.unconsume(); // undo "<" t.transition(Data); } else { t.emit("<"); t.transition(Rcdata); } }