/** * Scans a PEReference. [69] * * @return entity * @throws IOException I/O exception */ private byte[] peRef() throws IOException { // scans predefined entities final byte[] name = name(true); consume(';'); final byte[] en = pents.get(name); if (en != null) return en; return name; }
/** * Scans a reference. [67] * * @param f dissolve entities * @return entity * @throws IOException I/O exception */ private byte[] ref(final boolean f) throws IOException { // scans numeric entities if (consume('#')) { // [66] final TokenBuilder ent = new TokenBuilder(); int b = 10; int ch = nextChar(); ent.add(ch); if (ch == 'x') { b = 16; ent.add(ch = nextChar()); } int n = 0; do { final boolean m = ch >= '0' && ch <= '9'; final boolean h = b == 16 && (ch >= 'a' && ch <= 'f' || ch >= 'A' && ch <= 'F'); if (!m && !h) { completeRef(ent); return QUESTION; } n *= b; n += ch & 15; if (!m) n += 9; ent.add(ch = nextChar()); } while (ch != ';'); if (!valid(n)) return QUESTION; ent.reset(); ent.add(n); return ent.finish(); } // scans predefined entities [68] final byte[] name = name(false); if (!consume(';')) return QUESTION; if (!f) return concat(AMPER, name, SEMI); byte[] en = ents.get(name); if (en == null) { // unknown entity: try HTML entities (lazy initialization) if (HTMLENTS.size() == 0) { for (int s = 0; s < HTMLENTITIES.length; s += 2) { HTMLENTS.add(token(HTMLENTITIES[s]), token(HTMLENTITIES[s + 1])); } } en = HTMLENTS.get(name); } return en == null ? QUESTION : en; }
/** * Reads next character or throws an exception if all bytes have been read. * * @return next character * @throws IOException I/O exception */ private int consume() throws IOException { while (true) { final int ch = input.read(); if (ch == -1) return 0; if (ch == '%' && pe) { // [69] final byte[] key = name(true); final byte[] val = pents.get(key); if (val == null) error(UNKNOWNPE, key); check(';'); input.add(val, true); } else { return ch; } } }
/** * Initializes the scanner. * * @param f input file * @param pr database properties * @param frag allow parsing of document fragment * @throws IOException I/O exception */ XMLScanner(final IO f, final Prop pr, final boolean frag) throws IOException { input = new XMLInput(f); fragment = frag; try { for (int e = 0; e < ENTITIES.length; e += 2) { ents.add(token(ENTITIES[e]), token(ENTITIES[e + 1])); } dtd = pr.is(Prop.DTD); chop = pr.is(Prop.CHOP); String enc = null; // process document declaration... if (consume(DOCDECL)) { if (s()) { if (!version()) error(DECLSTART); boolean s = s(); enc = encoding(); if (enc != null) { if (!s) error(WSERROR); s = s(); } if (sddecl() != null && !s) error(WSERROR); s(); final int ch = nextChar(); if (ch != '?' || nextChar() != '>') error(DECLWRONG); } else { prev(5); } } encoding = enc == null ? UTF8 : enc; if (!fragment) { final int n = consume(); if (!s(n)) { if (n != '<') error(BEFOREROOT); prev(1); } } } catch (final IOException ex) { input.close(); throw ex; } }
/** * Scans a markup declaration. [29] * * @return true if a declaration was found * @throws IOException I/O exception */ private boolean markupDecl() throws IOException { if (consume(ENT)) { // [70] checkS(); if (consume('%')) { // [72] PEDecl checkS(); final byte[] key = name(true); checkS(); byte[] val = entityValue(true); // [74] if (val == null) { val = externalID(true, false); if (val == null) error(INVEND); } s(); pents.add(key, val); } else { // [71] GEDecl final byte[] key = name(true); checkS(); byte[] val = entityValue(false); // [73] EntityDef if (val == null) { val = externalID(true, false); if (val == null) error(INVEND); if (s()) { check(ND); checkS(); name(true); } } s(); ents.add(key, val); } check('>'); pe = true; } else if (consume(ELEM)) { // [45] checkS(); name(true); checkS(); pe = true; if (!consume(EMP) && !consume(ANY)) { // [46] if (consume('(')) { s(); if (consume(PC)) { // [51] s(); boolean alt = false; while (consume('|')) { s(); name(true); s(); alt = true; } check(')'); if (!consume('*') && alt) error(INVEND); } else { cp(); s(); // check(')'); // to be fixed... while (!consume(')')) consume(); // input.prev(1); occ(); } } else { error(INVEND); } } s(); check('>'); } else if (consume(ATTL)) { // [52] pe = true; checkS(); name(true); s(); while (name(false) != null) { // [53] checkS(); if (!consume(CD) && !consume(IDRS) && !consume(IDR) && !consume(ID) && !consume(ENTS) && !consume(ENT1) && !consume(NMTS) && !consume(NMT)) { // [56] if (consume(NOT)) { // [57,58] checkS(); check('('); s(); name(true); s(); while (consume('|')) { s(); name(true); s(); } check(')'); } else { // [59] check('('); s(); nmtoken(); s(); while (consume('|')) { s(); nmtoken(); s(); } check(')'); } } // [54] pe = true; checkS(); if (!consume(REQ) && !consume(IMP)) { // [60] if (consume(FIX)) checkS(); quote = qu(); attValue(consume()); } s(); } check('>'); } else if (consume(NOTA)) { // [82] checkS(); name(true); s(); externalID(false, false); s(); check('>'); } else if (consume(COMS)) { comment(); } else if (consume(XML)) { pi(); } else { return false; } s(); pe = false; return true; }