// Implements Reusable. public void reset() { // Resets all members (alphabetically ordered). _attributes.reset(); _attrPrefixSep = 0; _attrQName = null; _attrValue = null; _attrQName = null; _charactersPending = false; _encoding = null; _entities.reset(); _eventType = START_DOCUMENT; _index = 0; _isEmpty = false; _location.reset(); _namespaces.reset(); _prolog = null; _readCount = 0; _reader = null; _depth = 0; _readIndex = 0; _seqsIndex = 0; _start = 0; _startOffset = 0; _state = STATE_CHARACTERS; _utf8StreamReader.reset(); // Recycles if factory produced. if (_factory != null) _factory.recycle(this); }
/** Reloads data buffer. */ private void reloadBuffer() throws XMLStreamException { if (_reader == null) throw new XMLStreamException("Input not specified"); _location._column += _readIndex; _location._charactersRead += _readIndex; _readIndex = 0; try { _readCount = _reader.read(_readBuffer, 0, _readBuffer.length); if ((_readCount <= 0) && ((_depth != 0) || (_state != STATE_CHARACTERS))) throw new XMLUnexpectedEndOfDocumentException("Unexpected end of document", _location); } catch (IOException e) { throw new XMLStreamException(e); } while ((_index + _readCount) >= _data.length) { // Potential overflow. increaseDataBuffer(); } }
/** * Handles end of line as per XML Spec. 2.11 * * @param c the potential end of line character. * @return the replacement character for end of line. */ private char handleEndOfLine(char c) throws XMLStreamException { if (c == 0xD) { // Replaces #xD with #xA // Unless next char is #xA, then skip, // #xD#xA will be replaced by #xA if (_readIndex >= _readCount) reloadBuffer(); if ((_readIndex < _readCount) && (_readBuffer[_readIndex] == 0xA)) { _readIndex++; // Skips 0xD _location._totalCharsRead++; } c = (char) 0xA; } if (c == 0xA) { _location._line++; _location._column = -_readIndex; // column = 0 } else if (c == 0x0) { throw new XMLStreamException("Illegal XML character U+0000", _location); } return c; }
// Advances to the next parsing event and returns its event-type constant
// (the same value is stored in _eventType).
//
// Before any input is read:
//  - if the previous event was START_ELEMENT for an empty tag (_isEmpty),
//    END_ELEMENT is returned immediately;
//  - if the previous event was END_ELEMENT, the namespace scope is popped,
//    _depth is decremented and _start/_index are rewound to the offset of
//    the CharArray held in _elemStack for the closed element. The
//    empty-bodied while loop is not a no-op: the pre-decrement in its
//    condition moves _seqsIndex back until it designates that CharArray in
//    _seqs (presumably so that newSeq hands these instances out again -
//    confirm against newSeq).
//
// The reader loop then copies characters from _readBuffer into _data and
// dispatches on _state (CHARACTERS, CDATA, DTD, DTD_INTERNAL, MARKUP,
// COMMENT, PI, the OPEN_TAGx* states and the CLOSE_TAGx* states).
// Text seen before markup is kept in _text with _charactersPending set and
// is reported as CHARACTERS once the following markup is recognised as a
// close tag, processing instruction, start tag or comment; a CDATA section
// is appended to the pending text instead of triggering that flush.
// Comments, processing instructions and DTDs are captured in _text and then
// dropped from _data by resetting _index to _start.
//
// Returns CHARACTERS, START_ELEMENT, END_ELEMENT, COMMENT,
// PROCESSING_INSTRUCTION or DTD, or the current _eventType when
// isEndOfStream() reports true.
//
// Throws XMLStreamException for the "'=' expected", "Quotes expected",
// "'>' expected" and "State unknown" errors raised here; reloadBuffer and
// handleEndOfLine are declared to throw it as well.
// Implements XMLStreamReader Interface. public int next() throws XMLStreamException { // Clears previous state. if (_eventType == START_ELEMENT) { if (_isEmpty) { // Previous empty tag, generates END_TAG automatically. _isEmpty = false; return _eventType = END_ELEMENT; } } else if (_eventType == END_ELEMENT) { _namespaces.pop(); CharArray startElem = _elemStack[_depth--]; _start = _index = startElem.offset(); // TODO: does this loop do anything at all? while (_seqs[--_seqsIndex] != startElem) { // Recycles CharArray instances. } } // Reader loop. while (true) { // Main character reading block. if ((_readIndex >= _readCount) && isEndOfStream()) return _eventType; // END_DOCUMENT or CHARACTERS. char c = _readBuffer[_readIndex++]; _location._totalCharsRead++; if (c <= '&') c = (c == '&') ? replaceEntity() : (c < ' ') ? handleEndOfLine(c) : c; _data[_index++] = c; // Main processing. // switch (_state) { case STATE_CHARACTERS: while (true) { // Read characters data all at once. if (c == '<') { int length = _index - _start - 1; if (length > 0) { if (_charactersPending) { _text.setArray(_data, _text.offset(), _text.length() + length); // Coalescing. } else { _text = newSeq(_start, length); _charactersPending = true; } _start = _index - 1; // Keeps '<' as part of markup. } _state = STATE_MARKUP; break; } // Local character reading block. if ((_readIndex >= _readCount) && isEndOfStream()) return _eventType; c = _readBuffer[_readIndex++]; _location._totalCharsRead++; if (c <= '&') c = (c == '&') ? replaceEntity() : (c < ' ') ? handleEndOfLine(c) : c; _data[_index++] = c; } break; case STATE_CDATA: while (true) { // Reads CDATA all at once. if ((c == '>') && (_index - _start >= 3) && (_data[_index - 2] == ']') && (_data[_index - 3] == ']')) { _index -= 3; int length = _index - _start; if (length > 0) { // Not empty. if (_charactersPending) { _text.setArray(_data, _text.offset(), _text.length() + length); // Coalescing. 
} else { _text = newSeq(_start, length); _charactersPending = true; } } _start = _index; _state = STATE_CHARACTERS; break; } // Local character reading block. if (_readIndex >= _readCount) reloadBuffer(); c = _readBuffer[_readIndex++]; _location._totalCharsRead++; if (c < ' ') c = handleEndOfLine(c); _data[_index++] = c; } break; case STATE_DTD: if (c == '>') { _text = newSeq(_start, _index - _start); _index = _start; // Do not keep DTD. _state = STATE_CHARACTERS; return _eventType = DTD; } else if (c == '[') { _state = STATE_DTD_INTERNAL; } break; case STATE_DTD_INTERNAL: if (c == ']') { _state = STATE_DTD; } break; case STATE_MARKUP: // Starts with '<' if (_index - _start == 2) { if (c == '/') { _start = _index = _index - 2; _state = STATE_CLOSE_TAGxREAD_ELEM_NAME; _prefixSep = -1; if (_charactersPending) { // Flush characters event. _charactersPending = false; return _eventType = CHARACTERS; } } else if (c == '?') { _start = _index = _index - 2; _state = STATE_PI; if (_charactersPending) { // Flush characters event. _charactersPending = false; return _eventType = CHARACTERS; } } else if (c != '!') { // Element tag (first letter). _location._lastStartTagPos = _location._totalCharsRead - 2; _data[_start] = c; _index = _start + 1; _state = STATE_OPEN_TAGxREAD_ELEM_NAME; _prefixSep = -1; if (_charactersPending) { // Flush character event. _charactersPending = false; return _eventType = CHARACTERS; } } } else if ((_index - _start == 4) && (_data[_start + 1] == '!') && (_data[_start + 2] == '-') && (_data[_start + 3] == '-')) { _start = _index = _index - 4; // Removes <!-- _state = STATE_COMMENT; if (_charactersPending) { // Flush character event. 
_charactersPending = false; return _eventType = CHARACTERS; } } else if ((_index - _start == 9) && (_data[_start + 1] == '!') && (_data[_start + 2] == '[') && (_data[_start + 3] == 'C') && (_data[_start + 4] == 'D') && (_data[_start + 5] == 'A') && (_data[_start + 6] == 'T') && (_data[_start + 7] == 'A') && (_data[_start + 8] == '[')) { _start = _index = _index - 9; // Do not keep <![CDATA[ _state = STATE_CDATA; } else if ((_index - _start == 9) && (_data[_start + 1] == '!') && (_data[_start + 2] == 'D') && (_data[_start + 3] == 'O') && (_data[_start + 4] == 'C') && (_data[_start + 5] == 'T') && (_data[_start + 6] == 'Y') && (_data[_start + 7] == 'P') && (_data[_start + 8] == 'E')) { // Keeps <!DOCTYPE as part of DTD. _state = STATE_DTD; } else { // Ignores, e.g. <!ELEMENT <!ENTITY... } break; case STATE_COMMENT: while (true) { // Read comment all at once. if ((c == '>') && (_index - _start >= 3) && (_data[_index - 2] == '-') && (_data[_index - 3] == '-')) { _index -= 3; // Removes --> _text = newSeq(_start, _index - _start); _state = STATE_CHARACTERS; _index = _start; // Do not keep comments. return _eventType = COMMENT; } // Local character reading block. if (_readIndex >= _readCount) reloadBuffer(); c = _readBuffer[_readIndex++]; _location._totalCharsRead++; if (c < ' ') c = handleEndOfLine(c); _data[_index++] = c; } case STATE_PI: if ((c == '>') && (_index - _start >= 2) && (_data[_index - 2] == '?')) { _index -= 2; // Removes ?> _text = newSeq(_start, _index - _start); _state = STATE_CHARACTERS; _index = _start; // Do not keep processing instructions. return _eventType = PROCESSING_INSTRUCTION; } break; // OPEN_TAG: case STATE_OPEN_TAGxREAD_ELEM_NAME: _attributes.reset(); _namespaces.push(); while (true) { // Read element name all at once. if (c < '@') { // Else avoid multiple checks. 
if (c == '>') { _qName = newSeq(_start, --_index - _start); _start = _index; _state = STATE_CHARACTERS; processStartTag(); _isEmpty = false; return _eventType = START_ELEMENT; } else if (c == '/') { _qName = newSeq(_start, --_index - _start); _start = _index; _state = STATE_OPEN_TAGxEMPTY_TAG; break; } else if (c == ':') { _prefixSep = _index - 1; } else if (c <= ' ') { _qName = newSeq(_start, --_index - _start); _state = STATE_OPEN_TAGxELEM_NAME_READ; break; } } if (_readIndex >= _readCount) reloadBuffer(); c = _data[_index++] = _readBuffer[_readIndex++]; _location._totalCharsRead++; } break; case STATE_OPEN_TAGxELEM_NAME_READ: if (c == '>') { _start = --_index; _state = STATE_CHARACTERS; processStartTag(); _isEmpty = false; return _eventType = START_ELEMENT; } else if (c == '/') { _state = STATE_OPEN_TAGxEMPTY_TAG; } else if (c > ' ') { _start = _index - 1; // Includes current character. _attrPrefixSep = -1; _state = STATE_OPEN_TAGxREAD_ATTR_NAME; } break; case STATE_OPEN_TAGxREAD_ATTR_NAME: while (true) { // Read attribute name all at once. if (c < '@') { // Else avoid multiple checks. 
if (c <= ' ') { _attrQName = newSeq(_start, --_index - _start); _state = STATE_OPEN_TAGxATTR_NAME_READ; break; } else if (c == '=') { _attrQName = newSeq(_start, --_index - _start); _state = STATE_OPEN_TAGxEQUAL_READ; break; } else if (c == ':') { _attrPrefixSep = _index - 1; } } if (_readIndex >= _readCount) reloadBuffer(); _data[_index++] = c = _readBuffer[_readIndex++]; _location._totalCharsRead++; } break; case STATE_OPEN_TAGxATTR_NAME_READ: if (c == '=') { --_index; _state = STATE_OPEN_TAGxEQUAL_READ; } else if (c > ' ') { throw new XMLStreamException("'=' expected", _location); } break; case STATE_OPEN_TAGxEQUAL_READ: if (c == '\'') { _start = --_index; _state = STATE_OPEN_TAGxREAD_ATTR_VALUE_SIMPLE_QUOTE; } else if (c == '\"') { _start = --_index; _state = STATE_OPEN_TAGxREAD_ATTR_VALUE_DOUBLE_QUOTE; } else if (c > ' ') { throw new XMLStreamException("Quotes expected", _location); } break; case STATE_OPEN_TAGxREAD_ATTR_VALUE_SIMPLE_QUOTE: while (true) { // Read attribute value all at once. if (c == '\'') { _attrValue = newSeq(_start, --_index - _start); processAttribute(); _state = STATE_OPEN_TAGxELEM_NAME_READ; break; } // Local character reading block. if (_readIndex >= _readCount) reloadBuffer(); c = _readBuffer[_readIndex++]; _location._totalCharsRead++; if (c == '&') c = replaceEntity(); _data[_index++] = c; } break; case STATE_OPEN_TAGxREAD_ATTR_VALUE_DOUBLE_QUOTE: while (true) { // Read attribute value all at once. if (c == '\"') { _attrValue = newSeq(_start, --_index - _start); processAttribute(); _state = STATE_OPEN_TAGxELEM_NAME_READ; break; } // Local character reading block. 
if (_readIndex >= _readCount) reloadBuffer(); c = _readBuffer[_readIndex++]; _location._totalCharsRead++; if (c == '&') c = replaceEntity(); _data[_index++] = c; } break; case STATE_OPEN_TAGxEMPTY_TAG: if (c == '>') { _start = --_index; _state = STATE_CHARACTERS; processStartTag(); _isEmpty = true; return _eventType = START_ELEMENT; } else { throw new XMLStreamException("'>' expected", _location); } // CLOSE_TAG: case STATE_CLOSE_TAGxREAD_ELEM_NAME: while (true) { // Element name can be read all at once. if (c < '@') { // Else avoid multiple checks. if (c == '>') { _qName = newSeq(_start, --_index - _start); _start = _index; _state = STATE_CHARACTERS; processEndTag(); return _eventType = END_ELEMENT; } else if (c == ':') { _prefixSep = _index - 1; } else if (c <= ' ') { _qName = newSeq(_start, --_index - _start); _state = STATE_CLOSE_TAGxELEM_NAME_READ; break; } } if (_readIndex >= _readCount) reloadBuffer(); c = _data[_index++] = _readBuffer[_readIndex++]; _location._totalCharsRead++; } break; case STATE_CLOSE_TAGxELEM_NAME_READ: if (c == '>') { _start = --_index; _state = STATE_CHARACTERS; processEndTag(); return _eventType = END_ELEMENT; } else if (c > ' ') { throw new XMLStreamException("'>' expected", _location); } break; default: throw new XMLStreamException("State unknown: " + _state, _location); } } }
/**
 * Describes the current state of this reader: the name of the current
 * event (looked up in NAMES_OF_EVENTS) followed by the current location.
 *
 * @return the textual representation of the current state.
 */
public String toString() {
    final String eventName = NAMES_OF_EVENTS[_eventType];
    final String position = _location.toString();
    return "XMLStreamReader - State: " + eventName + ", Location: " + position;
}