예제 #1
0
  /**
   * Restores this reader to its initial state so the instance can be reused (implements
   * Reusable), then hands it back to its factory when it was produced by one.
   *
   * <p>Scalar members are zeroed or nulled, the helper objects ({@code _attributes}, {@code
   * _entities}, {@code _location}, {@code _namespaces}, {@code _utf8StreamReader}) are reset
   * through their own {@code reset()} methods, the event type goes back to {@code START_DOCUMENT}
   * and the parsing state to {@code STATE_CHARACTERS}.
   */
  public void reset() {
    // Resets all members (alphabetically ordered).
    // NOTE(review): _qName, _text and _prefixSep (all used by next()) are not reset here;
    // confirm this is intentional before relying on them after a reset.
    _attributes.reset();
    _attrPrefixSep = 0;
    _attrQName = null;
    _attrValue = null;
    _charactersPending = false;
    _depth = 0;
    _encoding = null;
    _entities.reset();
    _eventType = START_DOCUMENT;
    _index = 0;
    _isEmpty = false;
    _location.reset();
    _namespaces.reset();
    _prolog = null;
    _readCount = 0;
    _reader = null;
    _readIndex = 0;
    _seqsIndex = 0;
    _start = 0;
    _startOffset = 0;
    _state = STATE_CHARACTERS;
    _utf8StreamReader.reset();

    // Recycles if factory produced.
    if (_factory != null) {
      _factory.recycle(this);
    }
  }
예제 #2
0
 /**
  * Refills the read buffer from the underlying reader.
  *
  * <p>The characters consumed so far are first credited to the location counters and the read
  * index is rewound to zero. After reading, the data buffer is grown until the freshly read
  * characters are guaranteed to fit behind the current write index.
  *
  * @throws XMLStreamException if no input has been set, if the reader fails with an {@link
  *     IOException}, or if the input is exhausted while an element is still open or the parser
  *     is not in the characters state.
  */
 private void reloadBuffer() throws XMLStreamException {
   if (_reader == null) {
     throw new XMLStreamException("Input not specified");
   }

   // Everything up to the read index has been consumed: account for it before rewinding.
   final int consumed = _readIndex;
   _location._column += consumed;
   _location._charactersRead += consumed;
   _readIndex = 0;

   try {
     _readCount = _reader.read(_readBuffer, 0, _readBuffer.length);
     final boolean exhausted = _readCount <= 0;
     // Running out of input is only legal between top-level constructs.
     final boolean endAllowedHere = (_depth == 0) && (_state == STATE_CHARACTERS);
     if (exhausted && !endAllowedHere) {
       throw new XMLUnexpectedEndOfDocumentException("Unexpected end of document", _location);
     }
   } catch (IOException e) {
     throw new XMLStreamException(e);
   }

   // Guards against overflowing the data buffer with the characters just read.
   while (_data.length <= (_index + _readCount)) {
     increaseDataBuffer();
   }
 }
예제 #3
0
 /**
  * Normalizes line endings as per XML Spec. 2.11 and keeps the line/column counters in sync.
  *
  * <p>A carriage return (#xD) is turned into a line feed (#xA); when the carriage return is
  * immediately followed by a line feed, that line feed is consumed so the pair yields a single
  * #xA. Any other character is returned unchanged, except U+0000 which is rejected.
  *
  * @param c the potential end of line character.
  * @return the replacement character for end of line, or {@code c} itself.
  * @throws XMLStreamException if {@code c} is U+0000, or if reloading the buffer fails.
  */
 private char handleEndOfLine(char c) throws XMLStreamException {
   char normalized = c;

   if (normalized == '\r') {
     // Peek at the next character, reloading the buffer first if it is exhausted.
     if (_readCount <= _readIndex) {
       reloadBuffer();
     }
     final boolean lineFeedFollows =
         (_readIndex < _readCount) && (_readBuffer[_readIndex] == '\n');
     if (lineFeedFollows) {
       // Swallows the #xA of a #xD#xA pair; only one #xA is reported.
       _readIndex++;
       _location._totalCharsRead++;
     }
     normalized = '\n';
   }

   if (normalized == '\n') {
     _location._line++;
     // reloadBuffer() adds the read index to the column, so this makes the column 0 here.
     _location._column = -_readIndex;
     return normalized;
   }

   if (normalized == '\0') {
     throw new XMLStreamException("Illegal XML character U+0000", _location);
   }
   return normalized;
 }
예제 #4
0
  /**
   * Advances the parser to the next event and returns its type (implements the XMLStreamReader
   * interface).
   *
   * <p>Characters are taken one at a time from {@code _readBuffer}, copied into {@code _data} at
   * {@code _index}, and fed to a state machine keyed on {@code _state}. {@code _start} marks where
   * the token currently being accumulated begins in {@code _data}. The method loops until one of
   * the states completes an event and returns it.
   *
   * @return the new event type: {@code START_ELEMENT}, {@code END_ELEMENT}, {@code CHARACTERS},
   *     {@code COMMENT}, {@code PROCESSING_INSTRUCTION} or {@code DTD}; or the current event type
   *     when {@code isEndOfStream()} reports the end of input.
   * @throws XMLStreamException if the markup is malformed ("'=' expected", "Quotes expected",
   *     "'>' expected"), if {@code _state} holds an unknown value, or if one of the helpers
   *     called from here (buffer reload, entity replacement, tag processing) throws.
   */
  public int next() throws XMLStreamException {

    // Clears previous state.
    if (_eventType == START_ELEMENT) {
      if (_isEmpty) { // Previous empty tag, generates END_TAG automatically.
        _isEmpty = false;
        return _eventType = END_ELEMENT;
      }
    } else if (_eventType == END_ELEMENT) {
      _namespaces.pop();
      CharArray startElem = _elemStack[_depth--];
      // Rewinds the data buffer to where the element that just ended began.
      _start = _index = startElem.offset();

      // The loop body is intentionally empty: the work is the pre-decrement of _seqsIndex,
      // which walks back until it reaches the slot holding startElem. Presumably newSeq()
      // hands out _seqs[_seqsIndex++], so this releases every CharArray created since the
      // start tag (including the one for the start tag itself) - confirm against newSeq().
      while (_seqs[--_seqsIndex] != startElem) { // Recycles CharArray instances.
      }
    }
    // Reader loop.
    while (true) {

      // Main character reading block.
      if ((_readIndex >= _readCount) && isEndOfStream())
        return _eventType; // END_DOCUMENT or CHARACTERS.
      char c = _readBuffer[_readIndex++];
      _location._totalCharsRead++;
      // Only characters up to '&' need special care: '&' starts an entity reference and
      // anything below ' ' goes through end-of-line handling.
      if (c <= '&') c = (c == '&') ? replaceEntity() : (c < ' ') ? handleEndOfLine(c) : c;
      _data[_index++] = c;

      // Main processing.
      //
      switch (_state) {
        case STATE_CHARACTERS:
          while (true) { // Read characters data all at once.

            if (c == '<') {
              // Text accumulated so far, excluding the '<' just stored.
              int length = _index - _start - 1;
              if (length > 0) {
                if (_charactersPending) {
                  _text.setArray(_data, _text.offset(), _text.length() + length); // Coalescing.
                } else {
                  _text = newSeq(_start, length);
                  _charactersPending = true;
                }
                _start = _index - 1; // Keeps '<' as part of markup.
              }
              _state = STATE_MARKUP;
              break;
            }

            // Local character reading block.
            if ((_readIndex >= _readCount) && isEndOfStream()) return _eventType;
            c = _readBuffer[_readIndex++];
            _location._totalCharsRead++;
            if (c <= '&') c = (c == '&') ? replaceEntity() : (c < ' ') ? handleEndOfLine(c) : c;
            _data[_index++] = c;
          }
          break;

        case STATE_CDATA:
          while (true) { // Reads CDATA all at once.

            // Looks for the terminating "]]>".
            if ((c == '>')
                && (_index - _start >= 3)
                && (_data[_index - 2] == ']')
                && (_data[_index - 3] == ']')) {
              _index -= 3; // Removes ]]>
              int length = _index - _start;
              if (length > 0) { // Not empty.
                if (_charactersPending) {
                  _text.setArray(_data, _text.offset(), _text.length() + length); // Coalescing.
                } else {
                  _text = newSeq(_start, length);
                  _charactersPending = true;
                }
              }
              _start = _index;
              _state = STATE_CHARACTERS;
              break;
            }

            // Local character reading block.
            // No entity replacement here: '&' is kept as is inside CDATA.
            if (_readIndex >= _readCount) reloadBuffer();
            c = _readBuffer[_readIndex++];
            _location._totalCharsRead++;
            if (c < ' ') c = handleEndOfLine(c);
            _data[_index++] = c;
          }
          break;

        case STATE_DTD:
          if (c == '>') {
            _text = newSeq(_start, _index - _start);
            _index = _start; // Do not keep DTD.
            _state = STATE_CHARACTERS;
            return _eventType = DTD;
          } else if (c == '[') {
            // Entering the internal subset; '>' is ignored until the matching ']'.
            _state = STATE_DTD_INTERNAL;
          }
          break;

        case STATE_DTD_INTERNAL:
          if (c == ']') {
            _state = STATE_DTD;
          }
          break;

        case STATE_MARKUP: // Starts with '<'
          // The number of characters buffered since '<' tells which prefix can be tested.
          if (_index - _start == 2) {
            if (c == '/') {
              _start = _index = _index - 2;
              _state = STATE_CLOSE_TAGxREAD_ELEM_NAME;
              _prefixSep = -1;
              if (_charactersPending) { // Flush characters event.
                _charactersPending = false;
                return _eventType = CHARACTERS;
              }
            } else if (c == '?') {
              _start = _index = _index - 2;
              _state = STATE_PI;
              if (_charactersPending) { // Flush characters event.
                _charactersPending = false;
                return _eventType = CHARACTERS;
              }
            } else if (c != '!') { // Element tag (first letter).
              _location._lastStartTagPos = _location._totalCharsRead - 2;
              // Overwrites the '<' with the first name character so the name starts at _start.
              _data[_start] = c;
              _index = _start + 1;
              _state = STATE_OPEN_TAGxREAD_ELEM_NAME;
              _prefixSep = -1;
              if (_charactersPending) { // Flush character event.
                _charactersPending = false;
                return _eventType = CHARACTERS;
              }
            }
          } else if ((_index - _start == 4)
              && (_data[_start + 1] == '!')
              && (_data[_start + 2] == '-')
              && (_data[_start + 3] == '-')) {
            _start = _index = _index - 4; // Removes <!--
            _state = STATE_COMMENT;
            if (_charactersPending) { // Flush character event.
              _charactersPending = false;
              return _eventType = CHARACTERS;
            }

          } else if ((_index - _start == 9)
              && (_data[_start + 1] == '!')
              && (_data[_start + 2] == '[')
              && (_data[_start + 3] == 'C')
              && (_data[_start + 4] == 'D')
              && (_data[_start + 5] == 'A')
              && (_data[_start + 6] == 'T')
              && (_data[_start + 7] == 'A')
              && (_data[_start + 8] == '[')) {
            // No flush here: CDATA content is coalesced with any pending characters.
            _start = _index = _index - 9; // Do not keep <![CDATA[
            _state = STATE_CDATA;

          } else if ((_index - _start == 9)
              && (_data[_start + 1] == '!')
              && (_data[_start + 2] == 'D')
              && (_data[_start + 3] == 'O')
              && (_data[_start + 4] == 'C')
              && (_data[_start + 5] == 'T')
              && (_data[_start + 6] == 'Y')
              && (_data[_start + 7] == 'P')
              && (_data[_start + 8] == 'E')) {
            // Keeps <!DOCTYPE as part of DTD.
            _state = STATE_DTD;
          } else {
            // Ignores, e.g. <!ELEMENT <!ENTITY...
          }
          break;

        case STATE_COMMENT:
          // This loop is only left through the return below (or an exception), so no break
          // is needed before the next case label.
          while (true) { // Read comment all at once.

            if ((c == '>')
                && (_index - _start >= 3)
                && (_data[_index - 2] == '-')
                && (_data[_index - 3] == '-')) {
              _index -= 3; // Removes -->
              _text = newSeq(_start, _index - _start);
              _state = STATE_CHARACTERS;
              _index = _start; // Do not keep comments.
              return _eventType = COMMENT;
            }

            // Local character reading block.
            if (_readIndex >= _readCount) reloadBuffer();
            c = _readBuffer[_readIndex++];
            _location._totalCharsRead++;
            if (c < ' ') c = handleEndOfLine(c);
            _data[_index++] = c;
          }

        case STATE_PI:
          if ((c == '>') && (_index - _start >= 2) && (_data[_index - 2] == '?')) {
            _index -= 2; // Removes ?>
            _text = newSeq(_start, _index - _start);
            _state = STATE_CHARACTERS;
            _index = _start; // Do not keep processing instructions.
            return _eventType = PROCESSING_INSTRUCTION;
          }
          break;

          // OPEN_TAG:
        case STATE_OPEN_TAGxREAD_ELEM_NAME:
          _attributes.reset();
          _namespaces.push();
          while (true) { // Read element name all at once.

            // Every delimiter tested below ('>', '/', ':', whitespace) is lower than '@'.
            if (c < '@') { // Else avoid multiple checks.
              if (c == '>') {
                _qName = newSeq(_start, --_index - _start);
                _start = _index;
                _state = STATE_CHARACTERS;
                processStartTag();
                _isEmpty = false;
                return _eventType = START_ELEMENT;
              } else if (c == '/') {
                _qName = newSeq(_start, --_index - _start);
                _start = _index;
                _state = STATE_OPEN_TAGxEMPTY_TAG;
                break;
              } else if (c == ':') {
                _prefixSep = _index - 1;
              } else if (c <= ' ') {
                _qName = newSeq(_start, --_index - _start);
                _state = STATE_OPEN_TAGxELEM_NAME_READ;
                break;
              }
            }

            // Name characters are copied as is (no entity or end-of-line handling).
            if (_readIndex >= _readCount) reloadBuffer();
            c = _data[_index++] = _readBuffer[_readIndex++];
            _location._totalCharsRead++;
          }
          break;

        case STATE_OPEN_TAGxELEM_NAME_READ:
          if (c == '>') {
            _start = --_index;
            _state = STATE_CHARACTERS;
            processStartTag();
            _isEmpty = false;
            return _eventType = START_ELEMENT;
          } else if (c == '/') {
            _state = STATE_OPEN_TAGxEMPTY_TAG;
          } else if (c > ' ') {
            _start = _index - 1; // Includes current character.
            _attrPrefixSep = -1;
            _state = STATE_OPEN_TAGxREAD_ATTR_NAME;
          }
          break;

        case STATE_OPEN_TAGxREAD_ATTR_NAME:
          while (true) { // Read attribute name all at once.

            if (c < '@') { // Else avoid multiple checks.
              if (c <= ' ') {
                _attrQName = newSeq(_start, --_index - _start);
                _state = STATE_OPEN_TAGxATTR_NAME_READ;
                break;
              } else if (c == '=') {
                _attrQName = newSeq(_start, --_index - _start);
                _state = STATE_OPEN_TAGxEQUAL_READ;
                break;
              } else if (c == ':') {
                _attrPrefixSep = _index - 1;
              }
            }

            if (_readIndex >= _readCount) reloadBuffer();
            _data[_index++] = c = _readBuffer[_readIndex++];
            _location._totalCharsRead++;
          }
          break;

        case STATE_OPEN_TAGxATTR_NAME_READ:
          if (c == '=') {
            --_index;
            _state = STATE_OPEN_TAGxEQUAL_READ;
          } else if (c > ' ') {
            throw new XMLStreamException("'=' expected", _location);
          }
          break;

        case STATE_OPEN_TAGxEQUAL_READ:
          if (c == '\'') {
            _start = --_index; // Drops the opening quote; the value starts here.
            _state = STATE_OPEN_TAGxREAD_ATTR_VALUE_SIMPLE_QUOTE;
          } else if (c == '\"') {
            _start = --_index; // Drops the opening quote; the value starts here.
            _state = STATE_OPEN_TAGxREAD_ATTR_VALUE_DOUBLE_QUOTE;
          } else if (c > ' ') {
            throw new XMLStreamException("Quotes expected", _location);
          }
          break;

        case STATE_OPEN_TAGxREAD_ATTR_VALUE_SIMPLE_QUOTE:
          while (true) { // Read attribute value all at once.

            if (c == '\'') {
              _attrValue = newSeq(_start, --_index - _start);
              processAttribute();
              _state = STATE_OPEN_TAGxELEM_NAME_READ;
              break;
            }

            // Local character reading block.
            if (_readIndex >= _readCount) reloadBuffer();
            c = _readBuffer[_readIndex++];
            _location._totalCharsRead++;
            if (c == '&') c = replaceEntity();
            _data[_index++] = c;
          }
          break;

        case STATE_OPEN_TAGxREAD_ATTR_VALUE_DOUBLE_QUOTE:
          while (true) { // Read attribute value all at once.

            if (c == '\"') {
              _attrValue = newSeq(_start, --_index - _start);
              processAttribute();
              _state = STATE_OPEN_TAGxELEM_NAME_READ;
              break;
            }

            // Local character reading block.
            if (_readIndex >= _readCount) reloadBuffer();
            c = _readBuffer[_readIndex++];
            _location._totalCharsRead++;
            if (c == '&') c = replaceEntity();
            _data[_index++] = c;
          }
          break;

        case STATE_OPEN_TAGxEMPTY_TAG:
          // Both branches leave the method, so there is no fall-through into the next case.
          if (c == '>') {
            _start = --_index;
            _state = STATE_CHARACTERS;
            processStartTag();
            _isEmpty = true; // The following call to next() reports the matching END_ELEMENT.
            return _eventType = START_ELEMENT;
          } else {
            throw new XMLStreamException("'>' expected", _location);
          }

          // CLOSE_TAG:
        case STATE_CLOSE_TAGxREAD_ELEM_NAME:
          while (true) { // Element name can be read all at once.

            if (c < '@') { // Else avoid multiple checks.
              if (c == '>') {
                _qName = newSeq(_start, --_index - _start);
                _start = _index;
                _state = STATE_CHARACTERS;
                processEndTag();
                return _eventType = END_ELEMENT;
              } else if (c == ':') {
                _prefixSep = _index - 1;
              } else if (c <= ' ') {
                _qName = newSeq(_start, --_index - _start);
                _state = STATE_CLOSE_TAGxELEM_NAME_READ;
                break;
              }
            }

            if (_readIndex >= _readCount) reloadBuffer();
            c = _data[_index++] = _readBuffer[_readIndex++];
            _location._totalCharsRead++;
          }
          break;

        case STATE_CLOSE_TAGxELEM_NAME_READ:
          if (c == '>') {
            _start = --_index;
            _state = STATE_CHARACTERS;
            processEndTag();
            return _eventType = END_ELEMENT;
          } else if (c > ' ') {
            throw new XMLStreamException("'>' expected", _location);
          }
          break;

        default:
          throw new XMLStreamException("State unknown: " + _state, _location);
      }
    }
  }
예제 #5
0
 /**
  * Describes the current state of this reader: the name of the current event type followed by
  * the current location.
  *
  * @return the textual representation of the current state.
  */
 @Override
 public String toString() {
   // Looked up in the same order as the original concatenation: event name first, then location.
   final Object eventName = NAMES_OF_EVENTS[_eventType];
   final String where = _location.toString();
   return new StringBuilder("XMLStreamReader - State: ")
       .append(eventName)
       .append(", Location: ")
       .append(where)
       .toString();
 }