Esempio n. 1
0
  /**
   * Tokenizes the characters held in {@code buffer} as HTML and splits them into title, head and
   * body text plus the html / meta / body / sitemesh property maps, which are handed to a new
   * {@code FastPage}.
   *
   * <p>The method is a hand-written, two-level state machine:
   *
   * <ul>
   *   <li>The inner loop (labelled {@code start}) reads one character at a time and produces a
   *       single token: its text ends up in {@code _buffer} and its kind in {@code _tokenType}.
   *   <li>The outer loop consumes the token produced by the previous pass, routing it to the
   *       title, head, body or "tagged content" accumulator, and then clears {@code _buffer}.
   * </ul>
   *
   * <p>While {@code tagged} is set (after a {@code <content>} tag whose properties could be
   * parsed), everything up to the matching {@code </content>} is collected verbatim and stored in
   * the sitemesh properties under the value of the tag's {@code tag} attribute.
   *
   * @param buffer supplies the character array and length to parse; it is also passed through
   *     unchanged to the {@code FastPage} constructor
   * @return a new {@code FastPage} built from the collected properties and the trimmed title, head
   *     and body text
   * @throws IOException declared for callers; note that a failure of {@code reader.read()} inside
   *     the tokenizer loop is caught and treated as end of input rather than propagated
   */
  public Page parse(SitemeshBuffer buffer) throws IOException {
    CharArrayReader reader =
        new CharArrayReader(buffer.getCharArray(), 0, buffer.getBufferLength());
    // _buffer holds the text of the token currently being built / consumed.
    CharArray _buffer = new CharArray(4096);
    CharArray _body = new CharArray(4096);
    CharArray _head = new CharArray(512);
    CharArray _title = new CharArray(128);
    Map _htmlProperties = null;
    Map _metaProperties = new HashMap(6);
    Map _sitemeshProperties = new HashMap(6);
    Map _bodyProperties = null;

    // Accumulator and key for the <content tag="..."> block currently being captured.
    CharArray _currentTaggedContent = new CharArray(1024);
    String _contentTagId = null;
    boolean tagged = false;

    boolean _frameSet = false;

    // Character-level tokenizer state (STATE_*), not to be confused with `state` below.
    int _state = STATE_TEXT;
    int _tokenType = TOKEN_NONE;
    // Single character to re-read on the next inner-loop pass; 0 means "nothing pushed back".
    int _pushBack = 0;
    // Shared counter: consecutive '-' in a comment, ']' in CDATA, or matched chars of "</script".
    int _comment = 0;
    // The quote character (' or ") that opened the attribute value currently being read.
    int _quote = 0;
    // While true, text / comment / CDATA / script tokens are dropped instead of being written.
    boolean hide = false;

    // Document-level state (TAG_STATE_*): which structural element we are currently inside.
    int state = TAG_STATE_NONE;
    // State to return to when a <title>, <xml> or <xmp> section ends.
    int laststate = TAG_STATE_NONE;
    boolean doneTitle = false;

    // This tag object gets reused each iteration.
    Tag tagObject = new Tag();

    while (_tokenType != TOKEN_EOF) {
      if (tagged) {
        // Inside a <content> block: capture everything verbatim until </content>.
        if (_tokenType == TOKEN_TAG || _tokenType == TOKEN_EMPTYTAG) {
          if (_buffer == null || _buffer.length() == 0) {
            _tokenType = TOKEN_NONE;
            continue;
          }

          if (parseTag(tagObject, _buffer) == null) {
            // Unparseable tag: re-dispatch the same buffer as text so it is appended to the
            // tagged content below, exactly as the untagged branch does. A bare `continue` would
            // re-enter this branch with the same token and buffer and never make progress.
            _tokenType = TOKEN_TEXT;
            continue;
          }

          if (_buffer.compareLowerSubstr(
              "/content")) // Note that the '/' survives the | 32 operation
          {
            tagged = false;
            if (_contentTagId != null) {
              state = TAG_STATE_NONE;
              _sitemeshProperties.put(_contentTagId, _currentTaggedContent.toString());
              _currentTaggedContent.setLength(0);
              _contentTagId = null;
            }
          } else {
            // Any other tag is part of the captured content; restore the angle brackets.
            _currentTaggedContent.append('<').append(_buffer).append('>');
          }
        } else {
          if (_buffer.length() > 0) _currentTaggedContent.append(_buffer);
        }
      } else {
        if (_tokenType == TOKEN_TAG || _tokenType == TOKEN_EMPTYTAG) {
          if (_buffer == null || _buffer.length() == 0) {
            _tokenType = TOKEN_NONE;
            continue;
          }

          if (parseTag(tagObject, _buffer) == null) {
            // Not a usable tag: handle the same buffer again as plain text on the next pass.
            _tokenType = TOKEN_TEXT;
            continue;
          }

          int tagHash = _buffer.substrHashCode();

          if (state == TAG_STATE_XML || state == TAG_STATE_XMP) {
            // Inside <xml>/<xmp>: pass every tag through and only watch for the closing tag.
            writeTag(state, laststate, hide, _head, _buffer, _body);
            if ((state == TAG_STATE_XML && tagHash == SLASH_XML_HASH)
                || (state == TAG_STATE_XMP && tagHash == SLASH_XMP_HASH)) {
              state = laststate;
            }
          } else {
            // Tags are dispatched by hash; each case re-checks the name and falls back to the
            // default handling (writeTag) when the hash matched by coincidence.
            boolean doDefault = false;
            switch (tagHash) {
              case HTML_HASH:
                if (!_buffer.compareLowerSubstr("html")) { // skip any accidental hash collisions
                  doDefault = true;
                  break;
                }
                state = TAG_STATE_HTML;
                _htmlProperties = parseProperties(tagObject, _buffer).properties;
                break;
              case HEAD_HASH:
                if (!_buffer.compareLowerSubstr("head")) { // skip any accidental hash collisions
                  doDefault = true;
                  break;
                }
                state = TAG_STATE_HEAD;
                break;
              case XML_HASH:
                if (!_buffer.compareLowerSubstr("xml")) { // skip any accidental hash collisions
                  doDefault = true;
                  break;
                }
                // Remember where we were, emit the opening tag, then enter pass-through mode.
                laststate = state;
                writeTag(state, laststate, hide, _head, _buffer, _body);
                state = TAG_STATE_XML;
                break;
              case XMP_HASH:
                if (!_buffer.compareLowerSubstr("xmp")) { // skip any accidental hash collisions
                  doDefault = true;
                  break;
                }
                laststate = state;
                writeTag(state, laststate, hide, _head, _buffer, _body);
                state = TAG_STATE_XMP;
                break;
              case TITLE_HASH:
                if (!_buffer.compareLowerSubstr("title")) { // skip any accidental hash collisions
                  doDefault = true;
                  break;
                }
                if (doneTitle) {
                  // Only the first <title> is captured; content of later ones is hidden.
                  hide = true;
                } else {
                  laststate = state;
                  state = TAG_STATE_TITLE;
                }
                break;
              case SLASH_TITLE_HASH:
                if (!_buffer.compareLowerSubstr("/title")) { // skip any accidental hash collisions
                  doDefault = true;
                  break;
                }
                if (doneTitle) {
                  hide = false;
                } else {
                  doneTitle = true;
                  state = laststate;
                }
                break;
              case PARAMETER_HASH:
                if (!_buffer.compareLowerSubstr(
                    "parameter")) { // skip any accidental hash collisions
                  doDefault = true;
                  break;
                }
                // <parameter name=".." value=".."> becomes a sitemesh property; the tag itself
                // is not written to head or body.
                parseProperties(tagObject, _buffer);
                String name = (String) tagObject.properties.get("name");
                String value = (String) tagObject.properties.get("value");

                if (name != null && value != null) {
                  _sitemeshProperties.put(name, value);
                }
                break;
              case META_HASH:
                if (!_buffer.compareLowerSubstr("meta")) { // skip any accidental hash collisions
                  doDefault = true;
                  break;
                }
                // The <meta> tag is both echoed to the output and recorded as a property.
                CharArray metaDestination = state == TAG_STATE_HEAD ? _head : _body;
                metaDestination.append('<');
                metaDestination.append(_buffer);
                metaDestination.append('>');
                parseProperties(tagObject, _buffer);
                name = (String) tagObject.properties.get("name");
                value = (String) tagObject.properties.get("content");

                if (name == null) {
                  // No name attribute: fall back to http-equiv, stored under a prefixed key.
                  String httpEquiv = (String) tagObject.properties.get("http-equiv");

                  if (httpEquiv != null) {
                    name = "http-equiv." + httpEquiv;
                  }
                }

                if (name != null && value != null) {
                  _metaProperties.put(name, value);
                }
                break;
              case SLASH_HEAD_HASH:
                if (!_buffer.compareLowerSubstr("/head")) { // skip any accidental hash collisions
                  doDefault = true;
                  break;
                }
                state = TAG_STATE_HTML;
                break;
              case FRAME_HASH:
                if (!_buffer.compareLowerSubstr("frame")) { // skip any accidental hash collisions
                  doDefault = true;
                  break;
                }
                _frameSet = true;
                break;
              case FRAMESET_HASH:
                if (!_buffer.compareLowerSubstr(
                    "frameset")) { // skip any accidental hash collisions
                  doDefault = true;
                  break;
                }
                _frameSet = true;
                break;
              case BODY_HASH:
                if (!_buffer.compareLowerSubstr("body")) { // skip any accidental hash collisions
                  doDefault = true;
                  break;
                }
                // NOTE(review): only a self-closing <body/> switches to TAG_STATE_BODY here; a
                // regular <body> leaves `state` unchanged. Confirm this is the intended behavior.
                if (_tokenType == TOKEN_EMPTYTAG) {
                  state = TAG_STATE_BODY;
                }
                _bodyProperties = parseProperties(tagObject, _buffer).properties;
                break;
              case CONTENT_HASH:
                if (!_buffer.compareLowerSubstr("content")) { // skip any accidental hash collisions
                  doDefault = true;
                  break;
                }
                state = TAG_STATE_NONE;
                Map props = parseProperties(tagObject, _buffer).properties;
                if (props != null) {
                  // Start capturing; the content is stored under the "tag" attribute's value
                  // when </content> is reached (and dropped if that attribute is missing).
                  tagged = true;
                  _contentTagId = (String) props.get("tag");
                }
                break;
              case SLASH_XMP_HASH:
                if (!_buffer.compareLowerSubstr("/xmp")) { // skip any accidental hash collisions
                  doDefault = true;
                  break;
                }
                hide = false;
                break;
              case SLASH_BODY_HASH:
                if (!_buffer.compareLowerSubstr("/body")) { // skip any accidental hash collisions
                  doDefault = true;
                  break;
                }
                // Everything after </body> is suppressed.
                state = TAG_STATE_NONE;
                hide = true;
                break;
              case SLASH_HTML_HASH:
                if (!_buffer.compareLowerSubstr("/html")) { // skip any accidental hash collisions
                  doDefault = true;
                  break;
                }
                state = TAG_STATE_NONE;
                hide = true;
                break;
              default:
                doDefault = true;
            }
            if (doDefault) writeTag(state, laststate, hide, _head, _buffer, _body);
          }
        } else if (!hide) {
          // Non-tag tokens. TOKEN_DOCTYPE and TOKEN_NONE match no branch and are dropped.
          if (_tokenType == TOKEN_TEXT) {
            if (state == TAG_STATE_TITLE) {
              _title.append(_buffer);
            } else if (shouldWriteToHead(state, laststate)) {
              _head.append(_buffer);
            } else {
              _body.append(_buffer);
            }
          } else if (_tokenType == TOKEN_COMMENT) {
            // The tokenizer stripped the delimiters; put them back around the comment text.
            final CharArray commentDestination =
                shouldWriteToHead(state, laststate) ? _head : _body;
            commentDestination.append("<!--");
            commentDestination.append(_buffer);
            commentDestination.append("-->");
          } else if (_tokenType == TOKEN_CDATA) {
            final CharArray commentDestination = state == TAG_STATE_HEAD ? _head : _body;
            commentDestination.append("<![CDATA[");
            commentDestination.append(_buffer);
            commentDestination.append("]]>");
          } else if (_tokenType == TOKEN_SCRIPT) {
            // The script buffer already contains everything through the closing "</script>";
            // only the leading '<' was consumed by the tokenizer.
            final CharArray commentDestination = state == TAG_STATE_HEAD ? _head : _body;
            commentDestination.append('<');
            commentDestination.append(_buffer);
          }
        }
      }
      // Token consumed; start building the next one.
      _buffer.setLength(0);

      start:
      while (true) {
        int c;

        if (_pushBack != 0) {
          c = _pushBack;
          _pushBack = 0;
        } else {
          try {
            c = reader.read();
          } catch (IOException e) {
            // NOTE(review): a read failure is deliberately treated as end of input here rather
            // than propagated, even though the method declares IOException.
            _tokenType = TOKEN_EOF;
            break start;
          }
        }

        if (c < 0) {
          // End of input. Pending plain text is flushed as one last TEXT token; because _state
          // is now STATE_EOF, the following pass reports TOKEN_EOF. Anything buffered in another
          // state (e.g. an unterminated tag or comment) is discarded.
          int tmpstate = _state;
          _state = STATE_EOF;

          if (_buffer.length() > 0 && tmpstate == STATE_TEXT) {
            _tokenType = TOKEN_TEXT;
            break start;
          } else {
            _tokenType = TOKEN_EOF;
            break start;
          }
        }

        switch (_state) {
          case STATE_TAG:
            {
              int buflen = _buffer.length();

              if (c == '>') {
                // End of tag; a trailing '/' marks it as an empty (self-closing) tag.
                if (_buffer.length() > 1 && _buffer.charAt(_buffer.length() - 1) == '/') {
                  _tokenType = TOKEN_EMPTYTAG;
                } else {
                  _tokenType = TOKEN_TAG;
                }
                _state = STATE_TEXT;
                break start;
              } else if (c == '/') {
                _buffer.append('/');
              } else if (c == '<' && buflen == 0) {
                // "<<": not a tag after all; keep both characters as literal text.
                _buffer.append("<<");
                _state = STATE_TEXT;
              } else if (c == '-'
                  && buflen == 2
                  && _buffer.charAt(1) == '-'
                  && _buffer.charAt(0) == '!') {
                // Saw "<!--": switch to comment mode and drop the delimiter from the buffer.
                _buffer.setLength(0);
                _state = STATE_COMMENT;
              } else if (c == '['
                  && buflen == 7
                  && _buffer.charAt(0) == '!'
                  && _buffer.charAt(1) == '['
                  && _buffer.compareLower("cdata", 2)) {
                // Saw "<![CDATA[": switch to CDATA mode and drop the delimiter from the buffer.
                _buffer.setLength(0);
                _state = STATE_CDATA;
              } else if ((c == 'e' || c == 'E')
                  && buflen == 7
                  && _buffer.charAt(0) == '!'
                  && _buffer.compareLower("doctyp", 1)) {
                // Saw "<!doctype" (any case).
                _buffer.append((char) c);
                _state = STATE_DOCTYPE;
              } else if ((c == 'T' || c == 't')
                  && buflen == 5
                  && _buffer.compareLower("scrip", 0)) {
                // Saw "<script" (any case): the whole element is read as a single token.
                _buffer.append((char) c);
                _state = STATE_SCRIPT;
              } else if (c == '"' || c == '\'') {
                // Start of a quoted attribute value; remember which quote closes it.
                _quote = c;
                _buffer.append((char) c);
                _state = STATE_TAG_QUOTE;
              } else {
                _buffer.append((char) c);
              }
            }
            break;

          case STATE_TEXT:
            {
              if (c == '<') {
                _state = STATE_TAG;
                // Emit the text collected so far; with nothing buffered, keep reading the tag.
                if (_buffer.length() > 0) {
                  _tokenType = TOKEN_TEXT;
                  break start;
                }
              } else {
                _buffer.append((char) c);
              }
            }
            break;

          case STATE_TAG_QUOTE:
            {
              if (c == '>') {
                // A '>' inside a quoted value ends the tag anyway: hand it back to STATE_TAG.
                _pushBack = c;
                _state = STATE_TAG;
              } else {
                _buffer.append((char) c);
                if (c == _quote) {
                  _state = STATE_TAG;
                }
              }
            }
            break;

          case STATE_COMMENT:
            {
              if (c == '>' && _comment >= 2) {
                // "-->" reached: strip the two trailing dashes already appended to the buffer.
                _buffer.setLength(_buffer.length() - 2);
                _comment = 0;
                _state = STATE_TEXT;
                _tokenType = TOKEN_COMMENT;
                break start;
              } else if (c == '-') {
                _comment++;
              } else {
                _comment = 0;
              }

              _buffer.append((char) c);
            }
            break;

          case STATE_CDATA:
            {
              if (c == '>' && _comment >= 2) {
                // "]]>" reached: strip the two trailing brackets already appended to the buffer.
                _buffer.setLength(_buffer.length() - 2);
                _comment = 0;
                _state = STATE_TEXT;
                _tokenType = TOKEN_CDATA;
                break start;
              } else if (c == ']') {
                _comment++;
              } else {
                _comment = 0;
              }

              _buffer.append((char) c);
            }
            break;

          case STATE_SCRIPT:
            {
              // Everything is kept, including the closing tag. _comment counts how many
              // characters of "/script" have been matched since the most recent '<'.
              _buffer.append((char) c);
              if (c == '<') {
                _comment = 0;
              } else if ((c == '/' && _comment == 0)
                  || ((c == 's' || c == 'S') && _comment == 1)
                  || ((c == 'c' || c == 'C') && _comment == 2)
                  || ((c == 'r' || c == 'R') && _comment == 3)
                  || ((c == 'i' || c == 'I') && _comment == 4)
                  || ((c == 'p' || c == 'P') && _comment == 5)
                  || ((c == 't' || c == 'T') && _comment == 6)) {
                _comment++;
              } else if (c == '>' && _comment >= 7) {
                _comment = 0;
                _state = STATE_TEXT;
                _tokenType = TOKEN_SCRIPT;
                break start;
              }
            }
            break;

          case STATE_DOCTYPE:
            {
              _buffer.append((char) c);
              if (c == '>') {
                _state = STATE_TEXT;
                _tokenType = TOKEN_DOCTYPE;
                break start;
              } else {
                _comment = 0;
              }
            }
            break;
        }
      }
    }

    // Help the GC
    _currentTaggedContent = null;
    _buffer = null;

    return new FastPage(
        buffer,
        _sitemeshProperties,
        _htmlProperties,
        _metaProperties,
        _bodyProperties,
        _title.toString().trim(),
        _head.toString().trim(),
        _body.toString().trim(),
        _frameSet);
  }