Пример #1
0
  /**
   * Locates the tag name inside the supplied {@link CharArray} and records where it ends.
   *
   * <p>The name is the first run of non-whitespace characters in the buffer. Rather than being
   * copied out, it is marked as the buffer's current substring so that it can be compared against
   * later; a trailing <tt>/</tt> is left out of the marked range. The supplied tag instance is
   * updated in place and handed back - no new {@link Tag} is created here, which avoids excess
   * object creation.
   *
   * @param tag the tag instance to update; this method writes only its <tt>nameEndIdx</tt> field.
   * @param buf the buffer holding the text of the tag to scan.
   * @return the same tag instance that was passed in, with <tt>nameEndIdx</tt> set to the index
   *     just past the last non-whitespace character of the name. However if the buffer contained
   *     nothing but whitespace, this method returns <tt>null</tt> and leaves both the tag and the
   *     buffer's marked substring untouched.
   */
  private Tag parseTag(Tag tag, CharArray buf) {
    final int length = buf.length();

    // Advance past any leading whitespace to reach the first character of the name.
    int nameStart = 0;
    while (nameStart < length && Character.isWhitespace(buf.charAt(nameStart))) {
      nameStart++;
    }
    if (nameStart == length) {
      return null;
    }

    // The name runs up to the next whitespace character (or the end of the buffer). At least one
    // character is consumed here, so nameStop - 1 below is always a valid index.
    int nameStop = nameStart;
    while (nameStop < length && !Character.isWhitespace(buf.charAt(nameStop))) {
      nameStop++;
    }

    // Exclude a trailing '/' from the marked name.
    int markedEnd;
    if (buf.charAt(nameStop - 1) == '/') {
      markedEnd = nameStop - 1;
    } else {
      markedEnd = nameStop;
    }
    buf.setSubstr(nameStart, markedEnd);

    // Remember where the name finishes so the properties can be pulled out later if need be.
    tag.nameEndIdx = nameStop;
    return tag;
  }
Пример #2
0
  /**
   * Extracts the properties (attributes) for the tag from the tag's HTML and adds them to the tag.
   * This is only called when necessary since it has quite a lot of overhead.
   *
   * <p>Scanning starts at <tt>tag.nameEndIdx</tt> and repeatedly reads <tt>name = value</tt>
   * pairs:
   *
   * <ul>
   *   <li>A name is either a quoted run (the quotes are kept as part of the marked name) or a run
   *       of characters up to the next whitespace or <tt>=</tt>.
   *   <li>A name that is not followed by <tt>=</tt> is skipped without adding a property.
   *   <li>A value may be double-quoted, single-quoted, or unquoted (ending at the next
   *       whitespace). When a space separates <tt>=</tt> from the value, only a quoted value is
   *       accepted.
   *   <li>Scanning stops as soon as the end of the buffer is reached, including in the middle of
   *       an unterminated quoted name or value; properties found up to that point are kept.
   * </ul>
   *
   * <p>Names are added using the lower-cased form returned by <tt>buffer.getLowerSubstr()</tt>.
   *
   * @param tag the tag that is currently being processed. This should be the tag that was returned
   *     as a result of a call to {@link #parseTag(FastPageParser.Tag, CharArray)} (ie, it has the
   *     <tt>nameEndIdx</tt> field set correctly for the tag in question). The <tt>properties</tt>
   *     field can be in an undefined state - it will get replaced regardless.
   * @param buffer a <tt>CharArray</tt> containing the entire tag that is being parsed.
   * @return the same tag instance that was passed in, only it will now be populated with any
   *     properties that were specified in the tag's HTML.
   */
  private static Tag parseProperties(Tag tag, CharArray buffer) {
    int len = buffer.length();
    int idx = tag.nameEndIdx;

    // Start with the shared empty map. A real map is expected to be created lazily once a
    // property is found (presumably inside Tag.addProperty - not visible from here).
    tag.properties = Collections.EMPTY_MAP;

    while (idx < len) {
      // Skip forward to the next non-whitespace character.
      while (idx < len && Character.isWhitespace(buffer.charAt(idx))) {
        idx++;
      }
      if (idx == len) {
        break;
      }

      // Read the attribute name: either a quoted run or everything up to whitespace / '='.
      int begin = idx;
      char nameQuote = buffer.charAt(idx);
      if (nameQuote == '"' || nameQuote == '\'') {
        idx++;
        while (idx < len && buffer.charAt(idx) != nameQuote) {
          idx++;
        }
        if (idx == len) {
          // Unterminated quote: nothing more can be parsed.
          break;
        }
        idx++;
      } else {
        while (idx < len
            && !Character.isWhitespace(buffer.charAt(idx))
            && buffer.charAt(idx) != '=') {
          idx++;
        }
      }

      // Mark the substring. This is the attribute name.
      buffer.setSubstr(begin, idx);

      // Allow whitespace between the name and the '='.
      while (idx < len && Character.isWhitespace(buffer.charAt(idx))) {
        idx++;
      }

      // No '=' means a valueless attribute: skip it and rescan from the current position.
      if (idx == len || buffer.charAt(idx) != '=') {
        continue;
      }

      idx++;
      if (idx == len) {
        break;
      }

      // Tolerate line breaks directly after the '='.
      while (idx < len && (buffer.charAt(idx) == '\n' || buffer.charAt(idx) == '\r')) {
        idx++;
      }
      if (idx == len) {
        // The tag ended right after "=<newline>"; without this check the reads below would
        // index past the end of the buffer.
        break;
      }

      // After a space, only a quoted value (double OR single quotes) is accepted; anything else
      // is treated as the start of the next attribute.
      if (buffer.charAt(idx) == ' ') {
        while (idx < len && Character.isWhitespace(buffer.charAt(idx))) {
          idx++;
        }
        if (idx == len) {
          break;
        }
        char afterSpace = buffer.charAt(idx);
        if (afterSpace != '"' && afterSpace != '\'') {
          continue;
        }
      }

      // Read the attribute value; [begin, end) excludes any surrounding quotes.
      begin = idx;
      int end;
      char valueQuote = buffer.charAt(idx);
      if (valueQuote == '"' || valueQuote == '\'') {
        idx++;
        begin = idx;
        while (idx < len && buffer.charAt(idx) != valueQuote) {
          idx++;
        }
        if (idx == len) {
          // Unterminated quoted value: drop it and stop.
          break;
        }
        end = idx;
        idx++;
      } else {
        while (idx < len && !Character.isWhitespace(buffer.charAt(idx))) {
          idx++;
        }
        end = idx;
      }

      // Extract the name and value as String objects and add them to the property map.
      String name = buffer.getLowerSubstr();
      String value = buffer.substring(begin, end);

      tag.addProperty(name, value);
    }
    return tag;
  }