/** * Populates a {@link Tag} object using data from the supplied {@link CharArray}. * * <p>The supplied tag parameter is reset and reused - this avoids excess object creation which * hwlps performance. * * @return the same tag instance that was passed in, except it will be populated with a new * <tt>name</tt> value (and the corresponding <tt>nameEndIdx</tt> value). However if the tag * contained nathing but whitespace, this method will return <tt>null</tt>. */ private Tag parseTag(Tag tag, CharArray buf) { int len = buf.length(); int idx = 0; int begin; // Skip over any leading whitespace in the tag while (idx < len && Character.isWhitespace(buf.charAt(idx))) idx++; if (idx == len) return null; // Find out where the non-whitespace characters end. This will give us the tag name. begin = idx; while (idx < len && !Character.isWhitespace(buf.charAt(idx))) idx++; // Mark the tag name as a substring within the buffer. This allows us to perform // a substring comparison against it at a later date buf.setSubstr(begin, buf.charAt(idx - 1) == '/' ? idx - 1 : idx); // Remember where the name finishes so we can pull out the properties later if need be tag.nameEndIdx = idx; return tag; }
/** * This is called when we need to extract the properties for the tag from the tag's HTML. We only * call this when necessary since it has quite a lot of overhead. * * @param tag the tag that is currently being processed. This should be the tag that was returned * as a result of a call to {@link #parseTag(FastPageParser.Tag, CharArray)} (ie, it has the * <tt>name</tt> and <tt>nameEndIdx</tt> fields set correctly for the tag in question. The * <tt>properties</tt> field can be in an undefined state - it will get replaced regardless). * @param buffer a <tt>CharArray</tt> containing the entire tag that is being parsed. * @return the same tag instance that was passed in, only it will now be populated with any * properties that were specified in the tag's HTML. */ private static Tag parseProperties(Tag tag, CharArray buffer) { int len = buffer.length(); int idx = tag.nameEndIdx; // Start with an empty hashmap. A new HashMap is lazy-created if we happen to find any // properties tag.properties = Collections.EMPTY_MAP; int begin; while (idx < len) { // Skip forward to the next non-whitespace character while (idx < len && Character.isWhitespace(buffer.charAt(idx))) idx++; if (idx == len) continue; begin = idx; if (buffer.charAt(idx) == '"') { idx++; while (idx < len && buffer.charAt(idx) != '"') idx++; if (idx == len) continue; idx++; } else if (buffer.charAt(idx) == '\'') { idx++; while (idx < len && buffer.charAt(idx) != '\'') idx++; if (idx == len) continue; idx++; } else { while (idx < len && !Character.isWhitespace(buffer.charAt(idx)) && buffer.charAt(idx) != '=') idx++; } // Mark the substring. This is the attribute name buffer.setSubstr(begin, idx); if (idx < len && Character.isWhitespace(buffer.charAt(idx))) { while (idx < len && Character.isWhitespace(buffer.charAt(idx))) idx++; } if (idx == len || buffer.charAt(idx) != '=') continue; idx++; if (idx == len) continue; while (idx < len && (buffer.charAt(idx) == '\n' || buffer.charAt(idx) == '\r')) idx++; if (buffer.charAt(idx) == ' ') { while (idx < len && Character.isWhitespace(buffer.charAt(idx))) idx++; if (idx == len || (buffer.charAt(idx) != '"' && buffer.charAt(idx) != '"')) continue; } begin = idx; int end; if (buffer.charAt(idx) == '"') { idx++; begin = idx; while (idx < len && buffer.charAt(idx) != '"') idx++; if (idx == len) continue; end = idx; idx++; } else if (buffer.charAt(idx) == '\'') { idx++; begin = idx; while (idx < len && buffer.charAt(idx) != '\'') idx++; if (idx == len) continue; end = idx; idx++; } else { while (idx < len && !Character.isWhitespace(buffer.charAt(idx))) idx++; end = idx; } // Extract the name and value as String objects and add them to the property map String name = buffer.getLowerSubstr(); String value = buffer.substring(begin, end); tag.addProperty(name, value); } return tag; }