private static String getMetaValue(Source source, String key) { for (int pos = 0; pos < source.length(); ) { StartTag startTag = source.getNextStartTag(pos, "name", key, false); if (startTag == null) return null; if (startTag.getName() == HTMLElementName.META) return startTag.getAttributeValue("content"); // Attribute values are automatically decoded pos = startTag.getEnd(); } return null; }
final List<Element> getChildElements(int depth) { if (depth != -1) this.depth = depth; if (childElements == null) { if (!Config.IncludeServerTagsInElementHierarchy && end == startTag.end) { childElements = Collections.emptyList(); } else { final int childDepth = (depth == -1 ? -1 : depth + 1); childElements = new ArrayList<Element>(); int pos = Config.IncludeServerTagsInElementHierarchy ? begin + 1 : startTag.end; final int maxChildBegin = (Config.IncludeServerTagsInElementHierarchy || endTag == null) ? end : endTag.begin; while (true) { final StartTag childStartTag = source.getNextStartTag(pos); if (childStartTag == null || childStartTag.begin >= maxChildBegin) break; if (Config.IncludeServerTagsInElementHierarchy) { if (childStartTag.begin < startTag.end && !childStartTag.getTagType().isServerTag() && !startTag.getTagType().isServerTag()) { // A start tag is found within another start tag, but neither is a server tag. // This only legitimately happens in very rare cases like entity definitions in // doctype. // We don't want to include the child elements in the hierarchy. pos = childStartTag.end; continue; } } else if (childStartTag.getTagType().isServerTag()) { pos = childStartTag.end; continue; } final Element childElement = childStartTag.getElement(); if (childElement.end > end) { if (source.logger.isInfoEnabled()) source.logger.info( "Child " + childElement.getDebugInfo() + " extends beyond end of parent " + getDebugInfo()); if (!INCLUDE_INCORRECTLY_NESTED_CHILDREN_IN_HIERARCHY) { pos = childElement.end; continue; } } childElement.getChildElements(childDepth); if (childElement.parentElement == Element.NOT_CACHED) { // make sure element was not added as a child of a descendent // element (can happen with overlapping elements) childElement.parentElement = this; childElements.add(childElement); } pos = childElement.end; } } } return childElements; }
/**
 * Builds a diagnostic description of this element.
 *
 * <p>The {@link #NOT_CACHED} placeholder instance is described simply as
 * <code>"NOT_CACHED"</code>. For any other element the text consists of the prefix
 * <code>"Element "</code>, the start tag's debug text, a <code>'-'</code> if {@link #isEmpty()}
 * is <code>false</code>, the end tag if there is one, a space, the start tag's debug tag type,
 * and finally the debug information supplied by the superclass.
 *
 * @return a string describing this element for debugging purposes.
 */
public String getDebugInfo() {
  if (this == NOT_CACHED) {
    return "NOT_CACHED";
  }
  final StringBuilder description = new StringBuilder("Element ");
  startTag.appendDebugTag(description);
  if (!isEmpty()) {
    description.append('-');
  }
  if (endTag != null) {
    description.append(endTag);
  }
  description.append(' ');
  startTag.appendDebugTagType(description);
  return description.append(super.getDebugInfo()).toString();
}
/**
 * Returns the element that directly contains this element in the document element hierarchy.
 *
 * <p>This method can only be used if {@link Source#fullSequentialParse()} was called (either
 * explicitly or implicitly) immediately after the <code>Source</code> object was constructed.
 * If no full sequential parse has been performed, or if it was performed after this element was
 * found, an <code>IllegalStateException</code> is thrown.
 *
 * <p>The result is <code>null</code> for a <a href="Source.html#TopLevelElement">top-level
 * element</a>, and also for any element formed from a {@linkplain TagType#isServerTag() server
 * tag}, regardless of whether it is nested inside a normal element.
 *
 * <p>The parent is determined on the first call and cached for subsequent calls.
 *
 * <p>See the {@link Source#getChildElements()} method for more details.
 *
 * @return the parent of this element in the document element hierarchy, or <code>null</code> if
 *     this element is a <a href="Source.html#TopLevelElement">top-level element</a>.
 * @throws IllegalStateException if a {@linkplain Source#fullSequentialParse() full sequential
 *     parse} has not been performed or if it was performed after this element was found.
 * @see #getChildElements()
 */
public Element getParentElement() {
  if (parentElement != Element.NOT_CACHED) {
    return parentElement;
  }
  if (!source.wasFullSequentialParseCalled()) {
    throw new IllegalStateException(
        "This operation is only possible after a full sequential parse has been performed");
  }
  if (startTag.isOrphaned()) {
    throw new IllegalStateException(
        "This operation is only possible if a full sequential parse was performed immediately after construction of the Source object");
  }
  // Building the source's child element hierarchy assigns parents along the way.
  source.getChildElements();
  if (parentElement == Element.NOT_CACHED) {
    // Still unassigned after the hierarchy was built: record that there is no parent.
    parentElement = null;
  }
  return parentElement;
}
/**
 * Indicates whether this element is an <a target="_blank"
 * href="http://www.w3.org/TR/REC-xml#dt-eetag">empty-element tag</a>.
 *
 * <p>This is equivalent to {@link #getStartTag()}<code>.</code>{@link
 * StartTag#isEmptyElementTag() isEmptyElementTag()}; the answer is obtained by delegating to
 * this element's start tag.
 *
 * @return <code>true</code> if this element is an <a target="_blank"
 *     href="http://www.w3.org/TR/REC-xml#dt-eetag">empty-element tag</a>, otherwise
 *     <code>false</code>.
 */
public boolean isEmptyElementTag() {
  return startTag.isEmptyElementTag();
}
/**
 * Returns the {@linkplain StartTag#getName() name} of the {@linkplain #getStartTag() start tag}
 * of this element, always in lower case.
 *
 * <p>This is equivalent to {@link #getStartTag()}<code>.</code>{@link StartTag#getName()
 * getName()}; the value is obtained by delegating to this element's start tag.
 *
 * <p>See the {@link Tag#getName()} method for more information.
 *
 * @return the name of the {@linkplain #getStartTag() start tag} of this element, always in lower
 *     case.
 */
public String getName() {
  return startTag.getName();
}