/**
 * Inserts the given node into the DOM directly after this node, i.e. as its following sibling.
 *
 * <p>Both the supplied node and this node's parent are validated as non-null before inserting.
 *
 * @param node the node to add after this node
 * @return this node, for chaining
 * @see #before(Node)
 */
public Node after(Node node) {
  Validate.notNull(node);
  Validate.notNull(parentNode);

  // The new node goes into the slot right behind this node within the parent's children.
  final int insertAt = siblingIndex() + 1;
  parentNode.addChildren(insertAt, node);
  return this;
}
/**
 * Registers allowed URL protocols for a URL-valued attribute of an element. Once registered, the
 * attribute's value is restricted to URLs that use one of the given protocols.
 *
 * <p>E.g.: {@code addProtocols("a", "href", "ftp", "http", "https")}
 *
 * @param tag tag the URL protocols apply to (must not be empty)
 * @param key attribute key (must not be empty)
 * @param protocols valid protocols (array must not be null; each entry must not be empty)
 * @return this, for chaining
 */
public Whitelist addProtocols(String tag, String key, String... protocols) {
  Validate.notEmpty(tag);
  Validate.notEmpty(key);
  Validate.notNull(protocols);

  TagName tagName = TagName.valueOf(tag);
  AttributeKey attrKey = AttributeKey.valueOf(key);

  // Create the per-tag attribute map on first use, then fetch it.
  if (!this.protocols.containsKey(tagName)) {
    this.protocols.put(tagName, new HashMap<AttributeKey, Set<Protocol>>());
  }
  Map<AttributeKey, Set<Protocol>> byAttribute = this.protocols.get(tagName);

  // Same for the per-attribute protocol set.
  if (!byAttribute.containsKey(attrKey)) {
    byAttribute.put(attrKey, new HashSet<Protocol>());
  }
  Set<Protocol> allowed = byAttribute.get(attrKey);

  // Each protocol is validated just before it is added, so earlier entries stay registered
  // even if a later one fails validation.
  for (int i = 0; i < protocols.length; i++) {
    String name = protocols[i];
    Validate.notEmpty(name);
    allowed.add(Protocol.valueOf(name));
  }
  return this;
}
/**
 * Parses the given HTML as a fragment and inserts the resulting nodes into this node's parent at
 * the given child index.
 *
 * <p>The parent is used as the parse context when it is an {@code Element}; otherwise the
 * fragment is parsed with a null context.
 *
 * @param index position within the parent's children at which to insert
 * @param html HTML to parse and insert (must not be null)
 */
private void addSiblingHtml(int index, String html) {
  Validate.notNull(html);
  Validate.notNull(parentNode);

  Element context = null;
  if (parent() instanceof Element) {
    context = (Element) parent();
  }

  List<Node> parsed = Parser.parseFragment(html, context, baseUri());
  Node[] toInsert = parsed.toArray(new Node[parsed.size()]);
  parentNode.addChildren(index, toInsert);
}
/**
 * Creates a new Node with an empty child list.
 *
 * @param baseUri base URI (not null); stored with leading and trailing whitespace trimmed
 * @param attributes attributes (not null, but may be empty)
 */
protected Node(String baseUri, Attributes attributes) {
  Validate.notNull(baseUri);
  Validate.notNull(attributes);

  this.attributes = attributes;
  this.baseUri = baseUri.trim();
  this.childNodes = new ArrayList<Node>(4); // small initial capacity
}
/**
 * Adds the given tag names to this whitelist's set of allowed elements. (A tag that is not
 * allowed will be removed from the HTML.)
 *
 * @param tags tag names to allow (array must not be null; each name must not be empty)
 * @return this (for chaining)
 */
public Whitelist addTags(String... tags) {
  Validate.notNull(tags);

  for (int i = 0; i < tags.length; i++) {
    String name = tags[i];
    Validate.notEmpty(name);
    tagNames.add(TagName.valueOf(name));
  }
  return this;
}
/**
 * Finds the position of an element within a list, comparing with {@code equals}.
 *
 * @param search element to look for (must not be null)
 * @param elements list to scan (must not be null)
 * @return the 0-based index of the first list entry equal to {@code search}, or {@code null} if
 *     there is none
 */
private static <E extends Element> Integer indexInList(Element search, List<E> elements) {
  Validate.notNull(search);
  Validate.notNull(elements);

  int position = 0;
  while (position < elements.size()) {
    if (elements.get(position).equals(search)) {
      return position;
    }
    position++;
  }
  return null; // not found
}
/** * Inserts the given child nodes into this element at the specified index. Current nodes will be * shifted to the right. The inserted nodes will be moved from their current parent. To prevent * moving, copy the nodes first. * * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, * {@code -1} at the end * @param children child nodes to insert * @return this element, for chaining. */ public Element insertChildren(int index, Collection<? extends Node> children) { Validate.notNull(children, "Children collection to be inserted must not be null."); int currentSize = childNodeSize(); if (index < 0) index += currentSize + 1; // roll around Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); ArrayList<Node> nodes = new ArrayList<Node>(children); Node[] nodeArray = nodes.toArray(new Node[nodes.size()]); addChildren(index, nodeArray); return this; }
/**
 * Replaces one of this node's children with another node, in the same child slot.
 *
 * <p>The replacement is first removed from its current parent (if it has one); the replaced node
 * ends up with no parent.
 *
 * @param out the child to replace; its parent must be this node
 * @param in the node to put in its place (must not be null)
 */
protected void replaceChild(Node out, Node in) {
  Validate.isTrue(out.parentNode == this);
  Validate.notNull(in);

  // Detach the replacement first; the slot index of `out` is only read afterwards.
  Node previousParent = in.parentNode;
  if (previousParent != null) {
    previousParent.removeChild(in);
  }

  final int slot = out.siblingIndex();
  childNodes.set(slot, in);
  in.parentNode = this;
  in.setSiblingIndex(slot);
  out.parentNode = null;
}
/**
 * Resets this builder's parse state for a new input: creates a fresh document, character reader,
 * tokeniser and element stack, and records the input, base URI and error list.
 *
 * @param input the text to parse (must not be null)
 * @param baseUri base URI for the new document (must not be null)
 * @param errors error list handed to the tokeniser and stored on this builder
 */
protected void initialiseParse(String input, String baseUri, ParseErrorList errors) {
  Validate.notNull(input, "String input must not be null");
  Validate.notNull(baseUri, "BaseURI must not be null");

  // Statement order is deliberately unchanged: the reader is handed `this`, and the tokeniser
  // is built from the reader.
  this.doc = new Document(baseUri);
  this.reader = new CharacterReader(input, this);
  this.errors = errors;
  this.tokeniser = new Tokeniser(this.reader, errors);
  this.stack = new DescendableLinkedList<Element>();
  this.baseUri = baseUri;
  this.input = input;
  this.resetFlag = false;
}
/**
 * Removes one of this node's children and re-indexes the remaining children.
 *
 * @param out the child to remove; its parent must be this node. Its parent reference is cleared.
 */
protected void removeChild(Node out) {
  Validate.isTrue(out.parentNode == this);

  final int position = out.siblingIndex();
  childNodes.remove(position);
  reindexChildren();
  out.parentNode = null;
}
/**
 * Adds inner HTML to the start of this element. The supplied HTML is parsed as a fragment (with
 * this element as context), and the resulting nodes are inserted before the element's existing
 * children.
 *
 * @param html HTML to add inside this element, before the existing HTML (must not be null)
 * @return this element
 * @see #html(String)
 */
public Element prepend(String html) {
  Validate.notNull(html);

  List<Node> parsed = Parser.parseFragment(html, this, baseUri());
  Node[] fragment = parsed.toArray(new Node[parsed.size()]);
  addChildren(0, fragment);
  return this;
}
/**
 * Finds an element by ID, including or under this element.
 *
 * <p>Note that this returns the first matching ID, starting with this element. Searching down
 * from a different starting point may find a different element with the same ID. For the unique
 * element by ID within a Document, use {@link Document#getElementById(String)}.
 *
 * @param id the ID to search for (must not be empty)
 * @return the first matching element by ID, starting with this element, or null if none found
 */
public Element getElementById(String id) {
  Validate.notEmpty(id);

  Elements matches = Collector.collect(new Evaluator.Id(id), this);
  return matches.size() > 0 ? matches.get(0) : null;
}
/** * Get an absolute URL from a URL attribute that may be relative (i.e. an <code><a href></code> * or <code><img src></code>). * * <p>E.g.: <code>String absUrl = linkEl.absUrl("href");</code> * * <p>If the attribute value is already absolute (i.e. it starts with a protocol, like <code> * http://</code> or <code>https://</code> etc), and it successfully parses as a URL, the * attribute is returned directly. Otherwise, it is treated as a URL relative to the element's * {@link #baseUri}, and made absolute using that. * * <p>As an alternate, you can use the {@link #attr} method with the <code>abs:</code> prefix, * e.g.: <code>String absUrl = linkEl.attr("abs:href");</code> * * @param attributeKey The attribute key * @return An absolute URL if one could be made, or an empty string (not null) if the attribute * was missing or could not be made successfully into a URL. * @see #attr * @see java.net.URL#URL(java.net.URL, String) */ public String absUrl(String attributeKey) { Validate.notEmpty(attributeKey); String relUrl = attr(attributeKey); if (!hasAttr(attributeKey)) { return ""; // nothing to make absolute with } else { URL base; try { try { base = new URL(baseUri); } catch (MalformedURLException e) { // the base is unsuitable, but the attribute may be abs on its own, so try that URL abs = new URL(relUrl); return abs.toExternalForm(); } // workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as // desired if (relUrl.startsWith("?")) relUrl = base.getPath() + relUrl; URL abs = new URL(base, relUrl); return abs.toExternalForm(); } catch (MalformedURLException e) { return ""; } } }
/**
 * Stores an attribute under its own key, replacing any attribute already stored under that key.
 *
 * @param attribute the attribute to store (must not be null)
 */
public void put(Attribute attribute) {
  Validate.notNull(attribute);

  // The backing map is created lazily, on the first put.
  if (attributes == null) {
    attributes = new LinkedHashMap(2);
  }
  attributes.put(attribute.getKey(), attribute);
}
/**
 * Gets this node's previous sibling.
 *
 * <p>A node without a parent has no siblings, so null is returned for it rather than failing
 * with a {@code NullPointerException} (this matches {@code nextSibling()}).
 *
 * @return the previous sibling, or null if this is the first sibling or this node has no parent
 */
public Node previousSibling() {
  if (parentNode == null) {
    return null; // root: no parent, hence no siblings
  }

  List<Node> siblings = parentNode.childNodes;
  Integer index = siblingIndex();
  Validate.notNull(index);
  if (index > 0) {
    return siblings.get(index - 1);
  }
  return null; // already the first child
}
/**
 * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s,
 * the {@code nextElementSibling} of the first {@code p} is the second {@code p}.
 *
 * <p>This is similar to {@link #nextSibling()}, but specifically finds only Elements.
 *
 * @return the next element, or null if there is no next element (or this element has no parent)
 * @see #previousElementSibling()
 */
public Element nextElementSibling() {
  if (parentNode == null) {
    return null;
  }

  List<Element> siblings = parent().children();
  Integer position = indexInList(this, siblings);
  Validate.notNull(position);

  final int next = position + 1;
  return next < siblings.size() ? siblings.get(next) : null;
}
/**
 * Gets an attribute's value by its key.
 *
 * <p>To get an absolute URL from an attribute that may hold a relative URL, prefix the key with
 * {@code abs:}, which is a shortcut to the {@link #absUrl} method. E.g.:
 *
 * <blockquote>
 *
 * {@code String url = a.attr("abs:href");}
 *
 * </blockquote>
 *
 * <p>An attribute stored under the exact key takes precedence over the {@code abs:} shortcut.
 *
 * @param attributeKey the attribute key (must not be null)
 * @return the attribute, or empty string if not present (to avoid nulls)
 * @see #attributes()
 * @see #hasAttr(String)
 * @see #absUrl(String)
 */
public String attr(String attributeKey) {
  Validate.notNull(attributeKey);

  if (attributes.hasKey(attributeKey)) {
    return attributes.get(attributeKey);
  }
  if (attributeKey.toLowerCase().startsWith("abs:")) {
    return absUrl(attributeKey.substring("abs:".length()));
  }
  return "";
}
/**
 * Looks up an attribute value by key. The key is lower-cased before the lookup.
 *
 * @param key the attribute key (must not be empty)
 * @return the attribute's value, or {@code UnsupportedUrlFragment.DISPLAY_NAME} when no
 *     attributes have been stored yet or none is stored under the key
 */
public String get(String key) {
  Validate.notEmpty(key);

  if (this.attributes != null) {
    Attribute found = (Attribute) this.attributes.get(key.toLowerCase());
    if (found != null) {
      return found.getValue();
    }
  }
  // NOTE(review): this constant is used as the "absent" value; presumably an empty string.
  // TODO confirm.
  return UnsupportedUrlFragment.DISPLAY_NAME;
}
/**
 * Gets the previous element sibling of this element.
 *
 * @return the previous element, or null if there is no previous element (or this element has no
 *     parent)
 * @see #nextElementSibling()
 */
public Element previousElementSibling() {
  if (parentNode == null) {
    return null;
  }

  List<Element> siblings = parent().children();
  Integer position = indexInList(this, siblings);
  Validate.notNull(position);

  return position > 0 ? siblings.get(position - 1) : null;
}
/**
 * Sets the text of this element. Any existing contents (text or elements) are cleared first, and
 * a single text node holding the new text is appended.
 *
 * @param text unencoded text (must not be null)
 * @return this element
 */
public Element text(String text) {
  Validate.notNull(text);

  empty();
  appendChild(new TextNode(text, baseUri));
  return this;
}
/**
 * Removes a class name from this element's {@code class} attribute.
 *
 * @param className class name to remove (must not be null)
 * @return this element
 */
public Element removeClass(String className) {
  Validate.notNull(className);

  Set<String> remaining = classNames();
  remaining.remove(className);
  classNames(remaining); // write the updated set back
  return this;
}
/**
 * Tests if this element has an attribute.
 *
 * <p>For a key with the {@code abs:} prefix, this also returns true when the un-prefixed
 * attribute exists and {@code absUrl} yields a non-empty result for it.
 *
 * @param attributeKey the attribute key to check (must not be null)
 * @return true if the attribute exists, false if not
 */
public boolean hasAttr(String attributeKey) {
  Validate.notNull(attributeKey);

  boolean absPrefixed = attributeKey.toLowerCase().startsWith("abs:");
  if (absPrefixed) {
    String bareKey = attributeKey.substring("abs:".length());
    boolean resolvable = attributes.hasKey(bareKey) && !absUrl(bareKey).equals("");
    if (resolvable) {
      return true;
    }
  }
  // Fall back to a plain lookup under the key exactly as given.
  return attributes.hasKey(attributeKey);
}
/** * Get this node's next sibling. * * @return next sibling, or null if this is the last sibling */ public Node nextSibling() { if (parentNode == null) return null; // root List<Node> siblings = parentNode.childNodes; Integer index = siblingIndex(); Validate.notNull(index); if (siblings.size() > index + 1) return siblings.get(index + 1); else return null; }
/**
 * Inserts the given nodes into this node's children at the given index, re-parenting each one,
 * and then re-indexes the children.
 *
 * @param index position in the child list at which the first of the given nodes will end up
 * @param children nodes to insert (no element may be null)
 */
protected void addChildren(int index, Node... children) {
  Validate.noNullElements(children);

  // Walk the array back to front, always inserting at the same index, so the nodes end up in
  // the child list in their original order.
  int i = children.length;
  while (--i >= 0) {
    Node child = children[i];
    reparentChild(child);
    childNodes.add(index, child);
  }
  reindexChildren();
}
/**
 * Toggles a class name on this element's {@code class} attribute: if present, it is removed;
 * otherwise it is added.
 *
 * @param className class name to toggle (must not be null)
 * @return this element
 */
public Element toggleClass(String className) {
  Validate.notNull(className);

  Set<String> classes = classNames();
  // remove() reports whether the name was present; if it was not, add it instead.
  if (!classes.remove(className)) {
    classes.add(className);
  }
  classNames(classes);
  return this;
}
/** * Creates a new, clean document, from the original dirty document, containing only elements * allowed by the whitelist. The original document is not modified. Only elements from the dirt * document's <code>body</code> are used. * * @param dirtyDocument Untrusted base document to clean. * @return cleaned document. */ public Document clean(Document dirtyDocument) { Validate.notNull(dirtyDocument); Document clean = Document.createShell(dirtyDocument.baseUri()); if (dirtyDocument.body() != null) // frameset documents won't have a body. the clean doc will have empty body. copySafeNodes(dirtyDocument.body(), clean.body()); return clean; }
/** * Determines if the input document <b>body</b>is valid, against the whitelist. It is considered * valid if all the tags and attributes in the input HTML are allowed by the whitelist, and that * there is no content in the <code>head</code>. * * <p>This method can be used as a validator for user input. An invalid document will still be * cleaned successfully using the {@link #clean(Document)} document. If using as a validator, it * is recommended to still clean the document to ensure enforced attributes are set correctly, and * that the output is tidied. * * @param dirtyDocument document to test * @return true if no tags or attributes need to be removed; false if they do */ public boolean isValid(Document dirtyDocument) { Validate.notNull(dirtyDocument); Document clean = Document.createShell(dirtyDocument.baseUri()); int numDiscarded = copySafeNodes(dirtyDocument.body(), clean.body()); return numDiscarded == 0 && dirtyDocument.head().childNodes().size() == 0; // because we only look at the body, but we start from a shell, make sure there's // nothing in the head }
/**
 * Adds an enforced attribute to a tag. An enforced attribute will always be added to the
 * element; if the element already has the attribute set, it will be overridden.
 *
 * <p>E.g.: {@code addEnforcedAttribute("a", "rel", "nofollow")} will make all {@code a} tags
 * output as {@code <a href="..." rel="nofollow">}
 *
 * @param tag the tag the enforced attribute is for (must not be empty). The tag is added to the
 *     allowed tag list if it is not already there.
 * @param key the attribute key (must not be empty)
 * @param value the enforced attribute value (must not be empty)
 * @return this (for chaining)
 */
public Whitelist addEnforcedAttribute(String tag, String key, String value) {
  Validate.notEmpty(tag);
  Validate.notEmpty(key);
  Validate.notEmpty(value);

  TagName tagName = TagName.valueOf(tag);
  if (!tagNames.contains(tagName)) {
    tagNames.add(tagName);
  }
  AttributeKey attrKey = AttributeKey.valueOf(key);
  AttributeValue attrVal = AttributeValue.valueOf(value);

  // Reuse the tag's existing attribute map, or register a new one on first use.
  Map<AttributeKey, AttributeValue> forTag;
  if (enforcedAttributes.containsKey(tagName)) {
    forTag = enforcedAttributes.get(tagName);
  } else {
    forTag = new HashMap<AttributeKey, AttributeValue>();
    enforcedAttributes.put(tagName, forTag);
  }
  forTag.put(attrKey, attrVal);
  return this;
}
/**
 * Removes this node from the DOM, and moves its children up into the node's parent. This has
 * the effect of dropping the node but keeping its children.
 *
 * <p>For example, with the input html:<br>
 * {@code <div>One <span>Two <b>Three</b></span></div>}<br>
 * Calling {@code element.unwrap()} on the {@code span} element will result in the html:<br>
 * {@code <div>One Two <b>Three</b></div>}<br>
 * and the {@code "Two "} {@link TextNode} being returned.
 *
 * @return the first child of this node, after the node has been unwrapped. Null if the node had
 *     no children.
 * @see #remove()
 * @see #wrap(String)
 */
public Node unwrap() {
  Validate.notNull(parentNode);

  final int position = siblingIndex;
  // Remember the first child before the children are moved out of this node.
  Node firstChild = null;
  if (childNodes.size() > 0) {
    firstChild = childNodes.get(0);
  }

  parentNode.addChildren(position, this.childNodesAsArray());
  this.remove();
  return firstChild;
}
/**
 * Creates the chat object that matches the prefix of the given identity.
 *
 * <ul>
 *   <li>{@code 8:...} yields a {@code ChatIndividual}.
 *   <li>{@code 19:...@thread.skype} yields a {@code ChatGroup}.
 *   <li>Any other {@code 19:...} identity is logged as a skipped P2P chat and yields null.
 * </ul>
 *
 * @param client the client the chat belongs to (must not be null, and must be a {@code
 *     SkypeImpl})
 * @param identity the chat identity (must not be null or empty)
 * @return the created chat, or null for a skipped P2P chat
 * @throws IllegalArgumentException if the identity starts with neither {@code 19:} nor {@code
 *     8:}
 * @throws SkypeException declared by the signature; not thrown directly by this method's own
 *     statements
 */
public static Chat createChat(Skype client, String identity) throws SkypeException {
  Validate.notNull(client, "Client must not be null");
  Validate.isTrue(
      client instanceof SkypeImpl,
      String.format("Now is not the time to use that, %s", client.getUsername()));
  Validate.notEmpty(identity, "Identity must not be null/empty");

  if (identity.startsWith("8:")) {
    return new ChatIndividual((SkypeImpl) client, identity);
  }
  if (!identity.startsWith("19:")) {
    throw new IllegalArgumentException(
        String.format("Unknown group type with identity %s", identity));
  }

  // From here on the identity is known to start with "19:".
  if (identity.endsWith("@thread.skype")) {
    return new ChatGroup((SkypeImpl) client, identity);
  }
  client.getLogger().info(String.format("Skipping P2P chat with identity %s", identity));
  return null;
}