Ejemplo n.º 1
0
  /**
   * Places the given node into the DOM directly behind this node, i.e. as its following sibling.
   *
   * <p>Both the supplied node and this node's parent are checked with {@code Validate.notNull}
   * before anything is inserted.
   *
   * @param node the node to insert after this node
   * @return this node, to allow call chaining
   * @see #before(Node)
   */
  public Node after(Node node) {
    Validate.notNull(node);
    Validate.notNull(parentNode);

    // The new sibling takes the slot immediately following this node's own position.
    final int insertAt = siblingIndex() + 1;
    parentNode.addChildren(insertAt, node);
    return this;
  }
Ejemplo n.º 2
0
  /**
   * Registers the URL protocols that are permitted for a URL-valued attribute of a tag. Values of
   * that attribute are thereby restricted to URLs using one of the registered protocols.
   *
   * <p>E.g.: <code>addProtocols("a", "href", "ftp", "http", "https")</code>
   *
   * @param tag Tag the URL protocols apply to
   * @param key Attribute key
   * @param protocols Protocols to allow for that attribute
   * @return this, for chaining
   */
  public Whitelist addProtocols(String tag, String key, String... protocols) {
    Validate.notEmpty(tag);
    Validate.notEmpty(key);
    Validate.notNull(protocols);

    TagName tagName = TagName.valueOf(tag);
    AttributeKey attrKey = AttributeKey.valueOf(key);

    // Ensure a per-tag attribute map is registered, then fetch it.
    if (!this.protocols.containsKey(tagName)) {
      this.protocols.put(tagName, new HashMap<AttributeKey, Set<Protocol>>());
    }
    Map<AttributeKey, Set<Protocol>> attrsForTag = this.protocols.get(tagName);

    // Likewise ensure a protocol set is registered for the attribute, then fetch it.
    if (!attrsForTag.containsKey(attrKey)) {
      attrsForTag.put(attrKey, new HashSet<Protocol>());
    }
    Set<Protocol> allowed = attrsForTag.get(attrKey);

    for (int i = 0; i < protocols.length; i++) {
      String name = protocols[i];
      Validate.notEmpty(name);
      allowed.add(Protocol.valueOf(name));
    }
    return this;
  }
Ejemplo n.º 3
0
  /**
   * Parses the given HTML as a fragment and inserts the resulting nodes into this node's parent at
   * the given child index.
   *
   * @param index position in the parent's children at which the parsed nodes are inserted
   * @param html markup to parse; checked with {@code Validate.notNull}
   */
  private void addSiblingHtml(int index, String html) {
    Validate.notNull(html);
    Validate.notNull(parentNode);

    // Only an Element parent is handed to the parser as context; otherwise no context is given.
    Element context = null;
    if (parent() instanceof Element) {
      context = (Element) parent();
    }
    List<Node> parsed = Parser.parseFragment(html, context, baseUri());
    Node[] toInsert = parsed.toArray(new Node[parsed.size()]);
    parentNode.addChildren(index, toInsert);
  }
Ejemplo n.º 4
0
  /**
   * Constructs a node with the given base URI and attributes and an initially empty child list.
   *
   * @param baseUri base URI; checked with {@code Validate.notNull} and stored trimmed
   * @param attributes attributes (not null, but may be empty)
   */
  protected Node(String baseUri, Attributes attributes) {
    Validate.notNull(baseUri);
    Validate.notNull(attributes);

    this.attributes = attributes;
    this.baseUri = baseUri.trim();
    // The child list starts with a small initial capacity of four entries.
    this.childNodes = new ArrayList<Node>(4);
  }
Ejemplo n.º 5
0
  /**
   * Adds the given tag names to this whitelist's set of allowed elements. (A tag that is not
   * allowed will be removed from the HTML.)
   *
   * @param tags tag names to allow; each one is checked with {@code Validate.notEmpty}
   * @return this (for chaining)
   */
  public Whitelist addTags(String... tags) {
    Validate.notNull(tags);

    for (int i = 0; i < tags.length; i++) {
      String name = tags[i];
      Validate.notEmpty(name);
      TagName allowedTag = TagName.valueOf(name);
      tagNames.add(allowedTag);
    }
    return this;
  }
Ejemplo n.º 6
0
  /**
   * Finds the position of the first entry in the list that equals the searched element.
   *
   * @param search element to look for; checked with {@code Validate.notNull}
   * @param elements list to scan; checked with {@code Validate.notNull}
   * @param <E> element type held by the list
   * @return zero-based index of the first match, or null when nothing in the list matches
   */
  private static <E extends Element> Integer indexInList(Element search, List<E> elements) {
    Validate.notNull(search);
    Validate.notNull(elements);

    int position = 0;
    for (E candidate : elements) {
      if (candidate.equals(search)) {
        return position;
      }
      position++;
    }
    return null;
  }
Ejemplo n.º 7
0
  /**
   * Inserts the given child nodes into this element at the specified index, shifting the nodes
   * currently at and after that position to the right. The inserted nodes are moved away from
   * their current parent; copy them first if they should stay where they are.
   *
   * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start,
   *     {@code -1} at the end
   * @param children child nodes to insert
   * @return this element, for chaining.
   */
  public Element insertChildren(int index, Collection<? extends Node> children) {
    Validate.notNull(children, "Children collection to be inserted must not be null.");
    final int existing = childNodeSize();
    int target = index;
    if (target < 0) {
      target = target + existing + 1; // negative positions wrap around from the end
    }
    boolean withinBounds = target >= 0 && target <= existing;
    Validate.isTrue(withinBounds, "Insert position out of bounds.");

    // Copy the collection into a list first, then hand it on as an array.
    List<Node> snapshot = new ArrayList<Node>(children);
    addChildren(target, snapshot.toArray(new Node[snapshot.size()]));
    return this;
  }
Ejemplo n.º 8
0
  /**
   * Swaps one child of this node for another node, which takes over the replaced child's slot in
   * the child list.
   *
   * @param out the child being replaced; its parent must be this node
   * @param in the node taking its place; it is first removed from any parent it currently has
   */
  protected void replaceChild(Node out, Node in) {
    Validate.isTrue(out.parentNode == this);
    Validate.notNull(in);
    // Detach the incoming node from its current parent, if any, before re-homing it here.
    Node previousParent = in.parentNode;
    if (previousParent != null) {
      previousParent.removeChild(in);
    }

    // The slot is read only after the detach above has completed.
    Integer slot = out.siblingIndex();
    childNodes.set(slot, in);
    in.parentNode = this;
    in.setSiblingIndex(slot);
    out.parentNode = null;
  }
Ejemplo n.º 9
0
  /**
   * Sets up this instance's state for a parse run: creates a new document, character reader,
   * tokeniser and element stack, and records the input, base URI and error list.
   *
   * @param input text to parse; checked with {@code Validate.notNull}
   * @param baseUri base URI passed to the new document; checked with {@code Validate.notNull}
   * @param errors error list stored on this instance and passed on to the tokeniser
   */
  protected void initialiseParse(String input, String baseUri, ParseErrorList errors) {
    Validate.notNull(input, "String input must not be null");
    Validate.notNull(baseUri, "BaseURI must not be null");

    doc = new Document(baseUri);
    // NOTE(review): the reader receives this instance, so it may depend on state set above — confirm
    // before reordering any of these assignments.
    reader = new CharacterReader(input, this);
    this.errors = errors;
    // The tokeniser is built on the reader created just above.
    tokeniser = new Tokeniser(reader, errors);
    stack = new DescendableLinkedList<Element>();
    this.baseUri = baseUri;
    this.input = input;
    this.resetFlag = false;
  }
Ejemplo n.º 10
0
 /**
  * Detaches the given child from this node: removes it from the child list, reindexes the
  * remaining children and clears the child's parent reference.
  *
  * @param out the child to remove; its parent must be this node
  */
 protected void removeChild(Node out) {
   Validate.isTrue(out.parentNode == this);
   // Held as a primitive int so that the positional remove(int) overload is the one invoked.
   final int position = out.siblingIndex();
   childNodes.remove(position);
   reindexChildren();
   out.parentNode = null;
 }
Ejemplo n.º 11
0
  /**
   * Parses the supplied HTML and inserts the resulting nodes at the start of this element's
   * children, ahead of the existing content.
   *
   * @param html HTML to add inside this element, before the existing HTML
   * @return this element
   * @see #html(String)
   */
  public Element prepend(String html) {
    Validate.notNull(html);

    List<Node> parsed = Parser.parseFragment(html, this, baseUri());
    Node[] leading = parsed.toArray(new Node[parsed.size()]);
    addChildren(0, leading);
    return this;
  }
Ejemplo n.º 12
0
  /**
   * Looks up an element by ID, searching this element and everything beneath it.
   *
   * <p>The first matching ID is returned, starting from this element. Searching from a different
   * starting point may therefore find a different element for the same ID. For the unique element
   * by ID within a Document, use {@link Document#getElementById(String)}
   *
   * @param id The ID to search for.
   * @return The first matching element by ID, starting with this element, or null if none found.
   */
  public Element getElementById(String id) {
    Validate.notEmpty(id);

    Evaluator.Id byId = new Evaluator.Id(id);
    Elements matches = Collector.collect(byId, this);
    return matches.size() > 0 ? matches.get(0) : null;
  }
Ejemplo n.º 13
0
  /**
   * Builds an absolute URL from a URL attribute whose value may be relative (e.g. an <code>
   * &lt;a href></code> or <code>&lt;img src></code>).
   *
   * <p>E.g.: <code>String absUrl = linkEl.absUrl("href");</code>
   *
   * <p>The attribute value is resolved against the element's {@link #baseUri}. When the base URI
   * itself does not parse as a URL, the attribute value is tried on its own, which succeeds if it
   * is already absolute (i.e. it starts with a protocol such as <code>http://</code> or <code>
   * https://</code>).
   *
   * <p>As an alternate, you can use the {@link #attr} method with the <code>abs:</code> prefix,
   * e.g.: <code>String absUrl = linkEl.attr("abs:href");</code>
   *
   * @param attributeKey The attribute key
   * @return An absolute URL if one could be made, or an empty string (not null) if the attribute
   *     was missing or could not be made successfully into a URL.
   * @see #attr
   * @see java.net.URL#URL(java.net.URL, String)
   */
  public String absUrl(String attributeKey) {
    Validate.notEmpty(attributeKey);

    String relUrl = attr(attributeKey);
    if (!hasAttr(attributeKey)) {
      return ""; // nothing to make absolute with
    }

    URL base;
    try {
      base = new URL(baseUri);
    } catch (MalformedURLException baseFailure) {
      // The base is unsuitable, but the attribute may be absolute on its own, so try that.
      try {
        return new URL(relUrl).toExternalForm();
      } catch (MalformedURLException valueFailure) {
        return "";
      }
    }

    // Workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as
    // desired, so a query-only value is prefixed with the base path before resolving.
    String spec = relUrl.startsWith("?") ? base.getPath() + relUrl : relUrl;
    try {
      return new URL(base, spec).toExternalForm();
    } catch (MalformedURLException resolveFailure) {
      return "";
    }
  }
Ejemplo n.º 14
0
 /**
  * Stores the given attribute in the backing map under the attribute's own key.
  *
  * @param attribute attribute to store; checked with {@code Validate.notNull}
  */
 public void put(Attribute attribute) {
   Validate.notNull(attribute);
   // The backing map is created lazily, on the first insertion.
   if (attributes == null) {
     attributes = new LinkedHashMap(2);
   }
   attributes.put(attribute.getKey(), attribute);
 }
Ejemplo n.º 15
0
 /**
  * Get this node's previous sibling.
  *
  * <p>A node without a parent has no siblings, so null is returned for it rather than failing on
  * the missing parent; this mirrors the parentless-node handling of {@code nextSibling()}.
  *
  * @return the previous sibling, or null if this is the first sibling or this node has no parent
  */
 public Node previousSibling() {
   if (parentNode == null) {
     return null; // root: no parent, hence no siblings
   }

   List<Node> siblings = parentNode.childNodes;
   Integer index = siblingIndex();
   Validate.notNull(index);
   if (index > 0) {
     return siblings.get(index - 1);
   }
   return null;
 }
Ejemplo n.º 16
0
 /**
  * Returns the element that follows this one among its parent's child elements. E.g., if a
  * {@code div} contains two {@code p}s, the {@code nextElementSibling} of the first {@code p} is
  * the second {@code p}.
  *
  * <p>This is similar to {@link #nextSibling()}, but specifically finds only Elements
  *
  * @return the next element, or null if there is no next element
  * @see #previousElementSibling()
  */
 public Element nextElementSibling() {
   if (parentNode == null) {
     return null;
   }
   List<Element> elementSiblings = parent().children();
   Integer ownPosition = indexInList(this, elementSiblings);
   Validate.notNull(ownPosition);
   int following = ownPosition + 1;
   return following < elementSiblings.size() ? elementSiblings.get(following) : null;
 }
Ejemplo n.º 17
0
  /**
   * Returns the value of the attribute stored under the given key.
   *
   * <p>If no attribute is stored under the key and the key starts with <code><b>abs:</b></code>
   * (compared in lower case), the remainder of the key is passed to {@link #absUrl}, giving an
   * absolute URL for an attribute that may hold a relative one. E.g.:
   *
   * <blockquote>
   *
   * <code>String url = a.attr("abs:href");</code>
   *
   * </blockquote>
   *
   * @param attributeKey The attribute key.
   * @return The attribute, or empty string if not present (to avoid nulls).
   * @see #attributes()
   * @see #hasAttr(String)
   * @see #absUrl(String)
   */
  public String attr(String attributeKey) {
    Validate.notNull(attributeKey);

    // A literally stored key wins over the "abs:" shortcut.
    if (attributes.hasKey(attributeKey)) {
      return attributes.get(attributeKey);
    }
    final String absPrefix = "abs:";
    boolean wantsAbsolute = attributeKey.toLowerCase().startsWith(absPrefix);
    if (wantsAbsolute) {
      return absUrl(attributeKey.substring(absPrefix.length()));
    }
    return "";
  }
Ejemplo n.º 18
0
 /**
  * Looks up an attribute by the lower-cased form of the given key and returns its value.
  *
  * <p>NOTE(review): the fallback is {@code UnsupportedUrlFragment.DISPLAY_NAME}, whose value is
  * defined elsewhere; presumably an empty string — confirm.
  *
  * @param key attribute key; checked with {@code Validate.notEmpty}
  * @return the attribute's value, or the fallback constant when there is no backing map or no
  *     attribute stored under the key
  */
 public String get(String key) {
   Validate.notEmpty(key);
   if (this.attributes == null) {
     return UnsupportedUrlFragment.DISPLAY_NAME;
   }
   Attribute found = (Attribute) this.attributes.get(key.toLowerCase());
   if (found == null) {
     return UnsupportedUrlFragment.DISPLAY_NAME;
   }
   return found.getValue();
 }
Ejemplo n.º 19
0
 /**
  * Returns the element that precedes this one among its parent's child elements.
  *
  * @return the previous element, or null if there is no previous element
  * @see #nextElementSibling()
  */
 public Element previousElementSibling() {
   if (parentNode == null) {
     return null;
   }
   List<Element> elementSiblings = parent().children();
   Integer ownPosition = indexInList(this, elementSiblings);
   Validate.notNull(ownPosition);
   return ownPosition > 0 ? elementSiblings.get(ownPosition - 1) : null;
 }
Ejemplo n.º 20
0
  /**
   * Replaces this element's content with a single text node holding the given text. Any existing
   * contents (text or elements) are cleared first.
   *
   * @param text unencoded text
   * @return this element
   */
  public Element text(String text) {
    Validate.notNull(text);

    empty();
    appendChild(new TextNode(text, baseUri));
    return this;
  }
Ejemplo n.º 21
0
  /**
   * Drops a class name from this element's {@code class} attribute.
   *
   * @param className class name to remove
   * @return this element
   */
  public Element removeClass(String className) {
    Validate.notNull(className);

    // Read the current names, drop the requested one, then write the set back.
    Set<String> remaining = classNames();
    remaining.remove(className);
    classNames(remaining);
    return this;
  }
Ejemplo n.º 22
0
  /**
   * Tests whether this element has the given attribute.
   *
   * <p>A key starting with {@code abs:} (compared in lower case) also counts as present when the
   * attribute named by the rest of the key exists and {@code absUrl} yields a non-empty result
   * for it.
   *
   * @param attributeKey The attribute key to check.
   * @return true if the attribute exists, false if not.
   */
  public boolean hasAttr(String attributeKey) {
    Validate.notNull(attributeKey);

    final String absPrefix = "abs:";
    if (attributeKey.toLowerCase().startsWith(absPrefix)) {
      String plainKey = attributeKey.substring(absPrefix.length());
      boolean resolves = attributes.hasKey(plainKey) && !absUrl(plainKey).equals("");
      if (resolves) {
        return true;
      }
    }
    // Otherwise fall back to a lookup of the key exactly as given.
    return attributes.hasKey(attributeKey);
  }
Ejemplo n.º 23
0
  /**
   * Returns the node that follows this one in its parent's child list.
   *
   * @return next sibling, or null if this is the last sibling or this node has no parent
   */
  public Node nextSibling() {
    if (parentNode == null) {
      return null; // root
    }

    List<Node> siblings = parentNode.childNodes;
    Integer ownIndex = siblingIndex();
    Validate.notNull(ownIndex);
    int following = ownIndex + 1;
    return following < siblings.size() ? siblings.get(following) : null;
  }
Ejemplo n.º 24
0
 /**
  * Inserts the given nodes into this node's child list starting at the given index, reparenting
  * each one, and reindexes the children once all have been inserted.
  *
  * @param index position in the child list at which the nodes are inserted
  * @param children nodes to insert; checked with {@code Validate.noNullElements}
  */
 protected void addChildren(int index, Node... children) {
   Validate.noNullElements(children);
   // Walk the array backwards: every node is inserted at the same index, so the array's first
   // element is inserted last and ends up in front, keeping the array's order.
   int remaining = children.length;
   while (remaining > 0) {
     remaining--;
     Node child = children[remaining];
     reparentChild(child);
     childNodes.add(index, child);
   }
   reindexChildren();
 }
Ejemplo n.º 25
0
  /**
   * Flips a class name on this element's {@code class} attribute: a name that is present is
   * removed, a name that is absent is added.
   *
   * @param className class name to toggle
   * @return this element
   */
  public Element toggleClass(String className) {
    Validate.notNull(className);

    Set<String> classes = classNames();
    // Set.remove reports whether the name was present; when it was not, add it instead.
    boolean wasPresent = classes.remove(className);
    if (!wasPresent) {
      classes.add(className);
    }
    classNames(classes);
    return this;
  }
Ejemplo n.º 26
0
  /**
   * Builds a new, clean document from the original dirty document, containing only elements
   * allowed by the whitelist. The original document is not modified. Only elements from the dirty
   * document's <code>body</code> are used.
   *
   * @param dirtyDocument Untrusted base document to clean.
   * @return cleaned document.
   */
  public Document clean(Document dirtyDocument) {
    Validate.notNull(dirtyDocument);

    Document cleaned = Document.createShell(dirtyDocument.baseUri());
    // Frameset documents won't have a body; in that case the clean document keeps an empty body.
    boolean hasBody = dirtyDocument.body() != null;
    if (hasBody) {
      copySafeNodes(dirtyDocument.body(), cleaned.body());
    }
    return cleaned;
  }
Ejemplo n.º 27
0
  /**
   * Determines if the input document <b>body</b> is valid, against the whitelist. It is considered
   * valid if all the tags and attributes in the input HTML are allowed by the whitelist, and there
   * is no content in the <code>head</code>.
   *
   * <p>This method can be used as a validator for user input. An invalid document will still be
   * cleaned successfully using the {@link #clean(Document)} method. If using as a validator, it
   * is recommended to still clean the document to ensure enforced attributes are set correctly, and
   * that the output is tidied.
   *
   * @param dirtyDocument document to test
   * @return true if no tags or attributes need to be removed; false if they do
   */
  public boolean isValid(Document dirtyDocument) {
    Validate.notNull(dirtyDocument);

    Document scratch = Document.createShell(dirtyDocument.baseUri());
    int discarded = copySafeNodes(dirtyDocument.body(), scratch.body());
    if (discarded != 0) {
      return false;
    }
    // Only the body is examined above, starting from a shell, so also require that there is
    // nothing in the head.
    return dirtyDocument.head().childNodes().size() == 0;
  }
Ejemplo n.º 28
0
  /**
   * Registers an enforced attribute for a tag. An enforced attribute will always be added to the
   * element; if the element already has the attribute set, it will be overridden.
   *
   * <p>E.g.: <code>addEnforcedAttribute("a", "rel", "nofollow")</code> will make all <code>a</code>
   * tags output as <code>&lt;a href="..." rel="nofollow"></code>
   *
   * @param tag The tag the enforced attribute is for. The tag will be added to the allowed tag list
   *     if necessary.
   * @param key The attribute key
   * @param value The enforced attribute value
   * @return this (for chaining)
   */
  public Whitelist addEnforcedAttribute(String tag, String key, String value) {
    Validate.notEmpty(tag);
    Validate.notEmpty(key);
    Validate.notEmpty(value);

    TagName tagName = TagName.valueOf(tag);
    if (!tagNames.contains(tagName)) {
      tagNames.add(tagName);
    }
    AttributeKey attrKey = AttributeKey.valueOf(key);
    AttributeValue attrVal = AttributeValue.valueOf(value);

    // Create the per-tag map on first use, then record the enforced value in it.
    if (!enforcedAttributes.containsKey(tagName)) {
      enforcedAttributes.put(tagName, new HashMap<AttributeKey, AttributeValue>());
    }
    enforcedAttributes.get(tagName).put(attrKey, attrVal);
    return this;
  }
Ejemplo n.º 29
0
  /**
   * Removes this node from the DOM and moves its children up into the node's parent, at the
   * position this node occupied. The node is dropped while its children are kept.
   *
   * <p>For example, with the input html:<br>
   * {@code <div>One <span>Two <b>Three</b></span></div>}<br>
   * Calling {@code element.unwrap()} on the {@code span} element will result in the html:<br>
   * {@code <div>One Two <b>Three</b></div>}<br>
   * and the {@code "Two "} {@link TextNode} being returned.
   *
   * @return the first child of this node, after the node has been unwrapped. Null if the node had
   *     no children.
   * @see #remove()
   * @see #wrap(String)
   */
  public Node unwrap() {
    Validate.notNull(parentNode);

    final int ownPosition = siblingIndex;
    // Capture the first child before the children are handed over to the parent.
    Node firstChild = null;
    if (childNodes.size() > 0) {
      firstChild = childNodes.get(0);
    }
    parentNode.addChildren(ownPosition, this.childNodesAsArray());
    this.remove();

    return firstChild;
  }
Ejemplo n.º 30
0
 /**
  * Creates the chat object matching the prefix of the given identity.
  *
  * <ul>
  *   <li>{@code 8:} prefix: a {@code ChatIndividual}
  *   <li>{@code 19:} prefix ending in {@code @thread.skype}: a {@code ChatGroup}
  *   <li>{@code 19:} prefix without that suffix: an info message is logged and null is returned
  * </ul>
  *
  * @param client client the chat belongs to; must be a {@code SkypeImpl}
  * @param identity chat identity; checked with {@code Validate.notEmpty}
  * @return the created chat, or null for a skipped {@code 19:} identity
  * @throws IllegalArgumentException if the identity has neither known prefix
  * @throws SkypeException declared by the signature; no statement in this method throws it directly
  */
 public static Chat createChat(Skype client, String identity) throws SkypeException {
   Validate.notNull(client, "Client must not be null");
   Validate.isTrue(
       client instanceof SkypeImpl,
       String.format("Now is not the time to use that, %s", client.getUsername()));
   Validate.notEmpty(identity, "Identity must not be null/empty");

   if (identity.startsWith("8:")) {
     return new ChatIndividual((SkypeImpl) client, identity);
   }
   if (!identity.startsWith("19:")) {
     throw new IllegalArgumentException(
         String.format("Unknown group type with identity %s", identity));
   }
   if (identity.endsWith("@thread.skype")) {
     return new ChatGroup((SkypeImpl) client, identity);
   }
   client.getLogger().info(String.format("Skipping P2P chat with identity %s", identity));
   return null;
 }