Exemple #1
0
  /**
   * Checks whether two nodes have the same structural "format".
   *
   * <p>Two nodes match when their names are the same instance, they carry the same number of
   * attributes whose names agree position by position (ignoring case; attribute values are not
   * compared), and their children match recursively in the same order.
   *
   * @param node1 the first node to compare
   * @param node2 the second node to compare
   * @return {@code true} if both subtrees have the same names, attribute names and shape
   */
  private boolean equalsFormat(HTMLNode node1, HTMLNode node2) {
    // Names are compared by reference, not with equals().
    if (node1.getName() != node2.getName()) return false;

    Attributes attrs1 = node1.getAttributes();
    Attributes attrs2 = node2.getAttributes();
    if (attrs1.size() != attrs2.size()) return false;

    for (int idx = 0; idx < attrs1.size(); idx++) {
      Attribute left = attrs1.get(idx);
      Attribute right = attrs2.get(idx);
      if (left == null || right == null) {
        // A missing attribute only matches another missing attribute.
        if (left != right) return false;
        continue;
      }
      if (!left.getName().equalsIgnoreCase(right.getName())) return false;
    }

    List<HTMLNode> kids1 = node1.getChildren();
    List<HTMLNode> kids2 = node2.getChildren();
    // If either child list is absent, the nodes match only when both are absent.
    if (kids1 == null || kids2 == null) return kids1 == kids2;
    if (kids1.size() != kids2.size()) return false;

    int pos = 0;
    while (pos < kids1.size()) {
      if (!equalsFormat(kids1.get(pos), kids2.get(pos))) return false;
      pos++;
    }
    return true;
  }
 /**
  * Removes the {@code src} attribute from every IFRAME node in the subtree rooted at
  * {@code node}, the root itself included.
  *
  * @param node root of the subtree to process
  */
 protected void removeIFrameSource(HTMLNode node) {
   if (node.isNode(Name.IFRAME)) {
     node.getAttributes().remove("src");
   }

   List<HTMLNode> kids = node.getChildren();
   if (kids == null || kids.isEmpty()) return;

   // Descend into every child so nested iframes are handled as well.
   for (HTMLNode kid : kids) {
     removeIFrameSource(kid);
   }
 }
Exemple #3
0
  /**
   * Decides whether {@code node}, or one of its descendants, is a list made up mostly of links.
   *
   * <p>When the direct children are accepted by {@code isListNode}, the nodes produced by
   * {@code node.iterator()} are scanned for {@code A} elements, and the result is whether that
   * count is at least the number of direct children minus three. Otherwise each child is
   * checked recursively.
   *
   * @param node the node to inspect
   * @return {@code true} if this node or a descendant qualifies as a link container
   */
  private boolean isLinkContainer(HTMLNode node) {
    List<HTMLNode> kids = node.getChildren();
    if (kids == null) return false;

    if (!isListNode(kids)) {
      // Not a list here: the answer is whatever the first qualifying child says.
      for (HTMLNode kid : kids) {
        if (isLinkContainer(kid)) return true;
      }
      return false;
    }

    int anchorCount = 0;
    for (NodeIterator walker = node.iterator(); walker.hasNext(); ) {
      if (walker.next().isNode(Name.A)) anchorCount++;
    }
    // Up to three children without a matching anchor are tolerated.
    return kids.size() - 3 <= anchorCount;
  }
Exemple #4
0
  /**
   * Decides whether {@code node} looks like a block that consists mainly of links.
   *
   * <p>First pass: collect every {@code A} node that {@code linkNodeChecker} rejects. Second
   * pass: iterate again via {@code node.iterator(ignores)} (presumably skipping the collected
   * anchors; confirm against {@code NodeIterator}) and sum {@code countWord} over the CONTENT
   * nodes encountered.
   *
   * @param node the node to inspect
   * @return {@code true} if fewer than five words were counted and more than one anchor was
   *     collected; {@code false} also when the node has no children list
   */
  private boolean isLinkDiv(HTMLNode node) {
    if (node.getChildren() == null) return false;

    List<HTMLNode> rejectedAnchors = new ArrayList<HTMLNode>();
    for (NodeIterator scan = node.iterator(); scan.hasNext(); ) {
      HTMLNode current = scan.next();
      if (!current.isNode(Name.A)) continue;
      if (linkNodeChecker.isValid(new CheckModel(current), 0)) continue;
      rejectedAnchors.add(current);
    }

    int wordTotal = 0;
    for (NodeIterator scan = node.iterator(rejectedAnchors); scan.hasNext(); ) {
      HTMLNode current = scan.next();
      if (current.isNode(Name.CONTENT)) wordTotal += countWord(current);
    }

    return wordTotal < 5 && rejectedAnchors.size() > 1;
  }