コード例 #1
0
ファイル: VipsParser.java プロジェクト: jgera/vips_java
  /**
   * Recursively walks the block tree and decides, block by block, whether a block is divided
   * further or kept as a visual block.
   *
   * <p>The given block becomes the current block ({@code _currentVipsBlock}) before the VIPS rules
   * are applied to its box. If the rules apply and the block is still dividable and not yet a
   * visual block, it is marked as already divided and every child whose box is not a {@code
   * TextBox} is processed recursively. Otherwise a block that is still dividable is turned into a
   * visual block with DoC 11, and the visual-block flag is cleared again when {@code
   * verifyValidity} rejects the box.
   *
   * @param vipsBlock Visual structure; its box is cast to {@code ElementBox}
   */
  private void divideVipsBlockTree(VipsBlock vipsBlock) {
    _currentVipsBlock = vipsBlock;
    ElementBox elementBox = (ElementBox) vipsBlock.getBox();

    // With VIPS rules it tries to determine if element is dividable
    if (applyVipsRules(elementBox) && vipsBlock.isDividable() && !vipsBlock.isVisualBlock()) {
      // Mark before recursing: each recursive call reassigns _currentVipsBlock,
      // so after the loop it no longer refers to this block.
      _currentVipsBlock.setAlreadyDivided(true);
      for (VipsBlock vipsBlockChild : vipsBlock.getChildren()) {
        if (!(vipsBlockChild.getBox() instanceof TextBox)) {
          divideVipsBlockTree(vipsBlockChild);
        }
      }
    } else {
      if (vipsBlock.isDividable()) {
        vipsBlock.setIsVisualBlock(true);
        vipsBlock.setDoC(11);
      }

      if (!verifyValidity(elementBox)) {
        _currentVipsBlock.setIsVisualBlock(false);
      }
    }
  }
コード例 #2
0
ファイル: VipsParser.java プロジェクト: jgera/vips_java
  /**
   * VIPS Rule Nine
   *
   * <p>If the largest child of the node (by width * height) is not larger than the size threshold,
   * do not divide this node: the current block is marked as a non-dividable visual block and its
   * DoC is set from the node's tag name (7 for "Xdiv", 11 for "a", 8 otherwise). <br>
   * If the largest child exceeds the threshold, the rule is still reported as applied but the
   * current block is left unchanged.
   *
   * @param node Input node
   * @return True, if rule is applied, otherwise false (the node has no sub boxes).
   */
  private boolean ruleNine(ElementBox node) {
    if (node.getSubBoxList().isEmpty()) {
      return false;
    }

    // Area of the largest direct child.
    int maxSize = 0;
    for (Box childNode : node.getSubBoxList()) {
      maxSize = Math.max(maxSize, childNode.getWidth() * childNode.getHeight());
    }

    if (maxSize > _sizeTresholdWidth * _sizeTresholdHeight) {
      return true;
    }

    // TODO set DOC
    _currentVipsBlock.setIsVisualBlock(true);
    _currentVipsBlock.setIsDividable(false);

    // The branches must be chained: as two independent statements the DoC chosen
    // for "Xdiv" was always overwritten by the trailing else.
    String tagName = node.getNode().getNodeName();
    if (tagName.equals("Xdiv")) {
      _currentVipsBlock.setDoC(7);
    } else if (tagName.equals("a")) {
      _currentVipsBlock.setDoC(11);
    } else {
      _currentVipsBlock.setDoC(8);
    }

    return true;
  }
コード例 #3
0
ファイル: VipsParser.java プロジェクト: jgera/vips_java
  /**
   * VIPS Rule Twelve
   *
   * <p>Never divides the node: the current block is always marked as a non-dividable visual block.
   * <br>
   * The DoC is picked from the node's tag name: 7 for "Xdiv", 8 for "li", "span", "sup" and "img",
   * and 333 for every other tag.
   *
   * <p>NOTE(review): 333 looks like a placeholder rather than a real DoC value - confirm.
   *
   * @param node Input node
   * @return Always true, the rule applies unconditionally.
   */
  private boolean ruleTwelve(ElementBox node) {
    _currentVipsBlock.setIsDividable(false);
    _currentVipsBlock.setIsVisualBlock(true);

    // TODO DoC Part
    final String tagName = node.getNode().getNodeName();
    final int doc;
    if (tagName.equals("Xdiv")) {
      doc = 7;
    } else if (tagName.equals("li")
        || tagName.equals("span")
        || tagName.equals("sup")
        || tagName.equals("img")) {
      doc = 8;
    } else {
      doc = 333;
    }
    _currentVipsBlock.setDoC(doc);

    return true;
  }
コード例 #4
0
ファイル: VipsParser.java プロジェクト: jgera/vips_java
  /**
   * Checks if node has valid children nodes.
   *
   * <p>An "img" or "input" node is decided on its own content size: with a positive content width
   * and height the current block becomes a visual block with DoC 8 and the node counts as valid,
   * otherwise it is invalid. Any other node is invalid when it has no sub boxes; otherwise the
   * shared counter {@code _cnt} is reset, {@code checkValidChildrenNodes} is run on every sub box,
   * and the node is valid when the counter ends up above zero.
   *
   * @param node Input node
   * @return True, if the node is considered to have valid children, otherwise false.
   */
  private boolean hasValidChildrenNodes(ElementBox node) {
    final String tagName = node.getNode().getNodeName();

    if (tagName.equals("img") || tagName.equals("input")) {
      boolean hasContentArea = node.getContentWidth() > 0 && node.getContentHeight() > 0;
      if (!hasContentArea) {
        return false;
      }
      _currentVipsBlock.setIsVisualBlock(true);
      _currentVipsBlock.setDoC(8);
      return true;
    }

    if (node.getSubBoxList().isEmpty()) {
      return false;
    }

    _cnt = 0;
    for (Box subBox : node.getSubBoxList()) {
      checkValidChildrenNodes(subBox);
    }
    return _cnt > 0;
  }
コード例 #5
0
ファイル: VipsParser.java プロジェクト: jgera/vips_java
  /**
   * VIPS Rule Seven
   *
   * <p>If the background color of the current block differs from that of one of its child blocks,
   * the rule applies: the first such child is marked as a non-dividable visual block with DoC 7 and
   * the search stops there. Children after the first differing one are not inspected.
   *
   * <p>The rule never applies to a node without sub boxes or to a text node.
   *
   * @param node Input node
   * @return True, if rule is applied, otherwise false.
   */
  private boolean ruleSeven(ElementBox node) {
    if (node.getSubBoxList().isEmpty() || isTextNode(node)) {
      return false;
    }

    final String parentBackground = _currentVipsBlock.getBgColor();

    for (VipsBlock childBlock : _currentVipsBlock.getChildren()) {
      if (childBlock.getBgColor().equals(parentBackground)) {
        continue;
      }

      // First child with a different background: keep it whole in this round.
      childBlock.setIsDividable(false);
      childBlock.setIsVisualBlock(true);
      // TODO DoC values
      childBlock.setDoC(7);
      return true;
    }

    return false;
  }
コード例 #6
0
ファイル: VipsParser.java プロジェクト: jgera/vips_java
  /**
   * VIPS Rule Eight
   *
   * <p>If the node has at least one text child (as collected by {@code findTextChildrenNodes}) and
   * the node's size is not larger than the size threshold, then the node cannot be divided: the
   * current block is marked as a non-dividable visual block and its DoC is set from the tag name (7
   * for "Xdiv" and "code", 5 for "div", 8 otherwise).
   *
   * <p>Two shortcuts report the rule as applied without touching the current block: a node with
   * zero width or height that has any descendant (as collected by {@code getAllChildren}) with
   * non-zero width and height, and a "ul" node that passed the size check.
   *
   * @param node Input node
   * @return True, if rule is applied, otherwise false.
   */
  private boolean ruleEight(ElementBox node) {
    if (node.getSubBoxList().isEmpty()) {
      return false;
    }

    List<Box> collected = new ArrayList<Box>();
    findTextChildrenNodes(node, collected);
    if (collected.isEmpty()) {
      return false;
    }

    boolean nodeHasNoArea = node.getWidth() == 0 || node.getHeight() == 0;
    if (nodeHasNoArea) {
      // Reuse the list for the full set of children of the zero-sized node.
      collected.clear();
      getAllChildren(node, collected);

      for (Box candidate : collected) {
        boolean candidateHasNoArea = candidate.getWidth() == 0 || candidate.getHeight() == 0;
        if (!candidateHasNoArea) {
          return true;
        }
      }
    }

    if (node.getWidth() * node.getHeight() > _sizeTresholdHeight * _sizeTresholdWidth) {
      return false;
    }

    final String tagName = node.getNode().getNodeName();
    if (tagName.equals("ul")) {
      return true;
    }

    _currentVipsBlock.setIsVisualBlock(true);
    _currentVipsBlock.setIsDividable(false);

    final int doc;
    if (tagName.equals("Xdiv") || tagName.equals("code")) {
      doc = 7;
    } else if (tagName.equals("div")) {
      doc = 5;
    } else {
      doc = 8;
    }
    _currentVipsBlock.setDoC(doc);

    return true;
  }
コード例 #7
0
ファイル: VipsParser.java プロジェクト: jgera/vips_java
  /**
   * VIPS Rule Four
   *
   * <p>If all of the child nodes of the DOM node are text nodes or virtual text nodes, do not
   * divide the node: the current block is marked as a non-dividable visual block. <br>
   * With a single child the DoC is 11 when that child is an "em" node and 10 otherwise. With
   * several children the DoC is 10 while font size and font weight of the children match the first
   * recorded values, and 9 as soon as one child differs.
   *
   * <p>The current block is only modified when the rule applies; every path returning false leaves
   * it untouched.
   *
   * @param node Input node
   * @return True, if rule is applied, otherwise false.
   */
  private boolean ruleFour(ElementBox node) {
    if (node.getSubBoxList().isEmpty()) {
      return false;
    }

    // A child disqualifies the node only when it is NEITHER a text node NOR a
    // virtual text node; being either kind is sufficient.
    for (Box box : node.getSubBoxList()) {
      if (box instanceof TextBox) {
        continue;
      }
      ElementBox elementChild = (ElementBox) box;
      if (!isTextNode(elementChild) && !isVirtualTextNode(elementChild)) {
        return false;
      }
    }

    if (node.getSubBoxList().size() == 1) {
      _currentVipsBlock.setIsVisualBlock(true);
      _currentVipsBlock.setIsDividable(false);
      if (node.getSubBox(0).getNode().getNodeName().equals("em")) {
        _currentVipsBlock.setDoC(11);
      } else {
        _currentVipsBlock.setDoC(10);
      }
      return true;
    }

    // The DoC is collected in a local first so that nothing is written to the
    // current block if the rule turns out not to apply.
    final int unsetDoc = -1;
    int doc = unsetDoc;
    String fontWeight = "";
    int fontSize = 0;

    for (Box childNode : node.getSubBoxList()) {
      int childFontSize = childNode.getVisualContext().getFont().getSize();

      if (childNode instanceof TextBox) {
        // Text boxes are compared by font size only.
        if (fontSize <= 0) {
          fontSize = childFontSize;
        } else if (fontSize != childFontSize) {
          doc = 9;
          break;
        } else {
          doc = 10;
        }
        continue;
      }

      ElementBox child = (ElementBox) childNode;
      Object weightValue = child.getStylePropertyValue("font-weight");
      if (weightValue == null) {
        return false;
      }
      String childFontWeight = weightValue.toString();

      if (fontSize <= 0) {
        // First child with a usable font size: remember it as the reference.
        fontWeight = childFontWeight;
        fontSize = childFontSize;
      } else if (childFontWeight.equals(fontWeight) && childFontSize == fontSize) {
        doc = 10;
      } else {
        doc = 9;
        break;
      }
    }

    _currentVipsBlock.setIsVisualBlock(true);
    _currentVipsBlock.setIsDividable(false);
    if (doc != unsetDoc) {
      _currentVipsBlock.setDoC(doc);
    }

    return true;
  }