Ejemplo n.º 1
0
  /**
   * VIPS Rule Nine
   *
   * <p>If the largest child of the node (by width * height) does not exceed the size threshold, do
   * not divide this node: the current VIPS block is marked as a non-dividable visual block and its
   * DoC is set from the html tag of the node (7 for "Xdiv", 11 for "a", 8 otherwise).
   *
   * <p>If the largest child exceeds the threshold, the rule still reports itself as applied but
   * leaves the flags and the DoC of the current VIPS block untouched.
   *
   * @param node Input node
   * @return False if the node has no sub-boxes, otherwise true.
   */
  private boolean ruleNine(ElementBox node) {
    if (node.getSubBoxList().isEmpty()) {
      return false;
    }

    // Area of the largest direct child box.
    int maxSize = 0;
    for (Box childNode : node.getSubBoxList()) {
      maxSize = Math.max(maxSize, childNode.getWidth() * childNode.getHeight());
    }

    if (maxSize > _sizeTresholdWidth * _sizeTresholdHeight) {
      return true;
    }

    // TODO set DOC
    _currentVipsBlock.setIsVisualBlock(true);
    _currentVipsBlock.setIsDividable(false);

    // This has to be a single if / else-if chain: with a stand-alone "Xdiv" test, the else branch
    // of the "a" test would run for "Xdiv" as well and overwrite its DoC of 7 with 8.
    String nodeName = node.getNode().getNodeName();
    if (nodeName.equals("Xdiv")) {
      _currentVipsBlock.setDoC(7);
    } else if (nodeName.equals("a")) {
      _currentVipsBlock.setDoC(11);
    } else {
      _currentVipsBlock.setDoC(8);
    }

    return true;
  }
Ejemplo n.º 2
0
  /**
   * Adds the number of visual blocks in the given subtree to {@code _visualBlocksCount}.
   *
   * <p>The block itself is counted when it is flagged as a visual block. Children whose box is a
   * {@code TextBox} are neither counted nor descended into.
   *
   * @param vipsBlock Root of the subtree to count
   */
  private void getVisualBlocksCount(VipsBlock vipsBlock) {
    if (vipsBlock.isVisualBlock()) {
      _visualBlocksCount++;
    }

    for (VipsBlock child : vipsBlock.getChildren()) {
      if (child.getBox() instanceof TextBox) {
        continue;
      }
      getVisualBlocksCount(child);
    }
  }
Ejemplo n.º 3
0
 /**
  * Searches the subtree rooted at the given VIPS block for a block whose box belongs to the given
  * DOM node and stores it in {@code _tempVipsBlock}.
  *
  * <p>The result is reported only through {@code _tempVipsBlock}; the field is left untouched
  * when no block matches. The children of a matching block are not searched, but the remaining
  * siblings still are, so a later match in traversal order replaces an earlier one.
  *
  * @param node DOM node to look for
  * @param vipsBlock Root of the subtree to search
  */
 private void findPreviousSiblingNodeVipsBlock(Node node, VipsBlock vipsBlock) {
   boolean belongsToNode = vipsBlock.getBox().getNode().equals(node);

   if (belongsToNode) {
     _tempVipsBlock = vipsBlock;
     return;
   }

   for (VipsBlock child : vipsBlock.getChildren()) {
     findPreviousSiblingNodeVipsBlock(node, child);
   }
 }
Ejemplo n.º 4
0
  /**
   * Builds the VIPS block tree that mirrors the box tree below the given box.
   *
   * <p>The box is attached to the given tree node; unless the box is a {@code TextBox}, one new
   * child block is appended and populated recursively for each of its sub-boxes.
   *
   * @param element Box to attach to {@code node}
   * @param node VIPS block tree node that receives the box
   */
  private void constructVipsBlockTree(Box element, VipsBlock node) {
    node.setBox(element);

    // A text box is not descended into.
    if (element instanceof TextBox) {
      return;
    }

    ElementBox container = (ElementBox) element;
    for (Box subBox : container.getSubBoxList()) {
      node.addChild(new VipsBlock());
      // Continue with the block that is now last in the children list.
      int lastIndex = node.getChildren().size() - 1;
      constructVipsBlockTree(subBox, node.getChildren().get(lastIndex));
    }
  }
Ejemplo n.º 5
0
  /**
   * VIPS Rule Three
   *
   * <p>If the DOM node is the root node of the sub-DOM tree (corresponding to the block), and there
   * is only one sub DOM tree corresponding to this block, divide this node.
   *
   * <p>As implemented, the rule counts the direct children of {@code _vipsBlocks} whose node name
   * equals the name of the given node and applies when exactly one is found.
   *
   * @param node Input node
   * @return True, if rule is applied, otherwise false.
   */
  private boolean ruleThree(ElementBox node) {
    if (!node.isRootElement()) {
      return false;
    }

    int matchingSubTrees = 0;

    for (VipsBlock candidate : _vipsBlocks.getChildren()) {
      String candidateName = candidate.getBox().getNode().getNodeName();
      if (!candidateName.equals(node.getNode().getNodeName())) {
        continue;
      }

      boolean result = true;
      // NOTE(review): result is a primitive and Java passes it by value, so this call cannot
      // change it and the test below always succeeds. Presumably isOnlyOneDomSubTree was meant
      // to return its verdict - confirm against its definition.
      isOnlyOneDomSubTree(node.getNode(), candidate.getBox().getNode(), result);

      if (result) {
        matchingSubTrees++;
      }
    }

    return matchingSubTrees == 1;
  }
Ejemplo n.º 6
0
  /**
   * Checks if node has valid children nodes.
   *
   * <p>"img" and "input" nodes are decided by their own content size: with a positive content
   * width and height the current VIPS block is marked as a visual block with DoC 8 and the result
   * is true, otherwise the result is false. For every other node, {@code _cnt} is reset, each
   * sub-box is passed to {@code checkValidChildrenNodes}, and the result is whether {@code _cnt}
   * is positive afterwards.
   *
   * @param node Input node
   * @return True, if the node is considered to have valid children, otherwise false.
   */
  private boolean hasValidChildrenNodes(ElementBox node) {
    String nodeName = node.getNode().getNodeName();

    if (nodeName.equals("img") || nodeName.equals("input")) {
      boolean hasContentArea = node.getContentWidth() > 0 && node.getContentHeight() > 0;
      if (!hasContentArea) {
        return false;
      }

      _currentVipsBlock.setIsVisualBlock(true);
      _currentVipsBlock.setDoC(8);
      return true;
    }

    if (node.getSubBoxList().isEmpty()) {
      return false;
    }

    // Presumably checkValidChildrenNodes increments _cnt for each valid box it finds - verify
    // against its definition.
    _cnt = 0;
    for (Box child : node.getSubBoxList()) {
      checkValidChildrenNodes(child);
    }

    return _cnt > 0;
  }
Ejemplo n.º 7
0
  /**
   * VIPS Rule One
   *
   * <p>If the DOM node is not a text node and it has no valid children, then this node cannot be
   * divided and will be cut. In that case the current VIPS block is marked as not dividable.
   *
   * @param node Input node
   * @return True, if rule is applied, otherwise false.
   */
  private boolean ruleOne(ElementBox node) {
    // Text nodes and nodes with valid children are not covered by this rule.
    if (isTextNode(node) || hasValidChildrenNodes(node)) {
      return false;
    }

    _currentVipsBlock.setIsDividable(false);
    return true;
  }
Ejemplo n.º 8
0
  /**
   * VIPS Rule Ten
   *
   * <p>If previous sibling node has not been divided, do not divide this node
   *
   * <p>The VIPS block of the previous sibling DOM node is looked up in the whole block tree
   * starting at {@code _vipsBlocks}.
   *
   * @param node Input node
   * @return True, if a VIPS block was found for the previous sibling node and that block is
   *     already divided, otherwise false.
   */
  private boolean ruleTen(ElementBox node) {
    // The lookup reports its result through _tempVipsBlock, so clear any earlier result first.
    _tempVipsBlock = null;
    findPreviousSiblingNodeVipsBlock(node.getNode().getPreviousSibling(), _vipsBlocks);

    return _tempVipsBlock != null && _tempVipsBlock.isAlreadyDivided();
  }
Ejemplo n.º 9
0
  /**
   * VIPS Rule Twelve
   *
   * <p>Do not divide this node <br>
   * Set the DoC value based on the html tag of this node: 7 for "Xdiv", 8 for "li", "span", "sup"
   * and "img", and 333 for every other tag.
   *
   * @param node Input node
   * @return Always true.
   */
  private boolean ruleTwelve(ElementBox node) {
    _currentVipsBlock.setIsDividable(false);
    _currentVipsBlock.setIsVisualBlock(true);

    String tag = node.getNode().getNodeName();

    int docValue;
    if (tag.equals("Xdiv")) {
      docValue = 7;
    } else if (tag.equals("li") || tag.equals("span") || tag.equals("sup") || tag.equals("img")) {
      docValue = 8;
    } else {
      docValue = 333;
    }
    _currentVipsBlock.setDoC(docValue);

    // TODO DoC Part
    return true;
  }
Ejemplo n.º 10
0
  /**
   * VIPS Rule Seven
   *
   * <p>If the background color of this node is different from one of its children's, divide this
   * node and at the same time, the child node with different background color will not be divided
   * in this round.
   *
   * <p>Background colors are taken from the current VIPS block and its child blocks. Only the
   * first child with a different color is touched: it becomes a non-dividable visual block with
   * DoC 7.
   *
   * @param node Input node
   * @return True, if rule is applied, otherwise false.
   */
  private boolean ruleSeven(ElementBox node) {
    if (node.getSubBoxList().isEmpty() || isTextNode(node)) {
      return false;
    }

    String nodeBgColor = _currentVipsBlock.getBgColor();

    for (VipsBlock child : _currentVipsBlock.getChildren()) {
      if (child.getBgColor().equals(nodeBgColor)) {
        continue;
      }

      child.setIsDividable(false);
      child.setIsVisualBlock(true);
      // TODO DoC values
      child.setDoC(7);
      return true;
    }

    return false;
  }
Ejemplo n.º 11
0
  /**
   * VIPS Rule Eight
   *
   * <p>If the node has at least one text node child or at least one virtual text node child, and
   * the node's relative size is smaller than a threshold, then the node cannot be divided. Set the
   * DoC value (from 5-8) based on the html tag of the node.
   *
   * <p>Special cases: a node with zero width or height applies the rule without further changes as
   * soon as one of the boxes collected by {@code getAllChildren} has a non-zero width and height,
   * and a "ul" node within the threshold applies the rule without being marked as a visual block.
   *
   * @param node Input node
   * @return True, if rule is applied, otherwise false.
   */
  private boolean ruleEight(ElementBox node) {
    if (node.getSubBoxList().isEmpty()) {
      return false;
    }

    List<Box> collected = new ArrayList<Box>();
    findTextChildrenNodes(node, collected);

    if (collected.isEmpty()) {
      return false;
    }

    boolean nodeHasNoArea = node.getWidth() == 0 || node.getHeight() == 0;
    if (nodeHasNoArea) {
      // Reuse the list, this time for the boxes gathered by getAllChildren.
      collected.clear();
      getAllChildren(node, collected);

      for (Box child : collected) {
        boolean childHasArea = child.getWidth() != 0 && child.getHeight() != 0;
        if (childHasArea) {
          return true;
        }
      }
    }

    boolean aboveThreshold =
        node.getWidth() * node.getHeight() > _sizeTresholdHeight * _sizeTresholdWidth;
    if (aboveThreshold) {
      return false;
    }

    String tag = node.getNode().getNodeName();
    if (tag.equals("ul")) {
      return true;
    }

    _currentVipsBlock.setIsVisualBlock(true);
    _currentVipsBlock.setIsDividable(false);

    if (tag.equals("Xdiv") || tag.equals("code")) {
      _currentVipsBlock.setDoC(7);
    } else if (tag.equals("div")) {
      _currentVipsBlock.setDoC(5);
    } else {
      _currentVipsBlock.setDoC(8);
    }

    return true;
  }
Ejemplo n.º 12
0
  /**
   * Appends a {@code LayoutNode} element describing the given visual structure to the parent
   * element and, while segmentation continues, recurses into the child visual structures.
   *
   * <p>Segmentation continues below a structure while {@code _pDoC} is greater than or equal to
   * the DoC of the structure. The {@code SRC} and {@code Content} attributes are written only
   * where the output ends: on structures without child visual structures and on structures at
   * which segmentation stops.
   *
   * @param parentNode Element the new layout node is appended to
   * @param visualStructure Visual structure to write
   */
  private void writeVisualBlocks(Element parentNode, VisualStructure visualStructure) {
    Element layoutNode = doc.createElement("LayoutNode");

    layoutNode.setAttribute(
        "FrameSourceIndex", String.valueOf(visualStructure.getFrameSourceIndex()));
    layoutNode.setAttribute("SourceIndex", visualStructure.getSourceIndex());
    layoutNode.setAttribute("DoC", String.valueOf(visualStructure.getDoC()));
    layoutNode.setAttribute("ContainImg", String.valueOf(visualStructure.containImg()));
    layoutNode.setAttribute("IsImg", String.valueOf(visualStructure.isImg()));
    layoutNode.setAttribute("ContainTable", String.valueOf(visualStructure.containTable()));
    layoutNode.setAttribute("ContainP", String.valueOf(visualStructure.containP()));
    layoutNode.setAttribute("TextLen", String.valueOf(visualStructure.getTextLength()));
    layoutNode.setAttribute("LinkTextLen", String.valueOf(visualStructure.getLinkTextLength()));
    // NOTE(review): get(0) throws when the structure has no nested blocks, although the SRC /
    // Content handling explicitly allows for that case - confirm whether it can occur here.
    Box parentBox = visualStructure.getNestedBlocks().get(0).getBox().getParent();
    layoutNode.setAttribute(
        "DOMCldNum", String.valueOf(parentBox.getNode().getChildNodes().getLength()));
    layoutNode.setAttribute("FontSize", String.valueOf(visualStructure.getFontSize()));
    layoutNode.setAttribute("FontWeight", String.valueOf(visualStructure.getFontWeight()));
    layoutNode.setAttribute("BgColor", visualStructure.getBgColor());
    layoutNode.setAttribute("ObjectRectLeft", String.valueOf(visualStructure.getX()));
    layoutNode.setAttribute("ObjectRectTop", String.valueOf(visualStructure.getY()));
    layoutNode.setAttribute("ObjectRectWidth", String.valueOf(visualStructure.getWidth()));
    layoutNode.setAttribute("ObjectRectHeight", String.valueOf(visualStructure.getHeight()));
    layoutNode.setAttribute("ID", visualStructure.getId());
    layoutNode.setAttribute("order", String.valueOf(_order));

    _order++;

    boolean continueSegmenting = _pDoC >= visualStructure.getDoC();

    // SRC / Content are written where segmentation stops and on structures that have no child
    // visual structures to continue with.
    if (!continueSegmenting || visualStructure.getChildrenVisualStructures().size() == 0) {
      appendSourceAndContent(layoutNode, visualStructure);
    }

    parentNode.appendChild(layoutNode);

    if (continueSegmenting) {
      for (VisualStructure child : visualStructure.getChildrenVisualStructures()) {
        writeVisualBlocks(layoutNode, child);
      }
    }
  }

  /**
   * Sets the {@code SRC} and {@code Content} attributes of the layout node from the nested blocks
   * of the visual structure; does nothing when the structure has no nested blocks.
   *
   * @param layoutNode Element that receives the attributes
   * @param visualStructure Visual structure whose nested blocks are read
   */
  private void appendSourceAndContent(Element layoutNode, VisualStructure visualStructure) {
    if (visualStructure.getNestedBlocks().size() == 0) {
      return;
    }

    StringBuilder src = new StringBuilder();
    StringBuilder content = new StringBuilder();

    for (VipsBlock block : visualStructure.getNestedBlocks()) {
      ElementBox elementBox = block.getElementBox();

      if (elementBox == null) {
        continue;
      }

      // "Xdiv" and "Xspan" boxes contribute their text instead of the output of getSource.
      String nodeName = elementBox.getNode().getNodeName();
      if (!nodeName.equals("Xdiv") && !nodeName.equals("Xspan")) {
        src.append(getSource(elementBox.getElement()));
      } else {
        src.append(elementBox.getText());
      }

      content.append(elementBox.getText()).append(" ");
    }

    layoutNode.setAttribute("SRC", src.toString());
    layoutNode.setAttribute("Content", content.toString());
  }
Ejemplo n.º 13
0
  /**
   * Collects every block of the given subtree that is flagged as a visual block.
   *
   * <p>The block itself is tested first, then each of its children is visited recursively in
   * order; matching blocks are appended to the given list.
   *
   * @param vipsBlock Root of the subtree to search
   * @param list List that receives the visual blocks
   */
  private void findVisualBlocks(VipsBlock vipsBlock, List<VipsBlock> list) {
    if (vipsBlock.isVisualBlock()) {
      list.add(vipsBlock);
    }

    for (VipsBlock child : vipsBlock.getChildren()) {
      findVisualBlocks(child, list);
    }
  }
Ejemplo n.º 14
0
  /**
   * VIPS Rule Four
   *
   * <p>If all of the child nodes of the DOM node are text nodes or virtual text nodes, do not
   * divide the node. <br>
   * If the font size and font weight of all these child nodes are same, set the DoC of the
   * extracted block to 10. Otherwise, set the DoC of this extracted block to 9.
   *
   * <p>A node with a single child gets DoC 11 when that child is an "em" node and DoC 10
   * otherwise.
   *
   * @param node Input node
   * @return True, if rule is applied, otherwise false.
   */
  private boolean ruleFour(ElementBox node) {
    if (node.getSubBoxList().isEmpty()) {
      return false;
    }

    for (Box box : node.getSubBoxList()) {
      if (box instanceof TextBox) {
        continue;
      }

      // A child rules the node out only when it is neither a text node nor a virtual text node.
      // Rejecting it as soon as one of the two tests fails would require it to be both at once.
      ElementBox elementChild = (ElementBox) box;
      if (!isTextNode(elementChild) && !isVirtualTextNode(elementChild)) {
        return false;
      }
    }

    _currentVipsBlock.setIsVisualBlock(true);
    _currentVipsBlock.setIsDividable(false);

    if (node.getSubBoxList().size() == 1) {
      if (node.getSubBox(0).getNode().getNodeName().equals("em")) {
        _currentVipsBlock.setDoC(11);
      } else {
        _currentVipsBlock.setDoC(10);
      }
      return true;
    }

    // Font properties of the first child seen; the remaining children are compared against them.
    String fontWeight = "";
    int fontSize = 0;

    for (Box childNode : node.getSubBoxList()) {
      int childFontSize = childNode.getVisualContext().getFont().getSize();

      if (childNode instanceof TextBox) {
        // Text boxes are compared by font size only.
        if (fontSize > 0) {
          if (fontSize != childFontSize) {
            _currentVipsBlock.setDoC(9);
            break;
          } else {
            _currentVipsBlock.setDoC(10);
          }
        } else {
          fontSize = childFontSize;
        }
        continue;
      }

      ElementBox child = (ElementBox) childNode;
      Object childFontWeight = child.getStylePropertyValue("font-weight");

      // NOTE(review): this reports the rule as not applied although the current block has
      // already been marked as a non-dividable visual block above - confirm this is intended.
      if (childFontWeight == null) {
        return false;
      }

      if (fontSize > 0) {
        if (childFontWeight.toString().equals(fontWeight) && childFontSize == fontSize) {
          _currentVipsBlock.setDoC(10);
        } else {
          _currentVipsBlock.setDoC(9);
          break;
        }
      } else {
        fontWeight = childFontWeight.toString();
        fontSize = childFontSize;
      }
    }

    return true;
  }
Ejemplo n.º 15
0
  /**
   * Tries to divide DOM elements and finds visual blocks.
   *
   * <p>The given block becomes the current VIPS block and the VIPS rules are applied to its box.
   * If a rule applied and the block is still dividable and not a visual block, it is marked as
   * already divided and every child whose box is not a {@code TextBox} is processed recursively.
   * Otherwise a still dividable block becomes a visual block with DoC 11, and the visual block
   * flag is cleared again when {@code verifyValidity} rejects the box.
   *
   * <p>The box of the given block is cast to {@code ElementBox}, so the block must not hold a
   * text box.
   *
   * @param vipsBlock Visual structure
   */
  private void divideVipsBlockTree(VipsBlock vipsBlock) {
    _currentVipsBlock = vipsBlock;
    ElementBox elementBox = (ElementBox) vipsBlock.getBox();

    // With VIPS rules it tries to determine if element is dividable
    if (applyVipsRules(elementBox) && vipsBlock.isDividable() && !vipsBlock.isVisualBlock()) {
      // if element is dividable, let's divide it
      _currentVipsBlock.setAlreadyDivided(true);

      for (VipsBlock vipsBlockChild : vipsBlock.getChildren()) {
        if (!(vipsBlockChild.getBox() instanceof TextBox)) {
          divideVipsBlockTree(vipsBlockChild);
        }
      }
    } else {
      if (vipsBlock.isDividable()) {
        vipsBlock.setIsVisualBlock(true);
        vipsBlock.setDoC(11);
      }

      if (!verifyValidity(elementBox)) {
        _currentVipsBlock.setIsVisualBlock(false);
      }
    }
  }