/** * VIPS Rule Nine * * <p>If the child of the node with maximum size are small than a threshold (relative size), do * not divide this node. <br> * Set the DoC based on the html tag and size of this node. * * @param node Input node * @return True, if rule is applied, otherwise false. */ private boolean ruleNine(ElementBox node) { // System.err.println("Applying rule Nine on " + node.getNode().getNodeName() + " node"); if (node.getSubBoxList().isEmpty()) return false; int maxSize = 0; for (Box childNode : node.getSubBoxList()) { int childSize = childNode.getWidth() * childNode.getHeight(); if (maxSize < childSize) { maxSize = childSize; } } if (maxSize > _sizeTresholdWidth * _sizeTresholdHeight) return true; // TODO set DOC _currentVipsBlock.setIsVisualBlock(true); _currentVipsBlock.setIsDividable(false); if (node.getNode().getNodeName().equals("Xdiv")) _currentVipsBlock.setDoC(7); if (node.getNode().getNodeName().equals("a")) _currentVipsBlock.setDoC(11); else _currentVipsBlock.setDoC(8); return true; }
/** * VIPS Rule Twelve * * <p>Do not divide this node <br> * Set the DoC value based on the html tag and size of this node. * * @param node Input node * @return True, if rule is applied, otherwise false. */ private boolean ruleTwelve(ElementBox node) { // System.err.println("Applying rule Twelve on " + node.getNode().getNodeName() + " node"); _currentVipsBlock.setIsDividable(false); _currentVipsBlock.setIsVisualBlock(true); if (node.getNode().getNodeName().equals("Xdiv")) _currentVipsBlock.setDoC(7); else if (node.getNode().getNodeName().equals("li")) _currentVipsBlock.setDoC(8); else if (node.getNode().getNodeName().equals("span")) _currentVipsBlock.setDoC(8); else if (node.getNode().getNodeName().equals("sup")) _currentVipsBlock.setDoC(8); else if (node.getNode().getNodeName().equals("img")) _currentVipsBlock.setDoC(8); else _currentVipsBlock.setDoC(333); // TODO DoC Part return true; }
/** * Tries to divide DOM elements and finds visual blocks. * * @param vipsBlock Visual structure */ private void divideVipsBlockTree(VipsBlock vipsBlock) { _currentVipsBlock = vipsBlock; ElementBox elementBox = (ElementBox) vipsBlock.getBox(); // System.err.println(elementBox.getNode().getNodeName()); // System.out.println(elementBox.getText()); if (elementBox.getElement().getAttribute("id").equals("logosLine")) { System.out.println(); } // With VIPS rules it tries to determine if element is dividable if (applyVipsRules(elementBox) && vipsBlock.isDividable() && !vipsBlock.isVisualBlock()) { // if element is dividable, let's divide it _currentVipsBlock.setAlreadyDivided(true); for (VipsBlock vipsBlockChild : vipsBlock.getChildren()) { if (!(vipsBlockChild.getBox() instanceof TextBox)) divideVipsBlockTree(vipsBlockChild); } } else { if (vipsBlock.isDividable()) { // System.err.println("Element " + elementBox.getNode().getNodeName() + " is visual block"); vipsBlock.setIsVisualBlock(true); vipsBlock.setDoC(11); } if (!verifyValidity(elementBox)) { _currentVipsBlock.setIsVisualBlock(false); } /* if (vipsBlock.isVisualBlock()) //System.err.println("Element " + elementBox.getNode().getNodeName() + " is visual block"); else System.err.println("Element " + elementBox.getNode().getNodeName() + " is not visual block");*/ } }
/** * VIPS Rule Eight * * <p>If the node has at least one text node child or at least one virtual text node child, and * the node's relative size is smaller than a threshold, then the node cannot be divided. Set the * DoC value (from 5-8) based on the html tag of the node. * * @param node Input node * @return True, if rule is applied, otherwise false. */ private boolean ruleEight(ElementBox node) { // System.err.println("Applying rule Eight on " + node.getNode().getNodeName() + " node"); if (node.getSubBoxList().isEmpty()) return false; List<Box> children = new ArrayList<Box>(); findTextChildrenNodes(node, children); int cnt = children.size(); if (cnt == 0) return false; if (node.getWidth() == 0 || node.getHeight() == 0) { children.clear(); getAllChildren(node, children); for (Box child : children) { if (child.getWidth() != 0 && child.getHeight() != 0) return true; } } if (node.getWidth() * node.getHeight() > _sizeTresholdHeight * _sizeTresholdWidth) return false; if (node.getNode().getNodeName().equals("ul")) { return true; } _currentVipsBlock.setIsVisualBlock(true); _currentVipsBlock.setIsDividable(false); if (node.getNode().getNodeName().equals("Xdiv")) _currentVipsBlock.setDoC(7); else if (node.getNode().getNodeName().equals("code")) _currentVipsBlock.setDoC(7); else if (node.getNode().getNodeName().equals("div")) _currentVipsBlock.setDoC(5); else _currentVipsBlock.setDoC(8); return true; }
/**
 * Checks if node has valid children nodes.
 *
 * <p>{@code "img"} and {@code "input"} nodes are judged by their own content box: when both the
 * content width and height are positive, the current block becomes a visual block with DoC 8 and
 * the result is true; otherwise the result is false. For every other node the {@code _cnt}
 * counter is reset, {@code checkValidChildrenNodes} is run on each direct sub-box, and the result
 * is whether the counter ended up positive (presumably that helper increments it per valid node
 * - confirm against its implementation).
 *
 * @param node Input node
 * @return True if the node is considered to have valid children, otherwise false.
 */
private boolean hasValidChildrenNodes(ElementBox node) {
  boolean isImageOrInput =
      node.getNode().getNodeName().equals("img") || node.getNode().getNodeName().equals("input");

  if (isImageOrInput) {
    boolean hasContentArea = node.getContentWidth() > 0 && node.getContentHeight() > 0;
    if (!hasContentArea) {
      return false;
    }
    _currentVipsBlock.setIsVisualBlock(true);
    _currentVipsBlock.setDoC(8);
    return true;
  }

  if (node.getSubBoxList().isEmpty()) {
    return false;
  }

  _cnt = 0;
  for (Box subBox : node.getSubBoxList()) {
    checkValidChildrenNodes(subBox);
  }

  return _cnt > 0;
}
/** * VIPS Rule Seven * * <p>If the background color of this node is different from one of its children’s, divide this * node and at the same time, the child node with different background color will not be divided * in this round. Set the DoC value (6-8) for the child node based on the <html> tag of the * child node and the size of the child node. * * @param node Input node * @return True, if rule is applied, otherwise false. */ private boolean ruleSeven(ElementBox node) { // System.err.println("Applying rule Seven on " + node.getNode().getNodeName() + " node"); if (node.getSubBoxList().isEmpty()) return false; if (isTextNode(node)) return false; // String nodeBgColor = node.getStylePropertyValue("background-color"); String nodeBgColor = _currentVipsBlock.getBgColor(); for (VipsBlock vipsStructureChild : _currentVipsBlock.getChildren()) { if (!(vipsStructureChild.getBgColor().equals(nodeBgColor))) { vipsStructureChild.setIsDividable(false); vipsStructureChild.setIsVisualBlock(true); // TODO DoC values vipsStructureChild.setDoC(7); return true; } } return false; }
/** * VIPS Rule Four * * <p>If all of the child nodes of the DOM node are text nodes or virtual text nodes, do not * divide the node. <br> * If the font size and font weight of all these child nodes are same, set the DoC of the * extracted block to 10. Otherwise, set the DoC of this extracted block to 9. * * @param node Input node * @return True, if rule is applied, otherwise false. */ private boolean ruleFour(ElementBox node) { // System.err.println("Applying rule Four on " + node.getNode().getNodeName() + " node"); if (node.getSubBoxList().isEmpty()) return false; for (Box box : node.getSubBoxList()) { if (box instanceof TextBox) continue; if (!isTextNode((ElementBox) box) || !isVirtualTextNode((ElementBox) box)) return false; } _currentVipsBlock.setIsVisualBlock(true); _currentVipsBlock.setIsDividable(false); if (node.getSubBoxList().size() == 1) { /* if (node.getSubBox(0) instanceof TextBox) { _currentVipsBlock.setIsVisualBlock(false); _currentVipsBlock.setIsDividable(true); _currentVipsBlock.getChildren().get(0).setIsVisualBlock(true); _currentVipsBlock.getChildren().get(0).setIsDividable(false); _currentVipsBlock.getChildren().get(0).setDoC(11); } */ if (node.getSubBox(0).getNode().getNodeName().equals("em")) _currentVipsBlock.setDoC(11); else _currentVipsBlock.setDoC(10); return true; } String fontWeight = ""; int fontSize = 0; for (Box childNode : node.getSubBoxList()) { int childFontSize = childNode.getVisualContext().getFont().getSize(); if (childNode instanceof TextBox) { if (fontSize > 0) { if (fontSize != childFontSize) { _currentVipsBlock.setDoC(9); break; } else _currentVipsBlock.setDoC(10); } else fontSize = childFontSize; continue; } ElementBox child = (ElementBox) childNode; if (child.getStylePropertyValue("font-weight") == null) return false; if (fontSize > 0) { if (child.getStylePropertyValue("font-weight").toString().equals(fontWeight) && childFontSize == fontSize) { _currentVipsBlock.setDoC(10); } else { _currentVipsBlock.setDoC(9); break; } } 
else { fontWeight = child.getStylePropertyValue("font-weight").toString(); fontSize = childFontSize; } } return true; }