/** * Carries out preprocessing that makes JEuclid handle the document better. * * @param doc Document */ static void preprocessForJEuclid(Document doc) { // underbrace and overbrace NodeList list = doc.getElementsByTagName("mo"); for (int i = 0; i < list.getLength(); i++) { Element mo = (Element) list.item(i); String parentName = ((Element) mo.getParentNode()).getTagName(); if (parentName == null) { continue; } if (parentName.equals("munder") && isTextChild(mo, "\ufe38")) { mo.setAttribute("stretchy", "true"); mo.removeChild(mo.getFirstChild()); mo.appendChild(doc.createTextNode("\u23df")); } else if (parentName.equals("mover") && isTextChild(mo, "\ufe37")) { mo.setAttribute("stretchy", "true"); mo.removeChild(mo.getFirstChild()); mo.appendChild(doc.createTextNode("\u23de")); } } // menclose for long division doesn't allow enough top padding. Oh, and // <mpadded> isn't implemented. And there isn't enough padding to left of // the bar either. Solve by adding an <mover> with just an <mspace> over# // the longdiv, contained within an mrow that adds a <mspace> before it. 
list = doc.getElementsByTagName("menclose"); for (int i = 0; i < list.getLength(); i++) { Element menclose = (Element) list.item(i); // Only for longdiv if (!"longdiv".equals(menclose.getAttribute("notation"))) { continue; } Element mrow = doc.createElementNS(WebMathsService.NS, "mrow"); Element mover = doc.createElementNS(WebMathsService.NS, "mover"); Element mspace = doc.createElementNS(WebMathsService.NS, "mspace"); Element mspaceW = doc.createElementNS(WebMathsService.NS, "mspace"); boolean previousElement = false; for (Node previous = menclose.getPreviousSibling(); previous != null; previous = previous.getPreviousSibling()) { if (previous.getNodeType() == Node.ELEMENT_NODE) { previousElement = true; break; } } if (previousElement) { mspaceW.setAttribute("width", "4px"); } menclose.getParentNode().insertBefore(mrow, menclose); menclose.getParentNode().removeChild(menclose); mrow.appendChild(mspaceW); mrow.appendChild(mover); mover.appendChild(menclose); mover.appendChild(mspace); } }
/**
 * Tests whether a node's content is exactly one text node with the given
 * value.
 *
 * @param parent Node whose children are inspected
 * @param text Text the single child must equal
 * @return True only if parent has exactly one child, that child is a text
 *   node, and its value equals text
 */
private static boolean isTextChild(Node parent, String text) {
    NodeList children = parent.getChildNodes();
    if (children.getLength() == 1) {
        Node only = children.item(0);
        return only.getNodeType() == Node.TEXT_NODE
            && only.getNodeValue().equals(text);
    }
    return false;
}
/**
 * Collects the nodes in the given list that match the tag name "iframe".
 * The extraction is requested with the second argument set to true
 * (the library's recursive flag).
 *
 * @param node_list nodes to search
 * @return the matching nodes
 */
private static NodeList getIFrames(NodeList node_list) {
    TagNameFilter iframe_filter = new TagNameFilter("iframe");
    return node_list.extractAllNodesThatMatch(iframe_filter, true);
}
/**
 * Collects the nodes in the given list that are instances of
 * {@code RemarkNode}. The extraction is requested with the second argument
 * set to true (the library's recursive flag).
 *
 * @param node_list nodes to search
 * @return the matching nodes
 */
private static NodeList getRemarks(NodeList node_list) {
    NodeClassFilter remark_filter = new NodeClassFilter(RemarkNode.class);
    return node_list.extractAllNodesThatMatch(remark_filter, true);
}
/**
 * Collects the nodes in the given list whose "class" attribute is
 * "my_photoset_post". The extraction is requested with the second argument
 * set to true (the library's recursive flag).
 *
 * @param node_list nodes to search
 * @return the matching nodes
 */
private static NodeList getPhotosetPosts(NodeList node_list) {
    HasAttributeFilter photoset_filter =
        new HasAttributeFilter("class", "my_photoset_post");
    return node_list.extractAllNodesThatMatch(photoset_filter, true);
}
/**
 * Processes one page: fetches its posts and, for every post not yet known,
 * collects the pictures referenced by its photo posts (via remark nodes) and
 * photoset posts (via iframes) before handing the post to
 * {@code Main.handlePost}. Posts that are already known are passed to
 * {@code handleNonDownloadPost} instead.
 * <p>
 * Any exception raised while processing is printed and reported through
 * {@code Main.status}; it stops processing of the remaining posts on the
 * page but does not change the return value.
 *
 * @param address address of the page to process
 * @return false if the page yielded no posts; true otherwise, including
 *   when an exception interrupted processing
 */
private static boolean handleURL(String address) {
    Main.status(String.format("Processing page \"%s\".", address));
    try {
        Node[] post_nodes = getPosts(address).toNodeArray();
        if (post_nodes.length == 0) {
            return false;
        }
        for (Node post_node : post_nodes) {
            if (!(post_node instanceof TagNode)) {
                continue;
            }
            TagNode post = (TagNode) post_node;
            // NOTE(review): assumes the id attribute carries a 5-character
            // prefix followed by the numeric post id - confirm against getPosts.
            Post new_post = new Post(Long.parseLong(post.getAttribute("id").substring(5)));

            if (Main.post_post_hash.containsKey(new_post)) {
                new_post = post_post_hash.get(new_post);
                handleNonDownloadPost(new_post);
                continue;
            }

            // Single-photo posts: the image address is embedded in a remark node.
            NodeList photo_posts = getPhotoPosts(post.getChildren());
            NodeList remarks = getRemarks(photo_posts);
            for (Node node : remarks.toNodeArray()) {
                Matcher matcher = lores.matcher(node.getText());
                if (!matcher.find()) {
                    // A remark without an image reference used to leave the
                    // URL empty, which then threw and aborted the whole page.
                    continue;
                }
                String media_url = matcher.group();
                // NOTE(review): the offsets strip a fixed 17-character prefix
                // and one trailing character from the match - tied to the
                // lores pattern, which is defined elsewhere.
                media_url = media_url.substring(17, media_url.length() - 1);
                URL thumb_url = new URL(buildThumbnailUrl(media_url));
                new_post.pictures.add(new Picture(new URL(media_url), thumb_url));
            }

            // Photoset posts: the images live in a separate iframe document.
            NodeList photoset_posts = getPhotosetPosts(post.getChildren());
            NodeList iframes = getIFrames(photoset_posts);
            for (Node node : iframes.toNodeArray()) {
                if (!(node instanceof TagNode)) {
                    continue;
                }
                String iframe_url = ((TagNode) node).getAttribute("src");
                Parser parser2 = new Parser(iframe_url);
                NodeList a_list = parser2.extractAllNodesThatMatch(new TagNameFilter("a"));
                Node[] a_array = a_list.toNodeArray();
                Node[] img_array =
                    a_list.extractAllNodesThatMatch(new TagNameFilter("img"), true).toNodeArray();
                // The loop indexes img_array, so it must not run past it when
                // there are fewer images than anchors; the anchor count is
                // kept as an upper bound to match the previous behavior.
                int picture_count = Math.min(a_array.length, img_array.length);
                for (int i = 0; i < picture_count; i++) {
                    String media_url = ((TagNode) img_array[i]).getAttribute("src");
                    URL thumb_url = new URL(buildThumbnailUrl(media_url));
                    new_post.pictures.add(new Picture(new URL(media_url), thumb_url));
                }
            }

            Main.handlePost(new_post);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        Main.status("Error handling post.");
    }
    return true;
}

/**
 * Derives the thumbnail address from a media address by replacing the part
 * between the last underscore and the last dot with "_75sq".
 * Only that trailing part is replaced, even if the same text occurs earlier
 * in the address.
 *
 * @param media_url address of the full-size image
 * @return address with the trailing size suffix replaced by "_75sq"
 * @throws IllegalArgumentException if the address has no underscore, or no
 *   dot after its last underscore
 */
private static String buildThumbnailUrl(String media_url) {
    int size_start = media_url.lastIndexOf('_');
    int extension_start = media_url.lastIndexOf('.');
    if (size_start < 0 || extension_start < size_start) {
        throw new IllegalArgumentException(
            "Cannot derive thumbnail URL from \"" + media_url + "\".");
    }
    return media_url.substring(0, size_start) + "_75sq" + media_url.substring(extension_start);
}