/**
 * Exercises {@code ParserUtils.splitTags} and {@code ParserUtils.trimTags} on markup that
 * mixes link, div and table-column tags.
 *
 * <p>The tags are selected first through a combined class filter (link OR div OR table
 * column) with each combination of the two boolean arguments, and then through a list of
 * tag names on markup where the div is nested inside the link. Any exception raised by the
 * utilities is reported as a test failure.
 */
public void testTagsComplexMethods() {
    final String label = "modified text";
    final String flatMarkup =
        "OutsideLeft<A>AInside</A><DIV><DIV>DivInside</DIV></DIV><TD>TableColoumnInside</TD>OutsideRight";
    final String nestedMarkup =
        "OutsideLeft<A>AInside<DIV><DIV>DivInside</DIV></DIV></A><TD>TableColoumnInside</TD>OutsideRight";

    try {
        // Build "link OR div OR table column" out of three class filters.
        NodeFilter linkOnly = new NodeClassFilter(LinkTag.class);
        NodeFilter divOnly = new NodeClassFilter(Div.class);
        NodeFilter columnOnly = new NodeClassFilter(TableColumn.class);
        OrFilter linkOrDiv = new OrFilter(linkOnly, divOnly);
        OrFilter anyOfThree = new OrFilter(linkOrDiv, columnOnly);

        // Two-argument form: only the text outside the matched tags is expected.
        String[] pieces = ParserUtils.splitTags(flatMarkup, anyOfThree);
        String joined = pieces[0] + "*" + pieces[1];
        assertStringEquals(label, "OutsideLeft*OutsideRight", joined);

        // (false, false): tag contents are expected, with the inner DIV left intact.
        pieces = ParserUtils.splitTags(flatMarkup, anyOfThree, false, false);
        joined = pieces[0] + "*" + pieces[1] + "*" + pieces[2] + "*" + pieces[3] + "*" + pieces[4];
        assertStringEquals(
            label,
            "OutsideLeft*AInside*<DIV>DivInside</DIV>*TableColoumnInside*OutsideRight",
            joined);

        // (true, false): the inner DIV is expected to be stripped as well.
        pieces = ParserUtils.splitTags(flatMarkup, anyOfThree, true, false);
        joined = pieces[0] + "*" + pieces[1] + "*" + pieces[2] + "*" + pieces[3] + "*" + pieces[4];
        assertStringEquals(
            label,
            "OutsideLeft*AInside*DivInside*TableColoumnInside*OutsideRight",
            joined);

        // (false, true): same expectation as the two-argument form.
        pieces = ParserUtils.splitTags(flatMarkup, anyOfThree, false, true);
        joined = pieces[0] + "*" + pieces[1];
        assertStringEquals(label, "OutsideLeft*OutsideRight", joined);

        // Selection by tag name, with the DIV nested inside the link.
        pieces = ParserUtils.splitTags(nestedMarkup, new String[] {"DIV", "TD", "A"});
        joined = pieces[0] + "*" + pieces[1];
        assertStringEquals(label, "OutsideLeft*OutsideRight", joined);

        String trimmed = ParserUtils.trimTags(nestedMarkup, new String[] {"DIV", "TD", "A"});
        assertStringEquals(label, "OutsideLeftOutsideRight", trimmed);
    } catch (Exception e) {
        // Fall back to the exception class name when there is no message to report.
        String reason = e.getMessage();
        fail(reason != null ? reason : e.getClass().getName());
    }
}
public void testTagsMethods() { try { String[] tmpSplitTags = ParserUtils.splitTags( "Begin <DIV><DIV> +12.5 </DIV></DIV> ALL OK", new String[] {"DIV"}); assertStringEquals( "modified text", "Begin * ALL OK", new String(tmpSplitTags[0] + '*' + tmpSplitTags[1])); tmpSplitTags = ParserUtils.splitTags( "Begin <DIV><DIV> +12.5 </DIV></DIV> ALL OK", new String[] {"DIV"}, false, false); assertStringEquals( "modified text", "Begin *<DIV> +12.5 </DIV>* ALL OK", new String(tmpSplitTags[0] + '*' + tmpSplitTags[1] + '*' + tmpSplitTags[2])); tmpSplitTags = ParserUtils.splitTags( "Begin <DIV><DIV> +12.5 </DIV></DIV> ALL OK", new String[] {"DIV"}, true, false); assertStringEquals( "modified text", "Begin * +12.5 * ALL OK", new String(tmpSplitTags[0] + '*' + tmpSplitTags[1] + '*' + tmpSplitTags[2])); tmpSplitTags = ParserUtils.splitTags( "Begin <DIV><DIV> +12.5 </DIV></DIV> ALL OK", new String[] {"DIV"}, false, true); assertStringEquals( "modified text", "Begin * ALL OK", new String(tmpSplitTags[0] + '*' + tmpSplitTags[1])); assertStringEquals( "modified text", " ALL OK", ParserUtils.trimTags("<DIV><DIV> +12.5 </DIV></DIV> ALL OK", new String[] {"DIV"})); assertStringEquals( "modified text", "<DIV> +12.5 </DIV> ALL OK", ParserUtils.trimTags( "<DIV><DIV> +12.5 </DIV></DIV> ALL OK", new String[] {"DIV"}, false, false)); assertStringEquals( "modified text", " +12.5 ALL OK", ParserUtils.trimTags( "<DIV><DIV> +12.5 </DIV></DIV> ALL OK", new String[] {"DIV"}, true, false)); assertStringEquals( "modified text", " ALL OK", ParserUtils.trimTags( "<DIV><DIV> +12.5 </DIV></DIV> ALL OK", new String[] {"DIV"}, false, true)); // Test trimAllTags method assertStringEquals( "modified text", " +12.5 ALL OK", ParserUtils.trimAllTags("<DIV><DIV> +12.5 </DIV></DIV> ALL OK", false)); assertStringEquals( "modified text", " ALL OK", ParserUtils.trimAllTags("<DIV><DIV> +12.5 </DIV></DIV> ALL OK", true)); assertStringEquals( "modified text", " +12.5 ", ParserUtils.trimAllTags("<DIV><DIV> +12.5 
</DIV></DIV>", false)); assertStringEquals( "modified text", "", ParserUtils.trimAllTags("<DIV><DIV> +12.5 </DIV></DIV>", true)); assertStringEquals( "modified text", " YYY ", ParserUtils.trimAllTags("<XXX> YYY <ZZZ>", false)); assertStringEquals("modified text", "YYY", ParserUtils.trimAllTags("YYY", false)); assertStringEquals("modified text", "> OK <", ParserUtils.trimAllTags("> OK <", true)); } catch (Exception e) { String msg = e.getMessage(); if (null == msg) msg = e.getClass().getName(); fail(msg); } }
public void testTagsClassMethods() { try { NodeFilter filter = new NodeClassFilter(Div.class); String[] tmpSplitTags = ParserUtils.splitTags("Begin <DIV><DIV> +12.5 </DIV></DIV> ALL OK", filter); assertStringEquals( "modified text", "Begin * ALL OK", new String(tmpSplitTags[0] + '*' + tmpSplitTags[1])); tmpSplitTags = ParserUtils.splitTags( "Begin <DIV><DIV> +12.5 </DIV></DIV> ALL OK", filter, false, false); assertStringEquals( "modified text", "Begin *<DIV> +12.5 </DIV>* ALL OK", new String(tmpSplitTags[0] + '*' + tmpSplitTags[1] + '*' + tmpSplitTags[2])); tmpSplitTags = ParserUtils.splitTags("Begin <DIV><DIV> +12.5 </DIV></DIV> ALL OK", filter, true, false); assertStringEquals( "modified text", "Begin * +12.5 * ALL OK", new String(tmpSplitTags[0] + '*' + tmpSplitTags[1] + '*' + tmpSplitTags[2])); tmpSplitTags = ParserUtils.splitTags("Begin <DIV><DIV> +12.5 </DIV></DIV> ALL OK", filter, false, true); assertStringEquals( "modified text", "Begin * ALL OK", new String(tmpSplitTags[0] + '*' + tmpSplitTags[1])); assertStringEquals( "modified text", " ALL OK", ParserUtils.trimTags("<DIV><DIV> +12.5 </DIV></DIV> ALL OK", filter)); assertStringEquals( "modified text", "<DIV> +12.5 </DIV> ALL OK", ParserUtils.trimTags("<DIV><DIV> +12.5 </DIV></DIV> ALL OK", filter, false, false)); assertStringEquals( "modified text", " +12.5 ALL OK", ParserUtils.trimTags("<DIV><DIV> +12.5 </DIV></DIV> ALL OK", filter, true, false)); assertStringEquals( "modified text", " ALL OK", ParserUtils.trimTags("<DIV><DIV> +12.5 </DIV></DIV> ALL OK", filter, false, true)); NodeFilter filterTableRow = new NodeClassFilter(TableRow.class); NodeFilter filterTableColumn = new NodeClassFilter(TableColumn.class); OrFilter filterOr = new OrFilter(filterTableRow, filterTableColumn); assertStringEquals( "modified text", " ALL OK", ParserUtils.trimTags("<TR><TD> +12.5 </TD></TR> ALL OK", filterOr)); assertStringEquals( "modified text", "<TD> +12.5 </TD> ALL OK", ParserUtils.trimTags("<TR><TD> +12.5 </TD></TR> 
ALL OK", filterOr, false, false)); assertStringEquals( "modified text", " +12.5 ALL OK", ParserUtils.trimTags("<TR><TD> +12.5 </TD></TR> ALL OK", filterOr, true, false)); assertStringEquals( "modified text", " ALL OK", ParserUtils.trimTags("<TR><TD> +12.5 </TD></TR> ALL OK", filterOr, false, true)); } catch (Exception e) { String msg = e.getMessage(); if (null == msg) msg = e.getClass().getName(); fail(msg); } }
/**
 * Processes one page: finds its posts and, for every post that is not yet known, collects
 * the pictures of its photo and photoset parts before handing the post to
 * {@code Main.handlePost}. Posts that are already known are passed to
 * {@code handleNonDownloadPost} instead.
 *
 * <p>Any exception raised while processing is printed, reported through
 * {@code Main.status}, and ends processing of the page; the method still returns
 * {@code true} in that case.
 *
 * @param address address of the page to process
 * @return {@code false} when the page yields no posts at all, {@code true} otherwise
 */
private static boolean handleURL(String address) {
    Main.status(String.format("Processing page \"%s\".", address));
    try {
        Node[] postNodes = getPosts(address).toNodeArray();
        if (postNodes.length == 0) {
            return false;
        }
        for (Node postNode : postNodes) {
            if (!(postNode instanceof TagNode)) {
                continue;
            }
            TagNode post = (TagNode) postNode;
            // The numeric post id follows a five-character prefix in the "id" attribute.
            Post newPost = new Post(Long.parseLong(post.getAttribute("id").substring(5)));

            if (Main.post_post_hash.containsKey(newPost)) {
                // NOTE(review): this lookup uses the unqualified post_post_hash while the
                // check above uses Main.post_post_hash; confirm both name the same map.
                newPost = post_post_hash.get(newPost);
                handleNonDownloadPost(newPost);
                continue;
            }

            // Single-photo parts: the picture URL is embedded in the text of a remark node.
            NodeList photoPosts = getPhotoPosts(post.getChildren());
            NodeList remarks = getRemarks(photoPosts);
            for (Node remark : remarks.toNodeArray()) {
                Matcher matcher = lores.matcher(remark.getText());
                if (!matcher.find()) {
                    // Nothing to extract. Carrying on with an empty URL would fail in the
                    // thumbnail computation and abort the remaining posts of the page.
                    continue;
                }
                String match = matcher.group();
                // Drop the 17 leading characters and the single trailing character of the match.
                String mediaUrl = match.substring(17, match.length() - 1);
                URL thumbUrl = new URL(squareThumbnailUrl(mediaUrl));
                newPost.pictures.add(new Picture(new URL(mediaUrl), thumbUrl));
            }

            // Photoset parts: the pictures live in a separate document referenced by an iframe.
            NodeList photosetPosts = getPhotosetPosts(post.getChildren());
            NodeList iframes = getIFrames(photosetPosts);
            for (Node frameNode : iframes.toNodeArray()) {
                if (!(frameNode instanceof TagNode)) {
                    continue;
                }
                String iframeUrl = ((TagNode) frameNode).getAttribute("src");
                if (iframeUrl == null) {
                    // Presumably getAttribute yields null for a missing attribute (confirm
                    // against the HTML Parser API); without a source there is nothing to fetch.
                    continue;
                }
                Parser frameParser = new Parser(iframeUrl);
                NodeList anchors = frameParser.extractAllNodesThatMatch(new TagNameFilter("a"));
                Node[] images =
                    anchors.extractAllNodesThatMatch(new TagNameFilter("img"), true).toNodeArray();
                // Iterate over the images themselves. Bounding the loop by the number of
                // anchors overruns the image array when an anchor has no image and misses
                // images when an anchor holds more than one.
                for (Node imageNode : images) {
                    if (!(imageNode instanceof TagNode)) {
                        continue;
                    }
                    String mediaUrl = ((TagNode) imageNode).getAttribute("src");
                    if (mediaUrl == null) {
                        continue;
                    }
                    URL thumbUrl = new URL(squareThumbnailUrl(mediaUrl));
                    newPost.pictures.add(new Picture(new URL(mediaUrl), thumbUrl));
                }
            }

            Main.handlePost(newPost);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        Main.status("Error handling post.");
    }
    return true;
}

/**
 * Derives the thumbnail address from a picture address by replacing the text between the
 * last underscore and the last dot with {@code _75sq}.
 *
 * <p>Only that final segment is replaced, so an identical character sequence occurring
 * earlier in the address is left untouched.
 *
 * @param mediaUrl picture address containing an underscore-prefixed segment before its
 *     final dot
 * @return the address with that segment replaced by {@code _75sq}
 * @throws IllegalArgumentException if the address has no underscore located before its
 *     last dot
 */
private static String squareThumbnailUrl(String mediaUrl) {
    int sizeStart = mediaUrl.lastIndexOf('_');
    int extensionStart = mediaUrl.lastIndexOf('.');
    if (sizeStart < 0 || extensionStart < sizeStart) {
        throw new IllegalArgumentException(
            "No size segment found in media URL \"" + mediaUrl + "\"");
    }
    return mediaUrl.substring(0, sizeStart) + "_75sq" + mediaUrl.substring(extensionStart);
}