@Test public void insertChildrenAtPosition() { Document doc = Jsoup.parse( "<div id=1>Text1 <p>One</p> Text2 <p>Two</p></div><div id=2>Text3 <p>Three</p></div>"); Element div1 = doc.select("div").get(0); Elements p1s = div1.select("p"); Element div2 = doc.select("div").get(1); assertEquals(2, div2.childNodeSize()); div2.insertChildren(-1, p1s); assertEquals(2, div1.childNodeSize()); // moved two out assertEquals(4, div2.childNodeSize()); assertEquals(3, p1s.get(1).siblingIndex()); // should be last List<Node> els = new ArrayList<Node>(); Element el1 = new Element(Tag.valueOf("span"), "").text("Span1"); Element el2 = new Element(Tag.valueOf("span"), "").text("Span2"); TextNode tn1 = new TextNode("Text4", ""); els.add(el1); els.add(el2); els.add(tn1); assertNull(el1.parent()); div2.insertChildren(-2, els); assertEquals(div2, el1.parent()); assertEquals(7, div2.childNodeSize()); assertEquals(3, el1.siblingIndex()); assertEquals(4, el2.siblingIndex()); assertEquals(5, tn1.siblingIndex()); }
@Test public void testClone() { Document doc = Jsoup.parse("<div><p>One<p><span>Two</div>"); Element p = doc.select("p").get(1); Element clone = p.clone(); assertNull(clone.parent()); // should be orphaned assertEquals(0, clone.siblingIndex); assertEquals(1, p.siblingIndex); assertNotNull(p.parent()); clone.append("<span>Three"); assertEquals( "<p><span>Two</span><span>Three</span></p>", TextUtil.stripNewlines(clone.outerHtml())); assertEquals( "<div><p>One</p><p><span>Two</span></p></div>", TextUtil.stripNewlines(doc.body().html())); // not modified doc.body().appendChild(clone); // adopt assertNotNull(clone.parent()); assertEquals( "<div><p>One</p><p><span>Two</span></p></div><p><span>Two</span><span>Three</span></p>", TextUtil.stripNewlines(doc.body().html())); }
private static boolean preserveWhitespace(Node node) { // looks only at this element and one level up, to prevent recursion & needless stack searches if (node != null && node instanceof Element) { Element element = (Element) node; return element.tag().preserveWhitespace() || element.parent() != null && element.parent().tag().preserveWhitespace(); } return false; }
/**
 * Computes the average star rating found in a page.
 *
 * <p>A rating badge is any {@code div.starsprite.short.starN} (N from 1 to 5) whose direct parent
 * carries the class {@code stars}; each such badge contributes N to the average.
 *
 * @param currentDoc the parsed page to scan
 * @return the average formatted with two decimals via {@code String.format("%.2f", ...)}; when
 *     the page contains no matching badge the division is 0.0/0 and the result is {@code "NaN"}
 */
public String getAVGwebsitesEvaluation(Document currentDoc) {
    int total = 0;
    int currentSum = 0;
    // One pass per star value replaces five copy-pasted loops that differed only in the number.
    for (int stars = 1; stars <= 5; stars++) {
        for (Element badge : currentDoc.select("div.starsprite.short.star" + stars)) {
            if (badge.parent().hasClass("stars")) {
                total++;
                currentSum += stars;
            }
        }
    }
    double rating = currentSum * 1.0 / total;
    return String.format("%.2f", rating);
}
/**
 * Strips the edit link and the collect button, together with their direct containers, from a
 * Baidu Baike HTML fragment.
 *
 * @param pcont raw HTML; may be {@code null}
 * @return the cleaned document serialized with {@code Document.html()}, or an empty string when
 *     {@code pcont} is {@code null}
 */
public String reviseContForBaiduBaike(String pcont) {
    if (pcont == null) {
        return "";
    }
    Document doc = Jsoup.parse(pcont);
    // Selectors are processed in order, each against the document as left by the previous one.
    for (String selector : new String[] {"a#lemma-edit", "div#collectBtn"}) {
        for (Element marker : doc.select(selector)) {
            Element container = marker.parent();
            // Two markers can share a container; once it is detached a second remove() would
            // fail on jsoup versions that require a parent, so skip it.
            if (container != null && container.parent() != null) {
                container.remove();
            }
        }
    }
    return doc.html();
}
/**
 * Appends the ancestors of {@code el} to {@code parents}, nearest ancestor first.
 *
 * <p>The walk stops at the first missing parent or at the first ancestor whose tag name is
 * {@code #root}; that ancestor itself is not added.
 *
 * @param el the element whose ancestors are collected
 * @param parents the collection receiving the ancestors
 */
private static void accumulateParents(Element el, Elements parents) {
    Element ancestor = el.parent();
    while (ancestor != null && !ancestor.tagName().equals("#root")) {
        parents.add(ancestor);
        ancestor = ancestor.parent();
    }
}
/**
 * Downloads the menu page identified by {@code number} and extracts one {@link MenuMeal} per
 * table row that contains a {@code td[width=400]} cell.
 *
 * <p>Within such a row the first {@code div[id=ssilka]} supplies the description and the last one
 * the cost, of which only the part before the first {@code '-'} is parsed as an integer.
 *
 * @param number value substituted into the {@code URL} format string
 * @return the meals found; empty when the page cannot be fetched or has no matching rows
 */
public List<MenuMeal> getMenuMeals(int number) {
    List<MenuMeal> meals = new ArrayList<>();
    Document doc;
    try {
        doc = Jsoup.connect(String.format(URL, number))
                .userAgent("Chrome/49.0.2623.112")
                .referrer("https://www.google.ru/")
                .timeout(7000)
                .get();
    } catch (IOException e) {
        // Best effort: a failed download yields an empty menu rather than an error.
        e.printStackTrace();
        return meals;
    }

    // The original selector lacked the closing bracket ("td[width=400").
    for (Element cell : doc.select("td[width=400]")) {
        Element row = cell.parent();
        Elements parts = row.select("div[id=ssilka]");
        if (parts.isEmpty()) {
            continue; // row lacks the expected description/cost blocks
        }
        String cost = parts.last().text();
        int dash = cost.indexOf("-");
        if (dash < 0) {
            continue; // cost is not in the expected "<number>-..." form
        }
        MenuMeal menuMeal = new MenuMeal();
        menuMeal.setDescription(parts.first().text());
        menuMeal.setCost(Integer.valueOf(cost.substring(0, dash)));
        meals.add(menuMeal);
    }
    return meals;
}
/**
 * Looks the element up again through its parent's child-node list, using its sibling index.
 *
 * @param aInElement the element to resolve; must have a parent
 * @return the parent's child node found at the element's sibling index
 * @throws RuntimeException if the node name at that index differs from the element's tag name
 */
private static Node toNode(Element aInElement) {
    Node lCandidate = aInElement.parent().childNode(aInElement.siblingIndex());
    String lNodeName = lCandidate.nodeName();
    String lTagName = aInElement.tagName();
    if (lNodeName.equals(lTagName)) {
        return lCandidate;
    }
    throw new RuntimeException(lNodeName + " != " + lTagName);
}
public Statistic parse(Document doc, String sport, String competition) { Statistic stat = parse(doc); Elements sportStatsLinks = doc.select("a[href*=sport_name=" + sport + "]"); for (Element el : sportStatsLinks) { if (el.parent().hasClass("infoPageText2") && competition.equalsIgnoreCase(el.text())) { // String competitionName = el.text(); linkToCompStat = el.attr("href").replaceAll("\u00A0", "").replaceAll(" ", ""); // String value = // el.parent().parent().select("td").get(1).text(); } else if (el.parent().hasClass("infoPageTitle2")) { linkToSportStat = el.attr("href").replaceAll("\u00A0", ""); } } logger.debug(stat); return stat; }
/**
 * Returns the 1-based position of {@code element} among those of its siblings that share its tag.
 *
 * @param root not used by this implementation
 * @param element the element to locate; must have a parent
 * @return the number of same-tag siblings up to and including {@code element}
 */
protected int calculatePosition(Element root, Element element) {
    int position = 0;
    for (Element sibling : element.parent().children()) {
        if (sibling.tag().equals(element.tag())) {
            position++;
        }
        // Identity comparison: stop at this exact node, not at an equal-looking sibling.
        if (sibling == element) {
            break;
        }
    }
    return position;
}
private List<LocNode> getNodeList(String locStr) { LinkedList<LocNode> nodeList = new LinkedList<LocNode>(); // 查找符合locStr的节点 Element locElement = locData.select("[Name=" + locStr + "]").first(); if (locElement != null) { nodeList.addFirst(LocNode.parseNode(locElement)); while ((locElement = locElement.parent()) != null) { LocNode curNode = LocNode.parseNode(locElement); if (curNode != null) nodeList.addFirst(curNode); else break; } } return nodeList; }
/**
 * Turns hidden image inputs into real images: for every {@code input[name="hiddenimg"]} an
 * {@code <img>} whose {@code src} is the input's {@code value} is inserted immediately before the
 * input's parent element.
 *
 * @param pcont raw HTML; may be {@code null}
 * @return the rewritten document serialized with {@code Document.html()}, or an empty string when
 *     {@code pcont} is {@code null}
 */
public String reviseImgForYuehui(String pcont) {
    if (pcont == null) {
        return "";
    }
    Document doc = Jsoup.parse(pcont);
    for (Element hidden : doc.select("input[name=\"hiddenimg\"]")) {
        Element container = hidden.parent();
        if (container == null || container.parent() == null) {
            continue; // nothing to insert before
        }
        // Build the node instead of concatenating markup, so a value containing quotes or
        // angle brackets cannot break out of the src attribute.
        Element image = doc.createElement("img").attr("src", hidden.attr("value"));
        container.before(image);
    }
    return doc.html();
}
public void download(Connection aInConnection, Collection<Image> images) throws IOException { aInConnection.url(url); Document lDocument = aInConnection.get(); Element lMain = lDocument.getElementById("main"); Elements lContents = lMain.getElementsByClass("content"); if (lContents.size() == 1) { StringBuilder sb = new StringBuilder(); Element lContent = lContents.first(); collectImages(lContent, images); Elements lLightboxElements = lContent.getElementsByClass("lightbox"); for (Element lLightboxElement : lLightboxElements) { Collection<Node> lImageNodes = extractImageNodes(lLightboxElement); Element lParent = lLightboxElement.parent(); int i = lLightboxElement.siblingIndex(); lParent.insertChildren(i, lImageNodes); lLightboxElement.remove(); } Elements lChildElements = lContent.children(); for (Element lChildElement : lChildElements) { if (lChildElement.hasClass("clear")) { // no more post content break; } if (title == null && lChildElement.tagName().equals("h1")) { // the first h1 header is the title title = lChildElement.html(); } else { if (excerpt == null && lChildElement.tagName().equals("p")) { excerpt = lChildElement.text(); } String lStr = lChildElement.toString(); sb.append(lStr); } } content = sb.toString(); Elements lDateElements = lContent.getElementsByClass("date"); String lHunDate = lDateElements.first().html(); date = new PostDate(lHunDate); } else { System.out.println("More than one content in main section of post page " + toString()); } }
protected void append(Element node, StringBuilder sb, String tagName) { // is select more costly then getElementsByTag? MAIN: for (Element e : node.select(tagName)) { Element tmpEl = e; // check all elements until 'node' while (tmpEl != null && !tmpEl.equals(node)) { if (unlikely(tmpEl)) continue MAIN; tmpEl = tmpEl.parent(); } String text = node2Text(e); if (text.isEmpty() || text.length() < minParagraphText || text.length() > SHelper.countLetters(text) * 2) continue; sb.append(text); sb.append("\n\n"); } }
public Map doProcess() { if (!isInit) { init(); } Iterator<String> it = map.keySet().iterator(); while (it.hasNext()) { String key = it.next(); Element val = map.get(key); // 如果是ul或者table,用相应的组件渲染 if (("ul".equals(val.tagName().toLowerCase())) || ("table".equals(val.tagName().toLowerCase()))) { System.out.println("------------------列表开始-----------------------------"); Elements links = val.select("a"); for (Element ele : links) { System.out.println("a:" + ele.attr("abs:href") + ",文本:" + ele.text()); } System.out.println("------------------列表结束-----------------------------"); } else { System.out.println("------------------非列表-----------------------------"); if ("a".equals(val.tagName().toLowerCase())) { System.out.println("a:" + val.attr("abs:href") + ",文本:" + val.text()); } else { if ("span".equals(val.tagName().toLowerCase())) { System.out.println("父容器:" + val.parent().tagName()); } System.out.println("标签:" + val.tagName() + ",html:" + val.html()); } } } System.out.println("---------------top menu---------------------"); it = topMenumap.keySet().iterator(); while (it.hasNext()) { String key = (String) it.next(); Element val = topMenumap.get(key); Elements links = val.select("a"); for (Element ele : links) { System.out.println("a:" + ele.attr("abs:href") + ",文本:" + ele.text()); } } return map; }
public List<AwardMovie> getList(AwardYear awardYear) throws IOException { List<AwardMovie> awards = new ArrayList<AwardMovie>(); // This has keyword and pagination URL url = new URL("http://en.wikipedia.org" + awardYear.getWikipediaSource()); Document doc = Jsoup.parse(url, 3000); Element awardSpan = doc.select("span[id=Awards").first(); Element uiElm = awardSpan.parent().nextElementSibling(); Iterator<Element> awardLiList = uiElm.select("li").iterator(); while (awardLiList.hasNext()) { Element liElm = awardLiList.next(); Elements elms = liElm.select("a"); if (elms.size() > 0) { Element aElm = elms.get(0); Element movieElm = elms.get(1); logger.debug("aElm=" + aElm.text()); AwardMovie movie = new AwardMovie(movieElm.attr("href"), movieElm.text(), awardYear.getYear()); if (aElm.text().equals("Palme d'Or")) { movie.setBestMovie(true); logger.debug(movie); awards.add(movie); } else if (aElm.text().equals("Best Actor")) { movie.setBestLeadHero(true); logger.debug(movie); awards.add(movie); } else if (aElm.text().equals("Best Actress")) { movie.setBestLeadHeroine(true); logger.debug(movie); awards.add(movie); } else if (aElm.text().equals("Best Director")) { movie.setBestDirector(true); logger.debug(movie); awards.add(movie); } } } return awards; }
/**
 * Normalizes images in a Zhihu HTML fragment.
 *
 * <p>For every image inside a {@code noscript} element a copy carrying only the {@code src}
 * attribute is inserted before the image's parent, after which the {@code noscript} element is
 * removed. Then every remaining image gets its {@code src} overwritten by {@code data-original}
 * and, taking precedence, by {@code data-actualsrc}, whenever those attributes are non-empty.
 *
 * @param pcont raw HTML; may be {@code null}
 * @return the rewritten document serialized with {@code Document.html()}, or an empty string when
 *     {@code pcont} is {@code null}
 */
public String reviseImgForZhiHuApp(String pcont) {
    if (pcont == null) {
        return "";
    }
    Document doc = Jsoup.parse(pcont);

    for (Element noscript : doc.select("noscript")) {
        for (Element img : noscript.getElementsByTag("img")) {
            Element container = img.parent();
            if (container == null || container.parent() == null) {
                continue; // nothing to insert before
            }
            // Build the node instead of concatenating markup, so a src containing quotes or
            // angle brackets cannot break out of the attribute.
            // NOTE(review): an image nested deeper than one level lands inside the noscript
            // and is removed with it, exactly as before - confirm that is intended.
            container.before(doc.createElement("img").attr("src", img.attr("src")));
        }
        if (noscript.parent() != null) {
            noscript.remove();
        }
    }

    for (Element img : doc.select("img")) {
        String original = img.attr("data-original");
        String actual = img.attr("data-actualsrc");
        if (!original.isEmpty()) {
            img.attr("src", original);
        }
        if (!actual.isEmpty()) {
            img.attr("src", actual);
        }
    }
    return doc.html();
}
/**
 * Converts every {@code div.MiniEntry} of a listing page into a {@link ViewModel}.
 *
 * <p>All values are read from the parent of the {@code div.MiniEntry} element. An entry that
 * fails to parse for any reason is logged and left out; the remaining entries are still
 * returned.
 *
 * @param doc the listing page
 * @return the successfully parsed entries, in document order
 */
private List<ViewModel> parseList(Document doc) {
    List<ViewModel> models = new ArrayList<ViewModel>();
    for (Element miniEntry : doc.select("div.MiniEntry")) {
        Element entry = miniEntry.parent();
        try {
            ViewModel model = new ViewModel();

            // Slug: file name of the link around the heading, without its extension.
            String href = entry.select("h1").parents().attr("href");
            int slugStart = href.lastIndexOf("/") + 1;
            int slugEnd = href.lastIndexOf(".");
            model.setSlug(href.substring(slugStart, slugEnd));
            model.setTitle(entry.select("h1").text());
            model.setSummary(entry.select("div.Descriptor").text());

            // Language id: numeric file name of the flag image in the first left-floated box.
            Elements leftBoxes = entry.select("div.Genre > div.floatleft");
            String flag = leftBoxes.eq(0).select("img").attr("src");
            flag = flag.substring(flag.lastIndexOf("/") + 1);
            flag = flag.substring(0, flag.indexOf("."));
            int lnId = Integer.valueOf(flag);
            model.setLanguageResId(languageResMap.get(lnId));
            String language = languageKeyMap.get(lnId);

            // Genre: text after the colon in the second box, cut at the first comma.
            String genre = leftBoxes.eq(1).text();
            genre = genre.substring(genre.indexOf(":") + 1).trim();
            int comma = genre.indexOf(",");
            if (genre.contains(",")) {
                genre = genre.substring(0, comma);
            }
            model.setGenre(genre);

            // Rating: the text between the colon and the character before the slash.
            String rating = entry.select("div.Genre > div.floatright").text();
            rating = rating.substring(rating.indexOf(":") + 1, rating.indexOf("/") - 1);
            model.setRating(Float.valueOf(rating.trim()));

            model.setImage(getPageLink(model) + "#language=" + language);
            models.add(model);
        } catch (Exception e) {
            Log.e("Kinox", "Error parsing " + entry.html(), e);
        }
    }
    return models;
}
private void parseRecord(String record) throws ExecutionException, InterruptedException { Element root = Jsoup.parse(record).getElementsByTag("div").first(); Comment comment = new Comment(); if (root.className().contains("new")) comment.setNew(true); String commentId = root.id(); comment.setLepraId(commentId); if (commentId.equals(commentToSelectId)) commentToSelect = commentsCount; Matcher level = patternLevel.matcher(root.className()); if (level.find()) comment.setLevel(Short.valueOf(level.group(1))); Element element = root.getElementsByClass("c_body").first(); if (element.className().contains("hidden")) return; boolean containsImages = false; Elements images = element.getElementsByTag("img"); for (Element image : images) { String src = image.attr("src"); if (isImagesEnabled && !TextUtils.isEmpty(src)) { if (!image.parent().tag().getName().equalsIgnoreCase("a")) image.wrap("<a href=" + "\"" + src + "\"></a>"); image.removeAttr("width"); image.removeAttr("height"); image.removeAttr("style"); image.attr("style", "max-width:100%"); containsImages = true; } else image.remove(); } String html = Utils.wrapLepraTags(element); comment.setHtml(html); comment.setOnlyText(!containsImages && !html.contains("leprosorium.ru")); Element authorElement = root.getElementsByClass("ddi").first(); if (authorElement != null) { Elements a = authorElement.getElementsByTag("a"); String url = Commons.PREFIX_URL + a.first().attr("href"); url = url.replace("\n", ""); comment.setUrl(url); String author = a.size() > 1 ? 
a.get(1).text() : a.get(0).text(); if (postAuthor.equals(author)) comment.setPostAuthor(true); String color = "black"; if (comment.isPostAuthor()) color = "red"; else if (author.equals(userName)) color = "#3270FF"; comment.setAuthor(author); String signature = authorElement.text().split(author)[0] + "<b><font color=\"" + color + "\">" + author + "</font></b>"; String epochDate = authorElement.getElementsByClass("js-date").first().attr("data-epoch_date"); Date date = new Date(Long.valueOf(epochDate) * 1000); signature = signature + " " + date .toLocaleString(); // DateUtils.getRelativeTimeSpanString(date.getTime(), new // Date().getTime(), DateUtils.FORMAT_ABBREV_RELATIVE); comment.setSignature(signature); } if (!post.isInbox()) { Element vote = root.getElementsByClass("vote").first(); if (vote != null) { if (!vote.select(".vote_button.vote_button_plus.vote_voted").isEmpty()) comment.setPlusVoted(true); else if (!vote.select(".vote_button.vote_button_minus.vote_voted").isEmpty()) comment.setMinusVoted(true); Element rating = vote.getElementsByClass("vote_result").first(); comment.setRating(Short.valueOf(rating.text())); } } comment.setNum(commentsCount); ServerWorker.Instance().addNewComment(post.getId(), comment); commentsCount++; if (commentToSelectId != null) { if (commentToSelect != -1 && commentsCount >= 50 + commentToSelect) { notifyAboutFirstCommentsUpdate(); commentToSelectId = null; commentToSelect = -1; } } else { if (commentsCount == 50) { notifyAboutFirstCommentsUpdate(); } else if (commentsCount != 0 && commentsCount % 100 == 0) { notifyAboutCommentsUpdate(); } } }
/**
 * Reads the available search fields from the catalogue's advanced-search page.
 *
 * <p>Text fields come from the options of {@code select#search_type0_0}; when there are none,
 * they are recovered from {@code searchFields["key"] = "label";} assignments found in inline
 * scripts. In addition, every {@code select} named {@code filter[]} becomes a dropdown field
 * whose "meaning" is the part before the colon of the last option value containing one, or the
 * select's id when no option value contains a colon.
 *
 * @return text fields first, followed by dropdown fields, each in document order
 * @throws IOException if the page cannot be fetched
 * @throws OpacErrorException declared by the signature
 * @throws JSONException if a field's data object cannot be filled
 */
@Override
public List<SearchField> parseSearchFields()
        throws IOException, OpacErrorException, JSONException {
    start();
    // NOTE(review): the query string contains literal spaces ("mylang = "); this looks
    // unintended but is left unchanged here - confirm against the server before editing.
    String html =
            httpGet(opac_url + "/Search/Advanced?mylang = " + languageCode, getDefaultEncoding());
    Document doc = Jsoup.parse(html);
    List<SearchField> fields = new ArrayList<>();

    for (Element option : doc.select("select#search_type0_0 option")) {
        TextSearchField field = new TextSearchField();
        field.setDisplayName(option.text());
        field.setId(option.val());
        field.setHint("");
        field.setData(new JSONObject());
        field.getData().put("meaning", option.val());
        fields.add(field);
    }

    if (fields.size() == 0) {
        // Weird JavaScript, e.g. view-source:http://vopac.nlg.gr/Search/Advanced
        Pattern pattern_key =
                Pattern.compile("searchFields\\[\"([^\"]+)\"\\] = \"([^\"]+)\";");
        for (Element script : doc.select("script")) {
            String code = script.html();
            if (!code.contains("searchFields")) {
                continue;
            }
            for (String line : code.split("\n")) {
                Matcher matcher = pattern_key.matcher(line);
                if (matcher.find()) {
                    TextSearchField field = new TextSearchField();
                    field.setDisplayName(matcher.group(2));
                    field.setId(matcher.group(1));
                    field.setHint("");
                    field.setData(new JSONObject());
                    field.getData().put("meaning", field.getId());
                    fields.add(field);
                }
            }
        }
    }

    for (Element select : doc.select("select")) {
        if (!select.attr("name").equals("filter[]")) {
            continue;
        }
        DropdownSearchField field = new DropdownSearchField();
        Elements labels = select.parent().select("label");
        if (labels.size() > 0) {
            field.setDisplayName(labels.first().text());
        }
        field.setId(select.attr("name") + select.attr("id"));

        String meaning = select.attr("id");
        // An empty first entry lets the user leave the filter unset.
        field.addDropdownValue("", "");
        for (Element option : select.select("option")) {
            if (option.val().contains(":")) {
                meaning = option.val().split(":")[0];
            }
            field.addDropdownValue(option.val(), option.text());
        }
        field.setData(new JSONObject());
        field.getData().put("meaning", meaning);
        fields.add(field);
    }
    return fields;
}
/**
 * Converts a search result page into a {@link SearchRequestResult}.
 *
 * <p>For each {@code div.result} the description is built from the COinS metadata carried in
 * the {@code title} attribute of a {@code span.Z3988} (looked up in the row itself, or in its
 * parent when that is an {@code li}); without such a span, or when its metadata is not a valid
 * URI query, the text of {@code a.title} is used. Status is derived from the row's classes and
 * status labels, the media type from {@code mediaTypeSelectors}, the cover from the first image
 * whose URL contains "over", and the id by applying {@code idPattern} to the path of the title
 * link.
 *
 * @param doc the result page; its base URI is overwritten
 * @param page page number stored on every result and on the returned object
 * @return the parsed results and the total count, which is -1 when the page does not contain
 *     exactly one {@code .resulthead}
 * @throws OpacErrorException if the page shows an error message, or shows no results but a
 *     paragraph inside {@code .main}
 */
protected SearchRequestResult parse_search(Document doc, int page) throws OpacErrorException {
    doc.setBaseUri(opac_url + "/Search/Results");

    Elements errors = doc.select("p.error, p.errorMsg, .alert-error");
    if (errors.size() > 0) {
        throw new OpacErrorException(errors.text());
    } else if (doc.select("div.result").size() == 0 && doc.select(".main p").size() > 0) {
        throw new OpacErrorException(doc.select(".main p").first().text());
    }

    int rescount = -1;
    if (doc.select(".resulthead").size() == 1) {
        rescount = Integer.parseInt(doc.select(".resulthead strong").get(2).text());
    }

    List<SearchResult> reslist = new ArrayList<>();
    for (Element row : doc.select("div.result")) {
        SearchResult res = new SearchResult();

        // The lookups used to select "span.3988" (missing 'Z'), which never matched, so the
        // COinS branch below was unreachable.
        Element z3988el = null;
        if (row.select("span.Z3988").size() == 1) {
            z3988el = row.select("span.Z3988").first();
        } else if (row.parent().tagName().equals("li")
                && row.parent().select("span.Z3988").size() > 0) {
            z3988el = row.parent().select("span.Z3988").first();
        }

        if (z3988el != null) {
            try {
                StringBuilder description = new StringBuilder();
                // The title attribute is a URL query string; a dummy host makes it parseable.
                List<NameValuePair> z3988data =
                        URLEncodedUtils.parse(
                                new URI(
                                        "http://dummy/?"
                                                + z3988el.select("span.Z3988").attr("title")),
                                "UTF-8");
                for (NameValuePair nv : z3988data) {
                    String value = nv.getValue();
                    if (value == null || value.trim().equals("")) {
                        continue;
                    }
                    String name = nv.getName();
                    if (name.equals("rft.btitle") || name.equals("rft.atitle")) {
                        description.append("<b>").append(value).append("</b>");
                    } else if (name.equals("rft.au") || name.equals("rft.date")) {
                        description.append("<br />").append(value);
                    }
                }
                res.setInnerhtml(description.toString());
            } catch (URISyntaxException e) {
                e.printStackTrace();
                // Fall back to the plain title so the result is not left without a description.
                res.setInnerhtml(row.select("a.title").text());
            }
        } else {
            res.setInnerhtml(row.select("a.title").text());
        }

        // The first matching rule wins; classes on the row take precedence over status labels.
        if (row.hasClass("available") || row.hasClass("internet")) {
            res.setStatus(SearchResult.Status.GREEN);
        } else if (row.hasClass("reservable")) {
            res.setStatus(SearchResult.Status.YELLOW);
        } else if (row.hasClass("not-available")) {
            res.setStatus(SearchResult.Status.RED);
        } else if (row.select(".status.available").size() > 0) {
            res.setStatus(SearchResult.Status.GREEN);
        } else if (row.select(".status .label-success").size() > 0) {
            res.setStatus(SearchResult.Status.GREEN);
        } else if (row.select(".status .label-important").size() > 0) {
            res.setStatus(SearchResult.Status.RED);
        } else if (row.select(".status.checkedout").size() > 0) {
            res.setStatus(SearchResult.Status.RED);
        }

        for (Map.Entry<String, SearchResult.MediaType> entry : mediaTypeSelectors.entrySet()) {
            if (row.select(entry.getKey()).size() > 0) {
                res.setType(entry.getValue());
                break;
            }
        }

        for (Element img : row.select("img")) {
            String src = img.absUrl("src");
            if (src.contains("over")) {
                // Only the first such image is considered; an "Unavailable" one sets no cover.
                if (!src.contains("Unavailable")) {
                    res.setCover(src);
                }
                break;
            }
        }

        res.setPage(page);
        String href = row.select("a.title").first().absUrl("href");
        try {
            URL idurl = new URL(href);
            String path = idurl.getPath();
            Matcher matcher = idPattern.matcher(path);
            if (matcher.find()) {
                res.setId(matcher.group(1));
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
        reslist.add(res);
    }
    return new SearchRequestResult(reslist, rescount, page);
}
public void onPostExecute(AnswerObject result) { Document doc = Jsoup.parse(result.getHTML()); sendHTMLatBug(doc.html()); if (doc.select("span.notLoggedText").text().length() > 0) { Intent BackToLoginIntent = new Intent(this, TuCanMobileActivity.class); BackToLoginIntent.putExtra("lostSession", true); startActivity(BackToLoginIntent); } else { if (PREPCall == false) { String Title = doc.select("h1").text(); TextView SingleEventTitle = (TextView) findViewById(R.id.singleevent_title); SingleEventTitle.setText(Title); Elements Deltarows = doc.select("table[courseid]").first().select("tr"); Element rows; if (Deltarows.size() == 1) { rows = Deltarows.get(0).select("td").first(); } else { rows = Deltarows.get(1).select("td").first(); } Elements Paragraphs = rows.select("p"); Iterator<Element> PaIt = Paragraphs.iterator(); ArrayList<String> titles = new ArrayList<String>(); ArrayList<String> values = new ArrayList<String>(); while (PaIt.hasNext()) { Element next = PaIt.next(); String[] information = crop(next.html()); titles.add(information[0]); values.add(information[1]); } PropertyValueAdapter = new SingleEventAdapter(titles, values); setListAdapter(PropertyValueAdapter); // Termin-Selektor: // Terminselektor Iterator<Element> captionIt = doc.select("caption").iterator(); Iterator<Element> DateTable = null; Iterator<Element> materialTable = null; while (captionIt.hasNext()) { Element next = captionIt.next(); if (next.text().equals("Termine")) { System.out.println(next.parent().html()); DateTable = next.parent().select("tr").iterator(); } else if (next.text().contains("Material")) { materialTable = next.parent().select("tr").iterator(); } } ArrayList<String> eventNumber = new ArrayList<String>(); ArrayList<String> eventDate = new ArrayList<String>(); ArrayList<String> eventTime = new ArrayList<String>(); ArrayList<String> eventRoom = new ArrayList<String>(); ArrayList<String> eventInstructor = new ArrayList<String>(); while (DateTable.hasNext()) { Element next = 
DateTable.next(); Elements cols = next.select("td"); eventNumber.add(cols.get(0).text()); eventDate.add(cols.get(1).text()); eventTime.add(cols.get(2).text() + "-" + cols.get(3).text()); eventRoom.add(cols.get(4).text()); eventInstructor.add(cols.get(5).text()); } DateAppointmentAdapter = new AppointmentAdapter(eventDate, eventTime, eventNumber, eventRoom, eventInstructor); int ct = 0; ArrayList<String> materialNumber = new ArrayList<String>(); ArrayList<String> materialName = new ArrayList<String>(); ArrayList<String> materialDesc = new ArrayList<String>(); materialLink = new ArrayList<String>(); ArrayList<String> materialFile = new ArrayList<String>(); if (materialTable != null) { while (materialTable.hasNext()) { Element next = materialTable.next(); if (next.select("td").size() > 1) { ct++; System.out.println(ct + " " + (ct % 3)); int mod = (ct % 3); switch (mod) { case 1: materialNumber.add(next.select("td").get(0).text()); materialName.add(next.select("td").get(1).text()); break; case 2: materialDesc.add(next.select("td").get(1).text()); if (next.attr("class").equals("tbdata_nob")) { ct++; materialLink.add(""); materialFile.add(""); } break; case 0: materialLink.add(next.select("td").get(1).select("a").attr("href")); materialFile.add(next.select("td").get(1).select("a").text()); break; } } } } if (ct > 2) { FileAdapter = new AppointmentAdapter( materialNumber, materialFile, null, materialName, materialDesc); thereAreFiles = true; } else FileAdapter = new ArrayAdapter<String>( this, android.R.layout.simple_list_item_1, new String[] {"Kein Material"}); } else { String nextlink = TucanMobile.TUCAN_PROT + TucanMobile.TUCAN_HOST + doc.select("div.detailout").select("a").attr("href"); SimpleSecureBrowser callOverviewBrowser = new SimpleSecureBrowser(this); RequestObject thisRequest = new RequestObject(nextlink, localCookieManager, RequestObject.METHOD_GET, ""); PREPCall = false; callOverviewBrowser.execute(thisRequest); } } }
public void tail(Node source, int depth) { if (source instanceof Element && whitelist.isSafeTag(source.nodeName())) { destination = destination.parent(); // would have descended, so pop destination stack } }
private void migrateRightList( Document doc, Node trainingAndEventsRightNode, Session session, String locale, Map<String, String> urlMap) throws RepositoryException { Elements listElements = doc.select("div.n13-pilot"); // Check for the follow us Elements followUs = !doc.select("div.fw-cisco-assistant").isEmpty() ? doc.select("div.fw-cisco-assistant").select("div.s14-pilot") : null; if (followUs != null && !followUs.isEmpty()) { sb.append(Constants.FOLLOWUS_NODE_NOT_FOUND); } else { log.debug("Follow us does not exists"); } // end of check for follow us // Check for image Element listEle = listElements.first(); if (listEle != null) { Elements imgElements = listEle.getElementsByTag("img"); if (imgElements != null && imgElements.size() > 0) { int count = 0; for (Element imgElement : imgElements) { count = count + 1; } sb.append("<li>" + "" + count + " extra images found in the right List</li>"); } Element sibling = listEle.nextElementSibling(); if (sibling != null) { Elements image = sibling.getElementsByTag("img"); if (!image.isEmpty()) { sb.append(Constants.EXTRA_IMG_FOUND_IN_RIGHT_PANEL); } } } // end of check for image if (listElements.size() > 0) { int count = 0; for (Element listElement : listElements) { if (listElement.parent().hasClass("gd-right")) { count = count + 1; } } NodeIterator listNodeIterator = trainingAndEventsRightNode.hasNodes() ? 
trainingAndEventsRightNode.getNodes("list*") : null; Elements ulEle = null; if (listNodeIterator != null) { int nodeSize = (int) listNodeIterator.getSize(); log.debug("node Size" + nodeSize + "ele Size" + count); if (count == nodeSize) { Node listNode = null; for (Element ele : listElements) { listNode = (Node) listNodeIterator.next(); setListElements(ele, listNode, session, locale, urlMap); } } else if (nodeSize < count) { Node listNode; for (Element ele : listElements) { ulEle = ele.getElementsByTag("ul"); if (listNodeIterator.hasNext()) { if (!ulEle.isEmpty()) { listNode = (Node) listNodeIterator.next(); setListElements(ele, listNode, session, locale, urlMap); } } } sb.append( Constants.MISMATCH_IN_LIST_NODES + count + Constants.LIST_NODES_COUNT + nodeSize); } else if (nodeSize > count) { Node listNode; for (Element ele : listElements) { listNode = (Node) listNodeIterator.next(); setListElements(ele, listNode, session, locale, urlMap); } sb.append( Constants.MISMATCH_IN_LIST_NODES + count + Constants.LIST_NODES_COUNT + nodeSize); } } else { sb.append(Constants.LIST_NODE_NOT_FOUND); } } else { sb.append("<li>List component not found in web url</li>"); } }
/** * @param url * @param output * @return */ public String getContentPr(String url) { Document doc; String result = ""; try { doc = Jsoup.connect(url).timeout(100000).get(); // doc is null if (doc == null) { return null; } // No such data. Elements strongElements = doc.getElementsByTag("strong"); for (Element strong : strongElements) { if (strong.ownText().contains("No such data.")) return null; } Elements tbodys = doc.getElementsByTag("tbody"); Element tbody = null; if (tbodys.size() > 2) { tbody = tbodys.get(2); } else { return result; } Elements nobrs = tbody.getElementsByTag("nobr"); for (Element nobr : nobrs) { String nobr_str = nobr.ownText(); // 获取name if (nobr_str.equals("Name")) { Element other_tr = nobr.parent().nextElementSibling(); Elements div_tags = other_tr.getElementsByTag("div"); String[] names = div_tags.get(1).ownText().split("<br>"); for (String name : names) { result += name; } } // 获取Formula if (nobr_str.equals("Formula")) { result += "\t"; Element other_tr = nobr.parent().nextElementSibling(); Elements div_tags = other_tr.getElementsByTag("div"); String[] names = div_tags.get(0).ownText().split("<br>"); result += names[0]; } // 获取Exact mass if (nobr_str.equals("Exact mass")) { result += "\t"; Element other_tr = nobr.parent().nextElementSibling(); Elements div_tags = other_tr.getElementsByTag("div"); String[] names = div_tags.get(0).ownText().split("<br>"); result += names[0]; } // 获取Mol weight if (nobr_str.equals("Mol weight")) { result += "\t"; Element other_tr = nobr.parent().nextElementSibling(); Elements div_tags = other_tr.getElementsByTag("div"); String[] names = div_tags.get(0).ownText().split("<br>"); result += names[0]; } // Other DBs if (nobr_str.equals("Other DBs")) { result += "\t"; Element other_tr = nobr.parent().nextElementSibling(); Elements div_tags = other_tr.getElementsByTag("div"); for (int i = 0; i < div_tags.size(); i++) { result += (div_tags.get(i).ownText()); if (i > 0 && div_tags.get(i).getElementsByTag("a") != null) 
{ // result += ",";// 与前面的串分隔开,Cas number不含a标签 for (Element a : div_tags.get(i).getElementsByTag("a")) { result += (a.ownText() + ","); } // cas number强行分割开 result += ","; } } } } } catch (Exception e1) { e1.printStackTrace(); } return result; }