/**
 * Scrapes the audiobook author index and returns one entry per author.
 *
 * <p>Each entry is a map with two keys: {@code "author"} (the text of the list item) and
 * {@code "url"} (the absolute form of the {@code href} of the first link found inside that
 * item). List items that contain no link are skipped.
 *
 * <p>Any exception raised while fetching or walking a page is printed and ends the scrape;
 * whatever was collected up to that point is still returned, so the result may be empty but is
 * never {@code null}.
 *
 * @param params unused
 * @return the author entries collected before the scrape finished or failed
 */
@Override
protected ArrayList<HashMap<String, String>> doInBackground(Void... params) {
    ArrayList<HashMap<String, String>> authors = new ArrayList<HashMap<String, String>>();
    try {
        // NOTE(review): the upper bound is 'a', so only the "a" index page is fetched.
        // If every letter is meant to be scraped this should presumably be 'z' -- confirm.
        for (char letter = 'a'; letter <= 'a'; letter++) {
            URL url = new URL("http://www.liberliber.it/audiolibri/" + letter + "/index.htm");
            Document doc = Jsoup.parse(url, 5000);

            // Walk down the fixed page layout to the <ul> holding the authors. A layout
            // mismatch surfaces as an exception and is handled by the catch below.
            Element frame =
                doc.getElementById("riga02_colonna02")
                    .getElementsByClass("contenuto_cornice")
                    .first();
            Element tableBody = frame.getElementsByTag("tbody").first();
            Element row = tableBody.getElementsByTag("tr").get(1);
            Element cell = row.getElementsByTag("td").get(1);
            Element authorList = cell.getElementsByTag("ul").first();

            for (Element item : authorList.getElementsByTag("li")) {
                // Look the link up directly instead of unwrapping the <li>: unwrapping
                // modifies the document, returns null for an empty item, and may return a
                // node that is not the link at all.
                Element link = item.select("a[href]").first();
                if (link == null) {
                    continue;
                }
                HashMap<String, String> entry = new HashMap<String, String>();
                entry.put("author", item.text());
                entry.put("url", link.absUrl("href"));
                authors.add(entry);
            }
        }
    } catch (Exception e) {
        // Best effort: report the failure and return what has been collected so far.
        e.printStackTrace();
    }
    return authors;
}
/**
 * Unwraps block elements that have no class names left after {@code removeEmpty} is applied.
 *
 * <p>Every node reached by {@code element.traverse(...)} is inspected. Matching elements are
 * collected first and unwrapped only after the traversal has finished, so the tree is not
 * modified while it is being walked.
 *
 * @param element the element whose tree is traversed
 */
private void unwrap(final Element element) {
    final Set<Element> classlessBlocks = new LinkedHashSet<>();

    final NodeVisitor collector =
        new NodeVisitor() {
            @Override
            public void head(Node node, int depth) {
                if (!(node instanceof Element)) {
                    return;
                }
                final Element candidate = (Element) node;
                if (!candidate.isBlock()) {
                    return;
                }
                // removeEmpty is defined elsewhere; presumably it drops blank class names
                // from the set -- confirm against its definition.
                final Set<String> classNames = candidate.classNames();
                removeEmpty(classNames);
                if (classNames.isEmpty()) {
                    classlessBlocks.add(candidate);
                }
            }

            @Override
            public void tail(Node node, int depth) {
                // Nothing to do when leaving a node.
            }
        };

    element.traverse(collector);

    for (final Element block : classlessBlocks) {
        block.unwrap();
    }
}
public static String getHighlightedText_math( String text, String color, String apiElement) // for math after merging with recodoc { String highlightBeginning = "<SPAN style=\"BACKGROUND-COLOR: " + color + "\">"; String highlightEnding = "</SPAN>"; Document doc = Jsoup.parse(text); Elements apiElements = doc.select("clt[api=" + apiElement + "]"); for (Element apielement : apiElements) { Document tmp = new Document(""); String[] apis = apielement.text().split("\\."); if (apis.length == 2) apielement.html(highlightBeginning + apis[0] + highlightEnding + "." + apis[1]); else apielement.wrap(highlightBeginning); } // highlight code snippet Elements codesnippets = doc.getElementsByTag("pre"); for (Element codesnippet : codesnippets) { String html = codesnippet.html(); Pattern apielementPattern = Pattern.compile("(?<=\\W)" + apiElement + "(?=\\W)"); Matcher matcher = apielementPattern.matcher(html); codesnippet.html(matcher.replaceAll(highlightBeginning + apiElement + highlightEnding)); } // remove clt tags for display Elements clts = doc.getElementsByTag("clt"); for (Element clt : clts) { clt.unwrap(); // clt.replaceWith(new TextNode(clt.text(), "")); } return doc.html(); }