private static PharmacieResultatParsingSousCategorie parserSousCategorie( Category subCategory, boolean premierAppel, Document sousCategorie) { PharmacieResultatParsingSousCategorie resultat = new PharmacieResultatParsingSousCategorie(); Element productList = sousCategorie.select("div.products-list").first(); if (productList != null) { Elements productsInfo = productList.select("div.info_product"); if (productsInfo != null) { for (Element productInfo : productsInfo) { Element productInfoLink = productInfo.select("a.name-link").first(); if (productInfoLink != null) { String productInfoLinkUrl = productInfoLink.attributes().get("href").toLowerCase(); resultat.getListeUrlArticlesTrouves().add(productInfoLinkUrl); } } } if (premierAppel) { resultat .getListeAutrePagesAParserMemeCategorie() .addAll(verificationAutrePageAParser(sousCategorie)); } } else { Element categoryList = sousCategorie.select("div.category-list").first(); if (categoryList != null) { Elements nouvellesSousCategorie = categoryList.select("h2.category-title"); if (nouvellesSousCategorie != null) { for (Element nouvelleSousCategorie : nouvellesSousCategorie) { Element productInfoLink = nouvelleSousCategorie.select("a").first(); String nouvelleSousCategorieLink = productInfoLink.attributes().get("href").toLowerCase(); String nouvelleSousCategorieName = nouvelleSousCategorie.text(); // On crée la nouvelle catégorie List<Category> nouvelleListeCategorie = new ArrayList<Category>(); Category newCategory = new Category(nouvelleSousCategorieName, nouvelleSousCategorieLink); newCategory.setParentCategory(subCategory); nouvelleListeCategorie.add(newCategory); if (!resultat.getMapAutresSousCategories().containsKey(subCategory)) { resultat.getMapAutresSousCategories().put(subCategory, nouvelleListeCategorie); } else { resultat .getMapAutresSousCategories() .get(subCategory) .add(new Category(nouvelleSousCategorieName, nouvelleSousCategorieLink)); } } } } } return resultat; }
// @PostConstruct public void init() { try { Document doc = Jsoup.connect(CURRENCY_POINT).get(); Element oshadBankContainer = doc.select("#7oiylpmiow8iy1sma9a").first(); // Oshadbank id Element currenciesContainer = oshadBankContainer.getElementsByTag("currencies").first(); for (Element currencyContainer : currenciesContainer.getElementsByTag("c")) { Currency currency = Currency.valueOf(currencyContainer.id()); currency.setBuyRate(new BigDecimal(currencyContainer.attributes().get("ar"))); currency.setSellRate(new BigDecimal(currencyContainer.attributes().get("br"))); LOGGER.info(currency.string() + " was inited"); } } catch (IOException e) { LOGGER.error(e); } }
@Test public void dataset() { Document doc = Jsoup.parse( "<div id=1 data-name=jsoup class=new data-package=jar>Hello</div><p id=2>Hello</p>"); Element div = doc.select("div").first(); Map<String, String> dataset = div.dataset(); Attributes attributes = div.attributes(); // size, get, set, add, remove assertEquals(2, dataset.size()); assertEquals("jsoup", dataset.get("name")); assertEquals("jar", dataset.get("package")); dataset.put("name", "jsoup updated"); dataset.put("language", "java"); dataset.remove("package"); assertEquals(2, dataset.size()); assertEquals(4, attributes.size()); assertEquals("jsoup updated", attributes.get("data-name")); assertEquals("jsoup updated", dataset.get("name")); assertEquals("java", attributes.get("data-language")); assertEquals("java", dataset.get("language")); attributes.put("data-food", "bacon"); assertEquals(3, dataset.size()); assertEquals("bacon", dataset.get("food")); attributes.put("data-", "empty"); assertEquals(null, dataset.get("")); // data- is not a data attribute Element p = doc.select("p").first(); assertEquals(0, p.dataset().size()); }
/**
 * Reads one item from a design element, adds it to the data source and then does the same,
 * recursively, for every child element, attaching each child to the item just created.
 *
 * @since 7.5.0
 * @param node the element describing the item (a tree node); its {@code text} attribute is used
 *     as the item id
 * @param selected accumulator for selected items; the item id is added when the element carries
 *     a {@code selected} attribute
 * @param context the DesignContext instance used in parsing
 * @return the item id of the new item
 * @throws DesignException if the tag name of the {@code node} element is not {@code node}.
 */
@Override
protected String readItem(Element node, Set<String> selected, DesignContext context) {
  final String tagName = node.tagName();
  if (!"node".equals(tagName)) {
    throw new DesignException(
        "Unrecognized child element in " + getClass().getSimpleName() + ": " + tagName);
  }

  final String itemId = node.attr("text");
  addItem(itemId);

  if (node.hasAttr("icon")) {
    setItemIcon(
        itemId, DesignAttributeHandler.readAttribute("icon", node.attributes(), Resource.class));
  }

  if (node.hasAttr("selected")) {
    selected.add(itemId);
  }

  // Children are read depth-first and linked to this item once they exist.
  for (Element childNode : node.children()) {
    setParent(readItem(childNode, selected, context), itemId);
  }

  return itemId;
}
/**
 * Strips {@code font-family} and {@code font-size} declarations from every inline {@code style}
 * attribute of the given HTML and returns the resulting body markup.
 *
 * <p>Each remaining declaration is written back followed by {@code ;}. Empty declarations (for
 * instance from {@code style=""} or a doubled {@code ;;}) are dropped instead of being turned
 * into stray semicolons.
 *
 * @param html the HTML to clean
 * @return the inner HTML of the parsed document body
 */
@SuppressLint("DefaultLocale")
private String improveHtml(final String html) {
  final Document document = Jsoup.parse(html);
  for (final Element e : document.getAllElements()) {
    if (!e.hasAttr("style")) {
      continue;
    }
    final StringBuilder kept = new StringBuilder();
    for (final String item : e.attr("style").trim().split(";")) {
      if (item.trim().isEmpty()) {
        continue;
      }
      // Lower-case once; the match must not depend on how the page capitalised the property.
      final String lowered = item.toLowerCase(Locale.ENGLISH);
      if (!lowered.contains("font-family:") && !lowered.contains("font-size:")) {
        kept.append(item).append(';');
      }
    }
    e.attr("style", kept.toString());
  }
  return document.body().html();
}
/**
 * Produce predictable html (attributes in alphabetical order), always include close tags.
 *
 * <p>The markup is appended to {@code sb}; attribute values are wrapped in single quotes and are
 * not escaped, and text nodes are trimmed.
 *
 * @param producedElem the element to serialise, including its descendants
 * @param sb the buffer to append to
 * @return the full content of {@code sb} after the element has been appended
 */
private String elementToHtml(Element producedElem, StringBuilder sb) {
  appendElementHtml(producedElem, sb);
  // Materialise the string once, not at every recursion level.
  return sb.toString();
}

/** Appends the predictable markup of {@code elem} and its descendants to {@code sb}. */
private void appendElementHtml(Element elem, StringBuilder sb) {
  ArrayList<String> names = new ArrayList<String>();
  for (Attribute a : elem.attributes().asList()) {
    names.add(a.getKey());
  }
  Collections.sort(names);

  sb.append('<').append(elem.tagName());
  for (String attrName : names) {
    sb.append(' ').append(attrName).append("='").append(elem.attr(attrName)).append('\'');
  }
  sb.append('>');

  for (Node child : elem.childNodes()) {
    if (child instanceof Element) {
      appendElementHtml((Element) child, sb);
    } else if (child instanceof TextNode) {
      sb.append(((TextNode) child).text().trim());
    }
  }

  sb.append("</").append(elem.tagName()).append('>');
}
/** * Recursively writes a data source Item and its children to a design. * * @since 7.5.0 * @param design the element into which to insert the item * @param itemId the id of the item to write * @param context the DesignContext instance used in writing * @return */ @Override protected Element writeItem(Element design, Object itemId, DesignContext context) { Element element = design.appendElement("node"); element.attr("text", itemId.toString()); Resource icon = getItemIcon(itemId); if (icon != null) { DesignAttributeHandler.writeAttribute( "icon", element.attributes(), icon, null, Resource.class); } if (isSelected(itemId)) { element.attr("selected", ""); } Collection<?> children = getChildren(itemId); if (children != null) { // Yeah... see #5864 for (Object childItemId : children) { writeItem(element, childItemId, context); } } return element; }
// parse from html element div#feed_item public MeipinItem(Element root) { id = root.attr("id"); Element imga = root.child(0) // div#item_inner .child(0) // div#item_img .child(0); // a this.thumbnailUri = imga.child(0) // img .attr("src"); if (imga.attributes().hasKey("onmouseover")) { String tmp = imga.attr("onmouseover"); int start = tmp.indexOf(",") + 2; int end = tmp.indexOf("'", start); this.uri = tmp.substring(start, end); this.isGif = true; } else this.uri = this.thumbnailUri; if (this.uri.contains(".gif")) this.isGif = true; Element item_info = root.child(0) // div#item_inner .child(1); // div#item_info this.title = item_info .child(1) // h3 .child(0) // a .attr("title"); }
/**
 * Rewrites relative image sources so that they point at {@code http://qdaily.com/}.
 *
 * <p>Images without a {@code src}, and images whose {@code src} is already absolute
 * ({@code http://}, {@code https://}, protocol-relative {@code //} or a {@code data:} URI) are
 * left untouched, because prefixing the host would produce an unusable URL.
 *
 * @param pcont the HTML content, may be {@code null}
 * @return the full HTML of the parsed and rewritten document, or {@code ""} when {@code pcont}
 *     is {@code null}
 */
public String reviseImgForQdaily(String pcont) {
  if (pcont == null) {
    return "";
  }
  Document doc = Jsoup.parse(pcont);
  for (Element img : doc.select("img")) {
    String source = img.attributes().get("src");
    boolean absolute =
        source.startsWith("http://")
            || source.startsWith("https://")
            || source.startsWith("//")
            || source.startsWith("data:");
    if (source.isEmpty() || absolute) {
      continue;
    }
    img.attr("src", "http://qdaily.com/" + source);
  }
  return doc.html();
}
/**
 * Copies the {@code data-original} attribute of every image into its {@code src}.
 *
 * <p>Images that have no (or an empty) {@code data-original} keep their current {@code src}
 * instead of having it replaced by an empty string.
 *
 * @param pcont the HTML content, may be {@code null}
 * @return the full HTML of the parsed and rewritten document, or {@code ""} when {@code pcont}
 *     is {@code null}
 */
public String reviseImgForIxiqi(String pcont) {
  if (pcont == null) {
    return "";
  }
  Document doc = Jsoup.parse(pcont);
  for (Element img : doc.select("img")) {
    // Attributes.get returns "" for a missing key, so one check covers absent and empty.
    String source = img.attributes().get("data-original");
    if (!source.isEmpty()) {
      img.attr("src", source);
    }
  }
  return doc.html();
}
/**
 * Tells whether an element is visible, judged only by its aria role: an element that has a
 * {@code role} attribute whose {@code ARIA_ROLE} value equals {@code ARIA_PRESENTATION} is
 * treated as presentation-only; every other element is visible.
 *
 * <p>NOTE(review): the presence check uses the literal {@code "role"} while the value is read
 * through {@code ARIA_ROLE}; presumably both name the same attribute - confirm.
 *
 * @param element the element to inspect
 * @return true if the element is visible rather than just presentation.
 * @todo(dallison) check other aria roles for visible intentions
 */
static boolean isVisible(Element element) {
  final Attributes attrs = element.attributes();
  final boolean presentationOnly =
      attrs.hasKey("role") && attrs.get(ARIA_ROLE).equals(ARIA_PRESENTATION);
  return !presentationOnly;
}
/**
 * Materialises the images hidden in {@code <input name="hiddenimg">} fields: for each such input
 * an {@code <img>} whose {@code src} is the input's {@code value} is inserted before the input's
 * parent element.
 *
 * <p>The image is created as a node rather than parsed from a concatenated HTML string, so a
 * value containing quotes or markup cannot break out of the {@code src} attribute.
 *
 * @param pcont the HTML content, may be {@code null}
 * @return the full HTML of the parsed and rewritten document, or {@code ""} when {@code pcont}
 *     is {@code null}
 */
public String reviseImgForYuehui(String pcont) {
  if (pcont == null) {
    return "";
  }
  Document doc = Jsoup.parse(pcont);
  for (Element hiddenInput : doc.select("input[name=\"hiddenimg\"]")) {
    String source = hiddenInput.attributes().get("value");
    hiddenInput.parent().before(doc.createElement("img").attr("src", source));
  }
  return doc.html();
}
/**
 * Replaces the {@code src} of every image that carries a {@code data-src} attribute with the
 * value of that attribute; other images are left as they are.
 *
 * @param pcont the HTML content, may be {@code null}
 * @return the full HTML of the parsed and rewritten document, or {@code ""} when {@code pcont}
 *     is {@code null}
 */
public String reviseImgForSohuNews(String pcont) {
  if (pcont == null) {
    return "";
  }
  final Document parsed = Jsoup.parse(pcont);
  for (Element image : parsed.select("img")) {
    final Attributes imageAttrs = image.attributes();
    if (!imageAttrs.hasKey("data-src")) {
      continue;
    }
    image.attr("src", imageAttrs.get("data-src"));
  }
  return parsed.html();
}
private static String cleanHtml(final Node node) { if (node instanceof Element) { Element element = ((Element) node); StringBuilder accum = new StringBuilder(); accum.append("<").append(element.tagName()); for (Attribute attribute : element.attributes()) { if (!(attribute.getKey().startsWith("_"))) { accum.append(" "); accum.append(attribute.getKey()); accum.append("=\""); accum.append(attribute.getValue()); accum.append('"'); } } if (element.childNodes().isEmpty() && element.tag().isEmpty()) { accum.append(" />"); } else { accum.append(">"); for (Node child : element.childNodes()) accum.append(cleanHtml(child)); accum.append("</").append(element.tagName()).append(">"); } return accum.toString(); } else if (node instanceof TextNode) { return ((TextNode) node).getWholeText(); } else if (node instanceof XmlDeclaration) { // HACK if (node.childNodes().isEmpty()) { return ""; } return node.outerHtml(); } else if (node instanceof Comment) { // HACK: elide comments for now. return ""; } else if (node instanceof DataNode && node.childNodes().isEmpty()) { // No child nodes are defined but we have to handle content if such exists, example // <script language="JavaScript">var a = { name: "${user.name}"}</script> String content = node.attr("data"); if (Strings.empty(content)) { return ""; } return content; } else { return node.outerHtml(); } }
/**
 * Creates a fresh element with the same tag and base URI as {@code sourceEl}, carrying only the
 * attributes the whitelist considers safe plus the attributes the whitelist enforces for that
 * tag.
 *
 * @param sourceEl the element to copy from
 * @return the new element together with the number of source attributes that were discarded
 */
private ElementMeta createSafeElement(Element sourceEl) {
  final String tagName = sourceEl.tagName();
  final Attributes safeAttrs = new Attributes();
  final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), safeAttrs);

  int discarded = 0;
  for (Attribute candidate : sourceEl.attributes()) {
    if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
      discarded++;
      continue;
    }
    safeAttrs.put(candidate);
  }

  // Enforced attributes are added last, after the filtered copies.
  safeAttrs.addAll(whitelist.getEnforcedAttributes(tagName));

  return new ElementMeta(safeEl, discarded);
}
/**
 * Collects the links to the other result pages of a category from its pagination block
 * ({@code div.results-pages}).
 *
 * <p>Only anchors without any CSS class are kept (so the previous/next arrows and any other
 * decorated link are ignored), and only when they actually carry an {@code href}. The URLs are
 * lower-cased.
 *
 * @param sousCategorie the parsed category page
 * @return the page URLs found, possibly empty; never {@code null}
 */
private static List<String> verificationAutrePageAParser(Document sousCategorie) {
  List<String> autrePageAParser = new ArrayList<String>();

  Element pagination = sousCategorie.select("div.results-pages").first();
  if (pagination == null) {
    return autrePageAParser;
  }

  // A comma in a selector is a union, so "a:not(.previous), a:not(.next)" excluded nothing;
  // chaining the two :not() filters expresses the intended "neither previous nor next".
  for (Element lienElem : pagination.select("a:not(.previous):not(.next)")) {
    if (!lienElem.className().isEmpty()) {
      continue;
    }
    String lien = lienElem.attributes().get("href").toLowerCase();
    if (!lien.isEmpty()) {
      autrePageAParser.add(lien);
    }
  }
  return autrePageAParser;
}
/**
 * Verifies boolean attribute handling: setting an attribute to {@code true} adds a value-less
 * boolean attribute, and setting an existing attribute to {@code false} removes it.
 */
@Test
public void testAddBooleanAttribute() {
  Element element = new Element(Tag.valueOf("div"), "");

  element.attr("true", true);

  // Added with a value first, then switched off again.
  element.attr("false", "value");
  element.attr("false", false);

  assertTrue(element.hasAttr("true"));
  assertEquals("", element.attr("true"));

  List<Attribute> remaining = element.attributes().asList();
  assertEquals("There should be one attribute", 1, remaining.size());
  assertTrue("Attribute should be boolean", remaining.get(0) instanceof BooleanAttribute);

  assertFalse(element.hasAttr("false"));

  assertEquals("<div true></div>", element.outerHtml());
}
/**
 * Makes lazily-loaded images displayable.
 *
 * <p>First, every {@code <noscript>} block is replaced by plain {@code <img>} elements, one per
 * image found anywhere inside it, inserted just before the block. Then, for every image in the
 * document, {@code src} is replaced by {@code data-original} when present and by
 * {@code data-actualsrc} when present (the latter wins when both exist).
 *
 * <p>The replacement images are created as nodes rather than parsed from a concatenated HTML
 * string, so a {@code src} containing quotes or markup cannot break out of the attribute. They
 * are inserted before the {@code <noscript>} itself, so images nested deeper inside it are not
 * lost when the block is removed.
 *
 * @param pcont the HTML content, may be {@code null}
 * @return the full HTML of the parsed and rewritten document, or {@code ""} when {@code pcont}
 *     is {@code null}
 */
public String reviseImgForZhiHuApp(String pcont) {
  if (pcont == null) {
    return "";
  }
  Document doc = Jsoup.parse(pcont);

  for (Element no : doc.select("noscript")) {
    for (Element img : no.getElementsByTag("img")) {
      String source = img.attributes().get("src");
      no.before(doc.createElement("img").attr("src", source));
    }
    no.remove();
  }

  for (Element img : doc.select("img")) {
    String source = img.attr("data-original");
    String actualSrc = img.attr("data-actualsrc");
    if (!source.equals("")) {
      img.attr("src", source);
    }
    if (!actualSrc.equals("")) {
      img.attr("src", actualSrc);
    }
  }
  return doc.html();
}
/**
 * Matches any element that has at least one attribute.
 *
 * @param root the root of the evaluation; not used by this check
 * @param element the element being tested
 * @return {@code true} when the element's attribute count is greater than zero
 */
@Override
public boolean matches(Element root, Element element) {
  final int attributeCount = element.attributes().size();
  return attributeCount > 0;
}