/**
 * Builds an {@code Attributes} instance from two parallel arrays.
 *
 * <p>Entry {@code i} is created from {@code keys[i]} and {@code values[i]}; the number of entries
 * is driven by the length of {@code keys}.
 *
 * @param keys attribute keys
 * @param values attribute values, read at the same indexes as {@code keys}
 * @return a new attribute collection holding every key/value pair
 */
public static Attributes createAttributes(String[] keys, String[] values) {
  final Attributes result = new Attributes();
  int index = 0;
  while (index < keys.length) {
    result.put(keys[index], values[index]);
    index++;
  }
  return result;
}
/**
 * Look up the value of an attribute by key.
 *
 * <p>A key that is not present as-is but starts with <code><b>abs:</b></code> (case-insensitive)
 * is treated as a shortcut to {@link #absUrl}: the prefix is stripped and the remainder is
 * resolved to an absolute URL. E.g.:
 *
 * <blockquote>
 *
 * <code>String url = a.attr("abs:href");</code>
 *
 * </blockquote>
 *
 * @param attributeKey The attribute key; must not be null.
 * @return The attribute value, or an empty string if not present (never null from the fallback).
 * @see #attributes()
 * @see #hasAttr(String)
 * @see #absUrl(String)
 */
public String attr(String attributeKey) {
  Validate.notNull(attributeKey);

  // A literally-present key always wins, even if it happens to start with "abs:".
  if (attributes.hasKey(attributeKey)) {
    return attributes.get(attributeKey);
  }

  final String absPrefix = "abs:";
  final boolean wantsAbsolute = attributeKey.toLowerCase().startsWith(absPrefix);
  return wantsAbsolute ? absUrl(attributeKey.substring(absPrefix.length())) : "";
}
/**
 * Copies every attribute of {@code incoming} into this collection.
 *
 * <p>Nothing happens when {@code incoming} is empty. The backing map is created on demand, sized
 * to the number of incoming attributes.
 *
 * @param incoming the attributes to add
 */
public void addAll(Attributes incoming) {
  if (incoming.size() == 0) {
    return;
  }
  if (this.attributes == null) {
    this.attributes = new LinkedHashMap(incoming.size());
  }
  this.attributes.putAll(incoming.attributes);
}
/**
 * Converts an attribute list into an insertion-ordered map, leaving out every key contained in
 * {@code SKIP_ATTR}.
 *
 * @param attributes A list of attribs
 * @return Returns a mutable map parsed out of the attribute list
 */
public static Map<String, String> parseAttribs(Attributes attributes) {
  final int capacity = attributes.size() + 4;
  final Map<String, String> parsed = new LinkedHashMap<String, String>(capacity);
  for (Attribute attribute : attributes.asList()) {
    final String key = attribute.getKey();
    if (SKIP_ATTR.contains(key)) {
      continue;
    }
    parsed.put(key, attribute.getValue());
  }
  return parsed;
}
/**
 * Test whether this element carries the given attribute.
 *
 * <p>For a key starting with {@code abs:} (case-insensitive) the result is also true when the
 * un-prefixed attribute exists and {@code absUrl} yields a non-empty string for it.
 *
 * @param attributeKey The attribute key to check; must not be null.
 * @return true if the attribute exists, false if not.
 */
public boolean hasAttr(String attributeKey) {
  Validate.notNull(attributeKey);

  final String absPrefix = "abs:";
  if (attributeKey.toLowerCase().startsWith(absPrefix)) {
    final String plainKey = attributeKey.substring(absPrefix.length());
    final boolean resolves = attributes.hasKey(plainKey) && !absUrl(plainKey).equals("");
    if (resolves) {
      return true;
    }
  }
  // Fall back to a literal lookup of the key exactly as given.
  return attributes.hasKey(attributeKey);
}
/**
 * Collects the attributes registered as enforced for a tag.
 *
 * @param tagName name of the tag to look up
 * @return a new attribute collection with one entry per enforced key/value pair; empty when the
 *     tag has no entry in {@code enforcedAttributes}
 */
Attributes getEnforcedAttributes(String tagName) {
  final Attributes enforced = new Attributes();
  final TagName tag = TagName.valueOf(tagName);
  if (!enforcedAttributes.containsKey(tag)) {
    return enforced;
  }

  final Map<AttributeKey, AttributeValue> forTag = enforcedAttributes.get(tag);
  for (Map.Entry<AttributeKey, AttributeValue> pair : forTag.entrySet()) {
    enforced.put(pair.getKey().toString(), pair.getValue().toString());
  }
  return enforced;
}
/**
 * Decides whether an element counts as visible, judged solely by its ARIA role attribute.
 *
 * <p>TODO(dallison): check other aria roles for visible intentions.
 *
 * @param element the element to inspect
 * @return false only when the element has a {@code role} attribute and the value stored under
 *     {@code ARIA_ROLE} equals {@code ARIA_PRESENTATION}; true otherwise
 */
static boolean isVisible(Element element) {
  final Attributes attrs = element.attributes();
  // NOTE(review): presence is tested with the literal "role" while the value is read through
  // ARIA_ROLE — presumably the same key; confirm against the constant's definition.
  final boolean presentationOnly =
      attrs.hasKey("role") && attrs.get(ARIA_ROLE).equals(ARIA_PRESENTATION);
  return !presentationOnly;
}
/**
 * Rewrites lazy-loaded images so that {@code src} carries the URL stored in
 * {@code data-original}.
 *
 * @param pcont HTML content to revise; may be null
 * @return the revised document HTML, or an empty string when {@code pcont} is null
 */
public String reviseImgForIxiqi(String pcont) {
  if (pcont == null) {
    return "";
  }
  Document doc = Jsoup.parse(pcont);
  for (Element img : doc.select("img")) {
    Attributes attrs = img.attributes();
    // Only touch images that actually carry the lazy-load attribute; otherwise a perfectly good
    // src would be overwritten with whatever get() yields for a missing key.
    if (attrs.hasKey("data-original")) {
      img.attr("src", attrs.get("data-original"));
    }
  }
  return doc.html();
}
/**
 * Prefixes the {@code src} of every image with {@code http://qdaily.com/}.
 *
 * @param pcont HTML content to revise; may be null
 * @return the revised document HTML, or an empty string when {@code pcont} is null
 */
public String reviseImgForQdaily(String pcont) {
  if (pcont == null) {
    return "";
  }
  final Document parsed = Jsoup.parse(pcont);
  for (Element image : parsed.select("img")) {
    final String currentSrc = image.attributes().get("src");
    image.attr("src", "http://qdaily.com/" + currentSrc);
  }
  return parsed.html();
}
/**
 * Materializes images that the page stores in hidden inputs named {@code hiddenimg}: for each
 * such input an {@code <img>} whose {@code src} is the input's {@code value} is inserted before
 * the input's parent.
 *
 * @param pcont HTML content to revise; may be null
 * @return the revised document HTML, or an empty string when {@code pcont} is null
 */
public String reviseImgForYuehui(String pcont) {
  if (pcont == null) {
    return "";
  }
  final Document parsed = Jsoup.parse(pcont);
  final Elements hiddenInputs = parsed.select("input[name=\"hiddenimg\"]");
  // Iterating an empty selection is a no-op, so no explicit size check is needed.
  for (Element hidden : hiddenInputs) {
    final String imageUrl = hidden.attributes().get("value");
    final String imgMarkup = "<img src=\"" + imageUrl + "\" />";
    hidden.parent().before(imgMarkup);
  }
  return parsed.html();
}
/**
 * Copies {@code data-src} into {@code src} for every image that has a {@code data-src}
 * attribute; other images are left untouched.
 *
 * @param pcont HTML content to revise; may be null
 * @return the revised document HTML, or an empty string when {@code pcont} is null
 */
public String reviseImgForSohuNews(String pcont) {
  if (pcont == null) {
    return "";
  }
  final Document parsed = Jsoup.parse(pcont);
  for (Element image : parsed.select("img")) {
    final Attributes imageAttrs = image.attributes();
    if (!imageAttrs.hasKey("data-src")) {
      continue;
    }
    image.attr("src", imageAttrs.get("data-src"));
  }
  return parsed.html();
}
@Override public int hashCode() { int result = parentNode != null ? parentNode.hashCode() : 0; // not children, or will block stack as they go back up to parent) result = 31 * result + (attributes != null ? attributes.hashCode() : 0); return result; }
/**
 * Creates a copy of an element that keeps only whitelisted attributes, then adds the attributes
 * the whitelist enforces for that tag.
 *
 * @param sourceEl the element to copy
 * @return the new element together with the number of attributes that were dropped
 */
private ElementMeta createSafeElement(Element sourceEl) {
  final String tagName = sourceEl.tagName();
  final Attributes safeAttrs = new Attributes();
  final Element safeElement = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), safeAttrs);

  int discardedCount = 0;
  for (Attribute candidate : sourceEl.attributes()) {
    if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
      discardedCount++;
      continue;
    }
    safeAttrs.put(candidate);
  }

  // Enforced attributes are applied last, after the filtered source attributes.
  safeAttrs.addAll(whitelist.getEnforcedAttributes(tagName));
  return new ElementMeta(safeElement, discardedCount);
}
/**
 * Creates a copy of this attribute collection in which every attribute is itself cloned.
 *
 * @return a fresh empty {@code Attributes} when no backing map exists yet; otherwise a clone
 *     whose map holds a clone of each attribute under its key
 * @throws RuntimeException wrapping {@code CloneNotSupportedException} if cloning fails
 */
public Attributes clone() {
  if (this.attributes == null) {
    return new Attributes();
  }
  try {
    final Attributes copy = (Attributes) super.clone();
    // super.clone() shares the map reference, so give the copy its own map.
    copy.attributes = new LinkedHashMap(this.attributes.size());
    for (Iterator<?> it = iterator(); it.hasNext(); ) {
      final Attribute current = (Attribute) it.next();
      copy.attributes.put(current.getKey(), current.clone());
    }
    return copy;
  } catch (CloneNotSupportedException e) {
    throw new RuntimeException(e);
  }
}
/** * This method is used for extraction of tables with lot of empty cells in it. It is required for * the successful extraction of most Matrix tables. */ private void fillBlankCells() { // We say: cells get a line number. If a column does not contain a cell on a certain line, add a // whitespace. // Any cell that is not filled must be empty: for (Line line : data) { int lineNumber = line.getLineNumber(); COLUMNLOOP: for (Column2 column : dataInColumns) { for (Cell cell : column.getCellObjects()) { if (cell.getLineNumber() == lineNumber) { break; } if (cell.getLineNumber() > line.getLineNumber()) { // the last cell? // Add a blank cell to this column. // System.out.println("Add line to :" + column + " in line: " + // line.getLineNumber()); // <span class='ocrx_word' id='word_9' title="bbox 2175 514 2346 555">were</span> Tag t = Tag.valueOf("span"); Attributes attributes = new Attributes(); attributes.put("class", "ocrx_word"); attributes.put("id", "word_ADDEDBYTEA"); attributes.put( "title", "bbox " + column.getAverageX1() + " " + (int) line.getAverageY1() + " " + column.getAverageX2() + " " + (int) line.getAverageY2()); Element newElement = new Element(t, "localhost:8080", attributes); newElement.text(" "); ArrayList<Element> newCell = new ArrayList<Element>(); newCell.add(newElement); // System.out.println("adding: " +newElement.text()); column.addCell(newCell); break COLUMNLOOP; } } } } }
/**
 * Stores a value under the data-prefixed form of {@code key} in the enclosing
 * {@code Attributes}.
 *
 * @param key the key, before {@code Attributes.dataKey} is applied
 * @param value the value to store
 * @return the value previously stored under the data key, or null if there was none
 */
public String put(String key, String value) {
  final String dataKey = Attributes.dataKey(key);

  String previous = null;
  if (Attributes.this.hasKey(dataKey)) {
    previous = ((Attribute) Attributes.this.attributes.get(dataKey)).getValue();
  }

  Attributes.this.attributes.put(dataKey, new Attribute(dataKey, value));
  return previous;
}
/**
 * Handles the next-node selector part.
 *
 * <p>When exactly one element is currently selected, its next sibling node is wrapped in a
 * synthetic {@code nextnode} element whose {@code value} attribute holds the sibling's string
 * form. With any other number of selected elements the current selection is returned as is.
 *
 * @param query the selector part; must equal {@code NEXT_NODE_TAG}
 * @return the wrapped sibling, an empty list when there is no next sibling, or the current
 *     selection
 * @throws IllegalArgumentException if {@code query} is not the next-node tag
 */
private Elements parseNextNode(String query) {
  if (!NEXT_NODE_TAG.equals(query)) {
    throw new IllegalArgumentException("Argument selector part: " + query + " is illegal");
  }
  if (elements.size() != 1) {
    return elements;
  }

  final Elements result = new Elements();
  final Node sibling = elements.first().nextSibling();
  if (sibling == null) {
    return result;
  }

  final Attributes holder = new Attributes();
  holder.put("value", sibling.toString());
  result.add(new Element(Tag.valueOf("nextnode"), "", holder));
  return result;
}
@Test public void dataset() { Document doc = Jsoup.parse( "<div id=1 data-name=jsoup class=new data-package=jar>Hello</div><p id=2>Hello</p>"); Element div = doc.select("div").first(); Map<String, String> dataset = div.dataset(); Attributes attributes = div.attributes(); // size, get, set, add, remove assertEquals(2, dataset.size()); assertEquals("jsoup", dataset.get("name")); assertEquals("jar", dataset.get("package")); dataset.put("name", "jsoup updated"); dataset.put("language", "java"); dataset.remove("package"); assertEquals(2, dataset.size()); assertEquals(4, attributes.size()); assertEquals("jsoup updated", attributes.get("data-name")); assertEquals("jsoup updated", dataset.get("name")); assertEquals("java", attributes.get("data-language")); assertEquals("java", dataset.get("language")); attributes.put("data-food", "bacon"); assertEquals(3, dataset.size()); assertEquals("bacon", dataset.get("food")); attributes.put("data-", "empty"); assertEquals(null, dataset.get("")); // data- is not a data attribute Element p = doc.select("p").first(); assertEquals(0, p.dataset().size()); }
/**
 * Moves the pending attribute name/value into {@code attributes} and resets the pending state.
 *
 * <p>The attribute collection is created on first use. A pending name without a pending value is
 * stored with an empty string. Afterwards the pending name is cleared and the pending value
 * buffer, if any, is emptied.
 */
void newAttribute() {
  if (attributes == null) {
    attributes = new Attributes();
  }

  if (pendingAttributeName != null) {
    final String value = (pendingAttributeValue == null) ? "" : pendingAttributeValue.toString();
    attributes.put(new Attribute(pendingAttributeName, value));
  }

  pendingAttributeName = null;
  if (pendingAttributeValue != null) {
    pendingAttributeValue.delete(0, pendingAttributeValue.length());
  }
}
/**
 * Revises image markup in two passes.
 *
 * <p>First, every image inside a {@code <noscript>} is re-emitted as a plain {@code <img>}
 * before the image's parent, and the {@code <noscript>} is removed. Second, every image in the
 * document gets its {@code src} replaced by {@code data-original} and then by
 * {@code data-actualsrc}, whichever are non-empty (the latter wins when both are set).
 *
 * @param pcont HTML content to revise; may be null
 * @return the revised document HTML, or an empty string when {@code pcont} is null
 */
public String reviseImgForZhiHuApp(String pcont) {
  if (pcont == null) {
    return "";
  }
  final Document parsed = Jsoup.parse(pcont);

  // Pass 1: lift images out of <noscript> blocks.
  for (Element noscript : parsed.select("noscript")) {
    for (Element fallbackImg : noscript.getElementsByTag("img")) {
      final String fallbackSrc = fallbackImg.attributes().get("src");
      fallbackImg.parent().before("<img src=\"" + fallbackSrc + "\" />");
    }
    noscript.remove();
  }

  // Pass 2: promote lazy-load attributes to src.
  for (Element image : parsed.select("img")) {
    final String original = image.attr("data-original");
    final String actual = image.attr("data-actualsrc");
    if (!original.equals("")) {
      image.attr("src", original);
    }
    if (!actual.equals("")) {
      image.attr("src", actual);
    }
  }
  return parsed.html();
}
protected Node doClone(Node parent) { Node clone; try { clone = (Node) super.clone(); } catch (CloneNotSupportedException e) { throw new RuntimeException(e); } clone.parentNode = parent; // can be null, to create an orphan split clone.siblingIndex = parent == null ? 0 : siblingIndex; clone.attributes = attributes != null ? attributes.clone() : null; clone.baseUri = baseUri; clone.childNodes = new ArrayList<Node>(childNodes.size()); for (Node child : childNodes) clone.childNodes.add(child.doClone(clone)); // clone() creates orphans, doClone() keeps parent return clone; }
@Override protected String doProcess(File htmlfile, String originalUrl, Intent intent) { try { // String charset = "utf-8"; Connection coon = HttpConnection.connect(originalUrl); coon.followRedirects( false); // we don't want it be redirected to other page,example: 10.254.7.4 Document doc = coon.get(); Element head = doc.head(); Element body = doc.body(); if (body.children().size() == 0) { Log.e(TAG, "body has no child with url=" + originalUrl); return PROCESS_FAILED_URL; } /* Elements meta = head.select("meta"); if(!meta.isEmpty()){ Element m = meta.get(0); String content = m.attr("content"); String attr = content.substring(content.indexOf("charset=")+8); if(!attr.trim().isEmpty()){ charset = attr; } } */ Elements base = head.select("base"); if (base.isEmpty()) { String b = head.baseUri(); Attributes attrs = new Attributes(); attrs.put("href", b); ArrayList<Element> a = new ArrayList<>(); a.add(new Element(Tag.valueOf("base"), b, attrs)); head.insertChildren(0, a); } Element div = doc.select("div.content-main").first(); if (div == null) { Log.e(TAG, "not found specific element with url=" + originalUrl); return PROCESS_FAILED_URL; } Element title = div.select("h1.title").first(); title.remove(); body.empty(); ArrayList<Element> a = new ArrayList<>(); a.add(div); body.insertChildren(0, a); int g = 0; while (g < 2) { // try two times. if (FileUtil.saveStringToFile(doc.toString(), htmlfile, false)) { break; } g++; } if (g < 2) return StringUtils.file2Url(htmlfile, PROCESS_FAILED_URL); Log.e(TAG, "save html to file failed with url=" + originalUrl); } catch (MalformedURLException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return PROCESS_FAILED_URL; }
/**
 * Exercises {@code Element.hasClass} against class attribute values with varying whitespace and
 * neighbouring class names.
 */
@Test
public void testHasClassDomMethods() {
  final Attributes classAttrs = new Attributes();
  final Element anchor = new Element(Tag.valueOf("a"), "", classAttrs);

  // Values in which "toto" is a whole class name, regardless of surrounding whitespace.
  final String[] containsToto = {"toto", " toto", "toto ", "\ttoto ", " toto "};
  for (String classValue : containsToto) {
    classAttrs.put("class", classValue);
    assertTrue(anchor.hasClass("toto"));
  }

  // Values in which "toto" is absent or only a substring of another name.
  final String[] lacksToto = {"ab", " ", "tototo"};
  for (String classValue : lacksToto) {
    classAttrs.put("class", classValue);
    assertFalse(anchor.hasClass("toto"));
  }

  // A longer name at the start, middle and end of a multi-class value.
  final String[] containsRaulpismuth = {
    "raulpismuth ", " abcd raulpismuth efgh ", " abcd efgh raulpismuth", " abcd efgh raulpismuth "
  };
  for (String classValue : containsRaulpismuth) {
    classAttrs.put("class", classValue);
    assertTrue(anchor.hasClass("raulpismuth"));
  }
}
/**
 * Drops the attribute with the given key from this node's attributes.
 *
 * @param attributeKey The key of the attribute to remove; must not be null.
 * @return this node, so calls can be chained
 */
public Node removeAttr(String attributeKey) {
  Validate.notNull(attributeKey);

  attributes.remove(attributeKey);
  final Node self = this;
  return self;
}
/**
 * Stores an attribute (key=value) on this node by putting it into the node's attributes; an
 * existing attribute with the same key is replaced.
 *
 * @param attributeKey The attribute key.
 * @param attributeValue The attribute value.
 * @return this node, so calls can be chained
 */
public Node attr(String attributeKey, String attributeValue) {
  attributes.put(attributeKey, attributeValue);
  final Node self = this;
  return self;
}