@Test public void testClassNames() { Document doc = Jsoup.parse("<div class=\"c1 c2\">C</div>"); Element div = doc.select("div").get(0); assertEquals("c1 c2", div.className()); final Set<String> set1 = div.classNames(); final Object[] arr1 = set1.toArray(); assertTrue(arr1.length == 2); assertEquals("c1", arr1[0]); assertEquals("c2", arr1[1]); // Changes to the set should not be reflected in the Elements getters set1.add("c3"); assertTrue(2 == div.classNames().size()); assertEquals("c1 c2", div.className()); // Update the class names to a fresh set final Set<String> newSet = new LinkedHashSet<String>(3); newSet.addAll(set1); newSet.add("c3"); div.classNames(newSet); assertEquals("c1 c2 c3", div.className()); final Set<String> set2 = div.classNames(); final Object[] arr2 = set2.toArray(); assertTrue(arr2.length == 3); assertEquals("c1", arr2[0]); assertEquals("c2", arr2[1]); assertEquals("c3", arr2[2]); }
/**
 * Checks that a cloned element reports the same class names as its source, and that editing the
 * set obtained from the clone leaves the set obtained from the source untouched.
 */
@Test
public void testClonesClassnames() {
    final Document parsed = Jsoup.parse("<div class='one two'></div>");
    final Element source = parsed.select("div").first();

    // Read the source's class names before cloning, as the clone must not share this set.
    final Set<String> sourceClasses = source.classNames();
    assertEquals(2, sourceClasses.size());
    assertTrue(sourceClasses.contains("one"));
    assertTrue(sourceClasses.contains("two"));

    final Element duplicate = source.clone();
    final Set<String> duplicateClasses = duplicate.classNames();
    assertEquals(2, duplicateClasses.size());
    assertTrue(duplicateClasses.contains("one"));
    assertTrue(duplicateClasses.contains("two"));

    // Edit only the clone's set.
    duplicateClasses.add("three");
    duplicateClasses.remove("one");

    // The source's set is unaffected ...
    assertTrue(sourceClasses.contains("one"));
    assertFalse(sourceClasses.contains("three"));
    // ... while the clone's set carries the edits.
    assertFalse(duplicateClasses.contains("one"));
    assertTrue(duplicateClasses.contains("three"));

    assertEquals("", source.html());
    assertEquals("", duplicate.html());
}
/**
 * Reads the author text from a slot element.
 *
 * <p>For an element with class {@code m-hero__slot} the author is the text of the first
 * descendant with class {@code byline}; for an element with class {@code m-entry-slot} it is the
 * text of the first descendant with class {@code author}.
 *
 * @param element slot element to inspect; may be {@code null}
 * @return the author text, or {@code "Unknown author"} when {@code element} is {@code null},
 *     carries neither slot class, or has no matching descendant
 */
String parseAuthor(Element element) {
    final String fallback = "Unknown author";
    if (element == null) {
        return fallback;
    }

    // Pick the class that marks the author node for this kind of slot.
    final String authorClass;
    if (element.classNames().contains("m-hero__slot")) {
        authorClass = "byline";
    } else if (element.classNames().contains("m-entry-slot")) {
        authorClass = "author";
    } else {
        return fallback;
    }

    // first() yields null when nothing matched, so check instead of relying on an NPE.
    final Element author = element.getElementsByClass(authorClass).first();
    return author != null ? author.text() : fallback;
}
/**
 * Reads the article link ({@code href}) from a slot element.
 *
 * <p>For an element with class {@code m-hero__slot} the link is the first descendant with class
 * {@code m-hero__slot-link}; for an element with class {@code m-entry-slot} it is the first
 * {@code a} inside the first {@code h3}.
 *
 * @param element slot element to inspect; may be {@code null}
 * @return the value of the link's {@code href} attribute, or {@code null} when {@code element} is
 *     {@code null}, carries neither slot class, or the expected link node is missing
 */
String parseArticleLink(Element element) {
    if (element == null) {
        return null;
    }

    Element link = null;
    if (element.classNames().contains("m-hero__slot")) {
        link = element.getElementsByClass("m-hero__slot-link").first();
    } else if (element.classNames().contains("m-entry-slot")) {
        final Element heading = element.getElementsByTag("h3").first();
        if (heading != null) {
            link = heading.getElementsByTag("a").first();
        }
    }

    // link stays null for unknown slot kinds and for missing nodes; callers receive null then.
    return link != null ? link.attr("href") : null;
}
/**
 * Reads the title text from a slot element.
 *
 * <p>For an element with class {@code m-hero__slot} the title is the text of the first
 * {@code h2} inside the first descendant with class {@code m-hero__slot-link}; for an element
 * with class {@code m-entry-slot} it is the text of the first {@code h3}.
 *
 * @param element slot element to inspect; may be {@code null}
 * @return the title text, or {@code "Unknown title"} when {@code element} is {@code null},
 *     carries neither slot class, or the expected heading node is missing
 */
String parseTitle(Element element) {
    final String fallback = "Unknown title";
    if (element == null) {
        return fallback;
    }

    Element heading = null;
    if (element.classNames().contains("m-hero__slot")) {
        final Element link = element.getElementsByClass("m-hero__slot-link").first();
        if (link != null) {
            heading = link.getElementsByTag("h2").first();
        }
    } else if (element.classNames().contains("m-entry-slot")) {
        heading = element.getElementsByTag("h3").first();
    }

    // heading stays null for unknown slot kinds and for missing nodes.
    return heading != null ? heading.text() : fallback;
}
private void findRecommendations( @NotNull Document doc, @NotNull BilingualQueryResultBuilder resultBuilder) { // Determine all candidate nodes: Elements alternativeNodes = doc.select("div.cc > p > *"); Language currentLanguage = null; for (Element node : alternativeNodes) { // If the next node is a flagicon, try to determine the language for the next entries from the // class name if (node.tagName().equals("span") && node.hasClass("flagicon")) { Set<String> classNames = node.classNames(); classNames.remove("flagicon"); for (String className : classNames) { Language candidate = Language.getExistingLanguageById(className); if (candidate != null) { currentLanguage = candidate; break; } } } else if (node.tagName().equals("a")) { String recommendationText = node.text(); DictionaryObjectBuilder objectBuilder = ImmutableDictionaryObject.builder(); objectBuilder.setLanguage(currentLanguage).setGeneralForm(recommendationText); resultBuilder.addSimilarRecommendation(objectBuilder.build()); } } }
String parseImageURL(Element element) { try { if (element.classNames().contains("m-hero__slot")) { Element a = element.getElementsByClass("m-hero__slot-link").first(); Element imgDiv = a.getElementsByAttribute("data-original").first(); return imgDiv.attr("data-original"); } else if (element.classNames().contains("m-entry-slot")) { Element imgDiv = element.getElementsByAttribute("data-original").first(); return imgDiv.attr("data-original"); } else throw new NullPointerException(); } catch (NullPointerException e) { e.printStackTrace(); // Return an image that says "No image" //noinspection SpellCheckingInspection return "http://best-classic-cars.com/images/no_image_available.png.pagespeed.ce.NRX39FjzIc.png"; } }
/**
 * Drops one class name from this element's {@code class} attribute. Removing a name that is not
 * present still rewrites the class names with the unchanged set.
 *
 * @param className class name to remove; must not be {@code null}
 * @return this element, for chaining
 */
public Element removeClass(String className) {
    Validate.notNull(className);

    final Set<String> remaining = classNames();
    remaining.remove(className);
    classNames(remaining);

    return this;
}
/**
 * Flips the presence of a class name in this element's {@code class} attribute: a name that is
 * present is removed, a name that is absent is added.
 *
 * @param className class name to toggle; must not be {@code null}
 * @return this element, for chaining
 */
public Element toggleClass(String className) {
    Validate.notNull(className);

    final Set<String> toggled = classNames();
    // remove() reports whether the name was present; only add it when nothing was removed.
    if (!toggled.remove(className)) {
        toggled.add(className);
    }
    classNames(toggled);

    return this;
}
private void recurse(final Element element, final Map<String, Object> values, final int depth) { final Tag tag = element.tag(); final Set<String> classes = element.classNames(); final String link = element.attr("href"); final Object content = extractChildContent(element); if (!classes.isEmpty()) { removeEmpty(classes); // toplevel classes define type if (tag.isBlock()) { if (depth == 0) { // store type attribute values.put("type", classes); for (final Element child : element.children()) { recurse(child, values, depth + 1); } } else { final Map<String, Object> childMap = new LinkedHashMap<>(); values.put(classes.iterator().next(), childMap); if (content != null) { childMap.put("name", content); } for (final Element child : element.children()) { recurse(child, childMap, depth + 1); } } } else if (tag.isInline()) { // extract href and store as URL if (classes.contains("url") && StringUtils.isNotBlank(link)) { values.put("url", link); classes.remove("url"); } if (content != null) { for (final String type : classes) { values.put(type, content); } } } } }
private ViewModel parseDetail(Document doc, ViewModel item) { if (doc.select("select#SeasonSelection").size() > 0) { item.setType(ViewModel.Type.SERIES); String rel = doc.select("select#SeasonSelection").attr("rel"); rel = rel.substring(rel.indexOf("SeriesID=") + "SeriesID=".length()); item.setSeriesID(Integer.valueOf(rel)); // Fill seasons and episodes Elements seasons = doc.select("select#SeasonSelection > option"); List<Season> list = new ArrayList<Season>(); for (Element season : seasons) { String[] rels = season.attr("rel").split(","); Season s = new Season(); s.id = Integer.valueOf(season.val()); s.name = season.text(); s.episodes = rels; list.add(s); } item.setSeasons(list.toArray(new Season[list.size()])); } else { item.setType(ViewModel.Type.MOVIE); List<Host> hostlist = new ArrayList<Host>(); Elements hosts = doc.select("ul#HosterList").select("li"); for (Element host : hosts) { int hosterId = 0; Set<String> classes = host.classNames(); for (String c : classes) { if (c.startsWith("MirStyle")) { hosterId = Integer.valueOf(c.substring("MirStyle".length())); } } String name = host.select("div.Named").text(); String count = host.select("div.Data").text(); int c = 1; if (count.contains("/")) { count = count.substring(count.indexOf("/") + 1, count.indexOf(" ", count.indexOf("/"))); c = Integer.valueOf(count); } for (int i = 0; i < c; i++) { Host h = Host.selectById(hosterId); h.setName(name); h.setMirror(i + 1); if (h.isEnabled()) { hostlist.add(h); } } } item.setMirrors(hostlist.toArray(new Host[hostlist.size()])); } String imdb = doc.select("div.IMDBRatingLinks > a").attr("href").trim(); if (!TextUtils.isEmpty(imdb)) { imdb = imdb.replace("/", ""); item.setImdbId(imdb); } return item; }
/**
 * Checks the class-related accessors ({@code className()}, {@code hasClass(String)},
 * {@code classNames()}) on an element whose {@code class} attribute has surrounding whitespace,
 * and on the document, which has no classes.
 */
@Test
public void testClassDomMethods() {
    final Document parsed =
        Jsoup.parse("<div><span class=' mellow yellow '>Hello <b>Yellow</b></span></div>");

    // The span is the first element carrying a class attribute.
    final Element span = parsed.getElementsByAttribute("class").get(0);
    assertEquals("mellow yellow", span.className());
    assertTrue(span.hasClass("mellow"));
    assertTrue(span.hasClass("yellow"));

    final Set<String> spanClasses = span.classNames();
    assertEquals(2, spanClasses.size());
    assertTrue(spanClasses.contains("mellow"));
    assertTrue(spanClasses.contains("yellow"));

    // The document itself reports no classes.
    assertEquals("", parsed.className());
    final Set<String> documentClasses = parsed.classNames();
    assertEquals(0, documentClasses.size());
    assertFalse(parsed.hasClass("mellow"));
}
/**
 * Reads the raw text values of a stock page out of {@code doc} and stores them, unparsed, in the
 * {@code ...Str} member variables.
 *
 * <p>The method works in four passes:
 *
 * <ol>
 *   <li>the first element matching {@code CSS_QUERY_TO_FIND_STOCKS_INFO} supplies the stock code
 *       (text of its {@code dt} elements) and the sector (text of its {@code .category} elements);
 *   <li>the first element matching {@code CSS_QUERY_TO_FIND_STOCKS_TABLE} supplies the stock name
 *       ({@code .symbol}), the change against the previous day (a {@code td} with class
 *       {@code change}) and the realtime price (a {@code td} whose text parses as a number);
 *   <li>the spans matching {@code CSS_QUERY_IN_DETAIL_PAGE_TO_FIND_SPAN_UNDER_ID_DEAL} supply the
 *       market name;
 *   <li>every element matching {@code CSS_QUERY_IN_DETAIL_PAGE_TO_FIND_ALL_DL} is treated as a
 *       caption ({@code dt}) / value ({@code dd}) pair and routed to a member variable by the
 *       caption's prefix.
 * </ol>
 *
 * @param doc parsed page to read from
 * @throws FailedToFindElementException if the stock info element or the stock table element is
 *     not present in {@code doc}
 */
private void extractDataAsString(Document doc) throws FailedToFindElementException {
    // --- Pass 1: stock code and sector from the info element ---
    Elements infoElements = doc.select(CSS_QUERY_TO_FIND_STOCKS_INFO);
    if (infoElements == null || infoElements.size() < 1) {
        throw new FailedToFindElementException("Cannot find stock info element.");
    }
    Element info = infoElements.get(0);
    // NOTE(review): the null checks on select()/get() results below look purely defensive;
    // jsoup presumably never returns null here — confirm before removing them.
    if (info != null) {
        Elements dts = info.select("dt");
        if (dts != null) {
            stockCodeStr = dts.text().trim();
        }
        Elements category = info.select(".category");
        if (category != null) {
            sectorStr = category.text().trim();
        }
    }

    // --- Pass 2: stock name, day-over-day change and realtime price from the stock table ---
    Elements stocksTables = doc.select(CSS_QUERY_TO_FIND_STOCKS_TABLE);
    if (stocksTables == null || stocksTables.size() < 1) {
        throw new FailedToFindElementException("Cannot find stock table element.");
    }
    Element stocksTable = stocksTables.get(0);
    Elements symbol = stocksTable.select(".symbol");
    if (symbol != null) {
        stockNameStr = symbol.text().trim();
    }
    Elements tds = stocksTable.select("td");
    for (Element td : tds) {
        String text = Util.normalizeRoundParentheses(td.text().trim());
        if (text.length() == 0) {
            // Empty cell: nothing to record.
        } else if (td.classNames().contains("change")) {
            priceComparisonWithPreviousDayStr = text;
        } else {
            // Only cells whose text parses as a number (after removeCommaAndNbsp) are taken as
            // the price; the parsed value itself is discarded and the original text is stored.
            try {
                Double.parseDouble(Util.removeCommaAndNbsp(text));
            } catch (NumberFormatException e) {
                continue;
            }
            // No break: if several cells are numeric, the last one wins.
            realtimePriceStr = text;
        }
    }

    // --- Pass 3: market name ---
    Elements spans = doc.select(CSS_QUERY_IN_DETAIL_PAGE_TO_FIND_SPAN_UNDER_ID_DEAL);
    // Expected market labels (translated): Mothers, Sapporo SE, Sapporo Ambitious, TSE,
    // TSE 1st Section, TSE 2nd Section, TSE JASDAQ Growth, TSE JASDAQ Standard, TSE Foreign,
    // Fukuoka Q-Board, Fukuoka SE.
    // First attempt: take the first non-empty span containing one of the characters used by
    // those labels, cut off at the first '('.
    for (Element span : spans) {
        String s = span.text().trim();
        if (s.length() > 0
                && (s.indexOf("マ") >= 0
                        || s.indexOf("札") >= 0
                        || s.indexOf("東") >= 0
                        || s.indexOf("福") >= 0)) {
            int index = s.indexOf('(');
            if (index >= 0) {
                s = s.substring(0, index);
            }
            marketStr = s;
            break;
        }
    }
    // Fallback: runs only while marketStr is still null. Takes the first non-empty span that
    // does not start with "(" and is not exactly "PTS" or "OTC", again cut off at the first '('.
    if (marketStr == null) {
        for (Element span : spans) {
            String s = span.text().trim();
            if (s.length() > 0 && !s.startsWith("(") && !s.equals("PTS") && !s.equals("OTC")) {
                int index = s.indexOf('(');
                if (index >= 0) {
                    s = s.substring(0, index);
                }
                marketStr = s;
                break;
            }
        }
    }

    // --- Pass 4: caption/value pairs ---
    // isDebt / isSelling remember which margin balance caption was seen most recently, so that a
    // following "ratio compared with previous week" caption can be attributed to it.
    boolean isDebt = false;
    boolean isSelling = false;
    Elements dls = doc.select(CSS_QUERY_IN_DETAIL_PAGE_TO_FIND_ALL_DL);
    for (Element dl : dls) {
        Elements dt = dl.getElementsByTag("dt");
        Elements dd = dl.getElementsByTag("dd");
        String caption = dt.text().trim();
        String value = Util.normalizeRoundParentheses(dd.text().trim());
        // The chain is first-match-wins on the caption prefix, so the order of the branches
        // matters if any CAPTION_* constant is a prefix of another.
        if (caption.startsWith(CAPTION_PREVIOUS_CLOSING_PRICE)) {
            previousClosingPriceStr = value;
        } else if (caption.startsWith(CAPTION_OPENING_PRICE)) {
            openingPriceStr = value;
        } else if (caption.startsWith(CAPTION_HIGH_PRICE)) {
            highPriceStr = value;
        } else if (caption.startsWith(CAPTION_LOW_PRICE)) {
            lowPriceStr = value;
        } else if (caption.startsWith(CAPTION_TRADING_VOLUME_OF_STOCKS)) {
            tradingVolumeOfStocksStr = value;
        } else if (caption.startsWith(CAPTION_TRADING_VALUE_OF_MONEY)) {
            tradingValueOfMoneyStr = value;
        } else if (caption.startsWith(CAPTION_PRICE_LIMIT)) {
            priceLimitStr = value;
        } else if (caption.startsWith(CAPTION_MARKET_CAPITALIZATION)) {
            marketCapitalizationStr = value;
        } else if (caption.startsWith(CAPTION_OUTSTANDING_STOCK_VOLUME)) {
            outstandingStockVolumeStr = value;
        } else if (caption.startsWith(CAPTION_ANNUAL_INTEREST_RATE)) {
            annualInterestRateStr = value;
        } else if (caption.startsWith(CAPTION_DIVIDENDS_PER_SHARE)) {
            dividendsPerShareStr = value;
        } else if (caption.startsWith(CAPTION_PER)) {
            perStr = value;
        } else if (caption.startsWith(CAPTION_PBR)) {
            pbrStr = value;
        } else if (caption.startsWith(CAPTION_EPS)) {
            epsStr = value;
        } else if (caption.startsWith(CAPTION_BPS)) {
            bpsStr = value;
        } else if (caption.startsWith(CAPTION_MINIMUM_PURCHASE_AMOUNT)) {
            minimumPurchaseAmountStr = value;
        } else if (caption.startsWith(CAPTION_SHARE_UNIT_NUMBER)) {
            shareUnitNumberStr = value;
        } else if (caption.startsWith(CAPTION_YEARLY_HIGH)) {
            yearlyHighStr = value;
        } else if (caption.startsWith(CAPTION_YEARLY_LOW)) {
            yearlyLowStr = value;
        } else if (caption.startsWith(CAPTION_NET_ASSETS)) {
            netAssetsStr = value;
        } else if (caption.startsWith(CAPTION_UNIT_OF_TRADING)) {
            unitOfTradingStr = value;
        } else if (caption.startsWith(CAPTION_MANAGEMENT_COMPANY)) {
            managementCompanyStr = value;
        } else if (caption.startsWith(CAPTION_TYPE_OF_ASSETS_TO_BE_INVESTED)) {
            typeOfAssetsToBeInvestedStr = value;
        } else if (caption.startsWith(CAPTION_REGION_TO_BE_INVESTED)) {
            regionToBeInvestedStr = value;
        } else if (caption.startsWith(CAPTION_UNDERLYING_INDEX)) {
            underlyingIndexStr = value;
        } else if (caption.startsWith(CAPTION_SETTLEMENT_FREQUENCY)) {
            settlementFrequencyStr = value;
        } else if (caption.startsWith(CAPTION_SETTLEMENT_MONTH)) {
            settlementMonthStr = value;
        } else if (caption.startsWith(CAPTION_LISTED_DATE)) {
            listedDateStr = value;
        } else if (caption.startsWith(CAPTION_TRUST_FEE)) {
            trustFeeStr = value;
        } else if (caption.startsWith(CAPTION_MARGIN_DEBT_BALANCE)) {
            marginDebtBalanceStr = value;
            // A following week-over-week ratio now belongs to the debt balance.
            isDebt = true;
            isSelling = false;
        } else if (caption.startsWith(CAPTION_MARGIN_RATIO_COMPARISON_WITH_PREVIOUS_WEEK)) {
            // The same caption is used for both balances; attribute it to whichever balance
            // caption came last. It is dropped if neither has been seen yet.
            if (isDebt) {
                marginDebtBalanceRatioComparisonWithPreviousWeekStr = value;
            } else if (isSelling) {
                marginSellingBalanceRatioComparisonWithPreviousWeekStr = value;
            }
        } else if (caption.startsWith(CAPTION_MARGIN_SELLING_BALANCE)) {
            marginSellingBalanceStr = value;
            // A following week-over-week ratio now belongs to the selling balance.
            isDebt = false;
            isSelling = true;
        } else if (caption.startsWith(CAPTION_RATIO_OF_MARGIN_BALANCE)) {
            ratioOfMarginBalanceStr = value;
        } else {
            // Unrecognised caption: report it on stdout unless it is empty, starts with one of
            // the listed prefixes, or its value contains the listed marker text.
            if (!caption.equals("")
                    && !caption.startsWith("値上がり率")
                    && !caption.startsWith("値下がり率")
                    && !caption.startsWith("[買い]")
                    && !caption.startsWith("[売り]")
                    && value.indexOf("リアルタイム株価") < 0) {
                //
                // TODO: unknown data format
                //
                System.out.println("unknown caption=" + caption);
                System.out.println("unknown value=" + value);
            }
        }
    }
}
@Override public Element clone() { Element clone = (Element) super.clone(); clone.classNames = null; // derived on first hit, otherwise gets a pointer to source classnames return clone; }