/** Typed elements built from different markup must not compare as equal. */
@Test
public void equalsWithUnequalElementReturnsFalse() {
  TypedElement fromH1 = new TypedElement(Jsoup.parse("<h1/>"));
  TypedElement fromH2 = new TypedElement(Jsoup.parse("<h2/>"));

  boolean areEqual = fromH1.equals(fromH2);

  assertThat(areEqual, is(false));
}
/**
 * Splits a fragment at its {@code </b>} closing tags and returns the plain text of the first
 * two pieces.
 *
 * <p>Index 0 holds the trimmed text of the piece before the first {@code </b>}; index 1 holds
 * the text of the piece after it (up to the next {@code </b>}, if there is one). A piece that
 * does not exist is reported as an empty string instead of failing with an
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * @param startstring HTML fragment to split; may be {@code null} or empty
 * @return a two-element array, never {@code null}
 */
private static String[] crop(String startstring) {
  if (startstring == null || startstring.length() == 0) {
    return new String[] {"", ""};
  }
  // String.split drops trailing empty strings, so input without "</b>", or with nothing after
  // it, yields fewer than two pieces (zero pieces when the input is just "</b>").
  String[] pieces = startstring.split("</b>");
  String head = pieces.length > 0 ? Jsoup.parse(pieces[0]).text().trim() : "";
  String tail = pieces.length > 1 ? Jsoup.parse(pieces[1]).text() : "";
  return new String[] {head, tail};
}
/** A {@code <br>} between two words is rendered as a single space in extracted text. */
@Test
public void testBrHasSpace() {
  Document tight = Jsoup.parse("<p>Hello<br>there</p>");
  String wholeText = tight.text();
  assertEquals("Hello there", wholeText);
  String paragraphOwnText = tight.select("p").first().ownText();
  assertEquals("Hello there", paragraphOwnText);

  // surrounding whitespace must not add extra spaces
  Document spaced = Jsoup.parse("<p>Hello <br> there</p>");
  assertEquals("Hello there", spaced.text());
}
/** An id selector returns every element carrying the id, and nothing when the id is absent. */
@Test
public void testById() {
  String duplicatedId = "<div><p id=foo>Hello</p><p id=foo>Foo two!</p></div>";
  Elements matches = Jsoup.parse(duplicatedId).select("#foo");
  assertEquals(2, matches.size());
  assertEquals("Hello", matches.get(0).text());
  assertEquals("Foo two!", matches.get(1).text());

  Elements missing = Jsoup.parse("<div id=1></div>").select("#foo");
  assertEquals(0, missing.size());
}
/**
 * Writes {@code expected} and asserts that the result matches the reference design.
 *
 * <p>Both the written output and {@code design} are parsed with jsoup; the first child of each
 * body is passed through {@code elementToHtml} and the two strings are compared.
 *
 * @param design reference markup
 * @param expected object handed to {@code write}
 * @param writeData flag forwarded to {@code write}
 */
public void testWrite(String design, T expected, boolean writeData) {
  String serialized = write(expected, writeData);

  Element actualRoot = Jsoup.parse(serialized).body().child(0);
  Element referenceRoot = Jsoup.parse(design).body().child(0);

  String actualHtml = elementToHtml(actualRoot);
  String referenceHtml = elementToHtml(referenceRoot);
  Assert.assertEquals(referenceHtml, actualHtml);
}
public void parsingHTML(String urlPath) { String price = ""; try { URL url = new URL(urlPath); Document doc; if (server_down) { doc = Jsoup.parse(url.openStream(), "UTF-8", urlPath); } else { doc = Jsoup.parse(url.openStream(), "ISO-8859-1", urlPath); } // Document doc = Jsoup.parse(url, 2000); Elements trs = doc.select("tr"); for (int a = 2; a <= 6; a++) { Elements tds = trs.get(a).select("td"); for (int i = 1; i <= 5; i++) { switch (a) { // DATUM case 2: foodDayList.get(i - 1).setDate(tds.get(i).select("td").text()); // Log.v(TAG, "date: " + tds.get(i).select("td").text()); break; // ESSEN 1 DESC case 3: foodDayList.get(i - 1).setEssen1Desc(tds.get(i).select("td").text()); break; case 4: // TODO: ESSEN 1 PREIS price = ""; for (int p = 0; p < 4; p++) { price += tds.get((i - 1) * 4 + p).text(); } foodDayList.get(i - 1).setEssen1Price(price); break; case 5: // ESSEN 2 DESC foodDayList.get(i - 1).setEssen2Desc(tds.get(i).select("td").text()); break; case 6: // TODO ESSEN 2 PREIS price = ""; for (int p = 0; p < 4; p++) { price += tds.get((i - 1) * 4 + p).text(); } foodDayList.get(i - 1).setEssen2Price(price); break; } } } } catch (Exception e) { Log.e(TAG, e.getMessage()); } }
/** Tags that are never closed with {@code >} still end up as elements in the parsed tree. */
@Test
public void parsesUnterminatedTag() {
  Document doc = Jsoup.parse("<p");
  assertEquals(1, doc.getElementsByTag("p").size());

  // an unterminated div directly followed by an unterminated p
  doc = Jsoup.parse("<div id=1<p id='2'");
  Element div = doc.getElementById("1");
  assertEquals(1, div.children().size());
  Element paragraph = doc.getElementById("2");
  assertNotNull(paragraph);
}
/**
 * Runs {@code SQL_SELECTTWEETOUT} for the given user and prepares each tweet body for display.
 *
 * <p>Every body is reduced to plain text with jsoup. If the body contains an {@code @name}
 * mention and the first such name resolves to a user, all occurrences of that mention are
 * replaced by a link to the user's page. A mention that resolves to no user is left as plain
 * text instead of failing the whole list with a NullPointerException.
 *
 * @param user user whose id is bound to all six query parameters
 * @return the tweets in result-set order; never {@code null}
 * @throws DAOException if the query, the row mapping or the user lookup fails
 */
@Override
public List<TweetOut> getTweetOutList(User user) throws DAOException {
  List<TweetOut> lstTweetOut = new ArrayList<>();
  try {
    ResultSet resultSet =
        this.executeQuery(
            SQL_SELECTTWEETOUT,
            false,
            user.getUserId(),
            user.getUserId(),
            user.getUserId(),
            user.getUserId(),
            user.getUserId(),
            user.getUserId());
    Pattern pat = Pattern.compile("@([\\w]+)");
    while (resultSet.next()) {
      EntityMapping<TweetOut> mapping = new EntityMapping<>(TweetOut.class);
      TweetOut tweet = mapping.getMapping(resultSet);
      String rawBody = tweet.getBody();
      Matcher match = pat.matcher(rawBody);
      // Parse once; both the linked and the plain variant start from the same text.
      String displayBody = Jsoup.parse(rawBody).text();
      if (match.find()) {
        String mentionedName = match.group(1);
        DAOUser daoUser = new DAOUser(DAOFactory.getInstance());
        User mentionedUser = daoUser.searchByUserName(mentionedName);
        if (mentionedUser != null) {
          // NOTE(review): displayBody is unescaped text that now gets markup spliced in;
          // confirm that whatever renders this body escapes or sanitizes it.
          displayBody =
              displayBody.replace(
                  "@" + mentionedName,
                  "<a class=\"\" href=\"/User?id="
                      + mentionedUser.getUserId()
                      + "\"><strong class=\"center-middle-txt\">@"
                      + mentionedName
                      + "</strong></a>");
        }
      }
      tweet.setBody(displayBody);
      lstTweetOut.add(tweet);
    }
  } catch (Exception e) {
    throw new DAOException(e);
  } finally {
    this.CloseConnection();
  }
  return lstTweetOut;
}
@Test public void testByTag() { // should be case insensitive Elements els = Jsoup.parse("<div id=1><div id=2><p>Hello</p></div></div><DIV id=3>").select("DIV"); assertEquals(3, els.size()); assertEquals("1", els.get(0).id()); assertEquals("2", els.get(1).id()); assertEquals("3", els.get(2).id()); Elements none = Jsoup.parse("<div id=1><div id=2><p>Hello</p></div></div><div id=3>").select("span"); assertEquals(0, none.size()); }
@Test public void testGetElementById() { Document doc = Jsoup.parse(reference); Element div = doc.getElementById("div1"); assertEquals("div1", div.id()); assertNull(doc.getElementById("none")); Document doc2 = Jsoup.parse("<div id=1><div id=2><p>Hello <span id=2>world!</span></p></div></div>"); Element div2 = doc2.getElementById("2"); assertEquals("div", div2.tagName()); // not the span Element span = div2.child(0).getElementById("2"); // called from <p> context should be span assertEquals("span", span.tagName()); }
/** Class selectors ignore case and accept hyphenated class names. */
@Test
public void testByClass() {
  String paragraphs = "<p id=0 class='ONE two'><p id=1 class='one'><p id=2 class='two'>";
  Elements ones = Jsoup.parse(paragraphs).select("P.One");
  assertEquals(2, ones.size());
  assertEquals("0", ones.get(0).id());
  assertEquals("1", ones.get(1).id());

  Elements missing = Jsoup.parse("<div class='one'></div>").select(".foo");
  assertEquals(0, missing.size());

  Elements hyphenated = Jsoup.parse("<div class='One-Two'></div>").select(".one-two");
  assertEquals(1, hyphenated.size());
}
/**
 * Parses the given markup, removes every element matching a fixed list of selectors and returns
 * the remaining document as HTML.
 *
 * @param pcont markup to clean; may be {@code null}
 * @return the cleaned document's HTML, or an empty string when {@code pcont} is {@code null}
 */
public String reviseContForLieyunwang(String pcont) {
  if (pcont == null) {
    return "";
  }
  Document doc = Jsoup.parse(pcont);
  // Selectors are applied one after another, in this order.
  String[] unwantedSelectors = {
    "div#share-box",
    "div[id^=BAIDU]",
    "iframe[id^=360_HOT]",
    "div.n_article",
    "div#comment-box"
  };
  for (String selector : unwantedSelectors) {
    for (Element unwanted : doc.select(selector)) {
      unwanted.remove();
    }
  }
  return doc.html();
}
/**
 * Parses the given markup, removes every element matching a fixed list of selectors and returns
 * the remaining document as HTML.
 *
 * @param pcont markup to clean; may be {@code null}
 * @return the cleaned document's HTML, or an empty string when {@code pcont} is {@code null}
 */
public String reviseContForTieba(String pcont) {
  if (pcont == null) {
    return "";
  }
  Document doc = Jsoup.parse(pcont);
  // Selectors are applied one after another, in this order.
  String[] unwantedSelectors = {
    "div.BAIDU_CLB_AD",
    "ul.p_mtail",
    "ul.p_props_tail",
    "div.thread_recommend",
    "div.j_lzl_container"
  };
  for (String selector : unwantedSelectors) {
    for (Element unwanted : doc.select(selector)) {
      unwanted.remove();
    }
  }
  return doc.html();
}
@Test public void designIsSerializedWithCorrectPrefixesAndPackageNames() throws IOException { ByteArrayOutputStream out = serializeDesign(ctx); // Check the mapping from prefixes to package names using the html tree String[] expectedPrefixes = {"my"}; String[] expectedPackageNames = {"com.addon.mypackage"}; int index = 0; Document doc = Jsoup.parse(out.toString("UTF-8")); Element head = doc.head(); for (Node child : head.childNodes()) { if ("meta".equals(child.nodeName())) { String name = child.attributes().get("name"); if ("package-mapping".equals(name)) { String content = child.attributes().get("content"); String[] parts = content.split(":"); assertEquals("Unexpected prefix.", expectedPrefixes[index], parts[0]); assertEquals("Unexpected package name.", expectedPackageNames[index], parts[1]); index++; } } } assertEquals("Unexpected number of prefix - package name pairs.", 1, index); }
/** * @param url * @return */ public static ProductInfo parse(String url) { ProductInfo productInfo = new ProductInfo(); PrintLogTool.info("开始解析" + category.message() + "[URL=" + url + "]", logger); // 商品类型 productInfo.setCategory(category); try { doc = Jsoup.parse(new URL(url), 20000); } catch (MalformedURLException e) { logger.error("", e); } catch (IOException e) { logger.error("", e); } // 商品编号 setSerialNo(productInfo, url); // 当前价格 setCurrentPrice(productInfo); // 商品名称 setProductName(productInfo); // 商品图片 setProductPic(productInfo); return productInfo; }
/**
 * Loads the draws from a fixed local HTML file.
 *
 * <p>Every table row without a {@code th} cell becomes one {@code Concurso}: cell 0 is the draw
 * code, cells 2 to 7 are the six numbers, and cell 15 equal to "SIM" marks the draw as
 * accumulated.
 *
 * @throws IOException if the file cannot be read
 */
private BancoMegaSena() throws IOException {
  this.concursos = new ArrayList<Concurso>();
  File input = new File("C:\\Users\\Rodrigo Lacerda\\Downloads\\D_mgsasc (1)\\d_megasc.htm");
  Document doc = Jsoup.parse(input, "UTF-8");
  Elements trs = doc.getElementsByTag("tr");
  System.out.println(trs.get(1).getElementsByTag("th"));
  for (Element tr : trs) {
    if (!tr.getElementsByTag("th").isEmpty()) {
      continue; // header row
    }
    Elements cells = tr.getElementsByTag("td");
    // Read all cell texts before parsing any of them.
    String codigo = cells.get(0).text();
    String[] dezenas = new String[6];
    for (int i = 0; i < dezenas.length; i++) {
      dezenas[i] = cells.get(i + 2).text();
    }
    boolean acumulado = cells.get(15).text().equals("SIM");

    Concurso concurso = new Concurso(Integer.parseInt(codigo));
    for (String dezena : dezenas) {
      concurso.addNumero(Integer.parseInt(dezena));
    }
    concurso.setAcumulado(acumulado);
    this.concursos.add(concurso);
  }
}
/**
 * Imports bookmarks from an uploaded HTML file for the logged-in user.
 *
 * <p>Every {@code <a>} element that has an {@code add_date} attribute is stored as a bookmark
 * with its text as title, its {@code href} as URL and {@code add_date} (multiplied by 1000) as
 * update date. Nothing happens when the login user has no id. The upload stream is closed once
 * the document has been parsed.
 *
 * @param compParameter request context; supplies the login user and the character encoding
 * @param multipartFile uploaded file
 * @param json not used by this method
 */
@Override
public void upload(
    ComponentParameter compParameter,
    IMultipartFile multipartFile,
    HashMap<String, Object> json) {
  try {
    ID id = ItSiteUtil.getLoginUser(compParameter).getId();
    if (id == null) {
      return;
    }
    final Document document;
    java.io.InputStream in = multipartFile.getInputStream();
    try {
      document = Jsoup.parse(in, compParameter.request.getCharacterEncoding(), "");
    } finally {
      // The stream was never closed before; a failing close must not fail the import.
      try {
        in.close();
      } catch (java.io.IOException ignored) {
        // nothing useful can be done if closing fails
      }
    }
    for (final Element a : document.getElementsByTag("a")) {
      if (!a.hasAttr("add_date")) {
        continue;
      }
      final BookmarkBean bean = new BookmarkBean();
      final long t = ConvertUtils.toLong(a.attr("add_date"), 0) * 1000;
      bean.setTitle(a.text());
      bean.setUrl(a.attr("href"));
      bean.setUserId(id);
      bean.setUpdateDate(new Date(t));
      try {
        BookmarkUtils.applicationModule.doUpdate(bean);
      } catch (Exception ignored) {
        // Existing behaviour kept: a bookmark that cannot be stored is skipped silently.
      }
    }
  } catch (final Exception e) {
    // NOTE(review): every failure is reported with this one message and the cause is dropped;
    // confirm whether wrapException has an overload that accepts the cause.
    throw DataObjectException.wrapException("没有权限");
  }
}
@Override public Object parseHtml2Obj(String html) { Document doc = Jsoup.parse(html); Element title = doc.getElementById("activity-name"); Element createtime = doc.getElementById("post-date"); // Element from = doc.getElementById("post-user"); Element content = doc.getElementById("essay-body"); Elements pic = doc.select("#media img"); Elements _intro = doc.select(".text p"); String intro = null; if (_intro.isEmpty()) { intro = "阅读全部"; } else { intro = _intro.first().text(); } // List<ArticleObj> objs = new ArrayList<ArticleObj>(); ArticleObj obj = new ArticleObj(); obj.setFrom(account_desc); obj.setContent(content.html()); obj.setCreatetime(createtime.text()); obj.setTitle(title.text()); obj.setIntro(intro.substring(0, intro.length() > 50 ? 50 : intro.length()) + "..."); if (!pic.isEmpty()) { String src = pic.get(0).attr("src"); obj.setPic(getSrc(src)); } System.err.println(obj.getPic()); dbRobot.AddArticleData(obj); cur_count++; return null; }
/** * 从网址里面抽取链接 * * @return 链接的集合 */ public static List<String> getUrlsByPage(String str) { List<String> urls = new ArrayList<String>(); try { URL url = new URL(str); int end = 0; String host = url.getHost(); Document doc = Jsoup.parse(url, 30000); Elements links = doc.select("a"); String href = null; for (Element link : links) { href = link.attr("href"); if (href.startsWith(HTTP)) { urls.add(href); } else if (href.startsWith("/")) { urls.add(HTTP + host + href); } else { if (end > 0) { urls.add(str + href); } else { urls.add(str + href); } } } } catch (MalformedURLException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return urls; }
/**
 * Rewrites lazily loaded images and inserts the cover image found in inline scripts.
 *
 * <p>For every {@code <img>} with a {@code data-src} attribute, {@code src} is set to the
 * {@code data-src} value up to and including its last "/" followed by "640",
 * {@code max-width} is set to "640" and the {@code data-s}, {@code data-src} and
 * {@code data-w} attributes are removed. Images without {@code data-src} are left untouched;
 * previously they ended up with {@code src="640"}. For every script containing
 * {@code var cover = "..."}, an {@code <img>} with that value is inserted before the first
 * {@code <div>}.
 *
 * @param pcont markup to rewrite; may be {@code null}
 * @return the rewritten document's HTML, or an empty string when {@code pcont} is {@code null}
 */
public String reviseImgForWX(String pcont) {
  if (pcont == null) {
    return "";
  }
  Document doc = Jsoup.parse(pcont);
  for (Element img : doc.select("img")) {
    String source = img.attr("data-src");
    if (source.length() == 0) {
      // attr() returns "" for a missing attribute; there is nothing to rewrite.
      continue;
    }
    String prefix = source.substring(0, source.lastIndexOf("/") + 1);
    img.removeAttr("data-s");
    img.removeAttr("data-src");
    img.removeAttr("data-w");
    img.attr("src", prefix + "640");
    img.attr("max-width", "640");
  }

  Elements scripts = doc.select("script");
  Elements divs = doc.select("div");
  if (scripts.size() > 0 && divs.size() > 0) {
    // Compiled once instead of once per script element.
    Pattern coverPattern = Pattern.compile("(?<=(var\\scover\\s=\\s\"))\\S+(?=\")");
    for (Element script : scripts) {
      Matcher m = coverPattern.matcher(script.html());
      if (m.find()) {
        String nimg = "<img src=\"" + m.group() + "\"/>";
        divs.get(0).before(nimg);
      }
    }
  }
  return doc.html();
}
public Item( String source, String link, String title, String description, Date datePublication, List enclosure) { try { this.uri = link; values = new ArrayList<Prop>(); values.add(new Prop(RSS.link, link, true, false)); values.add(new Prop(RSS.title, title)); values.add(new Prop(RSS.description, Jsoup.parse(description).text())); if (datePublication != null) { values.add(new Prop(RSS.getURI() + "pubDate", datePublication.toString())); values.add( new Prop(RSS.getURI() + "pubDateTime", Long.toString(datePublication.getTime()))); } for (Object o : enclosure) { SyndEnclosureImpl e = (SyndEnclosureImpl) o; values.add(new Prop(RSS.image, e.getUrl(), true, false)); } values.add(new Prop("http://purl.org/rss/1.0/source", source, false, false)); } catch (NullPointerException e) { logger.error(e); } }
/**
 * Renders a textarea element without extra attributes and checks name, value, rows and cols of
 * the produced {@code <textarea>}.
 */
@Test
public void testTextareaElementWithNoAttributes() throws Exception {
  TemplateFunctions functions =
      new TemplateFunctions(applicationContext, viewResolver, localeResolver);

  TextareaElement element = new TextareaElement();
  element.setValue("Test");
  element.setName("string");
  element.setLabel("String");
  element.setRows(20);
  element.setColumns(80);

  String output = functions.render(new MockHttpServletRequest(), element);
  System.out.println(output);
  Document doc = Jsoup.parse(output);

  assertEquals("Attributes has content", "", element.getAttributesAsString());

  String renderedName = doc.select("textarea").first().attr("name");
  assertEquals("Name does not match", "string[0].value", renderedName);

  String renderedValue = doc.select("textarea").first().text();
  assertEquals("String Value does not match", "Test", renderedValue);

  String renderedRows = doc.select("textarea").first().attr("rows");
  assertEquals("Textarea element rows is not 20", "20", renderedRows);

  String renderedCols = doc.select("textarea").first().attr("cols");
  assertEquals("Textarea element columns is not 80", "80", renderedCols);
}
/**
 * Returns the text of the document's last {@code title} element.
 *
 * <p>While walking all elements, a title containing both "moved" and "permanently" (compared
 * in lower case) marks the page as moved. Once that mark is set, {@code getMovedUrl} is called
 * for each {@code body} element met afterwards and finally for the document itself; the values
 * it returns are not used here.
 *
 * @param html page markup
 * @return the title text, or an empty string when there is no title element
 */
private static String makeModular(String html) {
  String text = "";
  boolean moved = false;
  Document doc = Jsoup.parse(html);
  for (Element el : doc.getAllElements()) {
    String name = el.nodeName();
    if (name.equals("title")) {
      text = el.text();
      String lowered = text.toLowerCase();
      if (lowered.contains("moved") && lowered.contains("permanently")) {
        moved = true;
      }
    } else if (name.equals("body") && moved) {
      getMovedUrl(el);
    }
  }
  if (moved) {
    getMovedUrl(doc);
  }
  return text;
}
//// COMPLETAMENTE INUTILE public static int[] getPrice(String path) { int[] month = new int[31]; int count = 0; try { File input = new File(path); Document doc = Jsoup.parse(input, "UTF-8", "http://example.com/"); Elements elementi_div = doc.getElementsByTag("div"); for (Element e : elementi_div) { if (e.text().length() > 0) if (Character.isDigit(e.text().charAt(0)) && e.text().contains("€ ")) { count++; String[] arr = e.text().split(" "); month[Integer.parseInt(arr[0]) - 1] = Integer.parseInt(arr[2].replace(".", "")); } } } catch (Exception e) { System.out.println(e); } if (count == 0) { System.out.println("Non e' stato scaricato il file"); // getPrice(path); } return month; }
/**
 * Renders a textarea element that has both a default and an explicit value and checks that the
 * explicit value is rendered and that the label points at the element's id.
 */
@Test
public void testTextareaElementWithValue() throws Exception {
  TemplateFunctions functions =
      new TemplateFunctions(applicationContext, viewResolver, localeResolver);

  TextareaElement element = new TextareaElement();
  element.setHtmlId("string");
  element.setDefaultValue("Default Value");
  element.setName("string");
  element.setLabel("String");
  element.setValue("not default");

  Document doc = Jsoup.parse(functions.render(new MockHttpServletRequest(), element));

  String renderedName = doc.getElementById("string").attr("name");
  assertEquals("Name does not match", "string[0].value", renderedName);

  String renderedValue = doc.getElementById("string").text();
  assertEquals("Textarea Value does not match", "not default", renderedValue);

  String labelTarget = doc.select("label").first().attr("for");
  assertEquals(
      "Label for attribute does not match element id", element.getHtmlId(), labelTarget);

  String labelText = doc.select("label").first().text();
  assertEquals("Label value does not match", "String", labelText);
}
/** This function is called when a page is fetched and ready to be processed by your program. */ @Override public void visit(Page page) { // System.out.println(page.getWebURL().toString().contains("city")+" // "+page.getWebURL()); // url title date body if (page.getWebURL().toString().contains("/ads/") || page.getWebURL().toString().contains("/view/")) { if (page.getParseData() instanceof HtmlParseData) { HtmlParseData htmlParseData = (HtmlParseData) page.getParseData(); String html = htmlParseData.getHtml(); Document doc = Jsoup.parse(html); String url; String title; String body = null; String date = null; url = page.getWebURL().getURL(); title = doc.title(); // System.out.println(title); date = doc.getElementsByClass("nameDiv").get(1).text(); body = doc.getElementsByClass("addsTexts").get(0).text(); System.out.println(date); System.out.println(body); // System.out.println(body); // System.out.println("URL: " + url); // System.out.println("title: " + title); // System.out.println("date: " + date); // System.out.println("body: " + body); Indexer.add(url, title, body, date); } } }
/**
 * Renders the most recent log entries into the {@code logs.html} template.
 *
 * <p>The template is read from the class path as a stream, so it is also found when its
 * location contains characters that are escaped in URLs or when it is packaged in an archive.
 * One table row per log entry is appended to the element with id {@code logs}. Failures are
 * printed; if the template was parsed before the failure, the document is returned as far as it
 * was built.
 *
 * @return the resulting HTML, or {@code null} when the template could not be loaded
 */
@GET
@Path("/logs")
@Produces({MediaType.TEXT_HTML})
public String logs() {
  Document doc = null;
  try {
    ArrayList<Logger.Log> logs = Logger.getInstance().getLastLogs();
    java.io.InputStream template = getClass().getClassLoader().getResourceAsStream("logs.html");
    if (template == null) {
      throw new java.io.FileNotFoundException("logs.html not found on the class path");
    }
    try {
      doc = Jsoup.parse(template, "UTF-8", "");
    } finally {
      try {
        template.close();
      } catch (java.io.IOException ignored) {
        // nothing useful can be done if closing fails
      }
    }
    Element tbody = doc.getElementById("logs");
    for (Logger.Log log : logs) {
      Element tr = tbody.appendElement("tr").addClass(log.getType_log());
      tr.appendElement("td").addClass("type").text(log.getType_log());
      tr.appendElement("td").addClass("date").text(log.getDate().toString());
      tr.appendElement("td").addClass("message").text(log.getMessage());
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
  if (doc != null) {
    return doc.html();
  }
  return null;
}
/**
 * Applies {@code cssSelector} to the given markup.
 *
 * @param inputContent markup to parse
 * @return the trimmed HTML of the selection, or {@code null} when nothing matches
 * @throws Exception declared by the overridden method; {@code validate()} is called first
 */
@Override
public String fire(String inputContent) throws Exception {
  validate();
  Elements matched = Jsoup.parse(inputContent).select(cssSelector);
  if (matched == null || matched.size() == 0) {
    return null;
  }
  return matched.html().trim();
}
/**
 * Handles the response of the finished task and opens the matching screen or dialog.
 *
 * <p>A {@code null} response and the location-failure marker each show a toast and call
 * {@code onResume()}. Otherwise the page title and some markers in the response decide between
 * the route screen, the "did you mean" dialog, the nearby-stops screen and the no-results
 * dialog.
 *
 * @param response raw response text, {@code null} on error
 */
@Override
public void onTaskFinished(String response) {
  progress.cancel();
  if (response == null) {
    Toast.makeText(this, "An error has occured.", Toast.LENGTH_LONG).show();
    onResume();
    return;
  }
  // Compare by content: "==" only matched when the very same String instance came back.
  if (response.equals(LocationHelper.LOCATION_FAILED)) {
    Toast.makeText(this, "Unable to determine your location.", Toast.LENGTH_LONG).show();
    onResume();
    return;
  }

  Document doc = Jsoup.parse(response);
  String title = doc.title();
  if (title.contains("Route")) {
    RouteActivity.start(HomeActivity.this, response);
  } else if (title.contains("Stop")) {
    System.out.println(title);
  } else if (response.contains("Did you mean?")
      && (response.contains("class=\"routeList\"")
          || response.contains("class=\"ambiguousLocations\""))) {
    new DidYouMeanDialog()
        .newInstance(this, response)
        .show(getSupportFragmentManager(), "com.steelhawks.hawkscout.DID_YOU_MEAN_DIALOG");
  } else if (response.contains("<h3>Nearby Stops:</h3>")) {
    NearbyStopsActivity.start(this, response);
  } else {
    new NoResultsDialog()
        .show(getSupportFragmentManager(), "com.steelhawks.hawkscout.NO_RESULTS");
    onResume();
  }
}
public void enviarEmail(List<String> destinos, String mensagem, String titulo) throws EmailException { SimpleEmail email = new SimpleEmail(); email.setHostName(this.host); // Quando a porta utilizada não é a padrão (gmail = 465) email.setSmtpPort(this.porta); // Adicione os destinatários for (String destino : destinos) { email.addTo(destino, "", "UTF-8"); } email.setSentDate(new Date()); // Configure o seu Email do qual enviará email.setFrom(this.email, this.empresa.getNome()); // Adicione um assunto email.setSubject(titulo); // Adicione a mensagem do Email email.setMsg(Jsoup.parse(mensagem).text()); // Para autenticar no servidor é necessário chamar os dois métodos abaixo email.setTLS(true); email.setSSL(true); email.setAuthentication(this.email, this.senha); email.send(); }