/**
 * Returns the stop sequence stored under {@code key}, building and caching it on a miss.
 *
 * <p>On a miss the request URL is assembled from the {@code DBC} constants, {@code
 * routeShortName()} and {@code direction.toChar()} and stored in the {@code url} field. Every
 * {@code poi} element below the first {@code data} element of the loaded document becomes a
 * {@link Stop}. A stop whose code is not yet listed in the {@code "stop_code"} entry of {@code
 * StopSiteCrawler.stopsFile} is written out via {@code stop.write()}.
 *
 * @param key cache key of the requested sequence
 * @return the cached sequence, or the newly parsed one (empty when no {@code data} element exists)
 * @throws IOException declared by the signature; presumably raised while loading the document
 */
private Stack<Stop> fetchStopSequence(String key) throws IOException {
  if (stopSequences.containsKey(key)) {
    return stopSequences.get(key);
  }

  Stack<Stop> sequence = new Stack<Stop>();
  url =
      DBC.URL
          + '/'
          + DBC.GOOGLE_MAPS_EXTENSION
          + DBC.STOP_SEQUENCE_EXTENSION
          + routeShortName()
          + "&direction="
          + direction.toChar();

  Elements dataNodes = makeTmpJSoupDoc(url).getElementsByTag("data");
  if (!dataNodes.isEmpty()) {
    // Only the first <data> element is consulted.
    for (Element poi : dataNodes.get(0).getElementsByTag("poi")) {
      String street = poi.getElementsByTag("address").get(0).html();
      String location = poi.getElementsByTag("location").get(0).html();
      String address = street + ", " + location;
      double lat = Double.parseDouble(poi.getElementsByTag("lat").get(0).html());
      double lon = Double.parseDouble(poi.getElementsByTag("lng").get(0).html());
      int code = Integer.parseInt(poi.getElementsByTag("stopnumber").get(0).html());

      Stop stop = new Stop(code, lat, lon, address, agency);
      boolean alreadyKnown = StopSiteCrawler.stopsFile.get("stop_code").contains(code);
      if (!alreadyKnown) {
        stop.write();
      }
      sequence.push(stop);
    }
  }

  stopSequences.put(key, sequence);
  return sequence;
}
public Collection<String> extractSubscribedUser(final String htmlContent) { // logger.debug("htmlContent:\n" + htmlContent); final List<String> result = new ArrayList<String>(); final Document document = Jsoup.parse(htmlContent); final Elements tables = document.getElementsByTag("table"); for (final Element table : tables) { if (isSubscriptTable(table)) { for (final Element tr : table.getElementsByTag("tr")) { final Elements tds = tr.getElementsByTag("td"); if (!tds.isEmpty()) { final String name = tds.get(0).text(); if (name != null) { final String nameTrimed = name; if (nameTrimed.length() > 1) { logger.debug("found subscription for user: '******'"); result.add(nameTrimed); } } } } } } logger.debug("found " + result.size() + " subscribed users in htmlcontent"); return result; }
/**
 * Downloads the mess menu page and fills the {@code breakfast}, {@code lunch} and {@code dinner}
 * collections with the item texts of the selected mess.
 *
 * @param mess {@code mess[0]} selects the menu: {@code "dh1"} uses the first {@code tbody},
 *     anything else the second
 * @return {@code true} when the page was parsed, {@code false} after an {@link IOException} or an
 *     {@link IndexOutOfBoundsException}
 */
@Override
protected Boolean doInBackground(String... mess) {
  try {
    Document page = Jsoup.connect("http://messmenu.snu.in/messMenu.php").get();

    int menuIndex = mess[0].equals("dh1") ? 0 : 1;
    Elements cells = page.getElementsByTag("tbody").get(menuIndex).getElementsByTag("td");

    // Resolve all three cells before touching the result lists, so that a short table
    // leaves the lists untouched.
    Elements breakfastItems = cells.get(1).children();
    Elements lunchItems = cells.get(2).children();
    Elements dinnerItems = cells.get(3).children();

    for (Element item : breakfastItems) {
      breakfast.add(item.text());
    }
    for (Element item : lunchItems) {
      lunch.add(item.text());
    }
    for (Element item : dinnerItems) {
      dinner.add(item.text());
    }
    return true;
  } catch (IOException | IndexOutOfBoundsException e) {
    e.printStackTrace();
    return false;
  }
}
/**
 * Converts one search-result row into a {@link Name} and appends it to {@code results}.
 *
 * <p>The thumbnail comes from the {@code img} inside the {@code primary_photo} cell; URL and name
 * come from the first link of the {@code result_text} cell. If that cell has a {@code small}
 * element, its text (with one pair of surrounding parentheses removed) is interpreted as follows:
 *
 * <ul>
 *   <li>with a comma: the part before it is the job, the part after it the reference title;
 *   <li>without a comma but ending in a parenthesised number: the whole text is the reference
 *       title;
 *   <li>otherwise: the whole text is the job.
 * </ul>
 *
 * @param query the search query (not used here)
 * @param options search options (not used here)
 * @param tr the table row to parse
 * @param results list receiving the parsed entry
 */
@Override
protected void parseRow(
    final String query, final int options, final Element tr, final List<Name> results) {
  final Element photoCell = tr.getElementsByAttributeValue("class", "primary_photo").first();
  final String thumbnailUrl = photoCell.getElementsByTag("img").first().attr("src");

  final Element textCell = tr.getElementsByAttributeValue("class", "result_text").first();
  final Element nameLink = textCell.getElementsByTag("a").first();
  final String url = Imdb.BASE_URL + nameLink.attr("href");
  final String name = nameLink.ownText();

  String job = "";
  Reference ref = null;

  final Element small = textCell.getElementsByTag("small").first();
  if (small != null) {
    final String refUrl = Imdb.BASE_URL + small.getElementsByTag("a").first().attr("href");

    String desc = small.text();
    if (desc.startsWith("(") && desc.endsWith(")")) {
      desc = desc.substring(1, desc.length() - 1);
    }

    final int comma = desc.indexOf(',');
    if (comma >= 0) {
      job = desc.substring(0, comma).trim();
      ref = new Reference(refUrl, desc.substring(comma + 1).trim());
    } else if (desc.matches(".+\\(\\d+\\)")) {
      ref = new Reference(refUrl, desc.trim());
    } else {
      job = desc;
    }
  }

  results.add(new Name(url, thumbnailUrl, name, job, ref));
}
private JCas computeCommentCas(Element comment) throws UIMAException { JCas cCas = JCasFactory.createJCas(); String cid = comment.attr("CID"); String cuserid = comment.attr("CUSERID"); // String cgold = comment.attr("CGOLD"); // String cgold = getgold(comment.attr("CGOLD")); // String cgold_yn = comment.attr("CGOLD_YN"); String csubject = comment.getElementsByTag("CSubject").get(0).text(); String cbody = comment.getElementsByTag("CBody").get(0).text(); /** Setup comment CAS */ cCas.reset(); cCas.setDocumentLanguage("en"); String commentText = TextNormalizer.normalize(SubjectBodyAggregator.getCommentText(csubject, cbody)); cCas.setDocumentText(commentText); // cCas.setDocumentText(csubject + ". " + cbody); /** Run the UIMA pipeline */ SimplePipeline.runPipeline(cCas, this.analysisEngineList); // this.analyzer.analyze(commentCas, new SimpleContent("c-" + cid, csubject + ". " + cbody)); return cCas; }
/**
 * Scrapes the author index and returns one map per author with the keys {@code "author"} and
 * {@code "url"}.
 *
 * <p>The loop bounds currently cover only the letter {@code 'a'}. Any exception is printed and
 * the authors collected so far are returned.
 *
 * @param params unused
 * @return the collected author entries (possibly empty)
 */
@Override
protected ArrayList<HashMap<String, String>> doInBackground(Void... params) {
  ArrayList<HashMap<String, String>> authors = new ArrayList<HashMap<String, String>>();
  try {
    for (char letter = 'a'; letter <= 'a'; letter++) {
      URL indexUrl = new URL("http://www.liberliber.it/audiolibri/" + letter + "/index.htm");

      // Walk down the fixed page layout to the <ul> that holds the author list.
      Element authorList =
          Jsoup.parse(indexUrl, 5000)
              .getElementById("riga02_colonna02")
              .getElementsByClass("contenuto_cornice")
              .first()
              .getElementsByTag("tbody")
              .first()
              .getElementsByTag("tr")
              .get(1)
              .getElementsByTag("td")
              .get(1)
              .getElementsByTag("ul")
              .first();

      for (Element item : authorList.getElementsByTag("li")) {
        HashMap<String, String> entry = new HashMap<String, String>();
        Element first = item.getAllElements().first();
        entry.put("author", first.text());
        // unwrap() removes the element from the tree and hands back its first child node,
        // whose href is then resolved to an absolute URL.
        entry.put("url", first.unwrap().absUrl("href"));
        authors.add(entry);
      }
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
  return authors;
}
/**
 * Loads the home page on a worker thread and returns one {@link EntryModel} per {@code
 * home_portada_bg} element.
 *
 * <p>The calling thread blocks until the worker has finished. Elements that lack a link or an
 * image are skipped, so a single malformed tile no longer ends the worker with an uncaught
 * {@link NullPointerException}. An {@link IOException} during the download is printed and
 * leaves the result empty or partial.
 *
 * @return the parsed entries, or {@code null} if the calling thread was interrupted while
 *     waiting (its interrupt flag is restored in that case)
 */
public static ArrayList<EntryModel> getPopularContent() {
  final ArrayList<EntryModel> result = new ArrayList<>();
  Thread thread =
      new Thread(
          () -> {
            try {
              Document document = Jsoup.connect("http://jkanime.net/").get();
              for (Element element : document.getElementsByClass("home_portada_bg")) {
                Element link = element.getElementsByTag("a").first();
                Element image = element.getElementsByTag("img").first();
                if (link == null || image == null) {
                  continue; // first() yields null when the tag is absent
                }
                result.add(
                    new EntryModel(
                        Constants.TYPE_SHOW,
                        link.text(),
                        link.attr("abs:href"),
                        image.attr("src")));
              }
            } catch (IOException e) {
              e.printStackTrace();
            }
          });
  thread.start();
  try {
    thread.join();
    return result;
  } catch (InterruptedException e) {
    // Preserve the interruption for callers further up the stack.
    Thread.currentThread().interrupt();
    e.printStackTrace();
    return null;
  }
}
/**
 * Requests the inbound timetable for the bus and stop of {@code arrival} and returns the stops
 * listed in the response.
 *
 * <p>The response body is parsed as HTML. Every {@code li} of the first {@code busroutelist}
 * element yields one {@link Arrival}: the time is the first five characters of the {@code
 * routelist-time} text; the stop code is derived from the link in the {@code
 * routelist-destination} element by stripping the {@code /v3/uk/bus/stop/} prefix and everything
 * from the next slash on.
 *
 * @param arrival supplies operator, route and stop code for the request
 * @return one arrival per listed stop, in document order
 * @throws IOException if the HTTP status is not 200 (message is the reason phrase)
 * @throws Exception on any other failure while requesting or parsing
 */
public List<Arrival> busTimetable(final Arrival arrival) throws Exception {
  final Calendar now = Calendar.getInstance(Locale.UK);
  final String path =
      String.format(
          "v3/uk/bus/route/%s/%s/inbound/%s/%s/%s/timetable",
          arrival.bus.operator,
          arrival.bus.route,
          arrival.stop.atcocode,
          dateFormat.format(now.getTime()),
          timeFormat.format(now.getTime()));
  final Uri url =
      Uri.parse("http://transportapi.com")
          .buildUpon()
          .path(path)
          .appendQueryParameter("api_key", apiKey)
          .appendQueryParameter("app_id", appId)
          .appendQueryParameter("group", "no")
          .build();
  Log.d("JSON API", String.format("Requesting %s", url));

  final HttpResponse response = http.execute(new HttpGet(url.toString()));
  final StatusLine status = response.getStatusLine();
  if (status.getStatusCode() != HttpStatus.SC_OK) {
    response.getEntity().getContent().close();
    throw new IOException(status.getReasonPhrase());
  }

  final Document doc = Jsoup.parse(EntityUtils.toString(response.getEntity()), url.toString());
  final Elements items =
      doc.getElementsByClass("busroutelist").first().getElementsByTag("li");

  final String stopPrefix = "/v3/uk/bus/stop/";
  ArrayList<Arrival> result = new ArrayList<Arrival>();
  for (Element item : items) {
    Element timeElement = item.getElementsByClass("routelist-time").first();
    Time desttime = parseSimpleTime(timeElement.text().substring(0, 5));

    Element destElement = item.getElementsByClass("routelist-destination").first();
    String destcode = destElement.getElementsByTag("a").first().attr("href");
    if (destcode.startsWith(stopPrefix)) {
      destcode = destcode.substring(stopPrefix.length());
    }
    int slash = destcode.indexOf('/');
    if (slash > 0) {
      destcode = destcode.substring(0, slash);
    }
    String destname = destElement.text();

    result.add(new Arrival(arrival.bus, new Stop(destcode, destname), desttime));
  }
  return result;
}
public List<MersHPVO> mersHPData() { List<MersHPVO> list = new ArrayList<MersHPVO>(); try { Document doc = Jsoup.connect("http://www.cdc.go.kr/CDC/cms/content/16/63316_view.html").get(); // System.out.println(doc); Elements trs = doc.select("table tbody tr"); // System.out.println(trs); String data = ""; for (Element tr : trs) { Iterator<Element> it = tr.getElementsByTag("td").iterator(); int size = tr.getElementsByTag("td").size(); MersHPVO vo = new MersHPVO(); if (size == 5) { it.next().html(); vo.setGugun(it.next().html()); vo.setName(it.next().html()); vo.setDuration(it.next().html()); vo.setNum(it.next().html()); } else { vo.setGugun(it.next().html()); vo.setName(it.next().html()); vo.setDuration(it.next().html()); vo.setNum(it.next().html()); } list.add(vo); // if(i==2) break; /*while(it.hasNext()) { MersHPVO vo=new MersHPVO(); String str=it.next().html(); if(str.startsWith("<strong>")) { vo.setGugun(it.next().html()); vo.setName(it.next().html()); vo.setDuration(it.next().html()); vo.setNum(it.next().html()); } else { vo.setGugun(str); vo.setName(it.next().html()); vo.setDuration(it.next().html()); vo.setNum(it.next().html()); } list.add(vo); }*/ } } catch (Exception ex) { System.out.println(ex.getMessage()); } return list; }
/**
 * Tells whether {@code table} is a subscription table, i.e. whether the first {@code th} of its
 * first row contains the text {@code "Teilnehmer"}.
 *
 * @param table the table element to inspect
 * @return {@code true} if the header matches, {@code false} otherwise (including tables without
 *     rows or without header cells in the first row)
 */
private boolean isSubscriptTable(final Element table) {
  final Elements rows = table.getElementsByTag("tr");
  if (rows == null || rows.isEmpty()) {
    return false;
  }

  final Elements headerCells = rows.get(0).getElementsByTag("th");
  if (headerCells == null || headerCells.isEmpty()) {
    return false;
  }

  final String caption = headerCells.get(0).text();
  return caption != null && caption.contains("Teilnehmer");
}
/**
 * Returns the text of the {@code i}-th element: its own text followed by the own text of its
 * first {@code a} and of its first {@code span} descendant, when present.
 *
 * @param rowElements the elements of one row
 * @param i index of the element to read
 * @return the concatenated text, without separators
 */
private String getColumnTextContent(Elements rowElements, int i) {
  Element cell = rowElements.get(i);
  Element link = cell.getElementsByTag("a").first();
  Element span = cell.getElementsByTag("span").first();

  StringBuilder combined = new StringBuilder().append(cell.ownText());
  if (link != null) {
    combined.append(link.ownText());
  }
  if (span != null) {
    combined.append(span.ownText());
  }
  return combined.toString();
}
/**
 * Builds a {@link VSEStructureElement.StudyType} from the three tables of a study-type page.
 *
 * <ul>
 *   <li>Table 0: the study type name is the second cell of the fourth row.
 *   <li>Table 1: one programme per row. The second cell holds {@code "<codes> <name>"} where the
 *       codes are dash-separated; the last code goes to the programme, the first to the study
 *       type. The link in the sixth cell supplies the arguments for {@code runTask}, whose
 *       callback fills the programme's fields.
 *   <li>Table 2: one specialization per row; the second cell holds {@code "<code> <name>"}.
 * </ul>
 *
 * <p>In tables 1 and 2, processing stops at the first row whose first cell has a {@code width}
 * attribute.
 *
 * @param doc the loaded page
 * @return the populated study type
 * @throws Exception if the page does not have the expected layout or {@code runTask} fails
 */
private VSEStructureElement.StudyType handleProgramData(Document doc) throws Exception {
  VSEStructureElement.StudyType studyType = new VSEStructureElement.StudyType();
  Elements tables = doc.body().select("table");

  Element nameCell = tables.get(0).select("tbody tr").get(3).select("td").get(1);
  studyType.name = nameCell.text();

  for (Element row : tables.get(1).select("tbody tr")) {
    Elements cells = row.getElementsByTag("td");
    if (cells.get(0).hasAttr("width")) {
      break;
    }

    String href = cells.get(5).getElementsByTag("a").get(0).attr("href");
    Map<String, String> args = HttpRequestBuilder.getGetArguments(href, '=', ';');

    String label = cells.get(1).text();
    final VSEStructureElement.Programme programme = new VSEStructureElement.Programme();
    int firstSpace = label.indexOf(" ");
    String[] codes = label.substring(0, firstSpace).split("-");
    programme.name = label.substring(firstSpace + 1);
    programme.addCode(codes[codes.length - 1]);
    studyType.addCode(codes[0]);

    runTask(
        args,
        new OnDocumentLoaded() {
          @Override
          public void loaded(Document document) throws Exception {
            programme.fields = handleFieldsData(document);
          }
        });
    studyType.programmes.add(programme);
  }

  for (Element row : tables.get(2).select("tbody tr")) {
    Elements cells = row.getElementsByTag("td");
    if (cells.get(0).hasAttr("width")) {
      break;
    }

    String[] codeAndName = cells.get(1).text().split(" ", 2);
    VSEStructureElement specialization = new VSEStructureElement();
    specialization.addCode(codeAndName[0]);
    specialization.name = codeAndName[1];
    studyType.specializations.add(specialization);
  }

  return studyType;
}
/** * Parse nasdq page and write in hbase * * @param symbol */ public static void parseUSSymbols(String symbol) { if (!Hbase.getData(symbol).equals("")) { // System.out.println(symbol + " Exists!"); return; } String result = HttpRequest.sendPost( "http://www.nasdaq.com/symbol/" + symbol.toLowerCase() + "/historical", length + "|false|" + symbol); if (result.equals("")) { WriteError(symbol); System.out.println(symbol + " result error"); return; } // System.out.println(result); Document doc = Jsoup.parse(result); JSONArray HistoricalData = new JSONArray(); try { Element body = doc.getElementsByTag("tbody").get(0); // System.out.println(body.toString()); Elements nodes = body.getElementsByTag("tr"); if (nodes.size() == 0) { WriteError(symbol); System.out.println(symbol + " size 0"); return; } // System.out.println(nodes.size()); for (Element node : nodes) { JSONArray DailyData = new JSONArray(); Elements units = node.getElementsByTag("td"); for (Element unit : units) { if (!unit.text().equals("")) { DailyData.put(unit.text()); } } if (DailyData.length() > 0) { HistoricalData.put(DailyData); } } Hbase.addData(symbol, type, HistoricalData.toString()); // System.out.println(symbol + " done"); } catch (Exception e) { if (handleError) { errors.add(symbol); } else { WriteError(symbol); System.out.println(symbol + " parsing error"); } // TODO: handle exception } }
/**
 * Populates this object's declared fields from the child elements of an XML/HTML element.
 *
 * <p>For every declared field of type {@code String}, {@code Integer}, {@code Float} or {@code
 * List<String>}, the descendants of {@code element} whose tag equals the field name are looked
 * up. Scalar fields take the (parsed) text of the first match; list fields take the texts of all
 * matches. Fields without a matching element, and fields of other types, are left unchanged. An
 * {@link IllegalAccessException} while assigning is printed and otherwise ignored.
 *
 * @param element the element whose descendants supply the field values
 * @throws NumberFormatException if the text for an {@code Integer} or {@code Float} field cannot
 *     be parsed
 */
public void dealelement(Element element) {
  for (Field field : this.getClass().getDeclaredFields()) {
    String typeName = field.getGenericType().toString();
    String tag = field.getName();
    Object value = null;

    switch (typeName) {
      case "class java.lang.String":
        {
          Element node = element.getElementsByTag(tag).first();
          if (node != null) {
            value = node.text();
          }
          break;
        }
      case "class java.lang.Integer":
        {
          Element node = element.getElementsByTag(tag).first();
          if (node != null) {
            value = Integer.parseInt(node.text());
          }
          break;
        }
      case "class java.lang.Float":
        {
          Element node = element.getElementsByTag(tag).first();
          if (node != null) {
            value = Float.parseFloat(node.text());
          }
          break;
        }
      case "java.util.List<java.lang.String>":
        {
          Elements nodes = element.getElementsByTag(tag);
          if (nodes.size() > 0) {
            List<String> texts = new ArrayList<>();
            for (Element node : nodes) {
              texts.add(node.text());
            }
            value = texts;
          }
          break;
        }
      default:
        break;
    }

    if (value == null) {
      continue; // unsupported type or nothing found for this field
    }
    try {
      field.set(this, value);
    } catch (IllegalAccessException e) {
      e.printStackTrace();
    }
  }
}
private void getDatafromJsoup(String url) { // TODO Auto-generated method stub try { Document doc = Jsoup.connect(url).get(); // Elements content = doc.getElementsByClass("cell item"); Elements header = doc.getElementsByClass("topic_content"); Log.e("topic_content", header.text()); title = header.text(); Elements content = doc.getElementsByTag("tbody"); for (Element link : content) { DetailEntity entity = new DetailEntity(); Elements avatar = link.getElementsByTag("img"); { String avaterLink = avatar.attr("src"); if (avaterLink.startsWith("//cdn.")) { entity.setAvater("http:" + avaterLink); } } Elements reply_content = link.getElementsByClass("reply_content"); Log.e("reply_content", reply_content.text()); entity.setReply_count(reply_content.text()); Elements title = link.getElementsByTag("a"); if (title.attr("href").startsWith("/member/")) { Log.e("title", title.text()); entity.setTitle(title.text()); } Log.e( "other", link.getElementsByClass("fade small").text() + link.getElementsByClass("small fade").text()); if (!TextUtils.isEmpty(reply_content.text())) entities.add(entity); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
/**
 * Extracts the article link from a teaser element.
 *
 * <p>For an {@code m-hero__slot} element the link is the first {@code m-hero__slot-link}
 * descendant; for an {@code m-entry-slot} element it is the first {@code a} inside the first
 * {@code h3}. Missing pieces are detected with explicit checks instead of throwing and catching
 * {@link NullPointerException}.
 *
 * @param element the teaser element, may be {@code null}
 * @return the {@code href} of the link, or {@code null} if the element is {@code null}, has
 *     neither class, or lacks the expected link
 */
String parseArticleLink(Element element) {
  if (element == null) {
    return null;
  }

  Element anchor = null;
  if (element.classNames().contains("m-hero__slot")) {
    anchor = element.getElementsByClass("m-hero__slot-link").first();
  } else if (element.classNames().contains("m-entry-slot")) {
    Element heading = element.getElementsByTag("h3").first();
    if (heading != null) {
      anchor = heading.getElementsByTag("a").first();
    }
  }

  return anchor == null ? null : anchor.attr("href");
}
/**
 * Extracts the title from a teaser element.
 *
 * <p>For an {@code m-hero__slot} element the title is the text of the first {@code h2} inside the
 * first {@code m-hero__slot-link} descendant; for an {@code m-entry-slot} element it is the text
 * of the first {@code h3}. Missing pieces are detected with explicit checks instead of throwing
 * and catching {@link NullPointerException}.
 *
 * @param element the teaser element, may be {@code null}
 * @return the title text, or {@code "Unknown title"} if the element is {@code null}, has neither
 *     class, or lacks the expected heading
 */
String parseTitle(Element element) {
  final String unknownTitle = "Unknown title";
  if (element == null) {
    return unknownTitle;
  }

  Element heading = null;
  if (element.classNames().contains("m-hero__slot")) {
    Element link = element.getElementsByClass("m-hero__slot-link").first();
    if (link != null) {
      heading = link.getElementsByTag("h2").first();
    }
  } else if (element.classNames().contains("m-entry-slot")) {
    heading = element.getElementsByTag("h3").first();
  }

  return heading == null ? unknownTitle : heading.text();
}
// @PostConstruct public void init() { try { Document doc = Jsoup.connect(CURRENCY_POINT).get(); Element oshadBankContainer = doc.select("#7oiylpmiow8iy1sma9a").first(); // Oshadbank id Element currenciesContainer = oshadBankContainer.getElementsByTag("currencies").first(); for (Element currencyContainer : currenciesContainer.getElementsByTag("c")) { Currency currency = Currency.valueOf(currencyContainer.id()); currency.setBuyRate(new BigDecimal(currencyContainer.attributes().get("ar"))); currency.setSellRate(new BigDecimal(currencyContainer.attributes().get("br"))); LOGGER.info(currency.string() + " was inited"); } } catch (IOException e) { LOGGER.error(e); } }
/**
 * Submits the login form with the given credentials and returns the server response.
 *
 * <p>The login page is fetched first and every {@code input} of its {@code loginForm} is copied
 * into the request data. Time zone, user name, password and the current time in seconds are then
 * set. If {@code captchaData} is non-empty, its first three entries are sent as captcha solution,
 * token and fid; otherwise all captcha-related fields are removed from the request.
 *
 * @param username account name
 * @param password account password
 * @param captchaData empty, or solution, token and fid in that order
 * @return the response of the POST request
 * @throws ConnectionException if an {@link IOException} occurs while loading or submitting
 */
private Response postToLogin(String username, String password, String[] captchaData)
    throws ConnectionException {
  try {
    Map<String, String> formData = new HashMap<>();

    Element loginForm =
        Jsoup.connect(Endpoints.LOGIN_URL.url()).get().getElementById("loginForm");
    for (Element input : loginForm.getElementsByTag("input")) {
      formData.put(input.attr("name"), input.attr("value"));
    }

    Date now = new Date();
    String zoneOffset = new SimpleDateFormat("XXX").format(now).replace(':', '|');
    formData.put("timezone_field", zoneOffset);
    formData.put("username", username);
    formData.put("password", password);
    formData.put("js_time", String.valueOf(now.getTime() / 1000));

    if (captchaData.length > 0) {
      formData.put("hip_solution", captchaData[0]);
      formData.put("hip_token", captchaData[1]);
      formData.put("fid", captchaData[2]);
      formData.put("hip_type", "visual");
      formData.put("captcha_provider", "Hip");
    } else {
      String[] captchaFields = {
        "hip_solution", "hip_token", "fid", "hip_type", "captcha_provider"
      };
      for (String field : captchaFields) {
        formData.remove(field);
      }
    }

    return Jsoup.connect(Endpoints.LOGIN_URL.url()).data(formData).method(Method.POST).execute();
  } catch (IOException e) {
    throw ExceptionHandler.generateException("While submitting credentials", e);
  }
}
/**
 * Initializes this entry from one result row.
 *
 * <p>The release name is carved out of the {@code data-sc-params} attribute found in the first
 * cell and passed on as {@link ShowData}. Seeds and peers are read from the fifth and sixth cell.
 * The download link comes from the "Download torrent file" anchor: the query string is dropped,
 * {@code .torrent} becomes {@code /temp.torrent}, and a protocol-relative link gets an {@code
 * http:} prefix.
 *
 * @param source the row element to read
 */
@Override
protected void initialize(Element source) {
  Elements cells = source.getElementsByTag("td");
  Element firstCell = cells.get(0);

  String params = firstCell.select("[data-sc-params]").get(0).attr("data-sc-params");
  String name =
      params
          .replaceAll("\\{ 'name': '", "")
          .replaceAll("', 'magnet':.*", "")
          .replaceAll("%20", "\\.")
          .replaceAll("%5B.*", "");
  initialize(ShowData.fromFilename(name));

  seeds = Integer.parseInt(cells.get(4).text());
  peers = Integer.parseInt(cells.get(5).text());

  Element torrentAnchor = firstCell.select("div a[title=Download torrent file]").get(0);
  String withoutQuery = torrentAnchor.attr("href").split("\\?")[0];
  String link = withoutQuery.replaceAll("\\.torrent", "/temp\\.torrent");
  downloadLink = link.startsWith("//") ? "http:" + link : link;
}
/**
 * Runs a search on a worker thread and returns one {@link EntryModel} per {@code search} element
 * of the result page.
 *
 * <p>Spaces in the query are replaced by underscores to build the URL. The calling thread blocks
 * until the worker has finished. Results that lack a {@code titl} element or an image are
 * skipped, so a single malformed result no longer ends the worker with an uncaught {@link
 * NullPointerException}. An {@link IOException} during the download is printed and leaves the
 * result empty or partial.
 *
 * @param query the search text
 * @return the parsed entries, or {@code null} if the calling thread was interrupted while
 *     waiting (its interrupt flag is restored in that case)
 */
public static ArrayList<EntryModel> getSearchResults(final String query) {
  final ArrayList<EntryModel> result = new ArrayList<>();
  Thread thread =
      new Thread(
          () -> {
            try {
              Document document =
                  Jsoup.connect("http://jkanime.net/buscar/" + query.replace(" ", "_")).get();
              for (Element element : document.getElementsByClass("search")) {
                Element titleElement = element.getElementsByClass("titl").first();
                Element image = element.getElementsByTag("img").first();
                if (titleElement == null || image == null) {
                  continue; // first() yields null when nothing matches
                }
                result.add(
                    new EntryModel(
                        Constants.TYPE_SHOW,
                        titleElement.text(),
                        titleElement.attr("abs:href"),
                        image.attr("src")));
              }
            } catch (IOException e) {
              e.printStackTrace();
            }
          });
  thread.start();
  try {
    thread.join();
    return result;
  } catch (InterruptedException e) {
    // Preserve the interruption for callers further up the stack.
    Thread.currentThread().interrupt();
    e.printStackTrace();
    return null;
  }
}
/**
 * Parses the sample report once and fills {@code summary_table} and {@code summary_error_table}.
 *
 * <p>Within the {@code market-feature-header} element, the i-th {@code rTableHead} is paired with
 * the i-th {@code rTableCell}, and the i-th {@code rTableHeadFailed} with the i-th {@code
 * rTableCellFailed} and the i-th link.
 */
@BeforeClass
public static void setUp() {
  File input =
      new File("src/test/java/org/jenkinsci/plugins/marketfeaturereport/market_features.html");
  Document doc = null;
  try {
    doc = Jsoup.parse(input, "UTF-8");
  } catch (IOException e) {
    e.printStackTrace();
  }
  assert doc != null;

  Element content = doc.getElementById("market-feature-header");
  Elements header = content.getElementsByClass("rTableHead");
  Elements failedHeader = content.getElementsByClass("rTableHeadFailed");
  Elements rows = content.getElementsByClass("rTableCell");
  Elements failedRows = content.getElementsByClass("rTableCellFailed");

  for (int i = 0; i < header.size(); i++) {
    summary_table.put(header.get(i).text(), rows.get(i).text());
  }

  Elements errorLinks = content.getElementsByTag("a");
  for (int i = 0; i < failedHeader.size(); i++) {
    Element failed = failedHeader.get(i);
    summary_table.put(failed.text(), failedRows.get(i).text());
    String linkHref = errorLinks.get(i).attr("href");
    summary_error_table.put(failed.text(), linkHref);
  }
}
/**
 * Converts every {@code img} below {@code aInContent} into a node, adjusting the image first.
 *
 * <p>Images with class {@code float-left} or {@code float-right} additionally receive {@code
 * alignleft} or {@code alignright}. Every image except the first loses its {@code width} and
 * {@code height} attributes. The adjustments are made on the elements of {@code aInContent}
 * itself; the returned collection holds clones of the converted nodes.
 *
 * @param aInContent element to search for images
 * @return the cloned image nodes in document order (empty if there are no images)
 */
private static Collection<Node> extractImageNodes(Element aInContent) {
  Collection<Node> lResult = new LinkedList<>();
  Elements lImages = aInContent.getElementsByTag("img");

  for (int lIndex = 0; lIndex < lImages.size(); lIndex++) {
    Element lImage = lImages.get(lIndex);

    if (lImage.hasClass("float-left")) {
      if (!lImage.hasClass("alignleft")) {
        lImage.addClass("alignleft");
      }
    } else if (lImage.hasClass("float-right")) {
      if (!lImage.hasClass("alignright")) {
        lImage.addClass("alignright");
      }
    }

    boolean lIsFirst = lIndex == 0;
    if (!lIsFirst) {
      lImage.removeAttr("width");
      lImage.removeAttr("height");
    }

    lResult.add(toNode(lImage).clone());
  }
  return lResult;
}
private boolean jsoupImpl(InputStream is) throws Exception { Document doc = Jsoup.parse(inputStreamToStringBuilder(is).toString()); Element element = doc.getElementById("FundHoldSharesTable"); if (element == null) { return false; } element = element.getElementsByTag("tbody").first(); Elements elements = element.getElementsByTag("tr"); // for (Element node : elements) { // System.out.println(node.text()); // } for (int i = 1; i < elements.size(); i++) { String text = elements.get(i).text(); map.put(text.split(" ")[0], text); } return true; }
/**
 * Copies title, description and link of the primary call-to-action from the web element into the
 * {@code primary_cta_v2} child of {@code midSizeUpperRightNode}.
 *
 * <p>Every piece that cannot be migrated appends the matching {@code Constants} message to
 * {@code sb}. The link URL is the absolute {@code href} (falling back to the raw attribute when
 * it cannot be made absolute), passed through {@code FrameworkUtils.getLocaleReference}.
 *
 * @param upperRightElement source element from the web page, may be {@code null}
 * @param midSizeUpperRightNode repository node expected to hold {@code primary_cta_v2}
 * @param locale locale used when resolving the link
 * @param urlMap URL mapping used when resolving the link
 * @throws RepositoryException (or one of the listed subclasses) if a repository operation fails
 */
private void migratePrimaryCta(
    Element upperRightElement, Node midSizeUpperRightNode, String locale, Map<String, String> urlMap)
    throws PathNotFoundException,
        ValueFormatException,
        VersionException,
        LockException,
        ConstraintViolationException,
        RepositoryException {
  if (upperRightElement == null) {
    sb.append(Constants.PRIMARY_CTA_COMPONENT_INWEB_NOT_FOUND);
    return;
  }
  if (!midSizeUpperRightNode.hasNode("primary_cta_v2")) {
    sb.append(Constants.PRIMARY_CTA_COMPONENT_NOT_FOUND);
    return;
  }

  Element title = upperRightElement.getElementsByTag("h3").first();
  Element description = upperRightElement.getElementsByTag("p").first();
  Element link = upperRightElement.getElementsByTag("a").first();
  Node ctaNode = midSizeUpperRightNode.getNode("primary_cta_v2");

  if (title == null) {
    sb.append(Constants.PRIMARY_CTA_TITLE_ELEMENT_NOT_FOUND);
  } else {
    ctaNode.setProperty("title", title.text());
  }

  if (description == null) {
    sb.append(Constants.PRIMARY_CTA_DESCRIPTION_ELEMENT_NOT_FOUND);
  } else {
    ctaNode.setProperty("description", description.text());
  }

  if (link == null) {
    sb.append(Constants.PRIMARY_CTA_ANCHOR_ELEMENT_NOT_FOUND);
    return;
  }
  ctaNode.setProperty("linktext", link.text());

  if (!ctaNode.hasNode("linkurl")) {
    sb.append(Constants.PRIMARY_CTA_LINK_URL_NODE_NOT_FOUND);
    return;
  }
  String aUrl = link.absUrl("href");
  if (aUrl.equals("")) {
    aUrl = link.attr("href");
  }
  aUrl = FrameworkUtils.getLocaleReference(aUrl, urlMap, locale, sb);
  ctaNode.getNode("linkurl").setProperty("url", aUrl);
}
/**
 * Tells whether the title overview contains an "Episode Guide" link for this title.
 *
 * @return {@code true} if some link inside {@code titleOverview} has the text "Episode Guide"
 *     (ignoring case) and an {@code href} containing this object's {@code id}
 */
private boolean hasSeason() {
  Element overview = getDoc().getElementById("titleOverview");
  for (Element link : overview.getElementsByTag("a")) {
    boolean isEpisodeGuide = link.text().equalsIgnoreCase("Episode Guide");
    if (isEpisodeGuide && link.attr("href").contains(id + "")) {
      return true;
    }
  }
  return false;
}
/**
 * Verifies that parsing a fragment without explicit head and body produces a document in which
 * the meta, link and title elements end up in the head and the paragraph in the body.
 */
@Test
public void createsDocumentStructure() {
  Document doc =
      Jsoup.parse(
          "<meta name=keywords /><link rel=stylesheet /><title>jsoup</title><p>Hello world</p>");
  Element head = doc.getHead();
  Element body = doc.getBody();

  // Overall shape: two top-level children, three in the head, one in the body.
  assertEquals(2, doc.children().size());
  assertEquals(3, head.children().size());
  assertEquals(1, body.children().size());

  // The meta element lives in the head only.
  Elements headMetas = head.getElementsByTag("meta");
  Elements bodyMetas = body.getElementsByTag("meta");
  assertEquals("keywords", headMetas.get(0).attr("name"));
  assertEquals(0, bodyMetas.size());

  // Title and text content.
  assertEquals("jsoup", doc.getTitle());
  assertEquals("Hello world", body.text());
  assertEquals("Hello world", body.children().get(0).text());
}
/**
 * Collects the columns of an author from the archive pages.
 *
 * <p>Pages {@code 1 .. aySayisi * 2 - 1} are requested via {@code getolderUrl}. Each link in the
 * page's {@code ul#article-list} yields one {@link KoseYazisi}, except links into the {@code
 * /haber/turkiye}, {@code /haber/secim_2015} and {@code /haber/diger} sections. The link text is
 * split on spaces: the first three tokens form the date string and the tokens from index 4 on
 * form the plot. The token at index 3 is skipped, as in the original — presumably a separator;
 * confirm against the page.
 *
 * <p>Date and plot are now actually trimmed (the results of {@code trim()} were previously
 * discarded). Pages without an article list and links with fewer than three tokens are skipped
 * instead of raising an exception. An {@link IOException} for a page is reported on {@code
 * System.err} and the remaining pages are still processed.
 *
 * @param koseYazari the author whose columns are collected
 * @param aySayisi number of months; determines how many archive pages are requested
 * @return the columns found, in page and document order
 */
public static List<KoseYazisi> getKoseYazisi(KoseYazari koseYazari, int aySayisi) {
  List<KoseYazisi> koseYazilari = new ArrayList<KoseYazisi>();
  String linkHref = "";
  for (int i = 1; i < aySayisi * 2; i++) {
    String url = getolderUrl(i, koseYazari.getTumYazilariLink(), koseYazari.getId());
    try {
      Document doc = Jsoup.connect(url).timeout(CUMHURIYET.timeout).get();
      Element articleList = doc.select("ul#article-list").first();
      if (articleList == null) {
        continue; // page without an article list
      }
      for (Element link : articleList.getElementsByTag("a")) {
        linkHref = link.attr("href");
        if (linkHref.contains("/haber/turkiye")
            || linkHref.contains("/haber/secim_2015")
            || linkHref.contains("/haber/diger")) {
          continue;
        }

        String[] items = link.text().split(" ");
        if (items.length < 3) {
          continue; // too short to contain a date
        }
        String dateString = joinTokens(items, 0, 3);
        String plot = joinTokens(items, 4, items.length);

        KoseYazisi koseYazisi =
            new KoseYazisi(Utils.getIdFromLink(linkHref), dateString, plot, linkHref);
        koseYazisi.setYazarAdi(koseYazari.getKoseYazariAdi());
        koseYazilari.add(koseYazisi);
      }
    } catch (IOException e) {
      System.err.println("Yazar id : " + koseYazari.getId() + "Link = " + linkHref);
      e.printStackTrace();
    }
  }
  return koseYazilari;
}

/** Joins {@code tokens[from..to)} with single spaces and trims the result. */
private static String joinTokens(String[] tokens, int from, int to) {
  StringBuilder joined = new StringBuilder();
  for (int j = from; j < to; j++) {
    joined.append(tokens[j]).append(' ');
  }
  return joined.toString().trim();
}
public List<MersVO> mersData() { List<MersVO> list = new ArrayList<MersVO>(); Date date = new Date(); SimpleDateFormat sdf = new SimpleDateFormat("yyyy-M-d"); StringTokenizer st = new StringTokenizer(sdf.format(date), "-"); int year = Integer.parseInt(st.nextToken()); int month = Integer.parseInt(st.nextToken()); int day = Integer.parseInt(st.nextToken()); try { Document doc = Jsoup.connect("http://www.cdc.go.kr/CDC/cms/content/15/63315_view.html").get(); // System.out.println(doc); Elements trs = doc.select("table tbody tr"); // System.out.println(trs); String data = ""; String[] temp = {"panel panel-primary", "panel panel-green", "panel panel-yellow"}; int i = 0; for (Element tr : trs) { Iterator<Element> it = tr.getElementsByTag("td").iterator(); // if(i==2) break; while (it.hasNext()) { MersVO vo = new MersVO(); vo.setType(it.next().text()); vo.setMers(it.next().text().replace("*", "")); /*vo.setYsum(it.next().text().replace(",", "")); vo.setPlus(it.next().text()); vo.setMinus(it.next().text());*/ if (data.equals("")) { data = it.next().text(); vo.setIng(data); } else { vo.setIng(data); } vo.setNsum(it.next().text().replace(",", "")); vo.setHouse(it.next().text()); vo.setOffice(it.next().text()); vo.setDis(it.next().text().replace(",", "")); vo.setDiv1(temp[i]); vo.setYear(year); vo.setMonth(month); if (i == 2) { vo.setDay(day - 1); } else { vo.setDay(day); } list.add(vo); i++; } } } catch (Exception ex) { System.out.println(ex.getMessage()); } return list; }
public static LinkedList<String> getHuXiuNewsDataList(String newsUrl) throws IOException { LinkedList<String> data = null; Elements majorElements; Element majorElement = null; String content = ""; Document document = Jsoup.connect(newsUrl).timeout(TIME_OUT).get(); majorElements = document.getElementsByClass("textbox-content"); if (!majorElements.isEmpty()) { data = new LinkedList<String>(); majorElement = majorElements.get(0); majorElements = majorElement.getElementsByTag("p"); if (!majorElements.isEmpty()) { for (int i = 0; i < majorElements.size(); i++) { majorElement = majorElements.get(i); Elements imgElements = majorElement.getElementsByTag("img"); if (!imgElements.isEmpty()) { content = imgElements.get(0).attr("src"); } else { if (content.contains("http://") || (content.contains("https://"))) { content = ""; } else { content = majorElement.text(); Elements bElements = majorElement.getElementsByTag("b"); if (!bElements.isEmpty()) { String strongString = bElements.get(0).text(); content = content.substring(strongString.length()); content = "$" + strongString + " $" + content; } content = FOUR_BLANK_SPACE + content; } // System.out.println("text =" + majorElement.text()); } if (!TextUtils.isEmpty(content)) { if (!content.contains("readmore.gif")) { data.add(content); } } } } } return data; }