@Override public Collection<News> crawl() { HashSet<News> news = new HashSet<>(); try { String startURL = Settings.HOMEPAGE; Document doc = Jsoup.connect(startURL).get(); Elements contents = doc.select("article"); // extract all articles out of src long counter = 1; for (Element content : contents) { // getting content for all article Elements articleLink = content.select("a.teaser__link"); Element img = articleLink.select("img").first(); String imageSrc = null; try { // try to clean image src imageSrc = img.attr("data-srcset"); imageSrc = imageSrc.split(",")[0].split(" ")[0]; } catch (Exception e) { } String title = articleLink.select("div.title__catchline").text(); String undertitle = articleLink.select("div.title__name").text(); String link = articleLink.select("[href]").attr("href"); news.add(new News(counter, title, undertitle, link, imageSrc, "DE")); counter++; } } catch (Exception ex) { System.out.println("Website not parsed!!"); return null; } return news; }
/**
 * Builds a {@code ParsedToken} from the markup of a single product and appends it to
 * {@code productinfoList}.
 *
 * <p>The token text comes from {@code strong.title}; the sales amount is every digit found in
 * {@code span.condition}, parsed as one integer. {@code Integer.parseInt} throws
 * {@code NumberFormatException} when that text contains no digit.
 *
 * @param productInfo elements describing one product
 */
private void buildResultList(Elements productInfo) {
  ParsedToken token = new ParsedToken();

  // NOTE(review): tagName("em") is called on the matched elements before reading their text,
  // exactly as in the original; confirm it is intended.
  String titleText = productInfo.select("strong.title").tagName("em").text();
  token.setText(titleText);

  String conditionText = productInfo.select("span.condition").tagName("em").text();
  String digitsOnly = conditionText.replaceAll("[^0-9]", "");
  token.setSalesamount(Integer.parseInt(digitsOnly));

  this.productinfoList.add(token);
}
@Override public void initialize(URL location, ResourceBundle resources) { // TODO 自動生成されたメソッド・スタブ addOption.setOnAction( event -> { for (int i = 0; i < cmb.size(); i++) { // System.out.println(cmb.get(i).getValue()); if (cmb.get(i).getValue() == null) { break; } else if (i == cmb.size() - 1) { addTask.setDisable(false); } } }); addTask.setOnAction( event -> { try { String url = webView.getEngine().getLocation(); System.out.println(url); Document document = Jsoup.connect(url).get(); Elements input = document.select("input"); Map params = new HashMap<String, String>(); for (ComboBox cmbx : cmb) { ValuePair vp = (ValuePair) cmbx.getValue(); params.put(vp.getName(), vp.getvalue()); } // System.out.println(input.select("[name=shop_bid]").first()); // System.out.println(input.select("[name=shop_bid]").first().val()); params.put("shop_bid", input.select("[name=shop_bid]").first().val()); params.put("item_id", input.select("[name=item_id]").first().val()); params.put("__event", input.select("[name=__event]").first().val()); params.put("units", "1"); Map map = new HashMap<String, Long>(); // System.out.println(document.select("#stime").size()); if (document.select("#stime").size() != 0) { System.out.println(document.select("#stime")); map.put("stime", Long.parseLong((input.select("#stime").first().val()))); map.put("etime", Long.parseLong((input.select("#etime").first().val()))); } else { map = null; } BuyTask task = new BuyTask(url, params, map); task.call(); this.getScene().getWindow().hide(); } catch (Exception e) { // TODO 自動生成された catch ブロック e.printStackTrace(); } }); }
/**
 * Returns the outer HTML of the page's {@code div.mw-content-ltr} content after dropping
 * {@code div.noprint} and {@code div#stub} blocks and passing each content element through
 * {@code convertImagesToLocal}.
 *
 * @param postExtractionDetails not used by this implementation
 * @param doc parsed page to extract the content from
 * @return the cleaned content markup; empty when the page has no matching content element
 */
@Override
public String getPostSectionString(PostExtractionDetails postExtractionDetails, Document doc) {
  Elements content = doc.select("div.mw-content-ltr");

  content.select("div.noprint").remove();
  content.select("div#stub").remove();

  for (int idx = 0; idx < content.size(); idx++) {
    convertImagesToLocal(content.get(idx));
  }

  return new StringBuilder().append(content.outerHtml()).toString();
}
/** * Parse search results from a search result site * * @param pUrl */ private void parseSearchResults(String pUrl) { LOGGER.info("Started parsing: " + pUrl); Document doc = null; doc = ParserUtils.connectGetUrl(ParserUtils.getUri(pUrl).toASCIIString()); doc.setBaseUri(DEFAULT_VSP_URL); Elements results = doc.select("div[class*=map-list-item]"); for (Element result : results) { PersistentEntity ent = new PersistentEntity(); Elements infoElement = result.select("div[class*=info-content]"); LOGGER.debug(infoElement.select("p[class*=establishment-category]").first().ownText()); String tmp = result .select("div[class*=info-content]") .select("p[class*=establishment-category]") .first() .ownText(); ent.setIndustry(new Utf8(tmp.split("/")[0])); ent.setLabel(new Utf8(tmp)); // getting same as value to where it is EylloLink link = ParserUtils.detectUrl( infoElement.select("p[class*=establishment-name]").select("a").first()); if (link != null) { LOGGER.debug(DEFAULT_VSP_URL + link.getLinkHref()); ent.putToSameAs( new Utf8(DEFAULT_VSP_URL + link.getLinkHref()), new Utf8(link.getLinkText())); ent.setName(new Utf8(link.getLinkText())); } // getting its address and phone PersistentPoint point = new PersistentPoint(); infoElement = result.select("div[class*=establishment-details]").select("p"); ent.addToTelephones(new Utf8(infoElement.get(0).ownText())); point.setAddress(new Utf8(infoElement.get(0).text())); if (!result.attr("data-lng").toString().equals("") && !result.attr("data-lat").toString().equals("")) { // Format in [lon, lat], note, the order of lon/lat here in order to conform with GeoJSON. point.addToCoordinates(Double.parseDouble(result.attr("data-lng"))); point.addToCoordinates(Double.parseDouble(result.attr("data-lat"))); point.setAccuracy(EylloLocation.GEOCODER_VERIF_ACC_HIGH); } ent.setPersistentpoint(point); ent.addToScenarioId(getScenarioId()); this.pEntities.add(ent); } LOGGER.info("Completed getting basic information from entities."); }
/**
 * Fetches "http://enib.net/" and appends one {@code News} item per element of class
 * {@code news} to this object's news list.
 *
 * <p>Each item is built from the {@code h1} text, the {@code h2} text and the {@code p}
 * paragraphs found under elements of class {@code markdown}, one paragraph per line.
 *
 * @return this object's news list; when the page cannot be loaded the list is returned as it
 *     was before the call
 */
public List<News> getNews() {
  Document doc;
  try {
    doc = Jsoup.connect("http://enib.net/").get();
  } catch (IOException e) {
    System.out.println("Can't load news");
    e.printStackTrace();
    // Nothing to parse: return what we already have instead of dereferencing a null document.
    return this.news;
  }

  String lineSeparator = System.getProperty("line.separator");
  for (Element entry : doc.getElementsByClass("news")) {
    String name = entry.select("h1").text();
    String information = entry.select("h2").text();

    StringBuilder body = new StringBuilder();
    for (Element paragraph : entry.getElementsByClass("markdown").select("p")) {
      body.append(paragraph.text()).append(lineSeparator);
    }

    this.news.add(new News(name, information, body.toString()));
  }
  return this.news;
}
/** * achieve the num of people him/her fellowed * * @param doc * @return */ private String getFellowPeopleNum(Document doc) { Elements friendHtml = doc.select("div[id=\"friend\"]"); Elements fellowPeopleNumHtml = null; if (friendHtml != null) { fellowPeopleNumHtml = friendHtml.select("a"); // 关注人数 if (fellowPeopleNumHtml != null) { String fellowPeopleNum = UtilsMethod.findFirstStringByRegex("成员[0-9]+", fellowPeopleNumHtml.text()); if (fellowPeopleNum != null) { fellowPeopleNum = fellowPeopleNum.replaceAll("[\\D]+", ""); if (fellowPeopleNum != null) { return fellowPeopleNum; } else { return null; } } else { return null; } } else { return null; } } else { return null; } }
/**
 * Finds the header column whose text contains today's date.
 *
 * <p>The date label is built from {@code today} (parsed with {@code DB_DATETIME_FORMATTER4}) as
 * month + "月" + two-digit day + "日"; the month is not zero-padded. The search starts at the
 * second {@code th} (index 1).
 *
 * @param dates elements containing the {@code th} header cells
 * @return index of the first matching header cell, or 0 when none matches
 * @throws ParseException when {@code today} cannot be parsed
 */
private static int parseDates(Elements dates) throws ParseException {
  Date parsedToday = DB_DATETIME_FORMATTER4.parse(today);
  Calendar cal = Calendar.getInstance();
  cal.setTime(parsedToday);

  int month = cal.get(Calendar.MONTH) + 1;
  int day = cal.get(Calendar.DAY_OF_MONTH);

  StringBuilder label = new StringBuilder();
  label.append(month).append("月");
  if (day < 10) {
    label.append("0");
  }
  label.append(day).append("日");
  String todayLabel = label.toString();

  Elements headerCells = dates.select("th");
  int idx = 1;
  while (idx < headerCells.size()) {
    String cellText = DBclass.xmlFilte(headerCells.get(idx).text());
    if (cellText.contains(todayLabel)) {
      return idx;
    }
    idx++;
  }
  return 0;
}
/**
 * Fetches the Investopedia IBM page and prints the text of the second table cell found under the
 * {@code MarketsSummary} element. Requires network access; makes no assertions.
 */
@Test
public void getStockQuoteFromWebsite() throws IOException {
  Document page = Jsoup.connect("http://www.investopedia.com/markets/stocks/ibm").get();

  Elements summaryTables = page.getElementById("MarketsSummary").getElementsByTag("table");
  Elements cells = summaryTables.select("td");

  System.out.println("---Start---");
  String secondCell = cells.get(1).text();
  System.out.println(secondCell);
}
public static void main(String args[]) throws IOException { // Element.ownText() // Step 1: To extract all labels and instances... Document doc = Jsoup.connect("http://127.0.0.1/master%20project/websites/home.php").get(); Elements labelElements = doc.getElementsByAttributeValue("id", "label"); Elements instanceElements = doc.getElementsByAttributeValue("id", "instance"); // Step 2: To pair C(l,i) using single link clustering algorithm... NOTE: special Date case... HashMap<String, String[]> singleLinkClusterMap = new HashMap<String, String[]>(); // singleLinkClusterMap.put(key, value) for (int i = 0; i < labelElements.size(); i++) { // Keys... String key = labelElements.select("[tag=" + i + "]").text(); if (!key.toLowerCase().equals("date")) { // Values... Elements instanceElementsForThisKey = instanceElements.select("[tag=" + i + "]"); String[] values = new String[instanceElementsForThisKey.size()]; for (int j = 0; j < instanceElementsForThisKey.size(); j++) { values[j] = instanceElementsForThisKey.remove(0).text(); } singleLinkClusterMap.put(key, values); } else { Date date = new Date(); String modifiedDate = new SimpleDateFormat("yyyy-MM-dd").format(date); String[] values = {modifiedDate.toString()}; singleLinkClusterMap.put(key, values); } } System.out.println("label:" + singleLinkClusterMap); // Step 3: To create base Ontology // TEST: to fire the source page with a request query then extract the data from resulting // page... // Step 4: To create one(or more) slave to which the base ontology and interface website address // is sent to. // This(These) slaves will then repeat steps 1 and 2 then create their own Ontology O' // Step 5: The new ontology O' will then be sent back to the Master to merge with original O. }
/**
 * Gives every {@code td} under the given elements a fixed inline border style and removes its
 * {@code class} attribute.
 *
 * @param pcont elements whose table cells are restyled in place
 * @return the same {@code pcont} instance
 */
private Elements addStyleForTable(Elements pcont) {
  Elements cells = pcont.select("td");
  for (int idx = 0; idx < cells.size(); idx++) {
    Element cell = cells.get(idx);
    cell.attr("style", "border: 1px solid #aaa;width:auto");
    cell.removeAttr("class");
  }
  return pcont;
}
public void parseWebLink() throws IOException { Document doc = Jsoup.connect(url).get(); Elements info = doc.select("div[id*=MediaStoryList"); Elements links = info.select("a[href]"); boolean writeOrNot = true; try { /* define timeToken to compare either the target urls in the file */ SimpleDateFormat sdf = new SimpleDateFormat("yyMMddHH"); Date rightNow = new Date(); String timeToken = sdf.format(rightNow); /* read the target#.txt where # is node id file to decide write or not */ File file = new File("News/target" + node_id + ".txt"); if (file.exists()) { FileInputStream fstream = new FileInputStream("News/target" + node_id + ".txt"); DataInputStream in = new DataInputStream(fstream); BufferedReader br = new BufferedReader(new InputStreamReader(in)); String strLine; int isSame = 1; while ((strLine = br.readLine()) != null) { isSame = strLine.compareTo(timeToken + "count = " + count); if (isSame == 0) { writeOrNot = false; System.out.println( "The set of urls have already written in the file:target" + node_id + ".txt"); } } // Close the input stream in.close(); } /* end the reading file */ /* decide write or not */ if (writeOrNot) { /* write to file named target#.txt where # is node id */ FileWriter outputToTxt = new FileWriter("News/target" + node_id + ".txt", true); BufferedWriter writeToFile = new BufferedWriter(outputToTxt); writeToFile.write(timeToken + "count = " + count); writeToFile.newLine(); int urlIsSame = 1; for (int i = 0; i < links.size(); i++) { String levelTwoUrl = links.get(i).attr("href"); // String compareIsSame = links.get(i+1).attr("href"); // System.out.println(line + "\n"); if (i != 0) { urlIsSame = levelTwoUrl.compareTo(links.get(i - 1).attr("href")); } if (urlIsSame != 0) { writeToFile.write(levelTwoUrl); writeToFile.newLine(); } } writeToFile.close(); System.out.println("The file : target" + node_id + ".txt is written!"); } } catch (IOException e) { System.out.println("The IO Error msg is:" + e.getMessage()); } }
// Busca os endereços pelo número do CEP. public List<Address> getByCep(String cep) throws IOException { listEnderecos = new ArrayList<Address>(); // mapeamento dos parametros que será passado na requisição Map<String, String> query = new HashMap<String, String>(); query.put("CEP", cep); query.put("Metodo", "listaLogradouro"); query.put("TipoConsulta", "cep"); query.put("StartRow", "1"); query.put("EndRow", "10"); // Faz uma requisição no site do correios (www.buscacep.com.br) com Json, passando os parametros // mapeados, // requisição deverá ser do tipo post. // Armazena o retorno em uma variavel doc. Document doc = Jsoup.connect(Utils.adressCorreios) .data(query) .header("Origin", "http://www.buscacep.correios.com.br") .header("Referer", "http://www.buscacep.correios.com.br") .post(); // Acessa o retorno do doc e percorre o resultado buscando as informações dos endereços // Armazena os resultados na lista de endereços criadas e retorna a mesma para que outras // classes possam acessar. Elements elements = doc.select("table").eq(2); Elements rows = elements.select("tr"); Iterator<Element> rowIterator = rows.iterator(); while (rowIterator.hasNext()) { Address enderecos = new Address(); Element element = rowIterator.next(); Elements logradouro = element.children().select("td").eq(0); enderecos.setLogradouro(logradouro.text()); Elements bairro = element.children().select("td").eq(1); enderecos.setBairro(bairro.text()); Elements cidade = element.children().select("td").eq(2); Elements estado = element.children().select("td").eq(3); StringBuilder sbLocalidade = new StringBuilder(); sbLocalidade.append(cidade.text()); sbLocalidade.append("/"); sbLocalidade.append(estado.text()); enderecos.setLocalidade(sbLocalidade.toString()); Elements codigopostal = element.children().select("td").eq(4); enderecos.setCEP(codigopostal.text()); listEnderecos.add(enderecos); } return listEnderecos; }
/**
 * Collects the products listed in the {@code data-products} attribute of {@code #productList}.
 *
 * <p>The attribute is expected to contain a bracketed, comma-separated id list such as
 * {@code [id1, id2]}. Each id is looked up as {@code #id} and handed to
 * {@code buildResultList}. When the attribute is missing, has no opening bracket, or the list is
 * empty, no product is added. Previously those cases failed with an
 * {@code ArrayIndexOutOfBoundsException}, and blank ids produced an invalid {@code #} selector.
 *
 * @param elements search-result markup; may be {@code null}
 * @return this object's product list, including any products added by this call
 */
@Override
public List<ParsedToken> collectSearchResult(Elements elements) {
  if (elements == null) {
    return this.productinfoList;
  }

  String products = elements.select("#productList").attr("data-products");
  int open = products.indexOf('[');
  if (open < 0) {
    return this.productinfoList;
  }

  // Keep the text between the first '[' and whichever comes first of the next '[' or ']'.
  String idSegment = products.substring(open + 1);
  int nextOpen = idSegment.indexOf('[');
  if (nextOpen >= 0) {
    idSegment = idSegment.substring(0, nextOpen);
  }
  int close = idSegment.indexOf(']');
  if (close >= 0) {
    idSegment = idSegment.substring(0, close);
  }

  for (String rawId : idSegment.split(",")) {
    String id = rawId.trim();
    if (id.isEmpty()) {
      continue;
    }
    buildResultList(elements.select("#".concat(id)));
  }
  return this.productinfoList;
}
/**
 * Downloads the xe.com USD to CNY converter page and prints the own text of the first
 * {@code .rightCol} element inside {@code ucc-result-table}, with spaces removed.
 *
 * @param args not used
 */
public static void main(String[] args) {
  String url = "http://www.xe.com/currencyconverter/convert/?Amount=1&From=USD&To=CNY";
  Document doc = Jsoup.parse(HttpClientUtil.doGet(url));

  Elements resultTable = doc.getElementsByClass("ucc-result-table");
  String rawRate = resultTable.select(".rightCol").first().ownText();

  System.out.println(rawRate.replace(" ", ""));
}
/**
 * Decides whether the given page belongs to a logged-in session by looking for the
 * {@code .zu-top-nav-userinfo} element, and records the outcome in {@code isLogin}.
 *
 * <p>When logged in, {@code userName} is set from the {@code .name} element.
 *
 * @param doc page to inspect
 * @return {@code true} when the user-info element is present
 */
public boolean checkLogin(Document doc) {
  Elements userInfo = doc.select(".zu-top-nav-userinfo");

  if (userInfo.isEmpty()) {
    log.info("未登录");
    isLogin = false;
    return false;
  }

  userName = userInfo.select(".name").text();
  log.info("登录成功!" + "登录用户为:" + userName);
  isLogin = true;
  return true;
}
public String leituraJxr() throws IOException { // método para pegar os nomes dos métodos declarados Elements elements = document.getElementsByTag("pre"); elements.select("a.jxr_linenumber").remove(); // elements.select("strong.jxr_keyword").remove(); // elements.select("span.jxr_string").remove(); // elements.select("em.jxr_comment").remove(); for (Element children : elements) { children.getElementsByClass("jxr_comment").remove(); children.getElementsByClass("jxr_javadoccomment").remove(); } return elements.text(); // retorna o código sem lixo }
// Busca o Cep pelo logradouro. public List<String> getByAdress(String address) throws IOException { listAddress = new ArrayList<String>(); // mapeamento dos parametros que será passado na requisição Map<String, String> query = new HashMap<String, String>(); query.put("relaxation", address); query.put("TipoCep", "ALL"); query.put("semelhante", "N"); query.put("cfm", "1"); query.put("Metodo", "listaLogradouro"); query.put("TipoConsulta", "relaxation"); query.put("StartRow", "1"); query.put("EndRow", "10"); // Faz uma requisição no site do correios (www.buscacep.com.br) com Json, passando os parametros // mapeados, // requisição deverá ser do tipo post. // Armazena o retorno em uma variavel doc. Document doc = Jsoup.connect(Utils.adressCorreios) .timeout(20000) .data(query) .header("Origin", "http://www.buscacep.correios.com.br") .header("Referer", "http://www.buscacep.correios.com.br") .post(); // Acessa o retorno do doc e percorre o resultado buscando as informações de Cep de acordo com o // endereço passado. // Armazena os resultados na lista criada e retorna a mesma para que outras classes possam // acessar Elements elements = doc.select("table").eq(2); Elements rows = elements.select("tr"); Iterator<Element> rowIterator = rows.iterator(); while (rowIterator.hasNext()) { Address enderecos = new Address(); Element element = rowIterator.next(); Elements codigopostal = element.children().select("td").eq(4); enderecos.setCEP(codigopostal.text()); listAddress.add(enderecos.getCEP()); } return listAddress; }
@Override Product parseProductUrl(String url) throws IOException { try { Product product = new Product(); url = url.replaceAll("\r\n", ""); Document doc = Jsoup.connect(url).timeout(0).get(); Elements elements = doc.select("table[style=padding-left:10px;]").select("td"); product.setName( elements .get(1) .html() .substring(0, elements.get(1).html().indexOf("<a style=")) .replace( " <img style=\"border: 2px solid #fff; box-shadow: rgba(0, 0, 0, 0.6) 0px 2px 2px;\" src=\"./images/new.jpg \" />", "")); product.setCena(elements.select("p").text().replace("z³", "")); product.setOpis(elements.select("td[style=width:800px]").text().replace("Opis: ", "")); product.setName( product.getName() + " " + product .getOpis() .substring( product.getOpis().indexOf("Kolor: ") + 7, product.getOpis().length() - 1)); product.setImg(elements.select("td").select("a").select("img[width=450px]").attr("abs:src")); // System.out.println(product.getName()); // System.out.println(product.getCena()); // System.out.println(product.getOpis()); // System.out.println(product.getImg()); // System.out.println(); return product; } catch (Exception e) { e.printStackTrace(); return null; } }
public static String updateAFGXml(boolean isActivate, String target, String ectXml) { String conditionStr = isActivate ? "<cp:conditions/>" : "<cp:conditions><ss:rule-deactivated/></cp:conditions>"; Document doc = Jsoup.parse(ectXml, "UTF-8"); Elements ruleAudio = doc.select("cp|rule[id=cfu] "); Elements ruleAudioCondition = ruleAudio.select("cp|conditions"); ruleAudioCondition.remove(); // we cant change it to "<cp:conditions/> directly ruleAudio.prepend(conditionStr); Elements ruleAudioForwardTarget = ruleAudio.select("ss|forward-to>ss|target"); ruleAudioForwardTarget.html(target); String r = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; r += doc.getElementsByTag("ss:simservs").outerHtml(); // modify for jsoup problem r = r.replaceAll("noreplytimer", "NoReplyTimer"); // r= r.replaceAll("\n", ""); r = r.replaceAll(">\\s+(.+)\\s+<", ">$1<"); return r; }
/**
 * Logs in to iptorrents.com, downloads the listing page {@code /t?5}, converts the rows of the
 * {@code torrents} table into {@code TvShowEpisode} beans and stores them through
 * {@code DBActions.insertIPTTvEpisodes}.
 *
 * <p>The flow is: GET the home page, POST the login reusing the headers of that response, then
 * GET the listing with the {@code set-cookie} headers of the login response. Any failure is
 * printed and swallowed. The HTTP client and the last open response are always closed.
 */
public void getIPTShows() {
  String pageURL = "https://www.iptorrents.com";
  // try-with-resources: the client was never closed before.
  try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) {
    CloseableHttpResponse response = null;
    try {
      HttpGet httpGet = new HttpGet(pageURL);
      httpGet.addHeader(
          "User-Agent",
          "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36");
      response = httpClient.execute(httpGet);
      response.removeHeaders("Transfer-Encoding");

      // SECURITY(review): account credentials are hard-coded in the query string. They should
      // come from configuration; left unchanged here to preserve behavior.
      HttpPost thePost = new HttpPost(pageURL + "?username=mcpchelper81&password=ru68ce48&php=");
      thePost.setHeaders(response.getAllHeaders());
      response.close();
      response = null;
      response = httpClient.execute(thePost);

      // The listing request carries the cookies handed out by the login response.
      httpGet = new HttpGet("https://www.iptorrents.com/t?5");
      httpGet.setHeaders(response.getHeaders("set-cookie"));
      httpGet.addHeader(
          "accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
      httpGet.addHeader("accept-encoding", "gzip, deflate, sdch");
      httpGet.addHeader("accept-language", "en-US,en;q=0.8");
      httpGet.addHeader("dnt", "1");
      httpGet.addHeader("upgrade-insecure-requests", "1");
      httpGet.addHeader(
          "user-agent",
          "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36");
      response.close();
      response = null;
      response = httpClient.execute(httpGet);

      // Charset from "Content-Type: ...; charset=X"; UTF-8 when the header or parameter is
      // missing (a missing header used to cause a NullPointerException).
      String charset = "UTF-8";
      Header contentType = response.getFirstHeader("Content-Type");
      if (contentType != null) {
        String[] contentArray = contentType.getValue().split(";");
        if (contentArray.length > 1 && contentArray[1].contains("=")) {
          charset = contentArray[1].trim().split("=")[1];
        }
      }

      HttpEntity httpEntity = response.getEntity();
      Document pageDoc = Jsoup.parse(httpEntity.getContent(), charset, httpGet.getURI().getPath());
      Elements results = pageDoc.getElementsByClass("torrents");
      // The body is fully parsed; release the connection before the slower conversion and DB work.
      response.close();
      response = null;

      Elements rawShowObjects = results.select("tr");
      IPTToTvShowEpisode makeShows = new IPTToTvShowEpisode();
      List<TvShowEpisode> theShows = makeShows.makeTSEBeans(rawShowObjects);

      DBActions.insertIPTTvEpisodes(theShows, "https://www.iptorrents.com/t?5");
    } finally {
      // Close whichever response is still open when an exception interrupts the flow.
      if (response != null) {
        response.close();
      }
    }
  } catch (MalformedURLException malformedUrl) {
    malformedUrl.printStackTrace();
  } catch (Exception e) {
    e.printStackTrace();
  }
}
protected void parseLoginStep2(SimpleObject context) { String text = ContextUtil.getContent(context); if (text == null) { return; } String phone1 = phoneNo; String password1 = password; String n = StringUtil.subStr("strEnc(username,", ");", text).trim(); if (!StringUtils.isBlank(n)) { String[] stra = n.trim().replaceAll("\'", "").split(","); // pwd, digit, f, s phone1 = executeJsFunc("des/tel_com_des.js", "strEnc", phoneNo, stra[0], stra[1], stra[2]); password1 = executeJsFunc("des/tel_com_des.js", "strEnc", password, stra[0], stra[1], stra[2]); } Document doc = ContextUtil.getDocumentOfContent(context); Elements form = doc.select("form#c2000004"); Request req = new Request(fixedFullUrl(form.attr("action"))); req.setMethod("POST"); req.initNameValuePairs(12); req.addNameValuePairs("lt", form.select("input[name=lt]").attr("value")); req.addNameValuePairs("_eventId", "submit"); req.addNameValuePairs("forbidpass", "null"); req.addNameValuePairs("areaname", areaName); req.addNameValuePairs("password", password1); req.addNameValuePairs("authtype", "c2000004"); req.addNameValuePairs("customFileld01", customField1); req.addNameValuePairs("customFileld02", customField2); req.addNameValuePairs("forbidaccounts", "null"); req.addNameValuePairs("open_no", "c2000004"); req.addNameValuePairs("username", phone1); req.addNameValuePairs("randomId", authCode == null ? "" : authCode); req.setCharset(UAM_CHAR_SET); req.addObjservers( new AbstractProcessorObserver(util, WaringConstaint.ZGDX_3) { @Override public void afterRequest(SimpleObject context) { parseLoginStep3(context); } }); spider.addRequest(req); }
private void parseLoginStep4(SimpleObject context) { Document doc = ContextUtil.getDocumentOfContent(context); Elements e1 = doc.select("form#c2000004"); if (e1.size() > 0) { data.put("errMsg", e1.select("td#status2").text()); setStatus(STAT_STOPPED_FAIL); notifyStatus(); return; } e1 = doc.select("form#login_form"); if (e1.size() > 0) { data.put("errMsg", "登录失败,请重试!"); setStatus(STAT_STOPPED_FAIL); notifyStatus(); return; } String text = ContextUtil.getContent(context); String url = StringUtil.subStr( "<script type='text/javascript'>location.replace('", "');</script>", text); if (StringUtils.isBlank(url.trim())) { if ("IBM HTTP Server".equalsIgnoreCase(doc.select("title").text())) { setStatus(STAT_LOGIN_SUC); // notifyStatus(); ssoLogin(context); } else { data.put("fail", true); setStatus(STAT_STOPPED_FAIL); notifyStatus(); logger.error("Login Fail....."); } return; } getUrl( url, null, new Object[] {UAM_CHAR_SET}, new AbstractProcessorObserver(util, WaringConstaint.ZGDX_5) { @Override public void afterRequest(SimpleObject context) { setStatus(STAT_LOGIN_SUC); ssoLogin(context); } }); }
@Override public void initialize(URL location, ResourceBundle resources) { urlField.setOnAction( event -> { String text = urlField.getText(); urlField.setText("tetetetetetetete"); webView.getEngine().load(text); }); webView .getEngine() .getLoadWorker() .stateProperty() .addListener( (ov, oldState, newState) -> { if (newState == State.SUCCEEDED) { String url = webView.getEngine().getLocation(); urlField.setText(url); if (Pattern.compile("http://item.rakuten.co.jp/.*").matcher(url).find()) { try { Elements tmp; Document document = Jsoup.connect(url).get(); tmp = document.select("input"); tmp = tmp.select("#etime"); if (tmp.size() != 0) { if (!(Long.parseLong(tmp.first().val()) < new Date().getTime())) { entryButton.setDisable(false); } } else { entryButton.setDisable(false); } } catch (Exception e) { // TODO 自動生成された catch ブロック e.printStackTrace(); } } } ; }); entryButton.setOnAction( event -> { urlField.setText("webView disable"); sendEntryTaskController(); }); }
/* * Connects Jsoup to each MET search page and gets the link for each painting * Sends the link to each painting to paintingScraper(link) * */ public ArrayList<String> connector(String galleryURL) { Document doc; try { doc = Jsoup.connect(galleryURL).get(); } catch (IOException e) { // TODO Auto-generated catch block doc = null; e.printStackTrace(); } Elements grid = doc.getElementsByClass("grid-results-thumbnail"); Elements linksHTML = grid.select("a[href]"); ArrayList<String> linkList = new ArrayList<String>(); for (Element link : linksHTML) { linkList.add(link.attr("href")); } return linkList; }
private boolean updateDailyNews(Document doc, String dailyTitle, DailyNews dailyNews) throws JSONException { Elements viewMoreElements = doc.getElementsByClass("view-more"); if (viewMoreElements.size() > 1) { dailyNews.setMulti(true); Elements questionTitleElements = doc.getElementsByClass("question-title"); for (int j = 0; j < viewMoreElements.size(); j++) { if (questionTitleElements.get(j).text().length() == 0) { dailyNews.addQuestionTitle(dailyTitle); } else { dailyNews.addQuestionTitle(questionTitleElements.get(j).text()); } Elements viewQuestionElement = viewMoreElements.get(j).select("a"); if (viewQuestionElement.text().equals("查看知乎讨论")) { dailyNews.addQuestionUrl(viewQuestionElement.attr("href")); } else { return false; } } } else if (viewMoreElements.size() == 1) { dailyNews.setMulti(false); Elements viewQuestionElement = viewMoreElements.select("a"); if (viewQuestionElement.text().equals("查看知乎讨论")) { dailyNews.setQuestionUrl(viewQuestionElement.attr("href")); } else { return false; } // Question title is the same with daily title if (doc.getElementsByClass("question-title").text().length() == 0) { dailyNews.setQuestionTitle(dailyTitle); } else { dailyNews.setQuestionTitle(doc.getElementsByClass("question-title").text()); } } else { return false; } return true; }
/**
 * Applies a chain of Jsoup selectors, each to the result of the previous one, and returns the
 * text of the single element that remains.
 *
 * <p>The selector list is only read. Earlier versions removed its first entry, which altered
 * the caller's list.
 *
 * <p>When the chain does not end in exactly one element an error is logged and the JVM is
 * terminated with {@code System.exit(1)}; the {@code return} that follows exists only to
 * satisfy the compiler.
 *
 * @param elsPar element the first selector is applied to
 * @param jsoupSelectors selectors to apply in order; must contain at least one entry
 * @return a {@code Result} carrying the text of the matched element
 * @throws IndexOutOfBoundsException when {@code jsoupSelectors} is empty
 */
public static Result textSelList(Element elsPar, ArrayList<String> jsoupSelectors) {
  Result res = new Result();

  Elements els = elsPar.select(jsoupSelectors.get(0));
  for (int i = 1; i < jsoupSelectors.size(); i++) {
    Elements narrowed = els.select(jsoupSelectors.get(i));
    if (narrowed != null) {
      els = narrowed;
    }
  }

  if (els == null || els.size() != 1) {
    log.error("jsoup selector on elements does not match 1");
    System.exit(1);
    return res.setContinua(false);
  }
  return res.setRetStr(els.get(0).text());
}
/**
 * Parses {@code inputContent} as HTML and applies the CSS selectors in {@code args} one after
 * another, each on the result of the previous one.
 *
 * @param inputContent HTML to query
 * @param args selectors, checked first by {@code validateCSSSelectorRuleArgs}
 * @return the trimmed inner HTML of the final match, or {@code null} when nothing matches or no
 *     selector was given
 * @throws Exception declared by the signature
 */
@Override
public String fire(String inputContent, String[] args) throws Exception {
  validateCSSSelectorRuleArgs(args);
  Document document = Jsoup.parse(inputContent);

  // The first selector runs on the document, every further one on the previous result.
  Elements matched = args.length > 0 ? document.select(args[0]) : null;
  for (int idx = 1; idx < args.length && matched != null; idx++) {
    matched = matched.select(args[idx]);
  }

  if (matched == null || matched.size() == 0) {
    return null;
  }
  return matched.html().trim();
}
@Override public NewsEntity parseNewsPerCategory(String newsURL) { // TODO Auto-generated method stub NewsEntity parsetData = null; try { Document doc = Jsoup.connect(newsURL).timeout(Constants.MAX_DELAY_TIME * 1000).get(); doc.outputSettings().charset(Charset.forName("UTF-8")); doc.normalise(); Elements titleElement = doc.select("title"); String titleName = titleElement.text(); if (titleName.contains("|")) { titleName = titleName.substring(0, titleName.indexOf("|")).trim(); } Elements newsElements = doc.select("div[class=content]").select("p"); newsElements.select("a, img, script, xml, input, label, textarea").remove(); if (newsElements != null) { try { parsetData = new NewsEntity(); parsetData.setNewsTitle(titleName); parsetData.setNewsBody(newsElements.text()); parsetData.setNewsURL(newsURL); // System.out.println("URL: " + newsURL + " HASH: " + // NewsAggregatorUtility.StringToSHA1Hash(newsURL)); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return parsetData; }
/**
 * Renders the search-form input template for a many-to-one property and checks that it yields a
 * {@code select} element bound to {@code search.customer} with id {@code customer}.
 */
@Test
public void testGenerateManyToOneProperty() throws Exception {
  Map<String, Object> root =
      TestHelpers.createInspectionResultWrapper(ENTITY_NAME, MANY_TO_ONE_PROP);

  URL templateUrl =
      getClass().getResource(Deployments.BASE_PACKAGE_PATH + Deployments.SEARCH_FORM_INPUT);
  Resource<URL> templateResource = resourceFactory.create(templateUrl);
  TemplateProcessor processor =
      processorFactory.fromTemplate(new FreemarkerTemplate(templateResource));

  String output = processor.process(root);
  Document html = Jsoup.parseBodyFragment(output);
  assertThat(output.trim(), not(equalTo("")));

  Elements container = html.select("div.form-group");
  assertThat(container, notNullValue());

  Elements selectInput = container.select("div.col-sm-10 > select");
  assertThat(selectInput.attr("id"), equalTo("customer"));
  assertThat(selectInput.attr("ng-model"), equalTo("search" + "." + "customer"));
}