public void scrapBlogPost(String href, BlogPost ourCopy, boolean commentsOnly, String marker) throws IOException { Document document; try { document = Jsoup.connect(href).get(); } catch (IOException e) { try { Thread.sleep(2000); } catch (InterruptedException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } document = Jsoup.connect(href).get(); } if (ourCopy == null) ourCopy = new BlogPost(); BlogPost post = parser.parseBlogPost(document, ourCopy); List<BlogComment> comments = parser.parseBlogComments(document); if (comments != null && !comments.isEmpty()) { post.setComments(comments); post.setCommentCount(comments.size()); } else { post.setCommentCount(0); } post.setUrl(href); post.setMarker(marker); repository.save(post); }
/**
 * Loads the page at {@code url} with the configured HTTP method and, for every configured
 * attribute, stores the mapped text of the first element matching the attribute's query.
 *
 * <p>Attributes whose query matches nothing are left out of {@code metaData}.
 *
 * @param metaData target that receives one entry per matched attribute
 * @throws MetaDataException if {@code method} is neither GET nor POST, if the page cannot be
 *     fetched, or if a value mapper rejects the extracted text
 */
@Override
public void populateMetaData(MetaData metaData) throws MetaDataException {
  try {
    final Document page;
    if (method.equals("GET")) {
      page = Jsoup.connect(url).get();
    } else if (method.equals("POST")) {
      page = Jsoup.connect(url).data(requestData).post();
    } else {
      throw new MetaDataException("Unsupported HTML access method: " + method);
    }

    for (MetaDataAttribute attribute : attributes) {
      Elements matches = page.select(attribute.getQuery());
      if (matches.isEmpty()) {
        continue;
      }
      // Only the first match contributes a value.
      String rawText = matches.get(0).text();
      Object mapped = attribute.getValueMapper().parse(rawText);
      metaData.put(attribute.getName(), mapped);
    }
  } catch (IOException fetchFailure) {
    throw new MetaDataException(fetchFailure);
  } catch (ValueMapperException mappingFailure) {
    throw new MetaDataException(mappingFailure);
  }
}
/**
 * Derives the download URL of a Mixcloud track from its preview URL.
 *
 * <p>No direct way of extracting the download URL is known, but preview and download URLs are
 * similar, for example:
 *
 * <pre>
 * http://stream8.mxcdn.com/previews/9/6/a/e/93a8-2d77-4573-85c5-68bfb679d9bc.mp3              (preview)
 * http://stream11.mxcdn.com/cloudcasts/originals/9/6/a/e/93a8-2d77-4573-85c5-68bfb679d9bc.mp3 (download)
 * </pre>
 *
 * <p>The method swaps "previews" for "cloudcasts/originals" and requests the result. If that
 * request fails with an HTTP status error, it tries stream servers 1 through 30 in turn and
 * returns the first URL whose parsed response is at least 2000 characters long.
 *
 * @return the working download URL, or {@code null} if no server produced one
 * @throws IOException on any I/O failure other than an HTTP status error
 */
private String generateStreamURL() throws IOException {
  final String browserAgent = "Mozilla/5.0 (Windows NT 6.1; rv:17.0) Gecko/20100101 Firefox/17.0";
  final String candidate = this.getPreviewURL().replaceAll("previews", "cloudcasts/originals");

  try {
    Jsoup.connect(candidate).ignoreContentType(true).userAgent(browserAgent).execute();
    return candidate;
  } catch (HttpStatusException firstAttempt) {
    // The server named in the preview URL does not host the original; try the others below.
  }

  final int serversToCycle = 30;
  for (int server = 1; server <= serversToCycle; server++) {
    String cycledUrl = candidate.replaceAll("stream[0-9]+", ("stream" + server));
    try {
      Response reply =
          Jsoup.connect(cycledUrl).ignoreContentType(true).userAgent(browserAgent).execute();
      // Short responses are treated as "not the track" and skipped.
      if (reply.parse().toString().length() >= 2000) {
        return cycledUrl;
      }
    } catch (HttpStatusException cycledAttempt) {
      // This server rejected the request; move on to the next one.
    }
  }
  return null;
}
/**
 * Fetches the login page, copies every input of its {@code loginForm} into the request data, adds
 * the credentials and time fields, and posts the result back to the login URL.
 *
 * @param username account name placed in the {@code username} field
 * @param password password placed in the {@code password} field
 * @param captchaData either empty/{@code null} (no captcha; any captcha fields copied from the
 *     form are removed) or at least three entries: solution, token and fid, in that order
 * @return the response of the login POST
 * @throws ConnectionException if the login page cannot be fetched, lacks the login form, or the
 *     POST fails
 * @throws IllegalArgumentException if {@code captchaData} has one or two entries
 */
private Response postToLogin(String username, String password, String[] captchaData)
    throws ConnectionException {
  boolean hasCaptcha = captchaData != null && captchaData.length > 0;
  if (hasCaptcha && captchaData.length < 3) {
    // Fail before any network traffic rather than with an index error halfway through.
    throw new IllegalArgumentException(
        "captchaData must hold solution, token and fid, but has "
            + captchaData.length
            + " element(s)");
  }

  try {
    Document loginDocument = Jsoup.connect(Endpoints.LOGIN_URL.url()).get();
    Element loginForm = loginDocument.getElementById("loginForm");
    if (loginForm == null) {
      // Raised as IOException so it is reported through the same path as other fetch problems.
      throw new IOException("Login page has no element with id \"loginForm\"");
    }

    Map<String, String> data = new HashMap<>();
    for (Element input : loginForm.getElementsByTag("input")) {
      data.put(input.attr("name"), input.attr("value"));
    }

    Date now = new Date();
    data.put("timezone_field", new SimpleDateFormat("XXX").format(now).replace(':', '|'));
    data.put("username", username);
    data.put("password", password);
    data.put("js_time", String.valueOf(now.getTime() / 1000));

    if (hasCaptcha) {
      data.put("hip_solution", captchaData[0]);
      data.put("hip_token", captchaData[1]);
      data.put("fid", captchaData[2]);
      data.put("hip_type", "visual");
      data.put("captcha_provider", "Hip");
    } else {
      data.remove("hip_solution");
      data.remove("hip_token");
      data.remove("fid");
      data.remove("hip_type");
      data.remove("captcha_provider");
    }

    return Jsoup.connect(Endpoints.LOGIN_URL.url()).data(data).method(Method.POST).execute();
  } catch (IOException e) {
    throw ExceptionHandler.generateException("While submitting credentials", e);
  }
}
@Override protected String doInBackground(Void... params) { String html = ""; try { Connection.Response loginForm; loginForm = Jsoup.connect("https://ta.yrdsb.ca/yrdsb/").method(Connection.Method.GET).execute(); // Login to page using user/pass entered in MainActivity Document document = Jsoup.connect("https://ta.yrdsb.ca/yrdsb/") .data("cookieexists", "false") .data("username", MainActivity.usernameString) .data("password", MainActivity.passwordString) .data("submit", "Login") .cookies(loginForm.cookies()) .post(); // Convert document into string for easier processing html = document.toString(); Document doc = Jsoup.parse(html); // for (int x = 0; x < doc.select("[width=85%], [border=0], [cellspacing=0], // [cellpadding=5]").size(); x++){ System.out.println( doc.select("[width=85%], [border=0], [cellspacing=0], [cellpadding=5]") .select("tr") .size()); // System.out.println(doc.select("[width=85%], [border=0], [cellspacing=0], // [cellpadding=5]").select("tr").get(5)); // } // Prepare array to store grades grades = new ArrayList<>(); courses = new ArrayList<>(); // Regex to search html string for grades, then add to array Pattern p = Pattern.compile("current mark\\s?=\\s?(\\d+\\.?\\d*)"); Matcher m = p.matcher(html); while (m.find()) { grades.add(new Double(m.group(1))); } Pattern p1 = Pattern.compile("([a-zA-Z]{3}[0-9]{1}[a-zA-Z]{1}[0-9]{1})"); Matcher m1 = p1.matcher(html); while (m1.find()) { courses.add(new String(m1.group(1))); } } catch (IOException e) { e.printStackTrace(); } return html; }
/**
 * Resolves the topic URL reachable from the page at {@code url}.
 *
 * <p>The method follows the last link on the page, then reads the target out of the text between
 * {@code url=} and {@code .A" />} in the intermediate page (presumably a meta-refresh tag —
 * confirm against the site). Every "amp;" is stripped from the extracted target.
 *
 * @param url page whose last link leads to the intermediate page
 * @return absolute URL under {@code http://bbs.nju.edu.cn/}
 * @throws IOException if a page cannot be fetched, the first page has no links, or the
 *     intermediate page lacks the expected markers
 */
public static final String getAllTopicArtileUrl(String url) throws IOException {
  final String siteRoot = "http://bbs.nju.edu.cn/";

  Document doc = Jsoup.connect(url).get();
  Elements anchors = doc.select("a");
  if (anchors.isEmpty()) {
    throw new IOException("No links found on " + url);
  }
  String intermediate = siteRoot + anchors.get(anchors.size() - 1).attr("href");

  String nextContent = Jsoup.connect(intermediate).get().toString();
  int markerStart = nextContent.indexOf("url=");
  int markerEnd = nextContent.indexOf(".A\" />");
  // Both markers must exist and be in order, otherwise substring would fail or return garbage.
  if (markerStart < 0 || markerEnd < 0 || markerStart + 4 > markerEnd + 2) {
    throw new IOException("Redirect target not found on " + intermediate);
  }

  // +2 keeps the trailing ".A" of the target.
  String target = nextContent.substring(markerStart + 4, markerEnd + 2);
  return siteRoot + target.replace("amp;", "");
}
@Override public void run() { // TODO Auto-generated method stub Document doc = null; Elements eles = null; if (!Utils.isNET(NewsContentActivity.this)) { Utils.showToast(NewsContentActivity.this, "网络不可用哦,亲!", Toast.LENGTH_SHORT); } else { try { doc = Jsoup.connect(url).timeout(8000).get(); if (null == doc) { Utils.showToast(NewsContentActivity.this, "网络不给力哦,亲,请返回再进入吧!", Toast.LENGTH_SHORT); return; } eles = doc.select("#Cnt-Main-Article-QQ P"); StringBuilder sb = new StringBuilder(); for (int i = 0; i < eles.size(); i++) { sb.append(eles.get(i).outerHtml()); } Message msg = new Message(); Bundle bundle = new Bundle(); bundle.putString("content", sb.toString()); Log.i("content", sb.toString()); msg.setData(bundle); msg.what = NewsContentActivity.NEWCONTENTRECEIVED; myHandler.sendMessage(msg); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
@Override public SearchResult[] getSearchResults(String searchString) throws IOException { Document doc = Jsoup.connect(searchString).timeout(CONNECTION_TIMEOUT_VALUE).get(); boolean onSearchResultsPage = doc.location().contains("adultSearch.htm"); // found the movie without a search results page if (doc.location() != null && !onSearchResultsPage) { String idOfPage = getIDStringFromDocumentLocation(doc); String posterPath = getPosterPreviewPathFromIDString(idOfPage); String label = doc.select("title").first().text(); Thumb previewImage = new Thumb(posterPath); // SearchResult directResult = new SearchResult(doc.location()); SearchResult result = null; if (posterPath != null) result = new SearchResult(doc.location(), label, previewImage); else result = new SearchResult(doc.location(), label, null); SearchResult[] directResultArray = {result}; return directResultArray; } Elements foundMovies = doc.select("table[width=690]:contains(Wish List) tr tbody:has(img)"); LinkedList<SearchResult> searchList = new LinkedList<SearchResult>(); for (Element movie : foundMovies) { String urlPath = movie.select("a").first().attr("href"); String thumb = movie.select("img").first().attr("src"); String label = movie.select("img").first().attr("alt"); SearchResult searchResult = new SearchResult(urlPath, label, new Thumb(thumb)); if (!searchList.contains(searchResult)) searchList.add(searchResult); } return searchList.toArray(new SearchResult[searchList.size()]); }
/**
 * Builds a YouTube link for the movie's trailer from the first result on a YouTube page.
 *
 * <p>The link path is cut out of the 100 characters starting at the first
 * {@code yt-lockup-title} anchor. The return values are the same as before this rewrite; only the
 * character-by-character string building and the reliance on index exceptions were removed.
 *
 * @param movie movie whose year and name drive the lookup
 * @return the literal string {@code "null"} for movies before 1990; otherwise
 *     {@code http://www.youtube.com} followed by the extracted path, or the bare host when the
 *     page cannot be fetched or the expected markup is not present
 */
private static String getTrailer(Movie movie) {
  if (Integer.valueOf(movie.getMovieYear()) < 1990) {
    return "null";
  }

  final String host = "http://www.youtube.com";
  String link = formatYoutubeString(movie.getMovieName());
  try {
    String html = Jsoup.connect("http://www.youtube.com/" + link).get().body().toString();

    int titleAt = html.indexOf("class=\"yt-lockup-title \"><a href=\"");
    // The link is expected within 100 characters of the marker; bail out if that window is missing.
    if (titleAt < 0 || titleAt + 100 > html.length()) {
      return host;
    }
    String linkDiv = html.substring(titleAt, titleAt + 100);

    int pathStart = linkDiv.indexOf("<a href=\"") + 9;
    // -2 drops the closing quote and the space before the class attribute.
    int pathEnd = linkDiv.indexOf("class=\"yt-uix-sessionlink") - 2;
    if (pathStart < pathEnd) {
      return host + linkDiv.substring(pathStart, pathEnd);
    }
  } catch (Exception e) {
    // Best effort: a failed lookup still yields the bare host.
    System.out.println(e.toString());
  }
  return host;
}
/** * getMovieActors parses through the movie's page html and returns three actors. * * @author defq0n * @param pageLink is the extended imdb url for the movie page. * @return movieActors String containing three actors. */ private static String[] getMovieActors(String pageLink) { String[] movieActors = {"", "", ""}; try { Document d = Jsoup.connect("http://imdb.com" + pageLink).get(); Element e = d.body(); String html = e.toString(); String actorsDiv = ""; for (int i = html.indexOf("<h4 class=\"inline\">Stars:</h4>") + 30; i < html.indexOf("See full cast and crew"); i++) { actorsDiv += html.charAt(i); } String tempDiv = actorsDiv; for (int i = 0; i < 3; i++) { // we will get the first three top actors String actor = ""; String t = "itemprop=\"url\"><span class=\"itemprop\" itemprop=\"name\">"; for (int j = tempDiv.indexOf(t) + t.length(); j < tempDiv.indexOf("</span></a>"); j++) { actor += tempDiv.charAt(j); } movieActors[i] = actor; tempDiv = ""; for (int j = actorsDiv.indexOf(actor + "</span>") + actor.length() + 7; j < actorsDiv.length(); j++) { tempDiv += actorsDiv.charAt(j); } } } catch (Exception e) { System.out.println(e.toString()); } return movieActors; }
/**
 * Downloads the video referenced by this ripper's URL.
 *
 * <p>The page is expected to contain a {@code var qualityArr = {...};} JSON object keyed by
 * quality. The first available of 1080p, 720p, 480p and 240p is queued for download, then the
 * method waits for the download threads.
 *
 * @throws IOException if the page cannot be fetched, the quality array is not found, or it holds
 *     none of the known qualities
 */
@Override
public void rip() throws IOException {
  String pageUrl = this.url.toExternalForm();
  logger.info(" Retrieving " + pageUrl);
  Document doc = Jsoup.connect(pageUrl).userAgent(USER_AGENT).get();

  Matcher m =
      Pattern.compile("^.*var qualityArr = (.*});.*$", Pattern.DOTALL).matcher(doc.html());
  if (!m.matches()) {
    throw new IOException("Failed to rip video at " + this.url);
  }

  try {
    JSONObject json = new JSONObject(m.group(1));

    // Qualities are listed best-first; the first one present wins.
    String[] preferredQualities = {"1080p", "720p", "480p", "240p"};
    String vidUrl = null;
    for (int i = 0; i < preferredQualities.length && vidUrl == null; i++) {
      if (json.has(preferredQualities[i])) {
        vidUrl = json.getString(preferredQualities[i]);
      }
    }
    if (vidUrl == null) {
      throw new IOException("Unable to find video URL at " + this.url);
    }

    addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url));
    waitForThreads();
  } catch (JSONException e) {
    logger.error("Error while parsing JSON at " + url, e);
    throw e;
  }
}
/**
 * Reads the fee from the fourth cell of the second row of the main table on the page at
 * {@code url}.
 *
 * <p>Failed downloads are retried, but only up to five attempts in total and with a one-second
 * pause between them, so an unreachable host can no longer make this method spin forever.
 *
 * @param url page to read the fee from
 * @return the fee produced by {@code getFee} for the cell text; {@code url} itself when the cell
 *     is absent, when every attempt failed, or when the thread was interrupted while waiting
 */
public static String requestFee(String url) {
  final int maxAttempts = 5;
  final long pauseMillis = 1000L;
  final String feeCell =
      "#block-system-main > table > tbody > tr:nth-child(2) > td:nth-child(4)";

  for (int attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      Document doc = Jsoup.connect(url).timeout(5000).get();
      if (doc.select(feeCell).size() > 0) {
        return getFee(doc.select(feeCell).text());
      }
      // No fee cell on the page: hand back the URL, as before.
      return url;
    } catch (IOException e) {
      System.out.println("requestFee : " + e.getMessage());
    }

    if (attempt < maxAttempts) {
      try {
        Thread.sleep(pauseMillis);
      } catch (InterruptedException interrupted) {
        Thread.currentThread().interrupt();
        break;
      }
    }
  }
  // Giving up uses the same fallback as "fee not found".
  return url;
}
public static void initMajorList(String originalUrl) { System.out.println("preparing majorList"); boolean finish = false; do { try { majorList.clear(); Connection conn = Jsoup.connect(originalUrl); Document doc = conn.timeout(10000).get(); Elements es = doc.select("#accordion__target-3 > div.course-listing__box > a"); for (Element e : es) { // major MajorForCollection major = new MajorForCollection(); major.setLevel(LEVEL); major.setTitle(e.select("h3").get(0).text().trim()); major.setType(e.select("p").get(0).text().replaceAll("-[\\s\\S]*", "").trim()); major.setUrl(e.select("a").get(0).attr("href")); majorList.add(major); } ; finish = true; } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } while (!finish); System.out.println("majorList prepared"); System.out.println("majorList size: " + majorList.size()); }
/**
 * Logs in to DMM with {@code userId}/{@code password} and stores the session cookie named by
 * {@code SESSION_ID_KEY} in {@code sessionId}.
 *
 * <p>Timeouts and TLS handshake failures are handed to {@code generateSessionIdRetry(3000)}; any
 * other I/O failure is rethrown as {@code EgetException}. The session id itself is no longer
 * written to the log, because it is a credential; only whether one was received is logged.
 */
@Override
public void generateSessionId() {
  LOGGER.info("login to DMM");
  try {
    Connection.Response res =
        Jsoup.connect("https://www.dmm.co.jp/my/")
            .data("login_id", userId)
            .data("password", password)
            // NOTE(review): "sava_password" looks like a typo of "save_password"; left untouched
            // because the field name expected by the server cannot be verified from here.
            .data("sava_password", "1")
            .data("save_login_id", "1")
            .data("act", "commit")
            .method(Method.POST)
            .execute();
    String sesId = res.cookie(SESSION_ID_KEY);
    LOGGER.info("sessionId received={}", sesId != null);
    this.sessionId = sesId;
  } catch (SocketTimeoutException | SSLHandshakeException e) {
    // These failures are handled by the retry helper rather than reported as fatal.
    LOGGER.warn("login failed", e);
    generateSessionIdRetry(3000);
  } catch (IOException e) {
    throw new EgetException("failed to login", e);
  }
}
public static void processPage(String URL) throws SQLException, IOException { // check if the given URL is already in database String sql = "select * from Record where URL = '" + URL + "'"; ResultSet rs = db.runSql(sql); if (rs.next()) { } else { // store the URL to database to avoid parsing again sql = "INSERT INTO test.Record " + "(URL) VALUES " + "(?);"; PreparedStatement stmt = db.conn.prepareStatement(sql, Statement.RETURN_GENERATED_KEYS); stmt.setString(1, URL); stmt.execute(); // get useful information Document doc = Jsoup.connect("http://www.mit.edu/").get(); if (doc.text().contains("PhD")) { System.out.println(URL); } // get all links and recursively call the processPage method Elements questions = doc.select("a[href]"); for (Element link : questions) { if (link.attr("href").contains("mit.edu")) processPage(link.attr("abs:href")); } } }
/**
 * Downloads the latest change log as plain text, without following redirects.
 *
 * @return the trimmed response body
 * @throws IOException if the request fails, or if the body contains an {@code <html>} tag
 *     (case-insensitive), which is taken to mean the wrong page was served
 */
public static String getLatestChangeLog() throws IOException {
  String rawBody = Jsoup.connect(URL_LATEST_CHANGE_LOG).followRedirects(false).execute().body();
  String changeLog = rawBody.trim();
  Log.d(LOG_TAG, "getLatestChangeLog changeLog: " + changeLog);

  boolean looksLikeHtml = changeLog.toLowerCase().contains("<html>");
  if (looksLikeHtml) {
    throw new IOException("Wrong page loaded");
  }
  return changeLog;
}
public void init() { String url = "http://www.thjnpx.org/cms/"; // url="http://www.taqpx.org/cms/"; // url="http://www.jyzdy.org/zgclCMS/"; // url = "http://www.tsinghua.edu.cn/publish/newthu/index.html"; Document doc = null; int i = 1; while (doc == null && i < 4) { try { doc = Jsoup.connect(url).get(); } catch (IOException e) { System.out.println("连接超时次数:" + i); } i++; } if (doc == null) { return; } System.out.println("--------------------分析中--------------------------------"); title = doc.title(); System.out.println("网站链接:" + url); System.out.println("网站标题:" + title); // System.out.println("html----------:"+doc.body().html()); deleteComent(null); Element body = doc.body(); getChildElement(body, 0); isInit = true; }
/**
 * Looks up the course type through the Unistats widget referenced by the {@code #kw} element of
 * {@code doc}.
 *
 * <p>The widget URL is assembled from the element's {@code data-*} attributes. The type is the
 * first word of the widget's course heading, or the whole heading if it has no inner space.
 *
 * @param doc page that may contain the {@code #kw} widget placeholder
 * @return the course type, or an empty string if the placeholder or heading is missing or the
 *     widget cannot be fetched
 */
public static String getType(Document doc) {
  if (doc.select("#kw").size() <= 0) {
    return "";
  }
  Element placeholder = doc.select("#kw").get(0);

  String widgetUrl =
      "http://widget.unistats.ac.uk/Widget/"
          + placeholder.attr("data-institution")
          + "/"
          + placeholder.attr("data-course")
          + "/"
          + placeholder.attr("data-orientation")
          + "/"
          + "null/"
          + placeholder.attr("data-language")
          + "/"
          + placeholder.attr("data-kismode");

  final String headingQuery = "#kisWidget > div.widgetCourse > h1";
  String type = "";
  try {
    Document widget = Jsoup.connect(widgetUrl).timeout(10000).get();
    if (widget.select(headingQuery).size() > 0) {
      String heading = widget.select(headingQuery).get(0).text().trim();
      int firstSpace = heading.indexOf(" ");
      if (firstSpace > 0) {
        type = heading.substring(0, firstSpace);
      } else {
        type = heading;
      }
    }
  } catch (Exception ex) {
    ex.printStackTrace();
  }
  return type;
}
/**
 * Downloads menu page {@code number} and extracts its meals.
 *
 * <p>Each {@code td[width=400]} cell marks a meal row. The first {@code div#ssilka} in the row is
 * the description, and the last one holds the cost as the number before the first '-'. Rows that
 * do not follow this layout are skipped rather than aborting the whole page.
 *
 * <p>The cell selector previously lacked its closing bracket ({@code td[width=400}).
 *
 * @param number value substituted into the {@code URL} format string
 * @return the meals found; empty if the page cannot be fetched or has no meal rows
 */
public List<MenuMeal> getMenuMeals(int number) {
  List<MenuMeal> meals = new ArrayList<>();

  Document doc;
  try {
    doc =
        Jsoup.connect(String.format(URL, number))
            .userAgent("Chrome/49.0.2623.112")
            .referrer("https://www.google.ru/")
            .timeout(7000)
            .get();
  } catch (IOException e) {
    e.printStackTrace();
    return meals;
  }

  Elements cells = doc.select("td[width=400]");
  for (Element cell : cells) {
    Element row = cell.parent();
    if (row == null) {
      continue;
    }
    Elements parts = row.select("div[id=ssilka]");
    Element description = parts.first();
    Element price = parts.last();
    if (description == null || price == null) {
      continue;
    }

    String cost = price.text();
    int dash = cost.indexOf("-");
    if (dash < 0) {
      continue;
    }

    Integer amount;
    try {
      amount = Integer.valueOf(cost.substring(0, dash).trim());
    } catch (NumberFormatException notANumber) {
      // Price text did not start with a whole number; skip the row.
      continue;
    }

    MenuMeal menuMeal = new MenuMeal();
    menuMeal.setDescription(description.text());
    menuMeal.setCost(amount);
    meals.add(menuMeal);
  }
  return meals;
}
/**
 * Retrieves the BibTeX citation for the article at {@code url}.
 *
 * <p>The article page supplies the DOI (value of {@code input#DOI}). The citation is then
 * requested by a form POST to the RSC "getformatedresult" endpoint for that DOI.
 *
 * <p>The connection is now released only after the response has been read (it used to be
 * disconnected before reading), and the request writer and response reader are closed.
 *
 * @return the response body with each line terminated by {@code \n}, or {@code null} if the
 *     article page or the citation request fails
 */
@Override
public String getCiteItem() {
  String baseurl = "http://pubs.rsc.org/en/content/getformatedresult/";
  String posturl;
  try {
    Document doc = Jsoup.connect(url).timeout(30000).get();
    String doi = doc.select("input#DOI").attr("value");
    posturl = baseurl + doi.toLowerCase() + "?downloadtype=article";
  } catch (IOException e2) {
    // UnsupportedEncodingException is an IOException, so this also covers it.
    e2.printStackTrace();
    return null;
  }

  HttpURLConnection con = null;
  try {
    con = (HttpURLConnection) new URL(posturl).openConnection();
    con.setRequestMethod("POST");
    con.setDoOutput(true);
    con.setDoInput(true);
    con.setUseCaches(false);
    con.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
    con.setRequestProperty(
        "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:37.0) Gecko/20100101 Firefox/37.0");

    try (OutputStreamWriter osw = new OutputStreamWriter(con.getOutputStream(), "UTF-8")) {
      osw.write("ResultAbstractFormat=BibTex&go=");
    }

    StringBuilder buffer = new StringBuilder();
    try (BufferedReader br =
        new BufferedReader(new InputStreamReader(con.getInputStream(), "UTF-8"))) {
      String line;
      while ((line = br.readLine()) != null) {
        buffer.append(line);
        buffer.append("\n");
      }
    }
    return buffer.toString();
  } catch (Exception e) {
    e.printStackTrace();
    return null;
  } finally {
    // Release the connection only once the response has been consumed.
    if (con != null) {
      con.disconnect();
    }
  }
}
/**
 * Downloads the latest version code, without following redirects.
 *
 * @return the trimmed response body parsed as a decimal integer
 * @throws IOException if the request fails
 * @throws NumberFormatException if the body is not a valid integer
 */
public static Integer getLatestVersionCode() throws IOException, NumberFormatException {
  String rawBody =
      Jsoup.connect(URL_LATEST_VERSION_CODE).followRedirects(false).execute().body();
  Integer versionCode = Integer.valueOf(rawBody.trim());
  Log.d(LOG_TAG, "getLatestVersionCode versionCode: " + versionCode);
  return versionCode;
}
/** Mudah is not standardized, result will be messy if crawl them */ @Override public List<Item> parse(String query, int size) throws IOException { // request for a page Document doc = Jsoup.connect("http://www.mudah.my/li?q=" + query) .userAgent(Constant.HTTP_USER_AGENT) .timeout(Constant.HTTP_TIMEOUT) .get(); Elements listS = doc.select("div.listing_thumbs").first().select("div.list_ads"); ArrayList<Item> result = new ArrayList<Item>(size); for (int i = 0; i < listS.size(); i++) { Element list = listS.get(i); String img = ""; list.select("div.image_thumb"); Elements imgS = list.select("div.image_thumb > a + img"); if (imgS.size() < 0) { // some may not have images img = imgS.first().attr("href"); } Element listE = list.select("li.listing_ads_title").first(); String title = listE.child(0).text(); String url = listE.child(0).attr("href"); String price = listE.text(); price = price.substring(price.lastIndexOf("RM") + 2).trim().replaceAll(" ", ""); int dPrice = Integer.parseInt(price); result.add(new Item("Mudah", title, dPrice, img, url)); } return result; }
/**
 * Fetches {@code url} and returns the headline links, i.e. every {@code a[rel=bookmark]} inside
 * the body.
 *
 * @param url page to fetch
 * @param pattern not read by this implementation
 * @return the matching links, possibly empty
 * @throws IOException if the page cannot be fetched
 */
@Override
public Elements fetchContent(String url, String pattern) throws IOException {
  Document doc = Jsoup.connect(url).get();
  // A separate "body" query used to run here but its result was never used.
  return doc.select("body a[rel=bookmark]");
}
/**
 * Executes an HTTP request with a 10-second timeout, any content type accepted, and no limit on
 * the body size.
 *
 * @param url address to request
 * @param method HTTP method to use
 * @param cookies cookies to send, or {@code null} for none
 * @param data request data to send, or {@code null} for none
 * @return the response, or {@code null} if the request failed with an {@link IOException} (the
 *     exception is only printed)
 */
private static Response execute(
    String url, Method method, Map<String, String> cookies, Map<String, String> data) {
  Connection connection =
      Jsoup.connect(url).method(method).timeout(10000).ignoreContentType(true).maxBodySize(0);

  if (cookies != null) {
    connection.cookies(cookies);
  }
  if (data != null) {
    for (Entry<String, String> field : data.entrySet()) {
      connection.data(field.getKey(), field.getValue());
    }
  }

  try {
    return connection.execute();
  } catch (IOException e) {
    e.printStackTrace();
    return null;
  }
}
public Worker(String url, boolean verbose) throws Exception { Document doc; doc = Jsoup.connect(url).get(); // select anchors with href only Elements links = doc.select("a[href]"); String l_Href; String host; int linksNum; Parser parser; for (Element link : links) { // absolute = http:// added l_Href = link.attr("abs:href"); if (!l_Href.isEmpty()) { parser = new Parser(l_Href); host = parser.getHost(); // if tempStats contains the url, add one to the value if (tempStats.containsKey(host)) { linksNum = tempStats.get(host); tempStats.put(host, linksNum += 1); } // if it doesn't, add it else { tempStats.put(host, 1); } // parse the url tempQueue.add(parser.getURL()); } } if (verbose) { System.out.println( Thread.currentThread().getName() + " : " + tempQueue.size() + " links from " + url); } }
/**
 * Loads the jkanime home page on a helper thread and returns its featured shows.
 *
 * <p>Each element with class {@code home_portada_bg} yields one entry built from its first link
 * (text and absolute href) and first image ({@code src}). Elements lacking either are skipped;
 * previously they killed the helper thread with a {@link NullPointerException}.
 *
 * @return the entries found (empty if the page cannot be fetched), or {@code null} if the calling
 *     thread is interrupted while waiting; the interrupt flag is restored in that case
 */
public static ArrayList<EntryModel> getPopularContent() {
  final ArrayList<EntryModel> result = new ArrayList<>();
  Thread thread =
      new Thread(
          () -> {
            try {
              Document document = Jsoup.connect("http://jkanime.net/").get();
              Elements elements = document.getElementsByClass("home_portada_bg");
              for (Element element : elements) {
                Element link = element.getElementsByTag("a").first();
                Element image = element.getElementsByTag("img").first();
                if (link == null || image == null) {
                  continue;
                }
                result.add(
                    new EntryModel(
                        Constants.TYPE_SHOW,
                        link.text(),
                        link.attr("abs:href"),
                        image.attr("src")));
              }
            } catch (IOException e) {
              e.printStackTrace();
            }
          });
  thread.start();
  try {
    // join() makes the helper's writes to result visible to this thread.
    thread.join();
    return result;
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();
    e.printStackTrace();
    return null;
  }
}
/**
 * Searches jkanime for {@code query} on a helper thread and returns the matching shows.
 *
 * <p>Spaces in the query are replaced by underscores before it is appended to the search URL.
 * Each element with class {@code search} yields one entry built from its first {@code titl}
 * element (text and absolute href) and first image ({@code src}). Elements lacking either are
 * skipped; previously they killed the helper thread with a {@link NullPointerException}.
 *
 * @param query search text
 * @return the entries found (empty if the page cannot be fetched), or {@code null} if the calling
 *     thread is interrupted while waiting; the interrupt flag is restored in that case
 */
public static ArrayList<EntryModel> getSearchResults(final String query) {
  final ArrayList<EntryModel> result = new ArrayList<>();
  Thread thread =
      new Thread(
          () -> {
            try {
              Document document =
                  Jsoup.connect("http://jkanime.net/buscar/" + query.replace(" ", "_")).get();
              Elements elements = document.getElementsByClass("search");
              for (Element element : elements) {
                Element titleElement = element.getElementsByClass("titl").first();
                Element image = element.getElementsByTag("img").first();
                if (titleElement == null || image == null) {
                  continue;
                }
                result.add(
                    new EntryModel(
                        Constants.TYPE_SHOW,
                        titleElement.text(),
                        titleElement.attr("abs:href"),
                        image.attr("src")));
              }
            } catch (IOException e) {
              e.printStackTrace();
            }
          });
  thread.start();
  try {
    // join() makes the helper's writes to result visible to this thread.
    thread.join();
    return result;
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();
    e.printStackTrace();
    return null;
  }
}
@Override public List<String> parseCategory(String categoryName, String categoryURL) { // TODO Auto-generated method stub List<String> linksByCategoryList = null; try { Document doc = Jsoup.connect(categoryURL).timeout(Constants.MAX_DELAY_TIME * 1000).get(); Elements links = doc.select("div[class=views-field views-field-title]").select("a"); if (links != null && links.size() > 0) { linksByCategoryList = new ArrayList<String>(); for (Element element : links) { String newsLink = element.attr("href"); newsLink = newsLink.substring(1); linksByCategoryList.add(newsLink); } } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return linksByCategoryList; }
@Override protected String doInBackground(String... urls) { Document doc = null; try { Connection.Response res = Jsoup.connect(urls[0]) .data("eid", mUserName) .timeout(3000) .data("pw", mPassword) .data("submit", "Login") .method(Method.POST) .execute(); doc = res.parse(); // get the cookie mCookieValue = res.cookie(COOKIE_TYPE); mLoginResponse = doc.toString(); /* Log.w("response", doc.toString());*/ } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return mLoginResponse; }
/**
 * Downloads the v2 feed entry for {@code ID_VIDEO}, copies its fields into {@code values}, then
 * fetches the document named by the feed's {@code uri} element to read the author's thumbnail.
 *
 * <p>When {@code showLog} is set, every extracted value is also written to the debug log. An
 * {@link IOException} sets {@code error} and is printed.
 *
 * @param params unused
 * @return always {@code null}
 */
@Override
protected Object doInBackground(Object[] params) {
  final String browserAgent =
      "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.172 Safari/537.22";
  try {
    Document feed =
        Jsoup.connect("https://gdata.youtube.com/feeds/api/videos/" + ID_VIDEO + "?v=2")
            .userAgent(browserAgent)
            .timeout(60000)
            .ignoreContentType(true)
            .get();

    // Each value is read once, stored, and reused for the optional logging below.
    String title = feed.getElementsByTag("title").text();
    values.setTitleVideo(title);
    String description = feed.getElementsByTag("media:description").text();
    values.setDescriptionVideo(description);
    String likes = feed.select("yt|rating").attr("numLikes");
    values.setLikeVideo(likes);
    String dislikes = feed.select("yt|rating").attr("numDislikes");
    values.setDislikeVideo(dislikes);
    String average = feed.select("gd|rating").attr("average");
    values.setAverageVideo(average);
    String views = feed.select("yt|statistics").attr("viewCount");
    values.setViewsVideo(views);
    String duration = feed.select("yt|duration").attr("seconds");
    values.setDurationVideo(duration);
    String thumbnail = feed.select("media|thumbnail").first().attr("url");
    values.setThumbnailVideo(thumbnail);
    String author = feed.getElementsByTag("name").text();
    values.setAuthorVideo(author);
    String authorId = feed.getElementsByTag("yt:uploaderId").text();
    values.setAuthorVideoId(authorId);
    String commentCount = feed.select("gd|feedLink").attr("countHint");
    values.setNumberCommentsVideo(commentCount);
    String commentFeed = feed.select("gd|feedLink").attr("href");
    values.setFeedCommentsVideo(commentFeed);
    String category = feed.select("media|category").attr("label");
    values.setCategoryVideo(category);
    String uploaded = feed.getElementsByTag("yt:uploaded").text();
    values.setDateVideo(uploaded);

    // The author's profile document carries the author thumbnail.
    Document image =
        Jsoup.connect(feed.getElementsByTag("uri").text())
            .userAgent(browserAgent)
            .timeout(60000)
            .ignoreContentType(true)
            .get();
    String authorThumbnail = image.select("media|thumbnail").attr("url");
    values.setThumbnailAuthor(authorThumbnail);

    if (showLog) {
      String[] logged = {
        title,
        description,
        likes,
        dislikes,
        average,
        views,
        duration,
        thumbnail,
        author,
        authorId,
        commentCount,
        commentFeed,
        category,
        uploaded,
        authorThumbnail
      };
      for (String entry : logged) {
        Log.d(TAG_LOG, entry);
      }
    }
  } catch (IOException e) {
    error = true;
    e.printStackTrace();
  }
  return null;
}