/**
 * Derives the full download URL of a Mixcloud track from its preview URL.
 *
 * <p>No direct way of extracting the download URL has been found, but the preview URL is
 * simple to extract and closely resembles the download URL:
 * <pre>
 * http://stream8.mxcdn.com/previews/9/6/a/e/93a8-2d77-4573-85c5-68bfb679d9bc.mp3              - preview URL
 * http://stream11.mxcdn.com/cloudcasts/originals/9/6/a/e/93a8-2d77-4573-85c5-68bfb679d9bc.mp3 - download URL
 * </pre>
 * The "previews" part of the preview URL is replaced with "cloudcasts/originals" and the
 * resulting URL is requested. If that request is rejected with an HTTP status error, the
 * host is rewritten to stream1, stream2, ... stream30 in turn until a candidate is accepted.
 *
 * @return the first URL that was accepted, or {@code null} if no candidate was accepted
 * @throws IOException if a call made here fails with an I/O error other than an HTTP
 *         status error
 */
private String generateStreamURL() throws IOException {
    final String agent = "Mozilla/5.0 (Windows NT 6.1; rv:17.0) Gecko/20100101 Firefox/17.0";
    final String originalsUrl = this.getPreviewURL().replaceAll("previews", "cloudcasts/originals");

    try {
        // Only the success/failure of the request matters; the response itself is discarded.
        Jsoup.connect(originalsUrl)
                .ignoreContentType(true)
                .userAgent(agent)
                .execute();
        return originalsUrl;
    } catch (HttpStatusException firstAttempt) {
        // The host taken from the preview URL does not serve the original; try the others.
    }

    final int lastServer = 30;
    for (int server = 1; server <= lastServer; server++) {
        final String candidate = originalsUrl.replaceAll("stream[0-9]+", ("stream" + server));
        try {
            Response reply = Jsoup.connect(candidate)
                    .ignoreContentType(true)
                    .userAgent(agent)
                    .execute();
            // A parsed body shorter than 2000 characters is treated as a miss
            // (presumably an error page rather than the audio file - TODO confirm).
            if (reply.parse().toString().length() >= 2000) {
                return candidate;
            }
        } catch (HttpStatusException cycledAttempt) {
            // This server rejected the request; move on to the next one.
        }
    }
    return null;
}
public int crawl(int ok, int fail, String url, boolean isShort) { if (!ua.equals("") && ua != null) UA = ua; try { Document doc; if (isShort) { Response resp = Jsoup.connect(url).userAgent(UA).followRedirects(true).execute(); doc = resp.parse(); } else { doc = Jsoup.connect(url).userAgent(UA).timeout(10000).get(); } resultTitle = resultCont = ""; /*MLog.e("","title_rex="+title_rex); MLog.e("","cont_rex="+cont_rex); MLog.e("","auth_rex="+auth_rex); MLog.e("","extra_rex="+extra_rex); MLog.e("","source="+source+" url="+url);*/ if (cont_rex.contains(" ")) { String ctemp = cont_rex.trim(); String[] cgp = ctemp.split(" "); if (cgp[1].equals("all")) { cont_len = -1; cont_rex = cgp[0]; } } Elements eletitle = doc.select(this.title_rex), eleauth = null, elecont = doc.select(this.cont_rex), eleextra = null; if (Constant.DEBUG) FileUtils.writeFile(doc.html(), "clip"); if (!auth_rex.equals("")) eleauth = doc.select(this.auth_rex); if (!extra_rex.equals("")) eleextra = doc.select(this.extra_rex); if (eletitle.size() > 0) { resultTitle = eletitle.get(0).html(); if (elecont.size() > 0) { elecont = addStyleForTable(elecont); if (cont_len == -1) { for (Element ele : elecont) { resultCont = resultCont + ele.html(); } } else resultCont = elecont.get(0).html(); } if (!auth_rex.equals("")) { if (eleauth.size() > 0) resultCont = "<p>" + eleauth.get(0).html() + "</p>" + resultCont; } if (!extra_rex.equals("")) { eleextra = addStyleForTable(eleextra); if (eleextra.size() > 0) resultCont = resultCont + eleextra.get(0).html(); } return ok; } else { MLog.e("", "没有匹配到title"); return fail; } } catch (IOException e) { // TODO Auto-generated catch block MLog.e("", "没有请求到数据"); return fail; } }
/**
 * Posts the credentials to the login page and, depending on the response, either
 * completes the login or reports why it failed.
 *
 * <p>If the response contains an {@code input[name=skypetoken]} element, the token is
 * stored, the cookie from the ASM token response is recorded, the endpoint and web socket
 * are registered, contacts and contact requests are loaded, {@code loggedIn} is set and
 * the keep-alive and re-authentication threads are started.
 *
 * <p>Otherwise, if the page contains a captcha container whose script yields an image URL
 * with {@code hid} and {@code fid} parameters, a {@link CaptchaEvent} is dispatched and
 * the login is retried with the answer supplied by the event handler. If the captcha flow
 * did not run to completion, an {@link InvalidCredentialsException} is thrown carrying
 * the error message found on the page, or a dump of the page if no message can be found.
 *
 * @param captchaData forwarded to {@code postToLogin}; on a captcha retry it holds
 *                    {@code {response, hid, fid}}
 * @throws InvalidCredentialsException if the page reports an error, or no token, captcha
 *                                     or error message could be located
 * @throws ConnectionException         propagated from the network helpers called here
 * @throws ParseException              if the login response cannot be parsed
 */
private void login(String[] captchaData) throws InvalidCredentialsException, ConnectionException, ParseException {
    final Response loginResponse = postToLogin(username, password, captchaData);
    this.cookies = new HashMap<>(loginResponse.cookies());
    Document loginResponseDocument;
    try {
        loginResponseDocument = loginResponse.parse();
    } catch (IOException e) {
        throw new ParseException("While parsing the login response", e);
    }

    Elements inputs = loginResponseDocument.select("input[name=skypetoken]");
    if (inputs.size() > 0) {
        this.setSkypeToken(inputs.get(0).attr("value"));
        HttpURLConnection asmResponse = getAsmToken();
        // Keep only the leading "name=value" pair of the Set-Cookie header.
        // NOTE(review): assumes the header is present and contains '=' - confirm.
        String[] setCookie = asmResponse.getHeaderField("Set-Cookie").split(";")[0].split("=");
        this.cookies.put(setCookie[0], setCookie[1]);
        registerEndpoint();
        this.loadAllContacts();
        this.getContactRequests(false);
        try {
            this.registerWebSocket();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        loggedIn.set(true);
        (sessionKeepaliveThread = new KeepaliveThread(this)).start();
        (reauthThread = new AuthenticationChecker(this)).start();
    } else {
        // Set once the captcha retry has returned; suppresses the error-message lookup below.
        boolean captchaHandled = false;
        Elements captchas = loginResponseDocument.select("#captchaContainer");
        if (captchas.size() > 0) {
            Element captcha = captchas.get(0);
            String url = null;
            for (Element scriptTag : captcha.getElementsByTag("script")) {
                String text = scriptTag.html();
                if (text.contains("skypeHipUrl")) {
                    // The URL is taken as everything between the first and last double quote.
                    url = text.substring(text.indexOf('"') + 1, text.lastIndexOf('"'));
                }
            }
            if (url != null) {
                try {
                    String rawjs = Endpoints.custom(url, this).as(String.class).get();
                    Pattern p = Pattern.compile("imageurl:'([^']*)'");
                    Matcher m = p.matcher(rawjs);
                    if (m.find()) {
                        String imgurl = m.group(1);
                        m = Pattern.compile("hid=([^&]*)").matcher(imgurl);
                        if (m.find()) {
                            String hid = m.group(1);
                            m = Pattern.compile("fid=([^&]*)").matcher(imgurl);
                            if (m.find()) {
                                String fid = m.group(1);
                                CaptchaEvent event = new CaptchaEvent(imgurl);
                                getEventDispatcher().callEvent(event);
                                String response = event.getCaptcha();
                                if (response != null) {
                                    // Retry with the captcha answer supplied by the handler.
                                    login(new String[] {response, hid, fid});
                                } else {
                                    throw new CaptchaException();
                                }
                                captchaHandled = true;
                            }
                        }
                    }
                } catch (ConnectionException e) {
                    // NOTE(review): this also catches a ConnectionException raised by the
                    // recursive login call above - confirm that is intended.
                    MinorErrorEvent err = new MinorErrorEvent(MinorErrorEvent.ErrorSource.PARSING_CAPTCHA, e);
                    getEventDispatcher().callEvent(err);
                }
            }
        }
        if (!captchaHandled) {
            Elements elements = loginResponseDocument.select(".message_error");
            if (elements.size() > 0) {
                Element div = elements.get(0);
                if (div.children().size() > 1) {
                    Element span = div.child(1);
                    throw new InvalidCredentialsException(span.text());
                }
            }
            // Reached when there is no error element, or when it lacks the expected second
            // child. Previously the latter case returned silently without logging in.
            throw new InvalidCredentialsException(
                    "Could not find error message. Dumping entire page. \n" + loginResponseDocument.html());
        }
    }
}