/*
   * I haven't found a direct way of extracting the download URL of a Mixcloud track.
   * Mixcloud's track preview URLs and full download URLs are similar. The preview URL for
   * a Mixcloud track is simple to extract.
   *
   * This method replaces the "previews" part of the preview URL with "cloudcasts/originals" and then
   * cycles through all of Mixcloud's stream servers until the download URL is found.
   *
   * Similarity between Mixcloud preview URL and full download URL:
   * http://stream8.mxcdn.com/previews/9/6/a/e/93a8-2d77-4573-85c5-68bfb679d9bc.mp3 - preview URL
   * http://stream11.mxcdn.com/cloudcasts/originals/9/6/a/e/93a8-2d77-4573-85c5-68bfb679d9bc.mp3 - download URL
   */
  private String generateStreamURL() throws IOException {
    final String browserAgent =
        "Mozilla/5.0 (Windows NT 6.1; rv:17.0) Gecko/20100101 Firefox/17.0";
    final int highestServerNumber = 30;

    // Rewrite the preview path into the path used for the full-length originals.
    final String originalsUrl =
        this.getPreviewURL().replaceAll("previews", "cloudcasts/originals");

    // First try the rewritten URL on the same server the preview came from.
    boolean servedDirectly = true;
    try {
      Jsoup.connect(originalsUrl).ignoreContentType(true).userAgent(browserAgent).execute();
    } catch (HttpStatusException rejected) {
      servedDirectly = false;
    }
    if (servedDirectly) {
      return originalsUrl;
    }

    // The original server refused; probe stream1..stream30 in ascending order.
    for (int server = 1; server <= highestServerNumber; server++) {
      final String candidate = originalsUrl.replaceAll("stream[0-9]+", "stream" + server);
      try {
        Response probe =
            Jsoup.connect(candidate).ignoreContentType(true).userAgent(browserAgent).execute();
        // A parsed body shorter than 2000 characters is treated as a miss.
        if (probe.parse().toString().length() >= 2000) {
          return candidate;
        }
      } catch (HttpStatusException unavailable) {
        // This server answered with an HTTP error status; move on to the next one.
      }
    }

    // No server delivered the file.
    return null;
  }
示例#2
0
  /**
   * Downloads {@code url}, extracts a title and a content fragment from it using the configured
   * selectors, and stores them in {@code resultTitle} and {@code resultCont}.
   *
   * <p>{@code title_rex}, {@code cont_rex}, {@code auth_rex} and {@code extra_rex} are passed to
   * {@code Document.select}. If the second space-separated token of {@code cont_rex} is
   * {@code "all"}, {@code cont_len} is set to -1, {@code cont_rex} is reduced to its first token
   * (both changes persist on the instance), and the HTML of every matched content element is
   * concatenated instead of only the first.
   *
   * @param ok value returned when a title element was found
   * @param fail value returned when no title matched or the request failed with an IOException
   * @param url address of the page to fetch
   * @param isShort when true the page is fetched via {@code execute()} with redirects followed
   *     and no explicit timeout; otherwise via {@code get()} with a 10 second timeout
   * @return {@code ok} or {@code fail} as described above
   */
  public int crawl(int ok, int fail, String url, boolean isShort) {
    // Null must be checked before dereferencing; the reverse order threw NPE for a null ua.
    if (ua != null && !ua.equals("")) {
      UA = ua;
    }
    try {
      Document doc;
      if (isShort) {
        Response resp = Jsoup.connect(url).userAgent(UA).followRedirects(true).execute();
        doc = resp.parse();
      } else {
        doc = Jsoup.connect(url).userAgent(UA).timeout(10000).get();
      }
      resultTitle = resultCont = "";

      if (cont_rex.contains(" ")) {
        String[] cgp = cont_rex.trim().split(" ");
        // Trimming can leave a single token (e.g. "div "), so guard the index.
        if (cgp.length > 1 && cgp[1].equals("all")) {
          cont_len = -1;
          cont_rex = cgp[0];
        }
      }

      Elements eletitle = doc.select(this.title_rex);
      Elements elecont = doc.select(this.cont_rex);
      Elements eleauth = null;
      Elements eleextra = null;

      if (Constant.DEBUG) {
        FileUtils.writeFile(doc.html(), "clip");
      }

      if (!auth_rex.equals("")) {
        eleauth = doc.select(this.auth_rex);
      }
      if (!extra_rex.equals("")) {
        eleextra = doc.select(this.extra_rex);
      }

      if (eletitle.size() > 0) {
        resultTitle = eletitle.get(0).html();
        if (elecont.size() > 0) {
          elecont = addStyleForTable(elecont);
          if (cont_len == -1) {
            // "all" mode: join the HTML of every matched content element.
            StringBuilder joined = new StringBuilder(resultCont);
            for (Element ele : elecont) {
              joined.append(ele.html());
            }
            resultCont = joined.toString();
          } else {
            resultCont = elecont.get(0).html();
          }
        }
        if (!auth_rex.equals("")) {
          // The first author match is prepended as its own paragraph.
          if (eleauth.size() > 0) {
            resultCont = "<p>" + eleauth.get(0).html() + "</p>" + resultCont;
          }
        }
        if (!extra_rex.equals("")) {
          eleextra = addStyleForTable(eleextra);
          // The first extra match is appended after the main content.
          if (eleextra.size() > 0) {
            resultCont = resultCont + eleextra.get(0).html();
          }
        }

        return ok;
      } else {
        // Log message: "no title matched".
        MLog.e("", "没有匹配到title");
        return fail;
      }

    } catch (IOException e) {
      // Log message: "no data received from the request".
      MLog.e("", "没有请求到数据");
      return fail;
    }
  }
示例#3
0
  /**
   * Posts the stored credentials (plus optional captcha data) to the login endpoint and, on
   * success, brings the session up.
   *
   * <p>Success is detected by an {@code input[name=skypetoken]} element in the response. In that
   * case the token is stored, the first cookie from the ASM token response is added to
   * {@code cookies}, the endpoint and web socket are registered, contacts and contact requests
   * are loaded, {@code loggedIn} is set and the keepalive and re-authentication threads are
   * started.
   *
   * <p>Otherwise the response is searched for a captcha container. If a captcha image URL with
   * {@code hid} and {@code fid} parameters can be extracted, a {@link CaptchaEvent} is dispatched
   * and, when it yields an answer, this method calls itself with
   * {@code {answer, hid, fid}}. If no captcha was handled, the {@code .message_error} element is
   * used to build an {@link InvalidCredentialsException}.
   *
   * @param captchaData captcha answer, hid and fid forwarded to {@code postToLogin}; what a null
   *     value means is decided by {@code postToLogin} (not visible here)
   * @throws InvalidCredentialsException if the response carries no token and no captcha was
   *     handled
   * @throws ConnectionException propagated from the network helpers this method calls
   * @throws ParseException if the login response cannot be parsed
   * @throws IllegalStateException if the ASM token response has no {@code Set-Cookie} header
   */
  private void login(String[] captchaData)
      throws InvalidCredentialsException, ConnectionException, ParseException {
    final Response loginResponse = postToLogin(username, password, captchaData);
    this.cookies = new HashMap<>(loginResponse.cookies());
    Document loginResponseDocument;
    try {
      loginResponseDocument = loginResponse.parse();
    } catch (IOException e) {
      throw new ParseException("While parsing the login response", e);
    }
    Elements inputs = loginResponseDocument.select("input[name=skypetoken]");
    if (inputs.size() > 0) {
      this.setSkypeToken(inputs.get(0).attr("value"));
      HttpURLConnection asmResponse = getAsmToken();
      String setCookieHeader = asmResponse.getHeaderField("Set-Cookie");
      if (setCookieHeader == null) {
        // Fail with a descriptive message instead of a bare NullPointerException.
        throw new IllegalStateException("ASM token response did not contain a Set-Cookie header");
      }
      // Keep only the leading name=value pair; attributes after the first ';' are dropped.
      String firstPair = setCookieHeader.split(";", 2)[0];
      // Limit 2 keeps '=' characters inside the value (e.g. base64 padding) intact.
      String[] setCookie = firstPair.split("=", 2);
      this.cookies.put(setCookie[0], setCookie.length > 1 ? setCookie[1] : "");

      registerEndpoint();

      this.loadAllContacts();
      this.getContactRequests(false);
      try {
        this.registerWebSocket();
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
      loggedIn.set(true);
      (sessionKeepaliveThread = new KeepaliveThread(this)).start();
      (reauthThread = new AuthenticationChecker(this)).start();
    } else {
      // Set once a captcha round-trip has been completed, which suppresses the error lookup below.
      boolean foundError = false;
      Elements captchas = loginResponseDocument.select("#captchaContainer");
      if (captchas.size() > 0) {
        Element captcha = captchas.get(0);
        String url = null;
        for (Element scriptTag : captcha.getElementsByTag("script")) {
          String text = scriptTag.html();
          if (text.contains("skypeHipUrl")) {
            // The URL is taken as everything between the first and last double quote.
            url = text.substring(text.indexOf('"') + 1, text.lastIndexOf('"'));
          }
        }
        if (url != null) {
          try {
            String rawjs = Endpoints.custom(url, this).as(String.class).get();
            Pattern p = Pattern.compile("imageurl:'([^']*)'");
            Matcher m = p.matcher(rawjs);
            if (m.find()) {
              String imgurl = m.group(1);
              m = Pattern.compile("hid=([^&]*)").matcher(imgurl);
              if (m.find()) {
                String hid = m.group(1);
                m = Pattern.compile("fid=([^&]*)").matcher(imgurl);
                if (m.find()) {
                  String fid = m.group(1);
                  CaptchaEvent event = new CaptchaEvent(imgurl);
                  getEventDispatcher().callEvent(event);
                  String response = event.getCaptcha();
                  if (response != null) {
                    // Retry the login with the answer supplied through the event.
                    login(new String[] {response, hid, fid});
                  } else {
                    throw new CaptchaException();
                  }
                  foundError = true;
                }
              }
            }
          } catch (ConnectionException e) {
            // Report the failure and fall through to the generic error lookup.
            MinorErrorEvent err =
                new MinorErrorEvent(MinorErrorEvent.ErrorSource.PARSING_CAPTCHA, e);
            getEventDispatcher().callEvent(err);
          }
        }
      }
      if (!foundError) {
        Elements elements = loginResponseDocument.select(".message_error");
        if (elements.size() > 0) {
          Element div = elements.get(0);
          if (div.children().size() > 1) {
            Element span = div.child(1);
            throw new InvalidCredentialsException(span.text());
          }
          // Previously this case returned silently without logging in; report it instead.
          throw new InvalidCredentialsException(div.text());
        } else {
          throw new InvalidCredentialsException(
              "Could not find error message. Dumping entire page. \n"
                  + loginResponseDocument.html());
        }
      }
    }
  }