Exemplo n.º 1
0
  public void getAndSaveSongInfo(Document doc, String mid, String id) {
    ArrayList<String> SongInfo = null;
    sInfo = new SongInfo();
    sInfo.id = id;
    if (!crawledSongs.contains(mid)) {
      crawledSongs.add(mid);
      sInfo.mid = mid;
      if (doc != null) {
        String sname = doc.select("div.song_title").select("span").text();
        sInfo.sname = sname;
        SongInfo = extractSongInfo(doc);
      } else {
        System.err.println("The html is not fetched!");
      }
      if (SongInfo != null && SongInfo.size() > 0) {
        sInfo.singer = SongInfo.get(0);
        sInfo.language = SongInfo.get(1);
        sInfo.album = SongInfo.get(2);
        sInfo.date = SongInfo.get(3);
      }

      sDao.insertSongInfo(sInfo);
      sDao.updateSongInfo(sInfo.mid, id);
    }

    crawledSongs.add(mid);
  }
Exemplo n.º 2
0
 public void initial() throws ClassNotFoundException {
   crawledSongs = new HashSet<String>();
   sDao = new SongInfoDao();
   ArrayList<Map<String, String>> songs = sDao.getResult("mid", "qqsonginfoc");
   for (int i = 0; i < songs.size(); i++) {
     crawledSongs.add(songs.get(i).get("mid"));
   }
 }
Exemplo n.º 3
0
  public void crawlSongInfo() throws ClassNotFoundException, URISyntaxException {
    sDao = new SongInfoDao();
    ArrayList<Map<String, String>> search = sDao.getResult("*", "qqsonginfos");
    for (int i = 0; i < search.size(); i++) {
      String id = search.get(i).get("id");
      String sname = search.get(i).get("sname").trim().replace(" ", "%20");
      URI uri = new URI(sname);
      sname = uri.toASCIIString();
      String singer = search.get(i).get("singer").trim().replace(" ", "%20");
      // System.out.println(singer.length());
      uri = new URI(singer);
      singer = uri.toASCIIString();
      String album = search.get(i).get("album").trim().replace(" ", "%20");
      System.out.println("album:" + album);
      uri = new URI(album);
      album = uri.toASCIIString();
      // System.out.println("album2:"+album);
      System.out.println(id + sname + singer + album);

      String url3 =
          "http://s.plcloud.music.qq.com/fcgi-bin/smartbox.fcg?o_utf8=1&utf8=1&key="
              + sname
              + "%20"
              + singer
              + "&g_tk=5381&loginUin=0&hostUin=0&format=jsonp&inCharset=GB2312&outCharset=utf-8&notice=0&platform=yqq&jsonpCallback=MusicJsonCallBack&needNewCode=0";
      Document doc3 = null;
      doc3 = this.getHtmlContent(url3);
      String res = doc3.select("body").text();
      int length = res.length();
      res = res.substring(18, length - 2);
      System.out.println("res:" + res);
      JSONObject resObject = JSONObject.fromObject(res);
      String tips = resObject.getString("tips");
      // String t = "{\"song\":[],\"singer\":[],\"album\":[],\"mv\":[]}";
      System.out.println("tips:" + tips);
      JSONObject dataObject = JSONObject.fromObject(tips);
      String song = dataObject.getString("song");
      System.out.println("song:" + song);
      // JSONObject songObject = JSONObject.fromObject(song);
      // String itemlist = songObject.getString("itemlist");
      if (song.length() > 3) {
        JSONArray songArray = JSONArray.fromObject(song);
        //				JSONObject songObject = JSONObject.fromObject(song);
        JSONObject midObject = (JSONObject) songArray.get(0);
        String mid = midObject.getString("mid");
        System.out.println("mid:" + mid);
        String url4 =
            "http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=" + mid;
        Document doc4 = null;
        doc4 = this.getHtmlContent(url4);
        this.getAndSaveSongInfo(doc4, mid, id);
      } else {
        String url =
            "http://soso.music.qq.com/fcgi-bin/multiple_music_search.fcg?mid=1&p=1&catZhida=1&lossless=0&t=100&utf8=1&w="
                + sname
                + "%20"
                + singer
                + "%20"
                + album;
        // String url =
        // "http://soso.music.qq.com/fcgi-bin/multiple_music_search.fcg?mid=1&p=1&catZhida=1&lossless=0&t=100&utf8=1&w=ϲ����%20BEYOND%20���ܾ���";
        System.out.println(url);
        Document doc = null;
        doc = this.getHtmlContent(url);
        String url2 = this.getURL(doc);
        if (url2 != null) {
          System.out.println("url2:" + url2);
          String mid = url2.split("=")[1].split("&")[0];
          // System.out.println(mid);
          Document doc2 = null;
          doc2 = this.getHtmlContent(url2);
          this.getAndSaveSongInfo(doc2, mid, id);
        }
        // else sDao.updateSongInfo("notfound", id);
      }
    }
  }