Ejemplo n.º 1
0
 /**
  * Program entry point: calls {@code Utils.initLog4j()} and then hands the command-line
  * arguments to {@code run}.
  *
  * @param args command-line arguments, forwarded to {@code run} unchanged
  */
 public static void main(String[] args) {
   // Logging setup runs first so that run(args) can already log.
   Utils.initLog4j();
   run(args);
 }
Ejemplo n.º 2
0
  /**
   * Parses one movie entry of the ranking list and enriches it with data from the movie's
   * detail page.
   *
   * <p>The rank, the titles and the detail-page URL are read from {@code liElement}. The detail
   * page is then downloaded (at most three attempts) and the poster URL, directors, screenwriters,
   * leading actors, genres, region, language, runtime, alias names, IMDb link and summary are
   * read from it. Fields whose markup is missing on the page are simply left unset.
   *
   * @param liElement the list-item element describing one movie in the ranking list
   * @return a {@code MovieBO} whose attached {@code MovieInfo} carries every field that was found
   * @throws Exception the last download failure once three attempts have failed, or an
   *     {@link IllegalStateException} when the detail page has no {@code #content} or
   *     {@code #info} element
   */
  public static MovieBO parseMovieInfoFromWeb(Element liElement) throws Exception {
    MovieBO movieBO = new MovieBO();
    MovieInfo movieInfo = new MovieInfo();
    movieBO.setMovieInfo(movieInfo);

    // Rank of the movie in the list.
    Elements rankElements = liElement.select(".pic > em");
    if (!rankElements.isEmpty()) {
      movieBO.setNo(Utils.toInt(rankElements.get(0).text(), 0));
    }

    // Titles: the first title element is the movie name, the second the "/"-separated
    // original name(s).
    Elements titleElements = liElement.select(".item > .info > .hd > a > .title");
    if (!titleElements.isEmpty()) {
      movieInfo.setMovieName(titleElements.get(0).text().trim());
    }
    if (titleElements.size() > 1) {
      movieInfo.setMovieOriginalName(
          Utils.toString(titleElements.get(1).text().trim().split("/")));
    }

    // Link to the detail page; the full URL is what gets stored as the Douban id.
    Elements detailLinks = liElement.select(".item > .info > .hd > a");
    if (!detailLinks.isEmpty()) {
      movieInfo.setDoubanId(detailLinks.get(0).attr("href").trim());
    }

    // Download the detail page, giving up (and rethrowing) after the third failed attempt.
    final int maxAttempts = 3;
    Document detailDoc = null;
    for (int attempt = 1; detailDoc == null; attempt++) {
      try {
        // Every request carries a freshly generated random "bid" cookie.
        detailDoc =
            Jsoup.connect(movieInfo.getDoubanId())
                .header("Cookie", "bid=\"" + SpiderUtils.getRandomStr(11) + "\"; ")
                .get();
      } catch (Exception e) {
        logger.info("第" + attempt + "次拉取第" + (movieBO.getNo()) + "条数据失败");
        if (attempt >= maxAttempts) {
          throw e;
        }
      }
    }

    Element detailContent = detailDoc.getElementById("content");
    if (detailContent == null) {
      throw new IllegalStateException(
          "No #content element on detail page " + movieInfo.getDoubanId());
    }

    // Poster image.
    Elements iconElements = detailContent.select("img[rel=v:image]");
    if (!iconElements.isEmpty()) {
      movieBO.setIconUrl(iconElements.get(0).attr("src"));
    }

    Elements infoDivs = detailContent.select(".article #info");
    if (infoDivs.isEmpty()) {
      throw new IllegalStateException(
          "No #info element on detail page " + movieInfo.getDoubanId());
    }
    Element infoDiv = infoDivs.get(0);
    // Flattened text of the info box, computed once and reused for the label-based lookups.
    String infoText = infoDiv.text();

    // Directors, screenwriters and leading actors are each optional on a detail page.
    List<String> directorList = linkTextsOfLabelledSpan(infoDiv, "导演");
    if (directorList != null) {
      movieInfo.setDirectors(Utils.toString(directorList));
    }
    List<String> writerList = linkTextsOfLabelledSpan(infoDiv, "编剧");
    if (writerList != null) {
      movieInfo.setScreenWriter(Utils.toString(writerList));
    }
    List<String> leadingRoleList = linkTextsOfLabelledSpan(infoDiv, "主演");
    if (leadingRoleList != null) {
      movieInfo.setLeadingRole(Utils.toString(leadingRoleList));
    }

    // Genres.
    List<String> typeList = trimmedTexts(infoDiv.select("span[property=v:genre]"));
    movieInfo.setType(Utils.toString(typeList));

    // Producing country/region: the text between its label and the language label.
    String region = textBetween(infoText, "制片国家/地区:", "语言:");
    if (region != null) {
      movieInfo.setRegion(region);
    }

    // Language: the text between its label and the release-date label.
    String language = textBetween(infoText, "语言:", "上映日期:");
    if (language != null) {
      movieInfo.setLanguage(language);
    }

    // Release date.
    // NOTE(review): this stores the release date through setMovieLength, which the runtime
    // block below overwrites whenever a runtime is present. It looks like a copy-paste slip;
    // the dedicated release-date setter on MovieInfo is not visible here, so the original call
    // is kept. Confirm and switch to the proper setter.
    Elements releaseElements = infoDiv.select("span[property=v:initialReleaseDate]");
    if (!releaseElements.isEmpty()) {
      movieInfo.setMovieLength(releaseElements.get(0).text());
    }

    // Runtime.
    Elements lengthElements = infoDiv.select("span[property=v:runtime]");
    if (!lengthElements.isEmpty()) {
      movieInfo.setMovieLength(lengthElements.get(0).text());
    }

    // Alias names: "/"-separated text between the alias label and the IMDb label.
    String aliasStr = textBetween(infoText, "又名:", "IMDb链接:");
    if (aliasStr != null) {
      movieInfo.setMovieAliasName(Utils.toString(aliasStr.split("/")));
    }

    // IMDb link: the anchor that directly follows the IMDb label span.
    Elements imdbElements = infoDiv.select("span:containsOwn(IMDb链接) + a");
    if (!imdbElements.isEmpty()) {
      movieInfo.setImdbId(imdbElements.get(0).attr("href").trim());
    }

    // Summary.
    Elements introElements = detailContent.select("span[property=v:summary]");
    if (!introElements.isEmpty()) {
      movieInfo.setIntroduction(introElements.get(0).text().trim());
    }

    return movieBO;
  }

  /**
   * Collects the trimmed link texts of the first span inside {@code infoDiv} whose text contains
   * {@code label}.
   *
   * @return the link texts (possibly empty), or {@code null} when no such span exists
   */
  private static List<String> linkTextsOfLabelledSpan(Element infoDiv, String label) {
    Elements labelledSpans = infoDiv.select("span:contains(" + label + ")");
    if (labelledSpans.isEmpty()) {
      return null;
    }
    return trimmedTexts(labelledSpans.get(0).select("a"));
  }

  /** Returns the trimmed text of each element, in document order. */
  private static List<String> trimmedTexts(Elements elements) {
    List<String> texts = new ArrayList<String>();
    for (Element element : elements) {
      texts.add(element.text().trim());
    }
    return texts;
  }

  /**
   * Returns the text found between {@code startLabel} and the next {@code endLabel} after it.
   *
   * @return the untrimmed text between the two labels, or {@code null} when either label is
   *     missing or nothing lies between them
   */
  private static String textBetween(String text, String startLabel, String endLabel) {
    int labelStart = text.indexOf(startLabel);
    if (labelStart < 0) {
      return null;
    }
    int contentStart = labelStart + startLabel.length();
    int contentEnd = text.indexOf(endLabel, contentStart);
    if (contentEnd <= contentStart) {
      return null;
    }
    return text.substring(contentStart, contentEnd);
  }