Пример #1
0
  /**
   * Processes a single title: downloads its main page, records its name and latest volume in the
   * {@code new*List} fields, and - unless {@code needUpdate} reports that nothing is new - handles
   * the title picture, the title comments and every volume that still has to be processed.
   *
   * <p>Two crawl strategies are used:
   *
   * <ul>
   *   <li>normal mode: iterate over the volume URLs found on the main page, starting at the index
   *       returned by {@code getExistedVolumeCount};
   *   <li>step-by-step mode: when the main page lists no volumes but still contains
   *       {@code g_last_chapter_id}, start at the last volume ID and follow
   *       {@code getPreviousVolumeID} until it returns {@code null}.
   * </ul>
   *
   * @param tagName the title's tag; it is appended to {@code baseURL} to build the main page URL
   *     and is used as the title's output sub-directory name
   */
  private void handleSingleTitle(String tagName) {
    String titleURL = Common.getRegularURL(baseURL + "/" + tagName);
    String titleText = getAllPageString(titleURL);

    // The title name and the volume lists are read with tsukkomiMode switched off; the flag is
    // switched back on once both have been fetched.
    tsukkomiMode = false;
    String titleName = getTitleOnMainPage(titleURL, titleText);

    if (titleName == null) {
      // NOTE(review): tsukkomiMode stays false on this early return - confirm that is intended.
      print("無效的作品主頁網址: " + titleURL);
      return;
    }

    List<List<String>> combinationList = getVolumeTitleAndUrlOnMainPage(titleURL, titleText);
    List<String> volumeTitleList = combinationList.get(0);
    List<String> volumeUrlList = combinationList.get(1);
    tsukkomiMode = true;

    boolean stepByStepMode = false;
    String lastVolumeID = getLastVolumeID(titleText);
    String lastVolumeTitle = getLastVolumeTitle(titleText);

    // Recorded for main_list.js; this happens even when the title is skipped below.
    newTitleList.add(titleName);
    newTagList.add(tagName);
    newVolumeTitleList.add(lastVolumeTitle);
    newVolumeDirList.add(lastVolumeID);

    // Nothing new to fetch: either skip this title or fall back to step-by-step crawling.
    if (!needUpdate(tagName, volumeTitleList)) {
      int lastIndex = volumeTitleList.size() - 1;
      if (lastIndex >= 0) {
        print(
            "跳過 , 因為 " + titleName + "[" + tagName + "] 已有最新集數: " + volumeTitleList.get(lastIndex));
        return;
      }
      if (titleText.indexOf("g_last_chapter_id") <= 0) {
        print("跳過 , 因為 " + titleName + "[" + tagName + "] 沒有任何集數 ");
        return;
      }
      // The volume list is empty but the page still names a last chapter, so the volumes have to
      // be walked one at a time.
      print("集數列表因為版權而拿掉 , 需要一集一集慢慢爬");
      stepByStepMode = true;
    }

    String titleIntroduction = getTitleIntroduction(titleText);
    handleTitlePic(tagName, titleText);

    // In step-by-step mode the title comments are only fetched when comment.js does not exist yet.
    if (!stepByStepMode
        || !new File(getBaseOutputDirectory() + tagName + Common.getSlash() + "comment.js")
            .exists()) {
      handleTitleComment(tagName, titleText);
    }

    int volumeCount = volumeUrlList.size();
    int existedVolumeCount = getExistedVolumeCount(tagName);

    if (!stepByStepMode) {
      // Normal mode: handle every listed volume from index existedVolumeCount onwards.
      for (int j = existedVolumeCount; j < volumeCount; j++) {
        String volumeText = getAllPageString(volumeUrlList.get(j));
        handleSingleVolume(
            tagName, titleName, titleIntroduction, volumeTitleList.get(j), volumeText);
      }
      return;
    }

    // Step-by-step mode: start from the last volume and follow the "previous volume" chain.
    // A null starting ID ends the walk immediately instead of requesting ".../null.shtml".
    String nowVolumeID = lastVolumeID;
    while (nowVolumeID != null) {
      String volumeURL = baseURL + "/" + tagName + "/" + nowVolumeID + ".shtml";
      String volumeText = getAllPageString(volumeURL);

      // The volume title is the first double-quoted string after "g_chapter_name".
      int nameIndex = volumeText.indexOf("g_chapter_name");
      int beginIndex = nameIndex < 0 ? -1 : volumeText.indexOf("\"", nameIndex) + 1;
      int endIndex = beginIndex <= 0 ? -1 : volumeText.indexOf("\"", beginIndex);
      if (endIndex < 0) {
        // Without the marker or its quotes there is no usable title; stop rather than slicing
        // the page with invalid indexes.
        print("找不到集數名稱 , 停止爬取: " + volumeURL);
        break;
      }

      String volumeTitle = volumeText.substring(beginIndex, endIndex);
      print("正要處理的集數: " + volumeTitle + " : " + volumeURL);
      handleSingleVolume(tagName, titleName, titleIntroduction, volumeTitle, volumeText);

      nowVolumeID = getPreviousVolumeID(volumeText);
    }
  }
Пример #2
0
  /**
   * Downloads all comments of a title and passes them to {@code outputListFile} under the file
   * name {@code comment.js} in the title's output directory.
   *
   * <p>The comment URL is built from the single-quoted value that follows {@code token32:} in
   * {@code text}; when that token cannot be found the method returns without doing anything. The
   * number of comment pages is read from the pagination links of the first comment page and
   * defaults to 1 when it cannot be parsed. Each page is then scanned for
   * {@code post-by hovercard} entries, from which the author name, the comment body and the date
   * are extracted.
   *
   * <p>Parsing of a page stops at the first entry whose markup is incomplete, so the three
   * output lists always have the same length.
   *
   * @param tagName the title's tag, used in the log line and as the output sub-directory name
   * @param text content of the title's main page
   */
  private void handleTitleComment(String tagName, String text) {
    int tokenIndex = text.indexOf("token32:");
    if (tokenIndex < 0) {
      return;
    }
    int tokenBegin = text.indexOf("'", tokenIndex) + 1;
    int tokenEnd = text.indexOf("'", tokenBegin);

    if (tokenBegin <= 0 || tokenEnd <= 0) {
      return;
    }

    String token32 = text.substring(tokenBegin, tokenEnd);
    String commentURL = Common.getRegularURL("http://t.178.com/resource/show?token32=" + token32);
    print(tagName + "'s commentURL : " + commentURL);

    // Determine the number of comment pages from the first page.
    String firstPageText = getAllPageString(commentURL);
    int pageCount = 1;
    // Step back from the last "<li><a href=" to reach the entry before it; the page count is
    // the link text after that entry's "page=" parameter.
    // NOTE(review): presumably the very last entry is a "next page" link - confirm.
    int pagerIndex = firstPageText.lastIndexOf("<li><a href=") - 5;
    if (pagerIndex > 0) {
      pagerIndex = firstPageText.lastIndexOf("<li><a href=", pagerIndex);
      pagerIndex = firstPageText.indexOf("page=", pagerIndex);
      if (pagerIndex >= 0) {
        int countBegin = firstPageText.indexOf(">", pagerIndex) + 1;
        int countEnd = firstPageText.indexOf("<", countBegin);
        if (countBegin > 0 && countEnd >= countBegin) {
          try {
            pageCount = Integer.parseInt(firstPageText.substring(countBegin, countEnd));
          } catch (NumberFormatException ignored) {
            // Unexpected pagination markup: keep the default of a single page.
          }
        }
      }
    }

    List<String> nameList = new ArrayList<String>();
    List<String> dateList = new ArrayList<String>();
    List<String> commentList = new ArrayList<String>();

    // Download every comment page and collect its entries.
    for (int page = 1; page <= pageCount; page++) {
      String pageText = getAllPageString(commentURL + "&page=" + page);
      int cursor = 0;
      while (true) {
        cursor = pageText.indexOf("post-by hovercard", cursor);
        if (cursor < 0) {
          break;
        }

        // Author name: text between the end of the current tag and the next '<'.
        int nameBegin = pageText.indexOf(">", cursor) + 1;
        int nameEnd = nameBegin <= 0 ? -1 : pageText.indexOf("<", nameBegin);
        if (nameEnd < 0) {
          break;
        }

        // Comment body: text right after the next "-->" up to the next '<'.
        int markerIndex = pageText.indexOf("-->", nameBegin);
        if (markerIndex < 0) {
          // Without this check a negative index would restart the search at the top of the
          // page and the loop would never terminate.
          break;
        }
        int commentBegin = pageText.indexOf(">", markerIndex) + 1;
        int commentEnd = pageText.indexOf("<", commentBegin);
        if (commentEnd < 0) {
          break;
        }

        // Comment date: link text of the next "<a href=" element.
        int linkIndex = pageText.indexOf("<a href=", commentBegin);
        if (linkIndex < 0) {
          break;
        }
        int dateBegin = pageText.indexOf(">", linkIndex) + 1;
        int dateEnd = dateBegin <= 0 ? -1 : pageText.indexOf("<", dateBegin);
        if (dateEnd < 0) {
          break;
        }

        String date = pageText.substring(dateBegin, dateEnd).trim();
        date = Common.getTraditionalChinese(date);
        date = getFormatDate(date);

        // Add all three fields together so the lists stay aligned.
        nameList.add(pageText.substring(nameBegin, nameEnd).trim());
        commentList.add(pageText.substring(commentBegin, commentEnd).trim());
        dateList.add(date);

        cursor = dateBegin;
      }
    }

    // Write out the comments in the order name, comment, date.
    List<List<String>> combinationList = new ArrayList<List<String>>();
    combinationList.add(nameList);
    combinationList.add(commentList);
    combinationList.add(dateList);

    String filePath = getBaseOutputDirectory() + tagName + Common.getSlash();
    // "TITLE_COMMONET" is a runtime identifier and is kept exactly as spelled.
    outputListFile(combinationList, "TITLE_COMMONET", filePath, "comment.js");
  }