private void handleSingleTitle(String tagName) { int beginIndex = 0; int endIndex = 0; String titleURL = Common.getRegularURL(baseURL + "/" + tagName); String titleText = getAllPageString(titleURL); // 取得標題列表和網址列表 tsukkomiMode = false; String titleName = getTitleOnMainPage(titleURL, titleText); if (titleName == null) { print("無效的作品主頁網址: " + titleURL); return; } List<List<String>> combinationList = getVolumeTitleAndUrlOnMainPage(titleURL, titleText); List<String> volumeTitleList = combinationList.get(0); List<String> volumeUrlList = combinationList.get(1); tsukkomiMode = true; // List<String> snsSysIDList = new ArrayList<String>(); // sns_sys_id // List<String> snsViewPointTokenList = new ArrayList<String>(); // sns_view_point_token String snsSysID = ""; String snsViewPointToken = ""; boolean stepByStepMode = false; String lastVolumeID = getLastVolumeID(titleText); String lastVolumeTitle = getLastVolumeTitle(titleText); // 用於main_list.js newTitleList.add(titleName); newTagList.add(tagName); newVolumeTitleList.add(lastVolumeTitle); newVolumeDirList.add(lastVolumeID); // 如果不需要更新,就跳過往下個去做 if (!needUpdate(tagName, volumeTitleList)) { int lastIndex = volumeTitleList.size() - 1; if (lastIndex < 0 && titleText.indexOf("g_last_chapter_id") > 0) { print("集數列表因為版權而拿掉 , 需要一集一集慢慢爬"); stepByStepMode = true; } else if (lastIndex < 0) { print("跳過 , 因為 " + titleName + "[" + tagName + "] 沒有任何集數 "); return; } else { print( "跳過 , 因為 " + titleName + "[" + tagName + "] 已有最新集數: " + volumeTitleList.get(lastIndex)); return; } } String titleIntroduction = getTitleIntroduction(titleText); handleTitlePic(tagName, titleText); if (!stepByStepMode || !new File(getBaseOutputDirectory() + tagName + Common.getSlash() + "comment.js") .exists()) { handleTitleComment(tagName, titleText); } int volumeCount = volumeUrlList.size(); int existedVolumeCount = getExistedVolumeCount(tagName); // print("" + tagName + ":" + volumeCount + "," + existedVolumeCount); // System.exit(0); // 取得每個集數的評論列表 for (int j = 
existedVolumeCount; j < volumeCount && !stepByStepMode; j++) // 某個作品的集數列表 { String volumeURL = volumeUrlList.get(j); String volumeText = getAllPageString(volumeURL); String volumeTitle = volumeTitleList.get(j); snsSysID = handleSingleVolume(tagName, titleName, titleIntroduction, volumeTitle, volumeText); } String nowVolumeID = lastVolumeID; while (stepByStepMode) { String volumeURL = baseURL + "/" + tagName + "/" + nowVolumeID + ".shtml"; String volumeText = getAllPageString(volumeURL); beginIndex = volumeText.indexOf("g_chapter_name"); beginIndex = volumeText.indexOf("\"", beginIndex) + 1; endIndex = volumeText.indexOf("\"", beginIndex); String volumeTitle = volumeText.substring(beginIndex, endIndex); print("正要處理的集數: " + volumeTitle + " : " + volumeURL); snsSysID = handleSingleVolume(tagName, titleName, titleIntroduction, volumeTitle, volumeText); nowVolumeID = getPreviousVolumeID(volumeText); if (nowVolumeID == null) break; } // outputMainListFile(stepByStepMode); }
/**
 * Downloads the comments attached to a title and hands them to {@code outputListFile}
 * for writing as {@code comment.js} in the title's output directory.
 *
 * <p>The comment thread is located through the {@code token32: '...'} value embedded in
 * the title page. Nothing is done when that value cannot be found.
 *
 * <p>Scraping is defensive: when an expected marker is missing on a comment page,
 * parsing of that page stops, and a name/comment/date triple is only recorded once all
 * three parts were found, so the three lists always have the same length.
 *
 * @param tagName tag of the title; used in the log line and as output sub-directory
 * @param text    HTML text of the title's main page
 */
private void handleTitleComment(String tagName, String text) {
    int tokenKey = text.indexOf("token32:");
    if (tokenKey < 0) {
        return;
    }
    int tokenStart = text.indexOf("'", tokenKey) + 1;
    int tokenEnd = text.indexOf("'", tokenStart);
    if (tokenStart <= 0 || tokenEnd <= 0) {
        return;
    }
    String token32 = text.substring(tokenStart, tokenEnd);
    String commentURL =
            Common.getRegularURL("http://t.178.com/resource/show?token32=" + token32);
    print(tagName + "'s commentURL : " + commentURL);

    // Get the number of comment pages.
    int pageCount = parseCommentPageCount(tagName, getAllPageString(commentURL));

    List<String> nameList = new ArrayList<String>();
    List<String> dateList = new ArrayList<String>();
    List<String> commentList = new ArrayList<String>();

    // Download all comments.
    for (int page = 1; page <= pageCount; page++) {
        String pageText = getAllPageString(commentURL + "&page=" + page);
        int cursor = 0;
        while (true) {
            cursor = pageText.indexOf("post-by hovercard", cursor);
            if (cursor < 0) {
                break;
            }

            // Commenter name: text of the element carrying "post-by hovercard".
            int[] nameRange = findTagTextRange(pageText, cursor);
            if (nameRange == null) {
                break;
            }

            // Comment body: text that follows the next "-->" marker.
            int[] commentRange =
                    findTagTextRange(pageText, pageText.indexOf("-->", nameRange[0]));
            if (commentRange == null) {
                break;
            }

            // Comment time: text of the next "<a href=" element.
            int[] dateRange =
                    findTagTextRange(pageText, pageText.indexOf("<a href=", commentRange[0]));
            if (dateRange == null) {
                break;
            }

            String date = pageText.substring(dateRange[0], dateRange[1]).trim();
            date = Common.getTraditionalChinese(date);
            date = getFormatDate(date);

            nameList.add(pageText.substring(nameRange[0], nameRange[1]).trim());
            commentList.add(pageText.substring(commentRange[0], commentRange[1]).trim());
            dateList.add(date);

            // Continue after this comment; the cursor only ever moves forward, so the
            // loop is guaranteed to terminate.
            cursor = dateRange[0];
        }
    }

    // Write out the comments.
    List<List<String>> combinationList = new ArrayList<List<String>>();
    combinationList.add(nameList);
    combinationList.add(commentList);
    combinationList.add(dateList);
    String filePath = getBaseOutputDirectory() + tagName + Common.getSlash();
    // NOTE(review): "TITLE_COMMONET" looks like a misspelling, but it is a runtime value
    // handed to outputListFile — presumably something else depends on it, so it is
    // kept as is.
    outputListFile(combinationList, "TITLE_COMMONET", filePath, "comment.js");
}

/**
 * Reads the number of comment pages from the pagination links of a comment page.
 *
 * <p>The count is taken from the text of the second-to-last {@code <li><a href=} link.
 * Falls back to a single page when the links are missing or the text is not a number.
 *
 * @param tagName  tag of the title, used only for the log line
 * @param listText HTML text of the comment page
 * @return the parsed page count, or 1 when it cannot be determined
 */
private int parseCommentPageCount(String tagName, String listText) {
    int lastLink = listText.lastIndexOf("<li><a href=");
    if (lastLink - 5 <= 0) {
        return 1;
    }
    int pageLink = listText.lastIndexOf("<li><a href=", lastLink - 5);
    if (pageLink < 0) {
        return 1;
    }
    int[] range = findTagTextRange(listText, listText.indexOf("page=", pageLink));
    if (range == null) {
        return 1;
    }
    String pageLabel = listText.substring(range[0], range[1]).trim();
    try {
        return Integer.parseInt(pageLabel);
    } catch (NumberFormatException e) {
        // Not a number: report it and keep going with the first page only instead of
        // aborting the whole run.
        print(tagName + "'s comment page count is not a number : " + pageLabel);
        return 1;
    }
}

/**
 * Locates the text between the first {@code '>'} at or after {@code fromIndex} and the
 * next {@code '<'} that follows it.
 *
 * @param text      text to search
 * @param fromIndex index to start from; a negative value means "marker not found"
 * @return {@code {start, end}} suitable for {@code substring}, or {@code null} when
 *         {@code fromIndex} is negative or either delimiter is missing
 */
private int[] findTagTextRange(String text, int fromIndex) {
    if (fromIndex < 0) {
        return null;
    }
    int closeOfTag = text.indexOf(">", fromIndex);
    if (closeOfTag < 0) {
        return null;
    }
    int start = closeOfTag + 1;
    int end = text.indexOf("<", start);
    if (end < 0) {
        return null;
    }
    return new int[] {start, end};
}