@Override public synchronized void setParameters() { Common.downloadFile(webSite, SetUp.getTempDirectory(), indexName, false, ""); Common.newEncodeFile(SetUp.getTempDirectory(), indexName, indexEncodeName, Encoding.GBK); String tempStr = Common.getFileString(SetUp.getTempDirectory(), indexEncodeName); String[] lines = tempStr.split("\n"); for (int i = 0; i < lines.length; i++) { String line = Common.getTraditionalChinese(lines[i]); // ".": contain all characters except "\r" and "\n" // "(?s).": contain all characters if (line.matches("(?s).*title(?s).*")) { // get title ex.<title>尸錄 4話</title> String[] temp = line.split("<|>"); if (getWholeTitle() == null || getWholeTitle().equals("")) setWholeTitle(Common.getStringRemovedIllegalChar(temp[2])); } else if (line.matches("(?s).*page(?s).*")) { // get total page ex. | 共34頁 | int beginIndex = line.indexOf(Common.getStringUsingDefaultLanguage("共", "共")); int endIndex = line.indexOf(Common.getStringUsingDefaultLanguage("頁", "頁")); String temp = line.substring(beginIndex + 1, endIndex); totalPage = Integer.parseInt(temp); break; } } comicURL = new String[totalPage]; // totalPage = amount of comic pic SetUp.setWholeTitle(wholeTitle); }
@Override public String getTitleOnMainPage(String urlString, String allPageString) { if (needTsukkomiMode(urlString)) { tsukkomiMode = true; return "Tsukkomi"; } tsukkomiMode = false; int beginIndex = allPageString.indexOf("<h1>") + 4; int endIndex = allPageString.indexOf("</h1>", beginIndex); if (urlString.indexOf("mh.") > 0) { beginIndex = allPageString.indexOf("g_comic_name"); beginIndex = allPageString.indexOf("\"", beginIndex) + 1; endIndex = allPageString.indexOf("\"", beginIndex); } // Common.debugPrintln( "B: " + beginIndex + " E: " + endIndex ); if (beginIndex < 0 || endIndex < 0) { return null; } String title = allPageString.substring(beginIndex, endIndex).trim(); return Common.getStringRemovedIllegalChar(Common.getTraditionalChinese(title)); }
/**
 * Reads the comic title from the page's {@code <title>} element.
 *
 * <p>Only the part of the element text before the first {@code '-'} is kept.
 *
 * @param urlString URL of the main page (not used by this implementation)
 * @param allPageString full HTML of the main page
 * @return the trimmed title, converted to traditional Chinese with illegal characters removed
 */
@Override
public String getTitleOnMainPage(String urlString, String allPageString) {
    final int tagStart = allPageString.indexOf("<title>");
    final int textStart = allPageString.indexOf(">", tagStart) + 1;
    final int textEnd = allPageString.indexOf("</title", textStart);

    final String elementText = allPageString.substring(textStart, textEnd);
    final String[] segments = elementText.split("-");
    final String converted = Common.getTraditionalChinese(segments[0].trim());

    return Common.getStringRemovedIllegalChar(converted);
}
/**
 * Reads the comic title from the first bold element that follows the
 * {@code valign="middle"><b>} marker in the page.
 *
 * @param urlString URL of the main page (not used by this implementation)
 * @param allPageString full HTML of the main page
 * @return the trimmed title, converted to traditional Chinese with illegal characters removed
 */
@Override
public String getTitleOnMainPage(String urlString, String allPageString) {
    final int markerStart = allPageString.indexOf("valign=\"middle\"><b>");
    final int textStart = allPageString.indexOf("<b>", markerStart) + 3;
    final int textEnd = allPageString.indexOf("</b>", textStart);

    final String rawTitle = allPageString.substring(textStart, textEnd).trim();
    final String converted = Common.getTraditionalChinese(rawTitle);

    return Common.getStringRemovedIllegalChar(converted);
}
/**
 * Fetches a single-volume page and extracts the title from its {@code >>}-separated trail.
 *
 * <p>The title starts two characters past the index returned by
 * {@code Common.getIndexOfOrderKeyword(page, ">>", 3)} and ends at the index returned by
 * {@code Common.getSmallerIndexOfTwoKeyword} for the keywords {@code "["} and {@code ">>"}.
 * NOTE(review): presumably these are "third occurrence of >>" and "whichever of the two
 * keywords comes first"; confirm against the helpers.
 *
 * @param urlString URL of the single-volume page
 * @return the trimmed title, converted to traditional Chinese with illegal characters removed
 */
@Override
public String getTitleOnSingleVolumePage(String urlString) {
    final String page = getAllPageString(urlString);

    final int textStart = Common.getIndexOfOrderKeyword(page, ">>", 3) + 2;
    final int textEnd = Common.getSmallerIndexOfTwoKeyword(page, textStart, "[", ">>");

    final String rawTitle = page.substring(textStart, textEnd).trim();
    final String converted = Common.getTraditionalChinese(rawTitle);

    return Common.getStringRemovedIllegalChar(converted);
}
@Override public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) { // combine volumeList and urlList into combinationList, return it. List<List<String>> combinationList = new ArrayList<List<String>>(); List<String> urlList = new ArrayList<String>(); List<String> volumeList = new ArrayList<String>(); if (tsukkomiMode) { urlList.add(urlString); volumeList.add("tsukkomi"); combinationList.add(volumeList); combinationList.add(urlList); return combinationList; } int beginIndex = allPageString.indexOf("class=\"cartoon_online_border\""); int endIndex = allPageString.indexOf("document.write", beginIndex); if (urlString.indexOf("mh.") > 0) { beginIndex = allPageString.indexOf("chapter_list"); endIndex = allPageString.indexOf("</script>", beginIndex); } String tempString = allPageString.substring(beginIndex, endIndex); int volumeCount = tempString.split("href=\"").length - 1; String volumeTitle = ""; beginIndex = endIndex = 0; for (int i = 0; i < volumeCount; i++) { // 取得單集位址 beginIndex = tempString.indexOf("href=\"", beginIndex) + 6; endIndex = tempString.indexOf("\"", beginIndex); urlList.add(baseURL + tempString.substring(beginIndex, endIndex)); // 取得單集名稱 beginIndex = tempString.indexOf(">", beginIndex) + 1; endIndex = tempString.indexOf("<", beginIndex); volumeTitle = tempString.substring(beginIndex, endIndex); volumeList.add( getVolumeWithFormatNumber( Common.getStringRemovedIllegalChar( Common.getTraditionalChinese(volumeTitle.trim())))); } totalVolume = volumeCount; Common.debugPrintln("共有" + totalVolume + "集"); combinationList.add(volumeList); combinationList.add(urlList); return combinationList; }
/**
 * Reads the comic title from the {@code <title>} element on the first line of the page.
 *
 * <p>The title text runs from just after {@code <title>} up to four characters before the
 * first underscore that follows it.
 *
 * @param urlString URL of the main page (not used by this implementation)
 * @param allPageString full HTML of the main page
 * @return the title converted to traditional Chinese with illegal characters removed
 */
@Override
public String getTitleOnMainPage(String urlString, String allPageString) {
    final String firstLine = allPageString.split("\n")[0];

    // The search starts at index 1, as in the original lookup.
    final int textStart = firstLine.indexOf("<title>", 1) + 7;
    final int textEnd = firstLine.indexOf("_", textStart) - 4;

    final String rawTitle = firstLine.substring(textStart, textEnd);
    final String converted = Common.getTraditionalChinese(rawTitle);

    return Common.getStringRemovedIllegalChar(converted);
}
@Override public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) { // combine volumeList and urlList into combinationList, return it. List<List<String>> combinationList = new ArrayList<List<String>>(); List<String> urlList = new ArrayList<String>(); List<String> volumeList = new ArrayList<String>(); String[] lines = allPageString.split("\n"); int beginIndex = 0; int endIndex = 0; String volumeURL = ""; beginIndex = allPageString.indexOf("id='comiclistn'"); endIndex = allPageString.indexOf("</table>", beginIndex); String tempString = allPageString.substring(beginIndex, endIndex); int volumeCount = tempString.split("<dd>").length - 1; // 單集位址的網域名稱(有四組,可置換) String baseVolumeURL = "http://comic.kukudm.com"; beginIndex = endIndex = 0; for (int i = 0; i < volumeCount; i++) { // 取得單集位址 beginIndex = tempString.indexOf("<dd>", beginIndex) + 1; beginIndex = tempString.indexOf("'", beginIndex) + 1; endIndex = tempString.indexOf("'", beginIndex); volumeURL = tempString.substring(beginIndex, endIndex); if (volumeURL.matches("http.*")) { urlList.add(tempString.substring(beginIndex, endIndex)); } else { urlList.add(baseVolumeURL + tempString.substring(beginIndex, endIndex)); } // 取得單集名稱 beginIndex = tempString.indexOf(">", beginIndex) + 1; endIndex = tempString.indexOf("<", beginIndex); volumeList.add( getVolumeWithFormatNumber( Common.getStringRemovedIllegalChar( Common.getTraditionalChinese( tempString.substring(beginIndex, endIndex).trim())))); } totalVolume = volumeCount; Common.debugPrintln("共有" + totalVolume + "集"); combinationList.add(volumeList); combinationList.add(urlList); return combinationList; }
@Override public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) { // combine volumeList and urlList into combinationList, return it. List<List<String>> combinationList = new ArrayList<List<String>>(); List<String> urlList = new ArrayList<String>(); List<String> volumeList = new ArrayList<String>(); String tempString = ""; int beginIndex, endIndex; beginIndex = allPageString.indexOf("class=\"plie\""); endIndex = allPageString.indexOf("</ul>", beginIndex); // 存放集數頁面資訊的字串 tempString = allPageString.substring(beginIndex, endIndex); int volumeCount = tempString.split("href=").length - 1; String volumeTitle = ""; beginIndex = endIndex = 0; for (int i = 0; i < volumeCount; i++) { // 取得單集位址 beginIndex = tempString.indexOf("href=", beginIndex); beginIndex = tempString.indexOf("\"", beginIndex) + 1; endIndex = tempString.indexOf("\"", beginIndex); urlList.add(baseURL + tempString.substring(beginIndex, endIndex)); // 取得單集名稱 beginIndex = tempString.indexOf(">", beginIndex) + 1; endIndex = tempString.indexOf("</a>", beginIndex); volumeTitle = tempString.substring(beginIndex, endIndex); volumeTitle = volumeTitle.replaceFirst("<br\\s+/{0,1}>", ""); volumeList.add( getVolumeWithFormatNumber( Common.getStringRemovedIllegalChar( Common.getTraditionalChinese(volumeTitle.trim())))); } totalVolume = volumeCount; Common.debugPrintln("共有" + totalVolume + "集"); combinationList.add(volumeList); combinationList.add(urlList); return combinationList; }
/**
 * Resolves the chapter title (whole title) when it has not been set yet, then logs both the
 * work title and the chapter title.
 *
 * <p>The title is read from the page at {@code webSite}: it starts two characters past the
 * index returned by {@code Common.getIndexOfOrderKeyword(page, ">>", 4)} and runs to the next
 * {@code '<'}. NOTE(review): presumably that helper locates the fourth {@code >>}; confirm.
 */
@Override
public void setParameters() {
    Common.debugPrintln("開始解析各參數 :");
    Common.debugPrintln("開始解析title和wholeTitle :");

    final boolean titleAlreadyKnown = getWholeTitle() != null && !getWholeTitle().equals("");
    if (!titleAlreadyKnown) {
        final String page = getAllPageString(webSite);

        final int textStart = Common.getIndexOfOrderKeyword(page, ">>", 4) + 2;
        final int textEnd = page.indexOf("<", textStart);
        final String rawTitle = page.substring(textStart, textEnd).trim();

        final String converted = Common.getTraditionalChinese(rawTitle);
        setWholeTitle(Common.getStringRemovedIllegalChar(converted));
    }

    Common.debugPrintln("作品名稱(title) : " + getTitle());
    Common.debugPrintln("章節名稱(wholeTitle) : " + getWholeTitle());
}
@Override public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) { // combine volumeList and urlList into combinationList, return it. List<List<String>> combinationList = new ArrayList<List<String>>(); List<String> urlList = new ArrayList<String>(); List<String> volumeList = new ArrayList<String>(); int beginIndex = allPageString.indexOf("<li><a href=\"http://www") - 1; int endIndex = allPageString.indexOf("</table>", beginIndex); String listString = allPageString.substring(beginIndex, endIndex); totalVolume = allPageString.split("<li><a href=\"http://www").length - 1; beginIndex = endIndex = 0; for (int i = 0; i < totalVolume; i++) { // 取得單集位址 beginIndex = listString.indexOf("http://www", beginIndex); endIndex = listString.indexOf("\"", beginIndex); urlList.add(listString.substring(beginIndex, endIndex)); // 取得單集名稱 beginIndex = listString.indexOf("<span", beginIndex) + 1; beginIndex = listString.indexOf(">", beginIndex) + 1; endIndex = listString.indexOf("<", beginIndex); String volumeTitle = listString.substring(beginIndex, endIndex); volumeList.add( getVolumeWithFormatNumber( Common.getStringRemovedIllegalChar( Common.getTraditionalChinese(volumeTitle.trim())))); } Common.debugPrintln("共有" + totalVolume + "集"); combinationList.add(volumeList); combinationList.add(urlList); return combinationList; }
/**
 * Finds the comic title by locating the page's own link inside the page markup.
 *
 * <p>The part of {@code urlString} after the first {@code '/'} that follows {@code "mop"} is
 * used as the link target. When the URL contains {@code ?id=}, every {@code .jsp?id=} in that
 * part is replaced by {@code '/'} and {@code ".html"} is appended. The title is the text
 * between the {@code '>'} and {@code '<'} that follow the first occurrence of that target
 * (immediately followed by a double quote) in the page.
 *
 * @param urlString URL of the main page
 * @param allPageString full HTML of the main page
 * @return the trimmed title, converted to traditional Chinese with illegal characters removed
 */
@Override
public String getTitleOnMainPage(String urlString, String allPageString) {
    final int siteMarker = urlString.indexOf("mop");
    final int pathStart = urlString.indexOf("/", siteMarker) + 1;
    final String path = urlString.substring(pathStart, urlString.length());

    final String backMainURL =
            urlString.matches(".*\\?id=.*")
                    ? path.replaceAll("\\.jsp\\?id=", "/") + ".html"
                    : path;

    final int linkAt = allPageString.indexOf(backMainURL + "\"");
    final int textStart = allPageString.indexOf(">", linkAt) + 1;
    final int textEnd = allPageString.indexOf("<", textStart);

    final String rawTitle = allPageString.substring(textStart, textEnd).trim();
    final String converted = Common.getTraditionalChinese(rawTitle);

    return Common.getStringRemovedIllegalChar(converted);
}
/**
 * Downloads the page at {@code webSite} and, when the chapter title (whole title) is still
 * null or empty, derives it from the first {@code alt="..."} attribute of the saved page.
 * Finally logs both the work title and the chapter title.
 *
 * <p>NOTE(review): the page is downloaded to {@code indexName} but read back from
 * {@code indexEncodeName}, with no re-encoding step in this method. Confirm that both names
 * refer to the same file in this class.
 */
@Override
public void setParameters() {
    Common.debugPrintln("開始解析各參數 :");

    Common.downloadFile(webSite, SetUp.getTempDirectory(), indexName, false, "");

    final boolean titleAlreadyKnown = getWholeTitle() != null && !getWholeTitle().equals("");
    if (!titleAlreadyKnown) {
        Common.debugPrintln("開始解析title和wholeTitle :");

        final String page = Common.getFileString(SetUp.getTempDirectory(), indexEncodeName);

        // The title is the double-quoted value of the first alt attribute.
        final int altAt = page.indexOf("alt=");
        final int textStart = page.indexOf("\"", altAt) + 1;
        final int textEnd = page.indexOf("\"", textStart);
        final String rawTitle = page.substring(textStart, textEnd).trim();

        final String converted = Common.getTraditionalChinese(rawTitle);
        final String cleaned = Common.getStringRemovedIllegalChar(converted);
        setWholeTitle(getVolumeWithFormatNumber(cleaned));
    }

    Common.debugPrintln("作品名稱(title) : " + getTitle());
    Common.debugPrintln("章節名稱(wholeTitle) : " + getWholeTitle());
}
/**
 * Downloads all comments attached to a title and writes them to {@code comment.js} under the
 * tag's output directory.
 *
 * <p>The comment page URL is built from the single-quoted {@code token32} value found in
 * {@code text}; nothing happens when that value is absent. Every comment page is then fetched
 * and scanned for entries marked {@code post-by hovercard}, collecting the commenter name, the
 * comment body and the formatted date of each entry.
 *
 * <p>An entry is recorded only when all three parts can be located, so the three lists always
 * stay the same length. If an expected marker is missing, scanning of that page stops; the
 * previous behaviour restarted the search from the top of the page and could loop forever.
 *
 * @param tagName name used for logging and as the output sub-directory
 * @param text page text that contains the {@code token32:} declaration
 */
private void handleTitleComment(String tagName, String text) {
    int beginIndex = text.indexOf("token32:");
    if (beginIndex < 0) {
        return;
    }
    beginIndex = text.indexOf("'", beginIndex) + 1;
    int endIndex = text.indexOf("'", beginIndex);
    if (beginIndex <= 0 || endIndex <= 0) {
        return;
    }

    String token32 = text.substring(beginIndex, endIndex);
    String commentURL = Common.getRegularURL("http://t.178.com/resource/show?token32=" + token32);
    print(tagName + "'s commentURL : " + commentURL);

    // Determine how many comment pages exist.
    int pageCount = parseCommentPageCount(getAllPageString(commentURL));

    List<String> nameList = new ArrayList<String>();
    List<String> dateList = new ArrayList<String>();
    List<String> commentList = new ArrayList<String>();

    // Download every comment page.
    for (int i = 1; i <= pageCount; i++) {
        text = getAllPageString(commentURL + "&page=" + i);

        int cursor = 0;
        while (true) {
            int entryAt = text.indexOf("post-by hovercard", cursor);
            if (entryAt < 0) {
                break;
            }

            // Commenter name: text of the element carrying the marker.
            int nameBegin = text.indexOf(">", entryAt) + 1;
            int nameEnd = text.indexOf("<", nameBegin);
            if (nameBegin <= 0 || nameEnd < 0) {
                break; // truncated entry; nothing reliable follows
            }

            // Comment body: text right after the next "-->".
            int bodyMark = text.indexOf("-->", nameBegin);
            if (bodyMark < 0) {
                break;
            }
            int bodyBegin = text.indexOf(">", bodyMark) + 1;
            int bodyEnd = text.indexOf("<", bodyBegin);
            if (bodyEnd < 0) {
                break;
            }

            // Comment time: text of the next link.
            int dateMark = text.indexOf("<a href=", bodyBegin);
            if (dateMark < 0) {
                break;
            }
            int dateBegin = text.indexOf(">", dateMark) + 1;
            int dateEnd = text.indexOf("<", dateBegin);
            if (dateBegin <= 0 || dateEnd < 0) {
                break;
            }

            String rawDate = text.substring(dateBegin, dateEnd).trim();

            nameList.add(text.substring(nameBegin, nameEnd).trim());
            commentList.add(text.substring(bodyBegin, bodyEnd).trim());
            dateList.add(getFormatDate(Common.getTraditionalChinese(rawDate)));

            cursor = dateBegin;
        }
    }

    // Write the comments out.
    List<List<String>> combinationList = new ArrayList<List<String>>();
    combinationList.add(nameList);
    combinationList.add(commentList);
    combinationList.add(dateList);

    String filePath = getBaseOutputDirectory() + tagName + Common.getSlash();
    outputListFile(combinationList, "TITLE_COMMONET", filePath, "comment.js");
}

/**
 * Reads the number of comment pages from the label of the second-to-last pagination link.
 *
 * @param text HTML of the first comment page
 * @return the parsed page count, or 1 when the pagination links are absent or unreadable
 */
private int parseCommentPageCount(String text) {
    int searchFrom = text.lastIndexOf("<li><a href=") - 5;
    if (searchFrom <= 0) {
        return 1;
    }

    // Searching backwards from before the last link lands on the one preceding it.
    int previousLink = text.lastIndexOf("<li><a href=", searchFrom);
    if (previousLink < 0) {
        return 1;
    }
    int pageParam = text.indexOf("page=", previousLink);
    if (pageParam < 0) {
        return 1;
    }

    int labelBegin = text.indexOf(">", pageParam) + 1;
    int labelEnd = text.indexOf("<", labelBegin);
    if (labelBegin <= 0 || labelEnd < 0) {
        return 1;
    }

    String label = text.substring(labelBegin, labelEnd).trim();
    try {
        return Integer.parseInt(label);
    } catch (NumberFormatException e) {
        print("unexpected comment page label '" + label + "', assuming a single page");
        return 1;
    }
}
// more : // http://interface3.i.178.com/~cite.embed.ViewAll?callback=?res_id=4606&sys_res_id=4606_8436&sys_name=manhua178 // normal : // http://interface3.i.178.com/~cite.embed.VoteJS/sysname/manhua178/sys_id/6567_34593/token/0a7e131c24510879fa79ad4c8c6660bd private List<String> getCommentParseText(List<String> textList, String commentURL) { int beginIndex = 0; int endIndex = 0; String text = getAllPageString(commentURL); if (commentURL.indexOf("VoteJS") > 0) { beginIndex = text.indexOf("cite_vote_num"); if (beginIndex < 0) { // 尚未評論 textList.add(""); return textList; } beginIndex = text.indexOf(">", beginIndex) + 1; endIndex = text.indexOf("<", beginIndex); textList.add(text.substring(beginIndex, endIndex)); beginIndex = text.indexOf("postVote(", beginIndex); beginIndex = text.indexOf("(", beginIndex) + 1; endIndex = text.indexOf(",", beginIndex); res_id = text.substring(beginIndex, endIndex); } while (true) { beginIndex = text.indexOf("interactive-opinion-block-", beginIndex); if (beginIndex < 0) { break; } beginIndex = text.indexOf(">", beginIndex) + 1; endIndex = text.indexOf("<", beginIndex); String comment = text.substring(beginIndex, endIndex); comment = getUtf8Text(comment); comment = comment.replaceAll("\"|'", ""); comment = Common.getTraditionalChinese(comment); if (comment.matches("更多")) { break; } textList.add(comment); beginIndex = text.indexOf("title=", beginIndex); if (beginIndex < 0) { break; } beginIndex = text.indexOf("\"", beginIndex) + 1; endIndex = text.indexOf(")", beginIndex); String temp = text.substring(beginIndex, endIndex); temp = getUtf8Text(temp); temp = temp.replaceAll("共有", ""); temp = temp.replaceAll("人赞同此观点", ""); String[] temps = temp.split("\\("); if (temps.length < 2) { print("FAIL -> " + temps.length + " : " + temp); } String num = temps[0]; String ratio = temps[1]; textList.add(num); textList.add(ratio); } return textList; }
@Override public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) { // combine volumeList and urlList into combinationList, return it. List<List<String>> combinationList = new ArrayList<List<String>>(); List<String> urlList = new ArrayList<String>(); List<String> volumeList = new ArrayList<String>(); String tempString = ""; int lastPage = 0; int beginIndex, endIndex; urlString = urlString.split("0/0/")[0]; // 接著在迴圈內下載每一頁, 取得每一集資訊 int totalVolumeCount = 0; String pageURL = urlString; while (true) { if (lastPage++ > 0) { pageURL = urlString + "0/0/" + lastPage; } allPageString = getAllPageString(pageURL); beginIndex = allPageString.indexOf("class=\"comicBox\""); endIndex = allPageString.indexOf("class=\"fbComment\"", beginIndex); tempString = allPageString.substring(beginIndex, endIndex); // 代表此頁已經沒有集數了。 if (tempString.indexOf("class=\"recTitle\"") < 0) break; // 取得存放一整頁面集數資訊 beginIndex = allPageString.indexOf("class=\"comicBox\""); beginIndex = allPageString.indexOf("class=\"relativeRec", beginIndex); endIndex = allPageString.indexOf("</div>", beginIndex); tempString = allPageString.substring(beginIndex, endIndex); int volumeCount = tempString.split("<h3").length - 1; // 單一頁面的集數 totalVolumeCount += volumeCount; String volumeURL = ""; String volumeTitle = ""; beginIndex = endIndex = 0; for (int j = 0; j < volumeCount; j++) { // 取得單集位址 beginIndex = tempString.indexOf("<h3", beginIndex); beginIndex = tempString.indexOf("href=", beginIndex); beginIndex = tempString.indexOf("\"", beginIndex) + 1; endIndex = tempString.indexOf("\"", beginIndex); volumeURL = baseURL + tempString.substring(beginIndex, endIndex); urlList.add(baseURL + tempString.substring(beginIndex, endIndex)); // 取得單集名稱 beginIndex = tempString.indexOf("title=", beginIndex); beginIndex = tempString.indexOf("\"", beginIndex) + 1; endIndex = tempString.indexOf("\"", beginIndex); volumeTitle = tempString.substring(beginIndex, endIndex); volumeList.add( 
getVolumeWithFormatNumber( Common.getStringRemovedIllegalChar( Common.getTraditionalChinese(volumeTitle.trim())))); Common.debugPrintln(volumeURL + " : " + volumeTitle); } } Common.debugPrintln(" 共有 " + (lastPage - 1) + " 張目錄頁"); totalVolume = totalVolumeCount; Common.debugPrintln("共有" + totalVolume + "集"); combinationList.add(volumeList); combinationList.add(urlList); return combinationList; }