@Override public void parseComicURL() { // parse URL and save all URLs in comicURL // // 先取得前面的下載伺服器網址 String allPageString = Common.getFileString(SetUp.getTempDirectory(), indexName); Common.debugPrint("開始解析這一集有幾頁 : "); int beginIndex = 0, endIndex = 0; totalPage = allPageString.split("<option ").length; Common.debugPrintln("共 " + totalPage + " 頁"); comicURL = new String[totalPage]; String picURL = ""; int p = 0; // 目前頁數 for (int i = 0; i < totalPage && Run.isAlive; i++) { beginIndex = allPageString.indexOf("<img id"); beginIndex = allPageString.indexOf("\"", beginIndex) + 1; endIndex = allPageString.indexOf("\"", beginIndex); String tempURL = allPageString.substring(beginIndex, endIndex); if (Common.isLegalURL(tempURL)) { comicURL[p++] = tempURL; Common.debugPrintln(p + " " + comicURL[p - 1]); // debug // 每解析一個網址就下載一張圖 singlePageDownload(getTitle(), getWholeTitle(), comicURL[p - 1], totalPage, p, 0); } else { totalPage--; } // Common.downloadFile( comicURL[p - 1], "", p + ".jpg", false, "" ); if (p < totalPage) { beginIndex = allPageString.indexOf("</select>"); beginIndex = allPageString.indexOf("\"", beginIndex) + 1; endIndex = allPageString.indexOf("\"", beginIndex); tempURL = allPageString.substring(beginIndex, endIndex); if ("#".equals(tempURL)) { Common.debugPrintln("THE LAST PAGE !!"); break; } String nextPageURL = "http://comic101.com" + tempURL; Common.downloadFile(nextPageURL, SetUp.getTempDirectory(), indexName, false, ""); allPageString = Common.getFileString(SetUp.getTempDirectory(), indexName); } } // System.exit( 0 ); // debug }
// 將代號轉為實際字串 private String[] getRealCodeTokens(String[] codeTokens, String[] varTokens) { String[] realCodeTokens = new String[codeTokens.length]; String tempChar = ""; for (int i = 0; i < codeTokens.length; i++) { realCodeTokens[i] = ""; Common.debugPrintln("這次要分解的code : " + codeTokens[i]); for (int j = codeTokens[i].length() - 1; j >= 0; j--) { int index = -1; // 兩個數字字元組合在一起 index = getVarIndex(codeTokens[i].charAt(j)); if (j > 0 && index >= 0) { char c = codeTokens[i].charAt(j - 1); if (c >= '1' && c <= '9') { int num = Integer.parseInt(String.valueOf(c)); index += ((26 + 26 + 10) * num); // 若之後找不到此index對應的token , 可直接用此數字字串 tempChar = "" + codeTokens[i].charAt(j); j--; } else { tempChar = ""; } } else { tempChar = ""; } if (index >= 0 && index < varTokens.length && !varTokens[index].equals("")) { realCodeTokens[i] = varTokens[index] + realCodeTokens[i]; } else { realCodeTokens[i] = "" + codeTokens[i].charAt(j) + tempChar + realCodeTokens[i]; } // Common.debugPrintln( realCodeTokens[i] ); } Common.debugPrintln("分解結果: " + realCodeTokens[i]); } // System.exit( 0 ); return realCodeTokens; }
@Override public void parseComicURL() { // parse URL and save all URLs in comicURL // // 先取得前面的下載伺服器網址 String allPageString = getAllPageString(webSite); Common.debugPrint("開始解析這一集有幾頁 : "); String baseURL = "http://mh2.xindm.cn"; int beginIndex = allPageString.indexOf("Array("); beginIndex = allPageString.indexOf("\"", beginIndex); int endIndex = allPageString.indexOf(");", beginIndex); String tempPicString = allPageString.substring(beginIndex, endIndex); String[] picURLs = tempPicString.split(","); totalPage = picURLs.length; Common.debugPrintln("共 " + totalPage + " 頁"); comicURL = new String[totalPage]; for (int i = 0; i < picURLs.length; i++) { comicURL[i] = baseURL + picURLs[i].replaceAll("\"", ""); Common.debugPrintln("第" + (i + 1) + "頁網址:" + comicURL[i]); } // 須取得cookie才能下載圖片(防盜連專家....) String[] cookies = Common.getCookieStrings(webSite, null); String cookieString = ""; int cookieCount = 0; // 取得前兩組cookie就可以了 if (cookies[0] != null) { cookieString = "Hm_lvt_016bf6f495d44a067f569423ad894560=1337210178886; " + cookies[0].split(";")[0]; } Common.debugPrintln("取得cookies:" + cookieString); for (int p = 1; p <= totalPage && Run.isAlive; p++) { String referURL = webSite + "?p=" + p; // 每解析一個網址就下載一張圖 singlePageDownloadUsingSimple( getTitle(), getWholeTitle(), comicURL[p - 1], totalPage, p, cookieString, referURL); Common.debugPrintln((p) + " " + comicURL[p - 1] + " " + referURL); // debug } // System.exit(1); // debug }
/**
 * Fills in the chapter title when it has not been set yet.
 *
 * <p>The title is the trimmed text between the position reported by
 * {@code Common.getIndexOfOrderKeyword(page, ">>", 4)} plus two and the next {@code <}.
 * Presumably that helper returns the position of the fourth {@code >>} — confirm against
 * {@code Common}. The text is converted to Traditional Chinese and cleaned of illegal characters
 * before being stored.
 */
@Override
public void setParameters() {
    Common.debugPrintln("開始解析各參數 :");
    Common.debugPrintln("開始解析title和wholeTitle :");

    boolean hasWholeTitle = getWholeTitle() != null && !getWholeTitle().equals("");
    if (!hasWholeTitle) {
        String pageHtml = getAllPageString(webSite);
        int titleFrom = Common.getIndexOfOrderKeyword(pageHtml, ">>", 4) + 2;
        int titleTo = pageHtml.indexOf("<", titleFrom);
        String rawTitle = pageHtml.substring(titleFrom, titleTo).trim();
        String traditionalTitle = Common.getTraditionalChinese(rawTitle);
        setWholeTitle(Common.getStringRemovedIllegalChar(traditionalTitle));
    }

    Common.debugPrintln("作品名稱(title) : " + getTitle());
    Common.debugPrintln("章節名稱(wholeTitle) : " + getWholeTitle());
}
@Override public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) { // combine volumeList and urlList into combinationList, return it. List<List<String>> combinationList = new ArrayList<List<String>>(); List<String> urlList = new ArrayList<String>(); List<String> volumeList = new ArrayList<String>(); if (tsukkomiMode) { urlList.add(urlString); volumeList.add("tsukkomi"); combinationList.add(volumeList); combinationList.add(urlList); return combinationList; } int beginIndex = allPageString.indexOf("class=\"cartoon_online_border\""); int endIndex = allPageString.indexOf("document.write", beginIndex); if (urlString.indexOf("mh.") > 0) { beginIndex = allPageString.indexOf("chapter_list"); endIndex = allPageString.indexOf("</script>", beginIndex); } String tempString = allPageString.substring(beginIndex, endIndex); int volumeCount = tempString.split("href=\"").length - 1; String volumeTitle = ""; beginIndex = endIndex = 0; for (int i = 0; i < volumeCount; i++) { // 取得單集位址 beginIndex = tempString.indexOf("href=\"", beginIndex) + 6; endIndex = tempString.indexOf("\"", beginIndex); urlList.add(baseURL + tempString.substring(beginIndex, endIndex)); // 取得單集名稱 beginIndex = tempString.indexOf(">", beginIndex) + 1; endIndex = tempString.indexOf("<", beginIndex); volumeTitle = tempString.substring(beginIndex, endIndex); volumeList.add( getVolumeWithFormatNumber( Common.getStringRemovedIllegalChar( Common.getTraditionalChinese(volumeTitle.trim())))); } totalVolume = volumeCount; Common.debugPrintln("共有" + totalVolume + "集"); combinationList.add(volumeList); combinationList.add(urlList); return combinationList; }
@Override public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) { // combine volumeList and urlList into combinationList, return it. List<List<String>> combinationList = new ArrayList<List<String>>(); List<String> urlList = new ArrayList<String>(); List<String> volumeList = new ArrayList<String>(); String[] lines = allPageString.split("\n"); int beginIndex = 0; int endIndex = 0; String volumeURL = ""; beginIndex = allPageString.indexOf("id='comiclistn'"); endIndex = allPageString.indexOf("</table>", beginIndex); String tempString = allPageString.substring(beginIndex, endIndex); int volumeCount = tempString.split("<dd>").length - 1; // 單集位址的網域名稱(有四組,可置換) String baseVolumeURL = "http://comic.kukudm.com"; beginIndex = endIndex = 0; for (int i = 0; i < volumeCount; i++) { // 取得單集位址 beginIndex = tempString.indexOf("<dd>", beginIndex) + 1; beginIndex = tempString.indexOf("'", beginIndex) + 1; endIndex = tempString.indexOf("'", beginIndex); volumeURL = tempString.substring(beginIndex, endIndex); if (volumeURL.matches("http.*")) { urlList.add(tempString.substring(beginIndex, endIndex)); } else { urlList.add(baseVolumeURL + tempString.substring(beginIndex, endIndex)); } // 取得單集名稱 beginIndex = tempString.indexOf(">", beginIndex) + 1; endIndex = tempString.indexOf("<", beginIndex); volumeList.add( getVolumeWithFormatNumber( Common.getStringRemovedIllegalChar( Common.getTraditionalChinese( tempString.substring(beginIndex, endIndex).trim())))); } totalVolume = volumeCount; Common.debugPrintln("共有" + totalVolume + "集"); combinationList.add(volumeList); combinationList.add(urlList); return combinationList; }
/**
 * Downloads the episode page and, when the chapter title has not been set yet, derives it from
 * the first quoted value that follows {@code alt=} in the page.
 *
 * <p>NOTE(review): the page is downloaded into {@code indexName} but read back from
 * {@code indexEncodeName}; presumably the re-encoded copy is produced elsewhere — confirm.
 */
@Override
public void setParameters() {
    Common.debugPrintln("開始解析各參數 :");
    Common.downloadFile(webSite, SetUp.getTempDirectory(), indexName, false, "");

    boolean hasWholeTitle = getWholeTitle() != null && !getWholeTitle().equals("");
    if (!hasWholeTitle) {
        Common.debugPrintln("開始解析title和wholeTitle :");
        String pageHtml = Common.getFileString(SetUp.getTempDirectory(), indexEncodeName);

        int titleFrom = pageHtml.indexOf("\"", pageHtml.indexOf("alt=")) + 1;
        int titleTo = pageHtml.indexOf("\"", titleFrom);
        String rawTitle = pageHtml.substring(titleFrom, titleTo).trim();

        String traditionalTitle = Common.getTraditionalChinese(rawTitle);
        String cleanedTitle = Common.getStringRemovedIllegalChar(traditionalTitle);
        setWholeTitle(getVolumeWithFormatNumber(cleanedTitle));
    }

    Common.debugPrintln("作品名稱(title) : " + getTitle());
    Common.debugPrintln("章節名稱(wholeTitle) : " + getWholeTitle());
}
public String getMainUrlFromSingleVolumeUrl(String volumeURL) { // ex. http://www.178.com/mh/kongjuzhiyuan/16381-2.shtml轉為 // http://manhua.178.com/kongjuzhiyuan/ String allPageString = getAllPageString(volumeURL); int beginIndex = allPageString.indexOf("g_comic_url"); beginIndex = allPageString.indexOf("\"", beginIndex) + 1; int endIndex = allPageString.indexOf("\"", beginIndex); String mainPageURL = baseURL + "/" + allPageString.substring(beginIndex, endIndex); Common.debugPrintln("MAIN_URL: " + mainPageURL); return mainPageURL; }
@Override public void parseComicURL() { // parse URL and save all URLs in comicURL // // 先取得前面的下載伺服器網址 String allPageString = Common.getFileString(SetUp.getTempDirectory(), indexName); Common.debugPrint("開始解析這一集有幾頁 : "); int beginIndex = allPageString.indexOf("name=\"selectb\""); beginIndex = allPageString.indexOf(">", beginIndex) + 1; int endIndex = allPageString.indexOf("</select>", beginIndex); String tempString = allPageString.substring(beginIndex, endIndex); totalPage = tempString.split("<option").length - 1; Common.debugPrintln("共 " + totalPage + " 頁"); comicURL = new String[totalPage]; String[] comicPageURL = new String[totalPage]; beginIndex = endIndex = 0; for (int i = 0; i < totalPage && Run.isAlive; i++) { beginIndex = allPageString.indexOf("value=", beginIndex); beginIndex = allPageString.indexOf("\"", beginIndex) + 1; endIndex = allPageString.indexOf("\"", beginIndex); comicPageURL[i] = baseURL + allPageString.substring(beginIndex, endIndex); } String picURL = ""; for (int p = 0; p < totalPage; p++) { if (!Common.existPicFile(getDownloadDirectory(), p + 1) || !Common.existPicFile(getDownloadDirectory(), p + 2)) { allPageString = getAllPageString(comicPageURL[p]); beginIndex = allPageString.indexOf("id=picwin"); beginIndex = allPageString.indexOf("src=", beginIndex); beginIndex = allPageString.indexOf("\"", beginIndex) + 1; endIndex = allPageString.indexOf("\"", beginIndex); comicURL[p] = Common.getFixedChineseURL(allPageString.substring(beginIndex, endIndex)); // Common.debugPrintln( ( p + 1 ) + " " + comicURL[p] ); // debug // 每解析一個網址就下載一張圖 singlePageDownload(getTitle(), getWholeTitle(), comicURL[p], totalPage, p + 1, 0); } } // System.exit( 0 ); // debug }
@Override public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) { // combine volumeList and urlList into combinationList, return it. List<List<String>> combinationList = new ArrayList<List<String>>(); List<String> urlList = new ArrayList<String>(); List<String> volumeList = new ArrayList<String>(); String tempString = ""; int beginIndex, endIndex; beginIndex = allPageString.indexOf("class=\"plie\""); endIndex = allPageString.indexOf("</ul>", beginIndex); // 存放集數頁面資訊的字串 tempString = allPageString.substring(beginIndex, endIndex); int volumeCount = tempString.split("href=").length - 1; String volumeTitle = ""; beginIndex = endIndex = 0; for (int i = 0; i < volumeCount; i++) { // 取得單集位址 beginIndex = tempString.indexOf("href=", beginIndex); beginIndex = tempString.indexOf("\"", beginIndex) + 1; endIndex = tempString.indexOf("\"", beginIndex); urlList.add(baseURL + tempString.substring(beginIndex, endIndex)); // 取得單集名稱 beginIndex = tempString.indexOf(">", beginIndex) + 1; endIndex = tempString.indexOf("</a>", beginIndex); volumeTitle = tempString.substring(beginIndex, endIndex); volumeTitle = volumeTitle.replaceFirst("<br\\s+/{0,1}>", ""); volumeList.add( getVolumeWithFormatNumber( Common.getStringRemovedIllegalChar( Common.getTraditionalChinese(volumeTitle.trim())))); } totalVolume = volumeCount; Common.debugPrintln("共有" + totalVolume + "集"); combinationList.add(volumeList); combinationList.add(urlList); return combinationList; }
public String getMainUrlFromSingleVolumeUrl(String volumeURL) { // ex. http://comic.ck101.com/page/1749372轉為 // http://comic.ck101.com/comic/7039 String allPageString = getAllPageString(volumeURL); int beginIndex = allPageString.lastIndexOf("class=\"page_title\""); beginIndex = allPageString.lastIndexOf("href=", beginIndex); beginIndex = allPageString.lastIndexOf("\"", beginIndex) + 1; int endIndex = allPageString.lastIndexOf("\"", beginIndex); String mainPageURL = baseURL + volumeURL.substring(beginIndex, endIndex).trim(); Common.debugPrintln("MAIN_URL: " + mainPageURL); return mainPageURL; }
public String getMainUrlFromSingleVolumeUrl(String volumeURL) { // ex. http://dm.game.mop.com/primary/45489.html#pic或 // http://dm.game.mop.com/primary/45489/1.html#pic轉為 // http://dm.game.mop.com/fengmian/4827.html int beginIndex = volumeURL.indexOf("primary"); beginIndex = volumeURL.indexOf("/", beginIndex) + 1; int endIndex = volumeURL.indexOf("/", beginIndex); if (endIndex < 0) { endIndex = volumeURL.indexOf("/", beginIndex); } String mainPageURL = volumeURL.substring(0, endIndex) + ".html"; Common.debugPrintln("MAIN_URL: " + mainPageURL); return mainPageURL; }
@Override public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) { // combine volumeList and urlList into combinationList, return it. List<List<String>> combinationList = new ArrayList<List<String>>(); List<String> urlList = new ArrayList<String>(); List<String> volumeList = new ArrayList<String>(); int beginIndex = allPageString.indexOf("<li><a href=\"http://www") - 1; int endIndex = allPageString.indexOf("</table>", beginIndex); String listString = allPageString.substring(beginIndex, endIndex); totalVolume = allPageString.split("<li><a href=\"http://www").length - 1; beginIndex = endIndex = 0; for (int i = 0; i < totalVolume; i++) { // 取得單集位址 beginIndex = listString.indexOf("http://www", beginIndex); endIndex = listString.indexOf("\"", beginIndex); urlList.add(listString.substring(beginIndex, endIndex)); // 取得單集名稱 beginIndex = listString.indexOf("<span", beginIndex) + 1; beginIndex = listString.indexOf(">", beginIndex) + 1; endIndex = listString.indexOf("<", beginIndex); String volumeTitle = listString.substring(beginIndex, endIndex); volumeList.add( getVolumeWithFormatNumber( Common.getStringRemovedIllegalChar( Common.getTraditionalChinese(volumeTitle.trim())))); } Common.debugPrintln("共有" + totalVolume + "集"); combinationList.add(volumeList); combinationList.add(urlList); return combinationList; }
/**
 * Forwards a message to {@code Common.debugPrintln}.
 *
 * @param message the text to print
 */
public void print(String message) {
    Common.debugPrintln(message);
}
public void showParameters() { // for debug Common.debugPrintln("----------"); Common.debugPrintln("totalPage = " + totalPage); Common.debugPrintln("webSite = " + webSite); Common.debugPrintln("----------"); }
@Override public void parseComicURL() { // parse URL and save all URLs in comicURL // // 先取得前面的下載伺服器網址 initNewData(); String allPageString = Common.getFileString(SetUp.getTempDirectory(), indexName); Common.debugPrint("開始解析這一集有幾頁 : "); if (tsukkomiMode) { int beginIndex = 0; int endIndex = 0; String listURL = webSite; List<String> tagNameList = new ArrayList<String>(); if (webSite.matches(".*/")) { listURL = webSite.substring(0, webSite.length() - 1); } if (isRssPage()) { print("is RSS page : " + listURL); outputNewListFile(listURL); tagNameList = getTagNameList(listURL); for (int i = 0; i < tagNameList.size(); i++) // 作品列表 { String tagName = tagNameList.get(i); handleSingleTitle(tagName); } } else // ex. // http://manhua.dmzj.com/tags/category_search/0-0-0-all-0-0-1-447.shtml#category_nav_anchor { print("is Normal List Page : " + webSite); if (webSite.indexOf("/update_") > 0) { handleAllUpdatePage(); } else if (webSite.indexOf("/rank/") > 0) { handleAllRankPage(); } } System.exit(0); } // 取得所有位址編碼代號 int beginIndex = allPageString.indexOf("'[") + 2; int endIndex = allPageString.indexOf("\"]", beginIndex) + 1; String allCodeString = allPageString.substring(beginIndex, endIndex); totalPage = allCodeString.split("\",\"").length; Common.debugPrintln("共 " + totalPage + " 頁"); comicURL = new String[totalPage]; refers = new String[totalPage]; // 取得位址編碼代號的替換字元 beginIndex = allPageString.indexOf(",'", endIndex) + 2; endIndex = allPageString.indexOf("'.", beginIndex); String allVarString = allPageString.substring(beginIndex, endIndex); String[] varTokens = allVarString.split("\\|"); for (int i = 0; i < varTokens.length; i++) { Common.debugPrintln(i + " " + varTokens[i]); // test } // System.exit( 0 ); String basePicURL = "http://images.dmzj.com/"; // "http://images.manhua.178.com/"; String[] codeTokens = allCodeString.split("\",\""); codeTokens = getRealCodeTokens(codeTokens, varTokens); String firstCode = codeTokens[0].replaceAll("\"", ""); String firstPicURL = ""; 
Common.debugPrintln("第一張編碼:" + firstCode); firstPicURL = basePicURL + Common.getFixedChineseURL(getDecodeURL(firstCode)); firstPicURL = firstPicURL.replaceAll("\\\\", ""); Common.debugPrintln("第一張圖片網址:" + firstPicURL); // System.exit( 0 ); String[] picNames = new String[totalPage]; for (int i = 0; i < picNames.length; i++) { codeTokens[i] = codeTokens[i].replaceAll("\"", ""); beginIndex = codeTokens[i].lastIndexOf("/") + 1; endIndex = codeTokens[i].length(); // .lastIndexOf( "\"" ); // Common.debugPrintln( codeTokens[i] + " " + beginIndex + " " + endIndex ); picNames[i] = Common.getFixedChineseURL(getDecodeURL(codeTokens[i].substring(beginIndex, endIndex))); // System.exit( 0 ); // debug } endIndex = firstPicURL.lastIndexOf("/") + 1; String parentPicURL = firstPicURL.substring(0, endIndex); for (int i = 0; i < codeTokens.length && Run.isAlive; i++) { comicURL[i] = parentPicURL + picNames[i]; // 存入每一頁的網頁網址 refers[i] = webSite; // Common.debugPrintln( ( i + 1 ) + " " + comicURL[i] ); // debug } // System.exit( 0 ); // debug }
@Override public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) { // combine volumeList and urlList into combinationList, return it. List<List<String>> combinationList = new ArrayList<List<String>>(); List<String> urlList = new ArrayList<String>(); List<String> volumeList = new ArrayList<String>(); String tempString = ""; int lastPage = 0; int beginIndex, endIndex; urlString = urlString.split("0/0/")[0]; // 接著在迴圈內下載每一頁, 取得每一集資訊 int totalVolumeCount = 0; String pageURL = urlString; while (true) { if (lastPage++ > 0) { pageURL = urlString + "0/0/" + lastPage; } allPageString = getAllPageString(pageURL); beginIndex = allPageString.indexOf("class=\"comicBox\""); endIndex = allPageString.indexOf("class=\"fbComment\"", beginIndex); tempString = allPageString.substring(beginIndex, endIndex); // 代表此頁已經沒有集數了。 if (tempString.indexOf("class=\"recTitle\"") < 0) break; // 取得存放一整頁面集數資訊 beginIndex = allPageString.indexOf("class=\"comicBox\""); beginIndex = allPageString.indexOf("class=\"relativeRec", beginIndex); endIndex = allPageString.indexOf("</div>", beginIndex); tempString = allPageString.substring(beginIndex, endIndex); int volumeCount = tempString.split("<h3").length - 1; // 單一頁面的集數 totalVolumeCount += volumeCount; String volumeURL = ""; String volumeTitle = ""; beginIndex = endIndex = 0; for (int j = 0; j < volumeCount; j++) { // 取得單集位址 beginIndex = tempString.indexOf("<h3", beginIndex); beginIndex = tempString.indexOf("href=", beginIndex); beginIndex = tempString.indexOf("\"", beginIndex) + 1; endIndex = tempString.indexOf("\"", beginIndex); volumeURL = baseURL + tempString.substring(beginIndex, endIndex); urlList.add(baseURL + tempString.substring(beginIndex, endIndex)); // 取得單集名稱 beginIndex = tempString.indexOf("title=", beginIndex); beginIndex = tempString.indexOf("\"", beginIndex) + 1; endIndex = tempString.indexOf("\"", beginIndex); volumeTitle = tempString.substring(beginIndex, endIndex); volumeList.add( 
getVolumeWithFormatNumber( Common.getStringRemovedIllegalChar( Common.getTraditionalChinese(volumeTitle.trim())))); Common.debugPrintln(volumeURL + " : " + volumeTitle); } } Common.debugPrintln(" 共有 " + (lastPage - 1) + " 張目錄頁"); totalVolume = totalVolumeCount; Common.debugPrintln("共有" + totalVolume + "集"); combinationList.add(volumeList); combinationList.add(urlList); return combinationList; }