@Override public void parseComicURL() { // parse URL and save all URLs in comicURL // // 先取得前面的下載伺服器網址 String allPageString = Common.getFileString(SetUp.getTempDirectory(), indexName); Common.debugPrint("開始解析這一集有幾頁 : "); int beginIndex = 0, endIndex = 0; totalPage = allPageString.split("<option ").length; Common.debugPrintln("共 " + totalPage + " 頁"); comicURL = new String[totalPage]; String picURL = ""; int p = 0; // 目前頁數 for (int i = 0; i < totalPage && Run.isAlive; i++) { beginIndex = allPageString.indexOf("<img id"); beginIndex = allPageString.indexOf("\"", beginIndex) + 1; endIndex = allPageString.indexOf("\"", beginIndex); String tempURL = allPageString.substring(beginIndex, endIndex); if (Common.isLegalURL(tempURL)) { comicURL[p++] = tempURL; Common.debugPrintln(p + " " + comicURL[p - 1]); // debug // 每解析一個網址就下載一張圖 singlePageDownload(getTitle(), getWholeTitle(), comicURL[p - 1], totalPage, p, 0); } else { totalPage--; } // Common.downloadFile( comicURL[p - 1], "", p + ".jpg", false, "" ); if (p < totalPage) { beginIndex = allPageString.indexOf("</select>"); beginIndex = allPageString.indexOf("\"", beginIndex) + 1; endIndex = allPageString.indexOf("\"", beginIndex); tempURL = allPageString.substring(beginIndex, endIndex); if ("#".equals(tempURL)) { Common.debugPrintln("THE LAST PAGE !!"); break; } String nextPageURL = "http://comic101.com" + tempURL; Common.downloadFile(nextPageURL, SetUp.getTempDirectory(), indexName, false, ""); allPageString = Common.getFileString(SetUp.getTempDirectory(), indexName); } } // System.exit( 0 ); // debug }
private void buildIndexFile(String tagName) { String dirPath = getBaseOutputDirectory() + tagName + Common.getSlash(); String path = ""; String text = ""; List<String> volumeTagList = new ArrayList<String>(); File dir = new File(dirPath); // 你的log檔路徑 File fileList[] = dir.listFiles(); // 得出檔案清單 String volumeTitle = ""; // 取得代號清單 for (int i = 0; i < fileList.length; i++) { if (fileList[i].isFile()) { // 過濾檔案 String[] temps = fileList[i].toString().split("\\\\"); String volumeTag = temps[temps.length - 1].split("\\.")[0]; if (!volumeTag.matches("comment")) volumeTagList.add(volumeTag); // print(i + " TAG : " + volumeTag); } } for (int i = 0; i < volumeTagList.size(); i++) { path = dirPath + volumeTagList.get(i) + ".js"; text = Common.getFileString(path); if (text.split("'").length <= 1) continue; volumeTitle = text.split("'")[1]; } System.exit(0); }
/**
 * Fetches {@code urlString} into the temp directory with
 * {@code Common.downloadGZIPInputStreamFile} and returns the text of the stored file.
 *
 * @param urlString address of the page to fetch
 * @return content of the downloaded file as read by {@code Common.getFileString}
 */
@Override
public String getAllPageString(String urlString) {
  final String storedName =
      Common.getStoredFileName(SetUp.getTempDirectory(), "index_178_", "html");

  Common.downloadGZIPInputStreamFile(urlString, SetUp.getTempDirectory(), storedName, false, "");

  final String pageText = Common.getFileString(SetUp.getTempDirectory(), storedName);
  return pageText;
}
@Override public synchronized void setParameters() { Common.downloadFile(webSite, SetUp.getTempDirectory(), indexName, false, ""); Common.newEncodeFile(SetUp.getTempDirectory(), indexName, indexEncodeName, Encoding.GBK); String tempStr = Common.getFileString(SetUp.getTempDirectory(), indexEncodeName); String[] lines = tempStr.split("\n"); for (int i = 0; i < lines.length; i++) { String line = Common.getTraditionalChinese(lines[i]); // ".": contain all characters except "\r" and "\n" // "(?s).": contain all characters if (line.matches("(?s).*title(?s).*")) { // get title ex.<title>尸錄 4話</title> String[] temp = line.split("<|>"); if (getWholeTitle() == null || getWholeTitle().equals("")) setWholeTitle(Common.getStringRemovedIllegalChar(temp[2])); } else if (line.matches("(?s).*page(?s).*")) { // get total page ex. | 共34頁 | int beginIndex = line.indexOf(Common.getStringUsingDefaultLanguage("共", "共")); int endIndex = line.indexOf(Common.getStringUsingDefaultLanguage("頁", "頁")); String temp = line.substring(beginIndex + 1, endIndex); totalPage = Integer.parseInt(temp); break; } } comicURL = new String[totalPage]; // totalPage = amount of comic pic SetUp.setWholeTitle(wholeTitle); }
private boolean needUpdate(String tagName, List<String> volumeTitleList) { if (volumeTitleList.size() == 0) return false; String text = Common.getFileString(getBaseOutputDirectory(), tagName + ".js"); int lastVolumeIndex = volumeTitleList.size() - 1; String lastVolumeTitle = volumeTitleList.get(lastVolumeIndex); // 如果目錄裡面找不到最後一集,代表需要更新 return (text.indexOf(lastVolumeTitle) < 0); }
/**
 * Fetches {@code urlString} into the temp directory, writes a re-encoded copy with
 * {@code Common.newEncodeFile}, and returns the text of that copy.
 *
 * @param urlString address of the page to fetch
 * @return content of the re-encoded file as read by {@code Common.getFileString}
 */
@Override
public String getAllPageString(String urlString) {
  final String rawName =
      Common.getStoredFileName(SetUp.getTempDirectory(), "index_xindm_", "html");
  final String encodedName =
      Common.getStoredFileName(SetUp.getTempDirectory(), "index_xindm_encode_", "html");

  Common.downloadFile(urlString, SetUp.getTempDirectory(), rawName, false, "");
  Common.newEncodeFile(SetUp.getTempDirectory(), rawName, encodedName);

  final String pageText = Common.getFileString(SetUp.getTempDirectory(), encodedName);
  return pageText;
}
private int getExistedVolumeCount(String tagName) { String text = Common.getFileString(getBaseOutputDirectory(), tagName + ".js"); int beginIndex = text.indexOf("new Array"); int endIndex = text.indexOf(")", beginIndex); if (beginIndex < 0 || endIndex < 0) return 0; String temp = text.substring(beginIndex, endIndex); // print(temp); return temp.split(",").length; }
@Override // 因為原檔就是utf8了,所以無須轉碼 public String getAllPageString(String urlString) { if (urlString.matches(".*/")) { urlString = urlString.substring(0, urlString.length() - 1); } String indexName = Common.getStoredFileName(SetUp.getTempDirectory(), "index_ck_", "html"); // Common.downloadFile( urlString, SetUp.getTempDirectory(), indexName, false, "" ); Common.simpleDownloadFile(urlString, SetUp.getTempDirectory(), indexName, urlString); return Common.getFileString(SetUp.getTempDirectory(), indexName); }
@Override public void parseComicURL() { // parse URL and save all URLs in comicURL // // 先取得前面的下載伺服器網址 String allPageString = Common.getFileString(SetUp.getTempDirectory(), indexName); Common.debugPrint("開始解析這一集有幾頁 : "); int beginIndex = allPageString.indexOf("name=\"selectb\""); beginIndex = allPageString.indexOf(">", beginIndex) + 1; int endIndex = allPageString.indexOf("</select>", beginIndex); String tempString = allPageString.substring(beginIndex, endIndex); totalPage = tempString.split("<option").length - 1; Common.debugPrintln("共 " + totalPage + " 頁"); comicURL = new String[totalPage]; String[] comicPageURL = new String[totalPage]; beginIndex = endIndex = 0; for (int i = 0; i < totalPage && Run.isAlive; i++) { beginIndex = allPageString.indexOf("value=", beginIndex); beginIndex = allPageString.indexOf("\"", beginIndex) + 1; endIndex = allPageString.indexOf("\"", beginIndex); comicPageURL[i] = baseURL + allPageString.substring(beginIndex, endIndex); } String picURL = ""; for (int p = 0; p < totalPage; p++) { if (!Common.existPicFile(getDownloadDirectory(), p + 1) || !Common.existPicFile(getDownloadDirectory(), p + 2)) { allPageString = getAllPageString(comicPageURL[p]); beginIndex = allPageString.indexOf("id=picwin"); beginIndex = allPageString.indexOf("src=", beginIndex); beginIndex = allPageString.indexOf("\"", beginIndex) + 1; endIndex = allPageString.indexOf("\"", beginIndex); comicURL[p] = Common.getFixedChineseURL(allPageString.substring(beginIndex, endIndex)); // Common.debugPrintln( ( p + 1 ) + " " + comicURL[p] ); // debug // 每解析一個網址就下載一張圖 singlePageDownload(getTitle(), getWholeTitle(), comicURL[p], totalPage, p + 1, 0); } } // System.exit( 0 ); // debug }
@Override public synchronized void parseComicURL() { System.out.print("parse the pic URL:"); for (int i = 0; i < totalPage && Run.isAlive; i++) { // 檢查下一張圖是否存在同個資料夾,若存在就跳下一張 if (!Common.existPicFile(getDownloadDirectory(), i + 2) || !Common.existPicFile(getDownloadDirectory(), i + 1)) { int endIndex = webSite.lastIndexOf("/"); String tempWebSite = webSite.substring(0, endIndex + 1) + (i + 1) + ".htm"; Common.downloadFile(tempWebSite, SetUp.getTempDirectory(), indexName, false, ""); Common.newEncodeFile(SetUp.getTempDirectory(), indexName, indexEncodeName, Encoding.GBK); String tempStr = Common.getFileString(SetUp.getTempDirectory(), indexEncodeName); String[] lines = tempStr.split("\n"); for (int count = 0; count < lines.length && Run.isAlive; count++) { String line = lines[count]; if (line.matches("(?s).*document.write(?s).*")) { String[] temp = line.split("'\"|\"|'|>"); System.out.println(baseURL + temp[3]); // replace %20 from white space in URL String frontURL = temp[3].replaceAll("\\s", "%20"); comicURL[i] = Common.getFixedChineseURL(baseURL + frontURL); // Common.debugPrintln( i + " " + comicURL[i] ); // debug // 每解析一個網址就下載一張圖 singlePageDownload(getTitle(), getWholeTitle(), comicURL[i], totalPage, i + 1, 0); break; } } } } // System.exit( 0 ); // debug }
/**
 * Downloads the chapter page and, when no whole title has been set yet, derives it from the
 * first {@code alt="..."} attribute of the stored page.
 *
 * <p>The attribute text is trimmed, converted with {@code Common.getTraditionalChinese},
 * stripped of illegal characters and passed through {@code getVolumeWithFormatNumber}.
 */
@Override
public void setParameters() {
  Common.debugPrintln("開始解析各參數 :");
  Common.downloadFile(webSite, SetUp.getTempDirectory(), indexName, false, "");

  boolean wholeTitleUnset = getWholeTitle() == null || getWholeTitle().equals("");
  if (wholeTitleUnset) {
    Common.debugPrintln("開始解析title和wholeTitle :");

    // NOTE(review): the page was downloaded to indexName above, but the text is read from
    // indexEncodeName and no re-encoding step is visible in this method - confirm that this
    // file is produced elsewhere or that both names refer to the same file.
    String pageText = Common.getFileString(SetUp.getTempDirectory(), indexEncodeName);

    int quoteOpen = pageText.indexOf("alt=");
    quoteOpen = pageText.indexOf("\"", quoteOpen) + 1;
    int quoteClose = pageText.indexOf("\"", quoteOpen);
    String rawTitle = pageText.substring(quoteOpen, quoteClose).trim();

    String traditional = Common.getTraditionalChinese(rawTitle);
    String sanitized = Common.getStringRemovedIllegalChar(traditional);
    setWholeTitle(getVolumeWithFormatNumber(sanitized));
  }

  Common.debugPrintln("作品名稱(title) : " + getTitle());
  Common.debugPrintln("章節名稱(wholeTitle) : " + getWholeTitle());
}
private void updateIndexFile( String tagName, String titleName, String titleIntroduction, String volumeTitle, String snsSysID) { String text = Common.getFileString(getBaseOutputDirectory(), tagName + ".js"); int midIndex1 = text.indexOf("new Array(") + 11; int endIndex = text.length(); if (midIndex1 < 11) { print("第 1 筆索引資料"); // 新建index file List<String> volumeTitleList = new ArrayList<String>(); List<String> snsSysIDList = new ArrayList<String>(); volumeTitleList.add(volumeTitle); snsSysIDList.add(snsSysID); outputVolumeIndex(tagName, titleName, titleIntroduction, volumeTitleList, snsSysIDList); return; } print("第 n 筆索引資料"); text = text.substring(0, midIndex1) + "'" + getOutputText(volumeTitle) + "', " + "'" + getVolumeID(snsSysID) + "', " + text.substring(midIndex1, endIndex); Common.outputFile(text, getBaseOutputDirectory(), tagName + ".js"); }
@Override public void parseComicURL() { // parse URL and save all URLs in comicURL // // 先取得前面的下載伺服器網址 initNewData(); String allPageString = Common.getFileString(SetUp.getTempDirectory(), indexName); Common.debugPrint("開始解析這一集有幾頁 : "); if (tsukkomiMode) { int beginIndex = 0; int endIndex = 0; String listURL = webSite; List<String> tagNameList = new ArrayList<String>(); if (webSite.matches(".*/")) { listURL = webSite.substring(0, webSite.length() - 1); } if (isRssPage()) { print("is RSS page : " + listURL); outputNewListFile(listURL); tagNameList = getTagNameList(listURL); for (int i = 0; i < tagNameList.size(); i++) // 作品列表 { String tagName = tagNameList.get(i); handleSingleTitle(tagName); } } else // ex. // http://manhua.dmzj.com/tags/category_search/0-0-0-all-0-0-1-447.shtml#category_nav_anchor { print("is Normal List Page : " + webSite); if (webSite.indexOf("/update_") > 0) { handleAllUpdatePage(); } else if (webSite.indexOf("/rank/") > 0) { handleAllRankPage(); } } System.exit(0); } // 取得所有位址編碼代號 int beginIndex = allPageString.indexOf("'[") + 2; int endIndex = allPageString.indexOf("\"]", beginIndex) + 1; String allCodeString = allPageString.substring(beginIndex, endIndex); totalPage = allCodeString.split("\",\"").length; Common.debugPrintln("共 " + totalPage + " 頁"); comicURL = new String[totalPage]; refers = new String[totalPage]; // 取得位址編碼代號的替換字元 beginIndex = allPageString.indexOf(",'", endIndex) + 2; endIndex = allPageString.indexOf("'.", beginIndex); String allVarString = allPageString.substring(beginIndex, endIndex); String[] varTokens = allVarString.split("\\|"); for (int i = 0; i < varTokens.length; i++) { Common.debugPrintln(i + " " + varTokens[i]); // test } // System.exit( 0 ); String basePicURL = "http://images.dmzj.com/"; // "http://images.manhua.178.com/"; String[] codeTokens = allCodeString.split("\",\""); codeTokens = getRealCodeTokens(codeTokens, varTokens); String firstCode = codeTokens[0].replaceAll("\"", ""); String firstPicURL = ""; 
Common.debugPrintln("第一張編碼:" + firstCode); firstPicURL = basePicURL + Common.getFixedChineseURL(getDecodeURL(firstCode)); firstPicURL = firstPicURL.replaceAll("\\\\", ""); Common.debugPrintln("第一張圖片網址:" + firstPicURL); // System.exit( 0 ); String[] picNames = new String[totalPage]; for (int i = 0; i < picNames.length; i++) { codeTokens[i] = codeTokens[i].replaceAll("\"", ""); beginIndex = codeTokens[i].lastIndexOf("/") + 1; endIndex = codeTokens[i].length(); // .lastIndexOf( "\"" ); // Common.debugPrintln( codeTokens[i] + " " + beginIndex + " " + endIndex ); picNames[i] = Common.getFixedChineseURL(getDecodeURL(codeTokens[i].substring(beginIndex, endIndex))); // System.exit( 0 ); // debug } endIndex = firstPicURL.lastIndexOf("/") + 1; String parentPicURL = firstPicURL.substring(0, endIndex); for (int i = 0; i < codeTokens.length && Run.isAlive; i++) { comicURL[i] = parentPicURL + picNames[i]; // 存入每一頁的網頁網址 refers[i] = webSite; // Common.debugPrintln( ( i + 1 ) + " " + comicURL[i] ); // debug } // System.exit( 0 ); // debug }
/**
 * Rebuilds the main list file from the per-title index files found in the base output
 * directory.
 *
 * <p>Every sub-directory name is a tag; the tag's index file is {@code <tag>.js} in the base
 * output directory. From each index file this reads the first single-quoted string as the
 * title name, and the newest volume's title and ID from either the end of the array
 * ({@code stepByStepMode}, newest last) or its start (newest first). Tags whose index file has
 * no quoted string are left out of all four lists, so the lists handed to
 * {@code outputListFile} always line up index by index.
 *
 * <p>Nothing is written when the base output directory cannot be listed.
 *
 * @param stepByStepMode {@code true} when the newest volume is stored at the end of the array,
 *     {@code false} when it is stored at the beginning
 */
private void outputMainListFile(boolean stepByStepMode) {
  List<String> tagList = new ArrayList<String>();
  List<String> indexedTagList = new ArrayList<String>();
  List<String> nameList = new ArrayList<String>();
  List<String> lastVolumeTitleList = new ArrayList<String>();
  List<String> lastVolumeIDList = new ArrayList<String>();

  File dir = new File(getBaseOutputDirectory());
  // listFiles() returns null when the path is not a directory or cannot be read.
  File[] fileList = dir.listFiles();
  if (fileList == null) {
    print("cannot list directory: " + dir);
    return;
  }

  // Collect the tags: one per sub-directory.
  for (File entry : fileList) {
    if (entry.isDirectory()) {
      // getName() is separator-independent; splitting the path on '\' only worked on Windows.
      tagList.add(entry.getName());
    }
  }

  // Collect name, newest volume title and newest volume ID for every tag.
  for (int i = 0; i < tagList.size(); i++) {
    String tag = tagList.get(i);
    String path = getBaseOutputDirectory() + tag + ".js";
    String text = Common.getFileString(path);

    String[] temps = text.split("'");
    if (temps.length <= 1) {
      continue; // no usable index file for this tag
    }
    // Record the tag only now, so it stays aligned with the three lists filled below.
    indexedTagList.add(tag);
    nameList.add(temps[1]); // the first quoted string is the title name

    int beginIndex = 0;
    int endIndex = 0;
    String temp = "";
    if (stepByStepMode) {
      // Newest volume is stored last.
      beginIndex = text.indexOf(");", beginIndex) - 2;
      endIndex = text.lastIndexOf("'", beginIndex);
      beginIndex = text.lastIndexOf("'", endIndex - 2) + 1;
      temp = text.substring(beginIndex, endIndex);
      lastVolumeTitleList.add(temp);
      print("文件中最新一集: " + temp);

      // NOTE(review): this search starts before the ");" found above, so it appears to
      // locate the same ");" again - confirm against the real index file layout.
      beginIndex = text.indexOf(");", endIndex + 1) + 1;
      endIndex = text.lastIndexOf("'", beginIndex);
      beginIndex = text.lastIndexOf("'", endIndex - 2) + 1;
      temp = text.substring(beginIndex, endIndex);
      lastVolumeIDList.add(temp);
    } else {
      // Newest volume is stored first.
      beginIndex = text.indexOf("new Array(", beginIndex);
      beginIndex = text.indexOf("'", beginIndex) + 1;
      endIndex = text.indexOf("'", beginIndex);
      temp = text.substring(beginIndex, endIndex);
      lastVolumeTitleList.add(temp);

      beginIndex = text.indexOf("'", endIndex + 1) + 1;
      endIndex = text.indexOf("'", beginIndex);
      temp = text.substring(beginIndex, endIndex);
      lastVolumeIDList.add(temp);
    }
  }

  List<List<String>> combinationList = new ArrayList<List<String>>();
  combinationList.add(nameList);
  combinationList.add(indexedTagList);
  combinationList.add(lastVolumeTitleList);
  combinationList.add(lastVolumeIDList);
  outputListFile(combinationList, "MAIN_LIST", mainListFileName);
}