/**
   * Parses the chapter page at {@code webSite} and stores one picture URL per page in
   * {@code comicURL}; {@code totalPage} is set to the number of entries found.
   *
   * <p>The picture paths are read from the comma-separated contents of the first
   * {@code Array(...)} literal in the page. Each entry has its first and last character removed
   * (presumably the surrounding quotes) and is appended to the fixed image host.
   */
  @Override
  public void parseComicURL() {
    String allPageString = getAllPageString(webSite);
    Common.debugPrint("開始解析這一集有幾頁 : ");

    // The list of picture paths sits between "Array(" and the following ");".
    int beginIndex = allPageString.indexOf("Array(") + 6;
    int endIndex = allPageString.indexOf(");", beginIndex);
    String[] urlListTokes = allPageString.substring(beginIndex, endIndex).split(",");

    totalPage = urlListTokes.length;
    Common.debugPrintln("共 " + totalPage + " 頁");
    comicURL = new String[totalPage];

    // ex. http://76.manmankan.com/2011/201111/1916/43124/001.jpg
    String baseURL = "http://76.manmankan.com";

    for (int i = 0; i < urlListTokes.length && Run.isAlive; i++) {
      // Trim first and take the length of the trimmed token: using the untrimmed length
      // would keep the closing quote (or overrun the string) whenever the token carries
      // surrounding whitespace.
      String token = urlListTokes[i].trim();
      String fontURL = token.substring(1, token.length() - 1);
      comicURL[i] = baseURL + fontURL;
    }
  }
Exemple #2
0
  /**
   * Walks the chapter one viewer page at a time, recording each picture URL in {@code comicURL}
   * and downloading the picture as soon as its URL is known.
   *
   * <p>The starting page is read from the temp directory file {@code indexName}. After each
   * picture, the link that follows {@code </select>} is fetched into the same temp file and
   * re-read; a link of {@code "#"} marks the last page. {@code totalPage} starts as the number of
   * pieces produced by splitting the page on {@code "<option "} and is decremented whenever a
   * candidate picture URL is rejected by {@code Common.isLegalURL}.
   */
  @Override
  public void parseComicURL() {
    String html = Common.getFileString(SetUp.getTempDirectory(), indexName);
    Common.debugPrint("開始解析這一集有幾頁 : ");

    totalPage = html.split("<option ").length;
    Common.debugPrintln("共 " + totalPage + " 頁");
    comicURL = new String[totalPage];

    int parsed = 0; // number of pictures recorded so far
    for (int round = 0; round < totalPage && Run.isAlive; round++) {
      // The picture URL is the first quoted value after "<img id".
      int from = html.indexOf("\"", html.indexOf("<img id")) + 1;
      String candidate = html.substring(from, html.indexOf("\"", from));

      if (!Common.isLegalURL(candidate)) {
        totalPage--;
      } else {
        comicURL[parsed] = candidate;
        parsed++;
        Common.debugPrintln(parsed + " " + candidate); // debug
        // Download each picture as soon as its URL has been parsed.
        singlePageDownload(getTitle(), getWholeTitle(), candidate, totalPage, parsed, 0);
      }

      if (parsed >= totalPage) {
        continue;
      }

      // The link to the next viewer page is the first quoted value after "</select>".
      from = html.indexOf("\"", html.indexOf("</select>")) + 1;
      String nextLink = html.substring(from, html.indexOf("\"", from));
      if ("#".equals(nextLink)) {
        Common.debugPrintln("THE LAST PAGE !!");
        break;
      }

      Common.downloadFile(
          "http://comic101.com" + nextLink, SetUp.getTempDirectory(), indexName, false, "");
      html = Common.getFileString(SetUp.getTempDirectory(), indexName);
    }
  }
  /**
   * Collects the viewer-page URLs listed in the {@code selectb} drop-down, then visits each
   * viewer page to extract and download its picture.
   *
   * <p>{@code totalPage} is the number of {@code <option} entries inside the drop-down. For each
   * page whose picture file (or the following one) is not yet present in the download directory,
   * the viewer page is fetched, the {@code src} of the element marked {@code id=picwin} is stored
   * in {@code comicURL}, and the picture is downloaded immediately.
   */
  @Override
  public void parseComicURL() {
    String allPageString = Common.getFileString(SetUp.getTempDirectory(), indexName);
    Common.debugPrint("開始解析這一集有幾頁 : ");

    // Isolate the markup between the opening tag of the "selectb" drop-down and </select>.
    int beginIndex = allPageString.indexOf("name=\"selectb\"");
    beginIndex = allPageString.indexOf(">", beginIndex) + 1;
    int endIndex = allPageString.indexOf("</select>", beginIndex);

    String optionsHtml = allPageString.substring(beginIndex, endIndex);

    totalPage = optionsHtml.split("<option").length - 1;
    Common.debugPrintln("共 " + totalPage + " 頁");
    comicURL = new String[totalPage];

    // Read the option values from the drop-down itself rather than from the top of the page,
    // so an unrelated value="..." attribute earlier in the document cannot be mistaken for a
    // page link.
    String[] comicPageURL = new String[totalPage];
    beginIndex = endIndex = 0;
    for (int i = 0; i < totalPage && Run.isAlive; i++) {
      beginIndex = optionsHtml.indexOf("value=", beginIndex);
      beginIndex = optionsHtml.indexOf("\"", beginIndex) + 1;
      endIndex = optionsHtml.indexOf("\"", beginIndex);

      comicPageURL[i] = baseURL + optionsHtml.substring(beginIndex, endIndex);
    }

    // Stop as soon as the run is cancelled: otherwise this loop would keep fetching pages, and
    // after a cancelled first loop it would request entries that were never filled in.
    for (int p = 0; p < totalPage && Run.isAlive; p++) {
      if (!Common.existPicFile(getDownloadDirectory(), p + 1)
          || !Common.existPicFile(getDownloadDirectory(), p + 2)) {
        allPageString = getAllPageString(comicPageURL[p]);

        // The picture URL is the quoted src value following "id=picwin".
        beginIndex = allPageString.indexOf("id=picwin");
        beginIndex = allPageString.indexOf("src=", beginIndex);
        beginIndex = allPageString.indexOf("\"", beginIndex) + 1;
        endIndex = allPageString.indexOf("\"", beginIndex);

        comicURL[p] = Common.getFixedChineseURL(allPageString.substring(beginIndex, endIndex));

        // Download each picture as soon as its URL has been parsed.
        singlePageDownload(getTitle(), getWholeTitle(), comicURL[p], totalPage, p + 1, 0);
      }
    }
  }
  /**
   * Parses the chapter page at {@code webSite}, fills {@code comicURL} with one picture URL per
   * page, and downloads every picture with a cookie and a per-page referer.
   *
   * <p>Picture paths come from the comma-separated, quoted entries of the first
   * {@code Array(...)} literal in the page and are appended to the fixed image host.
   * {@code totalPage} is set to the number of entries.
   */
  @Override
  public void parseComicURL() {
    String allPageString = getAllPageString(webSite);
    Common.debugPrint("開始解析這一集有幾頁 : ");

    String baseURL = "http://mh2.xindm.cn";

    // Take everything from the first quote after "Array(" up to the closing ");".
    int beginIndex = allPageString.indexOf("Array(");
    beginIndex = allPageString.indexOf("\"", beginIndex);
    int endIndex = allPageString.indexOf(");", beginIndex);
    String tempPicString = allPageString.substring(beginIndex, endIndex);
    String[] picURLs = tempPicString.split(",");

    totalPage = picURLs.length;
    Common.debugPrintln("共 " + totalPage + " 頁");
    comicURL = new String[totalPage];

    for (int i = 0; i < picURLs.length; i++) {
      // Drop the quotes and any whitespace left around the entry by the split, so that
      // neither ends up inside the URL.
      comicURL[i] = baseURL + picURLs[i].replace("\"", "").trim();
      Common.debugPrintln("第" + (i + 1) + "頁網址:" + comicURL[i]);
    }

    // A cookie is required to download the pictures (the site blocks hot-linking).
    String[] cookies = Common.getCookieStrings(webSite, null);
    String cookieString = "";
    // Guard against a missing or empty cookie array as well as a missing first entry.
    if (cookies != null && cookies.length > 0 && cookies[0] != null) {
      cookieString =
          "Hm_lvt_016bf6f495d44a067f569423ad894560=1337210178886; " + cookies[0].split(";")[0];
    }
    Common.debugPrintln("取得cookies:" + cookieString);

    for (int p = 1; p <= totalPage && Run.isAlive; p++) {
      String referURL = webSite + "?p=" + p;
      // Download each picture as soon as its referer has been built.
      singlePageDownloadUsingSimple(
          getTitle(), getWholeTitle(), comicURL[p - 1], totalPage, p, cookieString, referURL);

      Common.debugPrintln((p) + " " + comicURL[p - 1] + " " + referURL); // debug
    }
  }
  /**
   * Decodes the picture list embedded in the chapter page and fills {@code comicURL} and
   * {@code refers} with one entry per page.
   *
   * <p>When {@code tsukkomiMode} is set, the method instead processes {@code webSite} as a list
   * page (RSS, update or rank) and then terminates the JVM via {@code System.exit(0)}.
   *
   * <p>Otherwise the page read from the temp directory file {@code indexName} supplies two pieces
   * of text: the encoded picture codes and a {@code |}-separated list of replacement tokens.
   * {@code getRealCodeTokens} combines them; the directory part is taken from the first decoded
   * picture URL and every decoded file name is appended to it.
   */
  @Override
  public void parseComicURL() {
    initNewData();

    String pageHtml = Common.getFileString(SetUp.getTempDirectory(), indexName);
    Common.debugPrint("開始解析這一集有幾頁 : ");

    if (tsukkomiMode) {
      // Drop a trailing slash from the list address.
      String listURL =
          webSite.matches(".*/") ? webSite.substring(0, webSite.length() - 1) : webSite;

      if (!isRssPage()) {
        // ex.
        // http://manhua.dmzj.com/tags/category_search/0-0-0-all-0-0-1-447.shtml#category_nav_anchor
        print("is Normal List Page : " + webSite);

        if (webSite.indexOf("/update_") > 0) {
          handleAllUpdatePage();
        } else if (webSite.indexOf("/rank/") > 0) {
          handleAllRankPage();
        }
      } else {
        print("is RSS page : " + listURL);
        outputNewListFile(listURL);
        List<String> tagNames = getTagNameList(listURL);

        // Handle every title in the list.
        for (int t = 0; t < tagNames.size(); t++) {
          handleSingleTitle(tagNames.get(t));
        }
      }

      System.exit(0);
    }

    // Extract the text holding all encoded picture codes.
    int codesStart = pageHtml.indexOf("'[") + 2;
    int codesEnd = pageHtml.indexOf("\"]", codesStart) + 1;
    String allCodeString = pageHtml.substring(codesStart, codesEnd);
    String[] codeTokens = allCodeString.split("\",\"");

    totalPage = codeTokens.length;
    Common.debugPrintln("共 " + totalPage + " 頁");
    comicURL = new String[totalPage];
    refers = new String[totalPage];

    // Extract the replacement tokens used by the encoded picture codes.
    int varsStart = pageHtml.indexOf(",'", codesEnd) + 2;
    int varsEnd = pageHtml.indexOf("'.", varsStart);
    String[] varTokens = pageHtml.substring(varsStart, varsEnd).split("\\|");

    int varIndex = 0;
    for (String varToken : varTokens) {
      Common.debugPrintln(varIndex + " " + varToken); // test
      varIndex++;
    }

    String basePicURL = "http://images.dmzj.com/"; // "http://images.manhua.178.com/";

    codeTokens = getRealCodeTokens(codeTokens, varTokens);

    String firstCode = codeTokens[0].replaceAll("\"", "");
    Common.debugPrintln("第一張編碼:" + firstCode);

    String firstPicURL =
        (basePicURL + Common.getFixedChineseURL(getDecodeURL(firstCode))).replaceAll("\\\\", "");
    Common.debugPrintln("第一張圖片網址:" + firstPicURL);

    // Decode the file name (the part after the last '/') of every picture code.
    String[] picNames = new String[totalPage];
    for (int i = 0; i < picNames.length; i++) {
      String unquoted = codeTokens[i].replaceAll("\"", "");
      codeTokens[i] = unquoted;
      String encodedName = unquoted.substring(unquoted.lastIndexOf("/") + 1);
      picNames[i] = Common.getFixedChineseURL(getDecodeURL(encodedName));
    }

    // Every picture shares the directory of the first one.
    String parentPicURL = firstPicURL.substring(0, firstPicURL.lastIndexOf("/") + 1);

    for (int i = 0; i < codeTokens.length && Run.isAlive; i++) {
      comicURL[i] = parentPicURL + picNames[i]; // picture URL of each page
      refers[i] = webSite;
    }
  }
  /**
   * Parses a novel's index page, downloads every linked volume that is not already present, and
   * finally assembles the whole novel.
   *
   * <p>Steps:
   *
   * <ol>
   *   <li>Normalise {@code webSite}, download it to the temp directory, re-encode it and convert
   *       it to Traditional Chinese.
   *   <li>Scan every {@code href=} link; links accepted by {@code unRedundantURL} become volume
   *       URLs (made absolute against the index page's directory when they do not start with
   *       {@code http://}), and the first non-empty text that follows the link becomes the
   *       volume title. Duplicate URLs are ignored.
   *   <li>Read the author name that follows the "作者" label, falling back to {@code getTitle()}.
   *   <li>Download and process each volume whose output file does not exist yet, then call
   *       {@code handleWholeNovel}.
   * </ol>
   *
   * <p>Sets {@code totalPage} and {@code comicURL}; failures while downloading or processing
   * volumes are reported and logged rather than rethrown.
   */
  @Override
  public void parseComicURL() {
    webSite = getRegularURL(webSite); // convert to the canonical index-page address

    Common.downloadFile(webSite, SetUp.getTempDirectory(), indexName, false, "");
    Common.newEncodeFile(SetUp.getTempDirectory(), indexName, indexEncodeName, pageCode);
    String allPageString = Common.getFileString(SetUp.getTempDirectory(), indexEncodeName);
    Common.debugPrint("開始解析這一集有幾頁 : ");

    allPageString = Common.getTraditionalChinese(allPageString);

    List<String> urlList = new ArrayList<String>();
    List<String> volumeList = new ArrayList<String>();

    String hrefString = "href=";
    String tempString = "";
    String volumeURL = "";
    String volumeTitle = "";
    String baseTempURL = webSite.substring(0, webSite.lastIndexOf("/") + 1);

    int beginIndex = 0;
    int endIndex = 0;
    int amount = 0;

    while (true) {
      // Locate the next hyperlink (either "href=" or "HREF=") and the end of its value.
      beginIndex =
          Common.getSmallerIndexOfTwoKeyword(
              allPageString, beginIndex, hrefString, hrefString.toUpperCase());
      endIndex = Common.getSmallerIndexOfTwoKeyword(allPageString, beginIndex, ">", " ");

      if (beginIndex < 0 || endIndex < 0) {
        break;
      }

      // Skip the 5 characters of "href=" and strip the quotes around the value.
      tempString = allPageString.substring(beginIndex + 5, endIndex).replaceAll("\"", "").trim();

      Common.debugPrintln("找到的連結: " + tempString);

      // The link points to a lower-level (volume) page.
      if (unRedundantURL(tempString)) {

        // Use the link as-is when it is already absolute; otherwise resolve it against the
        // directory of the index page.
        if (tempString.matches("http://(?s).*")) {
          volumeURL = tempString;
        } else {
          volumeURL = baseTempURL + tempString;
        }

        // Only add addresses that have not been seen before.
        if (!urlList.contains(volumeURL)) {
          urlList.add(volumeURL);

          // The volume title is the first non-empty text between a '>' and the next '<'.
          do {
            beginIndex = allPageString.indexOf(">", beginIndex) + 1;
            endIndex = allPageString.indexOf("<", beginIndex);
            volumeTitle = allPageString.substring(beginIndex, endIndex).trim();
          } while (volumeTitle.isEmpty());

          volumeList.add(
              Common.getStringRemovedIllegalChar(Common.getTraditionalChinese(volumeTitle)));

          Common.debugPrintln(" " + volumeURL + " " + volumeTitle);

          amount++;
        }
      }
      beginIndex = endIndex;
    }
    totalPage = amount;

    comicURL = new String[totalPage];
    String[] titles = new String[totalPage];

    for (int i = 0; i < totalPage; i++) {
      comicURL[i] = urlList.get(i);
      titles[i] = volumeList.get(i) + "." + Common.getDefaultTextExtension();
    }

    // Read the author name. The label may be written with an ASCII colon or with a full-width
    // colon (U+FF1A); the original code tested the identical literal twice, which left the
    // second branch unreachable. The escape keeps the full-width form intact regardless of how
    // the source file is re-encoded. Both labels are three characters long.
    String author = "";
    int authorIndex = allPageString.indexOf("作者:");
    if (authorIndex <= 0) {
      authorIndex = allPageString.indexOf("作者\uFF1A");
    }

    if (authorIndex > 0) {
      beginIndex = authorIndex + 3;
      endIndex = allPageString.indexOf("<", beginIndex);
      author = allPageString.substring(beginIndex, endIndex);
    } else {
      Common.debugPrintln("此站無法取得作者訊息");
      author = getTitle();
    }

    author = Common.getStringRemovedIllegalChar(author);

    Common.debugPrintln("作者名稱: " + author);

    NumberFormat formatter = new DecimalFormat(Common.getZero());

    try {

      for (int i = 0; i < totalPage && Run.isAlive; i++) {
        // Download and process each volume whose output file is not there yet.
        if (!new File(getDownloadDirectory() + titles[i]).exists() && Run.isAlive) {

          singlePageDownload(getTitle(), getWholeTitle(), comicURL[i], totalPage, i + 1, 0);
          pageExtension =
              comicURL[i].substring(comicURL[i].lastIndexOf(".") + 1, comicURL[i].length());
          String fileName = formatter.format(i + 1) + "." + pageExtension;

          nowTitle = titles[i].substring(0, titles[i].lastIndexOf("."));
          handleSingleNovel(fileName, titles[i]); // main routine for a single volume

        } else {
          Common.debugPrintln(titles[i] + "已下載,跳過");
        }
      }

      handleWholeNovel(titles, webSite, author);

    } catch (Exception ex) {
      Common.hadleErrorMessage(ex, "處理下載文字檔發生問題");
      // Log the exception that actually occurred; the previous code threw and logged a brand-new
      // Exception, which discarded the real cause and stack trace.
      Logger.getLogger(ParseTianyaBook.class.getName()).log(Level.SEVERE, null, ex);
    }
  }