Exemplo n.º 1
0
  /**
   * Walks through the chapter one page at a time, storing each image URL in {@code comicURL} and
   * downloading the image as soon as its URL has been parsed.
   *
   * <p>The first page is read from the already-downloaded index file; every following page is
   * downloaded over the same index file and re-read. Parsing stops when the "next page" link is
   * {@code "#"}, when {@code Run.isAlive} becomes false, or when {@code totalPage} iterations
   * have run.
   */
  @Override
  public void parseComicURL() {
    String allPageString = Common.getFileString(SetUp.getTempDirectory(), indexName);
    Common.debugPrint("開始解析這一集有幾頁 : ");

    // NOTE(review): split() yields one more piece than there are "<option " markers when text
    // precedes the first marker, so this may over-count by one — confirm against a real page.
    totalPage = allPageString.split("<option ").length;
    Common.debugPrintln("共 " + totalPage + " 頁");
    comicURL = new String[totalPage];

    int p = 0; // number of pages successfully parsed so far
    for (int i = 0; i < totalPage && Run.isAlive; i++) {
      // The candidate image URL is the first double-quoted value after the "<img id" marker.
      int beginIndex = allPageString.indexOf("<img id");
      beginIndex = allPageString.indexOf("\"", beginIndex) + 1;
      int endIndex = allPageString.indexOf("\"", beginIndex);
      String tempURL = allPageString.substring(beginIndex, endIndex);

      if (Common.isLegalURL(tempURL)) {
        comicURL[p++] = tempURL;
        Common.debugPrintln(p + " " + comicURL[p - 1]); // debug
        // Download the image as soon as its URL has been parsed.
        singlePageDownload(getTitle(), getWholeTitle(), comicURL[p - 1], totalPage, p, 0);
      } else {
        // Not a usable URL: shrink the expected page count instead of storing it.
        totalPage--;
      }

      if (p < totalPage) {
        // The next-page link is the first double-quoted value after "</select>".
        beginIndex = allPageString.indexOf("</select>");
        beginIndex = allPageString.indexOf("\"", beginIndex) + 1;
        endIndex = allPageString.indexOf("\"", beginIndex);
        tempURL = allPageString.substring(beginIndex, endIndex);
        if ("#".equals(tempURL)) {
          Common.debugPrintln("THE LAST PAGE !!");
          break;
        }

        String nextPageURL = "http://comic101.com" + tempURL;

        // Overwrite the index file with the next page and load it for the next iteration.
        Common.downloadFile(nextPageURL, SetUp.getTempDirectory(), indexName, false, "");
        allPageString = Common.getFileString(SetUp.getTempDirectory(), indexName);
      }
    }
  }
Exemplo n.º 2
0
  /**
   * Scans the output sub-directory of {@code tagName}, collects the base names of the files in
   * it (everything before the first '.'), skipping the one named {@code comment}, and reads the
   * matching {@code .js} file for each collected name.
   *
   * <p>NOTE(review): the volume title extracted from each file is never used and the method ends
   * with {@code System.exit(0)}, which terminates the JVM. This looks like unfinished/debug code
   * and should be confirmed before the method is called from production paths.
   *
   * @param tagName name of the sub-directory (under the base output directory) to scan
   */
  private void buildIndexFile(String tagName) {
    String dirPath = getBaseOutputDirectory() + tagName + Common.getSlash();
    List<String> volumeTagList = new ArrayList<String>();

    // listFiles() returns null when the path is not a directory or cannot be read.
    File[] fileList = new File(dirPath).listFiles();

    // Collect the volume tags (file names without extension).
    if (fileList != null) {
      for (File file : fileList) {
        if (!file.isFile()) {
          continue;
        }
        // getName() is separator-independent, unlike splitting the path on a backslash.
        String volumeTag = file.getName().split("\\.")[0];
        if (!"comment".equals(volumeTag)) {
          volumeTagList.add(volumeTag);
        }
      }
    }

    for (String volumeTag : volumeTagList) {
      String text = Common.getFileString(dirPath + volumeTag + ".js");
      String[] quotedParts = text.split("'");

      if (quotedParts.length <= 1) {
        continue;
      }

      // First single-quoted value in the file; currently not consumed by anything.
      String volumeTitle = quotedParts[1];
    }

    System.exit(0);
  }
Exemplo n.º 3
0
  /**
   * Downloads {@code urlString} through the GZIP-aware downloader into a stored file in the
   * temp directory and returns that file's content.
   *
   * @param urlString address of the page to fetch
   * @return the content of the downloaded file
   */
  @Override
  public String getAllPageString(String urlString) {
    String storedFileName =
        Common.getStoredFileName(SetUp.getTempDirectory(), "index_178_", "html");

    Common.downloadGZIPInputStreamFile(
        urlString, SetUp.getTempDirectory(), storedFileName, false, "");
    return Common.getFileString(SetUp.getTempDirectory(), storedFileName);
  }
Exemplo n.º 4
0
  /**
   * Downloads the page at {@code webSite}, re-encodes it from GBK, and scans it line by line to
   * fill in the whole title (only when none is set yet) and {@code totalPage}.
   *
   * <p>The title is taken from the first line containing "title" (e.g.
   * {@code <title>尸錄 4話</title>}); the page count from the text between the 共 and 頁 markers
   * of a line containing "page" (e.g. {@code | 共34頁 |}). Scanning stops once the page count has
   * been read. Afterwards {@code comicURL} is allocated with {@code totalPage} slots.
   *
   * @throws NumberFormatException if the text between the two markers is not an integer
   */
  @Override
  public synchronized void setParameters() {
    Common.downloadFile(webSite, SetUp.getTempDirectory(), indexName, false, "");
    Common.newEncodeFile(SetUp.getTempDirectory(), indexName, indexEncodeName, Encoding.GBK);

    String tempStr = Common.getFileString(SetUp.getTempDirectory(), indexEncodeName);

    for (String rawLine : tempStr.split("\n")) {
      String line = Common.getTraditionalChinese(rawLine);

      // contains() is equivalent to the former "(?s).*x(?s).*" regex without compiling a
      // pattern for every line.
      if (line.contains("title")) {
        // Splitting "<title>xxx</title>" on '<' and '>' leaves the title text at index 2.
        String[] temp = line.split("<|>");
        boolean titleMissing = getWholeTitle() == null || getWholeTitle().equals("");

        if (titleMissing && temp.length > 2) {
          setWholeTitle(Common.getStringRemovedIllegalChar(temp[2]));
        }
      } else if (line.contains("page")) {
        int beginIndex = line.indexOf(Common.getStringUsingDefaultLanguage("共", "共"));
        int endIndex = line.indexOf(Common.getStringUsingDefaultLanguage("頁", "頁"));

        // "page" is a common substring; keep scanning when this line lacks the markers instead
        // of failing on substring().
        if (beginIndex < 0 || endIndex <= beginIndex) {
          continue;
        }

        String digits = line.substring(beginIndex + 1, endIndex).trim();
        if (digits.isEmpty()) {
          continue;
        }

        totalPage = Integer.parseInt(digits);
        break;
      }
    }

    comicURL = new String[totalPage]; // one slot per comic picture
    SetUp.setWholeTitle(wholeTitle);
  }
Exemplo n.º 5
0
  /**
   * Tells whether the index file of {@code tagName} is missing the newest volume.
   *
   * @param tagName name of the title; its index file is {@code tagName + ".js"} in the base
   *     output directory
   * @param volumeTitleList volume titles, of which the last element is the one looked up
   * @return {@code false} when the list is empty; otherwise {@code true} exactly when the last
   *     title does not occur in the index file text
   */
  private boolean needUpdate(String tagName, List<String> volumeTitleList) {
    if (volumeTitleList.isEmpty()) {
      return false;
    }

    String indexText = Common.getFileString(getBaseOutputDirectory(), tagName + ".js");
    String newestTitle = volumeTitleList.get(volumeTitleList.size() - 1);

    // An index that does not mention the last volume needs updating.
    return !indexText.contains(newestTitle);
  }
Exemplo n.º 6
0
  /**
   * Downloads {@code urlString} into the temp directory, writes a re-encoded copy of the
   * download, and returns the content of the re-encoded copy.
   *
   * @param urlString address of the page to fetch
   * @return the content of the re-encoded file
   */
  @Override
  public String getAllPageString(String urlString) {
    String rawFileName =
        Common.getStoredFileName(SetUp.getTempDirectory(), "index_xindm_", "html");
    String encodedFileName =
        Common.getStoredFileName(SetUp.getTempDirectory(), "index_xindm_encode_", "html");

    // Fetch the raw page first, then produce the re-encoded copy that is actually read.
    Common.downloadFile(urlString, SetUp.getTempDirectory(), rawFileName, false, "");
    Common.newEncodeFile(SetUp.getTempDirectory(), rawFileName, encodedFileName);

    return Common.getFileString(SetUp.getTempDirectory(), encodedFileName);
  }
Exemplo n.º 7
0
  /**
   * Counts the comma-separated pieces between the first {@code "new Array"} and the next
   * {@code ")"} in the index file of {@code tagName}.
   *
   * @param tagName name of the title; its index file is {@code tagName + ".js"} in the base
   *     output directory
   * @return the number of pieces, or 0 when either marker is missing
   */
  private int getExistedVolumeCount(String tagName) {
    String indexText = Common.getFileString(getBaseOutputDirectory(), tagName + ".js");

    int arrayStart = indexText.indexOf("new Array");
    if (arrayStart < 0) {
      return 0;
    }

    int arrayEnd = indexText.indexOf(")", arrayStart);
    if (arrayEnd < 0) {
      return 0;
    }

    String[] pieces = indexText.substring(arrayStart, arrayEnd).split(",");
    return pieces.length;
  }
Exemplo n.º 8
0
  /**
   * Downloads {@code urlString} (with a single trailing slash removed, if present) into the
   * temp directory and returns the file content. No re-encoding step is applied because, per
   * the original author's note, the source page is already UTF-8.
   *
   * @param urlString address of the page to fetch
   * @return the content of the downloaded file
   */
  @Override
  public String getAllPageString(String urlString) {
    String targetURL =
        urlString.matches(".*/") ? urlString.substring(0, urlString.length() - 1) : urlString;

    String storedFileName =
        Common.getStoredFileName(SetUp.getTempDirectory(), "index_ck_", "html");

    // The URL is passed twice; presumably the last argument is a referer — TODO confirm.
    Common.simpleDownloadFile(targetURL, SetUp.getTempDirectory(), storedFileName, targetURL);

    return Common.getFileString(SetUp.getTempDirectory(), storedFileName);
  }
Exemplo n.º 9
0
  /**
   * Reads the page selector ({@code name="selectb"}) from the downloaded index file to learn how
   * many pages the chapter has and where each page lives, then visits every page, extracts the
   * image URL that follows the {@code id=picwin} marker, stores it in {@code comicURL}, and
   * downloads the image right away.
   *
   * <p>A page is skipped when both its own picture file and the next one already exist in the
   * download directory. Both loops stop early when {@code Run.isAlive} becomes false.
   */
  @Override
  public void parseComicURL() {
    String allPageString = Common.getFileString(SetUp.getTempDirectory(), indexName);
    Common.debugPrint("開始解析這一集有幾頁 : ");

    // Isolate the content of the "selectb" <select> element.
    int beginIndex = allPageString.indexOf("name=\"selectb\"");
    beginIndex = allPageString.indexOf(">", beginIndex) + 1;
    int endIndex = allPageString.indexOf("</select>", beginIndex);

    String tempString = allPageString.substring(beginIndex, endIndex);

    totalPage = tempString.split("<option").length - 1;
    Common.debugPrintln("共 " + totalPage + " 頁");
    comicURL = new String[totalPage];

    // Read the option values from the selector itself, so that "value=" attributes appearing
    // earlier in the page cannot be mistaken for page addresses.
    String[] comicPageURL = new String[totalPage];
    int cursor = 0;
    for (int i = 0; i < totalPage && Run.isAlive; i++) {
      cursor = tempString.indexOf("value=", cursor);
      cursor = tempString.indexOf("\"", cursor) + 1;
      int closingQuote = tempString.indexOf("\"", cursor);

      comicPageURL[i] = baseURL + tempString.substring(cursor, closingQuote);
    }

    // Honour the cancellation flag here too; otherwise a cancelled run would keep downloading
    // (and would request pages whose address was never filled in above).
    for (int p = 0; p < totalPage && Run.isAlive; p++) {
      if (!Common.existPicFile(getDownloadDirectory(), p + 1)
          || !Common.existPicFile(getDownloadDirectory(), p + 2)) {
        allPageString = getAllPageString(comicPageURL[p]);

        // The image address is the quoted src value following the "id=picwin" marker.
        beginIndex = allPageString.indexOf("id=picwin");
        beginIndex = allPageString.indexOf("src=", beginIndex);
        beginIndex = allPageString.indexOf("\"", beginIndex) + 1;
        endIndex = allPageString.indexOf("\"", beginIndex);

        comicURL[p] = Common.getFixedChineseURL(allPageString.substring(beginIndex, endIndex));

        // Download the image as soon as its URL has been parsed.
        singlePageDownload(getTitle(), getWholeTitle(), comicURL[p], totalPage, p + 1, 0);
      }
    }
  }
Exemplo n.º 10
0
  /**
   * Visits the numbered {@code .htm} page of every picture in the chapter, extracts the picture
   * address from the first line matching the {@code document.write} pattern, stores it in
   * {@code comicURL}, and downloads the picture immediately.
   *
   * <p>A page is skipped when both its own picture file and the next one already exist in the
   * download directory. Work stops as soon as {@code Run.isAlive} becomes false.
   */
  @Override
  public synchronized void parseComicURL() {
    System.out.print("parse the pic URL:");

    for (int pageIndex = 0; pageIndex < totalPage && Run.isAlive; pageIndex++) {
      int pageNumber = pageIndex + 1;

      // Nothing to do when the next picture and this picture are both already on disk.
      if (Common.existPicFile(getDownloadDirectory(), pageNumber + 1)
          && Common.existPicFile(getDownloadDirectory(), pageNumber)) {
        continue;
      }

      // Page addresses share the site directory and differ only in "<number>.htm".
      String siteDirectory = webSite.substring(0, webSite.lastIndexOf("/") + 1);
      String pageURL = siteDirectory + pageNumber + ".htm";

      Common.downloadFile(pageURL, SetUp.getTempDirectory(), indexName, false, "");
      Common.newEncodeFile(SetUp.getTempDirectory(), indexName, indexEncodeName, Encoding.GBK);

      String pageText = Common.getFileString(SetUp.getTempDirectory(), indexEncodeName);

      for (String line : pageText.split("\n")) {
        if (!Run.isAlive) {
          break;
        }
        if (!line.matches("(?s).*document.write(?s).*")) {
          continue;
        }

        // After splitting on quotes and '>', the picture path is the fourth piece.
        String[] pieces = line.split("'\"|\"|'|>");

        System.out.println(baseURL + pieces[3]);
        // Whitespace inside the path is replaced by %20.
        String frontURL = pieces[3].replaceAll("\\s", "%20");
        comicURL[pageIndex] = Common.getFixedChineseURL(baseURL + frontURL);

        // Download the picture as soon as its URL has been parsed.
        singlePageDownload(
            getTitle(), getWholeTitle(), comicURL[pageIndex], totalPage, pageNumber, 0);

        break; // only the first matching line of a page is used
      }
    }
  }
Exemplo n.º 11
0
  /**
   * Downloads the page at {@code webSite} and, when no whole title has been set yet, derives it
   * from the first double-quoted value that follows {@code alt=} in the stored page text.
   * Finally logs the title and the whole title.
   */
  @Override
  public void setParameters() {
    Common.debugPrintln("開始解析各參數 :");
    Common.downloadFile(webSite, SetUp.getTempDirectory(), indexName, false, "");

    if (getWholeTitle() == null || getWholeTitle().equals("")) {
      Common.debugPrintln("開始解析title和wholeTitle :");
      // NOTE(review): the download above writes indexName, yet indexEncodeName is read here
      // and no re-encoding step is visible in this method — confirm that file exists.
      String pageText = Common.getFileString(SetUp.getTempDirectory(), indexEncodeName);

      int altPosition = pageText.indexOf("alt=");
      int openingQuote = pageText.indexOf("\"", altPosition);
      int closingQuote = pageText.indexOf("\"", openingQuote + 1);
      String rawTitle = pageText.substring(openingQuote + 1, closingQuote).trim();

      // Convert to traditional Chinese, strip illegal characters, then format the volume
      // number.
      String traditionalTitle = Common.getTraditionalChinese(rawTitle);
      setWholeTitle(
          getVolumeWithFormatNumber(Common.getStringRemovedIllegalChar(traditionalTitle)));
    }

    Common.debugPrintln("作品名稱(title) : " + getTitle());
    Common.debugPrintln("章節名稱(wholeTitle) : " + getWholeTitle());
  }
Exemplo n.º 12
0
  /**
   * Adds one volume entry to the index file of {@code tagName}.
   *
   * <p>When the file text contains no {@code "new Array("}, a fresh index is written through
   * {@code outputVolumeIndex} with this volume as its only entry. Otherwise the quoted volume
   * title and volume ID are inserted 11 characters after the start of the first
   * {@code "new Array("} and the file is rewritten.
   *
   * @param tagName name of the title; its index file is {@code tagName + ".js"} in the base
   *     output directory
   * @param titleName forwarded to {@code outputVolumeIndex} when a new index is created
   * @param titleIntroduction forwarded to {@code outputVolumeIndex} when a new index is created
   * @param volumeTitle title of the volume to add
   * @param snsSysID identifier from which the volume ID is obtained via {@code getVolumeID}
   */
  private void updateIndexFile(
      String tagName,
      String titleName,
      String titleIntroduction,
      String volumeTitle,
      String snsSysID) {
    String text = Common.getFileString(getBaseOutputDirectory(), tagName + ".js");
    int arrayPosition = text.indexOf("new Array(");

    if (arrayPosition < 0) {
      // No array yet: create the index file with this volume as the first entry.
      print("第 1 筆索引資料");
      List<String> volumeTitleList = new ArrayList<String>();
      volumeTitleList.add(volumeTitle);
      List<String> snsSysIDList = new ArrayList<String>();
      snsSysIDList.add(snsSysID);
      outputVolumeIndex(tagName, titleName, titleIntroduction, volumeTitleList, snsSysIDList);
      return;
    }

    print("第 n 筆索引資料");

    // NOTE(review): "new Array(" is 10 characters long, so +11 skips one extra character
    // after the parenthesis — presumably a separator in the file format; confirm.
    int insertAt = arrayPosition + 11;

    String head = text.substring(0, insertAt);
    String newEntry = "'" + getOutputText(volumeTitle) + "', " + "'" + getVolumeID(snsSysID) + "', ";
    String tail = text.substring(insertAt);

    Common.outputFile(head + newEntry + tail, getBaseOutputDirectory(), tagName + ".js");
  }
Exemplo n.º 13
0
  /**
   * Parses the downloaded index page and fills {@code comicURL} and {@code refers} with one
   * entry per page of the chapter.
   *
   * <p>When {@code tsukkomiMode} is set, the method instead processes list pages (an RSS page,
   * or an "/update_" or "/rank/" page) and then terminates the JVM with {@code System.exit(0)};
   * the picture-URL parsing below is never reached in that mode.
   *
   * <p>In normal mode the page text is expected to contain an encoded list of picture paths
   * (between {@code '[} and {@code "]}) followed by a '|'-separated token list (between
   * {@code ,'} and {@code '.}); {@code getRealCodeTokens} combines the two into the real
   * codes. Every picture URL is built from the directory of the first picture plus the
   * decoded file name of each code.
   */
  @Override
  public void parseComicURL() { // parse URL and save all URLs in comicURL
    // First obtain the download-server part of the address.

    initNewData();

    String allPageString = Common.getFileString(SetUp.getTempDirectory(), indexName);
    Common.debugPrint("開始解析這一集有幾頁 : ");

    if (tsukkomiMode) {
      // beginIndex and endIndex are declared but not used inside this branch.
      int beginIndex = 0;
      int endIndex = 0;
      String listURL = webSite;
      List<String> tagNameList = new ArrayList<String>();

      // Drop a trailing slash from the list address.
      if (webSite.matches(".*/")) {
        listURL = webSite.substring(0, webSite.length() - 1);
      }

      if (isRssPage()) {
        print("is RSS page : " + listURL);
        outputNewListFile(listURL);
        tagNameList = getTagNameList(listURL);

        for (int i = 0; i < tagNameList.size(); i++) // every title in the list
        {
          String tagName = tagNameList.get(i);
          handleSingleTitle(tagName);
        }
      } else // ex.
      // http://manhua.dmzj.com/tags/category_search/0-0-0-all-0-0-1-447.shtml#category_nav_anchor
      {
        print("is Normal List Page : " + webSite);

        if (webSite.indexOf("/update_") > 0) {
          handleAllUpdatePage();
        } else if (webSite.indexOf("/rank/") > 0) {
          handleAllRankPage();
        }
      }

      // Terminates the whole JVM once list handling is done.
      System.exit(0);
    }

    // Extract the encoded address codes: the text between "'[" and the closing "\"]"
    // (the closing quote is kept, the bracket is not).
    int beginIndex = allPageString.indexOf("'[") + 2;
    int endIndex = allPageString.indexOf("\"]", beginIndex) + 1;

    String allCodeString = allPageString.substring(beginIndex, endIndex);

    // One code per page; codes are separated by "\",\"".
    totalPage = allCodeString.split("\",\"").length;
    Common.debugPrintln("共 " + totalPage + " 頁");
    comicURL = new String[totalPage];
    refers = new String[totalPage];

    // Extract the replacement tokens for the encoded codes: the text between the next ",'"
    // and the following "'.".
    beginIndex = allPageString.indexOf(",'", endIndex) + 2;
    endIndex = allPageString.indexOf("'.", beginIndex);
    String allVarString = allPageString.substring(beginIndex, endIndex);

    String[] varTokens = allVarString.split("\\|");

    for (int i = 0; i < varTokens.length; i++) {
      Common.debugPrintln(i + " " + varTokens[i]); // test
    }
    // System.exit( 0 );

    String basePicURL = "http://images.dmzj.com/"; // "http://images.manhua.178.com/";
    String[] codeTokens = allCodeString.split("\",\"");

    // Replace the raw codes with the real ones using the token table.
    codeTokens = getRealCodeTokens(codeTokens, varTokens);

    String firstCode = codeTokens[0].replaceAll("\"", "");

    String firstPicURL = "";
    Common.debugPrintln("第一張編碼:" + firstCode);
    firstPicURL = basePicURL + Common.getFixedChineseURL(getDecodeURL(firstCode));
    // Remove every backslash from the URL.
    firstPicURL = firstPicURL.replaceAll("\\\\", "");

    Common.debugPrintln("第一張圖片網址:" + firstPicURL);

    // System.exit( 0 );

    // NOTE(review): this loop runs totalPage times and indexes codeTokens, while the last loop
    // runs codeTokens.length times and indexes comicURL/picNames (sized totalPage) — this
    // assumes getRealCodeTokens returns exactly totalPage elements; confirm.
    String[] picNames = new String[totalPage];
    for (int i = 0; i < picNames.length; i++) {
      codeTokens[i] = codeTokens[i].replaceAll("\"", "");
      // The file name is everything after the last '/' of the code.
      beginIndex = codeTokens[i].lastIndexOf("/") + 1;
      endIndex = codeTokens[i].length(); // .lastIndexOf( "\"" );
      // Common.debugPrintln( codeTokens[i] + " " + beginIndex + " " + endIndex );
      picNames[i] =
          Common.getFixedChineseURL(getDecodeURL(codeTokens[i].substring(beginIndex, endIndex)));

      // System.exit( 0 ); // debug
    }

    // Directory part (including the trailing '/') of the first picture URL.
    endIndex = firstPicURL.lastIndexOf("/") + 1;
    String parentPicURL = firstPicURL.substring(0, endIndex);

    for (int i = 0; i < codeTokens.length && Run.isAlive; i++) {
      comicURL[i] = parentPicURL + picNames[i]; // store the address of every page
      refers[i] = webSite;
      // Common.debugPrintln( ( i + 1 ) + " " + comicURL[i]  ); // debug

    }

    // System.exit( 0 ); // debug
  }
Exemplo n.º 14
0
  /**
   * Builds the main list file from the per-title index files.
   *
   * <p>Every sub-directory of the base output directory is treated as a title tag. For each
   * tag, the index file {@code <tag>.js} in the base output directory is read; the first
   * single-quoted value is taken as the title name, and two further quoted values are taken as
   * the latest volume title and latest volume ID. Tags whose index file contains no quoted
   * value are left out of all four lists, so the lists always stay aligned by position. The
   * four lists (names, tags, latest volume titles, latest volume IDs) are then handed to
   * {@code outputListFile}.
   *
   * @param stepByStepMode when {@code true} the latest entries are looked up backwards from
   *     the first {@code ");"} (newest entry stored last); when {@code false} they are the
   *     first two quoted values after {@code "new Array("} (newest entry stored first)
   */
  private void outputMainListFile(boolean stepByStepMode) {
    List<String> tagList = new ArrayList<String>();
    List<String> nameList = new ArrayList<String>();
    List<String> lastVolumeTitleList = new ArrayList<String>();
    List<String> lastVolumeIDList = new ArrayList<String>();

    // listFiles() returns null when the path is not a directory or cannot be read.
    File[] fileList = new File(getBaseOutputDirectory()).listFiles();
    if (fileList == null) {
      print("Cannot list output directory: " + getBaseOutputDirectory());
      return;
    }

    // Collect the candidate tags: one per sub-directory. getName() is separator-independent,
    // unlike splitting the path on a backslash.
    List<String> candidateTagList = new ArrayList<String>();
    for (File entry : fileList) {
      if (entry.isDirectory()) {
        candidateTagList.add(entry.getName());
      }
    }

    // Read name and latest volume for every tag that has a usable index file.
    for (String tag : candidateTagList) {
      String text = Common.getFileString(getBaseOutputDirectory() + tag + ".js");
      String[] temps = text.split("'");

      if (temps.length <= 1) {
        // No quoted value at all: skip the tag entirely so the lists stay aligned.
        continue;
      }

      tagList.add(tag);
      nameList.add(temps[1]); // the first quoted value is the title name

      int beginIndex = 0;
      int endIndex = 0;
      String temp = "";

      if (stepByStepMode) {
        // Newest entry is stored last: search backwards from the first ");".
        beginIndex = text.indexOf(");", beginIndex) - 2;
        endIndex = text.lastIndexOf("'", beginIndex);
        beginIndex = text.lastIndexOf("'", endIndex - 2) + 1;
        temp = text.substring(beginIndex, endIndex);
        lastVolumeTitleList.add(temp);

        print("文件中最新一集: " + temp);

        beginIndex = text.indexOf(");", endIndex + 1) + 1;
        endIndex = text.lastIndexOf("'", beginIndex);
        beginIndex = text.lastIndexOf("'", endIndex - 2) + 1;
        temp = text.substring(beginIndex, endIndex);
        lastVolumeIDList.add(temp);
      } else {
        // Newest entry is stored first: take the first two quoted values of the array.
        beginIndex = text.indexOf("new Array(", beginIndex);
        beginIndex = text.indexOf("'", beginIndex) + 1;
        endIndex = text.indexOf("'", beginIndex);
        temp = text.substring(beginIndex, endIndex);
        lastVolumeTitleList.add(temp);

        beginIndex = text.indexOf("'", endIndex + 1) + 1;
        endIndex = text.indexOf("'", beginIndex);
        temp = text.substring(beginIndex, endIndex);
        lastVolumeIDList.add(temp);
      }
    }

    List<List<String>> combinationList = new ArrayList<List<String>>();
    combinationList.add(nameList);
    combinationList.add(tagList);
    combinationList.add(lastVolumeTitleList);
    combinationList.add(lastVolumeIDList);
    outputListFile(combinationList, "MAIN_LIST", mainListFileName);
  }