Beispiel #1
0
  @Override
  public synchronized void setParameters() {
    Common.downloadFile(webSite, SetUp.getTempDirectory(), indexName, false, "");
    Common.newEncodeFile(SetUp.getTempDirectory(), indexName, indexEncodeName, Encoding.GBK);

    String tempStr = Common.getFileString(SetUp.getTempDirectory(), indexEncodeName);
    String[] lines = tempStr.split("\n");

    for (int i = 0; i < lines.length; i++) {
      String line = Common.getTraditionalChinese(lines[i]);

      // ".": contain all characters except "\r" and "\n"
      // "(?s).": contain all characters
      if (line.matches("(?s).*title(?s).*")) {
        // get title ex.<title>尸錄 4話</title>
        String[] temp = line.split("<|>");

        if (getWholeTitle() == null || getWholeTitle().equals(""))
          setWholeTitle(Common.getStringRemovedIllegalChar(temp[2]));
      } else if (line.matches("(?s).*page(?s).*")) {
        // get total page ex. | 共34頁 |
        int beginIndex = line.indexOf(Common.getStringUsingDefaultLanguage("共", "共"));
        int endIndex = line.indexOf(Common.getStringUsingDefaultLanguage("頁", "頁"));

        String temp = line.substring(beginIndex + 1, endIndex);
        totalPage = Integer.parseInt(temp);

        break;
      }
    }

    comicURL = new String[totalPage]; // totalPage = amount of comic pic
    SetUp.setWholeTitle(wholeTitle);
  }
Beispiel #2
0
  @Override
  public String getAllPageString(String urlString) {
    String indexName = Common.getStoredFileName(SetUp.getTempDirectory(), "index_KUKU_", "html");
    String indexEncodeName =
        Common.getStoredFileName(SetUp.getTempDirectory(), "index_KUKU_encode_", "html");

    Common.downloadFile(urlString, SetUp.getTempDirectory(), indexName, false, "");
    Common.newEncodeFile(SetUp.getTempDirectory(), indexName, indexEncodeName, Encoding.GBK);

    return Common.getFileString(SetUp.getTempDirectory(), indexEncodeName);
  }
Beispiel #3
0
  /** @author user */
  public ParseKUKU() {
    enumName = "KUKU";
    regexs =
        new String[] {
          "(?s).*kukudm.com(?s).*", "(?s).*socomic.com(?s).*", "(?s).*socomic.net(?s).*"
        };
    parserName = this.getClass().getName();
    downloadBefore = true;
    siteID = Site.formString("KUKU");
    siteName = "kuku";
    indexName = Common.getStoredFileName(SetUp.getTempDirectory(), "index_kuku_parse_", "html");
    indexEncodeName =
        Common.getStoredFileName(SetUp.getTempDirectory(), "index_kuku_encode_parse_", "html");

    baseURL = "http://n.kukudm.com/";
  }
Beispiel #4
0
  @Override
  public synchronized void parseComicURL() {
    System.out.print("parse the pic URL:");

    for (int i = 0; i < totalPage && Run.isAlive; i++) {
      // 檢查下一張圖是否存在同個資料夾,若存在就跳下一張
      if (!Common.existPicFile(getDownloadDirectory(), i + 2)
          || !Common.existPicFile(getDownloadDirectory(), i + 1)) {
        int endIndex = webSite.lastIndexOf("/");
        String tempWebSite = webSite.substring(0, endIndex + 1) + (i + 1) + ".htm";

        Common.downloadFile(tempWebSite, SetUp.getTempDirectory(), indexName, false, "");
        Common.newEncodeFile(SetUp.getTempDirectory(), indexName, indexEncodeName, Encoding.GBK);

        String tempStr = Common.getFileString(SetUp.getTempDirectory(), indexEncodeName);
        String[] lines = tempStr.split("\n");

        for (int count = 0; count < lines.length && Run.isAlive; count++) {
          String line = lines[count];

          if (line.matches("(?s).*document.write(?s).*")) {
            String[] temp = line.split("'\"|\"|'|>");

            System.out.println(baseURL + temp[3]);
            // replace %20 from white space in URL
            String frontURL = temp[3].replaceAll("\\s", "%20");
            comicURL[i] = Common.getFixedChineseURL(baseURL + frontURL);
            // Common.debugPrintln( i + " " + comicURL[i] ); // debug

            // 每解析一個網址就下載一張圖
            singlePageDownload(getTitle(), getWholeTitle(), comicURL[i], totalPage, i + 1, 0);

            break;
          }
        }
      }
    }
    // System.exit( 0 ); // debug
  }