Пример #1
0
  /**
   * Extracts the comic title from the first line of the main page.
   *
   * <p>The title is the text that starts right after {@code <title>} (searched from index 1 of the
   * first line) and stops 4 characters before the next {@code _}. The extracted text is passed
   * through {@code Common.getTraditionalChinese} and then {@code
   * Common.getStringRemovedIllegalChar}.
   *
   * @param urlString main page URL (not read by this implementation)
   * @param allPageString full text of the main page
   * @return the converted and sanitized title
   */
  @Override
  public String getTitleOnMainPage(String urlString, String allPageString) {
    String firstLine = allPageString.split("\n")[0];

    // 7 == "<title>".length(): move past the opening tag.
    int titleStart = firstLine.indexOf("<title>", 1) + 7;
    // The title proper ends 4 characters before the first "_" that follows it.
    int titleEnd = firstLine.indexOf("_", titleStart) - 4;

    String rawTitle = firstLine.substring(titleStart, titleEnd);
    String traditionalTitle = Common.getTraditionalChinese(rawTitle);
    return Common.getStringRemovedIllegalChar(traditionalTitle);
  }
Пример #2
0
  /**
   * Downloads the page at {@code urlString} into the temp directory, writes a re-encoded copy of
   * it (with {@code Encoding.GBK} passed to {@code Common.newEncodeFile}), and returns the text of
   * that re-encoded copy.
   *
   * @param urlString address of the page to fetch
   * @return content of the re-encoded file as returned by {@code Common.getFileString}
   */
  @Override
  public String getAllPageString(String urlString) {
    String rawFileName = Common.getStoredFileName(SetUp.getTempDirectory(), "index_KUKU_", "html");
    String encodedFileName =
        Common.getStoredFileName(SetUp.getTempDirectory(), "index_KUKU_encode_", "html");

    // Fetch the raw page first, then produce the re-encoded copy that is read back below.
    Common.downloadFile(urlString, SetUp.getTempDirectory(), rawFileName, false, "");
    Common.newEncodeFile(SetUp.getTempDirectory(), rawFileName, encodedFileName, Encoding.GBK);

    String pageText = Common.getFileString(SetUp.getTempDirectory(), encodedFileName);
    return pageText;
  }
Пример #3
0
  /**
   * Extracts the volume titles and volume URLs listed on the comic's main page.
   *
   * <p>Only the region between {@code id='comiclistn'} and the following {@code </table>} is
   * scanned. Each {@code <dd>} in that region is treated as one volume: the first single-quoted
   * string after it is the URL, and the text between the next {@code >} and {@code <} is the title.
   * URLs that do not start with {@code http} are prefixed with the default volume host.
   *
   * <p>As a side effect, {@code totalVolume} is set to the number of volumes found.
   *
   * @param urlString main page URL (not read by this implementation)
   * @param allPageString full text of the main page
   * @return a two-element list: index 0 holds the volume titles, index 1 the matching volume URLs
   * @throws StringIndexOutOfBoundsException if the expected markers are missing from the page
   */
  @Override
  public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) {
    int listStart = allPageString.indexOf("id='comiclistn'");
    int listEnd = allPageString.indexOf("</table>", listStart);
    String volumeSection = allPageString.substring(listStart, listEnd);

    // split() yields one more piece than there are "<dd>" separators.
    int volumeCount = volumeSection.split("<dd>").length - 1;

    // Host prepended to relative volume links (there are four interchangeable hosts).
    String baseVolumeURL = "http://comic.kukudm.com";

    List<String> urlList = new ArrayList<String>();
    List<String> volumeList = new ArrayList<String>();

    int beginIndex = 0;
    int endIndex = 0;
    for (int i = 0; i < volumeCount; i++) {
      // Volume URL: the first single-quoted string after the next <dd>.
      beginIndex = volumeSection.indexOf("<dd>", beginIndex) + 1;
      beginIndex = volumeSection.indexOf("'", beginIndex) + 1;
      endIndex = volumeSection.indexOf("'", beginIndex);
      String volumeURL = volumeSection.substring(beginIndex, endIndex);
      urlList.add(volumeURL.startsWith("http") ? volumeURL : baseVolumeURL + volumeURL);

      // Volume title: the text between the next ">" and the following "<".
      beginIndex = volumeSection.indexOf(">", beginIndex) + 1;
      endIndex = volumeSection.indexOf("<", beginIndex);
      String rawTitle = volumeSection.substring(beginIndex, endIndex).trim();
      volumeList.add(
          getVolumeWithFormatNumber(
              Common.getStringRemovedIllegalChar(Common.getTraditionalChinese(rawTitle))));
    }

    totalVolume = volumeCount;
    Common.debugPrintln("共有" + totalVolume + "集");

    // Combine both lists into the result: titles first, URLs second.
    List<List<String>> combinationList = new ArrayList<List<String>>();
    combinationList.add(volumeList);
    combinationList.add(urlList);

    return combinationList;
  }
Пример #4
0
  /** @author user */
  public ParseKUKU() {
    enumName = "KUKU";
    regexs =
        new String[] {
          "(?s).*kukudm.com(?s).*", "(?s).*socomic.com(?s).*", "(?s).*socomic.net(?s).*"
        };
    parserName = this.getClass().getName();
    downloadBefore = true;
    siteID = Site.formString("KUKU");
    siteName = "kuku";
    indexName = Common.getStoredFileName(SetUp.getTempDirectory(), "index_kuku_parse_", "html");
    indexEncodeName =
        Common.getStoredFileName(SetUp.getTempDirectory(), "index_kuku_encode_parse_", "html");

    baseURL = "http://n.kukudm.com/";
  }
Пример #5
0
  /**
   * Downloads and re-encodes the page at {@code webSite}, then scans it line by line to fill in
   * the whole title (only when none is set yet) and {@code totalPage}.
   *
   * <p>Scanning stops at the first line containing {@code page}, from which the page count is
   * parsed. Afterwards {@code comicURL} is allocated with {@code totalPage} slots and the title is
   * forwarded to {@code SetUp.setWholeTitle}.
   */
  @Override
  public synchronized void setParameters() {
    Common.downloadFile(webSite, SetUp.getTempDirectory(), indexName, false, "");
    Common.newEncodeFile(SetUp.getTempDirectory(), indexName, indexEncodeName, Encoding.GBK);

    String pageText = Common.getFileString(SetUp.getTempDirectory(), indexEncodeName);

    for (String rawLine : pageText.split("\n")) {
      String line = Common.getTraditionalChinese(rawLine);

      if (line.contains("title")) {
        // Title line, e.g. <title>尸錄 4話</title>: after splitting on "<" or ">" the title text
        // is the third piece.
        String[] pieces = line.split("<|>");
        boolean titleMissing = getWholeTitle() == null || getWholeTitle().equals("");
        if (titleMissing) {
          setWholeTitle(Common.getStringRemovedIllegalChar(pieces[2]));
        }
        continue;
      }

      if (!line.contains("page")) {
        continue;
      }

      // Page-count line, e.g. | 共34頁 |: the number sits between the two marker characters.
      int countStart = line.indexOf(Common.getStringUsingDefaultLanguage("共", "共")) + 1;
      int countEnd = line.indexOf(Common.getStringUsingDefaultLanguage("頁", "頁"));
      totalPage = Integer.parseInt(line.substring(countStart, countEnd));
      break;
    }

    // One slot per comic picture.
    comicURL = new String[totalPage];
    SetUp.setWholeTitle(wholeTitle);
  }
Пример #6
0
  public Run(String[] originalArgs, int runMode) {
    this(runMode);

    args = originalArgs;

    if (!Common.withGUI()) {
      SetUp set = new SetUp();
      set.readSetFile(); // set up the file name and directory
    }
    // test( args );

    webSite = "";
  }
Пример #7
0
  /**
   * Resolves the picture URL of every page of the current volume and downloads each picture as
   * soon as its URL is known.
   *
   * <p>For page {@code n} the page address is built from {@code webSite} by replacing everything
   * after the last {@code /} with {@code n.htm}. The first line of that page matching the
   * {@code document.write} pattern is split to obtain the picture path. A page is skipped when
   * both its own picture and the next one already exist in the download directory. The loops stop
   * early once {@code Run.isAlive} turns false.
   */
  @Override
  public synchronized void parseComicURL() {
    System.out.print("parse the pic URL:");

    for (int i = 0; i < totalPage && Run.isAlive; i++) {
      int pageNumber = i + 1;

      // If the next picture and this one are both already in the folder, move on to the next page.
      if (Common.existPicFile(getDownloadDirectory(), pageNumber + 1)
          && Common.existPicFile(getDownloadDirectory(), pageNumber)) {
        continue;
      }

      String pageAddress =
          webSite.substring(0, webSite.lastIndexOf("/") + 1) + pageNumber + ".htm";

      Common.downloadFile(pageAddress, SetUp.getTempDirectory(), indexName, false, "");
      Common.newEncodeFile(SetUp.getTempDirectory(), indexName, indexEncodeName, Encoding.GBK);

      String pageText = Common.getFileString(SetUp.getTempDirectory(), indexEncodeName);

      for (String line : pageText.split("\n")) {
        if (!Run.isAlive) {
          break;
        }
        if (!line.matches("(?s).*document.write(?s).*")) {
          continue;
        }

        // The picture path is the fourth piece once the line is split on quotes and ">".
        String[] pieces = line.split("'\"|\"|'|>");
        System.out.println(baseURL + pieces[3]);

        // Encode white space in the path as %20.
        String encodedPath = pieces[3].replaceAll("\\s", "%20");
        comicURL[i] = Common.getFixedChineseURL(baseURL + encodedPath);

        // Download the picture right after its URL has been parsed.
        singlePageDownload(getTitle(), getWholeTitle(), comicURL[i], totalPage, pageNumber, 0);
        break;
      }
    }
  }
Пример #8
0
  /**
   * Parses the input URL and carries out the follow-up processing.
   *
   * <p>The stored arguments are validated first. When {@code isAlive} and {@code isLegal} both
   * hold afterwards, the site of {@code webSite} is identified and the matching parser is handed
   * to {@code runSingleParseModule}. If no known site matches, {@code Common.urlIsUnknown} is set
   * to true instead.
   */
  public void run() {
    Common.debugPrintln("開始解析單一位址:");
    applyArguments();

    if (isAlive && isLegal) {
      ParseWebPage pw = new ParseWebPage(webSite);
      ParseOnlineComicSite parse = createParserFor(pw);

      if (parse == null) { // Site.UNKNOWN
        Common.urlIsUnknown = true;
      } else {
        runSingleParseModule(parse);
      }
    }
  }

  /**
   * Validates {@code args} and applies them to {@code webSite} and the {@code SetUp} options.
   *
   * <p>Accepted forms: {@code URL}, {@code URL add}, {@code URL beginVolume endVolume} and {@code
   * URL beginVolume endVolume add}. Anything else is reported through {@code Common.errorReport}.
   */
  private void applyArguments() {
    if (args.length == 0) {
      Common.errorReport("WRONG: No URL of comic !!");
    } else if (args.length > 4) {
      Common.errorReport("WRONG: Too many args !!");
    } else if (Common.isLegalURL(args[0])) {
      if (args.length == 1) { // ComicDown URL
        webSite = args[0];
      } else if (args.length == 2 && args[1].equals("add")) { // ComicDown URL add
        webSite = args[0];
        SetUp.addSchedule = true;
      } else if (args.length == 3
          && args[1].matches("\\d+")
          && args[2].matches("\\d+")) { // ComicDown URL beginVolume endVolume
        webSite = args[0];
        SetUp.setDownloadVolume(args[1], args[2]);
      } else if (args.length == 4
          && args[1].matches("\\d+")
          && args[2].matches("\\d+")
          && args[3].equals("add")) { // ComicDown URL beginVolume endVolume add
        webSite = args[0];
        SetUp.setDownloadVolume(args[1], args[2]);
        SetUp.addSchedule = true;
      } else {
        Common.errorReport("WRONG: illegal parameters !!");
      }
    } else {
      Common.errorReport("WRONG: illegal URL :  [" + args[0] + "]");
    }
  }

  /**
   * Creates the parser that matches the site id reported by {@code pw}.
   *
   * @param pw page wrapper whose {@code getSiteID()} is compared against the known sites
   * @return a new parser for the matching site, or {@code null} when no known site matches
   */
  private ParseOnlineComicSite createParserFor(ParseWebPage pw) {
    if (pw.getSiteID() == Site.CC) {
      return new ParseCC();
    }
    if (pw.getSiteID() == Site.KUKU) {
      return new ParseKUKU();
    }
    if (pw.getSiteID() == Site.EH) {
      return new ParseEH();
    }

    // The "99" family of sites.
    if (pw.getSiteID() == Site.NINENINE_COMIC) {
      return new Parse99Comic();
    }
    if (pw.getSiteID() == Site.NINENINE_COMIC_TC) {
      return new Parse99ComicTC();
    }
    if (pw.getSiteID() == Site.NINENINE_MANGA) {
      return new Parse99Manga();
    }
    if (pw.getSiteID() == Site.NINENINE_MANGA_TC) {
      return new Parse99MangaTC();
    }
    if (pw.getSiteID() == Site.NINENINE_MANGA_WWW) {
      return new Parse99MangaWWW();
    }
    if (pw.getSiteID() == Site.NINENINE_99770) {
      return new Parse99770();
    }
    if (pw.getSiteID() == Site.NINENINE_MH_99770) {
      return new ParseMh99770();
    }
    if (pw.getSiteID() == Site.NINENINE_MH) {
      return new Parse99Mh();
    }
    if (pw.getSiteID() == Site.NINENINE_COCO) {
      return new ParseCoco();
    }
    if (pw.getSiteID() == Site.NINENINE_COCO_TC) {
      return new ParseCocoTC();
    }
    if (pw.getSiteID() == Site.NINENINE_1MH) {
      return new Parse1Mh();
    }
    if (pw.getSiteID() == Site.NINENINE_3G) {
      return new Parse3G();
    }

    if (pw.getSiteID() == Site.EIGHT_COMIC) {
      return new ParseEC();
    }
    if (pw.getSiteID() == Site.EIGHT_COMIC_PHOTO) {
      return new ParseECphoto();
    }
    if (pw.getSiteID() == Site.JUMPCNCN) {
      return new ParseJumpcncn();
    }
    if (pw.getSiteID() == Site.DMEDEN) {
      return new ParseDmeden();
    }
    if (pw.getSiteID() == Site.JUMPCN) {
      return new ParseJumpcn();
    }
    if (pw.getSiteID() == Site.MANGAFOX) {
      return new ParseMangaFox();
    }
    if (pw.getSiteID() == Site.MANMANKAN) {
      return new ParseManmankan();
    }
    if (pw.getSiteID() == Site.XINDM) {
      return new ParseXindm();
    }
    if (pw.getSiteID() == Site.EX) {
      return new ParseEX();
    }
    if (pw.getSiteID() == Site.GOOGLE_PIC) {
      return new ParseGooglePic();
    }
    if (pw.getSiteID() == Site.NANA) {
      return new ParseNANA();
    }
    if (pw.getSiteID() == Site.CITY_MANGA) {
      return new ParseCityManga();
    }
    if (pw.getSiteID() == Site.IIBQ) {
      return new ParseIIBQ();
    }
    if (pw.getSiteID() == Site.BAIDU) {
      return new ParseBAIDU();
    }
    if (pw.getSiteID() == Site.SF) {
      return new ParseSF();
    }
    if (pw.getSiteID() == Site.KKKMH) {
      return new ParseKKKMH();
    }
    if (pw.getSiteID() == Site.SIX_COMIC) {
      return new ParseSixComic();
    }
    if (pw.getSiteID() == Site.MANHUA_178) {
      return new Parse178();
    }
    if (pw.getSiteID() == Site.KANGDM) {
      return new ParseKangdm();
    }
    if (pw.getSiteID() == Site.BENGOU) {
      return new ParseBengou();
    }
    if (pw.getSiteID() == Site.EMLAND) {
      return new ParseEmland();
    }
    if (pw.getSiteID() == Site.MOP) {
      return new ParseMOP();
    }
    if (pw.getSiteID() == Site.DM5) {
      return new ParseDM5();
    }
    if (pw.getSiteID() == Site.CK) {
      return new ParseCK();
    }
    if (pw.getSiteID() == Site.TUKU) {
      return new ParseTUKU();
    }
    if (pw.getSiteID() == Site.HH) {
      return new ParseHH();
    }
    if (pw.getSiteID() == Site.IASK) {
      return new ParseIASK();
    }
    if (pw.getSiteID() == Site.JM) {
      return new ParseJM();
    }
    if (pw.getSiteID() == Site.MANGA_WINDOW) {
      return new ParseMangaWindow();
    }
    if (pw.getSiteID() == Site.CK_NOVEL) {
      return new ParseCKNovel();
    }
    if (pw.getSiteID() == Site.MYBEST) {
      return new ParseMyBest();
    }
    if (pw.getSiteID() == Site.IMANHUA) {
      return new ParseImanhua();
    }
    if (pw.getSiteID() == Site.VERYIM) {
      return new ParseVeryim();
    }
    if (pw.getSiteID() == Site.WENKU) {
      return new ParseWenku();
    }
    if (pw.getSiteID() == Site.FUMANHUA) {
      return new ParseFumanhua();
    }
    if (pw.getSiteID() == Site.SIX_MANGA) {
      return new ParseSixManga();
    }
    if (pw.getSiteID() == Site.XXBH) {
      return new ParseXXBH();
    }
    if (pw.getSiteID() == Site.COMIC_131) {
      return new Parse131();
    }
    if (pw.getSiteID() == Site.BLOGSPOT) {
      return new ParseBlogspot();
    }
    if (pw.getSiteID() == Site.PIXNET_BLOG) {
      return new ParsePixnetBlog();
    }
    if (pw.getSiteID() == Site.XUITE_BLOG) {
      return new ParseXuiteBlog();
    }
    if (pw.getSiteID() == Site.YAM_BLOG) {
      return new ParseYamBlog();
    }
    if (pw.getSiteID() == Site.EYNY_NOVEL) {
      return new ParseEynyNovel();
    }
    if (pw.getSiteID() == Site.ZUIWANJU) {
      return new ParseZuiwanju();
    }
    if (pw.getSiteID() == Site.TWO_ECY) {
      return new Parse2ecy();
    }
    if (pw.getSiteID() == Site.TIANYA_BOOK) {
      return new ParseTianyaBook();
    }
    if (pw.getSiteID() == Site.EIGHT_NOVEL) {
      return new ParseEightNovel();
    }
    if (pw.getSiteID() == Site.QQ_BOOK) {
      return new ParseQQBook();
    }
    if (pw.getSiteID() == Site.QQ_ORIGIN_BOOK) {
      return new ParseQQOriginBook();
    }
    if (pw.getSiteID() == Site.SINA_BOOK) {
      return new ParseSinaBook();
    }
    if (pw.getSiteID() == Site.FIVEONE_CTO) {
      return new Parse51Cto();
    }
    if (pw.getSiteID() == Site.ONESEVEN_KK) {
      return new Parse17KK();
    }
    if (pw.getSiteID() == Site.UUS8) {
      return new ParseUUS8();
    }
    if (pw.getSiteID() == Site.WENKU8) {
      return new ParseWenku8();
    }
    if (pw.getSiteID() == Site.IFENG_BOOK) {
      return new ParseIfengBook();
    }
    if (pw.getSiteID() == Site.XUNLOOK) {
      return new ParseXunlook();
    }
    if (pw.getSiteID() == Site.WENKU7) {
      return new Parse7Wenku();
    }
    if (pw.getSiteID() == Site.WOYOUXIAN) {
      return new ParseWoyouxian();
    }
    if (pw.getSiteID() == Site.SHUNONG) {
      return new ParseShunong();
    }
    if (pw.getSiteID() == Site.SOGOU) {
      return new ParseSogou();
    }
    if (pw.getSiteID() == Site.TING1) {
      return new Parse1Ting();
    }
    if (pw.getSiteID() == Site.XIAMI) {
      return new ParseXiami();
    }

    // No known site matched.
    return null;
  }