示例#1
0
  /**
   * Fetches the page at {@code urlString} into the temp directory through
   * {@code Common.downloadGZIPInputStreamFile} and returns the stored file's text.
   *
   * @param urlString address of the page to fetch
   * @return the content read back from the stored file
   */
  @Override
  public String getAllPageString(String urlString) {
    final String storedName =
        Common.getStoredFileName(SetUp.getTempDirectory(), "index_178_", "html");

    Common.downloadGZIPInputStreamFile(
        urlString, SetUp.getTempDirectory(), storedName, false, "");
    return Common.getFileString(SetUp.getTempDirectory(), storedName);
  }
示例#2
0
  /**
   * Scans the output directory of {@code tagName}, collects the base name of every regular file
   * except {@code comment}, and reads the first single-quoted value from each matching
   * {@code .js} file.
   *
   * <p>NOTE(review): the extracted title is never stored or returned, and the method ends by
   * calling {@code System.exit(0)}. This looks like an unfinished or debug-only routine; confirm
   * before relying on it.
   *
   * @param tagName name of the sub-directory (under the base output directory) to scan
   */
  private void buildIndexFile(String tagName) {
    String dirPath = getBaseOutputDirectory() + tagName + Common.getSlash();
    List<String> volumeTagList = new ArrayList<String>();

    // listFiles() returns null when the directory does not exist or cannot be read.
    File[] fileList = new File(dirPath).listFiles();
    if (fileList != null) {
      for (File file : fileList) {
        if (!file.isFile()) {
          continue; // only regular files carry volume data
        }

        // getName() is independent of the platform's path separator; the tag is the part of the
        // file name in front of the first dot.
        String fileName = file.getName();
        int dotIndex = fileName.indexOf('.');
        String volumeTag = dotIndex < 0 ? fileName : fileName.substring(0, dotIndex);

        if (!volumeTag.equals("comment")) {
          volumeTagList.add(volumeTag);
        }
      }
    }

    String volumeTitle = "";
    for (String volumeTag : volumeTagList) {
      String text = Common.getFileString(dirPath + volumeTag + ".js");

      // The title is the first single-quoted value in the file; skip files without one.
      String[] quotedParts = text.split("'");
      if (quotedParts.length <= 1) {
        continue;
      }

      volumeTitle = quotedParts[1];
    }

    System.exit(0);
  }
示例#3
0
  @Override
  public String getTitleOnMainPage(String urlString, String allPageString) {
    if (needTsukkomiMode(urlString)) {
      tsukkomiMode = true;
      return "Tsukkomi";
    }

    tsukkomiMode = false;

    int beginIndex = allPageString.indexOf("<h1>") + 4;
    int endIndex = allPageString.indexOf("</h1>", beginIndex);
    if (urlString.indexOf("mh.") > 0) {
      beginIndex = allPageString.indexOf("g_comic_name");
      beginIndex = allPageString.indexOf("\"", beginIndex) + 1;
      endIndex = allPageString.indexOf("\"", beginIndex);
    }

    // Common.debugPrintln( "B: " + beginIndex + "  E: " + endIndex );

    if (beginIndex < 0 || endIndex < 0) {
      return null;
    }

    String title = allPageString.substring(beginIndex, endIndex).trim();

    return Common.getStringRemovedIllegalChar(Common.getTraditionalChinese(title));
  }
示例#4
0
  @Override
  public String getTitleOnSingleVolumePage(String urlString) {
    String allPageString = getAllPageString(urlString);
    int beginIndex = Common.getIndexOfOrderKeyword(allPageString, ">>", 3) + 2;
    int endIndex = Common.getSmallerIndexOfTwoKeyword(allPageString, beginIndex, "[", ">>");
    String title = allPageString.substring(beginIndex, endIndex).trim();

    return Common.getStringRemovedIllegalChar(Common.getTraditionalChinese(title));
  }
示例#5
0
 /**
  * Builds the base output directory: the value of {@code Common.getNowAbsolutePath()} followed
  * by the segments {@code down}, {@code SVN} and {@code Tsukkomi1}, each terminated with
  * {@code Common.getSlash()}.
  *
  * @return the assembled directory path, ending with a separator
  */
 private String getBaseOutputDirectory() {
   StringBuilder path = new StringBuilder().append(Common.getNowAbsolutePath());
   for (String segment : new String[] {"down", "SVN", "Tsukkomi1"}) {
     path.append(segment).append(Common.getSlash());
   }
   return path.toString();
 }
示例#6
0
  @Override
  public String getTitleOnMainPage(String urlString, String allPageString) {
    int beginIndex = allPageString.indexOf("valign=\"middle\"><b>");
    beginIndex = allPageString.indexOf("<b>", beginIndex) + 3;
    int endIndex = allPageString.indexOf("</b>", beginIndex);
    String title = allPageString.substring(beginIndex, endIndex).trim();

    return Common.getStringRemovedIllegalChar(Common.getTraditionalChinese(title));
  }
示例#7
0
  @Override
  public String getTitleOnMainPage(String urlString, String allPageString) {
    int beginIndex = allPageString.indexOf("<title>");
    beginIndex = allPageString.indexOf(">", beginIndex) + 1;
    int endIndex = allPageString.indexOf("</title", beginIndex);
    String title = allPageString.substring(beginIndex, endIndex).split("-")[0].trim();

    return Common.getStringRemovedIllegalChar(Common.getTraditionalChinese(title));
  }
示例#8
0
  @Override
  public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) {
    // combine volumeList and urlList into combinationList, return it.

    List<List<String>> combinationList = new ArrayList<List<String>>();
    List<String> urlList = new ArrayList<String>();
    List<String> volumeList = new ArrayList<String>();

    if (tsukkomiMode) {
      urlList.add(urlString);
      volumeList.add("tsukkomi");
      combinationList.add(volumeList);
      combinationList.add(urlList);

      return combinationList;
    }

    int beginIndex = allPageString.indexOf("class=\"cartoon_online_border\"");
    int endIndex = allPageString.indexOf("document.write", beginIndex);

    if (urlString.indexOf("mh.") > 0) {
      beginIndex = allPageString.indexOf("chapter_list");
      endIndex = allPageString.indexOf("</script>", beginIndex);
    }

    String tempString = allPageString.substring(beginIndex, endIndex);

    int volumeCount = tempString.split("href=\"").length - 1;

    String volumeTitle = "";
    beginIndex = endIndex = 0;
    for (int i = 0; i < volumeCount; i++) {
      // 取得單集位址
      beginIndex = tempString.indexOf("href=\"", beginIndex) + 6;
      endIndex = tempString.indexOf("\"", beginIndex);
      urlList.add(baseURL + tempString.substring(beginIndex, endIndex));

      // 取得單集名稱
      beginIndex = tempString.indexOf(">", beginIndex) + 1;
      endIndex = tempString.indexOf("<", beginIndex);
      volumeTitle = tempString.substring(beginIndex, endIndex);

      volumeList.add(
          getVolumeWithFormatNumber(
              Common.getStringRemovedIllegalChar(
                  Common.getTraditionalChinese(volumeTitle.trim()))));
    }

    totalVolume = volumeCount;
    Common.debugPrintln("共有" + totalVolume + "集");

    combinationList.add(volumeList);
    combinationList.add(urlList);

    return combinationList;
  }
示例#9
0
  @Override
  public String getTitleOnMainPage(String urlString, String allPageString) {
    String[] lines = allPageString.split("\n");

    int beginIndex = lines[0].indexOf("<title>", 1) + 7;
    int endIndex = lines[0].indexOf("_", beginIndex) - 4;

    return Common.getStringRemovedIllegalChar(
        Common.getTraditionalChinese(lines[0].substring(beginIndex, endIndex)));
  }
示例#10
0
  @Override
  public String getAllPageString(String urlString) {
    String indexName = Common.getStoredFileName(SetUp.getTempDirectory(), "index_xindm_", "html");
    String indexEncodeName =
        Common.getStoredFileName(SetUp.getTempDirectory(), "index_xindm_encode_", "html");
    Common.downloadFile(urlString, SetUp.getTempDirectory(), indexName, false, "");
    Common.newEncodeFile(SetUp.getTempDirectory(), indexName, indexEncodeName);

    return Common.getFileString(SetUp.getTempDirectory(), indexEncodeName);
  }
示例#11
0
  /**
   * Collects the distinct tag names (the first path segment of each link) found in the
   * {@code href} attributes of the page at {@code url}.
   *
   * <p>The returned list always starts with a dummy tag whose output file and folder are deleted
   * first. Tags rejected by {@code isIllegalPage} and tags already collected are skipped.
   *
   * @param url address of the page whose links are scanned
   * @return the dummy tag followed by every accepted tag, in order of first appearance
   */
  private List<String> getTagNameList(String url) {
    List<String> urlList = new ArrayList<String>();

    // The first entry always ends up with wrong comment data, so a dummy tag is placed in front
    // to absorb that error.
    String dummyTagName = "ghdxj";
    Common.deleteFile(getBaseOutputDirectory(), dummyTagName + ".js");
    Common.deleteFolder(getBaseOutputDirectory() + dummyTagName);
    urlList.add(dummyTagName);

    String allPageString = getAllPageString(url);
    int pageLength = allPageString.length();
    int beginIndex = 0;

    while (true) {
      beginIndex = allPageString.indexOf(" href=", beginIndex);
      if (beginIndex < 0) break;

      // Step over the '=' and the opening quote of the attribute value.
      beginIndex = allPageString.indexOf("=", beginIndex) + 2;
      if (beginIndex >= pageLength) break;

      // Look at (at most) the next 30 characters; clamp so a link near the end of the page does
      // not run past the string.
      int windowEnd = Math.min(beginIndex + 30, pageLength);
      String temp = allPageString.substring(beginIndex, windowEnd);
      if (temp.indexOf(baseURL) >= 0) {
        // Absolute link, e.g. href='http://manhua.dmzj.com/lianaibaojun/ : skip the host part.
        beginIndex = allPageString.indexOf(".com", beginIndex);
        if (beginIndex < 0) break;
        beginIndex = allPageString.indexOf("/", beginIndex);
        if (beginIndex < 0) break;
      }

      beginIndex++; // start right after the "/"
      int endIndex = allPageString.indexOf("/", beginIndex);
      if (endIndex < 0) break;
      temp = allPageString.substring(beginIndex, endIndex);

      if (urlList.contains(temp) || isIllegalPage(temp)) {
        continue;
      }

      urlList.add(temp);
      beginIndex = endIndex;
    }

    return urlList;
  }
示例#12
0
  /**
   * Sets up the MOP parser: site identity, base URL, script name, default radix number and the
   * names of the index files kept in the temp directory.
   */
  public ParseMOP() {
    siteID = Site.MOP;
    siteName = "MOP";
    baseURL = "http://dm.game.mop.com";

    jsName = "index_mop.js";
    // Default value only; it is not guaranteed to be useful.
    radixNumber = 15221471;

    indexName = Common.getStoredFileName(SetUp.getTempDirectory(), "index_mop_parse_", "html");
    indexEncodeName =
        Common.getStoredFileName(SetUp.getTempDirectory(), "index_mop_encode_parse_", "html");
  }
示例#13
0
  /**
   * Downloads the page at {@code urlString} (without a trailing slash) into the temp directory
   * and returns its text. No re-encoding step is applied because, per the original author's
   * note, the source file is already UTF-8.
   *
   * @param urlString address of the page to fetch; one trailing {@code /} is dropped
   * @return the content of the stored file
   */
  @Override
  public String getAllPageString(String urlString) {
    final String targetUrl =
        urlString.matches(".*/") ? urlString.substring(0, urlString.length() - 1) : urlString;

    final String storedName =
        Common.getStoredFileName(SetUp.getTempDirectory(), "index_ck_", "html");

    // The target URL is passed twice: as the address to fetch and as the helper's last argument.
    Common.simpleDownloadFile(targetUrl, SetUp.getTempDirectory(), storedName, targetUrl);

    return Common.getFileString(SetUp.getTempDirectory(), storedName);
  }
示例#14
0
  @Override
  public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) {
    // combine volumeList and urlList into combinationList, return it.

    List<List<String>> combinationList = new ArrayList<List<String>>();
    List<String> urlList = new ArrayList<String>();
    List<String> volumeList = new ArrayList<String>();

    String[] lines = allPageString.split("\n");

    int beginIndex = 0;
    int endIndex = 0;
    String volumeURL = "";

    beginIndex = allPageString.indexOf("id='comiclistn'");
    endIndex = allPageString.indexOf("</table>", beginIndex);
    String tempString = allPageString.substring(beginIndex, endIndex);

    int volumeCount = tempString.split("<dd>").length - 1;

    // 單集位址的網域名稱(有四組,可置換)
    String baseVolumeURL = "http://comic.kukudm.com";
    beginIndex = endIndex = 0;
    for (int i = 0; i < volumeCount; i++) {
      // 取得單集位址
      beginIndex = tempString.indexOf("<dd>", beginIndex) + 1;
      beginIndex = tempString.indexOf("'", beginIndex) + 1;
      endIndex = tempString.indexOf("'", beginIndex);
      volumeURL = tempString.substring(beginIndex, endIndex);
      if (volumeURL.matches("http.*")) {
        urlList.add(tempString.substring(beginIndex, endIndex));
      } else {
        urlList.add(baseVolumeURL + tempString.substring(beginIndex, endIndex));
      }

      // 取得單集名稱
      beginIndex = tempString.indexOf(">", beginIndex) + 1;
      endIndex = tempString.indexOf("<", beginIndex);
      volumeList.add(
          getVolumeWithFormatNumber(
              Common.getStringRemovedIllegalChar(
                  Common.getTraditionalChinese(
                      tempString.substring(beginIndex, endIndex).trim()))));
    }

    totalVolume = volumeCount;
    Common.debugPrintln("共有" + totalVolume + "集");

    combinationList.add(volumeList);
    combinationList.add(urlList);

    return combinationList;
  }
示例#15
0
  /** @author user */
  public ParseXindm() {
    enumName = "XINDM";
    parserName = this.getClass().getName();
    regexs = new String[] {"(?s).*www.xindm.cn/(?s).*"};
    downloadBefore = true;
    siteID = Site.formString("XINDM");
    siteName = "Xindm";
    indexName = Common.getStoredFileName(SetUp.getTempDirectory(), "index_xindm_parse_", "html");
    indexEncodeName =
        Common.getStoredFileName(SetUp.getTempDirectory(), "index_xindm_encode_parse_", "html");

    jsName = "index_xindm.js";
    radixNumber = 185273; // default value, not always be useful!!
  }
示例#16
0
  @Override
  public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) {
    // combine volumeList and urlList into combinationList, return it.

    List<List<String>> combinationList = new ArrayList<List<String>>();
    List<String> urlList = new ArrayList<String>();
    List<String> volumeList = new ArrayList<String>();

    String tempString = "";
    int beginIndex, endIndex;

    beginIndex = allPageString.indexOf("class=\"plie\"");
    endIndex = allPageString.indexOf("</ul>", beginIndex);

    // 存放集數頁面資訊的字串
    tempString = allPageString.substring(beginIndex, endIndex);

    int volumeCount = tempString.split("href=").length - 1;

    String volumeTitle = "";
    beginIndex = endIndex = 0;
    for (int i = 0; i < volumeCount; i++) {
      // 取得單集位址
      beginIndex = tempString.indexOf("href=", beginIndex);
      beginIndex = tempString.indexOf("\"", beginIndex) + 1;
      endIndex = tempString.indexOf("\"", beginIndex);
      urlList.add(baseURL + tempString.substring(beginIndex, endIndex));

      // 取得單集名稱
      beginIndex = tempString.indexOf(">", beginIndex) + 1;
      endIndex = tempString.indexOf("</a>", beginIndex);
      volumeTitle = tempString.substring(beginIndex, endIndex);
      volumeTitle = volumeTitle.replaceFirst("<br\\s+/{0,1}>", "");

      volumeList.add(
          getVolumeWithFormatNumber(
              Common.getStringRemovedIllegalChar(
                  Common.getTraditionalChinese(volumeTitle.trim()))));
    }

    totalVolume = volumeCount;
    Common.debugPrintln("共有" + totalVolume + "集");

    combinationList.add(volumeList);
    combinationList.add(urlList);

    return combinationList;
  }
示例#17
0
  /**
   * Converts code tokens into their actual strings by replacing each code character (or
   * digit-plus-character pair) with the matching entry of {@code varTokens}.
   *
   * <p>Each code token is walked from its last character to its first. A character whose
   * {@code getVarIndex} value is non-negative and that is preceded by a digit {@code 1}-{@code 9}
   * is combined with that digit into one index; both characters are then consumed together.
   * When no non-empty entry exists in {@code varTokens} for the index, the original characters
   * are copied unchanged.
   *
   * @param codeTokens the encoded tokens to translate
   * @param varTokens lookup table of replacement strings, addressed by the computed index
   * @return a new array, parallel to {@code codeTokens}, holding the translated strings
   */
  private String[] getRealCodeTokens(String[] codeTokens, String[] varTokens) {
    String[] realCodeTokens = new String[codeTokens.length];

    String tempChar = "";

    for (int i = 0; i < codeTokens.length; i++) {
      realCodeTokens[i] = "";
      Common.debugPrintln("這次要分解的code : " + codeTokens[i]);

      // Walk the token backwards so a leading digit can be seen from the character after it.
      for (int j = codeTokens[i].length() - 1; j >= 0; j--) {
        int index = -1;
        // Two characters (a digit and the character after it) may combine into one index.

        index = getVarIndex(codeTokens[i].charAt(j));

        if (j > 0 && index >= 0) {
          char c = codeTokens[i].charAt(j - 1);

          if (c >= '1' && c <= '9') {
            int num = Integer.parseInt(String.valueOf(c));

            // Offset by 62 per digit value.
            // NOTE(review): 26 + 26 + 10 presumably is the size of the code alphabet — confirm.
            index += ((26 + 26 + 10) * num);

            // If no token is found for this index later on, the raw characters are used directly.
            tempChar = "" + codeTokens[i].charAt(j);

            // Consume the digit as well; from here on charAt(j) is the digit.
            j--;
          } else {
            tempChar = "";
          }
        } else {
          tempChar = "";
        }

        if (index >= 0 && index < varTokens.length && !varTokens[index].equals("")) {
          realCodeTokens[i] = varTokens[index] + realCodeTokens[i];
        } else {
          // No replacement available: prepend the original character(s) unchanged.
          realCodeTokens[i] = "" + codeTokens[i].charAt(j) + tempChar + realCodeTokens[i];
        }
        // Common.debugPrintln( realCodeTokens[i] );

      }
      Common.debugPrintln("分解結果: " + realCodeTokens[i]);
    }
    // System.exit( 0 );

    return realCodeTokens;
  }
示例#18
0
  /**
   * Sets up the 178 parser: site identity, base URL, retry settings, script name, default radix
   * number and the names of the index files kept in the temp directory. Tsukkomi mode starts
   * switched off.
   */
  public Parse178() {
    siteID = Site.MANHUA_178;
    siteName = "178";
    // The site formerly lived at http://manhua.178.com.
    baseURL = "http://manhua.dmzj.com";

    waitingTime = 2000;
    retransmissionLimit = 30;
    tsukkomiMode = false;

    jsName = "index_178.js";
    // Default value only; it is not guaranteed to be useful.
    radixNumber = 1593771;

    indexName = Common.getStoredFileName(SetUp.getTempDirectory(), "index_178_parse_", "html");
    indexEncodeName =
        Common.getStoredFileName(SetUp.getTempDirectory(), "index_178_encode_parse_", "html");
  }
示例#19
0
  /**
   * Writes the index script {@code <tagName>.js} into the base output directory.
   *
   * <p>The script assigns {@code TITLE_NAME}, {@code TITLE_INTRODUCTION} and a flat
   * {@code VOLUME_LIST} array that alternates a volume title with its volume ID.
   *
   * @param tagName base name of the output file
   * @param titleName comic title, passed through {@code getOutputText}
   * @param titleIntroduction comic introduction, passed through {@code getOutputText}
   * @param volumeTitleList volume titles, one per volume
   * @param snsSysIDList IDs parallel to {@code volumeTitleList}, passed through
   *     {@code getVolumeID}
   */
  private void outputVolumeIndex(
      String tagName,
      String titleName,
      String titleIntroduction,
      List<String> volumeTitleList,
      List<String> snsSysIDList) {
    String targetDirectory = getBaseOutputDirectory();
    StringBuilder script = new StringBuilder();
    int volumeTotal = volumeTitleList.size();

    script.append("TITLE_NAME = '").append(getOutputText(titleName)).append("';\n");
    script
        .append("TITLE_INTRODUCTION = '")
        .append(getOutputText(titleIntroduction))
        .append("';\n");
    script.append("VOLUME_LIST = new Array( ");

    for (int n = 0; n < volumeTotal; n++) {
      if (n > 0) {
        script.append(", ");
      }
      script.append("'").append(getOutputText(volumeTitleList.get(n))).append("', ");
      script.append("'").append(getVolumeID(snsSysIDList.get(n))).append("'");
    }
    script.append("\n);\n");

    Common.outputFile(script.toString(), targetDirectory, tagName + ".js");
  }
示例#20
0
  /**
   * Sets up the KUKU parser: enum and parser names, the URL patterns it handles, site identity,
   * base URL and the names of the index files kept in the temp directory.
   */
  public ParseKUKU() {
    enumName = "KUKU";
    regexs =
        new String[] {
          "(?s).*kukudm.com(?s).*", "(?s).*socomic.com(?s).*", "(?s).*socomic.net(?s).*"
        };
    parserName = this.getClass().getName();
    downloadBefore = true;

    siteID = Site.formString("KUKU");
    siteName = "kuku";
    baseURL = "http://n.kukudm.com/";

    indexName = Common.getStoredFileName(SetUp.getTempDirectory(), "index_kuku_parse_", "html");
    indexEncodeName =
        Common.getStoredFileName(SetUp.getTempDirectory(), "index_kuku_encode_parse_", "html");
  }
示例#21
0
  /**
   * Downloads the volume page, re-encodes it from GBK, and reads the whole title and the total
   * page count from it.
   *
   * <p>The title is taken from the first line containing {@code title} (only when no whole title
   * is set yet). The page count is taken from the first line containing {@code page} that holds
   * a number between the "total" and "pages" characters. Lines that do not carry the expected
   * pattern are skipped instead of aborting the scan. Afterwards {@code comicURL} is sized to
   * the page count.
   */
  @Override
  public synchronized void setParameters() {
    Common.downloadFile(webSite, SetUp.getTempDirectory(), indexName, false, "");
    Common.newEncodeFile(SetUp.getTempDirectory(), indexName, indexEncodeName, Encoding.GBK);

    String tempStr = Common.getFileString(SetUp.getTempDirectory(), indexEncodeName);
    String[] lines = tempStr.split("\n");

    for (int i = 0; i < lines.length; i++) {
      String line = Common.getTraditionalChinese(lines[i]);

      if (line.contains("title")) {
        // Title line, e.g. <title>尸錄 4話</title> : the text is the third piece when splitting
        // on angle brackets.
        String[] temp = line.split("<|>");

        boolean titleMissing = getWholeTitle() == null || getWholeTitle().equals("");
        if (titleMissing && temp.length > 2) {
          setWholeTitle(Common.getStringRemovedIllegalChar(temp[2]));
        }
      } else if (line.contains("page")) {
        // Page count line, e.g. | 共34頁 |
        int beginIndex = line.indexOf(Common.getStringUsingDefaultLanguage("共", "共"));
        int endIndex = line.indexOf(Common.getStringUsingDefaultLanguage("頁", "頁"));

        // "page" may also show up in unrelated markup; keep scanning when the count pattern is
        // not on this line.
        if (endIndex <= beginIndex) {
          continue;
        }

        try {
          totalPage = Integer.parseInt(line.substring(beginIndex + 1, endIndex).trim());
        } catch (NumberFormatException ignored) {
          // Not a number between the two markers: this is not the page count line.
          continue;
        }

        break;
      }
    }

    comicURL = new String[totalPage]; // totalPage = amount of comic pic
    SetUp.setWholeTitle(wholeTitle);
  }
示例#22
0
  /**
   * Tells whether the stored index file of {@code tagName} lacks the last volume of
   * {@code volumeTitleList}.
   *
   * @param tagName base name of the index file in the base output directory
   * @param volumeTitleList volume titles currently known
   * @return {@code false} for an empty list; otherwise {@code true} when the last title does not
   *     occur in the index file
   */
  private boolean needUpdate(String tagName, List<String> volumeTitleList) {
    if (volumeTitleList.isEmpty()) {
      return false;
    }

    final String indexText = Common.getFileString(getBaseOutputDirectory(), tagName + ".js");
    final String newestTitle = volumeTitleList.get(volumeTitleList.size() - 1);

    // A last volume that is absent from the index means the index is out of date.
    return !indexText.contains(newestTitle);
  }
示例#23
0
  /**
   * Collects the volume titles and volume URLs from the link list of the main page.
   *
   * <p>The list starts at the first {@code <li><a href="http://www} and ends at the following
   * {@code </table>}. Only links inside that section are counted, so matching links elsewhere on
   * the page cannot push the loop past the end of the section.
   *
   * @param urlString address of the main page (not used here)
   * @param allPageString full text of the main page
   * @return a two-element list: the volume titles first, the matching URLs second
   */
  @Override
  public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) {
    List<List<String>> combinationList = new ArrayList<List<String>>();
    List<String> urlList = new ArrayList<String>();
    List<String> volumeList = new ArrayList<String>();

    int beginIndex = allPageString.indexOf("<li><a href=\"http://www") - 1;
    int endIndex = allPageString.indexOf("</table>", beginIndex);
    String listString = allPageString.substring(beginIndex, endIndex);

    // Count within the extracted section, which is the text the loop below walks through.
    totalVolume = listString.split("<li><a href=\"http://www").length - 1;

    beginIndex = endIndex = 0;
    for (int i = 0; i < totalVolume; i++) {

      // Address of the volume.
      beginIndex = listString.indexOf("http://www", beginIndex);
      endIndex = listString.indexOf("\"", beginIndex);
      urlList.add(listString.substring(beginIndex, endIndex));

      // Name of the volume: the text of the <span> that follows the link.
      beginIndex = listString.indexOf("<span", beginIndex) + 1;
      beginIndex = listString.indexOf(">", beginIndex) + 1;
      endIndex = listString.indexOf("<", beginIndex);
      String volumeTitle = listString.substring(beginIndex, endIndex);

      volumeList.add(
          getVolumeWithFormatNumber(
              Common.getStringRemovedIllegalChar(
                  Common.getTraditionalChinese(volumeTitle.trim()))));
    }

    Common.debugPrintln("共有" + totalVolume + "集");

    combinationList.add(volumeList);
    combinationList.add(urlList);

    return combinationList;
  }
示例#24
0
  /**
   * Counts the comma-separated pieces between {@code new Array} and the next {@code )} in the
   * stored index file of {@code tagName}.
   *
   * <p>NOTE(review): the index writer in this file emits a title and an ID per volume, so this
   * number may be twice the volume count — confirm against the callers.
   *
   * @param tagName base name of the index file in the base output directory
   * @return the number of pieces, or {@code 0} when the array is not present
   */
  private int getExistedVolumeCount(String tagName) {
    final String indexText = Common.getFileString(getBaseOutputDirectory(), tagName + ".js");

    final int arrayStart = indexText.indexOf("new Array");
    if (arrayStart < 0) {
      return 0;
    }

    final int arrayEnd = indexText.indexOf(")", arrayStart);
    if (arrayEnd < 0) {
      return 0;
    }

    return indexText.substring(arrayStart, arrayEnd).split(",").length;
  }
示例#25
0
  /**
   * Writes the comment script {@code <fileName>.js} into the sub-directory of {@code tagName}.
   *
   * <p>The script assigns {@code VOLUME_TITLE} and an array, named after {@code siteName}, that
   * holds every comment as a single-quoted string. The comments are written as given, without
   * any escaping.
   *
   * @param tagName sub-directory (under the base output directory) to write into
   * @param volumeTitle title stored in {@code VOLUME_TITLE}
   * @param fileName base name of the output file
   * @param siteName name of the array variable in the script
   * @param commentList comments to store
   */
  private void outputVolumeComment(
      String tagName,
      String volumeTitle,
      String fileName,
      String siteName,
      List<String> commentList) {
    StringBuilder script = new StringBuilder();
    script.append("VOLUME_TITLE = '").append(volumeTitle).append("';\n");
    script.append(siteName).append(" = new Array( \n");

    boolean first = true;
    for (String comment : commentList) {
      if (!first) {
        script.append(", ");
      }
      script.append("'").append(comment).append("'");
      first = false;
    }
    script.append("\n);");

    String targetDirectory = getBaseOutputDirectory() + tagName + Common.getSlash();
    Common.outputFile(script.toString(), targetDirectory, fileName + ".js");
  }
示例#26
0
  /**
   * Parses the picture URL of every page of the current volume, stores the URLs in
   * {@code comicURL}, and downloads each picture right after its URL is found.
   *
   * <p>The page count is derived from the number of {@code <option } pieces on the stored index
   * page. For each page the picture address is read from the {@code <img id} tag; then the link
   * that follows {@code </select>} is downloaded over the same index file to reach the next
   * page. The loop stops early when that link is {@code #} or when {@code Run.isAlive} becomes
   * false.
   */
  @Override
  public void parseComicURL() { // parse URL and save all URLs in comicURL  //
    // First read the stored index page of this volume.

    String allPageString = Common.getFileString(SetUp.getTempDirectory(), indexName);
    Common.debugPrint("開始解析這一集有幾頁 : ");

    int beginIndex = 0, endIndex = 0;

    totalPage = allPageString.split("<option ").length;
    Common.debugPrintln("共 " + totalPage + " 頁");
    comicURL = new String[totalPage];

    String picURL = "";
    int p = 0; // current page number
    for (int i = 0; i < totalPage && Run.isAlive; i++) {
      // The picture address is the first quoted value after "<img id".
      beginIndex = allPageString.indexOf("<img id");
      beginIndex = allPageString.indexOf("\"", beginIndex) + 1;
      endIndex = allPageString.indexOf("\"", beginIndex);
      String tempURL = allPageString.substring(beginIndex, endIndex);

      if (Common.isLegalURL(tempURL)) {
        comicURL[p++] = tempURL;
        Common.debugPrintln(p + " " + comicURL[p - 1]); // debug
        // Download one picture as soon as its URL has been parsed.
        singlePageDownload(getTitle(), getWholeTitle(), comicURL[p - 1], totalPage, p, 0);
      } else {
        // Not a usable picture URL: this page does not count. This also shrinks the loop bound.
        totalPage--;
      }
      // Common.downloadFile( comicURL[p - 1], "", p + ".jpg", false, "" );

      if (p < totalPage) {
        // The next-page link is the first quoted value after "</select>".
        beginIndex = allPageString.indexOf("</select>");
        beginIndex = allPageString.indexOf("\"", beginIndex) + 1;
        endIndex = allPageString.indexOf("\"", beginIndex);
        tempURL = allPageString.substring(beginIndex, endIndex);
        if ("#".equals(tempURL)) {
          Common.debugPrintln("THE LAST PAGE !!");
          break;
        }

        String nextPageURL = "http://comic101.com" + tempURL;

        // Overwrite the stored index file with the next page and parse that on the next pass.
        Common.downloadFile(nextPageURL, SetUp.getTempDirectory(), indexName, false, "");
        allPageString = Common.getFileString(SetUp.getTempDirectory(), indexName);
      }
    }

    // System.exit( 0 ); // debug
  }
示例#27
0
  /**
   * Reads the comic title from the text of the link that points back to the main page.
   *
   * <p>The link target is the part of {@code urlString} after the first {@code /} that follows
   * {@code mop}. For URLs with an {@code ?id=} query, {@code .jsp?id=} is replaced by {@code /}
   * and {@code .html} is appended.
   *
   * @param urlString address of the main page
   * @param allPageString full text of the main page
   * @return the trimmed title after conversion and illegal-character removal
   */
  @Override
  public String getTitleOnMainPage(String urlString, String allPageString) {
    int pathStart = urlString.indexOf("mop");
    pathStart = urlString.indexOf("/", pathStart) + 1;
    final String pathPart = urlString.substring(pathStart);

    final String backMainURL =
        urlString.matches(".*\\?id=.*")
            ? pathPart.replaceAll("\\.jsp\\?id=", "/") + ".html"
            : pathPart;

    // The title is the text of the anchor whose href ends with the link target.
    final int linkIndex = allPageString.indexOf(backMainURL + "\"");
    final int titleStart = allPageString.indexOf(">", linkIndex) + 1;
    final int titleEnd = allPageString.indexOf("<", titleStart);
    final String rawTitle = allPageString.substring(titleStart, titleEnd).trim();

    return Common.getStringRemovedIllegalChar(Common.getTraditionalChinese(rawTitle));
  }
示例#28
0
  public Run(String[] originalArgs, int runMode) {
    this(runMode);

    args = originalArgs;

    if (!Common.withGUI()) {
      SetUp set = new SetUp();
      set.readSetFile(); // set up the file name and directory
    }
    // test( args );

    webSite = "";
  }
示例#29
0
  /**
   * Sets up the CK parser: the URL patterns it handles, enum and parser names, site identity,
   * base URL, script name, default radix number and the names of the index files kept in the
   * temp directory.
   */
  public ParseCK() {
    regexs =
        new String[] {
          "(?s).*comic101.com(?s).*",
          "(?s).*comic.101.com(?s).*",
          "(?s).*mh.ck101.com(?s).*",
          "(?s).*comic.ck101.com(?s).*",
          "(?s).*.com/vols/\\d+/\\d+(?s).*"
        };
    enumName = "CK";
    parserName = this.getClass().getName();
    downloadBefore = true;

    siteID = Site.formString("CK");
    siteName = "CK101_Comic";
    // The site formerly lived at http://comic101.com.
    baseURL = "http://comic.ck101.com";

    jsName = "index_ck.js";
    // Default value only; it is not guaranteed to be useful.
    radixNumber = 151261471;

    indexName = Common.getStoredFileName(SetUp.getTempDirectory(), "index_ck_parse_", "html");
    indexEncodeName =
        Common.getStoredFileName(SetUp.getTempDirectory(), "index_ck_encode_parse_", "html");
  }
示例#30
0
  /**
   * Adds one volume entry to the index script {@code <tagName>.js}.
   *
   * <p>When the file holds no {@code new Array(} yet, a fresh index with this single volume is
   * written through {@code outputVolumeIndex}. Otherwise the new title/ID pair is inserted at
   * the front of the existing array and the file is rewritten.
   *
   * @param tagName base name of the index file in the base output directory
   * @param titleName comic title (used only when a fresh index is written)
   * @param titleIntroduction comic introduction (used only when a fresh index is written)
   * @param volumeTitle title of the volume to add
   * @param snsSysID ID of the volume to add, passed through {@code getVolumeID}
   */
  private void updateIndexFile(
      String tagName,
      String titleName,
      String titleIntroduction,
      String volumeTitle,
      String snsSysID) {
    String indexText = Common.getFileString(getBaseOutputDirectory(), tagName + ".js");

    int markerIndex = indexText.indexOf("new Array(");

    if (markerIndex < 0) {
      print("第 1 筆索引資料");
      // No index yet: create a new index file holding just this volume.
      List<String> singleTitle = new ArrayList<String>();
      List<String> singleID = new ArrayList<String>();
      singleTitle.add(volumeTitle);
      singleID.add(snsSysID);
      outputVolumeIndex(tagName, titleName, titleIntroduction, singleTitle, singleID);
      return;
    }

    print("第 n 筆索引資料");

    // Insert right behind "new Array( " (the marker plus the blank that follows it).
    int insertAt = markerIndex + 11;
    String head = indexText.substring(0, insertAt);
    String entry =
        "'" + getOutputText(volumeTitle) + "', " + "'" + getVolumeID(snsSysID) + "', ";
    String tail = indexText.substring(insertAt);

    Common.outputFile(head + entry + tail, getBaseOutputDirectory(), tagName + ".js");
  }