예제 #1
0
  /**
   * Downloads the index page, passes it through {@code Common.newEncodeFile} with
   * {@code Encoding.GBK}, and scans the result line by line for the chapter title and the total
   * page count.
   *
   * <p>The whole title is only set when it is still null or empty. Scanning stops at the first
   * line that yields a page count. Afterwards {@code comicURL} is allocated with one slot per page
   * and the title is forwarded to {@code SetUp}.
   *
   * <p>Lines that merely contain the word "page" or "title" without the expected markers are
   * skipped instead of raising an index exception.
   *
   * @throws NumberFormatException if the text between the page-count markers is not an integer
   */
  @Override
  public synchronized void setParameters() {
    Common.downloadFile(webSite, SetUp.getTempDirectory(), indexName, false, "");
    Common.newEncodeFile(SetUp.getTempDirectory(), indexName, indexEncodeName, Encoding.GBK);

    String tempStr = Common.getFileString(SetUp.getTempDirectory(), indexEncodeName);
    String[] lines = tempStr.split("\n");

    for (int i = 0; i < lines.length; i++) {
      String line = Common.getTraditionalChinese(lines[i]);

      // contains() is equivalent to the former "(?s).*keyword(?s).*" full-match regex
      if (line.contains("title")) {
        // get title ex.<title>尸錄 4話</title>
        if (getWholeTitle() == null || getWholeTitle().equals("")) {
          String[] temp = line.split("<|>");

          // temp[2] only exists when the line really has the <title>text</title> shape
          if (temp.length > 2) {
            setWholeTitle(Common.getStringRemovedIllegalChar(temp[2]));
          }
        }
      } else if (line.contains("page")) {
        // get total page ex. | 共34頁 |
        int beginIndex = line.indexOf(Common.getStringUsingDefaultLanguage("共", "共"));
        if (beginIndex < 0) {
          continue; // unrelated line that just happens to contain "page"
        }

        // look for the closing marker only after the opening one
        int endIndex =
            line.indexOf(Common.getStringUsingDefaultLanguage("頁", "頁"), beginIndex + 1);
        if (endIndex < 0) {
          continue;
        }

        String temp = line.substring(beginIndex + 1, endIndex).trim();
        totalPage = Integer.parseInt(temp);

        break;
      }
    }

    comicURL = new String[totalPage]; // totalPage = amount of comic pic
    SetUp.setWholeTitle(wholeTitle);
  }
예제 #2
0
  /**
   * Extracts the comic title from the main page.
   *
   * <p>When {@code needTsukkomiMode(urlString)} is true the method switches {@code tsukkomiMode}
   * on and returns the fixed title {@code "Tsukkomi"}; otherwise it switches the mode off and
   * parses the page. URLs containing {@code "mh."} (not at position 0) take the title from the
   * first quoted string after {@code g_comic_name}; all other URLs take it from the first
   * {@code <h1>} element.
   *
   * @param urlString address of the main page
   * @param allPageString full text of the main page
   * @return the cleaned title converted via {@code Common.getTraditionalChinese}, or {@code null}
   *     when the expected markers are not present in the page
   */
  @Override
  public String getTitleOnMainPage(String urlString, String allPageString) {
    if (needTsukkomiMode(urlString)) {
      tsukkomiMode = true;
      return "Tsukkomi";
    }

    tsukkomiMode = false;

    // Each marker is checked BEFORE its offset is added; adding first would turn a
    // "not found" (-1) into a non-negative index and defeat the null guard.
    int beginIndex;
    int endIndex;
    if (urlString.indexOf("mh.") > 0) {
      int nameIndex = allPageString.indexOf("g_comic_name");
      if (nameIndex < 0) {
        return null;
      }
      int openQuoteIndex = allPageString.indexOf("\"", nameIndex);
      if (openQuoteIndex < 0) {
        return null;
      }
      beginIndex = openQuoteIndex + 1;
      endIndex = allPageString.indexOf("\"", beginIndex);
    } else {
      int headingIndex = allPageString.indexOf("<h1>");
      if (headingIndex < 0) {
        return null;
      }
      beginIndex = headingIndex + 4;
      endIndex = allPageString.indexOf("</h1>", beginIndex);
    }

    if (endIndex < 0) {
      return null;
    }

    String title = allPageString.substring(beginIndex, endIndex).trim();

    return Common.getStringRemovedIllegalChar(Common.getTraditionalChinese(title));
  }
예제 #3
0
  /**
   * Reads the comic title out of the page's {@code <title>} element.
   *
   * <p>Only the text before the first {@code '-'} inside the element is kept; it is trimmed,
   * passed through {@code Common.getTraditionalChinese} and then stripped of illegal characters.
   *
   * @param urlString address of the main page (not used by this implementation)
   * @param allPageString full text of the main page
   * @return the cleaned title
   */
  @Override
  public String getTitleOnMainPage(String urlString, String allPageString) {
    final int tagStart = allPageString.indexOf("<title>");
    final int textStart = allPageString.indexOf(">", tagStart) + 1;
    final int textEnd = allPageString.indexOf("</title", textStart);

    final String titleElementText = allPageString.substring(textStart, textEnd);
    final String[] dashSeparatedParts = titleElementText.split("-");
    final String traditionalTitle = Common.getTraditionalChinese(dashSeparatedParts[0].trim());

    return Common.getStringRemovedIllegalChar(traditionalTitle);
  }
예제 #4
0
  /**
   * Reads the comic title from the first bold element that directly follows a
   * {@code valign="middle"} attribute.
   *
   * @param urlString address of the main page (not used by this implementation)
   * @param allPageString full text of the main page
   * @return the trimmed title after {@code Common.getTraditionalChinese} conversion and
   *     illegal-character removal
   */
  @Override
  public String getTitleOnMainPage(String urlString, String allPageString) {
    final int anchor = allPageString.indexOf("valign=\"middle\"><b>");
    final int textStart = allPageString.indexOf("<b>", anchor) + 3;
    final int textEnd = allPageString.indexOf("</b>", textStart);

    final String rawTitle = allPageString.substring(textStart, textEnd).trim();
    final String traditionalTitle = Common.getTraditionalChinese(rawTitle);

    return Common.getStringRemovedIllegalChar(traditionalTitle);
  }
예제 #5
0
  /**
   * Fetches a single-volume page and extracts the comic title from it.
   *
   * <p>The title starts two characters after the index returned by
   * {@code Common.getIndexOfOrderKeyword(page, ">>", 3)} (presumably the third {@code ">>"}
   * separator; confirm against that helper) and ends at the index returned by
   * {@code Common.getSmallerIndexOfTwoKeyword} for {@code "["} and {@code ">>"}.
   *
   * @param urlString address of the single-volume page
   * @return the trimmed title after {@code Common.getTraditionalChinese} conversion and
   *     illegal-character removal
   */
  @Override
  public String getTitleOnSingleVolumePage(String urlString) {
    final String pageText = getAllPageString(urlString);

    final int titleStart = Common.getIndexOfOrderKeyword(pageText, ">>", 3) + 2;
    final int titleEnd = Common.getSmallerIndexOfTwoKeyword(pageText, titleStart, "[", ">>");

    final String rawTitle = pageText.substring(titleStart, titleEnd).trim();
    final String traditionalTitle = Common.getTraditionalChinese(rawTitle);

    return Common.getStringRemovedIllegalChar(traditionalTitle);
  }
예제 #6
0
  /**
   * Collects the title and address of every volume listed on the main page.
   *
   * <p>In tsukkomi mode a single pseudo-volume named {@code "tsukkomi"} pointing at
   * {@code urlString} is returned. Otherwise the volume section of the page is cut out (its
   * boundaries depend on whether the URL contains {@code "mh."}) and every {@code href="} link in
   * it is turned into one entry. {@code totalVolume} is updated with the number of entries.
   *
   * @param urlString address of the main page
   * @param allPageString full text of the main page
   * @return a two-element list: index 0 holds the volume titles, index 1 the volume addresses
   */
  @Override
  public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) {
    List<String> urlList = new ArrayList<String>();
    List<String> volumeList = new ArrayList<String>();
    List<List<String>> combinationList = new ArrayList<List<String>>();

    if (tsukkomiMode) {
      urlList.add(urlString);
      volumeList.add("tsukkomi");
      combinationList.add(volumeList);
      combinationList.add(urlList);

      return combinationList;
    }

    // Locate the part of the page that holds the volume links
    final int sectionStart;
    final int sectionEnd;
    if (urlString.indexOf("mh.") > 0) {
      sectionStart = allPageString.indexOf("chapter_list");
      sectionEnd = allPageString.indexOf("</script>", sectionStart);
    } else {
      sectionStart = allPageString.indexOf("class=\"cartoon_online_border\"");
      sectionEnd = allPageString.indexOf("document.write", sectionStart);
    }
    final String section = allPageString.substring(sectionStart, sectionEnd);

    final int volumeCount = section.split("href=\"").length - 1;

    int cursor = 0;
    for (int remaining = volumeCount; remaining > 0; remaining--) {
      // address of the single volume
      int hrefStart = section.indexOf("href=\"", cursor) + 6;
      int hrefEnd = section.indexOf("\"", hrefStart);
      urlList.add(baseURL + section.substring(hrefStart, hrefEnd));

      // name of the single volume
      int nameStart = section.indexOf(">", hrefStart) + 1;
      int nameEnd = section.indexOf("<", nameStart);
      String traditionalName =
          Common.getTraditionalChinese(section.substring(nameStart, nameEnd).trim());
      volumeList.add(getVolumeWithFormatNumber(Common.getStringRemovedIllegalChar(traditionalName)));

      // the next search resumes right after this link's opening tag
      cursor = nameStart;
    }

    totalVolume = volumeCount;
    Common.debugPrintln("共有" + totalVolume + "集");

    combinationList.add(volumeList);
    combinationList.add(urlList);

    return combinationList;
  }
예제 #7
0
  /**
   * Reads the comic title from the {@code <title>} element on the first line of the page.
   *
   * <p>The title ends four characters before the first {@code '_'} that follows the opening tag
   * (presumably to drop a fixed four-character suffix; confirm against the site's markup).
   *
   * @param urlString address of the main page (not used by this implementation)
   * @param allPageString full text of the main page
   * @return the title after {@code Common.getTraditionalChinese} conversion and illegal-character
   *     removal
   */
  @Override
  public String getTitleOnMainPage(String urlString, String allPageString) {
    final String firstLine = allPageString.split("\n")[0];

    final int titleStart = firstLine.indexOf("<title>", 1) + 7;
    final int titleEnd = firstLine.indexOf("_", titleStart) - 4;

    final String rawTitle = firstLine.substring(titleStart, titleEnd);
    final String traditionalTitle = Common.getTraditionalChinese(rawTitle);

    return Common.getStringRemovedIllegalChar(traditionalTitle);
  }
예제 #8
0
  /**
   * Collects the title and address of every volume listed on the main page.
   *
   * <p>The volume table is the text between {@code id='comiclistn'} and the next
   * {@code </table>}; each {@code <dd>} entry in it contributes one single-quoted address and the
   * text of the tag that follows it. Addresses that do not already start with {@code http} are
   * prefixed with the site domain. {@code totalVolume} is updated with the number of entries.
   *
   * @param urlString address of the main page (not used by this implementation)
   * @param allPageString full text of the main page
   * @return a two-element list: index 0 holds the volume titles, index 1 the volume addresses
   */
  @Override
  public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) {
    // combine volumeList and urlList into combinationList, return it.

    List<List<String>> combinationList = new ArrayList<List<String>>();
    List<String> urlList = new ArrayList<String>();
    List<String> volumeList = new ArrayList<String>();

    int beginIndex = allPageString.indexOf("id='comiclistn'");
    int endIndex = allPageString.indexOf("</table>", beginIndex);
    String tempString = allPageString.substring(beginIndex, endIndex);

    int volumeCount = tempString.split("<dd>").length - 1;

    // Domain name for single-volume addresses (there are four, interchangeable)
    String baseVolumeURL = "http://comic.kukudm.com";
    beginIndex = endIndex = 0;
    for (int i = 0; i < volumeCount; i++) {
      // address of the single volume
      beginIndex = tempString.indexOf("<dd>", beginIndex) + 1;
      beginIndex = tempString.indexOf("'", beginIndex) + 1;
      endIndex = tempString.indexOf("'", beginIndex);
      String volumeURL = tempString.substring(beginIndex, endIndex);
      if (volumeURL.matches("http.*")) {
        urlList.add(volumeURL); // already absolute
      } else {
        urlList.add(baseVolumeURL + volumeURL);
      }

      // name of the single volume
      beginIndex = tempString.indexOf(">", beginIndex) + 1;
      endIndex = tempString.indexOf("<", beginIndex);
      volumeList.add(
          getVolumeWithFormatNumber(
              Common.getStringRemovedIllegalChar(
                  Common.getTraditionalChinese(
                      tempString.substring(beginIndex, endIndex).trim()))));
    }

    totalVolume = volumeCount;
    Common.debugPrintln("共有" + totalVolume + "集");

    combinationList.add(volumeList);
    combinationList.add(urlList);

    return combinationList;
  }
예제 #9
0
  /**
   * Collects the title and address of every volume listed on the main page.
   *
   * <p>The volume section is the text between {@code class="plie"} and the next {@code </ul>}.
   * Every {@code href=} in it yields one address (prefixed with {@code baseURL}) and one title,
   * taken from the link text with the first {@code <br />}-style tag removed.
   * {@code totalVolume} is updated with the number of entries.
   *
   * @param urlString address of the main page (not used by this implementation)
   * @param allPageString full text of the main page
   * @return a two-element list: index 0 holds the volume titles, index 1 the volume addresses
   */
  @Override
  public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) {
    List<String> urlList = new ArrayList<String>();
    List<String> volumeList = new ArrayList<String>();

    // text that holds the volume-list information
    final int sectionStart = allPageString.indexOf("class=\"plie\"");
    final int sectionEnd = allPageString.indexOf("</ul>", sectionStart);
    final String section = allPageString.substring(sectionStart, sectionEnd);

    final int volumeCount = section.split("href=").length - 1;

    int cursor = 0;
    int parsed = 0;
    while (parsed < volumeCount) {
      // address of the single volume
      int hrefIndex = section.indexOf("href=", cursor);
      int urlStart = section.indexOf("\"", hrefIndex) + 1;
      int urlEnd = section.indexOf("\"", urlStart);
      urlList.add(baseURL + section.substring(urlStart, urlEnd));

      // name of the single volume
      int nameStart = section.indexOf(">", urlStart) + 1;
      int nameEnd = section.indexOf("</a>", nameStart);
      String volumeTitle =
          section.substring(nameStart, nameEnd).replaceFirst("<br\\s+/{0,1}>", "");

      String traditionalTitle = Common.getTraditionalChinese(volumeTitle.trim());
      volumeList.add(
          getVolumeWithFormatNumber(Common.getStringRemovedIllegalChar(traditionalTitle)));

      cursor = nameStart;
      parsed++;
    }

    totalVolume = volumeCount;
    Common.debugPrintln("共有" + totalVolume + "集");

    List<List<String>> combinationList = new ArrayList<List<String>>();
    combinationList.add(volumeList);
    combinationList.add(urlList);

    return combinationList;
  }
예제 #10
0
  /**
   * Resolves the chapter title (whole title) for the current page when it has not been set yet,
   * logging progress along the way.
   *
   * <p>The title is the text that starts two characters after the index returned by
   * {@code Common.getIndexOfOrderKeyword(page, ">>", 4)} and runs up to the next {@code '<'}.
   */
  @Override
  public void setParameters() {
    Common.debugPrintln("開始解析各參數 :");

    Common.debugPrintln("開始解析title和wholeTitle :");

    final boolean wholeTitleMissing = getWholeTitle() == null || "".equals(getWholeTitle());
    if (wholeTitleMissing) {
      final String pageText = getAllPageString(webSite);

      final int titleStart = Common.getIndexOfOrderKeyword(pageText, ">>", 4) + 2;
      final int titleEnd = pageText.indexOf("<", titleStart);
      final String rawTitle = pageText.substring(titleStart, titleEnd).trim();

      final String traditionalTitle = Common.getTraditionalChinese(rawTitle);
      setWholeTitle(Common.getStringRemovedIllegalChar(traditionalTitle));
    }

    Common.debugPrintln("作品名稱(title) : " + getTitle());
    Common.debugPrintln("章節名稱(wholeTitle) : " + getWholeTitle());
  }
예제 #11
0
  /**
   * Collects the title and address of every volume listed on the main page.
   *
   * <p>The volume list is the text from one character before the first
   * {@code <li><a href="http://www} up to the next {@code </table>}. Each such list item
   * contributes its address and the text of the {@code <span>} that follows it.
   * {@code totalVolume} is updated with the number of items.
   *
   * <p>The items are counted inside the extracted list only, so that identical markup appearing
   * after the closing {@code </table>} cannot make the loop run past the end of the list.
   *
   * @param urlString address of the main page (not used by this implementation)
   * @param allPageString full text of the main page
   * @return a two-element list: index 0 holds the volume titles, index 1 the volume addresses
   */
  @Override
  public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) {
    // combine volumeList and urlList into combinationList, return it.

    List<List<String>> combinationList = new ArrayList<List<String>>();
    List<String> urlList = new ArrayList<String>();
    List<String> volumeList = new ArrayList<String>();

    int beginIndex = allPageString.indexOf("<li><a href=\"http://www") - 1;
    int endIndex = allPageString.indexOf("</table>", beginIndex);
    String listString = allPageString.substring(beginIndex, endIndex);

    // count within the same text the loop below walks through
    totalVolume = listString.split("<li><a href=\"http://www").length - 1;

    beginIndex = endIndex = 0;
    for (int i = 0; i < totalVolume; i++) {

      // address of the single volume
      beginIndex = listString.indexOf("http://www", beginIndex);
      endIndex = listString.indexOf("\"", beginIndex);
      urlList.add(listString.substring(beginIndex, endIndex));

      // name of the single volume
      beginIndex = listString.indexOf("<span", beginIndex) + 1;
      beginIndex = listString.indexOf(">", beginIndex) + 1;
      endIndex = listString.indexOf("<", beginIndex);
      String volumeTitle = listString.substring(beginIndex, endIndex);

      volumeList.add(
          getVolumeWithFormatNumber(
              Common.getStringRemovedIllegalChar(
                  Common.getTraditionalChinese(volumeTitle.trim()))));
    }

    Common.debugPrintln("共有" + totalVolume + "集");

    combinationList.add(volumeList);
    combinationList.add(urlList);

    return combinationList;
  }
예제 #12
0
  /**
   * Finds the comic title by locating the link back to this page inside the page text.
   *
   * <p>The part of {@code urlString} after the first {@code '/'} that follows {@code "mop"} is
   * used as the link target. For URLs of the {@code ...?id=...} form, {@code .jsp?id=} is first
   * replaced by {@code '/'} and {@code ".html"} is appended. The title is the text of the tag
   * whose attribute ends with that target.
   *
   * @param urlString address of the main page
   * @param allPageString full text of the main page
   * @return the trimmed title after {@code Common.getTraditionalChinese} conversion and
   *     illegal-character removal
   */
  @Override
  public String getTitleOnMainPage(String urlString, String allPageString) {
    final int mopIndex = urlString.indexOf("mop");
    final int pathStart = urlString.indexOf("/", mopIndex) + 1;
    final String pathTail = urlString.substring(pathStart);

    final String backMainURL;
    if (urlString.matches(".*\\?id=.*")) {
      backMainURL = pathTail.replaceAll("\\.jsp\\?id=", "/") + ".html";
    } else {
      backMainURL = pathTail;
    }

    final int linkIndex = allPageString.indexOf(backMainURL + "\"");
    final int textStart = allPageString.indexOf(">", linkIndex) + 1;
    final int textEnd = allPageString.indexOf("<", textStart);

    final String rawTitle = allPageString.substring(textStart, textEnd).trim();
    final String traditionalTitle = Common.getTraditionalChinese(rawTitle);

    return Common.getStringRemovedIllegalChar(traditionalTitle);
  }
예제 #13
0
  /**
   * Downloads the index page and, when no chapter title (whole title) is set yet, derives one
   * from the first {@code alt="..."} attribute found in the file read via
   * {@code Common.getFileString}.
   *
   * <p>NOTE(review): the page is downloaded as {@code indexName} but read back as
   * {@code indexEncodeName}, and no encoding step is visible here; confirm this is intended.
   */
  @Override
  public void setParameters() {
    Common.debugPrintln("開始解析各參數 :");
    Common.downloadFile(webSite, SetUp.getTempDirectory(), indexName, false, "");

    final boolean wholeTitleMissing = getWholeTitle() == null || "".equals(getWholeTitle());
    if (wholeTitleMissing) {
      Common.debugPrintln("開始解析title和wholeTitle :");
      final String pageText = Common.getFileString(SetUp.getTempDirectory(), indexEncodeName);

      final int altIndex = pageText.indexOf("alt=");
      final int valueStart = pageText.indexOf("\"", altIndex) + 1;
      final int valueEnd = pageText.indexOf("\"", valueStart);
      final String altText = pageText.substring(valueStart, valueEnd).trim();

      final String traditionalTitle = Common.getTraditionalChinese(altText);
      final String cleanedTitle = Common.getStringRemovedIllegalChar(traditionalTitle);
      setWholeTitle(getVolumeWithFormatNumber(cleanedTitle));
    }

    Common.debugPrintln("作品名稱(title) : " + getTitle());
    Common.debugPrintln("章節名稱(wholeTitle) : " + getWholeTitle());
  }
예제 #14
0
  /**
   * Downloads every comment attached to a title and writes them to {@code comment.js}.
   *
   * <p>The comment page address is built from the single-quoted value that follows
   * {@code token32:} in {@code text}; when that token is absent the method returns without doing
   * anything. The number of comment pages is read from the pagination links (defaulting to one),
   * then each page is fetched and scanned for commenter name, comment body and comment date.
   *
   * <p>A comment entry whose markup is incomplete ends the scan of that page and is dropped as a
   * whole. This keeps the three output lists aligned and prevents the scan from wrapping around
   * to the start of the page and looping forever.
   *
   * @param tagName name of the title; used in the log line and as the output sub-directory
   * @param text page text expected to contain the {@code token32:} value
   * @throws NumberFormatException if the pagination link text is not an integer
   */
  private void handleTitleComment(String tagName, String text) {
    int beginIndex = text.indexOf("token32:");
    if (beginIndex < 0) {
      return;
    }
    beginIndex = text.indexOf("'", beginIndex) + 1;
    int endIndex = text.indexOf("'", beginIndex);

    if (beginIndex <= 0 || endIndex <= 0) {
      return;
    }

    String token32 = text.substring(beginIndex, endIndex);
    String commentURL = Common.getRegularURL("http://t.178.com/resource/show?token32=" + token32);
    print(tagName + "'s commentURL : " + commentURL);

    // Determine the number of comment pages
    text = getAllPageString(commentURL);
    int pageCount = 1;
    beginIndex = text.lastIndexOf("<li><a href=") - 5;
    if (beginIndex > 0) {
      beginIndex = text.lastIndexOf("<li><a href=", beginIndex);
      beginIndex = text.indexOf("page=", beginIndex);
      // without a "page=" marker there is no page number to read; keep the default of one page
      if (beginIndex >= 0) {
        beginIndex = text.indexOf(">", beginIndex) + 1;
        endIndex = text.indexOf("<", beginIndex);
        if (beginIndex > 0 && endIndex >= 0) {
          pageCount = Integer.parseInt(text.substring(beginIndex, endIndex));
        }
      }
    }

    List<String> nameList = new ArrayList<String>();
    List<String> dateList = new ArrayList<String>();
    List<String> commentList = new ArrayList<String>();

    // Download all comments
    for (int i = 1; i <= pageCount; i++) {
      text = getAllPageString(commentURL + "&page=" + i);
      int cursor = 0;
      while (true) {
        cursor = text.indexOf("post-by hovercard", cursor);
        if (cursor < 0) break;

        // commenter name
        int nameBegin = text.indexOf(">", cursor) + 1;
        if (nameBegin <= 0) break;
        int nameEnd = text.indexOf("<", nameBegin);
        if (nameEnd < 0) break;
        String name = text.substring(nameBegin, nameEnd).trim();

        // comment body
        int commentMarker = text.indexOf("-->", nameBegin);
        if (commentMarker < 0) break; // a -1 here would restart the search at index 0
        int commentBegin = text.indexOf(">", commentMarker) + 1;
        int commentEnd = text.indexOf("<", commentBegin);
        if (commentEnd < 0) break;
        String comment = text.substring(commentBegin, commentEnd).trim();

        // time at which the comment was posted
        int dateMarker = text.indexOf("<a href=", commentBegin);
        if (dateMarker < 0) break; // a -1 here would restart the search at index 0
        int dateBegin = text.indexOf(">", dateMarker) + 1;
        if (dateBegin <= 0) break;
        int dateEnd = text.indexOf("<", dateBegin);
        if (dateEnd < 0) break;
        String date = text.substring(dateBegin, dateEnd).trim();
        date = Common.getTraditionalChinese(date);
        date = getFormatDate(date);

        // only complete entries are recorded, so the three lists stay index-aligned
        nameList.add(name);
        commentList.add(comment);
        dateList.add(date);

        cursor = dateBegin;
      }
    }

    // Write out the comments
    List<List<String>> combinationList = new ArrayList<List<String>>();
    combinationList.add(nameList);
    combinationList.add(commentList);
    combinationList.add(dateList);

    String filePath = getBaseOutputDirectory() + tagName + Common.getSlash();
    outputListFile(combinationList, "TITLE_COMMONET", filePath, "comment.js");
  }
예제 #15
0
  // more :
  // http://interface3.i.178.com/~cite.embed.ViewAll?callback=?res_id=4606&sys_res_id=4606_8436&sys_name=manhua178
  // normal :
  // http://interface3.i.178.com/~cite.embed.VoteJS/sysname/manhua178/sys_id/6567_34593/token/0a7e131c24510879fa79ad4c8c6660bd
  /**
   * Fetches a comment/vote page and appends the values parsed from it to {@code textList}.
   *
   * <p>For {@code VoteJS} addresses the text following {@code cite_vote_num} is appended first
   * (or a single empty string, followed by an immediate return, when that marker is missing) and
   * {@code res_id} is set from the first argument of {@code postVote(}. After that, every
   * {@code interactive-opinion-block-} entry appends three strings: the opinion text, and the two
   * parts of its {@code title} attribute split at {@code '('}. Parsing stops at an opinion whose
   * text is {@code "更多"} or when no further entry or {@code title=} is found.
   *
   * <p>When a title attribute does not split into two parts, the failure is logged and empty
   * strings are appended for the missing parts, so each opinion still contributes three entries.
   *
   * @param textList list that receives the parsed values; it is modified in place
   * @param commentURL address of the comment page to fetch
   * @return the same {@code textList} instance
   */
  private List<String> getCommentParseText(List<String> textList, String commentURL) {
    int beginIndex = 0;
    int endIndex = 0;
    String text = getAllPageString(commentURL);

    if (commentURL.indexOf("VoteJS") > 0) {
      beginIndex = text.indexOf("cite_vote_num");
      if (beginIndex < 0) {
        // no comments yet
        textList.add("");
        return textList;
      }
      beginIndex = text.indexOf(">", beginIndex) + 1;
      endIndex = text.indexOf("<", beginIndex);
      textList.add(text.substring(beginIndex, endIndex));

      beginIndex = text.indexOf("postVote(", beginIndex);
      beginIndex = text.indexOf("(", beginIndex) + 1;
      endIndex = text.indexOf(",", beginIndex);
      res_id = text.substring(beginIndex, endIndex);
    }

    while (true) {
      beginIndex = text.indexOf("interactive-opinion-block-", beginIndex);
      if (beginIndex < 0) {
        break;
      }
      beginIndex = text.indexOf(">", beginIndex) + 1;
      endIndex = text.indexOf("<", beginIndex);
      String comment = text.substring(beginIndex, endIndex);
      comment = getUtf8Text(comment);
      comment = comment.replaceAll("\"|'", "");
      comment = Common.getTraditionalChinese(comment);

      // a literal comparison; the pattern has no regex metacharacters
      if (comment.equals("更多")) {
        break;
      }
      textList.add(comment);

      beginIndex = text.indexOf("title=", beginIndex);
      if (beginIndex < 0) {
        break;
      }
      beginIndex = text.indexOf("\"", beginIndex) + 1;
      endIndex = text.indexOf(")", beginIndex);
      String temp = text.substring(beginIndex, endIndex);
      temp = getUtf8Text(temp);
      temp = temp.replaceAll("共有", "");
      temp = temp.replaceAll("人赞同此观点", "");
      String[] temps = temp.split("\\(");

      if (temps.length < 2) {
        print("FAIL -> " + temps.length + " : " + temp);
      }

      // split() can return fewer than two parts (even zero); fall back to empty strings
      // instead of indexing past the end of the array
      String num = temps.length > 0 ? temps[0] : "";
      String ratio = temps.length > 1 ? temps[1] : "";
      textList.add(num);
      textList.add(ratio);
    }

    return textList;
  }
예제 #16
0
  /**
   * Walks through the paginated volume catalogue and collects the title and address of every
   * volume.
   *
   * <p>The first catalogue page is {@code urlString} cut at {@code "0/0/"}; later pages append
   * {@code "0/0/" + pageNumber}. Paging stops at the first page whose comic box (the text between
   * {@code class="comicBox"} and {@code class="fbComment"}) has no {@code class="recTitle"}
   * entry. {@code totalVolume} is updated with the number of volumes found.
   *
   * @param urlString address of the main page
   * @param allPageString text of the main page; not read, every page is fetched again
   * @return a two-element list: index 0 holds the volume titles, index 1 the volume addresses
   */
  @Override
  public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) {
    List<String> urlList = new ArrayList<String>();
    List<String> volumeList = new ArrayList<String>();

    final String catalogueRoot = urlString.split("0/0/")[0];

    // Download each catalogue page in turn and read the volumes it lists
    int totalVolumeCount = 0;
    int pageNumber = 0;

    while (true) {
      pageNumber++;
      String pageURL = catalogueRoot;
      if (pageNumber > 1) {
        pageURL = catalogueRoot + "0/0/" + pageNumber;
      }

      String pageText = getAllPageString(pageURL);

      int comicBoxIndex = pageText.indexOf("class=\"comicBox\"");
      int commentIndex = pageText.indexOf("class=\"fbComment\"", comicBoxIndex);
      String comicBox = pageText.substring(comicBoxIndex, commentIndex);

      // no volume entries on this page means the catalogue is exhausted
      if (!comicBox.contains("class=\"recTitle\"")) {
        break;
      }

      // text holding the volume information of this whole page
      int listStart = pageText.indexOf("class=\"relativeRec", comicBoxIndex);
      int listEnd = pageText.indexOf("</div>", listStart);
      String volumeSection = pageText.substring(listStart, listEnd);

      int volumeCount = volumeSection.split("<h3").length - 1; // volumes on this single page
      totalVolumeCount += volumeCount;

      int cursor = 0;
      for (int parsed = 0; parsed < volumeCount; parsed++) {
        // address of the single volume
        int headingIndex = volumeSection.indexOf("<h3", cursor);
        int hrefIndex = volumeSection.indexOf("href=", headingIndex);
        int urlStart = volumeSection.indexOf("\"", hrefIndex) + 1;
        int urlEnd = volumeSection.indexOf("\"", urlStart);
        String volumeURL = baseURL + volumeSection.substring(urlStart, urlEnd);
        urlList.add(volumeURL);

        // name of the single volume
        int titleAttrIndex = volumeSection.indexOf("title=", urlStart);
        int titleStart = volumeSection.indexOf("\"", titleAttrIndex) + 1;
        int titleEnd = volumeSection.indexOf("\"", titleStart);
        String volumeTitle = volumeSection.substring(titleStart, titleEnd);

        String traditionalTitle = Common.getTraditionalChinese(volumeTitle.trim());
        volumeList.add(
            getVolumeWithFormatNumber(Common.getStringRemovedIllegalChar(traditionalTitle)));

        Common.debugPrintln(volumeURL + " : " + volumeTitle);

        cursor = titleStart;
      }
    }

    Common.debugPrintln("   共有 " + (pageNumber - 1) + " 張目錄頁");

    totalVolume = totalVolumeCount;
    Common.debugPrintln("共有" + totalVolume + "集");

    List<List<String>> combinationList = new ArrayList<List<String>>();
    combinationList.add(volumeList);
    combinationList.add(urlList);

    return combinationList;
  }