Пример #1
0
  /**
   * Writes the volume index script of one title to {@code <tagName>.js} in the base output
   * directory.
   *
   * <p>The generated text assigns {@code TITLE_NAME}, {@code TITLE_INTRODUCTION} and a
   * {@code VOLUME_LIST} array whose entries alternate between a volume title and the volume ID
   * derived from the matching element of {@code snsSysIDList}.
   *
   * @param tagName used as the output file name (without the {@code .js} extension)
   * @param titleName title text, passed through {@code getOutputText} before being written
   * @param titleIntroduction introduction text, passed through {@code getOutputText}
   * @param volumeTitleList volume titles, one array pair is emitted per element
   * @param snsSysIDList IDs read by index in step with {@code volumeTitleList}; must hold at
   *     least as many elements
   */
  private void outputVolumeIndex(
      String tagName,
      String titleName,
      String titleIntroduction,
      List<String> volumeTitleList,
      List<String> snsSysIDList) {
    String targetDirectory = getBaseOutputDirectory();
    int volumeTotal = volumeTitleList.size();

    StringBuilder script = new StringBuilder();
    script.append("TITLE_NAME = '").append(getOutputText(titleName)).append("';\n");
    script.append("TITLE_INTRODUCTION = '").append(getOutputText(titleIntroduction)).append("';\n");
    script.append("VOLUME_LIST = new Array( ");

    for (int n = 0; n < volumeTotal; n++) {
      if (n > 0) {
        script.append(", ");
      }
      // Each volume contributes a 'title', 'id' pair.
      script.append("'").append(getOutputText(volumeTitleList.get(n))).append("', ");
      script.append("'").append(getVolumeID(snsSysIDList.get(n))).append("'");
    }
    script.append("\n);\n");

    Common.outputFile(script.toString(), targetDirectory, tagName + ".js");
  }
Пример #2
0
  /**
   * Scans the output directory of one title, reads every volume script found there (all files
   * except {@code comment.*}) and extracts the first single-quoted string of each, then
   * terminates the JVM with {@code System.exit(0)}.
   *
   * <p>NOTE(review): the extracted volume title is never used and the method always exits the
   * process, so this looks like unfinished/debug code — confirm before relying on it.
   *
   * @param tagName name of the title's sub-directory below the base output directory
   */
  private void buildIndexFile(String tagName) {
    String dirPath = getBaseOutputDirectory() + tagName + Common.getSlash();
    List<String> volumeTagList = new ArrayList<String>();
    String volumeTitle = "";

    // listFiles() returns null when the directory is missing or unreadable.
    File[] fileList = new File(dirPath).listFiles();

    // Collect the volume tags: the file name up to its first dot.
    if (fileList != null) {
      for (File file : fileList) {
        if (!file.isFile()) {
          continue;
        }
        // getName() is separator-independent, unlike splitting the path on a backslash.
        String fileName = file.getName();
        int dotIndex = fileName.indexOf('.');
        String volumeTag = dotIndex < 0 ? fileName : fileName.substring(0, dotIndex);
        if (!volumeTag.equals("comment")) {
          volumeTagList.add(volumeTag);
        }
      }
    }

    for (String volumeTag : volumeTagList) {
      String text = Common.getFileString(dirPath + volumeTag + ".js");
      String[] quotedParts = text.split("'");

      if (quotedParts.length <= 1) {
        continue;
      }

      // The first quoted string in a volume script is its title.
      volumeTitle = quotedParts[1];
    }

    System.exit(0);
  }
Пример #3
0
  /**
   * Decides whether the index file of a title has to be refreshed.
   *
   * @param tagName base name of the index file ({@code <tagName>.js}) in the base output directory
   * @param volumeTitleList volume titles found on the site; the last element is the one checked
   * @return {@code false} when the list is empty; otherwise {@code true} exactly when the
   *     existing index file text does not contain the last title of the list
   */
  private boolean needUpdate(String tagName, List<String> volumeTitleList) {
    if (volumeTitleList.isEmpty()) {
      return false;
    }

    String indexContent = Common.getFileString(getBaseOutputDirectory(), tagName + ".js");
    String newestTitle = volumeTitleList.get(volumeTitleList.size() - 1);

    // A last volume that is missing from the index means the index is stale.
    return !indexContent.contains(newestTitle);
  }
Пример #4
0
 /**
  * Walks the update list pages {@code update_1.shtml} to {@code update_10.shtml} and processes
  * every title tag found on each page with {@code handleSingleTitle}.
  */
 private void handleAllUpdatePage() {
   for (int page = 1; page <= 10; page++) {
     String listURL = "http://manhua.dmzj.com/update_" + page + ".shtml";
     for (String tagName : getTagNameList(listURL)) {
       handleSingleTitle(tagName);
     }
   }
 }
Пример #5
0
  /**
   * Downloads a list page and collects the distinct title tags referenced by its links.
   *
   * <p>For every {@code  href=} attribute the first path segment of the link (after the site
   * prefix, when the link is absolute) is taken as the tag. Tags already collected and tags
   * rejected by {@code isIllegalPage} are skipped. Scanning stops at the first link that cannot
   * be parsed.
   *
   * <p>The returned list always starts with a dummy tag whose previous output file and folder
   * are deleted first.
   *
   * @param url address of the list page to scan
   * @return the dummy tag followed by the tags in page order, without duplicates
   */
  private List<String> getTagNameList(String url) {
    List<String> urlList = new ArrayList<String>();

    // The first title processed always ends up with wrong comment data, so a throwaway title
    // is placed first to absorb that error.
    String dummyTagName = "ghdxj";
    Common.deleteFile(getBaseOutputDirectory(), dummyTagName + ".js");
    Common.deleteFolder(getBaseOutputDirectory() + dummyTagName);
    urlList.add(dummyTagName);

    String allPageString = getAllPageString(url);
    int pageLength = allPageString.length();
    int beginIndex = 0;

    while (true) {
      beginIndex = allPageString.indexOf(" href=", beginIndex);
      if (beginIndex < 0) break;

      // Skip the '=' and the opening quote of the attribute value.
      beginIndex = allPageString.indexOf("=", beginIndex) + 2;
      if (beginIndex >= pageLength) break;

      // Peek at the start of the link; clamp so a link near the end of the page cannot
      // overrun the string.
      String linkHead =
          allPageString.substring(beginIndex, Math.min(beginIndex + 30, pageLength));
      if (linkHead.indexOf(baseURL) >= 0) {
        // Absolute link, e.g. href='http://manhua.dmzj.com/lianaibaojun/ : move to the
        // slash that follows the host name.
        beginIndex = allPageString.indexOf(".com", beginIndex);
        if (beginIndex < 0) break;
        beginIndex = allPageString.indexOf("/", beginIndex);
        if (beginIndex < 0) break;
      }
      beginIndex++; // start right after the "/"
      int endIndex = allPageString.indexOf("/", beginIndex);
      if (endIndex < 0) break;
      String tagName = allPageString.substring(beginIndex, endIndex);

      if (urlList.contains(tagName) || isIllegalPage(tagName)) {
        continue;
      }

      urlList.add(tagName);
      beginIndex = endIndex;
    }

    return urlList;
  }
Пример #6
0
  /**
   * Extracts the volume titles and volume addresses from a title's main page.
   *
   * <p>Only the page section between {@code id='comiclistn'} and the following
   * {@code </table>} is parsed; one volume is expected per {@code <dd>} element. Relative
   * volume addresses are prefixed with the default volume host. Also stores the number of
   * volumes in {@code totalVolume}.
   *
   * @param urlString address of the main page (not used by this implementation)
   * @param allPageString full text of the main page
   * @return a two-element list: element 0 holds the volume titles, element 1 the volume URLs
   */
  @Override
  public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) {
    List<String> titles = new ArrayList<String>();
    List<String> links = new ArrayList<String>();

    int sectionStart = allPageString.indexOf("id='comiclistn'");
    int sectionEnd = allPageString.indexOf("</table>", sectionStart);
    String section = allPageString.substring(sectionStart, sectionEnd);

    int volumeCount = section.split("<dd>").length - 1;

    // Host name for single-volume addresses (the original note says four hosts are
    // interchangeable).
    String baseVolumeURL = "http://comic.kukudm.com";

    int cursor = 0;
    for (int n = 0; n < volumeCount; n++) {
      // Volume address: the first single-quoted value after the next <dd>.
      cursor = section.indexOf("<dd>", cursor) + 1;
      cursor = section.indexOf("'", cursor) + 1;
      int stop = section.indexOf("'", cursor);
      String link = section.substring(cursor, stop);
      links.add(link.matches("http.*") ? link : baseVolumeURL + link);

      // Volume name: the text between the end of that tag and the next '<'.
      cursor = section.indexOf(">", cursor) + 1;
      stop = section.indexOf("<", cursor);
      String rawTitle = section.substring(cursor, stop).trim();
      titles.add(
          getVolumeWithFormatNumber(
              Common.getStringRemovedIllegalChar(Common.getTraditionalChinese(rawTitle))));
    }

    totalVolume = volumeCount;
    Common.debugPrintln("共有" + totalVolume + "集");

    List<List<String>> combinationList = new ArrayList<List<String>>();
    combinationList.add(titles);
    combinationList.add(links);
    return combinationList;
  }
Пример #7
0
  /**
   * Extracts the volume titles and volume addresses from a title's main page.
   *
   * <p>Only the page section between {@code class="plie"} and the following {@code </ul>} is
   * parsed; one volume is expected per {@code href=} attribute. Every address is prefixed with
   * {@code baseURL}. Also stores the number of volumes in {@code totalVolume}.
   *
   * @param urlString address of the main page (not used by this implementation)
   * @param allPageString full text of the main page
   * @return a two-element list: element 0 holds the volume titles, element 1 the volume URLs
   */
  @Override
  public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) {
    List<String> titles = new ArrayList<String>();
    List<String> links = new ArrayList<String>();

    // Text that holds the volume list of the page.
    int sectionStart = allPageString.indexOf("class=\"plie\"");
    int sectionEnd = allPageString.indexOf("</ul>", sectionStart);
    String section = allPageString.substring(sectionStart, sectionEnd);

    int volumeCount = section.split("href=").length - 1;

    int cursor = 0;
    for (int n = 0; n < volumeCount; n++) {
      // Volume address: the double-quoted value of the next href attribute.
      cursor = section.indexOf("href=", cursor);
      cursor = section.indexOf("\"", cursor) + 1;
      int stop = section.indexOf("\"", cursor);
      links.add(baseURL + section.substring(cursor, stop));

      // Volume name: the link text up to </a>, without the first <br> tag.
      cursor = section.indexOf(">", cursor) + 1;
      stop = section.indexOf("</a>", cursor);
      String rawTitle = section.substring(cursor, stop).replaceFirst("<br\\s+/{0,1}>", "");

      titles.add(
          getVolumeWithFormatNumber(
              Common.getStringRemovedIllegalChar(
                  Common.getTraditionalChinese(rawTitle.trim()))));
    }

    totalVolume = volumeCount;
    Common.debugPrintln("共有" + totalVolume + "集");

    List<List<String>> combinationList = new ArrayList<List<String>>();
    combinationList.add(titles);
    combinationList.add(links);
    return combinationList;
  }
Пример #8
0
  /**
   * Extracts the volume titles and volume addresses from a title's main page.
   *
   * <p>The volume list is the page section from just before the first
   * {@code <li><a href="http://www} up to the following {@code </table>}. The number of volumes
   * is counted inside that section only, so matching links elsewhere on the page can no longer
   * make the parsing loop run past the end of the list. Also stores the number of volumes in
   * {@code totalVolume}.
   *
   * @param urlString address of the main page (not used by this implementation)
   * @param allPageString full text of the main page
   * @return a two-element list: element 0 holds the volume titles, element 1 the volume URLs
   * @throws StringIndexOutOfBoundsException if the list markers are not present in the page
   */
  @Override
  public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) {
    // combine volumeList and urlList into combinationList, return it.

    List<List<String>> combinationList = new ArrayList<List<String>>();
    List<String> urlList = new ArrayList<String>();
    List<String> volumeList = new ArrayList<String>();

    String entryMarker = "<li><a href=\"http://www";

    // Start one character before the first entry, but never before the start of the page.
    int markerIndex = allPageString.indexOf(entryMarker);
    int beginIndex = markerIndex > 0 ? markerIndex - 1 : markerIndex;
    int endIndex = allPageString.indexOf("</table>", beginIndex);
    String listString = allPageString.substring(beginIndex, endIndex);

    // Count the entries in the section that is actually parsed below.
    totalVolume = listString.split(entryMarker).length - 1;

    beginIndex = endIndex = 0;
    for (int i = 0; i < totalVolume; i++) {

      // Volume address: from "http://www" up to the closing double quote.
      beginIndex = listString.indexOf("http://www", beginIndex);
      endIndex = listString.indexOf("\"", beginIndex);
      urlList.add(listString.substring(beginIndex, endIndex));

      // Volume name: the text content of the following <span> element.
      beginIndex = listString.indexOf("<span", beginIndex) + 1;
      beginIndex = listString.indexOf(">", beginIndex) + 1;
      endIndex = listString.indexOf("<", beginIndex);
      String volumeTitle = listString.substring(beginIndex, endIndex);

      volumeList.add(
          getVolumeWithFormatNumber(
              Common.getStringRemovedIllegalChar(
                  Common.getTraditionalChinese(volumeTitle.trim()))));
    }

    Common.debugPrintln("共有" + totalVolume + "集");

    combinationList.add(volumeList);
    combinationList.add(urlList);

    return combinationList;
  }
Пример #9
0
  /**
   * Walks pages 1 to 5 of each of the four ranking lists and processes every title tag found
   * on each page with {@code handleSingleTitle}.
   */
  private void handleAllRankPage() {
    String[] rankListPrefixes = {
      "http://manhua.dmzj.com/rank/total-list-",
      "http://manhua.dmzj.com/rank/yiwanjie/total-list-",
      "http://manhua.dmzj.com/rank/gaoxiao/total-list-",
      "http://manhua.dmzj.com/rank/shaonian/total-list-"
    };

    for (String prefix : rankListPrefixes) {
      for (int page = 1; page <= 5; page++) {
        String listURL = prefix + page + ".shtml";
        for (String tagName : getTagNameList(listURL)) {
          handleSingleTitle(tagName);
        }
      }
    }
  }
Пример #10
0
  /**
   * Writes the comments of one volume as a script file {@code <fileName>.js} inside the
   * title's sub-directory of the base output directory.
   *
   * <p>The generated text assigns {@code VOLUME_TITLE} and an array named after
   * {@code siteName} that holds every element of {@code commentList} as a single-quoted string.
   * The values are written as given; no escaping is applied here.
   *
   * @param tagName name of the title's sub-directory
   * @param volumeTitle value written to {@code VOLUME_TITLE}
   * @param fileName output file name without the {@code .js} extension
   * @param siteName name of the array variable in the generated script
   * @param commentList values written into the array, in order
   */
  private void outputVolumeComment(
      String tagName,
      String volumeTitle,
      String fileName,
      String siteName,
      List<String> commentList) {
    StringBuilder script = new StringBuilder();
    script.append("VOLUME_TITLE = '").append(volumeTitle).append("';\n");
    script.append(siteName).append(" = new Array( \n");

    boolean first = true;
    for (String comment : commentList) {
      if (!first) {
        script.append(", ");
      }
      first = false;
      script.append("'").append(comment).append("'");
    }
    script.append("\n);");

    String targetDirectory = getBaseOutputDirectory() + tagName + Common.getSlash();
    Common.outputFile(script.toString(), targetDirectory, fileName + ".js");
  }
Пример #11
0
  /**
   * Downloads a feed page, extracts one record per {@code description>} chunk that contains a
   * {@code title=} attribute, and passes the collected columns to {@code outputListFile} under
   * the list name {@code NEW_LIST}.
   *
   * <p>Per record, in this order inside the chunk: the title name (single-quoted value after
   * {@code title=}), the latest volume title (text up to the next {@code <}), the title tag
   * (path segment after {@code com/}) and the latest volume ID (value after
   * {@code chapterid=} up to the next single quote).
   *
   * @param url address of the feed page
   */
  private void outputNewListFile(String url) {
    String feedText = getAllPageString(url);

    List<String> tagList = new ArrayList<String>();
    List<String> nameList = new ArrayList<String>();
    List<String> lastVolumeTitleList = new ArrayList<String>();
    List<String> lastVolumeIDList = new ArrayList<String>();

    for (String item : feedText.split("description>")) {
      int cursor = item.indexOf("title=");
      if (cursor < 0) {
        continue;
      }

      // Title name.
      cursor = item.indexOf("'", cursor) + 1;
      int stop = item.indexOf("'", cursor);
      nameList.add(item.substring(cursor, stop));

      // Title of the latest volume.
      cursor = item.indexOf(">", cursor) + 1;
      stop = item.indexOf("<", cursor);
      lastVolumeTitleList.add(item.substring(cursor, stop));

      // Title tag.
      cursor = item.indexOf("com/", cursor);
      cursor = item.indexOf("/", cursor) + 1;
      stop = item.indexOf("/", cursor);
      tagList.add(item.substring(cursor, stop));

      // ID of the latest volume.
      cursor = item.indexOf("chapterid=", cursor);
      cursor = item.indexOf("=", cursor) + 1;
      stop = item.indexOf("'", cursor);
      lastVolumeIDList.add(item.substring(cursor, stop));
    }

    List<List<String>> columns = new ArrayList<List<String>>();
    columns.add(nameList);
    columns.add(tagList);
    columns.add(lastVolumeTitleList);
    columns.add(lastVolumeIDList);

    outputListFile(columns, "NEW_LIST", newListFileName);
  }
Пример #12
0
  /**
   * Extracts the volume titles and volume addresses from a title's main page.
   *
   * <p>When {@code tsukkomiMode} is set, the page is not parsed: the result holds the single
   * pseudo volume {@code "tsukkomi"} pointing at {@code urlString}. Otherwise the section
   * between {@code class="cartoon_online_border"} and {@code document.write} is parsed (or,
   * for addresses containing {@code mh.}, between {@code chapter_list} and {@code </script>}),
   * one volume per {@code href="} attribute, each address prefixed with {@code baseURL}. In that
   * case the number of volumes is also stored in {@code totalVolume}.
   *
   * @param urlString address of the main page
   * @param allPageString full text of the main page
   * @return a two-element list: element 0 holds the volume titles, element 1 the volume URLs
   */
  @Override
  public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) {
    List<List<String>> combinationList = new ArrayList<List<String>>();
    List<String> links = new ArrayList<String>();
    List<String> titles = new ArrayList<String>();

    if (tsukkomiMode) {
      links.add(urlString);
      titles.add("tsukkomi");
      combinationList.add(titles);
      combinationList.add(links);
      return combinationList;
    }

    int sectionStart = allPageString.indexOf("class=\"cartoon_online_border\"");
    int sectionEnd = allPageString.indexOf("document.write", sectionStart);

    // Addresses containing "mh." use a different page layout.
    if (urlString.indexOf("mh.") > 0) {
      sectionStart = allPageString.indexOf("chapter_list");
      sectionEnd = allPageString.indexOf("</script>", sectionStart);
    }

    String section = allPageString.substring(sectionStart, sectionEnd);
    int volumeCount = section.split("href=\"").length - 1;

    int cursor = 0;
    for (int n = 0; n < volumeCount; n++) {
      // Volume address: value of the next href attribute.
      cursor = section.indexOf("href=\"", cursor) + 6;
      int stop = section.indexOf("\"", cursor);
      links.add(baseURL + section.substring(cursor, stop));

      // Volume name: text between the end of the tag and the next '<'.
      cursor = section.indexOf(">", cursor) + 1;
      stop = section.indexOf("<", cursor);
      String rawTitle = section.substring(cursor, stop);

      titles.add(
          getVolumeWithFormatNumber(
              Common.getStringRemovedIllegalChar(
                  Common.getTraditionalChinese(rawTitle.trim()))));
    }

    totalVolume = volumeCount;
    Common.debugPrintln("共有" + totalVolume + "集");

    combinationList.add(titles);
    combinationList.add(links);
    return combinationList;
  }
Пример #13
0
  /**
   * Adds one volume to the index script {@code <tagName>.js} of a title.
   *
   * <p>If the existing file text contains no {@code new Array(}, a fresh index holding only
   * this volume is written through {@code outputVolumeIndex}. Otherwise a
   * {@code 'title', 'id', } pair is inserted 11 characters after the start of
   * {@code new Array(} (i.e. in front of the existing entries) and the file is rewritten.
   *
   * @param tagName base name of the index file
   * @param titleName title name, used only when a fresh index is created
   * @param titleIntroduction introduction, used only when a fresh index is created
   * @param volumeTitle title of the volume to add
   * @param snsSysID ID from which the volume ID of the new entry is derived
   */
  private void updateIndexFile(
      String tagName,
      String titleName,
      String titleIntroduction,
      String volumeTitle,
      String snsSysID) {
    String indexText = Common.getFileString(getBaseOutputDirectory(), tagName + ".js");
    int arrayMarker = indexText.indexOf("new Array(");

    if (arrayMarker < 0) {
      print("第 1 筆索引資料");
      // No index yet: create one that contains just this volume.
      List<String> singleTitle = new ArrayList<String>();
      List<String> singleID = new ArrayList<String>();
      singleTitle.add(volumeTitle);
      singleID.add(snsSysID);
      outputVolumeIndex(tagName, titleName, titleIntroduction, singleTitle, singleID);
      return;
    }

    print("第 n 筆索引資料");

    // Skip "new Array(" plus the character that follows it.
    int insertAt = arrayMarker + 11;
    String updated =
        indexText.substring(0, insertAt)
            + "'"
            + getOutputText(volumeTitle)
            + "', "
            + "'"
            + getVolumeID(snsSysID)
            + "', "
            + indexText.substring(insertAt);

    Common.outputFile(updated, getBaseOutputDirectory(), tagName + ".js");
  }
Пример #14
0
  /**
   * Collects the volume titles and volume addresses of a title whose volume list is spread
   * over several catalogue pages.
   *
   * <p>The passed-in page text is not used; every catalogue page is downloaded here. Page 1 is
   * the address up to any {@code 0/0/} suffix, later pages append {@code 0/0/<page>}. Paging
   * stops at the first page whose {@code comicBox} section contains no {@code recTitle}. One
   * volume is expected per {@code <h3} element. Also stores the total number of volumes in
   * {@code totalVolume}.
   *
   * @param urlString address of the title's main page
   * @param allPageString ignored; overwritten with each downloaded page
   * @return a two-element list: element 0 holds the volume titles, element 1 the volume URLs
   */
  @Override
  public List<List<String>> getVolumeTitleAndUrlOnMainPage(String urlString, String allPageString) {
    List<String> links = new ArrayList<String>();
    List<String> titles = new ArrayList<String>();

    urlString = urlString.split("0/0/")[0];

    int totalVolumeCount = 0;
    int lastPage = 0;

    // Download the catalogue pages one by one and read the volumes of each.
    while (true) {
      lastPage++;
      String pageURL = (lastPage > 1) ? urlString + "0/0/" + lastPage : urlString;

      allPageString = getAllPageString(pageURL);

      int boxStart = allPageString.indexOf("class=\"comicBox\"");
      int boxEnd = allPageString.indexOf("class=\"fbComment\"", boxStart);
      String box = allPageString.substring(boxStart, boxEnd);

      // No volume entries on this page: paging is finished.
      if (box.indexOf("class=\"recTitle\"") < 0) {
        break;
      }

      // Section that holds all volumes of this page.
      int listStart = allPageString.indexOf("class=\"relativeRec", boxStart);
      int listEnd = allPageString.indexOf("</div>", listStart);
      String pageList = allPageString.substring(listStart, listEnd);

      int volumeCount = pageList.split("<h3").length - 1; // volumes on this page
      totalVolumeCount += volumeCount;

      int cursor = 0;
      for (int n = 0; n < volumeCount; n++) {
        // Volume address: double-quoted href value inside the next <h3.
        cursor = pageList.indexOf("<h3", cursor);
        cursor = pageList.indexOf("href=", cursor);
        cursor = pageList.indexOf("\"", cursor) + 1;
        int stop = pageList.indexOf("\"", cursor);
        String volumeURL = baseURL + pageList.substring(cursor, stop);
        links.add(volumeURL);

        // Volume name: double-quoted value of the following title attribute.
        cursor = pageList.indexOf("title=", cursor);
        cursor = pageList.indexOf("\"", cursor) + 1;
        stop = pageList.indexOf("\"", cursor);
        String volumeTitle = pageList.substring(cursor, stop);

        titles.add(
            getVolumeWithFormatNumber(
                Common.getStringRemovedIllegalChar(
                    Common.getTraditionalChinese(volumeTitle.trim()))));

        Common.debugPrintln(volumeURL + " : " + volumeTitle);
      }
    }

    Common.debugPrintln("   共有 " + (lastPage - 1) + " 張目錄頁");

    totalVolume = totalVolumeCount;
    Common.debugPrintln("共有" + totalVolume + "集");

    List<List<String>> combinationList = new ArrayList<List<String>>();
    combinationList.add(titles);
    combinationList.add(links);
    return combinationList;
  }
Пример #15
0
  /**
   * Resolves the picture address of every page of the current volume.
   *
   * <p>Normal mode: reads the saved index page, extracts the list of encoded page paths and
   * the replacement-token string, decodes them, and fills {@code comicURL} (picture address per
   * page) and {@code refers} (set to {@code webSite} for every page). {@code totalPage} is set
   * to the number of encoded paths.
   *
   * <p>When {@code tsukkomiMode} is set, no pictures are resolved: the method crawls the list
   * page given by {@code webSite} (RSS page, update pages or rank pages), processes every title
   * found, and then terminates the JVM with {@code System.exit(0)}.
   */
  @Override
  public void parseComicURL() { // parse URL and save all URLs in comicURL
    // First obtain the download server address.

    initNewData();

    String allPageString = Common.getFileString(SetUp.getTempDirectory(), indexName);
    Common.debugPrint("開始解析這一集有幾頁 : ");

    if (tsukkomiMode) {
      // NOTE(review): beginIndex and endIndex are never used inside this branch.
      int beginIndex = 0;
      int endIndex = 0;
      String listURL = webSite;
      List<String> tagNameList = new ArrayList<String>();

      // Drop a trailing slash from the list address.
      if (webSite.matches(".*/")) {
        listURL = webSite.substring(0, webSite.length() - 1);
      }

      if (isRssPage()) {
        print("is RSS page : " + listURL);
        outputNewListFile(listURL);
        tagNameList = getTagNameList(listURL);

        for (int i = 0; i < tagNameList.size(); i++) // list of titles
        {
          String tagName = tagNameList.get(i);
          handleSingleTitle(tagName);
        }
      } else // ex.
      // http://manhua.dmzj.com/tags/category_search/0-0-0-all-0-0-1-447.shtml#category_nav_anchor
      {
        print("is Normal List Page : " + webSite);

        // Any other list address (neither update nor rank) is not processed at all.
        if (webSite.indexOf("/update_") > 0) {
          handleAllUpdatePage();
        } else if (webSite.indexOf("/rank/") > 0) {
          handleAllRankPage();
        }
      }

      // Crawl mode never returns to the caller.
      System.exit(0);
    }

    // Extract the encoded page paths: the text between '[ and "] (closing quote kept).
    int beginIndex = allPageString.indexOf("'[") + 2;
    int endIndex = allPageString.indexOf("\"]", beginIndex) + 1;

    String allCodeString = allPageString.substring(beginIndex, endIndex);

    totalPage = allCodeString.split("\",\"").length;
    Common.debugPrintln("共 " + totalPage + " 頁");
    comicURL = new String[totalPage];
    refers = new String[totalPage];

    // Extract the '|'-separated replacement tokens used by the encoded paths
    // (presumably the dictionary of a packed script — confirm against getRealCodeTokens).
    beginIndex = allPageString.indexOf(",'", endIndex) + 2;
    endIndex = allPageString.indexOf("'.", beginIndex);
    String allVarString = allPageString.substring(beginIndex, endIndex);

    String[] varTokens = allVarString.split("\\|");

    for (int i = 0; i < varTokens.length; i++) {
      Common.debugPrintln(i + " " + varTokens[i]); // test
    }
    // System.exit( 0 );

    String basePicURL = "http://images.dmzj.com/"; // "http://images.manhua.178.com/";
    String[] codeTokens = allCodeString.split("\",\"");

    codeTokens = getRealCodeTokens(codeTokens, varTokens);

    String firstCode = codeTokens[0].replaceAll("\"", "");

    String firstPicURL = "";
    Common.debugPrintln("第一張編碼:" + firstCode);
    firstPicURL = basePicURL + Common.getFixedChineseURL(getDecodeURL(firstCode));
    // Remove the backslashes left over from the escaped path.
    firstPicURL = firstPicURL.replaceAll("\\\\", "");

    Common.debugPrintln("第一張圖片網址:" + firstPicURL);

    // System.exit( 0 );

    // File name of each page: the part of the path after its last '/'.
    // NOTE(review): indexes codeTokens up to totalPage — assumes getRealCodeTokens returns at
    // least that many entries; confirm.
    String[] picNames = new String[totalPage];
    for (int i = 0; i < picNames.length; i++) {
      codeTokens[i] = codeTokens[i].replaceAll("\"", "");
      beginIndex = codeTokens[i].lastIndexOf("/") + 1;
      endIndex = codeTokens[i].length(); // .lastIndexOf( "\"" );
      // Common.debugPrintln( codeTokens[i] + " " + beginIndex + " " + endIndex );
      picNames[i] =
          Common.getFixedChineseURL(getDecodeURL(codeTokens[i].substring(beginIndex, endIndex)));

      // System.exit( 0 ); // debug
    }

    // All pages share the directory of the first picture.
    endIndex = firstPicURL.lastIndexOf("/") + 1;
    String parentPicURL = firstPicURL.substring(0, endIndex);

    // NOTE(review): the bound is codeTokens.length while comicURL/picNames have totalPage
    // entries — assumes both lengths agree; confirm.
    for (int i = 0; i < codeTokens.length && Run.isAlive; i++) {
      comicURL[i] = parentPicURL + picNames[i]; // store the address of each page
      refers[i] = webSite;
      // Common.debugPrintln( ( i + 1 ) + " " + comicURL[i]  ); // debug

    }

    // System.exit( 0 ); // debug
  }
Пример #16
0
  /**
   * Builds the main list from the files already present in the base output directory and
   * passes it to {@code outputListFile} under the list name {@code MAIN_LIST}.
   *
   * <p>Each sub-directory name is treated as a title tag. For every tag the index script
   * {@code <tag>.js} is read; its first single-quoted string is the title name, and the latest
   * volume title and ID are read from the array in the script. Tags whose index script holds
   * no quoted string are left out of all four lists, so the lists stay aligned by index.
   *
   * @param stepByStepMode {@code true} when the newest volume is stored at the end of the
   *     array in the index script, {@code false} when it is stored at the front
   */
  private void outputMainListFile(boolean stepByStepMode) {
    List<String> tagList = new ArrayList<String>();
    List<String> nameList = new ArrayList<String>();
    List<String> lastVolumeTitleList = new ArrayList<String>();
    List<String> lastVolumeIDList = new ArrayList<String>();

    // Collect candidate tags. listFiles() returns null when the directory is missing or
    // unreadable; getName() is separator-independent, unlike splitting on a backslash.
    List<String> candidateTagList = new ArrayList<String>();
    File[] fileList = new File(getBaseOutputDirectory()).listFiles();
    if (fileList != null) {
      for (File file : fileList) {
        if (file.isDirectory()) {
          candidateTagList.add(file.getName());
        }
      }
    }

    for (String tag : candidateTagList) {
      String text = Common.getFileString(getBaseOutputDirectory() + tag + ".js");
      String[] temps = text.split("'");

      if (temps.length <= 1) {
        // No usable index script: skip the tag entirely so no list gets an orphan entry.
        continue;
      }

      tagList.add(tag);
      nameList.add(temps[1]); // the first quoted string is the title name

      int beginIndex = 0;
      int endIndex = 0;
      String temp = "";

      if (stepByStepMode) {
        // Newest entry is at the end: read the quoted strings just before ");".
        // NOTE(review): with the "...'id'\n);" layout both lookups below appear to land on
        // the same final quoted value — confirm whether the title lookup should step back
        // one more quoted string.
        beginIndex = text.indexOf(");", beginIndex) - 2;
        endIndex = text.lastIndexOf("'", beginIndex);
        beginIndex = text.lastIndexOf("'", endIndex - 2) + 1;
        temp = text.substring(beginIndex, endIndex);
        lastVolumeTitleList.add(temp);

        print("文件中最新一集: " + temp);

        beginIndex = text.indexOf(");", endIndex + 1) + 1;
        endIndex = text.lastIndexOf("'", beginIndex);
        beginIndex = text.lastIndexOf("'", endIndex - 2) + 1;
        temp = text.substring(beginIndex, endIndex);
        lastVolumeIDList.add(temp);
      } else {
        // Newest entry is at the front: the first two quoted strings after "new Array(".
        beginIndex = text.indexOf("new Array(", beginIndex);
        beginIndex = text.indexOf("'", beginIndex) + 1;
        endIndex = text.indexOf("'", beginIndex);
        temp = text.substring(beginIndex, endIndex);
        lastVolumeTitleList.add(temp);

        beginIndex = text.indexOf("'", endIndex + 1) + 1;
        endIndex = text.indexOf("'", beginIndex);
        temp = text.substring(beginIndex, endIndex);
        lastVolumeIDList.add(temp);
      }
    }

    List<List<String>> combinationList = new ArrayList<List<String>>();
    combinationList.add(nameList);
    combinationList.add(tagList);
    combinationList.add(lastVolumeTitleList);
    combinationList.add(lastVolumeIDList);
    outputListFile(combinationList, "MAIN_LIST", mainListFileName);
  }
Пример #17
0
  // more :
  // http://interface3.i.178.com/~cite.embed.ViewAll?callback=?res_id=4606&sys_res_id=4606_8436&sys_name=manhua178
  // normal :
  // http://interface3.i.178.com/~cite.embed.VoteJS/sysname/manhua178/sys_id/6567_34593/token/0a7e131c24510879fa79ad4c8c6660bd
  /**
   * Downloads a comment page and appends the parsed values to {@code textList}.
   *
   * <p>For a {@code VoteJS} address the text following {@code cite_vote_num} is appended first
   * (or a single empty string when the page has no such marker, in which case parsing stops)
   * and {@code res_id} is set from the {@code postVote(} call. Then, for every
   * {@code interactive-opinion-block-} element, three values are appended: the comment text
   * (quotes removed, converted to traditional Chinese) and the two parts of its
   * {@code title} attribute split at {@code (}. Parsing stops at a comment equal to
   * {@code 更多} or when no further {@code title=} is found.
   *
   * <p>When the title attribute does not split into two parts, the failure is printed and the
   * missing part(s) are appended as empty strings, so every comment still contributes exactly
   * three entries instead of aborting with an ArrayIndexOutOfBoundsException.
   *
   * @param textList list to append to
   * @param commentURL address of the comment page
   * @return the same {@code textList} instance
   */
  private List<String> getCommentParseText(List<String> textList, String commentURL) {
    int beginIndex = 0;
    int endIndex = 0;
    String text = getAllPageString(commentURL);

    if (commentURL.indexOf("VoteJS") > 0) {
      beginIndex = text.indexOf("cite_vote_num");
      if (beginIndex < 0) {
        // No comments yet.
        textList.add("");
        return textList;
      }
      beginIndex = text.indexOf(">", beginIndex) + 1;
      endIndex = text.indexOf("<", beginIndex);
      textList.add(text.substring(beginIndex, endIndex));

      // First argument of postVote( ... , ...).
      beginIndex = text.indexOf("postVote(", beginIndex);
      beginIndex = text.indexOf("(", beginIndex) + 1;
      endIndex = text.indexOf(",", beginIndex);
      res_id = text.substring(beginIndex, endIndex);
    }

    while (true) {
      beginIndex = text.indexOf("interactive-opinion-block-", beginIndex);
      if (beginIndex < 0) {
        break;
      }
      beginIndex = text.indexOf(">", beginIndex) + 1;
      endIndex = text.indexOf("<", beginIndex);
      String comment = text.substring(beginIndex, endIndex);
      comment = getUtf8Text(comment);
      // Quotes are stripped so the value can be embedded in a quoted string later.
      comment = comment.replaceAll("\"|'", "");
      comment = Common.getTraditionalChinese(comment);

      if (comment.matches("更多")) {
        break;
      }
      textList.add(comment);

      beginIndex = text.indexOf("title=", beginIndex);
      if (beginIndex < 0) {
        break;
      }
      beginIndex = text.indexOf("\"", beginIndex) + 1;
      endIndex = text.indexOf(")", beginIndex);
      String temp = text.substring(beginIndex, endIndex);
      temp = getUtf8Text(temp);
      temp = temp.replaceAll("共有", "");
      temp = temp.replaceAll("人赞同此观点", "");
      String[] temps = temp.split("\\(");

      if (temps.length < 2) {
        print("FAIL -> " + temps.length + " : " + temp);
      }

      // split() may return fewer than two parts (even none); fall back to empty strings.
      String num = temps.length > 0 ? temps[0] : "";
      String ratio = temps.length > 1 ? temps[1] : "";
      textList.add(num);
      textList.add(ratio);
    }

    return textList;
  }
Пример #18
0
 /**
  * Empties the four "new" lists (titles, tags, volume titles, volume directories) so a fresh
  * run starts without entries from a previous one.
  */
 private void initNewData() {
   newTitleList.clear();
   newTagList.clear();
   newVolumeTitleList.clear();
   newVolumeDirList.clear();
 }
Пример #19
0
  /**
   * Processes one title: downloads its main page, records it in the "new" lists, and, when
   * the local index is out of date, fetches the title picture, the title comments and every
   * volume that is not stored yet.
   *
   * <p>{@code tsukkomiMode} is switched off only while the title name and the volume list are
   * read from the main page, and is switched back on afterwards on every path, including the
   * early return for an invalid page and any exception thrown while parsing.
   *
   * <p>If the page lists no volumes but contains {@code g_last_chapter_id}, the volumes are
   * crawled one at a time, starting from the latest volume ID and following
   * {@code getPreviousVolumeID} until it returns {@code null}.
   *
   * @param tagName tag of the title; appended to {@code baseURL} to form the main page address
   */
  private void handleSingleTitle(String tagName) {
    String titleURL = Common.getRegularURL(baseURL + "/" + tagName);
    String titleText = getAllPageString(titleURL);

    // Read the title name and the volume title/address lists in normal (non-tsukkomi) mode.
    String titleName;
    List<List<String>> combinationList;
    tsukkomiMode = false;
    try {
      titleName = getTitleOnMainPage(titleURL, titleText);

      if (titleName == null) {
        print("無效的作品主頁網址: " + titleURL);
        return;
      }

      combinationList = getVolumeTitleAndUrlOnMainPage(titleURL, titleText);
    } finally {
      // Always restore the mode flag, whatever happened above.
      tsukkomiMode = true;
    }
    List<String> volumeTitleList = combinationList.get(0);
    List<String> volumeUrlList = combinationList.get(1);

    boolean stepByStepMode = false;
    String lastVolumeID = getLastVolumeID(titleText);
    String lastVolumeTitle = getLastVolumeTitle(titleText);

    // Recorded for main_list.js.
    newTitleList.add(titleName);
    newTagList.add(tagName);
    newVolumeTitleList.add(lastVolumeTitle);
    newVolumeDirList.add(lastVolumeID);

    // Nothing to update: skip this title, unless the volume list is simply hidden.
    if (!needUpdate(tagName, volumeTitleList)) {
      int lastIndex = volumeTitleList.size() - 1;
      if (lastIndex < 0 && titleText.indexOf("g_last_chapter_id") > 0) {
        print("集數列表因為版權而拿掉 , 需要一集一集慢慢爬");
        stepByStepMode = true;
      } else if (lastIndex < 0) {
        print("跳過 , 因為 " + titleName + "[" + tagName + "] 沒有任何集數 ");
        return;
      } else {
        print(
            "跳過 , 因為 " + titleName + "[" + tagName + "] 已有最新集數: " + volumeTitleList.get(lastIndex));
        return;
      }
    }

    String titleIntroduction = getTitleIntroduction(titleText);
    handleTitlePic(tagName, titleText);

    // In step-by-step mode the title comments are fetched only if comment.js is missing.
    if (!stepByStepMode
        || !new File(getBaseOutputDirectory() + tagName + Common.getSlash() + "comment.js")
            .exists()) {
      handleTitleComment(tagName, titleText);
    }

    int volumeCount = volumeUrlList.size();
    int existedVolumeCount = getExistedVolumeCount(tagName);

    if (!stepByStepMode) {
      // Fetch the comments of every volume that is not stored yet.
      for (int j = existedVolumeCount; j < volumeCount; j++) {
        String volumeURL = volumeUrlList.get(j);
        String volumeText = getAllPageString(volumeURL);
        String volumeTitle = volumeTitleList.get(j);
        handleSingleVolume(tagName, titleName, titleIntroduction, volumeTitle, volumeText);
      }
      return;
    }

    // Step-by-step: start at the latest volume and walk backwards through the volume pages.
    String nowVolumeID = lastVolumeID;
    do {
      String volumeURL = baseURL + "/" + tagName + "/" + nowVolumeID + ".shtml";
      String volumeText = getAllPageString(volumeURL);

      // The volume title is the double-quoted value after g_chapter_name.
      int beginIndex = volumeText.indexOf("g_chapter_name");
      beginIndex = volumeText.indexOf("\"", beginIndex) + 1;
      int endIndex = volumeText.indexOf("\"", beginIndex);
      String volumeTitle = volumeText.substring(beginIndex, endIndex);
      print("正要處理的集數: " + volumeTitle + " : " + volumeURL);
      handleSingleVolume(tagName, titleName, titleIntroduction, volumeTitle, volumeText);

      nowVolumeID = getPreviousVolumeID(volumeText);
    } while (nowVolumeID != null);
  }
Пример #20
0
  /**
   * Downloads every comment page of a title and hands the collected commenter names, comment
   * texts and dates to {@code outputListFile} (list name {@code TITLE_COMMONET}, file
   * {@code comment.js} in the title's sub-directory).
   *
   * <p>Does nothing when the page text carries no {@code token32:} value.
   *
   * @param tagName name of the title's sub-directory below the base output directory
   * @param text text of the title's main page; reused below for the downloaded comment pages
   */
  private void handleTitleComment(String tagName, String text) {
    // The comment address is built from the single-quoted value that follows "token32:".
    int beginIndex = text.indexOf("token32:");
    if (beginIndex < 0) {
      return;
    }
    beginIndex = text.indexOf("'", beginIndex) + 1;
    int endIndex = text.indexOf("'", beginIndex);

    // beginIndex is 0 when no quote was found (indexOf returned -1 before the + 1).
    if (beginIndex <= 0 || endIndex <= 0) {
      return;
    }

    String token32 = text.substring(beginIndex, endIndex);
    String commentURL = Common.getRegularURL("http://t.178.com/resource/show?token32=" + token32);
    print(tagName + "'s commentURL : " + commentURL);

    // Determine the number of comment pages.
    text = getAllPageString(commentURL);
    int pageCount = 1;
    // Look at the second-to-last "<li><a href=" entry; its link text is parsed as the page
    // count (presumably the last entry is a "next page" link — confirm against the site).
    beginIndex = text.lastIndexOf("<li><a href=") - 5;
    if (beginIndex > 0) {
      beginIndex = text.lastIndexOf("<li><a href=", beginIndex);
      beginIndex = text.indexOf("page=", beginIndex);
      beginIndex = text.indexOf(">", beginIndex) + 1;
      endIndex = text.indexOf("<", beginIndex);
      // NOTE(review): throws NumberFormatException if the link text is not a number.
      pageCount = Integer.parseInt(text.substring(beginIndex, endIndex));
    }

    List<String> nameList = new ArrayList<String>();
    List<String> dateList = new ArrayList<String>();
    List<String> commentList = new ArrayList<String>();
    String temp = "";

    // Download all comments, page by page.
    for (int i = 1; i <= pageCount; i++) {
      text = getAllPageString(commentURL + "&page=" + i);
      beginIndex = endIndex = 0;
      while (true) {
        // Each comment starts at a "post-by hovercard" element.
        beginIndex = text.indexOf("post-by hovercard", beginIndex);
        if (beginIndex < 0) break;

        // Name of the commenter.
        beginIndex = text.indexOf(">", beginIndex) + 1;
        endIndex = text.indexOf("<", beginIndex);
        temp = text.substring(beginIndex, endIndex).trim();
        nameList.add(temp);

        // Comment text: the text after the tag that follows the next "-->".
        beginIndex = text.indexOf("-->", beginIndex);
        beginIndex = text.indexOf(">", beginIndex) + 1;
        endIndex = text.indexOf("<", beginIndex);
        temp = text.substring(beginIndex, endIndex).trim();
        commentList.add(temp);

        // Time of the comment: the text of the next "<a href=" link, reformatted.
        beginIndex = text.indexOf("<a href=", beginIndex);
        beginIndex = text.indexOf(">", beginIndex) + 1;
        endIndex = text.indexOf("<", beginIndex);
        temp = text.substring(beginIndex, endIndex).trim();
        temp = Common.getTraditionalChinese(temp);
        temp = getFormatDate(temp);
        dateList.add(temp);
      }
    }

    // Write out the comments.
    List<List<String>> combinationList = new ArrayList<List<String>>();
    combinationList.add(nameList);
    combinationList.add(commentList);
    combinationList.add(dateList);

    String filePath = getBaseOutputDirectory() + tagName + Common.getSlash();
    outputListFile(combinationList, "TITLE_COMMONET", filePath, "comment.js");
  }