예제 #1
0
  /**
   * Writes the volume index script {@code <tagName>.js} into the base output directory.
   *
   * <p>The generated text assigns {@code TITLE_NAME}, {@code TITLE_INTRODUCTION} and
   * {@code VOLUME_LIST}. {@code VOLUME_LIST} alternates each volume title with the volume ID
   * obtained from the entry at the same position in {@code snsSysIDList}.
   *
   * @param tagName title tag; used as the output file name (without extension)
   * @param titleName title name, passed through {@code getOutputText} before being written
   * @param titleIntroduction title introduction, passed through {@code getOutputText}
   * @param volumeTitleList volume titles, one per volume
   * @param snsSysIDList values handed to {@code getVolumeID}; read at the same indexes as
   *     {@code volumeTitleList}
   */
  private void outputVolumeIndex(
      String tagName,
      String titleName,
      String titleIntroduction,
      List<String> volumeTitleList,
      List<String> snsSysIDList) {
    String targetDirectory = getBaseOutputDirectory();
    final int volumeTotal = volumeTitleList.size();

    StringBuilder script = new StringBuilder();
    script.append("TITLE_NAME = '" + getOutputText(titleName) + "';\n");
    script.append("TITLE_INTRODUCTION = '" + getOutputText(titleIntroduction) + "';\n");
    script.append("VOLUME_LIST = new Array( ");

    for (int position = 0; position < volumeTotal; position++) {
      // Entries are comma-separated; no separator before the first pair.
      if (position != 0) {
        script.append(", ");
      }
      String quotedTitle = "'" + getOutputText(volumeTitleList.get(position)) + "', ";
      String quotedID = "'" + getVolumeID(snsSysIDList.get(position)) + "'";
      script.append(quotedTitle).append(quotedID);
    }
    script.append("\n);\n");

    Common.outputFile(script.toString(), targetDirectory, tagName + ".js");
  }
예제 #2
0
  /**
   * Scans the per-title output directory and reads the first single-quoted value from every
   * volume script found there, then terminates the JVM.
   *
   * <p>Volume tags are the file names up to their first dot; the {@code comment} file is excluded.
   * The file name is taken with {@link File#getName()} so the result does not depend on the
   * platform's path separator, and a missing or unreadable directory is treated as empty instead
   * of causing a {@code NullPointerException}.
   *
   * <p>NOTE(review): the extracted volume title is never used and the method always ends with
   * {@code System.exit(0)} — this looks like an unfinished or debugging utility; confirm before
   * relying on it.
   *
   * @param tagName title tag whose sub-directory under the base output directory is scanned
   */
  private void buildIndexFile(String tagName) {
    String dirPath = getBaseOutputDirectory() + tagName + Common.getSlash();
    List<String> volumeTagList = new ArrayList<String>();
    String volumeTitle = "";

    // listFiles() returns null when dirPath is not a directory or cannot be read.
    File[] fileList = new File(dirPath).listFiles();

    // Collect the volume tags (file names without extension).
    if (fileList != null) {
      for (File file : fileList) {
        if (!file.isFile()) {
          continue; // only plain files are volume scripts
        }
        String fileName = file.getName();
        int dotIndex = fileName.indexOf('.');
        String volumeTag = dotIndex < 0 ? fileName : fileName.substring(0, dotIndex);
        if (!volumeTag.equals("comment")) {
          volumeTagList.add(volumeTag);
        }
      }
    }

    for (String volumeTag : volumeTagList) {
      String text = Common.getFileString(dirPath + volumeTag + ".js");
      String[] quotedParts = text.split("'");

      // No quoted value in this file: nothing to extract.
      if (quotedParts.length <= 1) {
        continue;
      }

      volumeTitle = quotedParts[1];
    }

    System.exit(0);
  }
예제 #3
0
 /**
  * Walks the update listing pages {@code update_1.shtml} through {@code update_10.shtml} and
  * processes every title tag found on each page with {@code handleSingleTitle}.
  */
 private void handleAllUpdatePage() {
   for (int pageNumber = 1; pageNumber <= 10; pageNumber++) {
     String listURL = "http://manhua.dmzj.com/update_" + pageNumber + ".shtml";
     for (String tagName : getTagNameList(listURL)) {
       handleSingleTitle(tagName);
     }
   }
 }
예제 #4
0
  /**
   * Decides whether the stored index of a title has to be regenerated.
   *
   * @param tagName title tag; {@code <tagName>.js} in the base output directory is read
   * @param volumeTitleList volume titles currently listed for the title
   * @return {@code false} when {@code volumeTitleList} is empty; otherwise {@code true} exactly
   *     when the stored index text does not contain the last title of the list
   */
  private boolean needUpdate(String tagName, List<String> volumeTitleList) {
    if (volumeTitleList.isEmpty()) {
      return false;
    }

    String storedIndex = Common.getFileString(getBaseOutputDirectory(), tagName + ".js");
    String newestTitle = volumeTitleList.get(volumeTitleList.size() - 1);

    // The newest volume being absent from the stored index means an update is needed.
    return !storedIndex.contains(newestTitle);
  }
예제 #5
0
  /**
   * Collects the distinct title tags referenced by the {@code href} attributes of a listing page.
   *
   * <p>For every {@code " href="} occurrence the first path segment of the link is taken (after
   * skipping the host part when the link starts with {@code baseURL}). Segments that were already
   * collected or that {@code isIllegalPage} rejects are skipped.
   *
   * <p>The returned list always starts with a dummy tag whose previous output file and folder are
   * deleted first. Per the original author's note, the first processed title always receives
   * wrong comment data, so the dummy entry is placed first to absorb that error.
   *
   * @param url address of the listing page to download and scan
   * @return the dummy tag followed by the tags found, in page order and without duplicates
   */
  private List<String> getTagNameList(String url) {
    List<String> urlList = new ArrayList<String>();

    String dummyTagName = "ghdxj";
    Common.deleteFile(getBaseOutputDirectory(), dummyTagName + ".js");
    Common.deleteFolder(getBaseOutputDirectory() + dummyTagName);
    urlList.add(dummyTagName);

    String allPageString = getAllPageString(url);
    int pageLength = allPageString.length();
    int beginIndex = 0;

    while (true) {
      beginIndex = allPageString.indexOf(" href=", beginIndex);
      if (beginIndex < 0) break;

      // Skip the '=' and the opening quote of the attribute value.
      beginIndex = allPageString.indexOf("=", beginIndex) + 2;
      if (beginIndex > pageLength) break; // attribute is cut off at the end of the page

      // Peek at most 30 characters ahead; clamp so a link near the end of the page
      // cannot push substring() past the end of the text.
      int peekEnd = Math.min(beginIndex + 30, pageLength);
      String linkStart = allPageString.substring(beginIndex, peekEnd);
      if (linkStart.contains(baseURL)) {
        // Absolute link, e.g. href='http://manhua.dmzj.com/lianaibaojun/ : jump to the
        // first '/' after the host name.
        beginIndex = allPageString.indexOf(".com", beginIndex);
        if (beginIndex < 0) break;
        beginIndex = allPageString.indexOf("/", beginIndex);
        if (beginIndex < 0) break;
      }
      beginIndex++; // start right after the "/"
      int endIndex = allPageString.indexOf("/", beginIndex);
      if (endIndex < 0) break;
      String tag = allPageString.substring(beginIndex, endIndex);

      // beginIndex is already past the current " href=", so skipping still makes progress.
      if (urlList.contains(tag) || isIllegalPage(tag)) {
        continue;
      }

      urlList.add(tag);
      beginIndex = endIndex;
    }

    return urlList;
  }
예제 #6
0
  /**
   * Walks pages 1 to 5 of each of the four ranking listings and processes every title tag found
   * on them with {@code handleSingleTitle}.
   */
  private void handleAllRankPage() {
    final String[] rankListPrefixes = {
      "http://manhua.dmzj.com/rank/total-list-",
      "http://manhua.dmzj.com/rank/yiwanjie/total-list-",
      "http://manhua.dmzj.com/rank/gaoxiao/total-list-",
      "http://manhua.dmzj.com/rank/shaonian/total-list-"
    };

    for (String prefix : rankListPrefixes) {
      for (int pageNumber = 1; pageNumber <= 5; pageNumber++) {
        String listURL = prefix + pageNumber + ".shtml";
        for (String tagName : getTagNameList(listURL)) {
          handleSingleTitle(tagName);
        }
      }
    }
  }
예제 #7
0
  /**
   * Writes the comment script of one volume to {@code <fileName>.js} inside the title's
   * sub-directory of the base output directory.
   *
   * <p>The script assigns {@code VOLUME_TITLE} and an array named after {@code siteName} holding
   * every comment as a single-quoted entry. All values are written exactly as given.
   *
   * @param tagName title tag; names the sub-directory the file is written to
   * @param volumeTitle value written as {@code VOLUME_TITLE}
   * @param fileName output file name without the {@code .js} extension
   * @param siteName name of the array variable in the generated script
   * @param commentList comments to write, in order
   */
  private void outputVolumeComment(
      String tagName,
      String volumeTitle,
      String fileName,
      String siteName,
      List<String> commentList) {
    StringBuilder script = new StringBuilder();
    script.append("VOLUME_TITLE = '").append(volumeTitle).append("';\n");
    script.append(siteName).append(" = new Array( \n");

    String separator = "";
    for (String comment : commentList) {
      script.append(separator).append("'").append(comment).append("'");
      separator = ", ";
    }
    script.append("\n);");

    String targetDirectory = getBaseOutputDirectory() + tagName + Common.getSlash();
    Common.outputFile(script.toString(), targetDirectory, fileName + ".js");
  }
예제 #8
0
  /**
   * Parses the downloaded index page of one volume and fills {@code comicURL} and {@code refers}
   * with one entry per page.
   *
   * <p>When {@code tsukkomiMode} is set the method does not parse a volume at all: it crawls a
   * list page (RSS page, update list or rank list), hands every title tag found to
   * {@code handleSingleTitle}, and then terminates the JVM with {@code System.exit(0)}.
   *
   * <p>Otherwise the page text is read from the temp directory, the encoded page codes and their
   * substitution tokens are sliced out of it by fixed markers, decoded, and combined with the
   * directory of the first picture URL to build every page URL.
   */
  @Override
  public void parseComicURL() { // parse URL and save all URLs in comicURL
    // First obtain the leading download-server URL.

    initNewData();

    String allPageString = Common.getFileString(SetUp.getTempDirectory(), indexName);
    Common.debugPrint("開始解析這一集有幾頁 : ");

    if (tsukkomiMode) {
      // NOTE(review): beginIndex and endIndex are never used inside this branch.
      int beginIndex = 0;
      int endIndex = 0;
      String listURL = webSite;
      List<String> tagNameList = new ArrayList<String>();

      // Drop a trailing slash from the list URL.
      if (webSite.matches(".*/")) {
        listURL = webSite.substring(0, webSite.length() - 1);
      }

      if (isRssPage()) {
        print("is RSS page : " + listURL);
        outputNewListFile(listURL);
        tagNameList = getTagNameList(listURL);

        for (int i = 0; i < tagNameList.size(); i++) // list of titles
        {
          String tagName = tagNameList.get(i);
          handleSingleTitle(tagName);
        }
      } else // ex.
      // http://manhua.dmzj.com/tags/category_search/0-0-0-all-0-0-1-447.shtml#category_nav_anchor
      {
        print("is Normal List Page : " + webSite);

        // Any other kind of list page is silently ignored.
        if (webSite.indexOf("/update_") > 0) {
          handleAllUpdatePage();
        } else if (webSite.indexOf("/rank/") > 0) {
          handleAllRankPage();
        }
      }

      // Crawling mode ends the whole program here; the code below never runs in this mode.
      System.exit(0);
    }

    // Extract all encoded page-address codes (the text between '[ and "]).
    // NOTE(review): the markers are not checked for presence; a page without them would make
    // the substring calls below fail or slice the wrong text.
    int beginIndex = allPageString.indexOf("'[") + 2;
    int endIndex = allPageString.indexOf("\"]", beginIndex) + 1;

    String allCodeString = allPageString.substring(beginIndex, endIndex);

    totalPage = allCodeString.split("\",\"").length;
    Common.debugPrintln("共 " + totalPage + " 頁");
    comicURL = new String[totalPage];
    refers = new String[totalPage];

    // Extract the substitution tokens used by the encoded address codes.
    beginIndex = allPageString.indexOf(",'", endIndex) + 2;
    endIndex = allPageString.indexOf("'.", beginIndex);
    String allVarString = allPageString.substring(beginIndex, endIndex);

    String[] varTokens = allVarString.split("\\|");

    for (int i = 0; i < varTokens.length; i++) {
      Common.debugPrintln(i + " " + varTokens[i]); // test
    }
    // System.exit( 0 );

    String basePicURL = "http://images.dmzj.com/"; // "http://images.manhua.178.com/";
    String[] codeTokens = allCodeString.split("\",\"");

    // NOTE(review): the loops below index codeTokens, picNames and comicURL together, which
    // assumes getRealCodeTokens returns an array of the same length — confirm.
    codeTokens = getRealCodeTokens(codeTokens, varTokens);

    String firstCode = codeTokens[0].replaceAll("\"", "");

    String firstPicURL = "";
    Common.debugPrintln("第一張編碼:" + firstCode);
    firstPicURL = basePicURL + Common.getFixedChineseURL(getDecodeURL(firstCode));
    // Remove every backslash from the decoded URL.
    firstPicURL = firstPicURL.replaceAll("\\\\", "");

    Common.debugPrintln("第一張圖片網址:" + firstPicURL);

    // System.exit( 0 );

    // Decode the file name (the part after the last '/') of every page code.
    String[] picNames = new String[totalPage];
    for (int i = 0; i < picNames.length; i++) {
      codeTokens[i] = codeTokens[i].replaceAll("\"", "");
      beginIndex = codeTokens[i].lastIndexOf("/") + 1;
      endIndex = codeTokens[i].length(); // .lastIndexOf( "\"" );
      // Common.debugPrintln( codeTokens[i] + " " + beginIndex + " " + endIndex );
      picNames[i] =
          Common.getFixedChineseURL(getDecodeURL(codeTokens[i].substring(beginIndex, endIndex)));

      // System.exit( 0 ); // debug
    }

    // Every page URL shares the directory part of the first picture URL.
    endIndex = firstPicURL.lastIndexOf("/") + 1;
    String parentPicURL = firstPicURL.substring(0, endIndex);

    // Stops early once Run.isAlive becomes false.
    for (int i = 0; i < codeTokens.length && Run.isAlive; i++) {
      comicURL[i] = parentPicURL + picNames[i]; // store the URL of each page
      refers[i] = webSite;
      // Common.debugPrintln( ( i + 1 ) + " " + comicURL[i]  ); // debug

    }

    // System.exit( 0 ); // debug
  }
예제 #9
0
  /**
   * Rebuilds the main list file from the title folders found in the base output directory.
   *
   * <p>Every sub-directory name is treated as a title tag. For each tag the matching
   * {@code <tag>.js} index is read; the first single-quoted value is the title name, and two more
   * quoted values are sliced out as the newest volume's title and ID. The four resulting lists
   * (names, tags, newest volume titles, newest volume IDs) are handed to {@code outputListFile}.
   *
   * <p>Tags whose index file contains no quoted value are left out of all four lists so that the
   * lists stay aligned by position. Directory names are taken with {@link File#getName()} so the
   * result does not depend on the platform's path separator, and an unreadable base directory is
   * treated as empty.
   *
   * @param stepByStepMode {@code true} when the newest volume is stored at the end of the volume
   *     array, {@code false} when it is stored at the front
   */
  private void outputMainListFile(boolean stepByStepMode) {
    List<String> tagList = new ArrayList<String>();
    List<String> nameList = new ArrayList<String>();
    List<String> lastVolumeTitleList = new ArrayList<String>();
    List<String> lastVolumeIDList = new ArrayList<String>();

    // Collect the candidate tags: one per sub-directory.
    // listFiles() returns null when the base directory is missing or unreadable.
    List<String> candidateTagList = new ArrayList<String>();
    File[] fileList = new File(getBaseOutputDirectory()).listFiles();
    if (fileList != null) {
      for (File file : fileList) {
        if (file.isDirectory()) {
          candidateTagList.add(file.getName());
        }
      }
    }

    // Read the name and newest volume of every title.
    for (String tag : candidateTagList) {
      String path = getBaseOutputDirectory() + tag + ".js";
      String text = Common.getFileString(path);

      String[] temps = text.split("'");

      if (temps.length <= 1) {
        // No usable index for this tag; skip it in every list.
        continue;
      }

      // Add the tag together with the name so index i refers to the same title in all lists.
      tagList.add(tag);
      nameList.add(temps[1]); // first single-quoted value in the file

      int beginIndex = 0;
      int endIndex = 0;
      String temp = "";

      if (stepByStepMode) // newest entry is stored last
      {
        // NOTE(review): for text ending in "'title', 'id'\n);" both slices below appear to
        // select the same final quoted value — confirm against the real file layout.
        beginIndex = text.indexOf(");", beginIndex) - 2;
        endIndex = text.lastIndexOf("'", beginIndex);
        beginIndex = text.lastIndexOf("'", endIndex - 2) + 1;
        temp = text.substring(beginIndex, endIndex);
        lastVolumeTitleList.add(temp);

        print("文件中最新一集: " + temp);

        beginIndex = text.indexOf(");", endIndex + 1) + 1;
        endIndex = text.lastIndexOf("'", beginIndex);
        beginIndex = text.lastIndexOf("'", endIndex - 2) + 1;
        temp = text.substring(beginIndex, endIndex);
        lastVolumeIDList.add(temp);
      } else // newest entry is stored first
      {
        // First quoted value after "new Array(" is the title, the next one is the ID.
        beginIndex = text.indexOf("new Array(", beginIndex);
        beginIndex = text.indexOf("'", beginIndex) + 1;
        endIndex = text.indexOf("'", beginIndex);
        temp = text.substring(beginIndex, endIndex);
        lastVolumeTitleList.add(temp);

        beginIndex = text.indexOf("'", endIndex + 1) + 1;
        endIndex = text.indexOf("'", beginIndex);
        temp = text.substring(beginIndex, endIndex);
        lastVolumeIDList.add(temp);
      }
    }

    List<List<String>> combinationList = new ArrayList<List<String>>();
    combinationList.add(nameList);
    combinationList.add(tagList);
    combinationList.add(lastVolumeTitleList);
    combinationList.add(lastVolumeIDList);
    outputListFile(combinationList, "MAIN_LIST", mainListFileName);
  }
예제 #10
0
  /**
   * Processes one title: downloads its main page, records its newest volume for the main list,
   * and, when the stored index is out of date, fetches the title picture, the title comments and
   * every volume that has not been handled yet.
   *
   * <p>{@code tsukkomiMode} is switched off only while the title name and volume lists are read
   * from the main page and is always switched back on afterwards, including when the page turns
   * out to be invalid.
   *
   * <p>When the main page lists no volumes but contains {@code g_last_chapter_id}, the volumes are
   * walked one at a time, starting at the newest volume ID and following
   * {@code getPreviousVolumeID} until it returns {@code null}.
   *
   * @param tagName title tag, appended to {@code baseURL} to form the title's main page URL
   */
  private void handleSingleTitle(String tagName) {
    String titleURL = Common.getRegularURL(baseURL + "/" + tagName);
    String titleText = getAllPageString(titleURL);

    // Read the title name and the volume title / URL lists with tsukkomiMode off.
    // The finally block restores the flag on every exit path, including the early return.
    String titleName;
    List<List<String>> combinationList;
    tsukkomiMode = false;
    try {
      titleName = getTitleOnMainPage(titleURL, titleText);

      if (titleName == null) {
        print("無效的作品主頁網址: " + titleURL);
        return;
      }

      combinationList = getVolumeTitleAndUrlOnMainPage(titleURL, titleText);
    } finally {
      tsukkomiMode = true;
    }
    List<String> volumeTitleList = combinationList.get(0);
    List<String> volumeUrlList = combinationList.get(1);

    boolean stepByStepMode = false;
    String lastVolumeID = getLastVolumeID(titleText);
    String lastVolumeTitle = getLastVolumeTitle(titleText);

    // Recorded for main_list.js.
    newTitleList.add(titleName);
    newTagList.add(tagName);
    newVolumeTitleList.add(lastVolumeTitle);
    newVolumeDirList.add(lastVolumeID);

    // Nothing to update: either skip this title or fall back to step-by-step crawling.
    if (!needUpdate(tagName, volumeTitleList)) {
      int lastIndex = volumeTitleList.size() - 1;
      if (lastIndex < 0 && titleText.indexOf("g_last_chapter_id") > 0) {
        print("集數列表因為版權而拿掉 , 需要一集一集慢慢爬");
        stepByStepMode = true;
      } else if (lastIndex < 0) {
        print("跳過 , 因為 " + titleName + "[" + tagName + "] 沒有任何集數 ");
        return;
      } else {
        print(
            "跳過 , 因為 " + titleName + "[" + tagName + "] 已有最新集數: " + volumeTitleList.get(lastIndex));
        return;
      }
    }

    String titleIntroduction = getTitleIntroduction(titleText);
    handleTitlePic(tagName, titleText);

    // In step-by-step mode the title comments are fetched only when comment.js is missing.
    if (!stepByStepMode
        || !new File(getBaseOutputDirectory() + tagName + Common.getSlash() + "comment.js")
            .exists()) {
      handleTitleComment(tagName, titleText);
    }

    int volumeCount = volumeUrlList.size();
    int existedVolumeCount = getExistedVolumeCount(tagName);

    if (!stepByStepMode) {
      // Fetch the comments of every volume that has not been handled yet.
      for (int j = existedVolumeCount; j < volumeCount; j++) {
        String volumeURL = volumeUrlList.get(j);
        String volumeText = getAllPageString(volumeURL);
        String volumeTitle = volumeTitleList.get(j);
        handleSingleVolume(tagName, titleName, titleIntroduction, volumeTitle, volumeText);
      }
      return;
    }

    // Step-by-step mode: walk backwards from the newest volume.
    String nowVolumeID = lastVolumeID;
    while (nowVolumeID != null) {
      String volumeURL = baseURL + "/" + tagName + "/" + nowVolumeID + ".shtml";
      String volumeText = getAllPageString(volumeURL);

      // The volume title is the first double-quoted value after "g_chapter_name".
      int markerIndex = volumeText.indexOf("g_chapter_name");
      int beginIndex = markerIndex < 0 ? -1 : volumeText.indexOf("\"", markerIndex) + 1;
      int endIndex = beginIndex <= 0 ? -1 : volumeText.indexOf("\"", beginIndex);
      if (endIndex < 0) {
        // Without the marker the slice would pick up unrelated page text; stop here.
        print("找不到集數名稱 , 停止處理: " + volumeURL);
        break;
      }
      String volumeTitle = volumeText.substring(beginIndex, endIndex);
      print("正要處理的集數: " + volumeTitle + " : " + volumeURL);
      handleSingleVolume(tagName, titleName, titleIntroduction, volumeTitle, volumeText);

      nowVolumeID = getPreviousVolumeID(volumeText);
    }

    // outputMainListFile(stepByStepMode);
  }