예제 #1
0
  /**
   * Collects the distinct tag names referenced by the {@code href} attributes found in the
   * text returned by {@code getAllPageString(url)}.
   *
   * <p>For every occurrence of {@code " href="} the method skips the opening quote, optionally
   * skips an absolute prefix (when {@code baseURL} appears near the start of the link), skips
   * the leading {@code "/"} and takes everything up to the next {@code "/"} as the tag name.
   * Names already collected, or rejected by {@code isIllegalPage}, are ignored.
   *
   * <p>As a side effect the output file and folder belonging to the dummy tag are deleted
   * before scanning.
   *
   * @param url address passed to {@code getAllPageString} to obtain the text to scan
   * @return the collected tag names in order of first appearance; the first element is always
   *     the dummy tag name {@code "ghdxj"}
   */
  private List<String> getTagNameList(String url) {
    // Number of characters inspected after the opening quote to decide whether the link is
    // absolute (i.e. contains baseURL).
    final int probeLength = 30;

    List<String> urlList = new ArrayList<String>();

    // Per the original author's note: the first entry always ends up with wrong comment data,
    // so a dummy tag is placed first to absorb that error. Its previous output is removed.
    String dummyTagName = "ghdxj";
    Common.deleteFile(getBaseOutputDirectory(), dummyTagName + ".js");
    Common.deleteFolder(getBaseOutputDirectory() + dummyTagName);
    urlList.add(dummyTagName);

    String allPageString = getAllPageString(url);
    int pageLength = allPageString.length();
    int beginIndex = 0;

    while (true) {
      beginIndex = allPageString.indexOf(" href=", beginIndex);
      if (beginIndex < 0) {
        break;
      }

      // Move past "=" and the opening quote character.
      beginIndex = allPageString.indexOf("=", beginIndex) + 2;
      if (beginIndex >= pageLength) {
        // The attribute sits at the very end of the text; nothing left to parse.
        break;
      }

      // Clamp the probe window so a link near the end of the text cannot overrun the string.
      String probe =
          allPageString.substring(beginIndex, Math.min(beginIndex + probeLength, pageLength));
      if (probe.indexOf(baseURL) >= 0) {
        // Absolute link, e.g. href='http://manhua.dmzj.com/lianaibaojun/
        // Skip the host part so beginIndex lands on the "/" that starts the path.
        beginIndex = allPageString.indexOf(".com", beginIndex);
        if (beginIndex < 0) {
          break;
        }
        beginIndex = allPageString.indexOf("/", beginIndex);
        if (beginIndex < 0) {
          break;
        }
      }

      beginIndex++; // start right after the leading "/"
      int endIndex = allPageString.indexOf("/", beginIndex);
      if (endIndex < 0) {
        break;
      }
      String tagName = allPageString.substring(beginIndex, endIndex);

      // beginIndex is already past this href, so skipping here cannot loop forever.
      if (urlList.contains(tagName) || isIllegalPage(tagName)) {
        continue;
      }

      urlList.add(tagName);
      beginIndex = endIndex;
    }

    return urlList;
  }