/**
   * Wraps each city's "Prepare" batch-job file in start/end markers and saves the result as the
   * final batch-job file (same name, without the "Prepare" suffix).
   *
   * <p>The city/subdomain pairs are first reloaded by calling {@code initCityAndSubdomainList()}.
   * For every pair the method reads
   * {@code ./data/batch/<site>/<site><City><type>BatchCreateJobFilePrepare.txt}, prepends a
   * {@code ^} line, appends {@code $} followed by a newline, and writes the text to
   * {@code ./data/batch/<site>/<site><City><type>BatchCreateJobFile.txt}. {@code <City>} is the
   * pair's subdomain with its first character upper-cased; {@code <site>} and {@code <type>} come
   * from {@code SITE_NAME_PINYING} and {@code TYPE_PINYING}.
   *
   * <p>Pairs with an empty subdomain, and prepare files whose content comes back as {@code null},
   * are reported on {@code System.err} and skipped.
   */
  public static void createAllOtherCityBatchJobReadyFile() {
    // Rebuild cityAndSubdomainList from the saved city list before iterating over it.
    initCityAndSubdomainList();

    // Everything up to the city name is the same for every file, so build it once.
    String pathPrefix = "./data/batch/" + SITE_NAME_PINYING + "/" + SITE_NAME_PINYING;

    for (Pair pair : cityAndSubdomainList) {
      String subdomain = pair.getValue();
      if (subdomain == null || subdomain.isEmpty()) {
        // Without a subdomain no file name can be derived (charAt(0) would throw).
        System.err.println("Skipping a city entry that has no subdomain.");
        continue;
      }

      // Character.toUpperCase is locale-independent, unlike String.toUpperCase(), and avoids
      // upper-casing the whole string just to take its first character.
      String cityName = Character.toUpperCase(subdomain.charAt(0)) + subdomain.substring(1);
      String fileStem = pathPrefix + cityName + TYPE_PINYING;
      String preparePath = fileStem + "BatchCreateJobFilePrepare.txt";
      String readyPath = fileStem + "BatchCreateJobFile.txt";

      String fileContent = FileUtil.getDataFile2StrKeepReturn(preparePath, "utf-8");
      if (fileContent == null) {
        // NOTE(review): whether FileUtil can return null (e.g. for a missing file) is not
        // visible here. If it can, concatenating it would write the literal text "null" into
        // the ready file, so skip such files instead.
        System.err.println(preparePath + " could not be read, skipped.");
        continue;
      }

      // "^" opens the batch and "$" closes it; "$" is appended directly after the prepared
      // content, so it sits on its own line only when that content ends with a newline.
      fileContent = "^" + "\n" + fileContent + "$" + "\n";
      FileUtil.writeStr2File(fileContent, readyPath, "utf-8");
      System.out.println(readyPath + " is completed!");
    }
  }
 /**
  * Collects all other cities listed on http://www.anjuke.com/index/ and saves them to
  * {@code ALL_OTHER_CITY_FILE_PATH}.
  *
  * <p>Beijing (北京) and Shanghai (上海) are left out; Guangzhou and Shenzhen are kept. Every
  * remaining link becomes one {@code href,cityName} line terminated by {@code \n}. The same text
  * is also printed to standard output.
  */
 @Test
 public void allOtherCityWhole() {
   // NOTE(review): presumably this fetches ALL_OTHER_CITY_URL and returns the first (index 0)
   // element matching ALL_OTHER_CITY_REGEX; confirm against ParserUtil.
   Element cityContainer =
       ParserUtil.parseUrlWithRegexAndResultIndex(ALL_OTHER_CITY_URL, ALL_OTHER_CITY_REGEX, 0);
   Elements cityLinks = ParserUtil.parseElementWithRegex(cityContainer, "a");

   // The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
   StringBuilder lines = new StringBuilder();
   for (Element link : cityLinks) {
     String href = link.attr("href");
     String cityName = link.text();
     // Literal-first equals keeps the comparison safe even if the text were null.
     if ("北京".equals(cityName) || "上海".equals(cityName)) {
       continue;
     }
     lines.append(href).append(",").append(cityName).append("\n");
   }

   // Materialize the text once and reuse it for both the console and the file.
   String fileContent = lines.toString();
   System.out.println(fileContent);
   FileUtil.writeStr2File(fileContent, ALL_OTHER_CITY_FILE_PATH, "utf-8");
 }
  /**
   * Rebuilds {@code cityAndSubdomainList} from the city list stored at
   * {@code ALL_OTHER_CITY_FILE_PATH}.
   *
   * <p>Each line of that file is expected to look like {@code href,cityName}. The subdomain is
   * the part of the href between its last {@code '/'} and the first {@code '.'} that follows it.
   * For every usable line a {@code Pair} with key = city name and value = subdomain is appended
   * to the list and echoed to standard output.
   *
   * <p>Blank lines are ignored. Lines without a comma-separated city name, or whose href has no
   * {@code '.'} after its last {@code '/'}, are reported on {@code System.err} and skipped
   * instead of aborting the whole load with an index exception.
   */
  public static void initCityAndSubdomainList() {
    String content = FileUtil.getDataFile2StrKeepReturn(ALL_OTHER_CITY_FILE_PATH, "utf-8");
    // Accept both "\n" and "\r\n" so a stray '\r' never ends up inside a city name.
    String[] lineArr = content.split("\\r?\\n");
    cityAndSubdomainList = new ArrayList<Pair>();

    for (String line : lineArr) {
      if (line.trim().isEmpty()) {
        // An empty file yields a single empty line; nothing to parse.
        continue;
      }

      String[] attrArr = line.split(",");
      if (attrArr.length < 2) {
        System.err.println("Skipping malformed city line (expected \"href,city\"): " + line);
        continue;
      }
      String href = attrArr[0];
      String city = attrArr[1];

      // The subdomain sits between the last '/' and the first '.' after it.
      int lastSlash = href.lastIndexOf('/');
      int firstDot = href.indexOf('.', lastSlash);
      if (firstDot < 0) {
        System.err.println("Skipping city line whose href has no subdomain: " + line);
        continue;
      }
      String subDomain = href.substring(lastSlash + 1, firstDot);

      Pair pair = new Pair();
      pair.setKey(city);
      pair.setValue(subDomain);
      cityAndSubdomainList.add(pair);
      System.out.println(city + " : " + subDomain);
    }

    System.out.println("/----------------------------------------------------------------/");
    System.out.println("all cities and their subdomains extraction is completed!");
    System.out.println("/----------------------------------------------------------------/");
  }