Ejemplo n.º 1
0
 /**
  * Collects the links of a page whose HTML contains a given fragment.
  *
  * <p>Collection stops as soon as the number of gathered links reaches {@code CatchNum}. A
  * {@link ParserException} raised while parsing is printed and whatever has been gathered so far
  * is returned.
  *
  * @param result the page HTML to parse
  * @param coditions text that must occur in a link's HTML for the link to be kept
  * @param codeKind charset name handed to {@code Parser.createParser}
  * @return the matching {@code LinkTag} nodes in document order
  */
 public static List getLinksByConditions(String result, String coditions, String codeKind) {
   // Page encoding configuration: to do by shengf.
   Parser pageParser = Parser.createParser(result, codeKind);
   List matched = new ArrayList();
   try {
     Node[] candidates = pageParser.parse(new NodeClassFilter(LinkTag.class)).toNodeArray();
     for (Node candidate : candidates) {
       if (!(candidate instanceof LinkTag)) {
         continue;
       }
       LinkTag anchor = (LinkTag) candidate;
       if (!anchor.toHtml().contains(coditions)) {
         continue;
       }
       matched.add(anchor);
       // Same cut-off as a counter that starts at 1 and is tested with "> CatchNum".
       if (matched.size() >= CatchNum) {
         break;
       }
     }
   } catch (ParserException e) {
     e.printStackTrace();
   }
   return matched;
 }
 /**
  * Runs a {@code UrlModifyingVisitor} built with the prefix {@code "localhost://"} over
  * {@code HTML_WITH_LINK} and expects the visitor's output to equal {@code MODIFIED_HTML}.
  */
 public void testUrlModificationWithVisitor() throws Exception {
   Parser pageParser = Parser.createParser(HTML_WITH_LINK, null);
   UrlModifyingVisitor urlRewriter = new UrlModifyingVisitor("localhost://");
   pageParser.visitAllNodesWith(urlRewriter);
   assertStringEquals("Expected HTML", MODIFIED_HTML, urlRewriter.getModifiedResult());
 }
Ejemplo n.º 3
0
  /**
   * Extracts the text of a news article.
   *
   * <p>Every node matched by {@code newsContentFilter} is cast to {@code Div}; the inner HTML of
   * those nodes is concatenated, re-parsed and flattened to text with a collapsing
   * {@code StringBean}. Two known page artefacts are then stripped: a leftover script fragment
   * (everything from a double quote up to a closing brace) and the "[我来说两句]" comment link.
   *
   * @param newsContentFilter filter selecting the Div element(s) that hold the article body
   * @param parser parser for the article page; it is reset once the Divs have been read
   * @return the cleaned article text; an empty string when the filter matched nothing; {@code
   *     null} when the initial parse throws a {@link ParserException}
   */
  public String getNewsContent(NodeFilter newsContentFilter, Parser parser) {
    String content = null;

    try {
      StringBuilder rawHtml = new StringBuilder();
      NodeList newsContentList = parser.parse(newsContentFilter);
      for (int i = 0; i < newsContentList.size(); i++) {
        Div newsContentTag = (Div) newsContentList.elementAt(i);
        rawHtml.append(newsContentTag.getStringText());
      }
      content = rawHtml.toString();
      parser.reset();

      // StringBuilder.toString() never returns null, so "nothing found" has to be detected by
      // emptiness; a null check here can never fail.
      if (content.isEmpty()) {
        System.out.println("没有得到新闻内容!");
      } else {
        // A dedicated parser is used for the fragment instead of overwriting the parameter.
        Parser fragmentParser = Parser.createParser(content, "utf8");
        StringBean textCollector = new StringBean();
        textCollector.setCollapse(true);
        fragmentParser.visitAllNodesWith(textCollector);
        content = textCollector.getStrings();

        // Removes residue such as:
        //   ";} else{ document.getElementById('TurnAD444').innerHTML = "";} }
        //   showTurnAD444(intTurnAD444); }catch(e){}
        content = content.replaceAll("\\\".*[a-z].*\\}", "");

        content = content.replace("[我来说两句]", "");
      }

    } catch (ParserException ex) {
      Logger.getLogger(AreaTest.class.getName()).log(Level.SEVERE, null, ex);
    }

    return content;
  }
Ejemplo n.º 4
0
  /**
   * Crawls listing pages 0 through 16 (URL built as {@code url3 + index + url4}) and prints one
   * line per course link found inside {@code <h2 class="field-content">} elements.
   *
   * <p>Each printed line has the form
   * {@code {"<running number>","http://www.chi.ac.uk/<href>","<link text>","0"},}.
   *
   * @param args unused
   * @throws Exception on any HTTP or parsing failure
   */
  public static void main(String[] args) throws Exception {
    RequestConfig requestConfig =
        RequestConfig.custom().setCookieSpec(CookieSpecs.STANDARD_STRICT).build();
    CloseableHttpClient httpclient =
        HttpClients.custom().setDefaultRequestConfig(requestConfig).build();
    try {
      int count = 1;
      for (int index = 0; index <= 16; index++) {
        HttpGet httpGet = new HttpGet(url3 + index + url4);
        HttpResponse response = httpclient.execute(httpGet);
        HttpEntity entity = response.getEntity();
        if (entity == null) {
          // Without a body there is nothing to parse; handing null to the parser would fail.
          System.err.println("No response body for page " + index + ", skipping.");
          continue;
        }
        String pageHtml = EntityUtils.toString(entity).replace("\t", " ");

        Parser pageParser = Parser.createParser(pageHtml, "utf-8");
        AndFilter headingFilter =
            new AndFilter(
                new TagNameFilter("h2"), new HasAttributeFilter("class", "field-content"));
        NodeList headings = pageParser.extractAllNodesThatMatch(headingFilter);
        for (int k = 0; k < headings.size(); k++) {
          // Re-parse each heading on its own so that only links inside it are considered.
          Parser headingParser = Parser.createParser(headings.elementAt(k).toHtml(), "utf-8");
          AndFilter anchorFilter =
              new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("href"));
          NodeList anchors = headingParser.extractAllNodesThatMatch(anchorFilter);
          for (int j = 0; j < anchors.size(); j++) {
            LinkTag link = (LinkTag) anchors.elementAt(j);
            String linkHtml = link.toHtml();

            System.out.println(
                "{\""
                    + count
                    + "\",\"http://www.chi.ac.uk/"
                    + link.getAttribute("href")
                    + "\",\""
                    + html2Str(linkHtml).replace("\r\n", "").trim()
                    + "\",\"0\"},");
            count++;
          }
        }
      }
    } finally {
      // Release the connection pool even when a request or parse step fails.
      httpclient.close();
    }
  }
Ejemplo n.º 5
0
  /**
   * Reads the {@code value} attribute of {@code <input id="__EVENTVALIDATION">} from the given
   * page and stores it in {@code eventValidation}.
   *
   * @param html page HTML, parsed as gb2312
   * @throws ParserException if parsing fails or the page has no such input element
   */
  public static void setEventValidation(String html) throws ParserException {
    Parser parser = Parser.createParser(html, "gb2312");
    AndFilter filter =
        new AndFilter(
            new TagNameFilter("input"), new HasAttributeFilter("id", "__EVENTVALIDATION"));
    NodeList nodes = parser.parse(filter);
    if (nodes.size() == 0) {
      // Fail with a clear message instead of a NullPointerException on the lookup below.
      throw new ParserException("no <input id=\"__EVENTVALIDATION\"> element found in page");
    }
    InputTag node = (InputTag) nodes.elementAt(0);

    eventValidation = node.getAttribute("value");
  }
Ejemplo n.º 6
0
  /**
   * Reads the {@code value} attribute of {@code <input id="__VIEWSTATE">} from the given page and
   * stores it in {@code viewState}.
   *
   * @param html page HTML, parsed as gb2312
   * @throws IllegalStateException if the page has no such input element
   * @throws Exception if parsing fails
   */
  public static void setViewState(String html) throws Exception {
    Parser parser = Parser.createParser(html, "gb2312");
    AndFilter filter =
        new AndFilter(new TagNameFilter("input"), new HasAttributeFilter("id", "__VIEWSTATE"));

    NodeList nodes = parser.parse(filter);
    if (nodes.size() == 0) {
      // Fail with a clear message instead of a NullPointerException on the lookup below.
      throw new IllegalStateException("no <input id=\"__VIEWSTATE\"> element found in page");
    }
    InputTag node = (InputTag) nodes.elementAt(0);

    viewState = node.getAttribute("value");
  }
Ejemplo n.º 7
0
  /**
   * Extracts the {@code standardid} query value from the link in the fourth column of every data
   * row of the table {@code ctl00_ContentPlaceHolder1_StandardView} and appends it to the list
   * stored in {@code map} under {@code pageNo}.
   *
   * <p>Rows whose link, {@code href} attribute or {@code standardid=} parameter is missing are
   * recorded in {@code error} under {@code pageNo} and skipped. Row 0 is skipped (presumably the
   * header row - confirm against the page).
   *
   * @param pageNo page number used as key in {@code map} and {@code error}
   * @param html page HTML, parsed as gb2312
   * @throws IllegalStateException if the table is not present in the page
   * @throws Exception if parsing fails
   */
  private static void setStandardIdsToMap(Integer pageNo, String html) throws Exception {
    Parser parser = Parser.createParser(html, "gb2312");
    AndFilter tableFilter =
        new AndFilter(
            new TagNameFilter("table"),
            new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_StandardView"));

    NodeList nodes = parser.parse(tableFilter);
    if (nodes.size() == 0) {
      throw new IllegalStateException(
          "table ctl00_ContentPlaceHolder1_StandardView not found on page " + pageNo);
    }
    TableTag node = (TableTag) nodes.elementAt(0);

    final String idKey = "standardid=";
    TableRow[] rows = node.getRows();
    for (int i = 1; i < rows.length; i++) {
      TableColumn[] cols = rows[i].getColumns();
      TableColumn col = cols[3];
      LinkTag tag = (LinkTag) ((Div) col.getChildren().elementAt(1)).getChildren().elementAt(2);

      String href = (tag == null) ? null : tag.getAttribute("href");
      int keyStart = (href == null) ? -1 : href.indexOf(idKey);
      if (keyStart < 0) {
        // No link, no href, or no standardid parameter: remember the row and move on.
        recordRowError(pageNo, i);
        continue;
      }

      // The value runs up to the next parameter separator ("&" also matches "&amp;"), or to the
      // end of the href when standardid is the last parameter.
      int valueStart = keyStart + idKey.length();
      int valueEnd = href.indexOf('&', valueStart);
      if (valueEnd < 0) {
        valueEnd = href.length();
      }
      String standardId = href.substring(valueStart, valueEnd);

      List<String> lst = map.get(pageNo);
      if (lst == null) {
        lst = new ArrayList<String>();
      }
      lst.add(standardId);
      map.put(pageNo, lst);
    }
  }

  /** Appends {@code rowIndex} to the list of unparseable rows kept in {@code error} for a page. */
  private static void recordRowError(Integer pageNo, int rowIndex) {
    List<Integer> lst = error.get(pageNo);
    if (lst == null) {
      lst = new ArrayList<Integer>();
    }
    lst.add(rowIndex);
    error.put(pageNo, lst);
  }
Ejemplo n.º 8
0
  /**
   * Replaces the children of {@code root} with the nodes parsed from {@code html}.
   *
   * <p>The existing child list is always cleared. When {@code html} is {@code null} nothing is
   * added afterwards.
   *
   * @param root element whose children are replaced
   * @param html markup to parse as UTF-8, or {@code null} to just empty the element
   * @throws RuntimeException wrapping the {@link ParserException} if the markup cannot be parsed
   */
  public static void setInnerHTML(Element root, String html) {
    // Drop whatever the element currently contains.
    ((OverrideNodeList<Node>) root.getChildNodes()).getList().clear();

    if (html == null) {
      return;
    }

    Parser fragmentParser = Parser.createParser(html, "UTF-8");
    try {
      fragmentParser.visitAllNodesWith(new GwtNodeVisitor(root));
    } catch (ParserException e) {
      String message =
          "error while parsing <" + root.getTagName() + "> element's innerHTML : " + html;
      throw new RuntimeException(message, e);
    }
  }
  /**
   * Tests modifying a parsed page in place: an anonymous visitor prefixes every link and image
   * URL with {@code "localhost://"}, and the re-serialised node list must equal
   * {@code MODIFIED_HTML}.
   */
  public void testPageModification() throws Exception {
    final String prefix = "localhost://";
    Parser pageParser = Parser.createParser(HTML_WITH_LINK, null);
    NodeList pageNodes = pageParser.parse(null); // null filter: keep every node

    // Inline equivalent of UrlModifyingVisitor.
    NodeVisitor urlPrefixer =
        new NodeVisitor() {
          public void visitTag(Tag tag) {
            if (tag instanceof LinkTag) {
              LinkTag anchor = (LinkTag) tag;
              anchor.setLink(prefix + anchor.getLink());
            } else if (tag instanceof ImageTag) {
              ImageTag image = (ImageTag) tag;
              image.setImageURL(prefix + image.getImageURL());
            }
          }
        };

    pageNodes.visitAllNodesWith(urlPrefixer);
    assertStringEquals("Expected HTML", MODIFIED_HTML, pageNodes.toHtml());
  }
Ejemplo n.º 10
0
  /**
   * Parses a detail page and appends, for rows 1 through 10 of the sixth {@code <table>} in the
   * page, the text found in the second and in the fourth column to the list stored in
   * {@code detailMap} under {@code key}.
   *
   * @param key key of the list in {@code detailMap} that receives the values
   * @param text page HTML, parsed as gb2312
   * @throws IllegalStateException if the page contains fewer than six tables
   * @throws Exception if parsing fails or the table does not have the expected layout
   */
  private static void addDetailToMap(String key, String text) throws Exception {
    Parser parser = Parser.createParser(text, "gb2312");
    TagNameFilter tableFilter = new TagNameFilter("table");

    NodeList nodes = parser.parse(tableFilter);
    if (nodes.size() <= 5) {
      // Fail with a clear message instead of a NullPointerException on the lookup below.
      throw new IllegalStateException(
          "expected at least 6 tables in detail page for key " + key + " but found "
              + nodes.size());
    }

    TableTag node = (TableTag) nodes.elementAt(5);

    TableRow[] rows = node.getRows();
    for (int i = 1; i < 11; i++) {
      TableColumn[] cols = rows[i].getColumns();

      String txt1 = collectChildText(cols[1]);
      String txt2 = collectChildText(cols[3]);

      List<String> lst = detailMap.get(key);
      if (lst == null) {
        lst = new ArrayList<String>();
      }
      lst.add(txt1);
      lst.add(txt2);
      detailMap.put(key, lst);
    }
  }

  /**
   * Joins, separated by single spaces and trimmed, the text nodes that are direct children of the
   * column's child at index 1.
   */
  private static String collectChildText(TableColumn column) {
    NodeList children = column.getChildren().elementAt(1).getChildren();
    StringBuilder joined = new StringBuilder();
    for (int j = 0; j < children.size(); j++) {
      if (children.elementAt(j) instanceof TextNode) {
        joined.append(children.elementAt(j).getText()).append(" ");
      }
    }
    return joined.toString().trim();
  }
Ejemplo n.º 11
0
 /**
  * Variant of the link collector used for land-transaction pages.
  *
  * <p>A link is kept when its HTML contains {@code coditions} and its inner HTML does not contain
  * "查看". Collection stops as soon as the number of gathered links reaches {@code CatchNum}. A
  * {@link ParserException} raised while parsing is printed and whatever has been gathered so far
  * is returned.
  *
  * @param result the page HTML to parse
  * @param coditions text that must occur in a link's HTML for the link to be kept
  * @param codeKind charset name handed to {@code Parser.createParser}
  * @return the matching {@code LinkTag} nodes in document order
  */
 public static List getLinksByConditions2(String result, String coditions, String codeKind) {
   Parser pageParser = Parser.createParser(result, codeKind);
   List matched = new ArrayList();
   try {
     Node[] candidates = pageParser.parse(new NodeClassFilter(LinkTag.class)).toNodeArray();
     for (Node candidate : candidates) {
       if (!(candidate instanceof LinkTag)) {
         continue;
       }
       LinkTag anchor = (LinkTag) candidate;
       boolean wanted =
           anchor.toHtml().contains(coditions) && !anchor.getChildrenHTML().contains("查看");
       if (!wanted) {
         continue;
       }
       matched.add(anchor);
       // Same cut-off as a counter that starts at 1 and is tested with "> CatchNum".
       if (matched.size() >= CatchNum) {
         break;
       }
     }
   } catch (ParserException e) {
     e.printStackTrace();
   }
   return matched;
 }
  /**
   * Downloads a course page and extracts its details into a map.
   *
   * <p>Elements of {@code url} used here: {@code url[0]} label printed when retrying,
   * {@code url[1]} address to fetch, {@code url[2]} and {@code url[4]} parts of the title,
   * {@code url[3]} level.
   *
   * <p>Keys written: "School", "Structure", "Scholarship", "IELTS Average Requirement", "IELTS
   * Lowest Requirement", "Title", "Level" and, when a program could be resolved through
   * {@code getProgram}, "Type" and "Length (months)".
   *
   * <p>NOTE: any exception is printed and the whole download is retried without limit, so this
   * method does not return while the page keeps failing.
   *
   * @param url descriptor array as described above
   * @return the extracted details
   */
  public static HashMap<String, String> SouthamptonGetDetails(String[] url) {

    while (true) {
      try {
        HashMap<String, String> result = new HashMap<String, String>();
        RequestConfig requestConfig =
            RequestConfig.custom().setSocketTimeout(10000).setConnectTimeout(10000).build();
        CloseableHttpClient httpclient =
            HttpClients.custom().setDefaultRequestConfig(requestConfig).build();
        try {
          HttpGet httpGet = new HttpGet(url[1]);
          HttpResponse response = httpclient.execute(httpGet);
          HttpEntity entity = response.getEntity();

          String htmls = null;
          if (entity != null) {
            htmls = EntityUtils.toString(entity).replace("\t", " ");
          }
          System.out.println("Got reply!");

          Parser parser = null;

          // ---------- school ----------
          // "span" is rewritten to "form" before parsing so the element can be matched as a
          // <form> tag below.
          parser = Parser.createParser(htmls.replace("span", "form"), "utf-8");
          AndFilter SFilter =
              new AndFilter(
                  new TagNameFilter("form"), new HasAttributeFilter("class", "first-owner"));
          NodeList nodes4 = parser.extractAllNodesThatMatch(SFilter);
          if (nodes4.size() > 0) {

            String school = html2Str(nodes4.elementAt(0).toHtml());
            result.put("School", school);
          }

          // ---------- entry structure ----------

          parser = Parser.createParser(htmls, "utf-8");
          AndFilter ESFilter =
              new AndFilter(
                  new TagNameFilter("div"),
                  new HasAttributeFilter("class", "body__inner w-doublewide copy"));
          NodeList nodes1 = parser.extractAllNodesThatMatch(ESFilter);
          String structure = "";
          String[] ProgramURL = null;
          if (nodes1.size() > 0) {
            String AllContents = nodes1.toHtml();
            // Split the content into sections, one per <h2 id=...> heading.
            String[] SP = AllContents.split("<h2 id=");
            for (int i = 1; i < SP.length; i++) {
              String row = "<h2 id=" + SP[i];
              if (row.contains("<h2 id=\"requirements\">Requirements</h2>")) {
                structure =
                    (html2Str(
                            row.replace("<br />", "\r\n")
                                .replace("</strong>", "")
                                .replace("<strong>", "")
                                .replace("</", "\r\n</")
                                .replace("\t", " ")
                                .replace("&amp;", " "))
                        .replace("\r\n\r\n", "\r\n"));
                structure = HTMLFilter(structure);
                result.put("Structure", structure);
              } // e.g. <a href="/program/BSC">Bachelor of Science (BSC)</a>
              else if (row.contains("<h2 id=\"relevant-degrees\">Relevant Degrees</h2>")) {
                parser = Parser.createParser(row, "utf-8");
                AndFilter ProfessionNameFilter =
                    new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("href"));
                NodeList nodes5 = parser.extractAllNodesThatMatch(ProfessionNameFilter);
                // Iterate over the links of THIS section (nodes5). Bounding the loop by the
                // school matches (nodes4) skipped links or indexed past the end of nodes5.
                for (int j = 0; j < nodes5.size(); j++) {
                  LinkTag link = (LinkTag) nodes5.elementAt(j);
                  if (!link.getAttribute("href").equals("#")) {
                    String code = link.getAttribute("href").replace("/program/", "");
                    ProgramURL = getProgram(code);
                    result.put("Scholarship", code);
                  }
                }
              }
            }
          }

          // ---------- IELTS ----------
          result.put("IELTS Average Requirement", "6.5");
          result.put("IELTS Lowest Requirement", "6.0");

          // ---------- title & type ----------

          result.put("Title", url[4] + " " + url[2]);

          result.put("Level", url[3]);
          if (ProgramURL != null) {
            result.put("Type", ProgramURL[2]);
            result.put("Length (months)", ProgramURL[5]);
          }

          return result;
        } finally {
          // Close the client on success AND on failure so retries do not leak connections.
          httpclient.close();
        }
      } catch (Exception ee) {
        System.out.println("Retrying..." + url[0]);
        ee.printStackTrace();
      }
    }
  }
  /**
   * Fetches the basketball lottery schedule page at {@code SCHEDULE_URL} and converts the rows of
   * the first table inside {@code div[class='box-tbl']} into schedule items.
   *
   * <p>Rows are read from index 2 onward; rows with fewer than six columns or with an official
   * number shorter than five characters are skipped. Column use: 0 official number (two-character
   * weekday prefix + sequence), 1 match name, 2 "away VS home", 3 match time, 4 sale state, 5
   * text listing play types used for the per-play sale flags.
   *
   * @param officialDate not used by this implementation
   * @return the parsed schedule items, possibly empty
   * @throws FetchFailedException if the page cannot be fetched, the table container is missing,
   *     or any row fails to parse
   */
  @Override
  public List<JclqScheduleItem> fetchJclqSchedule(String officialDate) throws FetchFailedException {
    Map<String, String> headerParams = new HashMap<String, String>();
    headerParams.put("Referer", "http://info.sporttery.cn");
    headerParams.put(
        "User-Agent",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.168 Safari/535.19");
    List<JclqScheduleItem> jclqScheduleItemList = new ArrayList<JclqScheduleItem>();

    String encoding = "gbk";
    String siteName = "中国竞彩网网[竞彩篮球赛程]";
    String logHeader = siteName + SCHEDULE_URL;

    try {
      String webInfo =
          CoreFetcherUtils.URLGetWithHeaderParams(SCHEDULE_URL, headerParams, null, encoding);
      // contains() also catches a body that STARTS with the marker; "indexOf(..) > 0" did not.
      if (webInfo == null || webInfo.contains("404 Not Found")) {
        logger.error(logHeader + ",data is null or 404 Not Found");
        throw new FetchFailedException("404 Not Found");
      }

      Parser parser = Parser.createParser(webInfo, encoding);

      NodeList nodeList =
          parser.extractAllNodesThatMatch(new CssSelectorNodeFilter("div[class='box-tbl']"));
      if (null != nodeList && nodeList.size() > 0) {
        NodeFilter tableFilter = new TagNameFilter("table");
        Parser parser2 = Parser.createParser(nodeList.toHtml(), encoding);
        NodeList tableNodeList = parser2.extractAllNodesThatMatch(tableFilter);
        if (tableNodeList != null && tableNodeList.size() > 0) {
          TableTag catchTableTag = (TableTag) tableNodeList.elementAt(0);
          if (catchTableTag != null) {
            TableRow[] catchRows = catchTableTag.getRows();
            for (int i = 2; i < catchRows.length; i++) {
              TableColumn[] catchColumns = catchRows[i].getColumns();
              if (catchColumns != null && catchColumns.length >= 6) {
                JclqScheduleItem jclqScheduleItem = new JclqScheduleItem();

                String officialNum = catchColumns[0].toPlainTextString().trim();
                if (officialNum.length() < 5) {
                  continue;
                }

                // Parse the match time first; a two-digit year gets the "20" century prefix.
                String matchDateStr = catchColumns[3].toPlainTextString().trim();
                String[] yearStr = matchDateStr.split("-");
                if (yearStr[0].length() <= 2) {
                  matchDateStr = "20" + matchDateStr + ":00";
                } else {
                  matchDateStr = matchDateStr + ":00";
                }
                Date matchDate = CoreDateUtils.parseDate(matchDateStr, CoreDateUtils.DATETIME);
                Calendar matchDateCalendar = Calendar.getInstance();
                matchDateCalendar.setTime(matchDate);
                matchDateCalendar.add(Calendar.MINUTE, 1);
                jclqScheduleItem.setMatchDate(matchDateCalendar.getTime());

                // Derive the official publication date from the weekday prefix, the current
                // date and the match time. Start from today at midnight.
                Calendar cd = Calendar.getInstance();
                cd.set(Calendar.HOUR_OF_DAY, 0);
                cd.set(Calendar.MINUTE, 0);
                cd.set(Calendar.SECOND, 0);
                cd.set(Calendar.MILLISECOND, 0);

                int nowWeekDay = cd.get(Calendar.DAY_OF_WEEK);
                int fetchWeekDay = weekDay.get(officialNum.substring(0, 2));

                if (nowWeekDay != fetchWeekDay) {
                  int m = fetchWeekDay - nowWeekDay;
                  if (m < -1) {
                    cd.add(Calendar.DATE, m + 7);
                  } else {
                    cd.add(Calendar.DATE, m);
                  }
                }

                // If the computed date lies after the match time, go back one week.
                if (cd.after(matchDateCalendar)) {
                  cd.add(Calendar.DATE, -7);
                }

                // If the computed date is more than a week before the match, move it forward by
                // whole weeks.
                long weekTimeInMillis = 3600 * 1000 * 24 * 7; // one week in milliseconds
                long diffTimeInMillis = matchDateCalendar.getTimeInMillis() - cd.getTimeInMillis();
                if (diffTimeInMillis > weekTimeInMillis) {
                  int diffWeekCount = (int) (diffTimeInMillis / weekTimeInMillis);
                  cd.add(Calendar.DATE, 7 * diffWeekCount);
                }

                jclqScheduleItem.setMatchNum(
                    CoreDateUtils.formatDate(cd.getTime(), "yyyyMMdd")
                        + LotteryConstant.JCLQ_MATCH_NUM_CODE_DEFAULT
                        + officialNum.substring(2));
                jclqScheduleItem.setOfficialDate(
                    CoreDateUtils.parseDate(CoreDateUtils.formatDate(cd.getTime())));
                Integer oNum = null;
                try {
                  oNum = Integer.valueOf(officialNum.substring(2));
                } catch (Exception e) {
                  logger.error("截取官方编码时,转换为Integer错误", e);
                }
                jclqScheduleItem.setOfficialNum(oNum);
                jclqScheduleItem.setMatchName(
                    JclqUtil.convertMatchName(
                        catchColumns[1].toPlainTextString().trim(),
                        LotteryType.JCLQ_SF,
                        FetcherType.T_PENGINEAPI));

                String team = catchColumns[2].toPlainTextString().trim();
                String[] teamStr = team.split("VS");
                jclqScheduleItem.setAwayTeam(teamStr[0].trim());
                jclqScheduleItem.setHomeTeam(teamStr[1].trim());

                if ("已开售".equals(catchColumns[4].toPlainTextString().trim())) {
                  jclqScheduleItem.setStatus(JclqRaceStatus.OPEN);
                } else {
                  jclqScheduleItem.setStatus(JclqRaceStatus.UNOPEN);
                }

                // A play type named in column 5 is flagged as not on sale. contains() is used
                // so that a name at the very start of the text is recognised as well.
                // NOTE(review): "胜负单关"/"胜负过关" are substrings of the "让分胜负..." names, so
                // the plain checks also fire for the handicap variants - confirm this is intended.
                String saleInfo = catchColumns[5].toPlainTextString().trim();
                if (saleInfo.contains("胜负单关")) {
                  jclqScheduleItem.setDynamicSaleSfStatus(JclqDynamicSaleStatus.SALE_UNOPEN);
                } else {
                  jclqScheduleItem.setDynamicSaleSfStatus(JclqDynamicSaleStatus.SALE_OPEN);
                }
                if (saleInfo.contains("胜负过关")) {
                  jclqScheduleItem.setStaticSaleSfStatus(JclqStaticSaleStatus.SALE_UNOPEN);
                } else {
                  jclqScheduleItem.setStaticSaleSfStatus(JclqStaticSaleStatus.SALE_OPEN);
                }
                if (saleInfo.contains("让分胜负单关")) {
                  jclqScheduleItem.setDynamicSaleRfsfStatus(JclqDynamicSaleStatus.SALE_UNOPEN);
                } else {
                  jclqScheduleItem.setDynamicSaleRfsfStatus(JclqDynamicSaleStatus.SALE_OPEN);
                }
                if (saleInfo.contains("让分胜负过关")) {
                  jclqScheduleItem.setStaticSaleRfsfStatus(JclqStaticSaleStatus.SALE_UNOPEN);
                } else {
                  jclqScheduleItem.setStaticSaleRfsfStatus(JclqStaticSaleStatus.SALE_OPEN);
                }
                if (saleInfo.contains("胜分差单关")) {
                  jclqScheduleItem.setDynamicSaleSfcStatus(JclqDynamicSaleStatus.SALE_UNOPEN);
                } else {
                  jclqScheduleItem.setDynamicSaleSfcStatus(JclqDynamicSaleStatus.SALE_OPEN);
                }
                if (saleInfo.contains("胜分差过关")) {
                  jclqScheduleItem.setStaticSaleSfcStatus(JclqStaticSaleStatus.SALE_UNOPEN);
                } else {
                  jclqScheduleItem.setStaticSaleSfcStatus(JclqStaticSaleStatus.SALE_OPEN);
                }
                if (saleInfo.contains("大小分单关")) {
                  jclqScheduleItem.setDynamicSaleDxfStatus(JclqDynamicSaleStatus.SALE_UNOPEN);
                } else {
                  jclqScheduleItem.setDynamicSaleDxfStatus(JclqDynamicSaleStatus.SALE_OPEN);
                }
                if (saleInfo.contains("大小分过关")) {
                  jclqScheduleItem.setStaticSaleDxfStatus(JclqStaticSaleStatus.SALE_UNOPEN);
                } else {
                  jclqScheduleItem.setStaticSaleDxfStatus(JclqStaticSaleStatus.SALE_OPEN);
                }
                jclqScheduleItemList.add(jclqScheduleItem);
              }
            } // end for catchRows
          } // end if catchTableTag != null
        } // end if tableNodeList has entries
      } else {
        logger.error(logHeader + "竞彩篮球赛程数据表格不存在,返回null");
        throw new FetchFailedException("竞彩篮球赛程数据表格不存在");
      }
    } catch (FetchFailedException e) {
      // Already logged where it was raised; pass it on unchanged instead of re-wrapping it.
      throw e;
    } catch (Exception e) {
      logger.error(logHeader + "竞彩篮球赛程错误" + e.getMessage(), e);
      throw new FetchFailedException(e.getMessage());
    }
    return jclqScheduleItemList;
  }
Ejemplo n.º 14
0
  /**
   * Creates a list of Grids based on the given HTML string. This works only for table-based HTML
   * documents.
   *
   * <p>One grid is produced per {@code <table>}. The first row that has at least one column
   * becomes the header; every later row becomes a grid row provided its column count (as
   * computed by {@code getColumnCount}) equals that of the header row, otherwise it is skipped
   * with a warning. A cell with {@code colspan > 1} is padded with empty headers/values. Row
   * spans are not handled.
   *
   * @param html the HTML string.
   * @return a list of Grids, or {@code null} if {@code html} is null or blank.
   * @throws Exception if the HTML cannot be parsed.
   */
  public static List<Grid> fromHtml(String html) throws Exception {
    if (html == null || html.trim().isEmpty()) {
      return null;
    }

    List<Grid> grids = new ArrayList<>();

    Parser parser = Parser.createParser(html, "UTF-8");

    Node[] tables = parser.extractAllNodesThatMatch(new TagNameFilter("table")).toNodeArray();

    for (Node t : tables) {
      Grid grid = new ListGrid();

      TableTag table = (TableTag) t;

      Integer firstColumnCount = null;

      for (TableRow row : table.getRows()) {
        // Computed once per row and used for every check and for the log message below.
        int columnCount = getColumnCount(row);

        if (columnCount == 0) {
          log.warn("Ignoring row with no columns");
          continue;
        }

        Node[] cells = row.getChildren().extractAllNodesThatMatch(HTML_ROW_FILTER).toNodeArray();

        if (firstColumnCount == null) {
          // First usable row becomes the header.
          firstColumnCount = columnCount;

          for (Node c : cells) {
            TagNode cell = (TagNode) c;

            grid.addHeader(new GridHeader(getValue(cell), false, false));

            Integer colSpan = MathUtils.parseInt(cell.getAttribute("colspan"));

            if (colSpan != null && colSpan > 1) {
              grid.addEmptyHeaders((colSpan - 1));
            }
          }
        } else {
          // Compare as primitives so the check can never degrade to a reference comparison of
          // two boxed Integers.
          if (firstColumnCount.intValue() != columnCount) {
            log.warn(
                "Ignoring row which has "
                    + columnCount
                    + " columns since table has "
                    + firstColumnCount
                    + " columns");
            continue;
          }

          grid.addRow();

          for (Node c : cells) {
            // TODO row span

            TagNode cell = (TagNode) c;

            grid.addValue(getValue(cell));

            Integer colSpan = MathUtils.parseInt(cell.getAttribute("colspan"));

            if (colSpan != null && colSpan > 1) {
              grid.addEmptyValues((colSpan - 1));
            }
          }
        }
      }

      grids.add(grid);
    }

    return grids;
  }
  /**
   * Parses a draw-result page into a {@code LotteryDraw}.
   *
   * <p>Basic data is read from the first element with class {@code tInfo}: its first four
   * {@code fc-red} elements are, in order, phase, sales volume, draw date ("yyyy年MM月dd日") and
   * winning numbers. Prize details are read from the first {@code <p>} inside the first element
   * with class {@code dInfo}: the text after the dashed separator line is split on
   * {@code &nbsp;} and consumed four values per prize row (name, winning count, prize amount,
   * and a fourth value that is ignored).
   *
   * @param html the page HTML
   * @return the parsed draw; the draw without prize details when the separator line is missing
   *     or the detail section fails to parse; {@code null} when the parser cannot be created, a
   *     required section is absent, or the basic section fails to parse
   */
  @Override
  protected LotteryDraw parseLotteryDrawResult(String html) {
    LotteryType lotteryType = this.getLotteryType();
    Parser parser = null;
    try {
      parser = Parser.createParser(html, CharsetConstant.CHARSET_UTF8);
    } catch (Exception e) {
      logger.error("解析html内容出错: {}", html, e);
      return null;
    }

    LotteryDraw lotteryDraw = new LotteryDraw();
    lotteryDraw.setLotteryType(lotteryType);

    // Basic information.
    try {
      NodeFilter tInfoFilter = new HasAttributeFilter("class", "tInfo");
      NodeList tInfoNodeList = parser.extractAllNodesThatMatch(tInfoFilter);
      if (tInfoNodeList.size() == 0) {
        return null;
      }
      parser.setInputHTML(tInfoNodeList.elementAt(0).toHtml());
      // The four red elements are, in order: phase, total sales, draw date, winning numbers.
      NodeFilter redFilter = new HasAttributeFilter("class", "fc-red");
      NodeList redNodeList = parser.extractAllNodesThatMatch(redFilter);
      if (redNodeList.size() < 4) {
        logger.error("解析的内容不符合要求: {}", tInfoNodeList.elementAt(0).toHtml());
        return null;
      }
      lotteryDraw.setPhase(redNodeList.elementAt(0).toPlainTextString().trim());
      lotteryDraw.setVolumeOfSales(
          StringUtils.replace(redNodeList.elementAt(1).toPlainTextString().trim(), ",", ""));
      Date drawDate =
          CoreDateUtils.parseDate(
              redNodeList.elementAt(2).toPlainTextString().trim(), "yyyy年MM月dd日");
      if (drawDate != null) {
        lotteryDraw.setTimeDraw(CoreDateUtils.formatDateTime(drawDate));
      }
      lotteryDraw.setResult(
          StringUtils.replace(redNodeList.elementAt(3).toPlainTextString().trim(), " ", ","));
    } catch (ParserException e) {
      logger.error(e.getMessage(), e);
      return null;
    }

    // Prize details.
    try {
      parser.setInputHTML(html);
      NodeFilter dInfoFilter = new HasAttributeFilter("class", "dInfo");
      NodeList dInfoNodeList = parser.extractAllNodesThatMatch(dInfoFilter);
      if (dInfoNodeList.size() == 0) {
        return null;
      }
      parser.setInputHTML(dInfoNodeList.elementAt(0).toHtml());
      NodeFilter prizeFilter = new TagNameFilter("p");
      NodeList prizeNodeList = parser.extractAllNodesThatMatch(prizeFilter);
      if (prizeNodeList.size() == 0) {
        return null;
      }
      String prizeText = prizeNodeList.elementAt(0).toPlainTextString();
      String[] splitted = prizeText.split("--------------------------------------------------");
      if (splitted.length < 2) {
        // The message has two placeholders; supply the offending text for the second one.
        logger.error("未解析到{}开奖详情: {}", lotteryType.getName(), prizeText);
        return lotteryDraw;
      }
      splitted = StringUtils.split(splitted[1].trim(), "&nbsp;");

      List<LotteryDrawPrizeItem> resultDetail = new ArrayList<LotteryDrawPrizeItem>();
      int index = -1;
      LotteryDrawPrizeItem prizeItem = null;
      for (int i = 0; i < splitted.length; i++) {
        String s = splitted[i].trim();
        if (s.length() == 0) {
          continue;
        }
        index++;
        if (index % 4 == 0) {
          // Four values per prize row: start a new item on every fourth value.
          index = 0;
          prizeItem = new LotteryDrawPrizeItem();
          resultDetail.add(prizeItem);
        }
        switch (index) {
          case 0:
            prizeItem.setName(s);
            break;
          case 1:
            prizeItem.setWinningCount(StringUtils.replace(s, "注", ""));
            break;
          case 2:
            prizeItem.setPrizeAmount(
                CoreStringUtils.replaceAll(
                    s,
                    new String[][] {
                      {"元", ""},
                      {",", ""}
                    }));
            break;
          default:
            break;
        }
      }
      lotteryDraw.setResultDetail(resultDetail);
    } catch (ParserException e) {
      logger.error(e.getMessage(), e);
    }

    return lotteryDraw;
  }
  /**
   * Returns the draw result for a phase.
   *
   * <p>The current result from {@code nowPhaseResult()} is returned when {@code phase} is null or
   * empty, or when it is the requested phase. Otherwise the page at
   * {@code RESULT_MORE_LOCALITY_URL} is fetched and the first element with class
   * {@code mytable} is searched (from row 1 on) for a row whose first cell equals {@code phase};
   * the second cell of that row supplies the result, with spaces replaced by commas.
   *
   * @param phase the phase to look up; null or empty means "current phase"
   * @return the draw for the phase, or {@code null} when it cannot be fetched, parsed or found
   */
  @Override
  public LotteryDraw fetchResultDetail(String phase) {

    LotteryDraw currentDraw = nowPhaseResult();
    if (phase == null || "".equals(phase)) {
      return currentDraw;
    }
    // Guard against a missing current result (or phase) instead of dereferencing it blindly.
    if (currentDraw != null && phase.equals(currentDraw.getPhase())) {
      return currentDraw;
    }

    String url = RESULT_MORE_LOCALITY_URL;

    String data = null;
    String pageInfo = "结果页面" + url;
    String encoding = "utf-8";
    String logHeader =
        "=="
            + lotteryScope
            + "=="
            + siteName
            + "=="
            + pageInfo
            + "==抓取=="
            + getLotteryType().getName()
            + "==";

    try {
      data = CoreFetcherUtils.URLGet(url, null, encoding);
    } catch (Exception e) {
      logger.error("获取html数据失败" + e.getMessage());
      return null;
    }

    // contains() also catches a body that STARTS with the marker; "indexOf(..) > 0" did not.
    if (data == null || data.isEmpty() || data.contains("404 Not Found")) {
      logger.error(logHeader + "data is null or 404 Not Found");
      return null;
    }
    Parser parser = null;
    try {
      parser = Parser.createParser(data, encoding);
    } catch (Exception e) {
      logger.error("解析html页面失败" + e.getMessage());
      return null;
    }
    NodeFilter filter = new HasAttributeFilter("class", "mytable");

    LotteryDraw lotteryDraw = null;
    try {
      NodeList nodeList = parser.extractAllNodesThatMatch(filter);
      if (nodeList.size() == 0 || !(nodeList.elementAt(0) instanceof TableTag)) {
        logger.error(logHeader + "no table with class 'mytable' found");
        return null;
      }
      TableTag tableTag = (TableTag) nodeList.elementAt(0);
      TableRow[] tableRows = tableTag.getRows();
      for (int i = 1; i < tableRows.length; i++) {
        TableColumn[] tableColumns = tableRows[i].getColumns();
        if (tableColumns.length < 2) {
          // Not a data row (phase and result cells are both required).
          continue;
        }
        String phaseTmp = tableColumns[0].toPlainTextString();
        if (phase.equals(phaseTmp)) {
          lotteryDraw = new LotteryDraw();
          // Phase.
          lotteryDraw.setPhase(phaseTmp);
          // Winning numbers.
          String strResult = tableColumns[1].toPlainTextString();
          strResult = strResult.trim().replace(" ", ",");
          lotteryDraw.setResult(strResult);
          // Lottery type.
          lotteryDraw.setLotteryType(super.getLotteryType());
          break;
        }
      }
    } catch (ParserException e) {
      logger.error("数据解析错误==" + e.getMessage(), e);
      return null;
    }
    return lotteryDraw;
  }
  /**
   * Downloads a Southampton ECS course page and extracts its details into a key/value map.
   *
   * <p>Keys written: "Month of Entry", "Title" and "Type" (when an {@code h2.uos-sia-title} is
   * found), "School", "Tuition Fee" (largest {@code &pound;} amount on the page, when any),
   * "Academic Entry Requirement" (from {@code div#pane_entry}), "Structure" (from {@code
   * div#pane_modules}), "Length (months)", "IELTS Average Requirement", "IELTS Lowest
   * Requirement", "Level" and "Scholarship".
   *
   * <p>NOTE(review): any failure while fetching or parsing is printed and the whole attempt is
   * retried without limit or delay; this original best-effort behaviour is kept, so the call can
   * block indefinitely while the page stays unreachable.
   *
   * @param url array whose element at index 1 is the address of the course page
   * @return the extracted details
   * @throws IllegalArgumentException if {@code url} is null, has fewer than two elements, or its
   *     element at index 1 is null (previously this spun forever in the retry loop)
   */
  public static HashMap<String, String> SouthamptonGetDetails2(String[] url) {
    // Fail fast on unusable input: retrying can never fix it.
    if (url == null || url.length < 2 || url[1] == null) {
      throw new IllegalArgumentException("url must hold the page address at index 1");
    }

    while (true) {
      CloseableHttpClient httpclient = null;
      try {
        HashMap<String, String> result = new HashMap<String, String>();
        RequestConfig requestConfig =
            RequestConfig.custom().setSocketTimeout(10000).setConnectTimeout(10000).build();
        httpclient = HttpClients.custom().setDefaultRequestConfig(requestConfig).build();

        HttpGet httpGet = new HttpGet(url[1]);
        HttpResponse response = httpclient.execute(httpGet);
        HttpEntity entity = response.getEntity();
        if (entity == null) {
          // Make the "no body" case explicit instead of an incidental NullPointerException;
          // it is still handled by the retry below.
          throw new IllegalStateException("Empty response body from " + url[1]);
        }
        String htmls = EntityUtils.toString(entity).replace("\t", " ");
        System.out.println("Got reply!");

        // ************************** month of entry **********************
        if (htmls.contains("September") || htmls.contains("september")) {
          result.put("Month of Entry", "9");
        } else if (htmls.contains("October") || htmls.contains("october")) {
          result.put("Month of Entry", "10");
        } else {
          result.put("Month of Entry", "");
        }

        // ************************** title & type **********************
        Parser parser = Parser.createParser(htmls, "utf-8");
        AndFilter titleFilter =
            new AndFilter(
                new TagNameFilter("h2"), new HasAttributeFilter("class", "uos-sia-title"));
        NodeList titleNodes = parser.extractAllNodesThatMatch(titleFilter);
        if (titleNodes.size() > 0) {
          String title = HTMLFilter(html2Str(titleNodes.toHtml()));
          result.put("Title", title);
          result.put("Type", GetType(title));
        }

        // ************************** school **********************
        result.put("School", "Electronics and Computer Science (ECS)");

        // ************************** fee **********************
        // Commas are stripped first so "&pound;9,250" is read as 9250; the largest amount wins.
        Pattern p = Pattern.compile("&pound;[0-9]+");
        Matcher m = p.matcher(htmls.replace(",", ""));
        int max = 0;
        while (m.find()) {
          try {
            int amount = Integer.parseInt(m.group().replace("&pound;", ""));
            if (amount > max) {
              max = amount;
            }
          } catch (NumberFormatException tooLarge) {
            // A digit run that overflows int is not a plausible fee; skip it rather than
            // failing the whole page (which would retry forever on the same content).
          }
        }
        if (max != 0) {
          System.out.println(max);
          result.put("Tuition Fee", "" + max);
        }

        // ************************** entry requirements **********************
        parser = Parser.createParser(htmls, "utf-8");
        AndFilter entryFilter =
            new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "pane_entry"));
        NodeList entryNodes = parser.extractAllNodesThatMatch(entryFilter);
        String entryAll = "";
        // If several nodes match, the last one wins.
        for (int i = 0; i < entryNodes.size(); i++) {
          Node node = (Node) entryNodes.elementAt(i);
          entryAll = (html2Str(node.toHtml().replace(">", "> "))).replace("\r", "");
          entryAll = entryAll.replace("\n", " ");
          entryAll = HTMLFilter(entryAll);
          result.put("Academic Entry Requirement", entryAll);
        }

        // ************************** structure (modules) **********************
        parser = Parser.createParser(htmls, "utf-8");
        AndFilter structureFilter =
            new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "pane_modules"));
        NodeList structureNodes = parser.extractAllNodesThatMatch(structureFilter);
        String structure = "";
        // If several nodes match, the last one wins.
        for (int i = 0; i < structureNodes.size(); i++) {
          Node node = (Node) structureNodes.elementAt(i);
          structure =
              (html2Str(
                      node.toHtml()
                          .replace("<br />", "\r\n")
                          .replace("</strong>", "")
                          .replace("<strong>", "")
                          .replace("</", "\r\n</")
                          .replace("\t", " ")
                          .replace("&amp;", " "))
                  .replace("\r\n\r\n", "\r\n"));
          structure = HTMLFilter(structure);
          result.put("Structure", structure);
        }

        // ************************** length **********************
        String length = getLastYear(structure);
        result.put("Length (months)", length);

        // ************************** IELTS **********************
        // Scores are probed from highest to lowest in the entry-requirement text; the first hit
        // is reported as the average and the second (if any) as the lowest.
        String requirements = entryAll;
        ArrayList<String> scores = new ArrayList<String>();
        if (requirements.contains("7.5")) {
          scores.add("7.5");
        }
        if (requirements.contains("7.0") || requirements.contains(" 7 ")) {
          scores.add("7.0");
        }
        if (requirements.contains("6.5")) {
          scores.add("6.5");
        }
        if (requirements.contains("6.0") || requirements.contains(" 6 ")) {
          scores.add("6.0");
        }
        if (requirements.contains("5.5")) {
          scores.add("5.5");
        }
        if (scores.size() == 1) {
          result.put("IELTS Average Requirement", scores.get(0));
          result.put("IELTS Lowest Requirement", scores.get(0));
        } else if (scores.size() >= 2) {
          result.put("IELTS Average Requirement", scores.get(0));
          result.put("IELTS Lowest Requirement", scores.get(1));
        } else {
          // Nothing recognised: fall back to fixed defaults.
          result.put("IELTS Average Requirement", "6.0");
          result.put("IELTS Lowest Requirement", "5.5");
        }

        result.put("Level", "Undergraduate");
        result.put("Scholarship", "");

        return result;
      } catch (Exception ee) {
        System.out.println("Retrying...");
        ee.printStackTrace();
      } finally {
        // Release the client on every attempt, including failed ones, so retries do not leak
        // connections.
        if (httpclient != null) {
          try {
            httpclient.close();
          } catch (Exception closeFailure) {
            closeFailure.printStackTrace();
          }
        }
      }
    }
  }