Example #1
0
  /**
   * Creates a list of Grids based on the given HTML string. This works only for table-based HTML
   * documents.
   *
   * <p>One Grid is produced for every {@code table} element found in the document. Within a table,
   * the first row that has at least one column becomes the grid headers and every following row
   * becomes a grid row. Rows without columns, and rows whose column count differs from that of the
   * header row, are logged and skipped. A cell with a {@code colspan} attribute greater than 1 is
   * padded with empty headers or values so that it occupies the spanned number of columns. Row
   * spans are not handled.
   *
   * @param html the HTML string.
   * @return a list of Grids, or null if the given HTML string is null or blank.
   * @throws Exception propagated from the underlying HTML parser.
   */
  public static List<Grid> fromHtml(String html) throws Exception {
    // Callers may rely on the null return for blank input, so it is kept as-is
    if (html == null || html.trim().isEmpty()) {
      return null;
    }

    List<Grid> grids = new ArrayList<>();

    Parser parser = Parser.createParser(html, "UTF-8");

    Node[] tables = parser.extractAllNodesThatMatch(new TagNameFilter("table")).toNodeArray();

    for (Node t : tables) {
      Grid grid = new ListGrid();

      TableTag table = (TableTag) t;

      // Column count of the header row, null until the header row has been seen
      Integer firstColumnCount = null;

      for (TableRow row : table.getRows()) {
        // Computed once per row and reused for the checks and the log message below
        int columnCount = getColumnCount(row);

        if (columnCount == 0) { // Ignore if no cells
          log.warn("Ignoring row with no columns");
          continue;
        }

        Node[] cells = row.getChildren().extractAllNodesThatMatch(HTML_ROW_FILTER).toNodeArray();

        if (firstColumnCount == null) { // First row becomes header
          firstColumnCount = columnCount;

          for (Node c : cells) {
            TagNode cell = (TagNode) c;

            grid.addHeader(new GridHeader(getValue(cell), false, false));

            Integer colSpan = MathUtils.parseInt(cell.getAttribute("colspan"));

            // Pad so that a spanning header cell occupies as many columns as it spans
            if (colSpan != null && colSpan > 1) {
              grid.addEmptyHeaders((colSpan - 1));
            }
          }
        } else { // Rest becomes rows
          // Compare as primitives so that this is never a reference comparison of boxed values
          if (firstColumnCount.intValue() != columnCount) { // Ignore
            // Report the same count that was compared above; row.getColumnCount() is a different
            // method and may yield a different number than getColumnCount(row)
            log.warn(
                "Ignoring row which has "
                    + columnCount
                    + " columns since table has "
                    + firstColumnCount
                    + " columns");
            continue;
          }

          grid.addRow();

          for (Node c : cells) {
            // TODO row span

            TagNode cell = (TagNode) c;

            grid.addValue(getValue(cell));

            Integer colSpan = MathUtils.parseInt(cell.getAttribute("colspan"));

            // Pad so that a spanning value cell occupies as many columns as it spans
            if (colSpan != null && colSpan > 1) {
              grid.addEmptyValues((colSpan - 1));
            }
          }
        }
      }

      grids.add(grid);
    }

    return grids;
  }