/**
 * Returns the number of grid columns occupied by the given row, taking the {@code colspan}
 * attribute of each cell into account.
 *
 * <p>Only the children of the row accepted by {@code HTML_ROW_FILTER} are counted. A cell whose
 * {@code colspan} is missing, cannot be parsed (i.e. {@code MathUtils.parseInt} yields null) or
 * is less than 2 counts as exactly one column. This mirrors how {@code fromHtml} fills the grid,
 * where every cell contributes one value plus {@code colspan - 1} empty values.
 *
 * @param row the table row to inspect.
 * @return the total number of columns spanned by the cells of the row, 0 if the row has no
 *     matching cells.
 */
private static int getColumnCount(TableRow row) {
  // Defensive: a row without any child list has no cells, so report it as empty rather
  // than failing on the dereference below.
  if (row.getChildren() == null) {
    return 0;
  }

  Node[] cells = row.getChildren().extractAllNodesThatMatch(HTML_ROW_FILTER).toNodeArray();

  int cols = 0;

  for (Node cell : cells) {
    Integer colSpan = MathUtils.parseInt(((TagNode) cell).getAttribute("colspan"));

    // A span of zero or a negative span must not shrink the count: the cell itself always
    // occupies one column.
    cols += (colSpan != null && colSpan > 1) ? colSpan : 1;
  }

  return cols;
}
/** * Creates a list of Grids based on the given HTML string. This works only for table-based HTML * documents. * * @param html the HTML string. * @return a list of Grids. */ public static List<Grid> fromHtml(String html) throws Exception { if (html == null || html.trim().isEmpty()) { return null; } List<Grid> grids = new ArrayList<>(); Parser parser = Parser.createParser(html, "UTF-8"); Node[] tables = parser.extractAllNodesThatMatch(new TagNameFilter("table")).toNodeArray(); for (Node t : tables) { Grid grid = new ListGrid(); TableTag table = (TableTag) t; TableRow[] rows = table.getRows(); Integer firstColumnCount = null; for (TableRow row : rows) { if (getColumnCount(row) == 0) // Ignore if no cells { log.warn("Ignoring row with no columns"); continue; } Node[] cells = row.getChildren().extractAllNodesThatMatch(HTML_ROW_FILTER).toNodeArray(); if (firstColumnCount == null) // First row becomes header { firstColumnCount = getColumnCount(row); for (Node c : cells) { TagNode cell = (TagNode) c; grid.addHeader(new GridHeader(getValue(cell), false, false)); Integer colSpan = MathUtils.parseInt(cell.getAttribute("colspan")); if (colSpan != null && colSpan > 1) { grid.addEmptyHeaders((colSpan - 1)); } } } else // Rest becomes rows { if (firstColumnCount != getColumnCount(row)) // Ignore { log.warn( "Ignoring row which has " + row.getColumnCount() + " columns since table has " + firstColumnCount + " columns"); continue; } grid.addRow(); for (Node c : cells) { // TODO row span TagNode cell = (TagNode) c; grid.addValue(getValue(cell)); Integer colSpan = MathUtils.parseInt(cell.getAttribute("colspan")); if (colSpan != null && colSpan > 1) { grid.addEmptyValues((colSpan - 1)); } } } } grids.add(grid); } return grids; }