public List<TimeTable> parseTimeTables(String html) { Parser parser = new Parser(); try { parser.setInputHTML(html); parser.setEncoding("utf-8"); } catch (ParserException e) { // TODO Auto-generated catch block e.printStackTrace(); } List<TimeTable> list = new ArrayList<TimeTable>(); NodeFilter filter = new NodeClassFilter(TableTag.class); NodeList nodeList = null; try { nodeList = parser.extractAllNodesThatMatch(filter); } catch (ParserException e) { // TODO Auto-generated catch block e.printStackTrace(); } for (int i = 0; i < nodeList.size(); i++) { if (nodeList.elementAt(i) instanceof TableTag) { TableTag tag = (TableTag) nodeList.elementAt(i); if (tag.getText().indexOf("[课程号]") == -1) { continue; } TableRow[] rows = tag.getRows(); for (int j = 1; j < rows.length; j++) { TableRow row = (TableRow) rows[j]; TableColumn[] columns = row.getColumns(); boolean isCourse = false; TimeTable timeTable = null; for (int k = 0; k < columns.length; k++) { Node columnNode = columns[k]; String info = columnNode.toPlainTextString().trim(); System.out.println(info + "===" + k); switch (k) { case 1: int start = info.indexOf("["); int end = info.indexOf("]"); timeTable = new TimeTable(); timeTable.setCourseCode(info.substring(start + 1, end)); timeTable.setCourseName(info.substring(end + 1)); break; case 3: timeTable.setCredit(Double.parseDouble(info)); break; case 4: timeTable.setType(info); break; case 5: int t_start = info.indexOf("]"); timeTable.setTeacher(info.substring(t_start + 1)); break; case 8: List<TimeAndAdress> ta_list = praseStr(info); for (TimeAndAdress ta : ta_list) { timeTable.setAddress(ta.getAddress()); timeTable.setTime(ta.getTime()); timeTable.setCycle(ta.getCycle()); timeTable.setSingleDouble(ta.getSingleDouble()); timeTable.setWeek(ta.getWeek()); list.add(timeTable.clone()); } break; default: break; } } } // end for j } } return list; }
public List<TimeTable> parseTimeTables(String html) { Parser parser = new Parser(); try { parser.setInputHTML(html); parser.setEncoding("utf-8"); } catch (ParserException e) { // TODO Auto-generated catch block e.printStackTrace(); } List<TimeTable> list = new ArrayList<TimeTable>(); NodeFilter tagfilter = new NodeClassFilter(TableTag.class); NodeFilter idFilter = new HasAttributeFilter("id", "reportArea"); NodeFilter filter = new AndFilter(tagfilter, idFilter); NodeList nodeList = null; try { nodeList = parser.extractAllNodesThatMatch(filter); } catch (ParserException e) { // TODO Auto-generated catch block e.printStackTrace(); } for (int i = 0; i < nodeList.size(); i++) { if (nodeList.elementAt(i) instanceof TableTag) { TableTag tag = (TableTag) nodeList.elementAt(i); TableRow[] rows = tag.getRows(); for (int j = 0; j < rows.length; j++) { TableRow row = (TableRow) rows[j]; TableColumn[] columns = row.getColumns(); boolean isCourse = false; TimeTable timeTable = null; for (int k = 0; k < columns.length; k++) { Node columnNode = columns[k]; String info = columnNode.toPlainTextString().trim(); // System.out.println(info+"=="+k); if (k == 1 && info.indexOf("[") != -1) { timeTable = new TimeTable(); String courseCode = info.substring(1, 9); String coursesname = info.substring(10); timeTable.setCourseName(coursesname); timeTable.setCourseCode(courseCode); isCourse = true; } if (k == 2 && isCourse) { double credit = Double.parseDouble(info); timeTable.setCredit(credit); } if (k == 3 && isCourse) { timeTable.setType(info); } if (k == 4 && isCourse) { timeTable.setTeacher(info); } if (k == 5 && isCourse) { timeTable.setClassId(info); } if (k == 6 && isCourse) { timeTable.setClassNum(info); } if (k == 11 && isCourse) { List<TimeAndAdress> ta_list = praseStr(info); for (TimeAndAdress ta : ta_list) { timeTable.setAddress(ta.getAddress()); timeTable.setTime(ta.getTime()); timeTable.setCycle(ta.getCycle()); timeTable.setSingleDouble(ta.getSingleDouble()); timeTable.setWeek(ta.getWeek()); list.add(timeTable.clone()); } } } // end for k } // end for j } } return list; }