/**
 * Checks extraction through the document model: reads the text of every
 * paragraph in the document's overall range and compares the result,
 * element by element, with the expected paragraph texts in {@code p_text}.
 */
public void testExtractFromModel() {
    final Range wholeDocument = doc.getRange();
    final int paragraphCount = wholeDocument.numParagraphs();

    // Collect the text of each paragraph in document order.
    final String[] extracted = new String[paragraphCount];
    for (int idx = 0; idx < paragraphCount; idx++) {
        extracted[idx] = wholeDocument.getParagraph(idx).text();
    }

    // The paragraph count must match before individual texts are compared.
    assertEquals(p_text.length, extracted.length);

    int pos = 0;
    while (pos < p_text.length) {
        assertEquals(p_text[pos], extracted[pos]);
        pos++;
    }
}
@Override protected void processParagraphes( HWPFDocument wordDocument, Element flow, Range range, int currentTableLevel) { // TODO mc process paragraphes final ListTables listTables = wordDocument.getListTables(); int currentListInfo = 0; final int paragraphs = range.numParagraphs(); for (int p = 0; p < paragraphs; p++) { Paragraph paragraph = range.getParagraph(p); // 加入图片 CharacterRun cr = paragraph.getCharacterRun(0); this.processImage(flow, cr); // table if (paragraph.isInTable() && paragraph.getTableLevel() != currentTableLevel) { if (paragraph.getTableLevel() < currentTableLevel) throw new IllegalStateException( "Trying to process table cell with higher level (" + paragraph.getTableLevel() + ") than current table level (" + currentTableLevel + ") as inner table part"); Table table = range.getTable(paragraph); processTable(wordDocument, flow, table); p += table.numParagraphs(); p--; continue; } // 换页 if (paragraph.text().equals("\u000c")) { processPageBreak(wordDocument, flow); } if (paragraph.getIlfo() != currentListInfo) { currentListInfo = paragraph.getIlfo(); } // 嵌套段落 if (currentListInfo != 0) { if (listTables != null) { final ListFormatOverride listFormatOverride = listTables.getOverride(paragraph.getIlfo()); String label = getBulletText(listTables, paragraph, listFormatOverride.getLsid()); if ("".equals(label)) { itemSymbol = true; /* Element span = htmlDocumentFacade.getDocument().createElement("span"); span.setAttribute("style", "font-size:12.0pt;line-height:150%;font-family:Wingdings;mso-ascii-font-family:Wingdings;mso-hide:none;mso-ansi-language:EN-US;mso-fareast-language:ZH-CN;font-weight:normal;mso-bidi-font-weight:normal;font-style:normal;mso-bidi-font-style:normal;text-underline:windowtext none;text-decoration:none;background:transparent"); span.setTextContent("Ø"); flow.appendChild(span); */ } processParagraph(wordDocument, flow, currentTableLevel, paragraph, label); } else { logger.log( POILogger.WARN, "Paragraph #" + 
paragraph.getStartOffset() + "-" + paragraph.getEndOffset() + " has reference to list structure #" + currentListInfo + ", but listTables not defined in file"); processParagraph( wordDocument, flow, currentTableLevel, paragraph, AbstractWordUtils.EMPTY); } } else { processParagraph(wordDocument, flow, currentTableLevel, paragraph, AbstractWordUtils.EMPTY); } } }