/** Checks that the text of every paragraph read through the Range model equals {@code p_text}. */
  public void testExtractFromModel() {
    final Range range = doc.getRange();
    final int paragraphCount = range.numParagraphs();

    // Collect the text of each paragraph in document order.
    final String[] actual = new String[paragraphCount];
    int index = 0;
    while (index < paragraphCount) {
      actual[index] = range.getParagraph(index).text();
      index++;
    }

    // Same number of paragraphs first, then an element-by-element comparison.
    assertEquals(p_text.length, actual.length);
    int position = 0;
    for (String expected : p_text) {
      assertEquals(expected, actual[position]);
      position++;
    }
  }
示例#2
0
 /**
  * Deletes the span that begins at {@code start} and extends for {@code length} positions.
  *
  * <p>A {@code Range} is built over {@code [start, start + length)} with this object passed as
  * the third constructor argument, and that range's {@code delete()} is invoked.
  *
  * @param start offset at which the deleted span begins
  * @param length number of positions covered by the deleted span
  */
 public void delete(int start, int length) {
   final int end = start + length;
   new Range(start, end, this).delete();
 }
示例#3
0
  /**
   * Walks every paragraph of {@code range} and renders it into {@code flow}.
   *
   * <p>For each paragraph, in order:
   *
   * <ol>
   *   <li>its first character run is handed to {@code processImage};
   *   <li>if it is inside a table whose level differs from {@code currentTableLevel}, the whole
   *       table is rendered via {@code processTable} and the loop skips past the table's
   *       paragraphs;
   *   <li>if its text is exactly a form feed ({@code \u000c}), {@code processPageBreak} is called
   *       (the paragraph is still rendered afterwards);
   *   <li>it is rendered via {@code processParagraph}, with a bullet label when it references a
   *       list and list tables are available, otherwise with an empty label.
   * </ol>
   *
   * @param wordDocument document being converted; supplies the list tables
   * @param flow element that receives the generated output
   * @param range range whose paragraphs are processed
   * @param currentTableLevel table nesting level the caller is currently rendering
   * @throws IllegalStateException if a table paragraph reports a table level lower than
   *     {@code currentTableLevel}
   */
  @Override
  protected void processParagraphes(
      HWPFDocument wordDocument, Element flow, Range range, int currentTableLevel) {
    final ListTables listTables = wordDocument.getListTables();
    final int total = range.numParagraphs();

    for (int index = 0; index < total; index++) {
      final Paragraph paragraph = range.getParagraph(index);

      // Image handling: only the paragraph's first character run is inspected.
      this.processImage(flow, paragraph.getCharacterRun(0));

      // Table at a different nesting level than the one currently being rendered.
      if (paragraph.isInTable() && paragraph.getTableLevel() != currentTableLevel) {
        if (paragraph.getTableLevel() < currentTableLevel) {
          throw new IllegalStateException(
              "Trying to process table cell with higher level ("
                  + paragraph.getTableLevel()
                  + ") than current table level ("
                  + currentTableLevel
                  + ") as inner table part");
        }

        final Table table = range.getTable(paragraph);
        processTable(wordDocument, flow, table);

        // Skip the paragraphs consumed by the table; the loop increment adds the final +1.
        index += table.numParagraphs() - 1;
        continue;
      }

      // Page break
      if (paragraph.text().equals("\u000c")) {
        processPageBreak(wordDocument, flow);
      }

      // Plain paragraph: no list reference.
      final int listInfo = paragraph.getIlfo();
      if (listInfo == 0) {
        processParagraph(wordDocument, flow, currentTableLevel, paragraph, AbstractWordUtils.EMPTY);
        continue;
      }

      // List paragraph, but the file carries no list tables: warn and render without a label.
      if (listTables == null) {
        logger.log(
            POILogger.WARN,
            "Paragraph #"
                + paragraph.getStartOffset()
                + "-"
                + paragraph.getEndOffset()
                + " has reference to list structure #"
                + listInfo
                + ", but listTables not defined in file");

        processParagraph(wordDocument, flow, currentTableLevel, paragraph, AbstractWordUtils.EMPTY);
        continue;
      }

      // List paragraph: resolve the bullet/number label through the list override.
      final ListFormatOverride listFormatOverride = listTables.getOverride(listInfo);
      final String label = getBulletText(listTables, paragraph, listFormatOverride.getLsid());

      // An empty label sets the itemSymbol flag on this converter.
      if ("".equals(label)) {
        itemSymbol = true;
      }

      processParagraph(wordDocument, flow, currentTableLevel, paragraph, label);
    }
  }
示例#4
0
 /**
  * Returns the text of the document's overall range, trimmed with {@link String#trim()}.
  *
  * @return the trimmed range text
  */
 private String GetRawText() {
   return document.getRange().text().trim();
 }