Пример #1
0
 /**
  * Exposes the text piece table owned by this document's complex file table.
  *
  * @return the value of {@code _cft.getTextPieceTable()}
  */
 @Internal
 public TextPieceTable getTextTable() {
   final TextPieceTable pieceTable = _cft.getTextPieceTable();
   return pieceTable;
 }
Пример #2
0
  /**
   * Writes out the word file that is represented by an instance of this class.
   *
   * <p>The structures are serialized into a fresh word-document stream and table stream, the
   * FileInformationBlock is updated with the offset and size of each structure as it is written,
   * and the resulting streams are stored in a new POIFSFileSystem that is written to {@code out}.
   * The table stream is always written under the {@code STREAM_TABLE_1} name. After writing, this
   * instance's {@code directory}, {@code _tableStream} and {@code _dataStream} fields refer to the
   * newly produced data.
   *
   * @param out The OutputStream to write to.
   * @throws IOException If there is an unexpected IOException from the passed in OutputStream.
   */
  public void write(OutputStream out) throws IOException {
    // initialize our streams for writing.
    HWPFFileSystem docSys = new HWPFFileSystem();
    HWPFOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT);
    HWPFOutputStream tableStream = docSys.getStream(STREAM_TABLE_1);
    int tableOffset = 0;

    // clear the offsets and sizes in our FileInformationBlock.
    _fib.clearOffsetsSizes();

    // determine the FileInformationBlock size, rounded up to a multiple of
    // SMALLER_BIG_BLOCK_SIZE (an already aligned size still grows by one full block).
    int fibSize = _fib.getSize();
    fibSize +=
        POIFSConstants.SMALLER_BIG_BLOCK_SIZE - (fibSize % POIFSConstants.SMALLER_BIG_BLOCK_SIZE);

    // preserve space for the FileInformationBlock because we will be writing
    // it after we write everything else.
    byte[] placeHolder = new byte[fibSize];
    wordDocumentStream.write(placeHolder);
    int mainOffset = wordDocumentStream.getOffset();

    // write out the StyleSheet.
    _fib.setFcStshf(tableOffset);
    _ss.writeTo(tableStream);
    _fib.setLcbStshf(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

    // get fcMin and fcMac because we will be writing the actual text with the
    // complex table.
    int fcMin = mainOffset;

    /*
     * clx (encoding of the sprm lists for a complex file and piece table
     * for a any file) Written immediately after the end of the previously
     * recorded structure. This is recorded in all Word documents
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 23 of 210
     */

    // write out the Complex table, includes text.
    _fib.setFcClx(tableOffset);
    _cft.writeTo(wordDocumentStream, tableStream);
    _fib.setLcbClx(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();
    int fcMac = wordDocumentStream.getOffset();

    /*
     * dop (document properties record) Written immediately after the end of
     * the previously recorded structure. This is recorded in all Word
     * documents
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 23 of 210
     */

    // write out the DocumentProperties.
    _fib.setFcDop(tableOffset);
    _dop.writeTo(tableStream);
    _fib.setLcbDop(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

    /*
     * plcfBkmkf (table recording beginning CPs of bookmarks) Written
     * immediately after the sttbfBkmk, if the document contains bookmarks.
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 24 of 210
     */
    if (_bookmarksTables != null) {
      _bookmarksTables.writePlcfBkmkf(_fib, tableStream);
      tableOffset = tableStream.getOffset();
    }

    /*
     * plcfBkmkl (table recording limit CPs of bookmarks) Written
     * immediately after the plcfBkmkf, if the document contains bookmarks.
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 24 of 210
     */
    if (_bookmarksTables != null) {
      _bookmarksTables.writePlcfBkmkl(_fib, tableStream);
      tableOffset = tableStream.getOffset();
    }

    /*
     * plcfbteChpx (bin table for CHP FKPs) Written immediately after the
     * previously recorded table. This is recorded in all Word documents.
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 24 of 210
     */

    // write out the CHPBinTable.
    _fib.setFcPlcfbteChpx(tableOffset);
    _cbt.writeTo(wordDocumentStream, tableStream, fcMin, _cft.getTextPieceTable());
    _fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

    /*
     * plcfbtePapx (bin table for PAP FKPs) Written immediately after the
     * plcfbteChpx. This is recorded in all Word documents.
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 24 of 210
     */

    // write out the PAPBinTable.
    _fib.setFcPlcfbtePapx(tableOffset);
    _pbt.writeTo(wordDocumentStream, tableStream, _cft.getTextPieceTable());
    _fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

    /*
     * plcfendRef (endnote reference position table) Written immediately
     * after the previously recorded table if the document contains endnotes
     *
     * plcfendTxt (endnote text position table) Written immediately after
     * the plcfendRef if the document contains endnotes
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 24 of 210
     */
    _endnotesTables.writeRef(_fib, tableStream);
    _endnotesTables.writeTxt(_fib, tableStream);
    tableOffset = tableStream.getOffset();

    /*
     * plcffld*** (table of field positions and statuses for annotation
     * subdocument) Written immediately after the previously recorded table,
     * if the ******* subdocument contains fields.
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 24 of 210
     */

    if (_fieldsTables != null) {
      _fieldsTables.write(_fib, tableStream);
      tableOffset = tableStream.getOffset();
    }

    /*
     * plcffndRef (footnote reference position table) Written immediately
     * after the stsh if the document contains footnotes
     *
     * plcffndTxt (footnote text position table) Written immediately after
     * the plcffndRef if the document contains footnotes
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 24 of 210
     */
    _footnotesTables.writeRef(_fib, tableStream);
    _footnotesTables.writeTxt(_fib, tableStream);
    tableOffset = tableStream.getOffset();

    /*
     * plcfsed (section table) Written immediately after the previously
     * recorded table. Recorded in all Word documents
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 25 of 210
     */

    // write out the SectionTable.
    _fib.setFcPlcfsed(tableOffset);
    _st.writeTo(wordDocumentStream, tableStream);
    _fib.setLcbPlcfsed(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

    // write out the list tables
    if (_lt != null) {
      /*
       * plcflst (list formats) Written immediately after the end of the
       * previously recorded, if there are any lists defined in the
       * document. This begins with a short count of LSTF structures
       * followed by those LSTF structures. This is immediately followed
       * by the allocated data hanging off the LSTFs. This data consists
       * of the array of LVLs for each LSTF. (Each LVL consists of an LVLF
       * followed by two grpprls and an XST.)
       *
       * Microsoft Office Word 97-2007 Binary File Format (.doc)
       * Specification; Page 25 of 210
       */
      _lt.writeListDataTo(_fib, tableStream);
      tableOffset = tableStream.getOffset();

      /*
       * plflfo (more list formats) Written immediately after the end of
       * the plcflst and its accompanying data, if there are any lists
       * defined in the document. This consists first of a PL of LFO
       * records, followed by the allocated data (if any) hanging off the
       * LFOs. The allocated data consists of the array of LFOLVLFs for
       * each LFO (and each LFOLVLF is immediately followed by some LVLs).
       *
       * Microsoft Office Word 97-2007 Binary File Format (.doc)
       * Specification; Page 26 of 210
       */
      _fib.setFcPlfLfo(tableStream.getOffset());
      _lt.writeListOverridesTo(tableStream);
      _fib.setLcbPlfLfo(tableStream.getOffset() - tableOffset);
      tableOffset = tableStream.getOffset();
    }

    /*
     * sttbfBkmk (table of bookmark name strings) Written immediately after
     * the previously recorded table, if the document contains bookmarks.
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 27 of 210
     */
    if (_bookmarksTables != null) {
      _bookmarksTables.writeSttbfBkmk(_fib, tableStream);
      tableOffset = tableStream.getOffset();
    }

    /*
     * sttbSavedBy (last saved by string table) Written immediately after
     * the previously recorded table.
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 27 of 210
     */

    // write out the saved-by table.
    if (_sbt != null) {
      _fib.setFcSttbSavedBy(tableOffset);
      _sbt.writeTo(tableStream);
      _fib.setLcbSttbSavedBy(tableStream.getOffset() - tableOffset);

      tableOffset = tableStream.getOffset();
    }

    // write out the revision mark authors table.
    if (_rmat != null) {
      _fib.setFcSttbfRMark(tableOffset);
      _rmat.writeTo(tableStream);
      _fib.setLcbSttbfRMark(tableStream.getOffset() - tableOffset);

      tableOffset = tableStream.getOffset();
    }

    // write out the FontTable.
    _fib.setFcSttbfffn(tableOffset);
    _ft.writeTo(tableStream);
    _fib.setLcbSttbfffn(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

    // set some variables in the FileInformationBlock.
    _fib.getFibBase().setFcMin(fcMin);
    _fib.getFibBase().setFcMac(fcMac);
    _fib.setCbMac(wordDocumentStream.getOffset());

    // make sure that the table, doc and data streams use big blocks.
    byte[] mainBuf = padToBigBlockMinimum(wordDocumentStream.toByteArray());

    // Table1 stream will be used
    _fib.getFibBase().setFWhichTblStm(true);

    // write out the FileInformationBlock. This must happen before the table
    // stream is snapshotted below because the table stream is passed to writeTo.
    _fib.writeTo(mainBuf, tableStream);

    byte[] tableBuf = padToBigBlockMinimum(tableStream.toByteArray());

    // a missing data stream is written as an all-zero minimum-size buffer
    byte[] dataBuf = padToBigBlockMinimum(_dataStream == null ? new byte[0] : _dataStream);

    // create new document preserving order of entries
    POIFSFileSystem pfs = buildFileSystem(mainBuf, tableBuf, dataBuf);
    pfs.writeFilesystem(out);

    /*
     * since we updated all references in FIB and etc, using new arrays to
     * access data
     */
    this.directory = pfs.getRoot();
    this._tableStream = tableStream.toByteArray();
    this._dataStream = dataBuf;
  }

  /**
   * Returns {@code buf} itself when it holds at least 4096 bytes, otherwise a new 4096-byte array
   * whose leading bytes are copied from {@code buf} and whose remainder is zero.
   */
  private static byte[] padToBigBlockMinimum(byte[] buf) {
    final int minimumSize = 4096;
    if (buf.length >= minimumSize) {
      return buf;
    }
    byte[] padded = new byte[minimumSize];
    System.arraycopy(buf, 0, padded, 0, buf.length);
    return padded;
  }

  /**
   * Creates a new POIFSFileSystem holding the given streams, following the entry order of the
   * current {@code directory}; entries that are not regenerated here are copied over as they are,
   * and any regenerated stream that the current directory lacks is appended at the end.
   */
  private POIFSFileSystem buildFileSystem(byte[] mainBuf, byte[] tableBuf, byte[] dataBuf)
      throws IOException {
    POIFSFileSystem pfs = new POIFSFileSystem();
    boolean docWritten = false;
    boolean dataWritten = false;
    boolean objectPoolWritten = false;
    boolean tableWritten = false;
    boolean propertiesWritten = false;
    for (Iterator<Entry> iter = directory.getEntries(); iter.hasNext(); ) {
      Entry entry = iter.next();
      String entryName = entry.getName();
      if (entryName.equals(STREAM_WORD_DOCUMENT)) {
        if (!docWritten) {
          pfs.createDocument(new ByteArrayInputStream(mainBuf), STREAM_WORD_DOCUMENT);
          docWritten = true;
        }
      } else if (entryName.equals(STREAM_OBJECT_POOL)) {
        if (!objectPoolWritten) {
          _objectPool.writeTo(pfs.getRoot());
          objectPoolWritten = true;
        }
      } else if (entryName.equals(STREAM_TABLE_0) || entryName.equals(STREAM_TABLE_1)) {
        // whichever table stream the source had, the output always uses Table1
        if (!tableWritten) {
          pfs.createDocument(new ByteArrayInputStream(tableBuf), STREAM_TABLE_1);
          tableWritten = true;
        }
      } else if (entryName.equals(SummaryInformation.DEFAULT_STREAM_NAME)
          || entryName.equals(DocumentSummaryInformation.DEFAULT_STREAM_NAME)) {
        // both property streams are emitted together by a single writeProperties call
        if (!propertiesWritten) {
          writeProperties(pfs);
          propertiesWritten = true;
        }
      } else if (entryName.equals(STREAM_DATA)) {
        if (!dataWritten) {
          pfs.createDocument(new ByteArrayInputStream(dataBuf), STREAM_DATA);
          dataWritten = true;
        }
      } else {
        EntryUtils.copyNodeRecursively(entry, pfs.getRoot());
      }
    }

    // append whatever the original directory did not contain
    if (!docWritten) {
      pfs.createDocument(new ByteArrayInputStream(mainBuf), STREAM_WORD_DOCUMENT);
    }
    if (!tableWritten) {
      pfs.createDocument(new ByteArrayInputStream(tableBuf), STREAM_TABLE_1);
    }
    if (!propertiesWritten) {
      writeProperties(pfs);
    }
    if (!dataWritten) {
      pfs.createDocument(new ByteArrayInputStream(dataBuf), STREAM_DATA);
    }
    if (!objectPoolWritten) {
      _objectPool.writeTo(pfs.getRoot());
    }
    return pfs;
  }
Пример #3
0
  /**
   * This constructor loads a Word document from a specific point in a POIFSFileSystem, probably not
   * the default. Used typically to open embedded documents.
   *
   * <p>The table stream named by the FileInformationBlock and the optional data stream are read
   * into memory, and the document structures (properties, text, character/paragraph bin tables,
   * drawings, sections, styles, fonts, lists, bookmarks, notes and fields) are built from them.
   *
   * @param directory The DirectoryNode that contains the Word document.
   * @throws IOException If there is an unexpected IOException from the passed in POIFSFileSystem.
   * @throws OldWordFileFormatException If the FileInformationBlock reports an nFib below 106.
   * @throws IllegalStateException If the table stream named by the FileInformationBlock is not
   *     present in {@code directory}.
   */
  public HWPFDocument(DirectoryNode directory) throws IOException {
    // Load the main stream and FIB
    // Also handles HPSF bits
    super(directory);

    // Is this document too old for us?
    if (_fib.getFibBase().getNFib() < 106) {
      throw new OldWordFileFormatException(
          "The document is too old - Word 95 or older. Try HWPFOldDocument instead?");
    }

    // use the fib to determine the name of the table stream.
    String name = STREAM_TABLE_0;
    if (_fib.getFibBase().isFWhichTblStm()) {
      name = STREAM_TABLE_1;
    }

    // Grab the table stream.
    DocumentEntry tableProps;
    try {
      tableProps = (DocumentEntry) directory.getEntry(name);
    } catch (FileNotFoundException fnfe) {
      // keep the original exception as the cause so the lookup failure stays diagnosable
      throw new IllegalStateException(
          "Table Stream '"
              + name
              + "' wasn't found - Either the document is corrupt, or is Word95 (or earlier)",
          fnfe);
    }

    // read in the table stream.
    _tableStream = readStreamFully(directory, name, tableProps.getSize());

    _fib.fillVariableFields(_mainStream, _tableStream);

    // read in the data stream; a document without one gets an empty array.
    try {
      DocumentEntry dataProps = (DocumentEntry) directory.getEntry(STREAM_DATA);
      _dataStream = readStreamFully(directory, STREAM_DATA, dataProps.getSize());
    } catch (java.io.FileNotFoundException e) {
      _dataStream = new byte[0];
    }

    // Get the cp of the start of text in the main stream
    // The latest spec doc says this is always zero!
    int fcMin = 0;

    // Start to load up our standard structures.
    _dop = new DocumentProperties(_tableStream, _fib.getFcDop(), _fib.getLcbDop());
    _cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin);
    TextPieceTable textPieceTable = _cft.getTextPieceTable();

    // Now load the rest of the properties, which need to be adjusted
    //  for where text really begin
    _cbt =
        new CHPBinTable(
            _mainStream,
            _tableStream,
            _fib.getFcPlcfbteChpx(),
            _fib.getLcbPlcfbteChpx(),
            textPieceTable);
    _pbt =
        new PAPBinTable(
            _mainStream,
            _tableStream,
            _dataStream,
            _fib.getFcPlcfbtePapx(),
            _fib.getLcbPlcfbtePapx(),
            textPieceTable);

    _text = textPieceTable.getText();

    /*
     * in this mode we preserving PAPX/CHPX structure from file, so text may
     * miss from output, and text order may be corrupted
     */
    boolean preserveBinTables = false;
    try {
      preserveBinTables = Boolean.parseBoolean(System.getProperty(PROPERTY_PRESERVE_BIN_TABLES));
    } catch (Exception ignored) {
      // best effort: if the system property cannot be read (e.g. a
      // SecurityException), fall back to the default of rebuilding.
    }

    if (!preserveBinTables) {
      _cbt.rebuild(_cft);
      _pbt.rebuild(_text, _cft);
    }

    /*
     * Property to disable text rebuilding. In this mode changing the text
     * will lead to unpredictable behavior
     */
    boolean preserveTextTable = false;
    try {
      preserveTextTable = Boolean.parseBoolean(System.getProperty(PROPERTY_PRESERVE_TEXT_TABLE));
    } catch (Exception ignored) {
      // best effort: if the system property cannot be read (e.g. a
      // SecurityException), fall back to the default of rebuilding.
    }
    if (!preserveTextTable) {
      // replace the loaded piece table with a fresh one holding the whole text as one piece
      _cft = new ComplexFileTable();
      textPieceTable = _cft.getTextPieceTable();
      final TextPiece textPiece = new SinglentonTextPiece(_text);
      textPieceTable.add(textPiece);
      _text = textPiece.getStringBuilder();
    }

    // Read FSPA and Escher information
    _fspaHeaders = new FSPATable(_tableStream, _fib, FSPADocumentPart.HEADER);
    _fspaMain = new FSPATable(_tableStream, _fib, FSPADocumentPart.MAIN);

    if (_fib.getFcDggInfo() != 0) {
      _escherRecordHolder =
          new EscherRecordHolder(_tableStream, _fib.getFcDggInfo(), _fib.getLcbDggInfo());
    } else {
      _escherRecordHolder = new EscherRecordHolder();
    }

    // read in the pictures stream
    _pictures = new PicturesTable(this, _dataStream, _mainStream, _fspaMain, _escherRecordHolder);
    // And the art shapes stream
    _officeArts = new ShapesTable(_tableStream, _fib);

    // And escher pictures
    _officeDrawingsHeaders = new OfficeDrawingsImpl(_fspaHeaders, _escherRecordHolder, _mainStream);
    _officeDrawingsMain = new OfficeDrawingsImpl(_fspaMain, _escherRecordHolder, _mainStream);

    _st =
        new SectionTable(
            _mainStream,
            _tableStream,
            _fib.getFcPlcfsed(),
            _fib.getLcbPlcfsed(),
            fcMin,
            textPieceTable,
            _fib.getSubdocumentTextStreamLength(SubdocumentType.MAIN));
    _ss = new StyleSheet(_tableStream, _fib.getFcStshf());
    _ft = new FontTable(_tableStream, _fib.getFcSttbfffn(), _fib.getLcbSttbfffn());

    // the optional tables below are only loaded when the FIB records a
    // non-zero offset and a non-zero length for them
    int listOffset = _fib.getFcPlcfLst();
    int lfoOffset = _fib.getFcPlfLfo();
    if (listOffset != 0 && _fib.getLcbPlcfLst() != 0) {
      _lt = new ListTables(_tableStream, listOffset, lfoOffset);
    }

    int sbtOffset = _fib.getFcSttbSavedBy();
    int sbtLength = _fib.getLcbSttbSavedBy();
    if (sbtOffset != 0 && sbtLength != 0) {
      _sbt = new SavedByTable(_tableStream, sbtOffset, sbtLength);
    }

    int rmarkOffset = _fib.getFcSttbfRMark();
    int rmarkLength = _fib.getLcbSttbfRMark();
    if (rmarkOffset != 0 && rmarkLength != 0) {
      _rmat = new RevisionMarkAuthorTable(_tableStream, rmarkOffset, rmarkLength);
    }

    _bookmarksTables = new BookmarksTables(_tableStream, _fib);
    _bookmarks = new BookmarksImpl(_bookmarksTables);

    _endnotesTables = new NotesTables(NoteType.ENDNOTE, _tableStream, _fib);
    _endnotes = new NotesImpl(_endnotesTables);
    _footnotesTables = new NotesTables(NoteType.FOOTNOTE, _tableStream, _fib);
    _footnotes = new NotesImpl(_footnotesTables);

    _fieldsTables = new FieldsTables(_tableStream, _fib);
    _fields = new FieldsImpl(_fieldsTables);
  }

  /**
   * Reads up to {@code size} bytes of the named document stream into a new array of exactly
   * {@code size} bytes and closes the stream. Reading is repeated until the array is full or the
   * stream ends; if the stream ends early the remaining bytes stay zero.
   */
  private static byte[] readStreamFully(DirectoryNode directory, String name, int size)
      throws IOException {
    byte[] buffer = new byte[size];
    java.io.InputStream in = directory.createDocumentInputStream(name);
    try {
      int total = 0;
      while (total < size) {
        // a single read may legitimately return fewer bytes than requested
        int count = in.read(buffer, total, size - total);
        if (count < 0) {
          break;
        }
        total += count;
      }
    } finally {
      in.close();
    }
    return buffer;
  }