Пример #1
0
  /**
   * Creates a Workbook from the given NPOIFSFileSystem, which may be password protected.
   *
   * <p>If the root directory contains the {@code Decryptor.DEFAULT_POIFS_ENTRY} entry, the file is
   * handled as an encrypted OOXML package: the supplied password (if any) is verified first, then
   * {@code Decryptor.DEFAULT_PASSWORD}, and the decrypted data stream is opened as an OPC package.
   * Otherwise the root directory is loaded as an HSSF workbook, with the password registered
   * through {@code Biff8EncryptionKey.setCurrentUserPassword} while the workbook is constructed.
   *
   * @param fs the file system to load the workbook from
   * @param password the password to try, or {@code null} if none was supplied
   * @return the loaded workbook
   * @throws IOException on read failure; also wraps any GeneralSecurityException raised while
   *     verifying the password or opening the decrypted stream
   * @throws InvalidFormatException declared for the OOXML loading path
   * @throws EncryptedDocumentException if the file is encrypted and neither the supplied nor the
   *     default password verifies
   */
  private static Workbook create(NPOIFSFileSystem fs, String password)
      throws IOException, InvalidFormatException {
    DirectoryNode root = fs.getRoot();

    // Encrypted OOXML files go inside OLE2 containers, is this one?
    if (root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
      EncryptionInfo info = new EncryptionInfo(fs);
      Decryptor d = Decryptor.getInstance(info);

      boolean passwordCorrect = false;
      InputStream stream = null;
      try {
        // Try the caller's password first, then fall back to the default one.
        if (password != null && d.verifyPassword(password)) {
          passwordCorrect = true;
        }
        if (!passwordCorrect && d.verifyPassword(Decryptor.DEFAULT_PASSWORD)) {
          passwordCorrect = true;
        }
        if (passwordCorrect) {
          stream = d.getDataStream(root);
        }
      } catch (GeneralSecurityException e) {
        throw new IOException(e);
      }

      if (!passwordCorrect) {
        if (password != null) {
          throw new EncryptedDocumentException("Password incorrect");
        } else {
          throw new EncryptedDocumentException(
              "The supplied spreadsheet is protected, but no password was supplied");
        }
      }

      OPCPackage pkg = OPCPackage.open(stream);
      return create(pkg);
    }

    // If we get here, it isn't an encrypted XLSX file
    // So, treat it as a regular HSSF XLS one
    if (password != null) {
      Biff8EncryptionKey.setCurrentUserPassword(password);
    }
    try {
      return new HSSFWorkbook(root, true);
    } finally {
      // Always clear the registered password, even when loading fails, so it cannot
      // leak into a later, unrelated workbook load.
      Biff8EncryptionKey.setCurrentUserPassword(null);
    }
  }
  /**
   * Processes a file into essentially record events.
   *
   * <p>The workbook stream is looked up under the names "Workbook", "WORKBOOK" and "BOOK", in that
   * order; if none is present, "Workbook" is used and opening it is left to fail. The stream is
   * closed once processing has finished, whether or not it succeeded.
   *
   * @param req an Instance of HSSFRequest which has your registered listeners
   * @param dir a DirectoryNode containing your workbook
   * @throws IOException if the workbook stream cannot be opened, read or closed
   */
  public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException {
    // some old documents have "WORKBOOK" or "BOOK"
    final String name;
    Set<String> entryNames = dir.getEntryNames();
    if (entryNames.contains("Workbook")) {
      name = "Workbook";
    } else if (entryNames.contains("WORKBOOK")) {
      name = "WORKBOOK";
    } else if (entryNames.contains("BOOK")) {
      name = "BOOK";
    } else {
      name = "Workbook";
    }

    InputStream in = dir.createDocumentInputStream(name);
    try {
      processEvents(req, in);
    } finally {
      // Release the stream even if a listener or the reader throws.
      in.close();
    }
  }
  /**
   * This constructor loads a Word document from a specific point in a POIFSFileSystem, probably not
   * the default. Used typically to open embedded documents.
   *
   * <p>Reads the main document stream into memory, builds the FileInformationBlock from it and
   * sets up the object pool (backed by no directory when the object pool entry is absent).
   *
   * @param directory The DirectoryNode that contains the Word document.
   * @throws IOException If there is an unexpected IOException from the passed in POIFSFileSystem.
   */
  public HWPFDocumentCore(DirectoryNode directory) throws IOException {
    // Sort out the hpsf properties
    super(directory);

    // read in the main stream.
    // The buffer is sized from the same entry that is read below, so the two cannot disagree.
    DocumentEntry documentProps = (DocumentEntry) directory.getEntry(STREAM_WORD_DOCUMENT);
    _mainStream = new byte[documentProps.getSize()];

    java.io.InputStream mainIn = directory.createDocumentInputStream(STREAM_WORD_DOCUMENT);
    try {
      // NOTE(review): the byte count returned by read() is not checked; this assumes the
      // stream delivers the whole entry in a single call - confirm.
      mainIn.read(_mainStream);
    } finally {
      mainIn.close();
    }

    // Create our FIB, and check for the doc being encrypted
    _fib = new FileInformationBlock(_mainStream);

    // The object pool entry is optional; fall back to null when it is missing.
    DirectoryEntry objectPoolEntry;
    try {
      objectPoolEntry = (DirectoryEntry) directory.getEntry(STREAM_OBJECT_POOL);
    } catch (FileNotFoundException exc) {
      objectPoolEntry = null;
    }
    _objectPool = new ObjectPoolImpl(objectPoolEntry);
  }
Пример #4
0
  /**
   * Adds an embedded object to this presentation.
   *
   * <p>When the root storage of {@code poiData} carries a default-constructed class id, one is
   * chosen from the OLE map by looking for a matching entry name and written back to the root.
   * An OLE stream entry is added to {@code poiData} if it does not already have one, and the
   * whole file system is then serialized into a new persistent storage record.
   *
   * @param poiData the file system holding the object to embed; it may be modified as described
   * @return 0-based index of the embedded object
   * @throws IllegalArgumentException if a class id is needed but no OLE map entry matches
   * @throws HSLFException if writing the embedded file system fails with an IOException
   */
  public int addEmbed(POIFSFileSystem poiData) {
    final DirectoryNode rootDir = poiData.getRoot();

    // prepare embedded data
    final ClassID blankClsid = new ClassID();
    if (blankClsid.equals(rootDir.getStorageClsid())) {
      // need to set class id: take the first OLE map entry whose name exists in the root
      ClassID matchedClsid = null;
      for (Map.Entry<String, ClassID> candidate : getOleMap().entrySet()) {
        if (!rootDir.hasEntry(candidate.getKey())) {
          continue;
        }
        matchedClsid = candidate.getValue();
        break;
      }
      if (matchedClsid == null) {
        throw new IllegalArgumentException("Unsupported embedded document");
      }

      rootDir.setStorageClsid(matchedClsid);
    }

    final ExEmbed embedRecord = new ExEmbed();
    // remove unnecessary infos, so we don't need to specify the type
    // of the ole object multiple times
    final Record[] embedChildren = embedRecord.getChildRecords();
    for (int idx = 2; idx <= 4; idx++) {
      embedRecord.removeChild(embedChildren[idx]);
    }

    final ExEmbedAtom embedAtom = embedRecord.getExEmbedAtom();
    embedAtom.setCantLockServerB(true);

    final ExOleObjAtom oleObjAtom = embedRecord.getExOleObjAtom();
    oleObjAtom.setDrawAspect(ExOleObjAtom.DRAW_ASPECT_VISIBLE);
    oleObjAtom.setType(ExOleObjAtom.TYPE_EMBEDDED);
    // oleObjAtom.setSubType(ExOleObjAtom.SUBTYPE_EXCEL);
    // should be ignored?!?, see MS-PPT ExOleObjAtom, but Libre Office sets it ...
    oleObjAtom.setOptions(1226240);

    final ExOleObjStg storageRecord = new ExOleObjStg();
    try {
      final String oleStreamName = "\u0001Ole";
      final boolean oleStreamPresent = rootDir.hasEntry(oleStreamName);
      if (!oleStreamPresent) {
        // the following data was taken from an example libre office document
        // beside this Ole record there were several other records, e.g. CompObj,
        // OlePresXXX, but it seems, that they aren't necessary
        final byte[] oleStreamBytes = {1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
        poiData.createDocument(new ByteArrayInputStream(oleStreamBytes), oleStreamName);
      }

      final ByteArrayOutputStream serialized = new ByteArrayOutputStream();
      poiData.writeFilesystem(serialized);
      storageRecord.setData(serialized.toByteArray());
    } catch (IOException e) {
      throw new HSLFException(e);
    }

    // Register the storage record and link it to the OLE object atom.
    final int persistId = addPersistentObject(storageRecord);
    storageRecord.setPersistId(persistId);
    oleObjAtom.setObjStgDataRef(persistId);

    final int embedObjectId = addToObjListAtom(embedRecord);
    oleObjAtom.setObjID(embedObjectId);
    return embedObjectId;
  }
 /**
  * Processes a file into essentially record events.
  *
  * <p>The workbook stream is looked up under the names "Workbook", "WORKBOOK" and "BOOK", in that
  * order, matching processWorkbookEvents; if none is present, "Workbook" is used and opening it
  * is left to fail. The stream is closed once processing has finished or been aborted.
  *
  * @param req an Instance of HSSFRequest which has your registered listeners
  * @param dir a DirectoryNode containing your workbook
  * @return numeric user-specified result code.
  * @throws IOException if the workbook stream cannot be opened, read or closed
  * @throws HSSFUserException propagated from abortableProcessEvents
  */
 public short abortableProcessWorkbookEvents(HSSFRequest req, DirectoryNode dir)
     throws IOException, HSSFUserException {
   // some old documents have "WORKBOOK" or "BOOK"
   final String name;
   if (dir.hasEntry("Workbook")) {
     name = "Workbook";
   } else if (dir.hasEntry("WORKBOOK")) {
     name = "WORKBOOK";
   } else if (dir.hasEntry("BOOK")) {
     name = "BOOK";
   } else {
     name = "Workbook";
   }

   InputStream in = dir.createDocumentInputStream(name);
   try {
     return abortableProcessEvents(req, in);
   } finally {
     // Release the stream even if a listener aborts or the reader throws.
     in.close();
   }
 }
  /**
   * Extracts the payload bytes of an embedded OLE2 (POIFS) object.
   *
   * <p>Handling depends on what the root directory contains:
   *
   * <ul>
   *   <li>a "Package" entry: the bytes of that entry are returned;
   *   <li>an OLE10Native object: its data buffer is returned, or {@code null} if the record
   *       cannot be parsed;
   *   <li>a CompObj object: the bytes of its "CONTENTS" (or "Contents") entry are returned;
   *   <li>anything else: the input stream is reset and copied as-is, and a generated resource
   *       name and the detected content type are stored in {@code metadata}.
   * </ul>
   *
   * @param is the stream holding the embedded object; NOTE(review): the fallback branch calls
   *     {@code is.reset()}, so the caller presumably has to supply a stream that supports
   *     reset back to its start - confirm against callers
   * @param metadata receives the resource name and content type in the fallback branch
   * @param unknownFilenameCount counter used to number generated resource names
   * @return the extracted bytes; can be {@code null}
   * @throws IOException if the stream is not actually POIFS, or on any read failure
   */
  private byte[] handleEmbeddedPOIFS(
      InputStream is, Metadata metadata, AtomicInteger unknownFilenameCount) throws IOException {

    NPOIFSFileSystem fs = null;
    byte[] ret = null;
    try {

      fs = new NPOIFSFileSystem(is);

      DirectoryNode root = fs.getRoot();

      if (root == null) {
        return ret;
      }

      if (root.hasEntry("Package")) {
        Entry ooxml = root.getEntry("Package");
        TikaInputStream stream =
            TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml));
        try {
          ByteArrayOutputStream out = new ByteArrayOutputStream();

          IOUtils.copy(stream, out);
          ret = out.toByteArray();
        } finally {
          // Close the wrapper so whatever it holds is released even if the copy fails.
          stream.close();
        }
      } else {
        // try poifs
        POIFSDocumentType type = POIFSDocumentType.detectType(root);
        if (type == POIFSDocumentType.OLE10_NATIVE) {
          try {
            // Try to un-wrap the OLE10Native record:
            Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(root);
            ret = ole.getDataBuffer();
          } catch (Ole10NativeException ignored) {
            // Not a valid OLE10Native record, skip it (ret stays null)
          }
        } else if (type == POIFSDocumentType.COMP_OBJ) {

          // The entry name varies in case; a miss on the second name propagates to the caller.
          DocumentEntry contentsEntry;
          try {
            contentsEntry = (DocumentEntry) root.getEntry("CONTENTS");
          } catch (FileNotFoundException ioe) {
            contentsEntry = (DocumentEntry) root.getEntry("Contents");
          }

          DocumentInputStream inp = null;
          try {
            inp = new DocumentInputStream(contentsEntry);
            ret = new byte[contentsEntry.getSize()];
            inp.readFully(ret);
          } finally {
            if (inp != null) {
              inp.close();
            }
          }
        } else {

          // Unknown container: hand back the raw bytes of the whole embedded object.
          ByteArrayOutputStream out = new ByteArrayOutputStream();
          is.reset();
          IOUtils.copy(is, out);
          ret = out.toByteArray();
          metadata.set(
              Metadata.RESOURCE_NAME_KEY,
              "file_" + unknownFilenameCount.getAndIncrement() + "." + type.getExtension());
          metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());
        }
      }
    } finally {
      if (fs != null) {
        fs.close();
      }
    }
    return ret;
  }
Пример #7
0
  /**
   * This constructor loads a Word document from a specific point in a POIFSFileSystem, probably not
   * the default. Used typically to open embedded documents.
   *
   * <p>After the superclass has loaded the main stream and FIB, this reads the table stream and
   * the optional data stream, then builds the document structures from them (text pieces,
   * character/paragraph tables, drawings, sections, styles, fonts, lists, bookmarks, notes and
   * fields). The system properties named by {@code PROPERTY_PRESERVE_BIN_TABLES} and {@code
   * PROPERTY_PRESERVE_TEXT_TABLE} switch off the respective rebuild steps.
   *
   * @param directory The DirectoryNode that contains the Word document.
   * @throws IOException If there is an unexpected IOException from the passed in POIFSFileSystem.
   * @throws OldWordFileFormatException if the FIB reports a version below 106
   * @throws IllegalStateException if the table stream named by the FIB is missing
   */
  public HWPFDocument(DirectoryNode directory) throws IOException {
    // Load the main stream and FIB
    // Also handles HPSF bits
    super(directory);

    // Is this document too old for us?
    if (_fib.getFibBase().getNFib() < 106) {
      throw new OldWordFileFormatException(
          "The document is too old - Word 95 or older. Try HWPFOldDocument instead?");
    }

    // use the fib to determine the name of the table stream.
    String name = STREAM_TABLE_0;
    if (_fib.getFibBase().isFWhichTblStm()) {
      name = STREAM_TABLE_1;
    }

    // Grab the table stream.
    DocumentEntry tableProps;
    try {
      tableProps = (DocumentEntry) directory.getEntry(name);
    } catch (FileNotFoundException fnfe) {
      // Keep the lookup failure as the cause so its details are not lost.
      throw new IllegalStateException(
          "Table Stream '"
              + name
              + "' wasn't found - Either the document is corrupt, or is Word95 (or earlier)",
          fnfe);
    }

    // read in the table stream.
    // NOTE(review): as before, the byte count returned by read() is not checked; this assumes
    // the stream delivers the whole entry in a single call - confirm.
    _tableStream = new byte[tableProps.getSize()];
    java.io.InputStream tableIn = directory.createDocumentInputStream(name);
    try {
      tableIn.read(_tableStream);
    } finally {
      tableIn.close();
    }

    _fib.fillVariableFields(_mainStream, _tableStream);

    // read in the data stream; it is optional, so a missing entry yields an empty buffer.
    try {
      DocumentEntry dataProps = (DocumentEntry) directory.getEntry(STREAM_DATA);
      _dataStream = new byte[dataProps.getSize()];
      java.io.InputStream dataIn = directory.createDocumentInputStream(STREAM_DATA);
      try {
        dataIn.read(_dataStream);
      } finally {
        dataIn.close();
      }
    } catch (java.io.FileNotFoundException e) {
      _dataStream = new byte[0];
    }

    // Get the cp of the start of text in the main stream
    // The latest spec doc says this is always zero!
    int fcMin = 0;
    // fcMin = _fib.getFcMin()

    // Start to load up our standard structures.
    _dop = new DocumentProperties(_tableStream, _fib.getFcDop(), _fib.getLcbDop());
    _cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin);
    TextPieceTable _tpt = _cft.getTextPieceTable();

    // Now load the rest of the properties, which need to be adjusted
    //  for where text really begin
    _cbt =
        new CHPBinTable(
            _mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt);
    _pbt =
        new PAPBinTable(
            _mainStream,
            _tableStream,
            _dataStream,
            _fib.getFcPlcfbtePapx(),
            _fib.getLcbPlcfbtePapx(),
            _tpt);

    _text = _tpt.getText();

    /*
     * in this mode we preserving PAPX/CHPX structure from file, so text may
     * miss from output, and text order may be corrupted
     */
    boolean preserveBinTables = false;
    try {
      preserveBinTables = Boolean.parseBoolean(System.getProperty(PROPERTY_PRESERVE_BIN_TABLES));
    } catch (Exception ignored) {
      // Best effort: if the property cannot be read, keep the default (false).
    }

    if (!preserveBinTables) {
      _cbt.rebuild(_cft);
      _pbt.rebuild(_text, _cft);
    }

    /*
     * Property to disable text rebuilding. In this mode changing the text
     * will lead to unpredictable behavior
     */
    boolean preserveTextTable = false;
    try {
      preserveTextTable = Boolean.parseBoolean(System.getProperty(PROPERTY_PRESERVE_TEXT_TABLE));
    } catch (Exception ignored) {
      // Best effort: if the property cannot be read, keep the default (false).
    }
    if (!preserveTextTable) {
      // Replace the piece table read from the file with a single piece holding all the text.
      _cft = new ComplexFileTable();
      _tpt = _cft.getTextPieceTable();
      final TextPiece textPiece = new SinglentonTextPiece(_text);
      _tpt.add(textPiece);
      _text = textPiece.getStringBuilder();
    }

    // Read FSPA and Escher information
    // _fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(),
    // _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
    _fspaHeaders = new FSPATable(_tableStream, _fib, FSPADocumentPart.HEADER);
    _fspaMain = new FSPATable(_tableStream, _fib, FSPADocumentPart.MAIN);

    if (_fib.getFcDggInfo() != 0) {
      _escherRecordHolder =
          new EscherRecordHolder(_tableStream, _fib.getFcDggInfo(), _fib.getLcbDggInfo());
    } else {
      _escherRecordHolder = new EscherRecordHolder();
    }

    // read in the pictures stream
    _pictures = new PicturesTable(this, _dataStream, _mainStream, _fspaMain, _escherRecordHolder);
    // And the art shapes stream
    _officeArts = new ShapesTable(_tableStream, _fib);

    // And escher pictures
    _officeDrawingsHeaders = new OfficeDrawingsImpl(_fspaHeaders, _escherRecordHolder, _mainStream);
    _officeDrawingsMain = new OfficeDrawingsImpl(_fspaMain, _escherRecordHolder, _mainStream);

    _st =
        new SectionTable(
            _mainStream,
            _tableStream,
            _fib.getFcPlcfsed(),
            _fib.getLcbPlcfsed(),
            fcMin,
            _tpt,
            _fib.getSubdocumentTextStreamLength(SubdocumentType.MAIN));
    _ss = new StyleSheet(_tableStream, _fib.getFcStshf());
    _ft = new FontTable(_tableStream, _fib.getFcSttbfffn(), _fib.getLcbSttbfffn());

    // The list tables are only created when the FIB reports a non-empty list structure.
    int listOffset = _fib.getFcPlcfLst();
    int lfoOffset = _fib.getFcPlfLfo();
    if (listOffset != 0 && _fib.getLcbPlcfLst() != 0) {
      _lt = new ListTables(_tableStream, listOffset, lfoOffset);
    }

    int sbtOffset = _fib.getFcSttbSavedBy();
    int sbtLength = _fib.getLcbSttbSavedBy();
    if (sbtOffset != 0 && sbtLength != 0) {
      _sbt = new SavedByTable(_tableStream, sbtOffset, sbtLength);
    }

    int rmarkOffset = _fib.getFcSttbfRMark();
    int rmarkLength = _fib.getLcbSttbfRMark();
    if (rmarkOffset != 0 && rmarkLength != 0) {
      _rmat = new RevisionMarkAuthorTable(_tableStream, rmarkOffset, rmarkLength);
    }

    _bookmarksTables = new BookmarksTables(_tableStream, _fib);
    _bookmarks = new BookmarksImpl(_bookmarksTables);

    _endnotesTables = new NotesTables(NoteType.ENDNOTE, _tableStream, _fib);
    _endnotes = new NotesImpl(_endnotesTables);
    _footnotesTables = new NotesTables(NoteType.FOOTNOTE, _tableStream, _fib);
    _footnotes = new NotesImpl(_footnotesTables);

    _fieldsTables = new FieldsTables(_tableStream, _fib);
    _fields = new FieldsImpl(_fieldsTables);
  }