// write word public boolean writeWordFile(String path, String content) { boolean w = false; try { // byte b[] = content.getBytes("ISO-8859-1"); byte b[] = content.getBytes(); ByteArrayInputStream bais = new ByteArrayInputStream(b); POIFSFileSystem fs = new POIFSFileSystem(); // DirectoryEntry directory = fs.getRoot(); // DocumentEntry de = directory.createDocument("WordDocument", // bais); FileOutputStream ostream = new FileOutputStream(path); fs.writeFilesystem(ostream); bais.close(); ostream.close(); } catch (IOException e) { e.printStackTrace(); } return w; }
/**
 * Prepares the fixture: checks that a fresh {@code ModelFactory} has a listeners member, resets
 * the model list and factory, and opens the "Workbook" stream of a newly generated workbook.
 *
 * @throws Exception if the workbook cannot be built or re-read
 */
protected void setUp() throws Exception {
  // A throw-away factory is used only to verify the listeners member.
  ModelFactory probe = new ModelFactory();
  boolean hasListeners = probe.listeners != null;
  assertTrue("listeners member cannot be null", hasListeners);

  models = new ArrayList(3);
  factory = new ModelFactory();
  book = new HSSFWorkbook();

  // Serialise the workbook, then read it back as a POIFS to get at the raw record stream.
  ByteArrayOutputStream written = setupRunFile(book);
  byte[] workbookBytes = written.toByteArray();
  ByteArrayInputStream source = new ByteArrayInputStream(workbookBytes);
  POIFSFileSystem poifs = new POIFSFileSystem(source);
  in = poifs.createDocumentInputStream("Workbook");
}
public void testWriteProperties() throws Exception { // Just check we can write them back out into a filesystem POIFSFileSystem outFS = new POIFSFileSystem(); doc.readProperties(); doc.writeProperties(outFS); // Should now hold them assertNotNull(outFS.createDocumentInputStream("\005SummaryInformation")); assertNotNull(outFS.createDocumentInputStream("\005DocumentSummaryInformation")); }
/**
 * Gets the document ready for a test: creates an empty POI file system and initialises the
 * {@code dsi} (DocumentSummaryInformation) and {@code si} (SummaryInformation) fields.
 *
 * <p>Each property set is read from the file system if its stream exists; when the stream is
 * missing ({@link FileNotFoundException}) a new, empty property set is created instead. Any other
 * failure while reading fails the test.
 */
public void setUp() {
  bout = new ByteArrayOutputStream();
  poifs = new POIFSFileSystem();
  dir = poifs.getRoot();
  dsi = null;
  try {
    DocumentEntry dsiEntry =
        (DocumentEntry) dir.getEntry(DocumentSummaryInformation.DEFAULT_STREAM_NAME);
    DocumentInputStream dis = new DocumentInputStream(dsiEntry);
    PropertySet ps;
    try {
      ps = new PropertySet(dis);
    } finally {
      // Close the stream even when the property set cannot be parsed.
      dis.close();
    }
    dsi = new DocumentSummaryInformation(ps);
  } catch (FileNotFoundException ex) {
    /* There is no document summary information yet. We have to create a
     * new one. */
    dsi = PropertySetFactory.newDocumentSummaryInformation();
    assertNotNull(dsi);
  } catch (IOException e) {
    e.printStackTrace();
    fail();
  } catch (NoPropertySetStreamException e) {
    e.printStackTrace();
    fail();
  } catch (MarkUnsupportedException e) {
    e.printStackTrace();
    fail();
  } catch (UnexpectedPropertySetTypeException e) {
    e.printStackTrace();
    fail();
  }
  assertNotNull(dsi);

  try {
    DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME);
    DocumentInputStream dis = new DocumentInputStream(siEntry);
    PropertySet ps;
    try {
      ps = new PropertySet(dis);
    } finally {
      dis.close();
    }
    si = new SummaryInformation(ps);
  } catch (FileNotFoundException ex) {
    /* There is no summary information yet. We have to create a
     * new one. */
    si = PropertySetFactory.newSummaryInformation();
    assertNotNull(si);
  } catch (IOException e) {
    e.printStackTrace();
    fail();
  } catch (NoPropertySetStreamException e) {
    e.printStackTrace();
    fail();
  } catch (MarkUnsupportedException e) {
    e.printStackTrace();
    fail();
  } catch (UnexpectedPropertySetTypeException e) {
    e.printStackTrace();
    fail();
  }
  // The original re-checked dsi here, leaving si unverified.
  assertNotNull(si);
}
/** * Read an excel file and spit out what we find. * * @param args Expect one argument that is the file to read. * @throws IOException When there is an error processing the file. */ public static void main(String[] args) throws IOException { try { Class.forName("com.microsoft.sqlserver.jdbc.SQLServerDriver"); // con = DriverManager // .getConnection("jdbc:sqlserver://10.130.133.3:1433;DatabaseName=RTB;user=i264678;password=;SelectMethod=cursor "); con = DriverManager.getConnection( "jdbc:sqlserver://10.135.128.227:1433;DatabaseName=RTBWTC;user=i264678;password=;SelectMethod=cursor "); System.out.println("connected"); // create a new file input stream with the input file specified // at the command line FileInputStream fin = new FileInputStream(args[0]); // create a new org.apache.poi.poifs.filesystem.Filesystem POIFSFileSystem poifs = new POIFSFileSystem(fin); // get the Workbook (excel part) stream in a InputStream InputStream din = poifs.createDocumentInputStream("Workbook"); // construct out HSSFRequest object HSSFRequest req = new HSSFRequest(); // lazy listen for ALL records with the listener shown above req.addListenerForAllRecords(new LoadServiceFromExcel()); // create our event factory HSSFEventFactory factory = new HSSFEventFactory(); // process our events based on the document input stream factory.processEvents(req, din); // once all the events are processed close our file input stream fin.close(); // and our document input stream (don't want to leak these!) din.close(); System.out.println("done."); } catch (ClassNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { try { con.close(); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
public void testWriteReadProperties() throws Exception { ByteArrayOutputStream baos = new ByteArrayOutputStream(); // Write them out POIFSFileSystem outFS = new POIFSFileSystem(); doc.readProperties(); doc.writeProperties(outFS); outFS.writeFilesystem(baos); // Create a new version ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); POIFSFileSystem inFS = new POIFSFileSystem(bais); // Check they're still there doc.filesystem = inFS; doc.readProperties(); // Delegate test testReadProperties(); }
/**
 * Runs the example program. The application expects one or two arguments:
 *
 * <ol>
 *   <li>
 *       <p>The first argument is the disk file name of the POI filesystem to copy.
 *   <li>
 *       <p>The second argument is optional. If it is given, it is the name of a disk file the
 *       copy of the POI filesystem will be written to. If it is not given, the copy will be
 *       written to a temporary file which will be deleted at the end of the program.
 * </ol>
 *
 * <p>With any other number of arguments a usage message is printed and the program exits with
 * status 1. After copying, the original and the copy are compared and "Equal" or "Not equal"
 * (followed by the collected messages) is printed.
 *
 * @param args Command-line arguments.
 * @exception MarkUnsupportedException if a POI document stream does not support the mark()
 *     operation.
 * @exception NoPropertySetStreamException if the application tries to create a property set from
 *     a POI document stream that is not a property set stream.
 * @exception IOException if any I/O exception occurs.
 * @exception UnsupportedEncodingException if a character encoding is not supported.
 */
public static void main(final String[] args)
    throws NoPropertySetStreamException,
        MarkUnsupportedException,
        UnsupportedEncodingException,
        IOException {
  String originalFileName = null;
  String copyFileName = null;

  /* Check the command-line arguments. */
  if (args.length == 1) {
    originalFileName = args[0];
    File f = TempFile.createTempFile("CopyOfPOIFileSystem-", ".ole2");
    f.deleteOnExit();
    copyFileName = f.getAbsolutePath();
  } else if (args.length == 2) {
    originalFileName = args[0];
    copyFileName = args[1];
  } else {
    // The separating space was missing, which glued the class name to the first argument name.
    System.err.println("Usage: " + CopyCompare.class.getName() + " originPOIFS [copyPOIFS]");
    System.exit(1);
  }

  /* Read the origin POIFS using the eventing API. The real work is done
   * in the class CopyFile which is registered here as a POIFSReader. */
  final POIFSReader r = new POIFSReader();
  final CopyFile cf = new CopyFile(copyFileName);
  r.registerListener(cf);
  final FileInputStream readerIn = new FileInputStream(originalFileName);
  try {
    r.read(readerIn);
  } finally {
    readerIn.close();
  }

  /* Write the new POIFS to disk. */
  cf.close();

  /* Read all documents from the original POI file system and compare them
   * with the equivalent document from the copy. The input streams are
   * closed as soon as each file system has been loaded. */
  final POIFSFileSystem opfs;
  final FileInputStream originalIn = new FileInputStream(originalFileName);
  try {
    opfs = new POIFSFileSystem(originalIn);
  } finally {
    originalIn.close();
  }
  final POIFSFileSystem cpfs;
  final FileInputStream copyIn = new FileInputStream(copyFileName);
  try {
    cpfs = new POIFSFileSystem(copyIn);
  } finally {
    copyIn.close();
  }

  final DirectoryEntry oRoot = opfs.getRoot();
  final DirectoryEntry cRoot = cpfs.getRoot();
  final StringBuffer messages = new StringBuffer();
  if (equal(oRoot, cRoot, messages)) {
    System.out.println("Equal");
  } else {
    System.out.println("Not equal: " + messages.toString());
  }
}
/** 导入 excel 2003 biff格式 如果是xml格式的 可以使用SAX(未测试) */ @Test public void testImportExcel2003() throws Exception { long beginTime = System.currentTimeMillis(); String fileName = "D:\\Backup\\Book1.xls"; List<ExcelData> dataList = Lists.newArrayList(); // 输入流 InputStream fis = new BufferedInputStream(new FileInputStream(fileName)); // 创建 org.apache.poi.poifs.filesystem.Filesystem POIFSFileSystem poifs = new POIFSFileSystem(fis); // 从输入流 得到 Workbook(excel 部分)流 InputStream din = poifs.createDocumentInputStream("Workbook"); // 构造 HSSFRequest HSSFRequest req = new HSSFRequest(); // 添加监听器 req.addListenerForAllRecords(new Excel2003Listener(dataList)); // 创建事件工厂 HSSFEventFactory factory = new HSSFEventFactory(); // 根据文档输入流处理事件 factory.processEvents(req, din); // 关闭输入流 fis.close(); // 关闭文档流 din.close(); System.out.println(dataList.size()); // 把最后剩下的不足batchSize大小 if (dataList.size() > 0) { doBatchSave(dataList); } long endTime = System.currentTimeMillis(); log.info("耗时(秒):" + (endTime - beginTime) / 1000); }
/** * Creates the appropriate HSSFWorkbook / XSSFWorkbook from the given InputStream, which may be * password protected. * * <p>Your input stream MUST either support mark/reset, or be wrapped as a {@link * PushbackInputStream}! Note that using an {@link InputStream} has a higher memory footprint than * using a {@link File}. * * <p>Note that in order to properly release resources the Workbook should be closed after use. * Note also that loading from an InputStream requires more memory than loading from a File, so * prefer {@link #create(File)} where possible. * * @throws EncryptedDocumentException If the wrong password is given for a protected file * @throws EmptyFileException If an empty stream is given */ public static Workbook create(InputStream inp, String password) throws IOException, InvalidFormatException, EncryptedDocumentException { // If clearly doesn't do mark/reset, wrap up if (!inp.markSupported()) { inp = new PushbackInputStream(inp, 8); } // Ensure that there is at least some data there byte[] header8 = IOUtils.peekFirst8Bytes(inp); // Try to create if (POIFSFileSystem.hasPOIFSHeader(header8)) { NPOIFSFileSystem fs = new NPOIFSFileSystem(inp); return create(fs, password); } if (POIXMLDocument.hasOOXMLHeader(inp)) { return new XSSFWorkbook(OPCPackage.open(inp)); } throw new IllegalArgumentException( "Your InputStream was neither an OLE2 stream, nor an OOXML stream"); }
/** * Ensures that the directory hierarchy for a document in a POI fileystem is in place. When a * document is to be created somewhere in a POI filesystem its directory must be created first. * This method creates all directories between the POI filesystem root and the directory the * document should belong to which do not yet exist. * * <p>Unfortunately POI does not offer a simple method to interrogate the POIFS whether a * certain child node (file or directory) exists in a directory. However, since we always start * with an empty POIFS which contains the root directory only and since each directory in the * POIFS is created by this method we can maintain the POIFS's directory hierarchy ourselves: * The {@link DirectoryEntry} of each directory created is stored in a {@link Map}. The * directories' path names map to the corresponding {@link DirectoryEntry} instances. * * @param poiFs The POI filesystem the directory hierarchy is created in, if needed. * @param path The document's path. This method creates those directory components of this * hierarchy which do not yet exist. * @return The directory entry of the document path's parent. The caller should use this {@link * DirectoryEntry} to create documents in it. */ public DirectoryEntry getPath(final POIFSFileSystem poiFs, final POIFSDocumentPath path) { try { /* Check whether this directory has already been created. */ final String s = path.toString(); DirectoryEntry de = (DirectoryEntry) paths.get(s); if (de != null) /* Yes: return the corresponding DirectoryEntry. */ return de; /* No: We have to create the directory - or return the root's * DirectoryEntry. */ int l = path.length(); if (l == 0) /* Get the root directory. It does not have to be created * since it always exists in a POIFS. */ de = poiFs.getRoot(); else { /* Create a subordinate directory. 
The first step is to * ensure that the parent directory exists: */ de = getPath(poiFs, path.getParent()); /* Now create the target directory: */ de = de.createDirectory(path.getComponent(path.length() - 1)); } paths.put(s, de); return de; } catch (IOException ex) { /* This exception will be thrown if the directory already * exists. However, since we have full control about directory * creation we can ensure that this will never happen. */ ex.printStackTrace(System.err); throw new RuntimeException(ex.toString()); /* FIXME (2): Replace the previous line by the following once we * no longer need JDK 1.3 compatibility. */ // throw new RuntimeException(ex); } }
/**
 * Creates the workbook for a report, either from the report's template or from an empty
 * workbook, and adds summary and document summary property streams built from the report's
 * description.
 *
 * <p>The summary information always gets a creation timestamp; application, author, version,
 * title, subject and comments are added when the description returns a non-null value. The
 * document summary information gets company and category under the same condition.
 *
 * <p>NOTE(review): both property streams are created unconditionally; confirm how this behaves
 * when the template already contains streams with those names.
 *
 * @param report the report supplying the optional template and the description
 * @param ctx the EL context passed to the description getters
 * @return a workbook created from the assembled file system
 * @throws Exception if the file system or the property streams cannot be built
 */
protected HSSFWorkbook makeWorkbook(final Report report, final ELContext ctx) throws Exception {
  final POIFSFileSystem fs;
  if (report.getTemplate() != null) {
    fs = new POIFSFileSystem(new ByteArrayInputStream(report.getTemplate()));
  } else {
    // Only serialise an empty workbook when there is no template to start from.
    final byte[] emptyWorkbookData = new HSSFWorkbook().getBytes();
    fs = new POIFSFileSystem();
    fs.createDocument(new ByteArrayInputStream(emptyWorkbookData), "Workbook");
  }

  // Summary information
  final MutablePropertySet siProperties = new MutablePropertySet();
  final MutableSection siSection = (MutableSection) siProperties.getSections().get(0);
  siSection.setFormatID(SectionIDMap.SUMMARY_INFORMATION_ID);

  final MutableProperty p0 = new MutableProperty();
  p0.setID(PropertyIDMap.PID_CREATE_DTM);
  p0.setType(Variant.VT_FILETIME);
  p0.setValue(new Date());
  siSection.setProperty(p0);

  putStringProperty(
      siSection, PropertyIDMap.PID_APPNAME, report.getDescription().getApplication(ctx));
  putStringProperty(siSection, PropertyIDMap.PID_AUTHOR, report.getDescription().getAuthor(ctx));
  putStringProperty(
      siSection, PropertyIDMap.PID_REVNUMBER, report.getDescription().getVersion(ctx));
  putStringProperty(siSection, PropertyIDMap.PID_TITLE, report.getDescription().getTitle(ctx));
  putStringProperty(
      siSection, PropertyIDMap.PID_SUBJECT, report.getDescription().getSubject(ctx));
  putStringProperty(
      siSection, PropertyIDMap.PID_COMMENTS, report.getDescription().getComments(ctx));

  // Document summary information
  final MutablePropertySet dsiProperties = new MutablePropertySet();
  final MutableSection dsiSection = (MutableSection) dsiProperties.getSections().get(0);
  dsiSection.setFormatID(SectionIDMap.DOCUMENT_SUMMARY_INFORMATION_ID[0]);

  putStringProperty(
      dsiSection, PropertyIDMap.PID_COMPANY, report.getDescription().getCompany(ctx));
  putStringProperty(
      dsiSection, PropertyIDMap.PID_CATEGORY, report.getDescription().getCategory(ctx));

  fs.createDocument(siProperties.toInputStream(), SummaryInformation.DEFAULT_STREAM_NAME);
  fs.createDocument(
      dsiProperties.toInputStream(), DocumentSummaryInformation.DEFAULT_STREAM_NAME);

  return new HSSFWorkbook(fs, true);
}

/** Adds a VT_LPWSTR property with the given ID to the section; does nothing for a null value. */
private void putStringProperty(final MutableSection section, final int id, final String value) {
  if (value == null) {
    return;
  }
  final MutableProperty p = new MutableProperty();
  p.setID(id);
  p.setType(Variant.VT_LPWSTR);
  p.setValue(value);
  section.setProperty(p);
}
public String extractText(InputStream in) throws IOException { ArrayList<WordTextPiece> text = new ArrayList<WordTextPiece>(); POIFSFileSystem fsys = new POIFSFileSystem(in); DocumentEntry headerProps = (DocumentEntry) fsys.getRoot().getEntry("WordDocument"); DocumentInputStream din = fsys.createDocumentInputStream("WordDocument"); byte[] header = new byte[headerProps.getSize()]; din.read(header); din.close(); // Prende le informazioni dall'header del documento int info = LittleEndian.getShort(header, 0xa); boolean useTable1 = (info & 0x200) != 0; // boolean useTable1 = true; // Prende informazioni dalla piece table int complexOffset = LittleEndian.getInt(header, 0x1a2); // int complexOffset = LittleEndian.getInt(header); String tableName = null; if (useTable1) { tableName = "1Table"; } else { tableName = "0Table"; } DocumentEntry table = (DocumentEntry) fsys.getRoot().getEntry(tableName); byte[] tableStream = new byte[table.getSize()]; din = fsys.createDocumentInputStream(tableName); din.read(tableStream); din.close(); din = null; fsys = null; table = null; headerProps = null; int multiple = findText(tableStream, complexOffset, text); StringBuffer sb = new StringBuffer(); tableStream = null; for (int x = 0; x < text.size(); x++) { WordTextPiece nextPiece = (WordTextPiece) text.get(x); int start = nextPiece.getStart(); int length = nextPiece.getLength(); boolean unicode = nextPiece.usesUnicode(); String toStr = null; if (unicode) { toStr = new String(header, start, length * multiple, "UTF-8"); } else { toStr = new String(header, start, length, "big5"); } sb.append(toStr).append(" "); } return sb.toString(); }
/**
 * Writes the POI file system to a disk file.
 *
 * <p>Opens {@code dstName} for writing, writes {@code poiFs} to it and closes the file again,
 * also when writing fails.
 *
 * @throws FileNotFoundException if the destination file cannot be opened for writing
 * @throws IOException if writing the file system or closing the file fails
 */
public void close() throws FileNotFoundException, IOException {
  out = new FileOutputStream(dstName);
  try {
    poiFs.writeFilesystem(out);
  } finally {
    // Release the file handle even if the file system could not be written.
    out.close();
  }
}
/**
 * Writes out the word file that is represented by an instance of this class.
 *
 * <p>The method serialises each table into the table stream in a fixed order, recording the
 * start offset and length of every structure in the FileInformationBlock (FIB) as it goes. The
 * FIB itself is written last, into space reserved at the start of the main stream. The resulting
 * streams are then placed into a new POIFS in the order of the entries of the current directory,
 * and that file system is written to {@code out}. Finally this object's directory, table stream
 * and data stream references are replaced with the newly written ones.
 *
 * @param out The OutputStream to write to.
 * @throws IOException If there is an unexpected IOException from the passed in OutputStream.
 */
public void write(OutputStream out) throws IOException {
  // initialize our streams for writing.
  HWPFFileSystem docSys = new HWPFFileSystem();
  HWPFOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT);
  HWPFOutputStream tableStream = docSys.getStream(STREAM_TABLE_1);
  // HWPFOutputStream dataStream = docSys.getStream("Data");
  // Start of the structure currently being written within the table stream; advanced after
  // every structure so the next one records the correct start offset.
  int tableOffset = 0;

  // FileInformationBlock fib = (FileInformationBlock)_fib.clone();
  // clear the offsets and sizes in our FileInformationBlock.
  _fib.clearOffsetsSizes();

  // determine the FileInformationBLock size
  // NOTE(review): when fibSize is already a multiple of the block size this adds one more full
  // block rather than leaving the size unchanged.
  int fibSize = _fib.getSize();
  fibSize +=
      POIFSConstants.SMALLER_BIG_BLOCK_SIZE - (fibSize % POIFSConstants.SMALLER_BIG_BLOCK_SIZE);

  // preserve space for the FileInformationBlock because we will be writing
  // it after we write everything else.
  byte[] placeHolder = new byte[fibSize];
  wordDocumentStream.write(placeHolder);
  int mainOffset = wordDocumentStream.getOffset();

  // write out the StyleSheet.
  _fib.setFcStshf(tableOffset);
  _ss.writeTo(tableStream);
  _fib.setLcbStshf(tableStream.getOffset() - tableOffset);
  tableOffset = tableStream.getOffset();

  // get fcMin and fcMac because we will be writing the actual text with the
  // complex table.
  int fcMin = mainOffset;

  /*
   * clx (encoding of the sprm lists for a complex file and piece table
   * for a any file) Written immediately after the end of the previously
   * recorded structure. This is recorded in all Word documents
   *
   * Microsoft Office Word 97-2007 Binary File Format (.doc)
   * Specification; Page 23 of 210
   */

  // write out the Complex table, includes text.
  _fib.setFcClx(tableOffset);
  _cft.writeTo(wordDocumentStream, tableStream);
  _fib.setLcbClx(tableStream.getOffset() - tableOffset);
  tableOffset = tableStream.getOffset();
  // Offset in the main stream after the complex table (and with it the text) has been written.
  int fcMac = wordDocumentStream.getOffset();

  /*
   * dop (document properties record) Written immediately after the end of
   * the previously recorded structure. This is recorded in all Word
   * documents
   *
   * Microsoft Office Word 97-2007 Binary File Format (.doc)
   * Specification; Page 23 of 210
   */

  // write out the DocumentProperties.
  _fib.setFcDop(tableOffset);
  _dop.writeTo(tableStream);
  _fib.setLcbDop(tableStream.getOffset() - tableOffset);
  tableOffset = tableStream.getOffset();

  /*
   * plcfBkmkf (table recording beginning CPs of bookmarks) Written
   * immediately after the sttbfBkmk, if the document contains bookmarks.
   *
   * Microsoft Office Word 97-2007 Binary File Format (.doc)
   * Specification; Page 24 of 210
   */
  if (_bookmarksTables != null) {
    _bookmarksTables.writePlcfBkmkf(_fib, tableStream);
    tableOffset = tableStream.getOffset();
  }

  /*
   * plcfBkmkl (table recording limit CPs of bookmarks) Written
   * immediately after the plcfBkmkf, if the document contains bookmarks.
   *
   * Microsoft Office Word 97-2007 Binary File Format (.doc)
   * Specification; Page 24 of 210
   */
  if (_bookmarksTables != null) {
    _bookmarksTables.writePlcfBkmkl(_fib, tableStream);
    tableOffset = tableStream.getOffset();
  }

  /*
   * plcfbteChpx (bin table for CHP FKPs) Written immediately after the
   * previously recorded table. This is recorded in all Word documents.
   *
   * Microsoft Office Word 97-2007 Binary File Format (.doc)
   * Specification; Page 24 of 210
   */

  // write out the CHPBinTable.
  _fib.setFcPlcfbteChpx(tableOffset);
  _cbt.writeTo(wordDocumentStream, tableStream, fcMin, _cft.getTextPieceTable());
  _fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset);
  tableOffset = tableStream.getOffset();

  /*
   * plcfbtePapx (bin table for PAP FKPs) Written immediately after the
   * plcfbteChpx. This is recorded in all Word documents.
   *
   * Microsoft Office Word 97-2007 Binary File Format (.doc)
   * Specification; Page 24 of 210
   */

  // write out the PAPBinTable.
  _fib.setFcPlcfbtePapx(tableOffset);
  _pbt.writeTo(wordDocumentStream, tableStream, _cft.getTextPieceTable());
  _fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset);
  tableOffset = tableStream.getOffset();

  /*
   * plcfendRef (endnote reference position table) Written immediately
   * after the previously recorded table if the document contains endnotes
   *
   * plcfendTxt (endnote text position table) Written immediately after
   * the plcfendRef if the document contains endnotes
   *
   * Microsoft Office Word 97-2007 Binary File Format (.doc)
   * Specification; Page 24 of 210
   */
  // Unlike the bookmark and field tables, the endnote tables are used without a null check.
  _endnotesTables.writeRef(_fib, tableStream);
  _endnotesTables.writeTxt(_fib, tableStream);
  tableOffset = tableStream.getOffset();

  /*
   * plcffld*** (table of field positions and statuses for annotation
   * subdocument) Written immediately after the previously recorded table,
   * if the ******* subdocument contains fields.
   *
   * Microsoft Office Word 97-2007 Binary File Format (.doc)
   * Specification; Page 24 of 210
   */
  if (_fieldsTables != null) {
    _fieldsTables.write(_fib, tableStream);
    tableOffset = tableStream.getOffset();
  }

  /*
   * plcffndRef (footnote reference position table) Written immediately
   * after the stsh if the document contains footnotes
   *
   * plcffndTxt (footnote text position table) Written immediately after
   * the plcffndRef if the document contains footnotes
   *
   * Microsoft Office Word 97-2007 Binary File Format (.doc)
   * Specification; Page 24 of 210
   */
  // As with the endnotes, the footnote tables are used without a null check.
  _footnotesTables.writeRef(_fib, tableStream);
  _footnotesTables.writeTxt(_fib, tableStream);
  tableOffset = tableStream.getOffset();

  /*
   * plcfsed (section table) Written immediately after the previously
   * recorded table. Recorded in all Word documents
   *
   * Microsoft Office Word 97-2007 Binary File Format (.doc)
   * Specification; Page 25 of 210
   */

  // write out the SectionTable.
  _fib.setFcPlcfsed(tableOffset);
  _st.writeTo(wordDocumentStream, tableStream);
  _fib.setLcbPlcfsed(tableStream.getOffset() - tableOffset);
  tableOffset = tableStream.getOffset();

  // write out the list tables
  if (_lt != null) {
    /*
     * plcflst (list formats) Written immediately after the end of the
     * previously recorded, if there are any lists defined in the
     * document. This begins with a short count of LSTF structures
     * followed by those LSTF structures. This is immediately followed
     * by the allocated data hanging off the LSTFs. This data consists
     * of the array of LVLs for each LSTF. (Each LVL consists of an LVLF
     * followed by two grpprls and an XST.)
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 25 of 210
     */
    _lt.writeListDataTo(_fib, tableStream);
    tableOffset = tableStream.getOffset();

    /*
     * plflfo (more list formats) Written immediately after the end of
     * the plcflst and its accompanying data, if there are any lists
     * defined in the document. This consists first of a PL of LFO
     * records, followed by the allocated data (if any) hanging off the
     * LFOs. The allocated data consists of the array of LFOLVLFs for
     * each LFO (and each LFOLVLF is immediately followed by some LVLs).
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 26 of 210
     */
    _fib.setFcPlfLfo(tableStream.getOffset());
    _lt.writeListOverridesTo(tableStream);
    _fib.setLcbPlfLfo(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();
  }

  /*
   * sttbfBkmk (table of bookmark name strings) Written immediately after
   * the previously recorded table, if the document contains bookmarks.
   *
   * Microsoft Office Word 97-2007 Binary File Format (.doc)
   * Specification; Page 27 of 210
   */
  if (_bookmarksTables != null) {
    _bookmarksTables.writeSttbfBkmk(_fib, tableStream);
    tableOffset = tableStream.getOffset();
  }

  /*
   * sttbSavedBy (last saved by string table) Written immediately after
   * the previously recorded table.
   *
   * Microsoft Office Word 97-2007 Binary File Format (.doc)
   * Specification; Page 27 of 210
   */

  // write out the saved-by table.
  if (_sbt != null) {
    _fib.setFcSttbSavedBy(tableOffset);
    _sbt.writeTo(tableStream);
    _fib.setLcbSttbSavedBy(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();
  }

  // write out the revision mark authors table.
  if (_rmat != null) {
    _fib.setFcSttbfRMark(tableOffset);
    _rmat.writeTo(tableStream);
    _fib.setLcbSttbfRMark(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();
  }

  // write out the FontTable.
  _fib.setFcSttbfffn(tableOffset);
  _ft.writeTo(tableStream);
  _fib.setLcbSttbfffn(tableStream.getOffset() - tableOffset);
  tableOffset = tableStream.getOffset();

  // set some variables in the FileInformationBlock.
  _fib.getFibBase().setFcMin(fcMin);
  _fib.getFibBase().setFcMac(fcMac);
  _fib.setCbMac(wordDocumentStream.getOffset());

  // make sure that the table, doc and data streams use big blocks.
  // Each buffer shorter than 4096 bytes is copied into a zero-padded 4096-byte array.
  byte[] mainBuf = wordDocumentStream.toByteArray();
  if (mainBuf.length < 4096) {
    byte[] tempBuf = new byte[4096];
    System.arraycopy(mainBuf, 0, tempBuf, 0, mainBuf.length);
    mainBuf = tempBuf;
  }

  // Table1 stream will be used
  _fib.getFibBase().setFWhichTblStm(true);

  // write out the FileInformationBlock.
  // _fib.serialize(mainBuf, 0);
  _fib.writeTo(mainBuf, tableStream);

  // The table bytes are taken only after the FIB has been written, since that call also
  // receives the table stream.
  byte[] tableBuf = tableStream.toByteArray();
  if (tableBuf.length < 4096) {
    byte[] tempBuf = new byte[4096];
    System.arraycopy(tableBuf, 0, tempBuf, 0, tableBuf.length);
    tableBuf = tempBuf;
  }

  // The existing data stream is reused; a missing one is replaced by 4096 zero bytes.
  byte[] dataBuf = _dataStream;
  if (dataBuf == null) {
    dataBuf = new byte[4096];
  }
  if (dataBuf.length < 4096) {
    byte[] tempBuf = new byte[4096];
    System.arraycopy(dataBuf, 0, tempBuf, 0, dataBuf.length);
    dataBuf = tempBuf;
  }

  // create new document preserving order of entries
  POIFSFileSystem pfs = new POIFSFileSystem();
  // Each flag records that the corresponding stream has been written, so it is written at most
  // once in the loop and added afterwards if the directory had no entry for it.
  boolean docWritten = false;
  boolean dataWritten = false;
  boolean objectPoolWritten = false;
  boolean tableWritten = false;
  boolean propertiesWritten = false;
  for (Iterator<Entry> iter = directory.getEntries(); iter.hasNext(); ) {
    Entry entry = iter.next();
    if (entry.getName().equals(STREAM_WORD_DOCUMENT)) {
      if (!docWritten) {
        pfs.createDocument(new ByteArrayInputStream(mainBuf), STREAM_WORD_DOCUMENT);
        docWritten = true;
      }
    } else if (entry.getName().equals(STREAM_OBJECT_POOL)) {
      if (!objectPoolWritten) {
        _objectPool.writeTo(pfs.getRoot());
        objectPoolWritten = true;
      }
    } else if (entry.getName().equals(STREAM_TABLE_0) || entry.getName().equals(STREAM_TABLE_1)) {
      // Either table entry is replaced by a single table stream named STREAM_TABLE_1.
      if (!tableWritten) {
        pfs.createDocument(new ByteArrayInputStream(tableBuf), STREAM_TABLE_1);
        tableWritten = true;
      }
    } else if (entry.getName().equals(SummaryInformation.DEFAULT_STREAM_NAME)
        || entry.getName().equals(DocumentSummaryInformation.DEFAULT_STREAM_NAME)) {
      // The first property entry encountered triggers the single writeProperties call.
      if (!propertiesWritten) {
        writeProperties(pfs);
        propertiesWritten = true;
      }
    } else if (entry.getName().equals(STREAM_DATA)) {
      if (!dataWritten) {
        pfs.createDocument(new ByteArrayInputStream(dataBuf), STREAM_DATA);
        dataWritten = true;
      }
    } else {
      // Any other entry is copied over unchanged.
      EntryUtils.copyNodeRecursively(entry, pfs.getRoot());
    }
  }

  // Add whatever the original directory did not contain.
  if (!docWritten) pfs.createDocument(new ByteArrayInputStream(mainBuf), STREAM_WORD_DOCUMENT);
  if (!tableWritten) pfs.createDocument(new ByteArrayInputStream(tableBuf), STREAM_TABLE_1);
  if (!propertiesWritten) writeProperties(pfs);
  if (!dataWritten) pfs.createDocument(new ByteArrayInputStream(dataBuf), STREAM_DATA);
  if (!objectPoolWritten) _objectPool.writeTo(pfs.getRoot());

  pfs.writeFilesystem(out);
  this.directory = pfs.getRoot();

  /*
   * since we updated all references in FIB and etc, using new arrays to
   * access data
   */
  // NOTE(review): the directory assignment below repeats the one just above.
  this.directory = pfs.getRoot();
  // NOTE(review): this stores the table stream's own bytes, not the padded tableBuf that was
  // written to the file system.
  this._tableStream = tableStream.toByteArray();
  this._dataStream = dataBuf;
}
/**
 * This constructor loads a Word document from a POIFSFileSystem. It delegates to the constructor
 * that takes a directory, passing the root directory of the given file system.
 *
 * @param pfilesystem The POIFSFileSystem that contains the Word document.
 * @throws IOException If there is an unexpected IOException from the passed in POIFSFileSystem.
 */
public HWPFDocumentCore(POIFSFileSystem pfilesystem) throws IOException {
  // Delegate to the directory-based constructor.
  this(pfilesystem.getRoot());
}
/**
 * Saves one embedded document to a file below the extraction directory.
 *
 * <p>The file name is read from {@code Metadata.RESOURCE_NAME_KEY}; when absent a name of the
 * form {@code "file" + count} is generated. If the name contains no {@code '.'} and a media type
 * was detected, the extension registered for that type is appended. If an embedded relationship
 * id is present and the name does not already start with it, the id is prepended with an
 * underscore.
 *
 * <p>The name originates from the parsed document, so the resolved target is checked against the
 * extraction directory; entries that would land outside of it are logged and skipped. Failures
 * while copying the content are logged and otherwise ignored (best effort).
 *
 * @param inputStream stream positioned at the embedded document's content
 * @param contentHandler not used by this implementation
 * @param metadata metadata of the embedded document (name, relationship id)
 * @param outputHtml not used by this implementation
 * @throws SAXException declared by the signature; not thrown directly by this method body
 * @throws IOException if the target's parent directory cannot be created, a path cannot be
 *     canonicalized, or closing the output file fails
 */
public void parseEmbedded(
    InputStream inputStream, ContentHandler contentHandler, Metadata metadata, boolean outputHtml)
    throws SAXException, IOException {
  String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
  if (name == null) {
    name = "file" + count++;
  }

  MediaType contentType = detector.detect(inputStream, metadata);
  if (name.indexOf('.') == -1 && contentType != null) {
    try {
      name += config.getMimeRepository().forName(contentType.toString()).getExtension();
    } catch (MimeTypeException e) {
      // Not fatal: the file is simply written without an extension.
      logger.warn("Unable to determine a file extension for media type " + contentType, e);
    }
  }

  String relID = metadata.get(Metadata.EMBEDDED_RELATIONSHIP_ID);
  if (relID != null && !name.startsWith(relID)) {
    name = relID + "_" + name;
  }

  File outputFile = new File(extractDir, name);

  // The name comes from the document being parsed and may contain ".." segments or an absolute
  // path. Compare canonical paths so nothing is written outside of the extraction directory.
  // extractDir.toString() yields the directory path whether the field is a File or a String.
  String rootPath = new File(extractDir.toString()).getCanonicalPath();
  String rootPrefix = rootPath.endsWith(File.separator) ? rootPath : rootPath + File.separator;
  if (!outputFile.getCanonicalPath().startsWith(rootPrefix)) {
    logger.warn(
        "Skipping embedded file '" + name + "': target is outside of extract directory " + rootPath);
    return;
  }

  File parent = outputFile.getParentFile();
  if (!parent.exists() && !parent.mkdirs()) {
    throw new IOException("unable to create directory \"" + parent + "\"");
  }
  System.out.println("Extracting '" + name + "' (" + contentType + ") to " + outputFile);

  FileOutputStream os = null;
  try {
    os = new FileOutputStream(outputFile);
    // instanceof is false for null, so no separate null check on the container is needed.
    if (inputStream instanceof TikaInputStream
        && ((TikaInputStream) inputStream).getOpenContainer() instanceof DirectoryEntry) {
      // The stream is backed by an already opened OLE2 directory: rebuild a standalone
      // filesystem from it instead of copying raw bytes.
      DirectoryEntry container =
          (DirectoryEntry) ((TikaInputStream) inputStream).getOpenContainer();
      POIFSFileSystem fs = new POIFSFileSystem();
      copy(container, fs.getRoot());
      fs.writeFilesystem(os);
    } else {
      IOUtils.copy(inputStream, os);
    }
  } catch (Exception e) {
    logger.warn("Ignoring unexpected exception trying to save embedded file " + name, e);
  } finally {
    if (os != null) {
      os.close();
    }
  }
}
/**
 * Writes both property sets into the filesystem, serializes the filesystem into the in-memory
 * buffer and reads it back, replacing {@code dsi} and {@code si} with the re-read instances.
 * Tests call this after modifying the property sets to verify that the values survive a
 * write/read round trip.
 *
 * <p>The re-opened filesystem and its root directory are kept in local variables only; the
 * {@code poifs} and {@code dir} fields keep referring to the filesystem that was written.
 */
public void closeAndReOpen() {
  // Step 1: store the current property sets in the directory of the filesystem under test.
  try {
    dsi.write(dir, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
    si.write(dir, SummaryInformation.DEFAULT_STREAM_NAME);
  } catch (WritingNotSupportedException e) {
    e.printStackTrace();
    fail();
  } catch (IOException e) {
    e.printStackTrace();
    fail();
  }

  // Drop the in-memory instances so that only re-read values can be observed afterwards.
  si = null;
  dsi = null;

  // Step 2: serialize the whole filesystem into the byte buffer.
  try {
    poifs.writeFilesystem(bout);
    bout.flush();
  } catch (IOException e) {
    e.printStackTrace();
    fail();
  }

  // Step 3: parse the serialized bytes into a fresh filesystem.
  byte[] serialized = bout.toByteArray();
  InputStream is = new ByteArrayInputStream(serialized);
  assertNotNull(is);

  POIFSFileSystem reopenedFs = null;
  try {
    reopenedFs = new POIFSFileSystem(is);
  } catch (IOException e) {
    e.printStackTrace();
    fail();
  }
  try {
    is.close();
  } catch (IOException e) {
    e.printStackTrace();
    fail();
  }
  assertNotNull(reopenedFs);

  DirectoryEntry reopenedRoot = reopenedFs.getRoot();

  // Step 4: read the document summary information; it must exist after the round trip.
  try {
    DocumentEntry entry =
        (DocumentEntry) reopenedRoot.getEntry(DocumentSummaryInformation.DEFAULT_STREAM_NAME);
    DocumentInputStream stream = new DocumentInputStream(entry);
    PropertySet propertySet = new PropertySet(stream);
    stream.close();
    dsi = new DocumentSummaryInformation(propertySet);
  } catch (FileNotFoundException ex) {
    fail();
  } catch (IOException e) {
    e.printStackTrace();
    fail();
  } catch (NoPropertySetStreamException e) {
    e.printStackTrace();
    fail();
  } catch (MarkUnsupportedException e) {
    e.printStackTrace();
    fail();
  } catch (UnexpectedPropertySetTypeException e) {
    e.printStackTrace();
    fail();
  }

  // Step 5: read the summary information; unlike above, a missing stream is tolerated.
  try {
    DocumentEntry entry =
        (DocumentEntry) reopenedRoot.getEntry(SummaryInformation.DEFAULT_STREAM_NAME);
    DocumentInputStream stream = new DocumentInputStream(entry);
    PropertySet propertySet = new PropertySet(stream);
    stream.close();
    si = new SummaryInformation(propertySet);
  } catch (FileNotFoundException ex) {
    /* There is no summary information stream. Fall back to a newly created one. */
    si = PropertySetFactory.newSummaryInformation();
    assertNotNull(si);
  } catch (IOException e) {
    e.printStackTrace();
    fail();
  } catch (NoPropertySetStreamException e) {
    e.printStackTrace();
    fail();
  } catch (MarkUnsupportedException e) {
    e.printStackTrace();
    fail();
  } catch (UnexpectedPropertySetTypeException e) {
    e.printStackTrace();
    fail();
  }
}
/** * Add a embedded object to this presentation * * @return 0-based index of the embedded object */ public int addEmbed(POIFSFileSystem poiData) { DirectoryNode root = poiData.getRoot(); // prepare embedded data if (new ClassID().equals(root.getStorageClsid())) { // need to set class id Map<String, ClassID> olemap = getOleMap(); ClassID classID = null; for (Map.Entry<String, ClassID> entry : olemap.entrySet()) { if (root.hasEntry(entry.getKey())) { classID = entry.getValue(); break; } } if (classID == null) { throw new IllegalArgumentException("Unsupported embedded document"); } root.setStorageClsid(classID); } ExEmbed exEmbed = new ExEmbed(); // remove unneccessary infos, so we don't need to specify the type // of the ole object multiple times Record children[] = exEmbed.getChildRecords(); exEmbed.removeChild(children[2]); exEmbed.removeChild(children[3]); exEmbed.removeChild(children[4]); ExEmbedAtom eeEmbed = exEmbed.getExEmbedAtom(); eeEmbed.setCantLockServerB(true); ExOleObjAtom eeAtom = exEmbed.getExOleObjAtom(); eeAtom.setDrawAspect(ExOleObjAtom.DRAW_ASPECT_VISIBLE); eeAtom.setType(ExOleObjAtom.TYPE_EMBEDDED); // eeAtom.setSubType(ExOleObjAtom.SUBTYPE_EXCEL); // should be ignored?!?, see MS-PPT ExOleObjAtom, but Libre Office sets it ... eeAtom.setOptions(1226240); ExOleObjStg exOleObjStg = new ExOleObjStg(); try { final String OLESTREAM_NAME = "\u0001Ole"; if (!root.hasEntry(OLESTREAM_NAME)) { // the following data was taken from an example libre office document // beside this "\u0001Ole" record there were several other records, e.g. 
CompObj, // OlePresXXX, but it seems, that they aren't neccessary byte oleBytes[] = {1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; poiData.createDocument(new ByteArrayInputStream(oleBytes), OLESTREAM_NAME); } ByteArrayOutputStream bos = new ByteArrayOutputStream(); poiData.writeFilesystem(bos); exOleObjStg.setData(bos.toByteArray()); } catch (IOException e) { throw new HSLFException(e); } int psrId = addPersistentObject(exOleObjStg); exOleObjStg.setPersistId(psrId); eeAtom.setObjStgDataRef(psrId); int objectId = addToObjListAtom(exEmbed); eeAtom.setObjID(objectId); return objectId; }
/**
 * Creates an extractor for the given filesystem by delegating to the two-argument constructor
 * with the filesystem's root directory and the filesystem itself.
 *
 * @param fs the POIFS filesystem to extract from
 * @throws IOException if the delegated constructor reports an I/O problem
 */
public ExcelExtractor(POIFSFileSystem fs) throws IOException {
  this(fs.getRoot(), fs);
}
/**
 * Convenience overload: resolves the root directory of {@code fs} and delegates to the
 * directory-based {@code getDataStream}.
 *
 * @param fs the POIFS filesystem whose root directory is passed on
 * @return the stream returned by the directory-based overload
 * @throws IOException if the delegated call reports an I/O problem
 * @throws GeneralSecurityException if the delegated call reports a security problem
 */
public InputStream getDataStream(POIFSFileSystem fs)
    throws IOException, GeneralSecurityException {
  return getDataStream(fs.getRoot());
}
/**
 * Processes a file into essentially record events. This overload takes the whole filesystem
 * and forwards its root directory to the directory-based variant.
 *
 * @param req an instance of HSSFRequest which has your registered listeners
 * @param fs a POIFS filesystem containing your workbook
 * @return numeric user-specified result code, as returned by the directory-based variant
 * @throws IOException if the delegated call reports an I/O problem
 * @throws HSSFUserException if the delegated call raises it
 */
public short abortableProcessWorkbookEvents(HSSFRequest req, POIFSFileSystem fs)
    throws IOException, HSSFUserException {
  return abortableProcessWorkbookEvents(req, fs.getRoot());
}
/**
 * Processes a file into essentially record events. This overload takes the whole filesystem
 * and forwards its root directory to the directory-based variant.
 *
 * @param req an instance of HSSFRequest which has your registered listeners
 * @param fs a POIFS filesystem containing your workbook
 * @throws IOException if the delegated call reports an I/O problem
 */
public void processWorkbookEvents(HSSFRequest req, POIFSFileSystem fs) throws IOException {
  processWorkbookEvents(req, fs.getRoot());
}