/**
 * Walks every record of the file named by {@code file} and prints a summary of each one to
 * standard output.
 *
 * <p>For each record the output is one header line (formatted sid, formatted size and, when
 * {@code RecordFactory} knows a class for the sid, that class name with the
 * {@code org.apache.poi.hssf.record.} prefix removed), followed by a line with the formatted
 * record bytes when the record has any.
 *
 * @throws IOException if opening or reading the file fails
 */
public void run() throws IOException {
    NPOIFSFileSystem poifs = new NPOIFSFileSystem(new File(file), true);
    try {
        InputStream biffStream = BiffViewer.getPOIFSInputStream(poifs);
        try {
            RecordInputStream records = new RecordInputStream(biffStream);
            while (records.hasNextRecord()) {
                // The sid must be fetched before advancing; the size is only known afterwards.
                int sid = records.getNextSid();
                records.nextRecord();
                int size = records.available();
                Class<? extends Record> recordClass = RecordFactory.getRecordClass(sid);

                // Assemble the whole header line first, then emit it in one go.
                StringBuilder header = new StringBuilder();
                header.append(formatSID(sid) + " - " + formatSize(size) + " bytes");
                if (recordClass != null) {
                    header.append(" \t");
                    header.append(recordClass.getName().replace("org.apache.poi.hssf.record.", ""));
                }
                System.out.println(header);

                byte[] payload = records.readRemainder();
                if (payload.length > 0) {
                    System.out.print(" ");
                    System.out.println(formatData(payload));
                }
            }
        } finally {
            biffStream.close();
        }
    } finally {
        poifs.close();
    }
}
public void testFactory(String file, String protectedFile, String password) throws Exception { SlideShow<?, ?> ss; // from file ss = SlideShowFactory.create(fromFile(file)); assertNotNull(ss); // from stream ss = SlideShowFactory.create(fromStream(file)); assertNotNull(ss); // from NPOIFS if (!file.contains("pptx")) { NPOIFSFileSystem npoifs = new NPOIFSFileSystem(fromFile(file)); ss = SlideShowFactory.create(npoifs); assertNotNull(ss); npoifs.close(); } // from protected file ss = SlideShowFactory.create(fromFile(protectedFile), password); assertNotNull(ss); // from protected stream ss = SlideShowFactory.create(fromStream(protectedFile), password); assertNotNull(ss); // from protected NPOIFS NPOIFSFileSystem npoifs = new NPOIFSFileSystem(fromFile(protectedFile)); ss = SlideShowFactory.create(npoifs, password); assertNotNull(ss); npoifs.close(); }
/**
 * Opens the spreadsheet at {@code filePath} as a {@code Workbook}.
 *
 * <p>A path containing {@code ".xlsx"} is loaded as an {@code XSSFWorkbook}; any other path is
 * opened as an OLE2 container and loaded as an {@code HSSFWorkbook}. In both cases the
 * underlying file handle is released before returning, including when the workbook
 * constructor throws.
 *
 * @param filePath path of the spreadsheet file to open
 * @return the loaded workbook
 * @throws IOException if the file cannot be opened or read
 */
public static Workbook getWorkBook(String filePath) throws IOException {
    if (!filePath.contains(".xlsx")) {
        NPOIFSFileSystem fs = new NPOIFSFileSystem(new File(filePath));
        try {
            // The workbook is built while the file system is still open; the file system is
            // closed afterwards exactly as before, but now also on failure.
            return new HSSFWorkbook(fs.getRoot(), false);
        } finally {
            fs.close();
        }
    }

    FileInputStream fileInputStream = new FileInputStream(filePath);
    try {
        return new XSSFWorkbook(fileInputStream);
    } finally {
        fileInputStream.close();
    }
}
public void testAreDocumentsIdentical() throws Exception { POIFSFileSystem fs = new POIFSFileSystem(); DirectoryEntry dirA = fs.createDirectory("DirA"); DirectoryEntry dirB = fs.createDirectory("DirB"); DocumentEntry entryA1 = dirA.createDocument("Entry1", new ByteArrayInputStream(dataSmallA)); DocumentEntry entryA1b = dirA.createDocument("Entry1b", new ByteArrayInputStream(dataSmallA)); DocumentEntry entryA2 = dirA.createDocument("Entry2", new ByteArrayInputStream(dataSmallB)); DocumentEntry entryB1 = dirB.createDocument("Entry1", new ByteArrayInputStream(dataSmallA)); // Names must match assertEquals(false, entryA1.getName().equals(entryA1b.getName())); assertEquals(false, EntryUtils.areDocumentsIdentical(entryA1, entryA1b)); // Contents must match assertEquals(false, EntryUtils.areDocumentsIdentical(entryA1, entryA2)); // Parents don't matter if contents + names are the same assertEquals(false, entryA1.getParent().equals(entryB1.getParent())); assertEquals(true, EntryUtils.areDocumentsIdentical(entryA1, entryB1)); // Can work with NPOIFS + POIFS ByteArrayOutputStream tmpO = new ByteArrayOutputStream(); fs.writeFilesystem(tmpO); ByteArrayInputStream tmpI = new ByteArrayInputStream(tmpO.toByteArray()); NPOIFSFileSystem nfs = new NPOIFSFileSystem(tmpI); DirectoryEntry dN1 = (DirectoryEntry) nfs.getRoot().getEntry("DirA"); DirectoryEntry dN2 = (DirectoryEntry) nfs.getRoot().getEntry("DirB"); DocumentEntry eNA1 = (DocumentEntry) dN1.getEntry(entryA1.getName()); DocumentEntry eNA2 = (DocumentEntry) dN1.getEntry(entryA2.getName()); DocumentEntry eNB1 = (DocumentEntry) dN2.getEntry(entryB1.getName()); assertEquals(false, EntryUtils.areDocumentsIdentical(eNA1, eNA2)); assertEquals(true, EntryUtils.areDocumentsIdentical(eNA1, eNB1)); assertEquals(false, EntryUtils.areDocumentsIdentical(eNA1, entryA1b)); assertEquals(false, EntryUtils.areDocumentsIdentical(eNA1, entryA2)); assertEquals(true, EntryUtils.areDocumentsIdentical(eNA1, entryA1)); assertEquals(true, 
EntryUtils.areDocumentsIdentical(eNA1, entryB1)); }
private static Set<String> getTopLevelNames(TikaInputStream stream) throws IOException { // Force the document stream to a (possibly temporary) file // so we don't modify the current position of the stream File file = stream.getFile(); try { NPOIFSFileSystem fs = new NPOIFSFileSystem(file, true); // Optimize a possible later parsing process by keeping // a reference to the already opened POI file system stream.setOpenContainer(fs); return getTopLevelNames(fs.getRoot()); } catch (IOException e) { // Parse error in POI, so we don't know the file type return Collections.emptySet(); } catch (RuntimeException e) { // Another problem in POI return Collections.emptySet(); } }
/**
 * Reads an OLE2 file system from one file and writes it back out to another.
 *
 * <p>Exits with status 1 after printing a usage message when the argument count is not
 * exactly two.
 *
 * @param args names of the files; {@code args[0]} is the input file, {@code args[1]} is the
 *     output file
 * @exception IOException if either file cannot be opened, read or written
 */
public static void main(String args[]) throws IOException {
    if (args.length != 2) {
        System.err.println("two arguments required: input filename and output filename");
        System.exit(1);
    }

    // Streams are opened input-first and closed in reverse order, innermost resource first.
    FileInputStream source = new FileInputStream(args[0]);
    try {
        FileOutputStream target = new FileOutputStream(args[1]);
        try {
            NPOIFSFileSystem poifs = new NPOIFSFileSystem(source);
            try {
                poifs.writeFilesystem(target);
            } finally {
                poifs.close();
            }
        } finally {
            target.close();
        }
    } finally {
        source.close();
    }
}
/** * @param bytes * @param metadata incoming metadata * @param unknownFilenameCount * @return byte[] for contents of obj data * @throws IOException */ protected byte[] parse(byte[] bytes, Metadata metadata, AtomicInteger unknownFilenameCount) throws IOException { ByteArrayInputStream is = new ByteArrayInputStream(bytes); long version = readUInt(is); metadata.add(RTFMetadata.EMB_APP_VERSION, Long.toString(version)); long formatId = readUInt(is); // 2 is an embedded object. 1 is a link. if (formatId != 2L) { return null; } String className = readLengthPrefixedAnsiString(is).trim(); String topicName = readLengthPrefixedAnsiString(is).trim(); String itemName = readLengthPrefixedAnsiString(is).trim(); if (className != null && className.length() > 0) { metadata.add(RTFMetadata.EMB_CLASS, className); } if (topicName != null && topicName.length() > 0) { metadata.add(RTFMetadata.EMB_TOPIC, topicName); } if (itemName != null && itemName.length() > 0) { metadata.add(RTFMetadata.EMB_ITEM, itemName); } long dataSz = readUInt(is); // readBytes tests for reading too many bytes byte[] embObjBytes = readBytes(is, dataSz); if (className.toLowerCase(Locale.ROOT).equals("package")) { return handlePackage(embObjBytes, metadata); } else if (className.toLowerCase(Locale.ROOT).equals("pbrush")) { // simple bitmap bytes return embObjBytes; } else { ByteArrayInputStream embIs = new ByteArrayInputStream(embObjBytes); if (NPOIFSFileSystem.hasPOIFSHeader(embIs)) { try { return handleEmbeddedPOIFS(embIs, metadata, unknownFilenameCount); } catch (IOException e) { // swallow } } } return embObjBytes; }
/** Creates a Workbook from the given NPOIFSFileSystem, which may be password protected */ private static Workbook create(NPOIFSFileSystem fs, String password) throws IOException, InvalidFormatException { DirectoryNode root = fs.getRoot(); // Encrypted OOXML files go inside OLE2 containers, is this one? if (root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) { EncryptionInfo info = new EncryptionInfo(fs); Decryptor d = Decryptor.getInstance(info); boolean passwordCorrect = false; InputStream stream = null; try { if (password != null && d.verifyPassword(password)) { passwordCorrect = true; } if (!passwordCorrect && d.verifyPassword(Decryptor.DEFAULT_PASSWORD)) { passwordCorrect = true; } if (passwordCorrect) { stream = d.getDataStream(root); } } catch (GeneralSecurityException e) { throw new IOException(e); } if (!passwordCorrect) { if (password != null) throw new EncryptedDocumentException("Password incorrect"); else throw new EncryptedDocumentException( "The supplied spreadsheet is protected, but no password was supplied"); } OPCPackage pkg = OPCPackage.open(stream); return create(pkg); } // If we get here, it isn't an encrypted XLSX file // So, treat it as a regular HSSF XLS one if (password != null) { Biff8EncryptionKey.setCurrentUserPassword(password); } Workbook wb = new HSSFWorkbook(root, true); Biff8EncryptionKey.setCurrentUserPassword(null); return wb; }
/**
 * Convenience overload that obtains the data stream for a whole file system by passing its
 * root ({@code fs.getRoot()}) to the other {@code getDataStream} overload.
 *
 * @param fs the file system whose root is used
 * @return the stream returned by the delegated {@code getDataStream} call
 * @throws IOException propagated from the delegated call
 * @throws GeneralSecurityException propagated from the delegated call
 */
public InputStream getDataStream(NPOIFSFileSystem fs) throws IOException, GeneralSecurityException { return getDataStream(fs.getRoot()); }
// will throw IOException if not actually POIFS // can return null byte[] private byte[] handleEmbeddedPOIFS( InputStream is, Metadata metadata, AtomicInteger unknownFilenameCount) throws IOException { NPOIFSFileSystem fs = null; byte[] ret = null; try { fs = new NPOIFSFileSystem(is); DirectoryNode root = fs.getRoot(); if (root == null) { return ret; } if (root.hasEntry("Package")) { Entry ooxml = root.getEntry("Package"); TikaInputStream stream = TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml)); ByteArrayOutputStream out = new ByteArrayOutputStream(); IOUtils.copy(stream, out); ret = out.toByteArray(); } else { // try poifs POIFSDocumentType type = POIFSDocumentType.detectType(root); if (type == POIFSDocumentType.OLE10_NATIVE) { try { // Try to un-wrap the OLE10Native record: Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(root); ret = ole.getDataBuffer(); } catch (Ole10NativeException ex) { // Not a valid OLE10Native record, skip it } } else if (type == POIFSDocumentType.COMP_OBJ) { DocumentEntry contentsEntry; try { contentsEntry = (DocumentEntry) root.getEntry("CONTENTS"); } catch (FileNotFoundException ioe) { contentsEntry = (DocumentEntry) root.getEntry("Contents"); } DocumentInputStream inp = null; try { inp = new DocumentInputStream(contentsEntry); ret = new byte[contentsEntry.getSize()]; inp.readFully(ret); } finally { if (inp != null) { inp.close(); } } } else { ByteArrayOutputStream out = new ByteArrayOutputStream(); is.reset(); IOUtils.copy(is, out); ret = out.toByteArray(); metadata.set( Metadata.RESOURCE_NAME_KEY, "file_" + unknownFilenameCount.getAndIncrement() + "." + type.getExtension()); metadata.set(Metadata.CONTENT_TYPE, type.getType().toString()); } } } finally { if (fs != null) { fs.close(); } } return ret; }