/**
 * Reads up to {@code len} bytes into {@code b}, starting at index {@code off}.
 *
 * <p>The number of bytes actually read is the smaller of {@code len} and
 * {@code available()}; that many bytes are then fetched with
 * {@code readFully(b, off, limit)}.
 *
 * @param b   destination buffer; must not be {@code null}
 * @param off index in {@code b} at which the first byte is stored; must be non-negative
 * @param len maximum number of bytes to read; must be non-negative
 * @return {@code 0} if {@code len} is zero, {@code EOF} if the stream is at
 *         end of data, otherwise the number of bytes stored into {@code b}
 * @throws IllegalArgumentException  if {@code b} is {@code null}
 * @throws IndexOutOfBoundsException if {@code off} or {@code len} is negative, or if
 *                                   {@code off + len} would run past the end of {@code b}
 * @throws IOException               declared for the underlying stream operations
 */
public int read(byte[] b, int off, int len) throws IOException {
    // Runs before argument validation; presumably rejects use of a closed stream.
    dieIfClosed();
    if (b == null) {
        throw new IllegalArgumentException("buffer must not be null");
    }
    // Compare against "b.length - off" rather than "off + len": the sum can overflow
    // int and wrap negative, which would let out-of-range arguments slip through.
    // The subtraction is only evaluated once off is known to be non-negative, so it
    // cannot overflow.
    if (off < 0 || len < 0 || len > b.length - off) {
        throw new IndexOutOfBoundsException("can't read past buffer boundaries");
    }
    if (len == 0) {
        return 0;
    }
    if (atEOD()) {
        return EOF;
    }
    // Never request more than is currently available, so readFully can satisfy it.
    int limit = Math.min(available(), len);
    readFully(b, off, limit);
    return limit;
}
/**
 * Fills the whole of {@code buf} from this stream.
 *
 * <p>Convenience overload equivalent to {@code readFully(buf, 0, buf.length)}.
 *
 * @param buf buffer to fill completely; a {@code null} value results in a
 *            {@link NullPointerException} when its length is read
 */
public void readFully(byte[] buf) {
    final int length = buf.length;
    readFully(buf, 0, length);
}
// will throw IOException if not actually POIFS // can return null byte[] private byte[] handleEmbeddedPOIFS( InputStream is, Metadata metadata, AtomicInteger unknownFilenameCount) throws IOException { NPOIFSFileSystem fs = null; byte[] ret = null; try { fs = new NPOIFSFileSystem(is); DirectoryNode root = fs.getRoot(); if (root == null) { return ret; } if (root.hasEntry("Package")) { Entry ooxml = root.getEntry("Package"); TikaInputStream stream = TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml)); ByteArrayOutputStream out = new ByteArrayOutputStream(); IOUtils.copy(stream, out); ret = out.toByteArray(); } else { // try poifs POIFSDocumentType type = POIFSDocumentType.detectType(root); if (type == POIFSDocumentType.OLE10_NATIVE) { try { // Try to un-wrap the OLE10Native record: Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(root); ret = ole.getDataBuffer(); } catch (Ole10NativeException ex) { // Not a valid OLE10Native record, skip it } } else if (type == POIFSDocumentType.COMP_OBJ) { DocumentEntry contentsEntry; try { contentsEntry = (DocumentEntry) root.getEntry("CONTENTS"); } catch (FileNotFoundException ioe) { contentsEntry = (DocumentEntry) root.getEntry("Contents"); } DocumentInputStream inp = null; try { inp = new DocumentInputStream(contentsEntry); ret = new byte[contentsEntry.getSize()]; inp.readFully(ret); } finally { if (inp != null) { inp.close(); } } } else { ByteArrayOutputStream out = new ByteArrayOutputStream(); is.reset(); IOUtils.copy(is, out); ret = out.toByteArray(); metadata.set( Metadata.RESOURCE_NAME_KEY, "file_" + unknownFilenameCount.getAndIncrement() + "." + type.getExtension()); metadata.set(Metadata.CONTENT_TYPE, type.getType().toString()); } } } finally { if (fs != null) { fs.close(); } } return ret; }