/** * Represents embedded picture extracted from Word Document * * @author Dmitry Romanov */ public final class Picture { private static final POILogger log = POILogFactory.getLogger(Picture.class); // public static final int FILENAME_OFFSET = 0x7C; // public static final int FILENAME_SIZE_OFFSET = 0x6C; static final int PICF_OFFSET = 0x0; static final int PICT_HEADER_OFFSET = 0x4; static final int MFPMM_OFFSET = 0x6; static final int PICF_SHAPE_OFFSET = 0xE; static final int PICMD_OFFSET = 0x1C; static final int UNKNOWN_HEADER_SIZE = 0x49; public static final byte[] GIF = new byte[] {'G', 'I', 'F'}; public static final byte[] PNG = new byte[] {(byte) 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}; public static final byte[] JPG = new byte[] {(byte) 0xFF, (byte) 0xD8}; public static final byte[] BMP = new byte[] {'B', 'M'}; public static final byte[] TIFF = new byte[] {0x49, 0x49, 0x2A, 0x00}; public static final byte[] TIFF1 = new byte[] {0x4D, 0x4D, 0x00, 0x2A}; public static final byte[] EMF = {0x01, 0x00, 0x00, 0x00}; public static final byte[] WMF1 = { (byte) 0xD7, (byte) 0xCD, (byte) 0xC6, (byte) 0x9A, 0x00, 0x00 }; public static final byte[] WMF2 = {0x01, 0x00, 0x09, 0x00, 0x00, 0x03}; // Windows 3.x // TODO: DIB, PICT public static final byte[] IHDR = new byte[] {'I', 'H', 'D', 'R'}; public static final byte[] COMPRESSED1 = {(byte) 0xFE, 0x78, (byte) 0xDA}; public static final byte[] COMPRESSED2 = {(byte) 0xFE, 0x78, (byte) 0x9C}; private int dataBlockStartOfsset; private int pictureBytesStartOffset; private int dataBlockSize; private int size; // private String fileName; private byte[] rawContent; private byte[] content; private byte[] _dataStream; private int aspectRatioX; private int aspectRatioY; private int height = -1; private int width = -1; public Picture(int dataBlockStartOfsset, byte[] _dataStream, boolean fillBytes) { this._dataStream = _dataStream; this.dataBlockStartOfsset = dataBlockStartOfsset; this.dataBlockSize = LittleEndian.getInt(_dataStream, dataBlockStartOfsset); this.pictureBytesStartOffset = getPictureBytesStartOffset(dataBlockStartOfsset, _dataStream, dataBlockSize); this.size = dataBlockSize - (pictureBytesStartOffset - dataBlockStartOfsset); if (size < 0) {} this.aspectRatioX = extractAspectRatioX(_dataStream, dataBlockStartOfsset); this.aspectRatioY = extractAspectRatioY(_dataStream, dataBlockStartOfsset); if (fillBytes) { fillImageContent(); } } public Picture(byte[] _dataStream) { this._dataStream = _dataStream; this.dataBlockStartOfsset = 0; this.dataBlockSize = _dataStream.length; this.pictureBytesStartOffset = 0; this.size = _dataStream.length; } private void fillWidthHeight() { String ext = suggestFileExtension(); // trying to extract width and height from pictures content: if ("jpg".equalsIgnoreCase(ext)) { fillJPGWidthHeight(); } else if ("png".equalsIgnoreCase(ext)) { fillPNGWidthHeight(); } } private static int extractAspectRatioX(byte[] _dataStream, int dataBlockStartOffset) { return LittleEndian.getShort(_dataStream, dataBlockStartOffset + 0x20) / 10; } private static int extractAspectRatioY(byte[] _dataStream, int dataBlockStartOffset) { return LittleEndian.getShort(_dataStream, dataBlockStartOffset + 0x22) / 10; } /** * Tries to suggest a filename: hex representation of picture structure offset in "Data" stream * plus extension that is tried to determine from first byte of picture's content. * * @return suggested file name */ public String suggestFullFileName() { String fileExt = suggestFileExtension(); return Integer.toHexString(dataBlockStartOfsset) + (fileExt.length() > 0 ? "." + fileExt : ""); } /** * Writes Picture's content bytes to specified OutputStream. Is useful when there is need to write * picture bytes directly to stream, omitting its representation in memory as distinct byte array. * * @param out a stream to write to * @throws IOException if some exception is occured while writing to specified out */ public void writeImageContent(OutputStream out) throws IOException { if (rawContent != null && rawContent.length > 0) { out.write(rawContent, 0, size); } else { out.write(_dataStream, pictureBytesStartOffset, size); } } /** * @return The offset of this picture in the picture bytes, used when matching up with {@link * CharacterRun#getPicOffset()} */ public int getStartOffset() { return dataBlockStartOfsset; } /** @return picture's content as byte array */ public byte[] getContent() { if (content == null || content.length <= 0) { fillImageContent(); } return content; } public byte[] getRawContent() { if (rawContent == null || rawContent.length <= 0) { fillRawImageContent(); } return rawContent; } /** @return size in bytes of the picture */ public int getSize() { return size; } /** returns horizontal aspect ratio for picture provided by user */ public int getAspectRatioX() { return aspectRatioX; } /** returns vertical aspect ratio for picture provided by user */ public int getAspectRatioY() { return aspectRatioY; } /** * tries to suggest extension for picture's file by matching signatures of popular image formats * to first bytes of picture's contents * * @return suggested file extension */ public String suggestFileExtension() { String extension = suggestFileExtension(_dataStream, pictureBytesStartOffset); if ("".equals(extension)) { // May be compressed. Get the uncompressed content and inspect that. extension = suggestFileExtension(getContent(), 0); } return extension; } /** Returns the mime type for the image */ public String getMimeType() { String extension = suggestFileExtension(); if ("jpg".equals(extension)) { return "image/jpeg"; } if ("png".equals(extension)) { return "image/png"; } if ("gif".equals(extension)) { return "image/gif"; } if ("bmp".equals(extension)) { return "image/bmp"; } if ("tiff".equals(extension)) { return "image/tiff"; } if ("wmf".equals(extension)) { return "image/x-wmf"; } if ("emf".equals(extension)) { return "image/x-emf"; } return "image/unknown"; } private String suggestFileExtension(byte[] _dataStream, int pictureBytesStartOffset) { if (matchSignature(_dataStream, JPG, pictureBytesStartOffset)) { return "jpg"; } else if (matchSignature(_dataStream, PNG, pictureBytesStartOffset)) { return "png"; } else if (matchSignature(_dataStream, GIF, pictureBytesStartOffset)) { return "gif"; } else if (matchSignature(_dataStream, BMP, pictureBytesStartOffset)) { return "bmp"; } else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset) || matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) { return "tiff"; } else { // Need to load the image content before we can try the following tests fillImageContent(); if (matchSignature(content, WMF1, 0) || matchSignature(content, WMF2, 0)) { return "wmf"; } else if (matchSignature(content, EMF, 0)) { return "emf"; } } // TODO: DIB, PICT return ""; } private static boolean matchSignature( byte[] dataStream, byte[] signature, int pictureBytesOffset) { boolean matched = pictureBytesOffset < dataStream.length; for (int i = 0; (i + pictureBytesOffset) < dataStream.length && i < signature.length; i++) { if (dataStream[i + pictureBytesOffset] != signature[i]) { matched = false; break; } } return matched; } // public String getFileName() // { // return fileName; // } // private static String extractFileName(int blockStartIndex, byte[] dataStream) { // int fileNameStartOffset = blockStartIndex + 0x7C; // int fileNameSizeOffset = blockStartIndex + FILENAME_SIZE_OFFSET; // int fileNameSize = LittleEndian.getShort(dataStream, fileNameSizeOffset); // // int fileNameIndex = fileNameStartOffset; // char[] fileNameChars = new char[(fileNameSize-1)/2]; // int charIndex = 0; // while(charIndex<fileNameChars.length) { // short aChar = LittleEndian.getShort(dataStream, fileNameIndex); // fileNameChars[charIndex] = (char)aChar; // charIndex++; // fileNameIndex += 2; // } // String fileName = new String(fileNameChars); // return fileName.trim(); // } private void fillRawImageContent() { this.rawContent = new byte[size]; System.arraycopy(_dataStream, pictureBytesStartOffset, rawContent, 0, size); } private void fillImageContent() { byte[] rawContent = getRawContent(); // HACK: Detect compressed images. In reality there should be some way to determine // this from the first 32 bytes, but I can't see any similarity between all the // samples I have obtained, nor any similarity in the data block contents. if (matchSignature(rawContent, COMPRESSED1, 32) || matchSignature(rawContent, COMPRESSED2, 32)) { try { InflaterInputStream in = new InflaterInputStream( new ByteArrayInputStream(rawContent, 33, rawContent.length - 33)); ByteArrayOutputStream out = new ByteArrayOutputStream(); byte[] buf = new byte[4096]; int readBytes; while ((readBytes = in.read(buf)) > 0) { out.write(buf, 0, readBytes); } content = out.toByteArray(); } catch (IOException e) { // Problems reading from the actual ByteArrayInputStream should never happen // so this will only ever be a ZipException. log.log(POILogger.INFO, "Possibly corrupt compression or non-compressed data", e); } } else { // Raw data is not compressed. content = rawContent; } } private static int getPictureBytesStartOffset( int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize) { int realPicoffset = dataBlockStartOffset; final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset; // Skip over the PICT block int PICTFBlockSize = LittleEndian.getShort( _dataStream, dataBlockStartOffset + PICT_HEADER_OFFSET); // Should be 68 bytes // Now the PICTF1 int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET; short MM_TYPE = LittleEndian.getShort(_dataStream, dataBlockStartOffset + PICT_HEADER_OFFSET + 2); if (MM_TYPE == 0x66) { // Skip the stPicName int cchPicName = LittleEndian.getUnsignedByte(_dataStream, PICTF1BlockOffset); PICTF1BlockOffset += 1 + cchPicName; } int PICTF1BlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset + PICTF1BlockOffset); int unknownHeaderOffset = (PICTF1BlockSize + PICTF1BlockOffset) < dataBlockEndOffset ? (PICTF1BlockSize + PICTF1BlockOffset) : PICTF1BlockOffset; realPicoffset += (unknownHeaderOffset + UNKNOWN_HEADER_SIZE); if (realPicoffset >= dataBlockEndOffset) { realPicoffset -= UNKNOWN_HEADER_SIZE; } return realPicoffset; } private void fillJPGWidthHeight() { /* http://www.codecomments.com/archive281-2004-3-158083.html Algorhitm proposed by Patrick TJ McPhee: read 2 bytes make sure they are 'ffd8'x repeatedly: read 2 bytes make sure the first one is 'ff'x if the second one is 'd9'x stop else if the second one is c0 or c2 (or possibly other values ...) skip 2 bytes read one byte into depth read two bytes into height read two bytes into width else read two bytes into length skip forward length-2 bytes Also used Ruby code snippet from: http://www.bigbold.com/snippets/posts/show/805 for reference */ int pointer = pictureBytesStartOffset + 2; int firstByte = _dataStream[pointer]; int secondByte = _dataStream[pointer + 1]; int endOfPicture = pictureBytesStartOffset + size; while (pointer < endOfPicture - 1) { do { firstByte = _dataStream[pointer]; secondByte = _dataStream[pointer + 1]; pointer += 2; } while (!(firstByte == (byte) 0xFF) && pointer < endOfPicture - 1); if (firstByte == ((byte) 0xFF) && pointer < endOfPicture - 1) { if (secondByte == (byte) 0xD9 || secondByte == (byte) 0xDA) { break; } else if ((secondByte & 0xF0) == 0xC0 && secondByte != (byte) 0xC4 && secondByte != (byte) 0xC8 && secondByte != (byte) 0xCC) { pointer += 5; this.height = getBigEndianShort(_dataStream, pointer); this.width = getBigEndianShort(_dataStream, pointer + 2); break; } else { pointer++; pointer++; int length = getBigEndianShort(_dataStream, pointer); pointer += length; } } else { pointer++; } } } private void fillPNGWidthHeight() { /* Used PNG file format description from http://www.wotsit.org/download.asp?f=png */ int HEADER_START = pictureBytesStartOffset + PNG.length + 4; if (matchSignature(_dataStream, IHDR, HEADER_START)) { int IHDR_CHUNK_WIDTH = HEADER_START + 4; this.width = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH); this.height = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH + 4); } } /** returns pixel width of the picture or -1 if dimensions determining was failed */ public int getWidth() { if (width == -1) { fillWidthHeight(); } return width; } /** returns pixel height of the picture or -1 if dimensions determining was failed */ public int getHeight() { if (height == -1) { fillWidthHeight(); } return height; } private static int getBigEndianInt(byte[] data, int offset) { return (((data[offset] & 0xFF) << 24) + ((data[offset + 1] & 0xFF) << 16) + ((data[offset + 2] & 0xFF) << 8) + (data[offset + 3] & 0xFF)); } private static int getBigEndianShort(byte[] data, int offset) { return (((data[offset] & 0xFF) << 8) + (data[offset + 1] & 0xFF)); } }
/** * Title: Sup Book - EXTERNALBOOK (0x01AE) * * <p>Description: A External Workbook Description (Supplemental Book) Its only a dummy record for * making new ExternSheet Record * * <p>REFERENCE: 5.38 * * <p> * * @author Libin Roman (Vista Portal LDT. Developer) * @author Andrew C. Oliver ([email protected]) */ public final class SupBookRecord extends StandardRecord { private static final POILogger logger = POILogFactory.getLogger(SupBookRecord.class); public static final short sid = 0x01AE; private static final short SMALL_RECORD_SIZE = 4; private static final short TAG_INTERNAL_REFERENCES = 0x0401; private static final short TAG_ADD_IN_FUNCTIONS = 0x3A01; private short field_1_number_of_sheets; private String field_2_encoded_url; private String[] field_3_sheet_names; private boolean _isAddInFunctions; protected static final char CH_VOLUME = 1; protected static final char CH_SAME_VOLUME = 2; protected static final char CH_DOWN_DIR = 3; protected static final char CH_UP_DIR = 4; protected static final char CH_LONG_VOLUME = 5; protected static final char CH_STARTUP_DIR = 6; protected static final char CH_ALT_STARTUP_DIR = 7; protected static final char CH_LIB_DIR = 8; protected static final String PATH_SEPERATOR = System.getProperty("file.separator"); public static SupBookRecord createInternalReferences(short numberOfSheets) { return new SupBookRecord(false, numberOfSheets); } public static SupBookRecord createAddInFunctions() { return new SupBookRecord( true, (short) 1 /* this field MUST be 0x0001 for add-in referencing */); } public static SupBookRecord createExternalReferences(String url, String[] sheetNames) { return new SupBookRecord(url, sheetNames); } private SupBookRecord(boolean isAddInFuncs, short numberOfSheets) { // else not 'External References' field_1_number_of_sheets = numberOfSheets; field_2_encoded_url = null; field_3_sheet_names = null; _isAddInFunctions = isAddInFuncs; } public SupBookRecord(String url, String[] sheetNames) { field_1_number_of_sheets = (short) sheetNames.length; field_2_encoded_url = url; field_3_sheet_names = sheetNames; _isAddInFunctions = false; } public boolean isExternalReferences() { return field_3_sheet_names != null; } public boolean isInternalReferences() { return field_3_sheet_names == null && !_isAddInFunctions; } public boolean isAddInFunctions() { return field_3_sheet_names == null && _isAddInFunctions; } /** * called by the constructor, should set class level fields. Should throw runtime exception for * bad/incomplete data. * * @param in the stream to read from */ public SupBookRecord(RecordInputStream in) { int recLen = in.remaining(); field_1_number_of_sheets = in.readShort(); if (recLen > SMALL_RECORD_SIZE) { // 5.38.1 External References _isAddInFunctions = false; field_2_encoded_url = in.readString(); String[] sheetNames = new String[field_1_number_of_sheets]; for (int i = 0; i < sheetNames.length; i++) { sheetNames[i] = in.readString(); } field_3_sheet_names = sheetNames; return; } // else not 'External References' field_2_encoded_url = null; field_3_sheet_names = null; short nextShort = in.readShort(); if (nextShort == TAG_INTERNAL_REFERENCES) { // 5.38.2 'Internal References' _isAddInFunctions = false; } else if (nextShort == TAG_ADD_IN_FUNCTIONS) { // 5.38.3 'Add-In Functions' _isAddInFunctions = true; if (field_1_number_of_sheets != 1) { throw new RuntimeException( "Expected 0x0001 for number of sheets field in 'Add-In Functions' but got (" + field_1_number_of_sheets + ")"); } } else { throw new RuntimeException( "invalid EXTERNALBOOK code (" + Integer.toHexString(nextShort) + ")"); } } public String toString() { StringBuffer sb = new StringBuffer(); sb.append(getClass().getName()).append(" [SUPBOOK "); if (isExternalReferences()) { sb.append("External References"); sb.append(" nSheets=").append(field_1_number_of_sheets); sb.append(" url=").append(field_2_encoded_url); } else if (_isAddInFunctions) { sb.append("Add-In Functions"); } else { sb.append("Internal References "); sb.append(" nSheets= ").append(field_1_number_of_sheets); } sb.append("]"); return sb.toString(); } protected int getDataSize() { if (!isExternalReferences()) { return SMALL_RECORD_SIZE; } int sum = 2; // u16 number of sheets sum += StringUtil.getEncodedSize(field_2_encoded_url); for (int i = 0; i < field_3_sheet_names.length; i++) { sum += StringUtil.getEncodedSize(field_3_sheet_names[i]); } return sum; } public void serialize(LittleEndianOutput out) { out.writeShort(field_1_number_of_sheets); if (isExternalReferences()) { StringUtil.writeUnicodeString(out, field_2_encoded_url); for (int i = 0; i < field_3_sheet_names.length; i++) { StringUtil.writeUnicodeString(out, field_3_sheet_names[i]); } } else { int field2val = _isAddInFunctions ? TAG_ADD_IN_FUNCTIONS : TAG_INTERNAL_REFERENCES; out.writeShort(field2val); } } public void setNumberOfSheets(short number) { field_1_number_of_sheets = number; } public short getNumberOfSheets() { return field_1_number_of_sheets; } public short getSid() { return sid; } public String getURL() { String encodedUrl = field_2_encoded_url; switch (encodedUrl.charAt(0)) { case 0: // Reference to an empty workbook name return encodedUrl.substring(1); // will this just be empty string? case 1: // encoded file name return decodeFileName(encodedUrl); case 2: // Self-referential external reference return encodedUrl.substring(1); } return encodedUrl; } private static String decodeFileName(String encodedUrl) { /* see "MICROSOFT OFFICE EXCEL 97-2007 BINARY FILE FORMAT SPECIFICATION" */ StringBuilder sb = new StringBuilder(); for (int i = 1; i < encodedUrl.length(); i++) { char c = encodedUrl.charAt(i); switch (c) { case CH_VOLUME: char driveLetter = encodedUrl.charAt(++i); if (driveLetter == '@') { sb.append("\\\\"); } else { // Windows notation for drive letters sb.append(driveLetter).append(":"); } break; case CH_SAME_VOLUME: sb.append(PATH_SEPERATOR); break; case CH_DOWN_DIR: sb.append(PATH_SEPERATOR); break; case CH_UP_DIR: sb.append("..").append(PATH_SEPERATOR); break; case CH_LONG_VOLUME: // Don't known to handle... logger.log(POILogger.WARN, "Found unexpected key: ChLongVolume - IGNORING"); break; case CH_STARTUP_DIR: case CH_ALT_STARTUP_DIR: case CH_LIB_DIR: logger.log(POILogger.WARN, "EXCEL.EXE path unkown - using this directoy instead: ."); sb.append(".").append(PATH_SEPERATOR); break; default: sb.append(c); } } return sb.toString(); } public String[] getSheetNames() { return field_3_sheet_names.clone(); } public void setURL(String pUrl) { // Keep the first marker character! field_2_encoded_url = field_2_encoded_url.substring(0, 1) + pUrl; } }