/**
 * Builds the form XObject stream that represents one page of the reader.
 *
 * <p>If the page's {@code /Contents} entry is a single stream, the returned stream is created
 * from that stream together with the form dictionary. Otherwise the page content is obtained
 * as a byte array (empty when the page has no {@code /Contents} entry at all) and wrapped in a
 * new stream using the requested compression level.
 *
 * @param pageNumber the page of which you want the stream
 * @param compressionLevel the compression level applied when a new stream has to be created
 *     from the page content bytes
 * @return the page content as a form XObject stream
 * @throws IOException propagated from the reader or stream calls made here
 * @since 2.1.3 (the method already existed without param compressionLevel)
 */
PdfStream getFormXObject(int pageNumber, int compressionLevel) throws IOException {
    PdfDictionary pageDict = reader.getPageNRelease(pageNumber);
    PdfObject pageContents = PdfReader.getPdfObjectRelease(pageDict.get(PdfName.CONTENTS));
    PdfDictionary formDict = new PdfDictionary();

    // contentBytes stays null only when the existing content stream can be reused directly.
    byte[] contentBytes;
    if (pageContents == null) {
        contentBytes = new byte[0];
    } else if (pageContents.isStream()) {
        formDict.putAll((PRStream) pageContents);
        contentBytes = null;
    } else {
        contentBytes = reader.getPageContent(pageNumber, file);
    }

    // Entries that turn the dictionary into a form XObject.
    formDict.put(
            PdfName.RESOURCES,
            PdfReader.getPdfObjectRelease(pageDict.get(PdfName.RESOURCES)));
    formDict.put(PdfName.TYPE, PdfName.XOBJECT);
    formDict.put(PdfName.SUBTYPE, PdfName.FORM);

    PdfImportedPage importedPage =
            (PdfImportedPage) importedPages.get(new Integer(pageNumber));
    formDict.put(PdfName.BBOX, new PdfRectangle(importedPage.getBoundingBox()));

    PdfArray pageMatrix = importedPage.getMatrix();
    if (pageMatrix != null) {
        formDict.put(PdfName.MATRIX, pageMatrix);
    } else {
        formDict.put(PdfName.MATRIX, IDENTITYMATRIX);
    }
    formDict.put(PdfName.FORMTYPE, ONE);

    if (contentBytes == null) {
        return new PRStream((PRStream) pageContents, formDict);
    }
    PRStream formStream = new PRStream(reader, contentBytes, compressionLevel);
    formStream.putAll(formDict);
    return formStream;
}
/** * This method searches for all image objects from the currently processed PDF file and stores * them as PDF in the given export directory or in the same directory where the original PDF file * is stored. * * <p>The filename of the images is build based on the original PDF filename (without extension) * and additional details like page number, image number and if available the internal image name. * * @param fullExportDirectoryPath The optional full export path where the images should be stored. * If not given, the location of the original PDF file is used. * @throws Exception */ private void imageExtractor(String fullExportDirectoryPath) throws Exception { if (fullExportDirectoryPath != null) { fullExportDirectoryPath = GlobalTools.checkDirectoryPath(fullExportDirectoryPath); File exportDirectory = new File(fullExportDirectoryPath); if (!exportDirectory.exists()) { exportDirectory.mkdirs(); } // end if } // end if int totalNumberOfPDFObjects = pdfReader.getXrefSize(); for (int pdfObjectCounter = 0; pdfObjectCounter < totalNumberOfPDFObjects; pdfObjectCounter++) { PdfObject pdfObject = pdfReader.getPdfObject(pdfObjectCounter); if (pdfObject != null) { if (pdfObject.isStream()) { PdfStream pdfStream = (PdfStream) pdfObject; PdfObject pdfObjectSubType = pdfStream.get(PdfName.SUBTYPE); if (pdfObject == null) { logger.debug("The internal PDF object is null."); } // end if if (!pdfObject.isStream()) { logger.debug("The internal PDF object is not representing a stream object."); } // end if // Check PDF subtype and make sure it's an Image type if (pdfObjectSubType != null && pdfObjectSubType.toString().equals(PdfName.IMAGE.toString())) { // Now we have a PDF stream object with an image but what is that exactly? 
// byte[] byteArrayImage = PdfReader.getStreamBytesRaw((PRStream) pdfStream); byte[] byteArrayImage = null; if (PdfName.FLATEDECODE.equals(pdfStream.getAsName(PdfName.FILTER))) { byteArrayImage = PdfReader.getStreamBytes((PRStream) pdfStream); // else if other filter (not supported yet) } else { byteArrayImage = PdfReader.getStreamBytesRaw((PRStream) pdfStream); } // end if..else // Test PdfImage - START /* PdfImage pdfImage = (PdfImage) ((PdfStream)((PRStream)pdfStream)); logger.trace("pdfImage --> pdfName --> Width...........: " + pdfStream.get(PdfName.WIDTH)); logger.trace("pdfImage --> pdfName --> Height..........: " + pdfStream.get(PdfName.HEIGHT)); logger.trace("pdfImage --> pdfName --> Id..............: " + pdfImage.get(PdfName.ID)); logger.trace("pdfImage --> pdfName --> Image...........: " + pdfImage.get(PdfName.IMAGE)); logger.trace("pdfImage --> pdfName --> ImageB..........: " + pdfImage.get(PdfName.IMAGEB)); logger.trace("pdfImage --> pdfName --> ImageC..........: " + pdfImage.get(PdfName.IMAGEC)); logger.trace("pdfImage --> pdfName --> ImageI..........: " + pdfImage.get(PdfName.IMAGEI)); logger.trace("pdfImage --> pdfName --> Imagemask.......: " + pdfImage.get(PdfName.IMAGEMASK)); logger.trace("pdfImage --> pdfName --> Info............: " + pdfImage.get(PdfName.INFO)); logger.trace("pdfImage --> pdfName --> Name............: " + pdfImage.get(PdfName.NAME)); logger.trace("pdfImage --> pdfName --> Named...........: " + pdfImage.get(PdfName.NAMED)); logger.trace("pdfImage --> pdfName --> Page............: " + pdfStream.get(PdfName.PAGE)); */ // Test PdfImage - STOP // STREAM /* logger.trace("pdfObject.toString()): " + pdfObject.toString()); logger.trace("pdfObjectCounter.................: " + pdfObjectCounter); logger.trace("pdfObject.getIndRef().getNumber(): " + (pdfObject.getIndRef()!=null?pdfObject.getIndRef().toString():"null")); logger.trace("pdfStream.getIndRef().getNumber(): " + 
(pdfStream.getIndRef()!=null?pdfStream.getIndRef().toString():"null")); logger.trace("pdfStream --> pdfName --> toString........: " + pdfStream.toString()); logger.trace("pdfStream --> pdfName --> Width...........: " + pdfStream.get(PdfName.WIDTH)); logger.trace("pdfStream --> pdfName --> Height..........: " + pdfStream.get(PdfName.HEIGHT)); logger.trace("pdfStream --> pdfName --> BitsPerComponent: " + pdfStream.get(PdfName.BITSPERCOMPONENT)); logger.trace("pdfStream --> pdfName --> BitsPerSample...: " + pdfStream.get(PdfName.BITSPERSAMPLE)); logger.trace("pdfStream --> pdfName --> ColorSpace......: " + pdfStream.get(PdfName.COLORSPACE)); logger.trace("pdfStream --> pdfName --> CCITTFaxDecode..: " + pdfStream.get(PdfName.CCITTFAXDECODE)); logger.trace("pdfStream --> pdfName --> Document........: " + pdfStream.get(PdfName.DOCUMENT)); logger.trace("pdfStream --> pdfName --> Decode..........: " + pdfStream.get(PdfName.DECODE)); logger.trace("pdfStream --> pdfName --> DecodeParms.....: " + pdfStream.get(PdfName.DECODEPARMS)); logger.trace("pdfStream --> pdfName --> DeviceGray......: " + pdfStream.get(PdfName.DEVICEGRAY)); logger.trace("pdfStream --> pdfName --> DeviceCMYK......: " + pdfStream.get(PdfName.DEVICECMYK)); logger.trace("pdfStream --> pdfName --> DeviceRGB.......: " + pdfStream.get(PdfName.DEVICERGB)); logger.trace("pdfStream --> pdfName --> Filter..........: " + pdfStream.get(PdfName.FILTER)); logger.trace("pdfStream --> pdfName --> Filter - as name: " + pdfStream.getAsName(PdfName.FILTER)); logger.trace("pdfStream --> pdfName --> Id..............: " + pdfStream.get(PdfName.ID)); logger.trace("pdfStream --> pdfName --> Image...........: " + pdfStream.get(PdfName.IMAGE)); logger.trace("pdfStream --> pdfName --> ImageB..........: " + pdfStream.get(PdfName.IMAGEB)); logger.trace("pdfStream --> pdfName --> ImageC..........: " + pdfStream.get(PdfName.IMAGEC)); logger.trace("pdfStream --> pdfName --> ImageI..........: " + pdfStream.get(PdfName.IMAGEI)); 
logger.trace("pdfStream --> pdfName --> Imagemask.......: " + pdfStream.get(PdfName.IMAGEMASK)); logger.trace("pdfStream --> pdfName --> Info............: " + pdfStream.get(PdfName.INFO)); logger.trace("pdfStream --> pdfName --> Name............: " + pdfStream.get(PdfName.NAME)); logger.trace("pdfStream --> pdfName --> Named...........: " + pdfStream.get(PdfName.NAMED)); logger.trace("pdfStream --> pdfName --> Named - as name.: " + pdfStream.getAsName(PdfName.NAMED)); logger.trace("pdfStream --> pdfName --> Page............: " + pdfStream.get(PdfName.PAGE)); logger.trace("pdfStream --> pdfName --> PageElement.....: " + pdfStream.get(PdfName.PAGEELEMENT)); logger.trace("pdfStream --> pdfName --> Pdf.............: " + pdfStream.get(PdfName.PDF)); logger.trace("pdfStream --> pdfName --> PdfDocEncoding..: " + pdfStream.get(PdfName.PDFDOCENCODING)); logger.trace("pdfStream --> pdfName --> Position........: " + pdfStream.get(PdfName.POSITION)); logger.trace("pdfStream --> pdfName --> Producer........: " + pdfStream.get(PdfName.PRODUCER)); logger.trace("pdfStream --> pdfName --> Properties......: " + pdfStream.get(PdfName.PROPERTIES)); logger.trace("pdfStream --> pdfName --> Sect............: " + pdfStream.get(PdfName.SECT)); logger.trace("pdfStream --> pdfName --> SubType.........: " + pdfStream.get(PdfName.SUBTYPE)); logger.trace("pdfStream --> pdfName --> Supplement......: " + pdfStream.get(PdfName.SUPPLEMENT)); logger.trace("pdfStream --> pdfName --> Title...........: " + pdfStream.get(PdfName.TITLE)); */ // logger.trace("pdfImage --> pdfName --> : " + (pdfName.).toString()); // Extract the image name String streamImageName = (pdfStream.get(PdfName.NAME) == null ? 
null : pdfStream.get(PdfName.NAME).toString()); if (streamImageName != null && streamImageName.length() > 1 && streamImageName.startsWith("/")) { streamImageName = streamImageName.substring(1); } else { streamImageName = null; } // end if..else String exportFileWithoutExtension = (fullExportDirectoryPath != null ? fullExportDirectoryPath : this.fullPDFDirectoryPath) + GlobalTools.getFileNameWithoutExtension(this.fullPDFFilePath) + "_(" + "p000" + "_ref" + REF_NUMBER_FORMAT.format(pdfObjectCounter) + (streamImageName == null ? "_unk" : "_" + streamImageName) + ")"; // Test FileOutputStream fileOutputStream = new FileOutputStream(exportFileWithoutExtension + ".jpg"); /* * Write given byte array to a file. */ fileOutputStream.write(byteArrayImage); fileOutputStream.flush(); fileOutputStream.close(); fileOutputStream = null; /* * Check image details */ int pdfImageBitsPerComponent = -1; try { if (pdfStream.get(PdfName.BITSPERCOMPONENT).isNumber()) { pdfImageBitsPerComponent = new Integer(pdfStream.get(PdfName.BITSPERCOMPONENT).toString()).intValue(); } } catch (NumberFormatException ex) { } int pdfImageHeight = -1; try { if (pdfStream.get(PdfName.HEIGHT).isNumber()) { pdfImageHeight = new Integer(pdfStream.get(PdfName.HEIGHT).toString()).intValue(); } } catch (NumberFormatException ex) { } int pdfImageWidth = -1; try { if (pdfStream.get(PdfName.WIDTH).isNumber()) { pdfImageWidth = new Integer(pdfStream.get(PdfName.WIDTH).toString()).intValue(); } } catch (NumberFormatException ex) { } logger.debug("Height..........:" + pdfImageHeight); logger.debug("Width...........:" + pdfImageWidth); logger.debug("BitsPerComponent:" + pdfImageBitsPerComponent); // or you could try making a java.awt.Image from the array: if (PdfName.DEVICERGB.equals(pdfStream.get(PdfName.COLORSPACE)) && PdfName.FLATEDECODE.equals(pdfStream.get(PdfName.FILTER)) && pdfImageBitsPerComponent > 0 && pdfImageWidth > 0 && pdfImageHeight > 0) { BufferedImage bufferedImage = 
ImageProcessingTools.toBufferedImage( byteArrayImage, pdfImageWidth, pdfImageHeight, pdfImageBitsPerComponent); if (bufferedImage != null) { ImageIO.write( bufferedImage, "PNG", new FileOutputStream(exportFileWithoutExtension + "_imageIO" + ".png")); } // end if } else if (PdfName.DEVICEGRAY.equals(pdfStream.get(PdfName.COLORSPACE)) && PdfName.RUNLENGTHDECODE.equals(pdfStream.get(PdfName.FILTER)) && pdfImageBitsPerComponent > 0 && pdfImageWidth > 0) { BufferedImage bufferedImage = ImageProcessingTools.toBufferedImage( ImageProcessingTools.runLengthDecode(byteArrayImage), pdfImageWidth, 2233, pdfImageBitsPerComponent); if (bufferedImage != null) { ImageIO.write( bufferedImage, "PNG", new FileOutputStream(exportFileWithoutExtension + "_imageIO" + ".png")); } // end if } // end if... /* Image image = Toolkit.getDefaultToolkit().createImage(imageByteArray); BufferedImage bufferedImage = ImageProcessingTools.toBufferedImage(image, pdfImageWidth, pdfImageHeight); if (bufferedImage != null) { System.out.println("Image-Height....:" + bufferedImage.getHeight()); System.out.println("Image-Width.....:" + bufferedImage.getWidth()); System.out.println("Image-isAlphaP..:" + bufferedImage.isAlphaPremultiplied()); File pngOutputFile = new File(exportFileWithoutExtension + "_imageIO.jpg"); ImageIO.write(bufferedImage, "jpg", pngOutputFile); } */ /**/ } // end if } // end if } // end if } // end for }
/**
 * Reports whether {@code object} is a stream by delegating to its own {@code isStream()}.
 *
 * @return the value returned by {@code object.isStream()}
 */
public boolean isStream() {
    final boolean streamObject = object.isStream();
    return streamObject;
}