コード例 #1
0
  /**
   * Reads the name and the content of one embedded file from its file specification.
   *
   * @param reader the reader of the PDF document (not used by this method)
   * @param filespec the dictionary containing the file specification; may be {@code null}
   * @return a two-element array with the attachment's file name (a {@code String} without any
   *     directory part) at index 0 and the bytes obtained from the embedded stream (a {@code
   *     byte[]}) at index 1; {@code null} if {@code filespec} is {@code null}, if its type is
   *     neither {@code F} nor {@code Filespec}, or if the {@code EF} dictionary, the file name
   *     or the embedded stream is missing
   * @throws IOException if the attachment data cannot be read
   */
  protected static Object[] unpackFile(PdfReader reader, PdfDictionary filespec)
      throws IOException {
    if (filespec == null) {
      return null;
    }

    // Only dictionaries typed as a file specification are handled.
    PdfName specType = (PdfName) PdfReader.getPdfObject(filespec.get(PdfName.TYPE));
    boolean isFileSpec = PdfName.F.equals(specType) || PdfName.FILESPEC.equals(specType);
    if (!isFileSpec) {
      return null;
    }

    PdfDictionary embeddedFiles =
        (PdfDictionary) PdfReader.getPdfObject(filespec.get(PdfName.EF));
    if (embeddedFiles == null) {
      return null;
    }

    PdfString specifiedName = (PdfString) PdfReader.getPdfObject(filespec.get(PdfName.F));
    if (specifiedName == null) {
      return null;
    }

    // Going through File keeps only the last path segment of the stored name.
    File namedFile = new File(specifiedName.toUnicodeString());
    PRStream embeddedStream = (PRStream) PdfReader.getPdfObject(embeddedFiles.get(PdfName.F));
    if (embeddedStream == null) {
      return null;
    }

    byte[] content = PdfReader.getStreamBytes(embeddedStream);
    return new Object[] {namedFile.getName(), content};
  }
コード例 #2
0
ファイル: PDFAttachmentReader.java プロジェクト: chikoski/sqs
  /**
   * Reads the content of one embedded file, provided its file name ends with the given suffix.
   *
   * @param reader the reader of the PDF document (not used by this method)
   * @param filespec the dictionary containing the file specification; may be {@code null}
   * @param suffix the ending the attachment's file name must have for it to be returned
   * @return the bytes obtained from the embedded stream, or {@code null} if {@code filespec} is
   *     {@code null}, if its type is neither {@code F} nor {@code Filespec}, if the {@code EF}
   *     dictionary, the file name or the embedded stream is missing, or if the file name does
   *     not end with {@code suffix}
   * @throws IOException if the attachment data cannot be read
   */
  private static byte[] unpackFile(PdfReader reader, PdfDictionary filespec, String suffix)
      throws IOException {
    if (filespec == null) {
      return null;
    }

    // Only dictionaries typed as a file specification are handled.
    PdfName specType = (PdfName) PdfReader.getPdfObject(filespec.get(PdfName.TYPE));
    boolean isFileSpec = PdfName.F.equals(specType) || PdfName.FILESPEC.equals(specType);
    if (!isFileSpec) {
      return null;
    }

    PdfDictionary embeddedFiles =
        (PdfDictionary) PdfReader.getPdfObject(filespec.get(PdfName.EF));
    if (embeddedFiles == null) {
      return null;
    }

    PdfString specifiedName = (PdfString) PdfReader.getPdfObject(filespec.get(PdfName.F));
    if (specifiedName == null) {
      return null;
    }

    // Compare against the bare file name, without any directory part.
    String attachmentName = new File(specifiedName.toUnicodeString()).getName();
    boolean wanted = attachmentName.endsWith(suffix);
    if (!wanted) {
      return null;
    }

    PRStream embeddedStream = (PRStream) PdfReader.getPdfObject(embeddedFiles.get(PdfName.F));
    return embeddedStream == null ? null : PdfReader.getStreamBytes(embeddedStream);
  }
コード例 #3
0
 /**
  * Unpacks a file attachment and writes it into the given directory.
  *
  * <p>Nothing is written when {@code filespec} is {@code null}, when its type is neither {@code
  * F} nor {@code Filespec}, when the {@code EF} dictionary, the file name or the embedded stream
  * is missing, or when the target file already exists.
  *
  * @param reader The object that reads the PDF document (not used by this method)
  * @param filespec The dictionary containing the file specifications
  * @param outPath The path of the directory where the attachment has to be written
  * @throws IOException if the attachment cannot be read or the target file cannot be written
  */
 public static void unpackFile(PdfReader reader, PdfDictionary filespec, String outPath)
     throws IOException {
   if (filespec == null) {
     return;
   }
   PdfName type = filespec.getAsName(PdfName.TYPE);
   if (!PdfName.F.equals(type) && !PdfName.FILESPEC.equals(type)) {
     return;
   }
   PdfDictionary ef = filespec.getAsDict(PdfName.EF);
   if (ef == null) {
     return;
   }
   PdfString fn = filespec.getAsString(PdfName.F);
   if (fn == null) {
     return;
   }
   // Announce the attachment only once it is known to have a name.
   System.out.println("Unpacking file '" + fn + "' to " + outPath);

   // Only the last path segment of the stored name is used, so the file always lands directly
   // inside outPath.
   File fLast = new File(fn.toUnicodeString());
   File fullPath = new File(outPath, fLast.getName());
   if (fullPath.exists()) {
     return; // never overwrite an existing file
   }
   PRStream prs = (PRStream) PdfReader.getPdfObject(ef.get(PdfName.F));
   if (prs == null) {
     return;
   }
   byte[] b = PdfReader.getStreamBytes(prs);
   FileOutputStream fout = new FileOutputStream(fullPath);
   try {
     fout.write(b);
   } finally {
     // Release the file handle even if the write fails.
     fout.close();
   }
 }
コード例 #4
0
  /**
   * Searches all objects of the currently processed PDF file for image streams and stores them
   * in the given export directory or in the directory where the original PDF file is stored.
   *
   * <p>For every image stream the stream bytes are first written unchanged to a file with the
   * extension {@code .jpg}, regardless of the actual image encoding. In addition, a PNG file
   * with the suffix {@code _imageIO.png} is written for two supported combinations: DeviceRGB
   * with FlateDecode, and DeviceGray with RunLengthDecode.
   *
   * <p>The file name is built from the original PDF file name (without extension), a page
   * placeholder ({@code p000}), the object number and, if available, the internal image name.
   *
   * @param fullExportDirectoryPath The optional full export path where the images should be
   *     stored. If {@code null}, the location of the original PDF file is used.
   * @throws Exception if a stream cannot be read or an output file cannot be written
   */
  private void imageExtractor(String fullExportDirectoryPath) throws Exception {
    if (fullExportDirectoryPath != null) {
      fullExportDirectoryPath = GlobalTools.checkDirectoryPath(fullExportDirectoryPath);
      File exportDirectory = new File(fullExportDirectoryPath);
      if (!exportDirectory.exists()) {
        exportDirectory.mkdirs();
      }
    }

    int totalNumberOfPDFObjects = pdfReader.getXrefSize();
    for (int pdfObjectCounter = 0; pdfObjectCounter < totalNumberOfPDFObjects; pdfObjectCounter++) {
      PdfObject pdfObject = pdfReader.getPdfObject(pdfObjectCounter);
      if (pdfObject == null || !pdfObject.isStream()) {
        continue;
      }

      PdfStream pdfStream = (PdfStream) pdfObject;
      PdfObject pdfObjectSubType = pdfStream.get(PdfName.SUBTYPE);

      // Check PDF subtype and make sure it's an Image type
      if (pdfObjectSubType == null
          || !pdfObjectSubType.toString().equals(PdfName.IMAGE.toString())) {
        continue;
      }

      // FlateDecode streams are read through getStreamBytes; every other filter is not
      // supported yet and its bytes are taken raw.
      byte[] byteArrayImage;
      if (PdfName.FLATEDECODE.equals(pdfStream.getAsName(PdfName.FILTER))) {
        byteArrayImage = PdfReader.getStreamBytes((PRStream) pdfStream);
      } else {
        byteArrayImage = PdfReader.getStreamBytesRaw((PRStream) pdfStream);
      }

      // Extract the image name; it is only used if it starts with "/" and has further
      // characters after it.
      PdfObject nameObject = pdfStream.get(PdfName.NAME);
      String streamImageName = (nameObject == null ? null : nameObject.toString());
      if (streamImageName != null
          && streamImageName.length() > 1
          && streamImageName.startsWith("/")) {
        streamImageName = streamImageName.substring(1);
      } else {
        streamImageName = null;
      }

      String exportFileWithoutExtension =
          (fullExportDirectoryPath != null ? fullExportDirectoryPath : this.fullPDFDirectoryPath)
              + GlobalTools.getFileNameWithoutExtension(this.fullPDFFilePath)
              + "_("
              + "p000"
              + "_ref"
              + REF_NUMBER_FORMAT.format(pdfObjectCounter)
              + (streamImageName == null ? "_unk" : "_" + streamImageName)
              + ")";

      // Dump the stream bytes as they are. The ".jpg" extension is applied unconditionally.
      FileOutputStream fileOutputStream =
          new FileOutputStream(exportFileWithoutExtension + ".jpg");
      try {
        fileOutputStream.write(byteArrayImage);
        fileOutputStream.flush();
      } finally {
        // Release the file handle even if the write fails.
        fileOutputStream.close();
      }

      /*
       * Check image details
       */
      int pdfImageBitsPerComponent = readImageIntEntry(pdfStream, PdfName.BITSPERCOMPONENT);
      int pdfImageHeight = readImageIntEntry(pdfStream, PdfName.HEIGHT);
      int pdfImageWidth = readImageIntEntry(pdfStream, PdfName.WIDTH);

      logger.debug("Height..........:" + pdfImageHeight);
      logger.debug("Width...........:" + pdfImageWidth);
      logger.debug("BitsPerComponent:" + pdfImageBitsPerComponent);

      if (PdfName.DEVICERGB.equals(pdfStream.get(PdfName.COLORSPACE))
          && PdfName.FLATEDECODE.equals(pdfStream.get(PdfName.FILTER))
          && pdfImageBitsPerComponent > 0
          && pdfImageWidth > 0
          && pdfImageHeight > 0) {

        BufferedImage bufferedImage =
            ImageProcessingTools.toBufferedImage(
                byteArrayImage, pdfImageWidth, pdfImageHeight, pdfImageBitsPerComponent);
        writeImageAsPng(bufferedImage, exportFileWithoutExtension + "_imageIO" + ".png");
      } else if (PdfName.DEVICEGRAY.equals(pdfStream.get(PdfName.COLORSPACE))
          && PdfName.RUNLENGTHDECODE.equals(pdfStream.get(PdfName.FILTER))
          && pdfImageBitsPerComponent > 0
          && pdfImageWidth > 0) {

        // NOTE(review): the height is hard-coded to 2233 here instead of using pdfImageHeight;
        // this looks like a leftover from testing with one specific document - confirm.
        BufferedImage bufferedImage =
            ImageProcessingTools.toBufferedImage(
                ImageProcessingTools.runLengthDecode(byteArrayImage),
                pdfImageWidth,
                2233,
                pdfImageBitsPerComponent);
        writeImageAsPng(bufferedImage, exportFileWithoutExtension + "_imageIO" + ".png");
      }
    }
  }

  /**
   * Reads a numeric entry of an image stream dictionary as an {@code int}.
   *
   * @param stream the stream whose dictionary is queried
   * @param key the name of the entry to read
   * @return the entry's value, or {@code -1} if the entry is missing, is not a number or cannot
   *     be parsed as an integer
   */
  private int readImageIntEntry(PdfStream stream, PdfName key) {
    PdfObject value = stream.get(key);
    if (value == null || !value.isNumber()) {
      return -1;
    }
    try {
      return Integer.parseInt(value.toString());
    } catch (NumberFormatException ignored) {
      // A number without a plain integer representation is treated like a missing entry.
      return -1;
    }
  }

  /**
   * Writes the given image as PNG to the given file and closes the file afterwards.
   *
   * @param image the image to write; if {@code null}, no file is created
   * @param filePath the full path of the PNG file to create
   * @throws java.io.IOException if the file cannot be created or written
   */
  private void writeImageAsPng(BufferedImage image, String filePath) throws java.io.IOException {
    if (image == null) {
      return;
    }
    FileOutputStream pngOutputStream = new FileOutputStream(filePath);
    try {
      ImageIO.write(image, "PNG", pngOutputStream);
    } finally {
      // ImageIO.write leaves the stream open, so it has to be closed here.
      pngOutputStream.close();
    }
  }