@Override
  public void transform(ArchivalUnit au, PdfDocument pdfDocument) throws PdfException {
    // Document-level cleanup: modification date, trailer ID and metadata.
    // NOTE(review): presumably done so that volatile fields do not affect comparison of
    // otherwise identical documents -- confirm against the filter's callers.
    pdfDocument.unsetModificationDate();
    PdfUtil.normalizeTrailerId(pdfDocument);
    pdfDocument.unsetMetadata();

    // The custom info entries are only reachable through the GtvPdfBoxDocument subtype.
    GtvPdfBoxDocument gtvDocument = (GtvPdfBoxDocument) pdfDocument;
    PDDocumentInformation info = gtvDocument.getPdDocumentInformation();
    // Each entry is nulled out only when it currently holds a value.
    if (null != info.getCustomMetadataValue(GtvPdfBoxDocument.PDFDATE)) {
      info.setCustomMetadataValue(GtvPdfBoxDocument.PDFDATE, null);
    }
    if (null != info.getCustomMetadataValue(GtvPdfBoxDocument.PDFUSER)) {
      info.setCustomMetadataValue(GtvPdfBoxDocument.PDFUSER, null);
    }

    // Page-level cleanup: one worker is reused for every page's token stream.
    PdfStateMachineWorker worker = new PdfStateMachineWorker();
    boolean changed = false;
    for (PdfPage page : pdfDocument.getPages()) {
      PdfTokenStream pageStream = page.getPageTokenStream();
      worker.process(pageStream);
      if (!worker.getResult()) {
        // Nothing was flagged on this page; leave its tokens alone.
        continue;
      }
      changed = true;
      List<PdfToken> pageTokens = pageStream.getTokens();
      // The worker's end index is inclusive, hence the + 1 for subList's exclusive bound;
      // the removed range includes the text markers.
      int from = worker.getBegin();
      int toExclusive = worker.getEnd() + 1;
      pageTokens.subList(from, toExclusive).clear();
      pageStream.setTokens(pageTokens);
    }

    if (log.isDebug2()) {
      log.debug2("Transform: " + changed);
    }
  }
// Example #2
// 0
 /**
  * Adds a blank page.
  *
  * <p>The page is constructed from a media box built out of {@code rect} and {@code rotation},
  * an empty map, and the resources of a newly created {@code PageResources}. It is then given
  * the value of {@code getTabs()} under {@code PdfName.TABS}, added to {@code root}, and the
  * current page number is incremented by one.
  *
  * @param rect The page dimension
  * @param rotation The rotation angle in degrees
  * @since 2.1.5
  */
 public void addPage(Rectangle rect, int rotation) {
   PdfPage blankPage =
       new PdfPage(
           new PdfRectangle(rect, rotation),
           new HashMap(),
           new PageResources().getResources(),
           0);
   blankPage.put(PdfName.TABS, getTabs());
   root.addPage(blankPage);
   currentPageNumber++;
 }
 /**
  * Rewrites the contents of the PDF page being transformed with those of the output list, if at
  * least one change has been indicated.
  *
  * <p>When a change has been indicated, the output list is written into a newly made stream, the
  * stream's output is closed (even if writing fails) so that nothing written is left pending and
  * the handle is not leaked, and only then is the stream installed as the page's contents. When
  * no change has been indicated the page is not touched.
  *
  * @param pdfPage A PDF page (belonging to the PDF document).
  * @throws IOException if any processing error occurs, including a failure to close the
  *     stream's output.
  */
 protected synchronized void writeResult(PdfPage pdfPage) throws IOException {
   if (!atLeastOneChange) {
     logger.debug3("No result to write to PDF page");
     return;
   }

   logger.debug3("Writing result to PDF page");
   PDStream resultStream = pdfPage.getPdfDocument().makePdStream();
   OutputStream outputStream = resultStream.createOutputStream();
   try {
     ContentStreamWriter tokenWriter = new ContentStreamWriter(outputStream);
     tokenWriter.writeTokens(getOutputList());
   } finally {
     // Always release the output; closing also pushes out anything still pending.
     outputStream.close();
   }
   // Install the new contents only after the stream has been completely written and closed.
   pdfPage.setContents(resultStream);
 }
  /**
   * Runs this transform over a single PDF page.
   *
   * <p>The transform is first {@link #reset reset}, the page is recorded as the current page, and
   * the page's content stream is handed to {@code processStream} together with the page's
   * resources. Once the stream has been processed, the list stack must hold exactly one list;
   * if it does, {@code writeResult} is invoked for the page (which replaces the page's contents
   * only when at least one change has been indicated) and the change flag is returned.
   *
   * @param pdfPage A PDF page (belonging to the PDF document).
   * @return the value of the "at least one change" flag after the page has been processed.
   * @throws PageTransformException if the list stack does not hold exactly one list after the
   *     stream has been processed.
   * @throws IOException if any other processing error occurs.
   * @see PDFStreamEngine#processStream
   * @see #reset
   */
  public synchronized boolean transform(PdfPage pdfPage) throws IOException {
    logger.debug3("Begin page stream transform");

    // Start from a clean state, then walk the page's stream
    reset();
    this.currentPdfPage = pdfPage;
    processStream(pdfPage.getPdPage(), pdfPage.findResources(), pdfPage.getContentStream());

    // Expected outcome: exactly one list remains on the stack
    if (listStack.size() == 1) {
      writeResult(pdfPage);
      logger.debug2("Page stream transform result: " + atLeastOneChange);
      return atLeastOneChange;
    }

    // Anything else is a mismatch: log it and fail with the same message
    String mismatch =
        "Split/merge mismatch: after processing stream, list stack has size " + listStack.size();
    logger.error(mismatch);
    throw new PageTransformException(mismatch);
  }