@Override public void transform(ArchivalUnit au, PdfDocument pdfDocument) throws PdfException { pdfDocument.unsetModificationDate(); PdfUtil.normalizeTrailerId(pdfDocument); pdfDocument.unsetMetadata(); PDDocumentInformation pdDocInfo = ((GtvPdfBoxDocument) pdfDocument).getPdDocumentInformation(); if (pdDocInfo.getCustomMetadataValue(GtvPdfBoxDocument.PDFDATE) != null) { pdDocInfo.setCustomMetadataValue(GtvPdfBoxDocument.PDFDATE, null); } if (pdDocInfo.getCustomMetadataValue(GtvPdfBoxDocument.PDFUSER) != null) { pdDocInfo.setCustomMetadataValue(GtvPdfBoxDocument.PDFUSER, null); } PdfStateMachineWorker worker = new PdfStateMachineWorker(); boolean anyXform = false; for (PdfPage pdfPage : pdfDocument.getPages()) { PdfTokenStream pdfTokenStream = pdfPage.getPageTokenStream(); worker.process(pdfTokenStream); if (worker.getResult()) { anyXform = true; List<PdfToken> tokens = pdfTokenStream.getTokens(); // clear tokens including text markers tokens.subList(worker.getBegin(), worker.getEnd() + 1).clear(); pdfTokenStream.setTokens(tokens); } } if (log.isDebug2()) { log.debug2("Transform: " + anyXform); } }
/**
 * Adds a blank page.
 *
 * <p>A new page is built from a media box derived from the given rectangle
 * and rotation, an empty box-size map and freshly created page resources. The
 * page is tagged with the current tabs setting, appended to the page tree
 * root, and the current page number is incremented.
 *
 * @param rect The page dimension
 * @param rotation The rotation angle in degrees
 * @since 2.1.5
 */
public void addPage(Rectangle rect, int rotation) {
  PdfRectangle mediaBox = new PdfRectangle(rect, rotation);
  PageResources pageResources = new PageResources();
  // The rotation is handed to the media box; the page itself is created with 0
  PdfPage blankPage =
      new PdfPage(mediaBox, new HashMap(), pageResources.getResources(), 0);
  blankPage.put(PdfName.TABS, getTabs());
  root.addPage(blankPage);
  currentPageNumber++;
}
/**
 * Rewrites the contents of the PDF page being transformed with those of the
 * output list, if at least one change has been indicated.
 *
 * <p>When a change has been indicated, a new PDF stream is obtained from the
 * page's document, the output list's tokens are written to it, the stream's
 * output stream is closed (even if writing fails), and only then is the
 * stream installed as the page's contents. When no change has been indicated,
 * the page is left untouched.
 *
 * @param pdfPage A PDF page (belonging to the PDF document).
 * @throws IOException if any processing error occurs, including a failure to
 *     close the output stream.
 */
protected synchronized void writeResult(PdfPage pdfPage) throws IOException {
  if (!atLeastOneChange) {
    logger.debug3("No result to write to PDF page");
    return;
  }

  logger.debug3("Writing result to PDF page");
  PDStream resultStream = pdfPage.getPdfDocument().makePdStream();
  OutputStream outputStream = resultStream.createOutputStream();
  try {
    ContentStreamWriter tokenWriter = new ContentStreamWriter(outputStream);
    tokenWriter.writeTokens(getOutputList());
  } finally {
    // Close so the stream is released and any buffered bytes are flushed
    // before the stream becomes the page's contents
    outputStream.close();
  }
  pdfPage.setContents(resultStream);
}
/** * Applies this transform to a PDF page. * * <p>Using the configured PDF operator processors, a PDF stream engine ({@link PDFStreamEngine} * parent class) iterates through the page's token stream, aggregating operands and invoking the * appropriate PDF operator processor for each PDF operator encountered. An output list of tokens * is created. At the end of the iteration, if any PDF operator processor has indicated to this * transform that it has changed the output list compared to the original stream, the PDF page's * contents are replaced with those of the output list (otherwise the PDF page is unchanged). * * @param pdfPage A PDF page (belonging to the PDF document). * @throws PageTransformException if the output list stack does not have exactly one output list * at the end of the transform. * @throws IOException if any other processing error occurs. * @see PDFStreamEngine#processStream * @see #reset */ public synchronized boolean transform(PdfPage pdfPage) throws IOException { logger.debug3("Begin page stream transform"); // Iterate over stream reset(); this.currentPdfPage = pdfPage; processStream(pdfPage.getPdPage(), pdfPage.findResources(), pdfPage.getContentStream()); // Sanity check if (listStack.size() != 1) { String logMessage = "Split/merge mismatch: after processing stream, list stack has size " + listStack.size(); logger.error(logMessage); throw new PageTransformException(logMessage); } writeResult(pdfPage); logger.debug2("Page stream transform result: " + atLeastOneChange); return atLeastOneChange; }