/** * Applies this transform to a PDF page. * * <p>Using the configured PDF operator processors, a PDF stream engine ({@link PDFStreamEngine} * parent class) iterates through the page's token stream, aggregating operands and invoking the * appropriate PDF operator processor for each PDF operator encountered. An output list of tokens * is created. At the end of the iteration, if any PDF operator processor has indicated to this * transform that it has changed the output list compared to the original stream, the PDF page's * contents are replaced with those of the output list (otherwise the PDF page is unchanged). * * @param pdfPage A PDF page (belonging to the PDF document). * @throws PageTransformException if the output list stack does not have exactly one output list * at the end of the transform. * @throws IOException if any other processing error occurs. * @see PDFStreamEngine#processStream * @see #reset */ public synchronized boolean transform(PdfPage pdfPage) throws IOException { logger.debug3("Begin page stream transform"); // Iterate over stream reset(); this.currentPdfPage = pdfPage; processStream(pdfPage.getPdPage(), pdfPage.findResources(), pdfPage.getContentStream()); // Sanity check if (listStack.size() != 1) { String logMessage = "Split/merge mismatch: after processing stream, list stack has size " + listStack.size(); logger.error(logMessage); throw new PageTransformException(logMessage); } writeResult(pdfPage); logger.debug2("Page stream transform result: " + atLeastOneChange); return atLeastOneChange; }
/**
 * Starts a new output list: a freshly created, empty list becomes the top of the output list
 * stack.
 */
public synchronized void splitOutputList() {
  logger.debug3("Splitting");
  final ArrayList freshList = new ArrayList();
  listStack.push(freshList);
}
/** * Resets the state of this transform; this clears the stack and pushes an empty list onto it, and * clears the flag that indicates there has been at least one change. * * <p>This method is called at the beginning of {@link #transform}. * * <p>This method is <em>not</em> called at the end of {@link #transform}. Though the contents of * the resulting output list may have been written to the PDF page already, clients may wish to * inspect the final result without having to re-parse the token stream of the PDF page. However, * the result list may be quite large and the stack will hold on to it until the next call to this * method, which may represent a memory issue. Thus clients <em>may</em> call {@link #reset} after * a call to {@link #transform} to clear the output list stack. */ public synchronized void reset() { logger.debug3("Resetting the page stream transform"); atLeastOneChange = false; listStack.clear(); listStack.push(new ArrayList()); // FIXME: initial capacity? }
/** * Pops the output list currently at the top of the output list stack and discards it, and appends * the given replacement list to the output list immediately underneath. * * @param replacement A list of tokens to be appended to the output list currently immediately * under the top of the stack. * @throws EmptyStackException if there is currently only one output list on the stack. */ public synchronized void mergeOutputList(List replacement) { logger.debug3("Merging with replacement"); listStack.pop(); // discard result List newTop = (List) listStack.peek(); newTop.addAll(replacement); }
/**
 * Folds the output list at the top of the output list stack into the one directly beneath it:
 * the top list is popped and all of its tokens are appended to the end of the list that becomes
 * the new top.
 *
 * @throws EmptyStackException if there is currently only one output list on the stack.
 * @see #mergeOutputList(List)
 */
public synchronized void mergeOutputList() {
  logger.debug3("Merging");
  final List popped = (List) listStack.pop();
  ((List) listStack.peek()).addAll(popped);
}
/**
 * Retrieves, without removing it, the output list currently at the top of the output list
 * stack. The list itself is returned, not a copy of it.
 *
 * @return The list of tokens currently at the top of the list stack.
 * @see #splitOutputList
 * @see #mergeOutputList()
 * @see #mergeOutputList(List)
 */
public synchronized List getOutputList() {
  final List currentTop = (List) listStack.peek();
  return currentTop;
}