private void split(String[] args) throws Exception { String password = ""; String split = null; String startPage = null; String endPage = null; boolean useNonSeqParser = false; Splitter splitter = new Splitter(); String pdfFile = null; for (int i = 0; i < args.length; i++) { if (args[i].equals(PASSWORD)) { i++; if (i >= args.length) { usage(); } password = args[i]; } else if (args[i].equals(SPLIT)) { i++; if (i >= args.length) { usage(); } split = args[i]; } else if (args[i].equals(START_PAGE)) { i++; if (i >= args.length) { usage(); } startPage = args[i]; } else if (args[i].equals(END_PAGE)) { i++; if (i >= args.length) { usage(); } endPage = args[i]; } else if (args[i].equals(NONSEQ)) { useNonSeqParser = true; } else { if (pdfFile == null) { pdfFile = args[i]; } } } if (pdfFile == null) { usage(); } else { PDDocument document = null; List<PDDocument> documents = null; try { if (useNonSeqParser) { document = PDDocument.loadNonSeq(new File(pdfFile), null, password); } else { document = PDDocument.load(pdfFile); if (document.isEncrypted()) { try { document.decrypt(password); } catch (InvalidPasswordException e) { if (args.length == 4) // they supplied the wrong password { System.err.println("Error: The supplied password is incorrect."); System.exit(2); } else { // they didn't supply a password and the default of "" was wrong. System.err.println("Error: The document is encrypted."); usage(); } } } } int numberOfPages = document.getNumberOfPages(); boolean startEndPageSet = false; if (startPage != null) { splitter.setStartPage(Integer.parseInt(startPage)); if (split == null) { splitter.setSplitAtPage(numberOfPages); } } if (endPage != null) { splitter.setEndPage(Integer.parseInt(endPage)); if (split == null) { splitter.setSplitAtPage(Integer.parseInt(endPage)); } } if (split != null) { splitter.setSplitAtPage(Integer.parseInt(split)); } else { if (!startEndPageSet) { splitter.setSplitAtPage(1); } } documents = splitter.split(document); for (int i = 0; i < documents.size(); i++) { PDDocument doc = documents.get(i); String fileName = pdfFile.substring(0, pdfFile.length() - 4) + "-" + i + ".pdf"; writeDocument(doc, fileName); doc.close(); } } finally { if (document != null) { document.close(); } for (int i = 0; documents != null && i < documents.size(); i++) { PDDocument doc = (PDDocument) documents.get(i); doc.close(); } } } }
/** * @param reader * @param writer * @param options * @throws Exception */ protected final void action( Action ruleAction, NodeRef actionedUponNodeRef, ContentReader reader, Map<String, Object> options) { PDDocument pdf = null; InputStream is = null; File tempDir = null; ContentWriter writer = null; try { // Get the split frequency int splitFrequency = 0; String splitFrequencyString = options.get(PARAM_SPLIT_AT_PAGE).toString(); if (!splitFrequencyString.equals("")) { try { splitFrequency = Integer.valueOf(splitFrequencyString); } catch (NumberFormatException e) { throw new AlfrescoRuntimeException(e.getMessage(), e); } } // Get contentReader inputStream is = reader.getContentInputStream(); // stream the document in pdf = PDDocument.load(is); // split the PDF and put the pages in a list Splitter splitter = new Splitter(); // Need to adjust the input value to get the split at the right page splitter.setSplitAtPage(splitFrequency - 1); // Split the pages List<PDDocument> pdfs = splitter.split(pdf); // Start page split numbering at int page = 1; // build a temp dir, name based on the ID of the noderef we are // importing File alfTempDir = TempFileProvider.getTempDir(); tempDir = new File(alfTempDir.getPath() + File.separatorChar + actionedUponNodeRef.getId()); tempDir.mkdir(); // FLAG: This is ugly.....get the first PDF. PDDocument firstPDF = (PDDocument) pdfs.remove(0); int pagesInFirstPDF = firstPDF.getNumberOfPages(); String lastPage = ""; String pg = "_pg"; if (pagesInFirstPDF > 1) { pg = "_pgs"; lastPage = "-" + pagesInFirstPDF; } String fileNameSansExt = getFilenameSansExt(actionedUponNodeRef, FILE_EXTENSION); firstPDF.save( tempDir + "" + File.separatorChar + fileNameSansExt + pg + page + lastPage + FILE_EXTENSION); try { firstPDF.close(); } catch (IOException e) { throw new AlfrescoRuntimeException(e.getMessage(), e); } // FLAG: Like I said: "_UGLY_" ..... and it gets worse PDDocument secondPDF = null; Iterator<PDDocument> its = pdfs.iterator(); int pagesInSecondPDF = 0; while (its.hasNext()) { if (secondPDF != null) { // Get the split document and save it into the temp dir with // new name PDDocument splitpdf = (PDDocument) its.next(); int pagesInThisPDF = splitpdf.getNumberOfPages(); pagesInSecondPDF = pagesInSecondPDF + pagesInThisPDF; PDFMergerUtility merger = new PDFMergerUtility(); merger.appendDocument(secondPDF, splitpdf); merger.mergeDocuments(); try { splitpdf.close(); } catch (IOException e) { throw new AlfrescoRuntimeException(e.getMessage(), e); } } else { secondPDF = (PDDocument) its.next(); pagesInSecondPDF = secondPDF.getNumberOfPages(); } } if (pagesInSecondPDF > 1) { pg = "_pgs"; lastPage = "-" + (pagesInSecondPDF + pagesInFirstPDF); } else { pg = "_pg"; lastPage = ""; } // This is where we should save the appended PDF // put together the name and save the PDF secondPDF.save( tempDir + "" + File.separatorChar + fileNameSansExt + pg + splitFrequency + lastPage + FILE_EXTENSION); for (File file : tempDir.listFiles()) { try { if (file.isFile()) { // Get a writer and prep it for putting it back into the // repo NodeRef destinationNode = createDestinationNode( file.getName(), (NodeRef) ruleAction.getParameterValue(PARAM_DESTINATION_FOLDER), actionedUponNodeRef); writer = serviceRegistry .getContentService() .getWriter(destinationNode, ContentModel.PROP_CONTENT, true); writer.setEncoding(reader.getEncoding()); // original // encoding writer.setMimetype(FILE_MIMETYPE); // Put it in the repo writer.putContent(file); // Clean up file.delete(); } } catch (FileExistsException e) { throw new AlfrescoRuntimeException("Failed to process file.", e); } } } catch (COSVisitorException e) { throw new AlfrescoRuntimeException(e.getMessage(), e); } catch (IOException e) { throw new AlfrescoRuntimeException(e.getMessage(), e); } finally { if (pdf != null) { try { pdf.close(); } catch (IOException e) { throw new AlfrescoRuntimeException(e.getMessage(), e); } } if (is != null) { try { is.close(); } catch (IOException e) { throw new AlfrescoRuntimeException(e.getMessage(), e); } } if (tempDir != null) { tempDir.delete(); } } }