Example #1
0
  /**
   * Parses the command-line arguments, loads the PDF they name, splits it and writes every
   * resulting part to a file named after the input: the last four characters of the input path are
   * dropped and {@code -<index>.pdf} is appended.
   *
   * <p>The flags {@code PASSWORD}, {@code SPLIT}, {@code START_PAGE} and {@code END_PAGE} each
   * consume the following argument as their value; {@code NONSEQ} selects the non-sequential
   * parser. The first argument that is not a flag is taken as the PDF path; later ones are ignored.
   *
   * @param args raw command-line arguments
   * @throws Exception if the document cannot be loaded, decrypted, split or written, or if a page
   *     option is not a valid integer
   */
  private void split(String[] args) throws Exception {
    String password = "";
    String split = null;
    String startPage = null;
    String endPage = null;
    boolean useNonSeqParser = false;
    Splitter splitter = new Splitter();
    String pdfFile = null;
    // NOTE(review): every "missing value" branch below relies on usage() not returning
    // (presumably it exits the JVM); otherwise args[i] would be out of bounds - confirm.
    for (int i = 0; i < args.length; i++) {
      if (args[i].equals(PASSWORD)) {
        i++;
        if (i >= args.length) {
          usage();
        }
        password = args[i];
      } else if (args[i].equals(SPLIT)) {
        i++;
        if (i >= args.length) {
          usage();
        }
        split = args[i];
      } else if (args[i].equals(START_PAGE)) {
        i++;
        if (i >= args.length) {
          usage();
        }
        startPage = args[i];
      } else if (args[i].equals(END_PAGE)) {
        i++;
        if (i >= args.length) {
          usage();
        }
        endPage = args[i];
      } else if (args[i].equals(NONSEQ)) {
        useNonSeqParser = true;
      } else {
        if (pdfFile == null) {
          pdfFile = args[i];
        }
      }
    }

    if (pdfFile == null) {
      usage();
    } else {
      PDDocument document = null;
      List<PDDocument> documents = null;
      try {
        if (useNonSeqParser) {
          document = PDDocument.loadNonSeq(new File(pdfFile), null, password);
        } else {
          document = PDDocument.load(pdfFile);
          if (document.isEncrypted()) {
            try {
              document.decrypt(password);
            } catch (InvalidPasswordException e) {
              // NOTE(review): "a password was supplied" is inferred from the argument count
              // alone, which stops being accurate once other options are present.
              if (args.length == 4) // they supplied the wrong password
              {
                System.err.println("Error: The supplied password is incorrect.");
                System.exit(2);
              } else {
                // they didn't supply a password and the default of "" was wrong.
                System.err.println("Error: The document is encrypted.");
                usage();
              }
            }
          }
        }

        int numberOfPages = document.getNumberOfPages();
        // Tracks whether a start/end page already determined the split size, so that the
        // one-page default further down does not overwrite it.
        boolean startEndPageSet = false;
        if (startPage != null) {
          splitter.setStartPage(Integer.parseInt(startPage));
          startEndPageSet = true;
          if (split == null) {
            splitter.setSplitAtPage(numberOfPages);
          }
        }
        if (endPage != null) {
          splitter.setEndPage(Integer.parseInt(endPage));
          startEndPageSet = true;
          if (split == null) {
            splitter.setSplitAtPage(Integer.parseInt(endPage));
          }
        }
        if (split != null) {
          splitter.setSplitAtPage(Integer.parseInt(split));
        } else {
          if (!startEndPageSet) {
            // No split size and no page range given: fall back to a split size of 1.
            splitter.setSplitAtPage(1);
          }
        }

        documents = splitter.split(document);
        for (int i = 0; i < documents.size(); i++) {
          PDDocument doc = documents.get(i);
          String fileName = pdfFile.substring(0, pdfFile.length() - 4) + "-" + i + ".pdf";
          writeDocument(doc, fileName);
          doc.close();
        }

      } finally {
        if (document != null) {
          document.close();
        }
        // Parts already closed in the loop above are closed again here; this pass exists for
        // the parts left open when an exception interrupted that loop.
        for (int i = 0; documents != null && i < documents.size(); i++) {
          documents.get(i).close();
        }
      }
    }
  }
  /**
   * Splits the PDF supplied by {@code reader} into a leading part and a trailing part and stores
   * each part as a new node.
   *
   * <p>The split page is read from {@code options.get(PARAM_SPLIT_AT_PAGE)}; the splitter is
   * configured with that value minus one. The first chunk returned by the splitter is saved as
   * {@code <name>_pg1} (or {@code <name>_pgs1-<n>} when it holds {@code n > 1} pages). All
   * remaining chunks are appended into a single document saved as {@code <name>_pg<splitPage>} (or
   * {@code <name>_pgs<splitPage>-<totalPages>}). When the splitter returns only one chunk there is
   * no trailing part and only the first file is produced.
   *
   * <p>The files are staged in a temporary directory named after the id of {@code
   * actionedUponNodeRef}. Every regular file found there is then written to a node obtained from
   * {@code createDestinationNode} for the folder given by the rule action's {@code
   * PARAM_DESTINATION_FOLDER} parameter, and the staged file is deleted.
   *
   * @param ruleAction the action whose {@code PARAM_DESTINATION_FOLDER} parameter names the target
   *     folder
   * @param actionedUponNodeRef the node holding the source PDF; used for the temp directory name,
   *     the output file names and destination-node creation
   * @param reader supplies the PDF content stream and the encoding copied onto the new content
   * @param options action options; must contain {@code PARAM_SPLIT_AT_PAGE}
   * @throws AlfrescoRuntimeException if the split page is not a number, or if reading, saving or
   *     closing a document fails
   */
  protected final void action(
      Action ruleAction,
      NodeRef actionedUponNodeRef,
      ContentReader reader,
      Map<String, Object> options) {
    PDDocument pdf = null;
    // Declared out here so the finally block can release it on every path.
    PDDocument secondPDF = null;
    InputStream is = null;
    File tempDir = null;

    try {
      // Get the split frequency; an empty option string leaves it at 0.
      int splitFrequency = 0;

      String splitFrequencyString = options.get(PARAM_SPLIT_AT_PAGE).toString();
      if (!splitFrequencyString.equals("")) {
        try {
          splitFrequency = Integer.valueOf(splitFrequencyString);
        } catch (NumberFormatException e) {
          throw new AlfrescoRuntimeException(e.getMessage(), e);
        }
      }

      // Stream the source document in.
      is = reader.getContentInputStream();
      pdf = PDDocument.load(is);

      // Need to adjust the input value to get the split at the right page.
      Splitter splitter = new Splitter();
      splitter.setSplitAtPage(splitFrequency - 1);
      List<PDDocument> pdfs = splitter.split(pdf);

      // Page number used in the first part's file name.
      int page = 1;

      // Build a temp dir, named after the id of the node being processed.
      File alfTempDir = TempFileProvider.getTempDir();
      tempDir = new File(alfTempDir.getPath() + File.separatorChar + actionedUponNodeRef.getId());
      tempDir.mkdir();

      // The first chunk becomes the leading part on its own.
      PDDocument firstPDF = pdfs.remove(0);
      int pagesInFirstPDF;
      String basePath;
      try {
        pagesInFirstPDF = firstPDF.getNumberOfPages();

        String lastPage = "";
        String pg = "_pg";
        if (pagesInFirstPDF > 1) {
          pg = "_pgs";
          lastPage = "-" + pagesInFirstPDF;
        }

        String fileNameSansExt = getFilenameSansExt(actionedUponNodeRef, FILE_EXTENSION);
        basePath = tempDir.getPath() + File.separatorChar + fileNameSansExt;
        firstPDF.save(basePath + pg + page + lastPage + FILE_EXTENSION);
      } finally {
        // Close even when save() fails; an IOException from close() is wrapped by the outer
        // catch exactly like any other I/O failure in this method.
        firstPDF.close();
      }

      // Fold every remaining chunk into one trailing document. The first remaining chunk is
      // the merge target; later chunks are appended to it and closed straight away.
      int pagesInSecondPDF = 0;
      for (PDDocument chunk : pdfs) {
        if (secondPDF == null) {
          secondPDF = chunk;
          pagesInSecondPDF = chunk.getNumberOfPages();
          continue;
        }

        try {
          pagesInSecondPDF = pagesInSecondPDF + chunk.getNumberOfPages();

          PDFMergerUtility merger = new PDFMergerUtility();
          merger.appendDocument(secondPDF, chunk);
          // NOTE(review): no sources are registered on this merger, so this call looks
          // redundant - confirm against the PDFBox version in use before removing it.
          merger.mergeDocuments();
        } finally {
          chunk.close();
        }
      }

      // With a single chunk there is no trailing part; saving unconditionally used to fail
      // with a NullPointerException here.
      if (secondPDF != null) {
        String pg;
        String lastPage;
        if (pagesInSecondPDF > 1) {
          pg = "_pgs";
          lastPage = "-" + (pagesInSecondPDF + pagesInFirstPDF);
        } else {
          pg = "_pg";
          lastPage = "";
        }

        // Put together the name and save the appended PDF.
        secondPDF.save(basePath + pg + splitFrequency + lastPage + FILE_EXTENSION);
      }

      for (File file : tempDir.listFiles()) {
        try {
          if (file.isFile()) {
            // Get a writer and prep it for putting the file back into the repo.
            NodeRef destinationNode =
                createDestinationNode(
                    file.getName(),
                    (NodeRef) ruleAction.getParameterValue(PARAM_DESTINATION_FOLDER),
                    actionedUponNodeRef);
            ContentWriter writer =
                serviceRegistry
                    .getContentService()
                    .getWriter(destinationNode, ContentModel.PROP_CONTENT, true);

            writer.setEncoding(reader.getEncoding()); // original encoding
            writer.setMimetype(FILE_MIMETYPE);

            // Put it in the repo.
            writer.putContent(file);

            // Clean up the staged copy.
            file.delete();
          }
        } catch (FileExistsException e) {
          throw new AlfrescoRuntimeException("Failed to process file.", e);
        }
      }
    } catch (COSVisitorException e) {
      throw new AlfrescoRuntimeException(e.getMessage(), e);
    } catch (IOException e) {
      throw new AlfrescoRuntimeException(e.getMessage(), e);
    } finally {
      if (secondPDF != null) {
        try {
          secondPDF.close();
        } catch (IOException e) {
          throw new AlfrescoRuntimeException(e.getMessage(), e);
        }
      }
      if (pdf != null) {
        try {
          pdf.close();
        } catch (IOException e) {
          throw new AlfrescoRuntimeException(e.getMessage(), e);
        }
      }
      if (is != null) {
        try {
          is.close();
        } catch (IOException e) {
          throw new AlfrescoRuntimeException(e.getMessage(), e);
        }
      }

      if (tempDir != null) {
        tempDir.delete();
      }
    }
  }