示例#1
1
  /**
   * Extracts the text and document information of a PDF and adds them to the lucene document.
   *
   * <p>The extracted text is added as a Reader-valued text field, the author, creation date,
   * keywords, modification date, subject and title are added as text fields when document
   * information is present, and the first 500 characters of the text are added as an unindexed
   * summary field.
   *
   * @param document The document to add the contents to.
   * @param is The stream to get the contents from.
   * @param documentLocation The location of the document, used just for debug messages.
   * @throws IOException If there is an error parsing or decrypting the document.
   */
  private void addContent(Document document, InputStream is, String documentLocation)
      throws IOException {
    PDDocument pdfDocument = null;
    try {
      pdfDocument = PDDocument.load(is);
      if (pdfDocument.isEncrypted()) {
        // Just try using the default password and move on
        pdfDocument.decrypt("");
      }

      // create a writer where to append the text content.
      StringWriter writer = new StringWriter();
      PDFTextStripper stripper = new PDFTextStripper();
      try {
        stripper.writeText(pdfDocument, writer);
      } catch (Exception e) {
        // Best effort: whatever text was written before the failure is still indexed, but
        // report which document failed and why instead of hiding the cause.
        System.out.println(
            "Error in stripper.writeText() for document(" + documentLocation + "): " + e);
      }
      String contents = writer.getBuffer().toString();

      StringReader reader = new StringReader(contents);
      addTextField(document, Indexer.contents, reader);
      PDDocumentInformation info = pdfDocument.getDocumentInformation();
      if (info != null) {
        addTextField(document, Indexer.Author, info.getAuthor());
        try {
          addTextField(document, Indexer.created, info.getCreationDate());
        } catch (IOException ignored) {
          // ignore, bad date but continue with indexing
        }

        addTextField(document, Indexer.keywords, info.getKeywords());
        try {
          addTextField(document, Indexer.modified, info.getModificationDate());
        } catch (IOException ignored) {
          // ignore, bad date but continue with indexing
        }
        addTextField(document, "Subject", info.getSubject());
        addTextField(document, Indexer.Title, info.getTitle());
      }
      int summarySize = Math.min(contents.length(), 500);
      String summary = contents.substring(0, summarySize);
      // Add the summary as an UnIndexed field, so that it is stored and
      // returned with hit documents for display.
      addUnindexedField(document, Indexer.summary, summary);
    } catch (CryptographyException e) {
      // Keep the original exception as the cause so the stack trace is not lost.
      throw new IOException("Error decrypting document(" + documentLocation + "): " + e, e);
    } catch (InvalidPasswordException e) {
      // they didn't supply a password and the default of "" was wrong.
      throw new IOException(
          "Error: The document(" + documentLocation + ") is encrypted and will not be indexed.", e);
    } finally {
      if (pdfDocument != null) {
        pdfDocument.close();
      }
    }
  }
  /**
   * Parses a single-page, unencrypted PDF file and draws its page into this parser's target.
   *
   * <p>After drawing, the target's bounds are set from the page's media box.
   *
   * @param file the PDF file to load
   * @param maxPaths limit passed on to the {@code GraphicsProcessor}
   * @param monitor progress monitor that is started here and finished on success
   * @throws Exception if the document is encrypted, does not have exactly one page, or cannot
   *     be read or drawn
   */
  public void parse(File file, int maxPaths, ProgressMonitor monitor) throws Exception {
    // NOTE(review): the 1 is passed to tr(), not to beginTask() - confirm this is intended.
    monitor.beginTask(tr("Parsing PDF", 1));

    PDDocument document = PDDocument.load(file);
    try {
      if (document.isEncrypted()) {
        throw new Exception(tr("Encrypted documents not supported."));
      }

      List<?> allPages = document.getDocumentCatalog().getAllPages();

      if (allPages.size() != 1) {
        throw new Exception(tr("The PDF file must have exactly one page."));
      }

      PDPage page = (PDPage) allPages.get(0);
      PDRectangle pageSize = page.findMediaBox();
      Integer rotationVal = page.getRotation();
      int rotation = 0;
      if (rotationVal != null) {
        rotation = rotationVal.intValue();
      }

      GraphicsProcessor p = new GraphicsProcessor(target, rotation, maxPaths, monitor);
      PageDrawer drawer = new PageDrawer();
      drawer.drawPage(p, page);
      this.target.bounds =
          new Rectangle2D.Double(
              pageSize.getLowerLeftX(),
              pageSize.getLowerLeftY(),
              pageSize.getWidth(),
              pageSize.getHeight());
    } finally {
      // Release the document on every path, including the validation failures above.
      document.close();
    }

    monitor.finishTask();
  }
示例#3
0
  /**
   * Detects the type of the input file from its first two bytes and converts it to text.
   *
   * <p>Recognized signatures are {@code %P} (pdf), {@code PK} (docx) and {@code 0xD0 0xCF}
   * (doc); a file with any other signature is not converted. Any failure is printed to standard
   * error and not rethrown.
   *
   * @param infile path of the file to extract text from
   * @param outfile value passed to the format-specific conversion method
   */
  public TextConvertor(String infile, String outfile) {
    try {
      File input = new File(infile); // The file from where you would like to extract

      // Read the two signature bytes and release the stream immediately.
      int x;
      int y;
      FileInputStream probe = new FileInputStream(input.getAbsolutePath());
      try {
        x = probe.read();
        y = probe.read();
      } finally {
        probe.close();
      }

      if (x == 37 && y == 80) { // '%' 'P'
        filetype = "pdf";
        pd = PDDocument.load(input);
        PDF2Text(outfile);
      } else if (x == 80 && y == 75) { // 'P' 'K'
        filetype = "docx";
        FileInputStream fis = new FileInputStream(input.getAbsolutePath());
        try {
          dx = new XWPFDocument(fis);
          DOCX2Text(outfile);
        } finally {
          fis.close();
        }
      } else if (x == 208 && y == 207) { // 0xD0 0xCF
        filetype = "doc";
        FileInputStream fis = new FileInputStream(input.getAbsolutePath());
        try {
          dc = new HWPFDocument(fis);
          DOC2Text(outfile);
        } finally {
          fis.close();
        }
      }

    } catch (Exception e) {
      e.printStackTrace();
    }
  }
示例#4
0
 /**
  * Runs a {@code PrintTextLocations} processor over the content stream of every page of the
  * document named by the single command line argument.
  *
  * @param args The command line arguments; anything other than exactly one prints the usage.
  * @throws Exception If there is an error parsing the document.
  */
 public static void main(String[] args) throws Exception {
   if (args.length != 1) {
     usage();
     return;
   }

   PDDocument pdf = null;
   try {
     pdf = PDDocument.load(args[0]);
     if (pdf.isEncrypted()) {
       // Only the empty default password is attempted.
       try {
         pdf.decrypt("");
       } catch (InvalidPasswordException e) {
         System.err.println("Error: Document is encrypted with a password.");
         System.exit(1);
       }
     }
     PrintTextLocations locationPrinter = new PrintTextLocations();
     List pageList = pdf.getDocumentCatalog().getAllPages();
     int pageIndex = 0;
     while (pageIndex < pageList.size()) {
       PDPage currentPage = (PDPage) pageList.get(pageIndex);
       System.out.println("Processing page: " + pageIndex);
       PDStream pageContents = currentPage.getContents();
       // Pages without a content stream are skipped.
       if (pageContents != null) {
         locationPrinter.processStream(
             currentPage, currentPage.findResources(), currentPage.getContents().getStream());
       }
       pageIndex++;
     }
   } finally {
     if (pdf != null) {
       pdf.close();
     }
   }
 }
示例#5
0
 /**
  * Reads the textual content of a file, choosing the extraction method from the file name.
  *
  * <p>{@code doc} files are read with {@code WordExtractor}, {@code pdf} files with
  * {@code PDFTextStripper}, and names accepted by {@code StringHelper.isHTML} are loaded as a
  * plain string. Extraction errors are logged and do not propagate.
  *
  * @param file the file to read; {@code null} is logged and yields an empty string
  * @return the extracted text, or an empty string when the type is not handled or extraction
  *     failed
  * @throws FileNotFoundException declared for callers; extraction errors are caught and logged
  * @throws IOException declared for callers; extraction errors are caught and logged
  */
 public static String getFileContent(File file) throws FileNotFoundException, IOException {
   // Guard first: everything below dereferences file.
   if (file == null) {
     logger.warning("file not found : " + file);
     return "";
   }
   String ext = FilenameUtils.getExtension(file.getName());
   String outContent = "";
   try {
     if (ext.equalsIgnoreCase("doc")) {
       FileInputStream in = new FileInputStream(file);
       try {
         WordExtractor we = new WordExtractor(in);
         outContent = we.getText();
       } finally {
         in.close();
       }
     } else if (ext.equalsIgnoreCase("pdf")) {
       PDDocument doc = PDDocument.load(file);
       try {
         PDFTextStripper text = new PDFTextStripper();
         outContent = text.getText(doc);
       } finally {
         // Close even when text extraction throws.
         doc.close();
       }
     } else if (StringHelper.isHTML(file.getName())) {
       return loadStringFromFile(file);
     }
   } catch (Throwable t) {
     // Best effort by design: report the problem and fall through to the default result.
     logger.warning("error when read : " + file + "+ [" + t.getMessage() + "]");
     t.printStackTrace();
   }
   return outContent;
 }
  /**
   * Generates thumbnail and preview images for a file version from a PDF stream using PDFBox.
   *
   * <p>A thumbnail is produced from the first page when {@code _isGenerateThumbnail} says so.
   * When {@code _isGeneratePreview} says so, every page is passed to the page-level overload
   * with a 1-based index; otherwise the loop stops after the first page.
   *
   * @param fileVersion the file version the images belong to
   * @param inputStream the PDF content
   * @throws Exception if loading the document or generating an image fails
   */
  private void _generateImagesPB(FileVersion fileVersion, InputStream inputStream)
      throws Exception {

    boolean previewWanted = _isGeneratePreview(fileVersion);
    boolean thumbnailWanted = _isGenerateThumbnail(fileVersion);

    PDDocument pdf = null;

    try {
      pdf = PDDocument.load(inputStream);

      List<PDPage> pageList = pdf.getDocumentCatalog().getAllPages();

      int pageIndex = 0;

      while (pageIndex < pageList.size()) {
        PDPage currentPage = pageList.get(pageIndex);

        boolean firstPage = (pageIndex == 0);

        if (firstPage && thumbnailWanted) {
          _generateImagesPB(
              fileVersion,
              currentPage,
              PropsValues.DL_FILE_ENTRY_THUMBNAIL_DPI,
              PropsValues.DL_FILE_ENTRY_THUMBNAIL_HEIGHT,
              PropsValues.DL_FILE_ENTRY_THUMBNAIL_WIDTH,
              true,
              0);

          if (_log.isInfoEnabled()) {
            _log.info("PDFBox generated a thumbnail for " + fileVersion.getFileVersionId());
          }
        }

        // Without previews only the first page matters, so stop here.
        if (!previewWanted) {
          break;
        }

        _generateImagesPB(
            fileVersion,
            currentPage,
            PropsValues.DL_FILE_ENTRY_PREVIEW_DOCUMENT_DPI,
            PropsValues.DL_FILE_ENTRY_PREVIEW_DOCUMENT_HEIGHT,
            PropsValues.DL_FILE_ENTRY_PREVIEW_DOCUMENT_WIDTH,
            false,
            pageIndex + 1);

        pageIndex++;
      }

      if (_log.isInfoEnabled() && previewWanted) {
        _log.info(
            "PDFBox generated "
                + getPreviewFileCount(fileVersion)
                + " preview pages for "
                + fileVersion.getFileVersionId());
      }
    } finally {
      if (pdf != null) {
        pdf.close();
      }
    }
  }
  /**
   * Inserts table-of-contents (outline) information into a PDF.
   *
   * <p>A top-level "All Pages" outline item is created and one bookmark is appended to it for
   * every chapter whose page number equals a page index of the document. The document is saved
   * back to the same file. Exceptions raised while building or saving the outline are printed
   * and not rethrown.
   *
   * @param chapterList list of table-of-contents entries
   * @param destinationFileName file name of the PDF to insert the outline into
   * @throws Exception if the PDF cannot be loaded or closed
   */
  public void createIndex(List<ChapterModel> chapterList, String destinationFileName)
      throws Exception {
    PDDocument pdf = PDDocument.load(destinationFileName);
    try {
      PDDocumentOutline rootOutline = new PDDocumentOutline();
      pdf.getDocumentCatalog().setDocumentOutline(rootOutline);

      PDOutlineItem allPagesItem = new PDOutlineItem();
      allPagesItem.setTitle("All Pages");
      rootOutline.appendChild(allPagesItem);

      List pageList = pdf.getDocumentCatalog().getAllPages();
      int pageIndex = 0;
      while (pageIndex < pageList.size()) {
        for (ChapterModel chapter : chapterList) {
          if (pageIndex != chapter.getPageNum()) {
            continue;
          }
          // Bookmark pointing at the chapter's page, fitted to the page width.
          PDPageFitWidthDestination target = new PDPageFitWidthDestination();
          target.setPage((PDPage) pageList.get(pageIndex));

          PDOutlineItem chapterBookmark = new PDOutlineItem();
          chapterBookmark.setDestination(target);
          chapterBookmark.setTitle(chapter.getTitle());
          allPagesItem.appendChild(chapterBookmark);
        }
        pageIndex++;
      }
      allPagesItem.openNode();
      rootOutline.openNode();

      pdf.save(destinationFileName);
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      pdf.close();
    }
  }
  /**
   * Checks that the fixture is encrypted before {@code removeProtection} is applied and no
   * longer encrypted afterwards.
   */
  @Test
  public void removeProtection() throws Exception {
    final byte[] original = TestResources.get(TestResources.get("/locked.pdf"));

    try (ByteArrayInputStream bytes = new ByteArrayInputStream(original)) {
      final PDDocument locked = PDDocument.load(bytes);
      try {
        assertTrue(locked.isEncrypted());
      } finally {
        // try-with-resources only closes the byte stream, not the parsed document.
        locked.close();
      }
    }

    final byte[] unprotected =
        new PdfBoxPasswordProtection().removeProtection(original, "password".getBytes());

    try (ByteArrayInputStream bytes = new ByteArrayInputStream(unprotected)) {
      final PDDocument unlocked = PDDocument.load(bytes);
      try {
        assertFalse(unlocked.isEncrypted());
      } finally {
        unlocked.close();
      }
    }
  }
示例#9
0
  /**
   * Extracts the text of a PDF read from an input stream rather than a file name.
   *
   * @param filesInputStream An input stream pointing to a PDF file.
   * @return the extracted text as a character array
   * @throws IOException if the PDF cannot be loaded or its text cannot be extracted
   */
  private static char[] loadPDF(InputStream filesInputStream) throws IOException {
    PDDocument doc = PDDocument.load(filesInputStream);
    try {
      PDFTextStripper pdfStripper = new PDFTextStripper();
      pdfStripper.setSortByPosition(false);
      return pdfStripper.getText(doc).toCharArray();
    } finally {
      // Close the document even when text extraction fails.
      doc.close();
    }
  }
 /** Fails if fetching the first page of labor.pdf raises a NullPointerException. */
 @Test
 public void testDontThrowNPEInShfill() throws IOException {
   PDDocument pdf_document = PDDocument.load("src/test/resources/technology/tabula/labor.pdf");
   try {
     ObjectExtractor oe = new ObjectExtractor(pdf_document);
     PageIterator pi = oe.extract();
     try {
       pi.next();
     } catch (NullPointerException e) {
       fail("NPE in ObjectExtractor " + e.toString());
     }
   } finally {
     // Release the document whether or not the test passes.
     pdf_document.close();
   }
 }
 /** Expects exactly one page from encrypted.pdf when the password "userpassword" is given. */
 @Test
 public void testGoodPassword() throws IOException {
   PDDocument pdf_document = PDDocument.load("src/test/resources/technology/tabula/encrypted.pdf");
   try {
     ObjectExtractor oe = new ObjectExtractor(pdf_document, "userpassword");
     List<Page> pages = new ArrayList<Page>();
     PageIterator pi = oe.extract();
     while (pi.hasNext()) {
       pages.add(pi.next());
     }
     assertEquals(1, pages.size());
   } finally {
     // Release the document whether or not the assertion holds.
     pdf_document.close();
   }
 }
  /** Expects every extracted page of should_detect_rulings.pdf to have at least one ruling. */
  @Test
  public void testShouldDetectRulings() throws IOException {
    PDDocument pdf_document =
        PDDocument.load("src/test/resources/technology/tabula/should_detect_rulings.pdf");
    try {
      ObjectExtractor oe = new ObjectExtractor(pdf_document);
      PageIterator pi = oe.extract();

      while (pi.hasNext()) {
        assertNotEquals(0, pi.next().getRulings().size());
      }
    } finally {
      // Release the document whether or not the assertions hold.
      pdf_document.close();
    }
  }
示例#13
0
  /**
   * Loads the PDF at the given path with PDFBox and wraps it, together with the file length,
   * in a {@code PBCosDocument} that serves as the root object of the model.
   *
   * <p>The PDFBox document is closed before this method returns.
   *
   * @param path path to PDF file
   * @return root object of the model
   * @throws FileNotFoundException when the target file does not exist
   * @throws IOException when the file cannot be loaded or closed
   */
  public static org.verapdf.model.baselayer.Object getRoot(String path) throws IOException {
    final File pdfFile = new File(path);
    if (pdfFile.exists()) {
      try (PDDocument pdf = PDDocument.load(pdfFile)) {
        return new PBCosDocument(pdf, pdfFile.length());
      }
    }
    throw new FileNotFoundException("Current file '" + path + "' not exists.");
  }
示例#14
0
  /**
   * Adds the text and document information of a PDF to the lucene document.
   *
   * <p>The extracted text goes into a Reader-valued "contents" field, each document information
   * entry into its own text field, and the first 500 characters of the text into an unindexed
   * "summary" field.
   *
   * @param document The document to add the contents to.
   * @param is The stream to get the contents from.
   * @param documentLocation The location of the document, used just for debug messages.
   * @throws IOException If there is an error parsing the document.
   */
  private void addContent(Document document, InputStream is, String documentLocation)
      throws IOException {
    PDDocument pdf = null;
    try {
      pdf = PDDocument.load(is, "");

      // The stripper is created on first use and then reused.
      if (stripper == null) {
        stripper = new PDFTextStripper();
      }
      StringWriter extracted = new StringWriter();
      stripper.writeText(pdf, extracted);

      String text = extracted.getBuffer().toString();

      // Reader-valued text field, so the extracted text gets tokenized and indexed.
      addTextField(document, "contents", new StringReader(text));

      PDDocumentInformation metadata = pdf.getDocumentInformation();
      if (metadata != null) {
        addTextField(document, "Author", metadata.getAuthor());
        addTextField(document, "CreationDate", metadata.getCreationDate());
        addTextField(document, "Creator", metadata.getCreator());
        addTextField(document, "Keywords", metadata.getKeywords());
        addTextField(document, "ModificationDate", metadata.getModificationDate());
        addTextField(document, "Producer", metadata.getProducer());
        addTextField(document, "Subject", metadata.getSubject());
        addTextField(document, "Title", metadata.getTitle());
        addTextField(document, "Trapped", metadata.getTrapped());
      }

      // Stored but unindexed, so it can be returned with hit documents for display.
      String summary = text.substring(0, Math.min(text.length(), 500));
      addUnindexedField(document, "summary", summary);
    } catch (InvalidPasswordException e) {
      // No password was supplied and the default of "" was wrong.
      throw new IOException(
          "Error: The document(" + documentLocation + ") is encrypted and will not be indexed.", e);
    } finally {
      if (pdf != null) {
        pdf.close();
      }
    }
  }
 /** Expects two pages to be extracted from S2MNCEbirdisland.pdf without supplying a password. */
 @Test
 public void testCanReadPDFWithOwnerEncryption() throws IOException {
   PDDocument pdf_document =
       PDDocument.load("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf");
   try {
     ObjectExtractor oe = new ObjectExtractor(pdf_document);
     PageIterator pi = oe.extract();
     int i = 0;
     while (pi.hasNext()) {
       i++;
       pi.next();
     }
     assertEquals(2, i);
   } finally {
     // Release the document whether or not the assertion holds.
     pdf_document.close();
   }
 }
示例#16
0
  /**
   * Extracts the text of the PDF file with the given name.
   *
   * @param filename path of the PDF file to read
   * @return the text extracted by {@code PDFTextStripper}
   * @throws IOException if the file cannot be loaded or its text cannot be extracted
   */
  public static String loadPdfToString(String filename) throws IOException {
    PDDocument document = PDDocument.load(new File(filename));
    try {
      PDFTextStripper reader = new PDFTextStripper();
      return reader.getText(document);
    } finally {
      // Close the document even when text extraction fails.
      document.close();
    }
  }
 /** Prints the text of every page of rotated_page.pdf; passes when no exception is raised. */
 @Test
 public void testTextExtractionDoesNotRaise() throws IOException {
   PDDocument pdf_document =
       PDDocument.load("src/test/resources/technology/tabula/rotated_page.pdf");
   try {
     ObjectExtractor oe = new ObjectExtractor(pdf_document);
     PageIterator pi = oe.extract();
     while (pi.hasNext()) {
       System.out.println(pi.next().getText());
     }
   } finally {
     // Release the document whether or not extraction succeeds.
     pdf_document.close();
   }
 }
示例#18
0
  /**
   * Dumps image data of the PDF loaded from {@code iconFile} to standard output as hex.
   *
   * <p>First every COS stream whose subtype is {@code COSName.IMAGE} is printed (its filter
   * entry, then its filtered bytes); then the bytes of every XObject found in each page's
   * resources are printed.
   *
   * @param args unused
   * @throws IOException if the document or one of its streams cannot be read
   */
  @SuppressWarnings("unchecked")
  public static void main_3(String[] args) throws IOException {

    PDDocument doc = PDDocument.load(iconFile);

    try {
      List<PDPage> pages = doc.getDocumentCatalog().getAllPages();

      List<COSObject> objects = doc.getDocument().getObjects();

      for (COSObject cosObject : objects) {

        COSBase cosbase = cosObject.getObject();

        if (!(cosbase instanceof COSStream)) {
          continue;
        }

        COSStream cosstream = (COSStream) cosbase;

        COSBase filter = cosstream.getDictionaryObject(COSName.FILTER);

        COSBase subtype = cosstream.getDictionaryObject(COSName.SUBTYPE);

        if (subtype != null && subtype.equals(COSName.IMAGE)) {

          System.out.println(filter);

          InputStream filtered = cosstream.getFilteredStream();
          try {
            System.out.println(Hex.encodeHex(IOUtils.toByteArray(filtered)));
          } finally {
            filtered.close();
          }
        }
      }

      for (PDPage pdPage : pages) {

        PDResources resources = pdPage.getResources();

        // A page without its own resource dictionary has nothing to dump.
        if (resources == null) {
          continue;
        }

        Map<String, PDXObject> images = resources.getXObjects();

        for (PDXObject image : images.values()) {

          byte[] imgData = image.getPDStream().getByteArray();

          System.out.println(Hex.encodeHex(imgData));
        }
      }
    } finally {
      // Release the document on every path.
      doc.close();
    }
  }
示例#19
0
  /**
   * Loads a document from a stream and, when it is encrypted, tries to decrypt it with the
   * empty password.
   *
   * <p>A failed decryption is only reported on standard error; the document is returned anyway.
   *
   * @param input The input stream for the document.
   * @return The document.
   * @throws IOException If there is an error parsing the document.
   */
  private static PDDocument parseDocument(InputStream input) throws IOException {
    PDDocument parsed = PDDocument.load(input);
    if (!parsed.isEncrypted()) {
      return parsed;
    }

    try {
      parsed.decrypt("");
    } catch (org.apache.pdfbox.exceptions.InvalidPasswordException e) {
      System.err.println("Error: The document is encrypted.");
    } catch (org.apache.pdfbox.exceptions.CryptographyException e) {
      e.printStackTrace();
    }
    return parsed;
  }
示例#20
0
  /**
   * Loads a PDF from the stream, resets this instance's CAS reference and text buffer, and
   * delegates to {@code writeText(PDDocument)}. The document is always closed afterwards.
   *
   * @param aCas the CAS stored on this instance before the text is written
   * @param aIs the stream to load the PDF from
   * @throws IOException if the PDF cannot be loaded or is encrypted
   */
  public void writeText(final CAS aCas, final InputStream aIs) throws IOException {
    final PDDocument pdf = PDDocument.load(aIs);

    try {
      final boolean encrypted = pdf.isEncrypted();
      if (encrypted) {
        throw new IOException("Encrypted documents currently not supported");
      }

      // Fresh state for this run.
      cas = aCas;
      text = new StringBuilder();

      writeText(pdf);
    } finally {
      pdf.close();
    }
  }
示例#21
0
  /**
   * Signs a PDF and copies the signed result to the given output stream.
   *
   * <p>The input is first materialized as a temporary file and the signed document is written
   * to a second temporary file. Both files are deleted before returning.
   *
   * @param pdfData the PDF to sign
   * @param signatureValue the signature value passed on to the signing step
   * @param signedStream receives the content of the signed file
   * @param parameters the signature parameters
   * @param digestAlgorithm the digest algorithm passed on to the signing step
   * @throws DSSException wrapping any {@code IOException} raised while signing or copying
   */
  @Override
  public void sign(
      final InputStream pdfData,
      final byte[] signatureValue,
      final OutputStream signedStream,
      final PAdESSignatureParameters parameters,
      final DigestAlgorithm digestAlgorithm)
      throws DSSException {

    File toSignFile = null;
    File signedFile = null;
    FileOutputStream fileOutputStream = null;
    FileInputStream finalFileInputStream = null;
    PDDocument pdDocument = null;
    try {

      toSignFile = DSSPDFUtils.getFileFromPdfData(pdfData);

      pdDocument = PDDocument.load(toSignFile);
      final PDSignature pdSignature = createSignatureDictionary(parameters);

      signedFile = File.createTempFile("sd-dss-", "-signed.pdf");
      fileOutputStream = DSSPDFUtils.getFileOutputStream(toSignFile, signedFile);

      signDocumentAndReturnDigest(
          parameters,
          signatureValue,
          signedFile,
          fileOutputStream,
          pdDocument,
          pdSignature,
          digestAlgorithm);

      finalFileInputStream = new FileInputStream(signedFile);
      IOUtils.copy(finalFileInputStream, signedStream);
    } catch (IOException e) {
      throw new DSSException(e);
    } finally {
      // Close every handle on the temp files before deleting them; closing a stream that the
      // signing step already closed is harmless.
      IOUtils.closeQuietly(fileOutputStream);
      IOUtils.closeQuietly(finalFileInputStream);
      IOUtils.closeQuietly(pdDocument);
      DSSUtils.delete(toSignFile);
      DSSUtils.delete(signedFile);
    }
  }
 /**
  * Reads the content of the PDF at the given path.
  *
  * <p>The file is first opened with {@code PdfReader} and read through {@code getContents()}.
  * If that raises any exception, the text is extracted with PDFBox into {@code fileContents}
  * instead. An {@code IOException} from the fallback is ignored.
  *
  * @param filePath path of the PDF file
  */
 ExtractPageContent(String filePath) {
   this.filePath = filePath;
   try {
     reader = new PdfReader(filePath);
     parser = new PdfReaderContentParser(reader);
     getContents();
   } catch (Exception e) {
     try {
       PDDocument doc = PDDocument.load(filePath);
       try {
         PDFTextStripper stripper = new PDFTextStripper();
         this.fileContents = stripper.getText(doc);
       } finally {
         // Close the document even when text extraction fails.
         doc.close();
       }
     } catch (IOException ignored) {
       // Deliberately best effort: when both readers fail the fields keep their prior values.
     }
   }
 }
  /**
   * Indexes a single PDF file page by page.
   *
   * <p>The text of every page is extracted and lower-cased. Building and adding the per-page
   * index document is still a placeholder, so {@code writer} is not used yet.
   *
   * @param f the PDF file
   * @param writer the IndexWriter
   * @throws IOException if the PDF cannot be loaded or its text cannot be extracted
   */
  public static void indexFile(File f, IndexWriter writer) throws IOException {

    // Load the file through PDFBox
    PDDocument pddDocument = PDDocument.load(f.getAbsolutePath());
    try {
      PDFTextStripper textStripper = new PDFTextStripper();
      int numPages = pddDocument.getNumberOfPages();
      String pageContent;

      // Declare a custom field type
      FieldType fieldText = new FieldType();
      fieldText.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
      fieldText.setStored(false);
      fieldText.setStoreTermVectorOffsets(true);
      fieldText.setStoreTermVectorPositions(true);
      fieldText.setStoreTermVectors(true);

      // Walk every page of the file. PDFTextStripper page numbers are 1-based, so the range
      // is 1..numPages inclusive; starting at 0 and stopping before numPages drops the last
      // page.
      for (int page = 1; page <= numPages; page++) {
        textStripper.setStartPage(page);
        textStripper.setEndPage(page);
        // Extract a single page
        pageContent = textStripper.getText(pddDocument);
        if (pageContent != null && !pageContent.isEmpty()) {
          pageContent = pageContent.toLowerCase();
        }

        if (pageContent != null) {
          // TODO: declare the document to index for this page with
          //   - the page number
          //   - the page content
          //   - the file title
          // and add it to the writer.
        }
      }
    } finally {
      // Close the PDF file
      pddDocument.close();
    }
  }
示例#24
0
  /**
   * Starts a new page in the document and points the cursor at it.
   *
   * <p>The cursor's current content stream, if any, is closed. The new page is either blank,
   * with the configured page size, or the first page of the template document when a template
   * resource is set. The template document is handed to the cursor and left open here.
   *
   * @param document the document being printed
   * @param cursor the print position; its stream, page number and x/y position are updated
   * @param printData the page configuration and optional template resource
   * @throws IOException if a stream cannot be closed or opened, or the template cannot be loaded
   */
  private void breakPage(PDDocument document, PrintCursor cursor, PrintData printData)
      throws IOException {
    if (cursor.currentStream != null) {
      cursor.currentStream.close();
    }

    if (printData.templateResource == null) {
      document.addPage(new PDPage(printData.pageConfig.getPageSize()));
    } else {
      PDDocument templateDoc;
      java.io.InputStream templateStream = printData.templateResource.getInputStream();
      try {
        templateDoc = PDDocument.load(templateStream);
      } finally {
        // The stream was opened here, so it is released here once loading is done.
        templateStream.close();
      }
      cursor.cacheTempalte(templateDoc);
      PDPage templatePage = templateDoc.getDocumentCatalog().getPages().get(0);
      document.importPage(templatePage);
    }
    PDPage currPage = document.getDocumentCatalog().getPages().get(++cursor.currentPageNumber);
    cursor.currentStream =
        new PDPageContentStream(document, currPage, PDPageContentStream.AppendMode.APPEND, false);
    cursor.yPos = printData.pageConfig.getStartY(cursor.currentPageNumber);
    cursor.xPos = printData.pageConfig.getStartX();
  }
示例#25
0
  /**
   * Adds a DSS dictionary built from the callbacks to a PDF and writes the incrementally saved
   * result to the output stream.
   *
   * <p>The input is first materialized as a temporary file and the result is written to a
   * second temporary file. Both files are deleted before returning.
   *
   * @param inputStream the PDF to extend
   * @param outpuStream receives the content of the saved file
   * @param callbacks source of the DSS dictionary content; when empty no dictionary is set
   * @throws DSSException wrapping any exception raised while processing
   */
  @Override
  public void addDssDictionary(
      InputStream inputStream, OutputStream outpuStream, List<DSSDictionaryCallback> callbacks) {
    File toSignFile = null;
    File signedFile = null;
    FileInputStream fis = null;
    FileOutputStream fileOutputStream = null;
    PDDocument pdDocument = null;
    try {
      toSignFile = DSSPDFUtils.getFileFromPdfData(inputStream);
      pdDocument = PDDocument.load(toSignFile);

      signedFile = File.createTempFile("sd-dss-", "-signed.pdf");

      fileOutputStream = DSSPDFUtils.getFileOutputStream(toSignFile, signedFile);

      if (CollectionUtils.isNotEmpty(callbacks)) {
        final COSDictionary cosDictionary = pdDocument.getDocumentCatalog().getCOSDictionary();
        cosDictionary.setItem("DSS", buildDSSDictionary(callbacks));
        cosDictionary.setNeedToBeUpdate(true);
      }

      if (pdDocument.getDocumentId() == null) {
        pdDocument.setDocumentId(0L);
      }
      pdDocument.saveIncremental(inputStream, fileOutputStream);

      fis = new FileInputStream(signedFile);
      IOUtils.copy(fis, outpuStream);
    } catch (Exception e) {
      throw new DSSException(e);
    } finally {
      // Close every handle on the temp files before deleting them; closing an already closed
      // stream is harmless.
      IOUtils.closeQuietly(pdDocument);
      IOUtils.closeQuietly(fis);
      IOUtils.closeQuietly(fileOutputStream);
      DSSUtils.delete(toSignFile);
      DSSUtils.delete(signedFile);
    }
  }
示例#26
0
  /**
   * Extracts the text of {@code pdf/1.pdf} and runs the extraction helpers over it.
   *
   * <p>The part of the text before the last "References" line is passed through stop-word
   * removal and the citation search; authors, objectives and problems are taken from the text
   * up to page 3, methodology and contributions from the full text. Any exception is printed
   * and not rethrown.
   *
   * @param args unused
   */
  public static void main(String[] args) {

    try {
      File input = new File("pdf/1.pdf"); // The PDF file from where you would like to extract
      PDDocument pd = PDDocument.load(input);
      try {
        PDFTextStripper stripper = new PDFTextStripper();
        String fullText = stripper.getText(pd);

        int indexReferences = fullText.lastIndexOf("References\n");
        String textOutReferences =
            fullText.substring(0, indexReferences > 0 ? indexReferences : fullText.length());
        String textOutStop = removeStopWords(textOutReferences);

        findMoreCiteds(textOutStop);

        extractReferences(fullText);

        // Re-extract only the leading pages for the front-matter heuristics.
        stripper.setEndPage(3);
        String startText = stripper.getText(pd);

        System.out.println("Autores");
        extractAuthor(startText);
        System.out.println("Objetivos");
        extractObjective(startText);
        System.out.println("\n\nProblemas");
        extractProblem(startText);
        System.out.println("\n\nMetodologia");
        extractMethodology(fullText);
        System.out.println("\n\nContribuições");
        extractContributes(fullText);
      } finally {
        // Close the document even when one of the extraction steps fails.
        pd.close();
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
示例#27
0
  /**
   * Prints the page count and the full text of a hard-coded PDF file.
   *
   * @param args unused
   * @throws Exception if the file cannot be loaded or its text cannot be extracted
   */
  public static void main(String[] args) throws Exception {

    File file = new File("C:/Users/jatin.goyal/Desktop/demoexcel.pdf");
    PDDocument pd = PDDocument.load(file);
    try {
      System.out.println(pd.getNumberOfPages());
      PDFTextStripper st = new PDFTextStripper();
      st.setStartPage(1);

      System.out.println(st.getText(pd));
    } finally {
      // Release the document on every path.
      pd.close();
    }
  }
示例#28
0
  /**
   * Tells whether a DSS dictionary can be extracted from the catalog of the given PDF bytes.
   *
   * <p>The catalog is only inspected when the document has at least one signature dictionary.
   * Any exception is logged as a warning and treated as "not present".
   *
   * @param originalBytes the PDF content to inspect
   * @return {@code true} when a DSS dictionary was extracted, {@code false} otherwise
   */
  private boolean isDSSDictionaryPresentInPreviousRevision(byte[] originalBytes) {
    boolean dssPresent = false;
    ByteArrayInputStream input = null;
    PDDocument pdf = null;
    try {
      input = new ByteArrayInputStream(originalBytes);
      pdf = PDDocument.load(input);
      List<PDSignature> signatures = pdf.getSignatureDictionaries();
      if (CollectionUtils.isNotEmpty(signatures)) {
        PdfDict catalog = new PdfBoxDict(pdf.getDocumentCatalog().getCOSDictionary(), pdf);
        dssPresent = PdfDssDict.extract(catalog) != null;
      }
    } catch (Exception e) {
      logger.warn(
          "Cannot check in previous revisions if DSS dictionary already exist : " + e.getMessage(),
          e);
    } finally {
      IOUtils.closeQuietly(input);
      IOUtils.closeQuietly(pdf);
    }

    return dssPresent;
  }
  public static void main(String[] args)
      throws IOException, InterruptedException, IM4JavaException,
          PdfException { // is - is the inputstream of the pdf file
    System.out.println("inside grader");

    // required debugging code
    Mongo m = new Mongo();
    DB db = m.getDB("ecomm_database");
    DBCollection coll = db.getCollection("testschemas");
    ObjectMapper mapper = new ObjectMapper();

    //		String message = "4fda1af52f910cc6200000d3"; //test id, that i will have in the real version
    String message = "500bb8811a316fda2400003b"; // id of second test
    DBObject TestObject =
        coll.findOne(new BasicDBObject("_id", new ObjectId(message))); // the actual mongo query
    System.out.println("Test Object = " + TestObject);
    JsonNode rootNode = mapper.readValue(TestObject.toString().getBytes("UTF-8"), JsonNode.class);
    JsonNode TestAnswerSheet = rootNode.get("TestAnswerSheet"); // TestAnswerSheet
    JsonNode Questions = rootNode.get("Questions");
    System.out.println("size of Questions = " + Questions.size());
    int numofquestions = Questions.size();
    System.out.println("size of answers = " + TestAnswerSheet.size());
    int numofstudents =
        rootNode.get("NumberOfStudents").getIntValue(); // grab the number of students
    System.out.println("Numer of students = " + numofstudents);

    //	    FillScore(Questions);

    //        for(int x = 0; x < Answers.size(); x++){
    //
    //		   	int IDS = Answers.get(x).get("IDS").getIntValue(); //grab the question
    //		   	String QID = new String(Answers.get(x).get("IDS").getTextValue()); //grab the question
    //		   	System.out.println("IDS = " + QID );
    //
    //        }//end of grade results

    //		JFrame frame = new JFrame(); //window popup //for debuggin
    // reading in file
    //		File PDF_file = new File("/Users/angellopozo/Documents/TestImages/PDF_CRICLEV2.pdf");

    /*
     *
     * 					Start of real code
     *
     */

    //		//workign with jpedal, will read from inputstream
    //	      PdfDecoder decode_pdf = new PdfDecoder(true);
    //	      try{
    ////	      decode_pdf.openPdfFileFromInputStream(is,true); //file
    //	      decode_pdf.openPdfFile("/Users/angellopozo/Dropbox/My
    // Code/java/MainRabbitMongo/Resources/CreatedPDF_Mongo_Test.pdf");  ///DEUG LINE
    ////	      BufferedImage img = decode_pdf.getPageAsImage(1);
    ////	      decode_pdf.closePdfFile();
    ////	      File fileToSave = new File("/Users/angellopozo/Dropbox/My
    // Code/java/MainRabbitMongo/src/main/java/RPC/jpedalRPCTEST1.jpg");
    ////		  ImageIO.write(img, "jpg", fileToSave);
    ////		  JFrame frame = new JFrame("jpedal buffered image");
    ////			Panel panel = new Panel();
    ////			frame.getContentPane().add(new JLabel(new ImageIcon(img)));
    ////			frame.pack();
    //////			frame.setLocationRelativeTo(null);
    ////			frame.setVisible(true);
    //	      PdfFileInformation fileinfo = decode_pdf.getFileInformationData();
    //	      String[] Fnames = fileinfo.getFieldValues();
    //	      for(int i = 0 ; i < Fnames.length; i++){
    //	    	  System.out.println("fname info = " + Fnames[i]);
    //	      }
    //	      System.out.println("xml data = " + fileinfo.getFileXMLMetaData());
    //	      System.out.println("name of the input stream file = " + decode_pdf.getFileName());
    //	      }
    //	      catch(PdfException e) {
    //			    e.printStackTrace();//return back and do the rpc to the user ... use return and check
    // returns?
    //
    //	      }

    //		File PDF_file = new File("/Users/angellopozo/Dropbox/My
    // Code/java/MainRabbitMongo/Resources/CreatedPDF_TestMongo_Graded.pdf"); //to large, need to do
    // some scaling
    //		File PDF_file = new File("/Users/angellopozo/Dropbox/My
    // Code/java/MainRabbitMongo/Resources/CreatedPDF_Mongo_Test_Inputs.pdf"); //working
    //		File PDF_file = new File("/Users/angellopozo/Dropbox/My
    // Code/java/MainRabbitMongo/Resources/CreatedPDF_Mongo_Grade_Random.pdf");
    //		File PDF_file = new File("/Users/angellopozo/Dropbox/My
    // Code/java/MainRabbitMongo/Resources/CreatedPDF_TestMongo_Graded_Vsmaller.pdf");
    File PDF_file =
        new File(
            "/Users/angellopozo/Dropbox/My Code/java/MainRabbitMongo/Resources/CreatedPDF_Mongo_Random_withScore_testnum2_Grade_LARGE.pdf");
    //	    File PDF_file = new File("/Users/angellopozo/Dropbox/My
    // Code/java/MainRabbitMongo/Resources/CreatedPDF_Mongo_Random_withScore_testnum2_Grade_LARGE_MISTAKES_doubles.pdf");
    //	    File PDF_file = new File("/Users/angellopozo/Dropbox/My
    // Code/java/MainRabbitMongo/Resources/CreatedPDF_Mongo_Random_withScore_testnum2_Grade_LARGE_MISTAKES_noreply.pdf");

    // just testing. I get a bufferedImageLuminanceSource.java.39 -> grabbing image file dimentions.
    //		PdfDecoder decode_pdf = new PdfDecoder(true);
    //		decode_pdf.openPdfFile("/Users/angellopozo/Dropbox/My
    // Code/java/MainRabbitMongo/Resources/CreatedPDF_Mongo_Grade_Random.pdf");
    //		int numpages = decode_pdf.getPageCount();

    PDDocument doc = PDDocument.load(PDF_file); // used to get page numbers
    int numpages = doc.getNumberOfPages(); // get page numbers for for loop
    int[] CorrectlyAnswered = new int[Questions.size()]; // number of correct answers
    int[] IncorrectlyAnswered =
        new int[Questions.size()]; // number of incorrectly answered responses
    byStudent bystudent =
        new byStudent(
            numofquestions,
            numofstudents); // create grading instance //Initialize with number of students
    byQuestion byquestion = new byQuestion(numofquestions, numofstudents);
    System.out.println("result size = " + CorrectlyAnswered.length);
    // need to fill the score array in byquestions
    for (int i = 0; i < Questions.size(); i++) {
      //			System.out.println("Score for this question = " +
      // Questions.get(i).get("Score").getDoubleValue());
      byquestion.ScoreDefault[i] = Questions.get(i).get("Score").getDoubleValue();
    } // end of filling score array in byquestion

    //		int numpages = decode_pdf.getPageCount(); //get page numbers for for loop
    System.out.println(
        "number of pages = "
            + numpages); // check to make sure the number of pages is reasonable, dont want this to
    // be too large call Db and return
    System.out.println("____________________________________");
    //		   JFrame frame = new JFrame(); //window popup
    //		ArrayList Results = new ArrayList(); //Array of the answer locations
    //		ArrayList WA = new ArrayList(); //array of wrong answers that were selected by the students
    //		ArrayList SR = new ArrayList(); //holding accumulated data below. selected answers array
    int numoffails = 0;
    int Aindex = 0;
    //		int Qindex = 0;
    int[][] Selections = new int[2][Questions.size()]; // student , question
    int[][] SelectionTotal = new int[Questions.size()][4]; // question, answer selected
    for (int i = 0; i < numpages; i++) { // for every page

      //		    	File PDF_file = new File("/Users/angellopozo/Documents/TestImages/PDF_CRICLEV2.pdf");
      // convert page to PDF
      BufferedImage PDF_img = ConvertPageToImage(PDF_file, i);
      //		    	 BufferedImage PDF_img = decode_pdf.getPageAsImage(i);

      // START creating luminance source
      LuminanceSource lumSource = new BufferedImageLuminanceSource(PDF_img);
      BinaryBitmap bitmap = new BinaryBitmap(new HybridBinarizer(lumSource));

      Reader reader = new QRCodeReader(); // create qr reader
      GenericMultipleBarcodeReader multireader = new GenericMultipleBarcodeReader(reader);

      Hashtable<DecodeHintType, Object> hints = new Hashtable<DecodeHintType, Object>();
      hints.put(DecodeHintType.TRY_HARDER, Boolean.TRUE);

      TreeMap<String, Rectangle2D> sortedBarcodeResults = new TreeMap<String, Rectangle2D>();
      Result results[] = null;
      try {
        results = multireader.decodeMultiple(bitmap, hints);
      } catch (ReaderException re) {
        return;
      } // end of try
      // END creating luminance source

      // go through each found QR Code and draw a box around it
      BufferedImage outimage = PDF_img; // copy of the pdf image
      Graphics2D g2 = outimage.createGraphics();
      g2.setColor(Color.green);
      g2.setStroke(new BasicStroke(3));
      // draw boxes around the found qrcodes
      int index = 0; // debug line to save images
      for (Result result : results) {
        System.out.println("barcode result: " + result.getText());
        double x1 = result.getResultPoints()[0].getX(); // top left
        double y1 = result.getResultPoints()[0].getY(); // top left
        double x2 = result.getResultPoints()[1].getX(); // top right
        double y2 = result.getResultPoints()[1].getY(); // top right
        double x3 = result.getResultPoints()[2].getX(); // bottom left
        double y3 = result.getResultPoints()[2].getY(); // bottom left
        // double x4 = result.getResultPoints()[3].getX(); //bottom right (bottom right square
        // location..some qr have it)
        //  double y4 = result.getResultPoints()[3].getY(); //bottom right (bottom right square
        // location..some qr have it)
        Rectangle2D rectbox = new Rectangle2D.Double(x2, y2, (x3 - x2), (y1 - y2));
        // Double buffer = 10.0;//highly dependent on the size of the qrcode
        // Rectangle2D rectbox = new Rectangle2D.Double(x2-buffer, y2-buffer, (x3-x2)+2*buffer,
        // (y1-y2)+2*buffer);
        //						    System.out.println("barcode location: " + x1 +" "+ y1 +" "+ x2 +" "+ y2 + " " +
        // x3 +" "+ y3);
        // System.out.println("barcode location: " + x3 +" "+ y3+" "+ x4+" "+ y4+"\n");// +" "+
        // (x2-x1) +" "+ (y2-y1) +"\n");
        sortedBarcodeResults.put(
            result.getText(), rectbox); // (qrdecoded string , rectangle box in pixels)

        g2.draw(rectbox); // draw box around qrcode

        Rectangle2D bubblebox =
            new Rectangle2D.Double(
                x2 + (x3 - x2) + 15, y2 - 20, 45, (y1 - y2) + 55); // box around bubbles
        g2.draw(bubblebox); // area that the bubbles exist in the image

        BufferedImage subBubble =
            PDF_img.getSubimage(
                (int) (x2 + (x3 - x2) + 15),
                (int) (y2 - 20),
                45,
                (int) ((y1 - y2) + 55)); // box around bubbles
        IplImage ipl_subBubble = IplImage.createFrom(subBubble); // convert subimage into iplimage
        IplImage ipl_subBubble_large =
            cvCreateImage(
                cvSize(ipl_subBubble.width() * 4, ipl_subBubble.height() * 4),
                ipl_subBubble.depth(),
                ipl_subBubble.nChannels());
        cvResize(ipl_subBubble, ipl_subBubble_large, CV_INTER_CUBIC); // enlarge image
        IplImage ipl_subBubble_gray =
            cvCreateImage(
                cvSize(ipl_subBubble_large.width(), ipl_subBubble_large.height()),
                IPL_DEPTH_8U,
                1); // create black and white version of page
        // IplImage ipl_subBubble_gray = ipl_subBubble_large.clone();

        if (ipl_subBubble_large.nChannels() > 1) {
          cvCvtColor(ipl_subBubble_large, ipl_subBubble_gray, CV_RGB2GRAY);
        } else {
          //  	IplImage ipl_subBubble_gray = ipl_subBubble_large.clone();
        }

        cvThreshold(ipl_subBubble_gray, ipl_subBubble_gray, 100, 255, CV_THRESH_OTSU);
        cvSmooth(ipl_subBubble_gray, ipl_subBubble_gray, CV_GAUSSIAN, 9, 9, 2, 2);
        CvMemStorage circles = CvMemStorage.create();

        // show bubbles, check this if no grading is working
        //							CanvasFrame smoothed = new CanvasFrame("gray image");
        //							smoothed.setDefaultCloseOperation(javax.swing.JFrame.EXIT_ON_CLOSE);
        //							smoothed.showImage(ipl_subBubble_gray);

        CvSeq seq =
            cvHoughCircles(
                ipl_subBubble_gray,
                circles,
                CV_HOUGH_GRADIENT,
                1,
                50,
                80,
                20,
                32,
                (int) (ipl_subBubble_gray.height() / (7)));

        Integer[][] FilledBubbles =
            new Integer[4][4]; // arry holds the #of pixels seen and the y dimention of subimage
        //							Vector<CvPoint> centers = new Vector<CvPoint>(4);//the 4 can be seq.total()
        for (int j = 0; j < seq.total(); j++) { // draw a circle around each circle found
          CvPoint3D32f xyr = new CvPoint3D32f(cvGetSeqElem(seq, j));
          CvPoint center = new CvPoint(Math.round(xyr.x()), Math.round(xyr.y()));
          int radius = Math.round(xyr.z());
          cvCircle(ipl_subBubble_large, center, 3, CvScalar.GREEN, -1, 8, 0); // center of circle
          cvCircle(ipl_subBubble_large, center, radius, CvScalar.BLUE, 3, 8, 0); // outer circle
          FilledBubbles[j][0] =
              FindBubbleSelected(center, radius, ipl_subBubble_gray); // bubble selected area
          //						        FilledBubbles[j][0] = 1; //here to get rid of dimensions error
          FilledBubbles[j][1] = Math.round(center.x());
          FilledBubbles[j][2] = Math.round(center.y());
          FilledBubbles[j][3] = Math.round(radius);
          // System.out.println("Filled bubble Count = "+ FilledBubbles[j]);
        } // end of look for circles for

        //							//the algorithm may not find circles //was trying to fix an old error, solved it by
        // fixing th size of the image on hte pdf to image conversion
        //							int anynull = anynulls(FilledBubbles);
        ////							System.out.println("anynull = "+ anynull);
        //							if(anynull == 1){
        //								numoffails++;
        //								continue; //this question, not all circles were found.
        //							}//end of null check //this means not all 4 circles were found

        //							System.out.println("filled bubbles size = " + FilledBubbles[0].length);
        //							System.out.println("filled bubbles size = " + FilledBubbles.length);
        FilledBubbles =
            SortbyYdimention(
                FilledBubbles); // note to self, check for nulls because that woud be an issue....

        // print out area of bubble
        //					        for(Integer[] tp : FilledBubbles){
        //					        	System.out.println("Filled bubble Count = "+ tp[0] + " loc = "+ tp[1]);
        //					        }

        int[] selectResult =
            ReturnIndexOfmax(FilledBubbles); // maxindex = the answer submitted by the student
        int maxIndex = selectResult[0];
        int isfound = 1;
        int ismulti = 0;
        if (selectResult[1] > 1
            || selectResult[2]
                == 1) { // selectResult[1] = number of bubbles , selectResult[2] = no selections
          // made
          System.out.println("more than one bubble was selected");
          //					        	Aindex++; //index for looping through answer array //need to be
          // incremented to keep data correct
          //					        	index++; //(0-number of questions) //need to be incremented to keep data
          // correct
          //					        	numoffails++; //student selected too many inputs, hence trying to cheat
          // and
          isfound = 0;
          ismulti = 1;
          //					        	continue;
        } // end of slectResults[1] if

        /* GRADE THE RESULTS!!! */
        //  TestObject =mongo query result, Aindex  = question being looked at

        String QID =
            new String(
                TestAnswerSheet.get(Aindex).get("IDS").getTextValue()); // grab the question  ID
        int CorrectAnswerloc =
            TestAnswerSheet.get(Aindex).get("Answer").getIntValue(); // correct answer location

        System.out.println("Correc answer location = " + CorrectAnswerloc);
        System.out.println("IDS = " + QID + " QI = " + Aindex);

        int iscorrect = 0;
        if (ismulti == 1) { // if multiple selected
          iscorrect = 0;
        } else { // if only one input for a question is found
          iscorrect = checkcorrectness(CorrectAnswerloc, maxIndex);
        }

        // create the student selections by question found
        BasicDBObject newvals = new BasicDBObject();
        String Answersnum = new String("TestAnswerSheet." + Integer.toString(Aindex));
        newvals.put(Answersnum + ".found", isfound);
        newvals.put(Answersnum + ".multiselect", ismulti);
        //					        newvals.put(Answersnum + ".correct", iscorrect);
        //					        newvals.put(Answersnum + ".selected", maxIndex);
        BasicDBObject posop = new BasicDBObject("$set", newvals);
        System.out.println("inc query = " + posop.toString());
        coll.update(new BasicDBObject("_id", new ObjectId(message)), posop);

        //					        System.out.println("first character = " + QID.charAt(0));
        //					        System.out.println("last character = " + QID.charAt(2));

        char stud =
            QID.charAt(0); // this is the student //QID starts at 1, not at 0 hence the negative
        char Q = QID.charAt(2); // this is the question
        System.out.println("Student num = " + stud);
        System.out.println(
            "Q num = "
                + Character.getNumericValue(Q - 1)); // QID starts at 1, not at 0 hence the negative

        // Aggregate information to create Test Results array
        int Qint =
            Aindex
                % numofquestions; // Qint = the question number of the test -1(includes 0 hence the
        // -1) //should be equivalent to char Q
        //					        System.out.println("Score for this question = " +
        // Questions.get(Qint).get("Score").getDoubleValue());
        if (iscorrect == 1) {
          System.out.println("mod result = " + Qint);
          System.out.println("Question = " + Qint + " is correct = " + iscorrect);
          CorrectlyAnswered[Qint] =
              CorrectlyAnswered[Qint] + 1; // byquestion.IncrementCorrectlyAnswered(Qint);
          byquestion.IncrementCorrectlyAnswered(Qint);
          bystudent.IncrementCorrectlyAnswered(Character.getNumericValue(stud));
          byquestion.InsertScore(Character.getNumericValue(stud), Qint);
        } else if (iscorrect
            == 0) { // wrong answer was selected // Selections // or multiple selections
          System.out.println("mod result = " + Qint);
          System.out.println("Question = " + Qint + " is Incorrect = " + iscorrect);
          IncorrectlyAnswered[Qint] =
              IncorrectlyAnswered[Qint] + 1; // byquestion.IncrementCorrectlyAnswered(Qint);
          byquestion.IncrementIncorrectlyAnswered(Qint);
          bystudent.IncrementIncorrectlyAnswered(Character.getNumericValue(stud));
        }

        byquestion.IncrementSelectedAnswer(
            maxIndex, Qint); // increment the number of times a selection was made

        Selections[Character.getNumericValue(stud)][Qint] = maxIndex;
        SelectionTotal[Qint][maxIndex] =
            SelectionTotal[Qint][maxIndex]
                + 1; // byquestion.IncrementSelectedWrongAnwer(Qint, maxIndex);
        bystudent.IncrementRepliedTo(Character.getNumericValue(stud));

        Aindex++; // index for looping through answer array
        /* END GRADE THE RESULTS!!! */
        //  TestObject

        // draw the red circles
        CvPoint slectedcenter =
            new CvPoint(
                FilledBubbles[maxIndex][1].intValue(), FilledBubbles[maxIndex][2].intValue());
        cvCircle(
            ipl_subBubble_large,
            slectedcenter,
            FilledBubbles[maxIndex][3].intValue(),
            CvScalar.RED,
            3,
            8,
            0);

        // saving subimages to i can debug results
        //							String subimagename = new String("subimage_"+i+"_"+index+".jpg");
        index++; // (0-number of questions)
        //							cvSaveImage(subimagename,ipl_subBubble_large);
        // create image window named "My Image"
        //							String que = new String("_for_"+ result.getText());
        //						    final CanvasFrame canvas = new CanvasFrame("Bubbles_Found"+que);
        //						 // request closing of the application when the image window is closed
        //						    canvas.setDefaultCloseOperation(javax.swing.JFrame.EXIT_ON_CLOSE);
        //						 // show image on window
        //						    canvas.showImage(ipl_subBubble_large);

        System.out.println("____________________________________");
      } // end of for results loop
      // end drawing boxes around each QR CODE

      //					//START code to display in JFRAME
      //					if(i == 0){
      //			       frame.getContentPane().setLayout(new FlowLayout());
      //			       frame.getContentPane().add(new JLabel(new ImageIcon(outimage)));
      //			       frame.pack();
      //			       frame.setVisible(true);
      //					}
      //					else {
      //
      //						frame.getContentPane().add(new JLabel(new ImageIcon(outimage)));
      //				        frame.pack();
      //				        frame.setVisible(true);
      //
      //					}
      //					//END code to display in JFRAME

    } // end of for loop of pages

    // putput how well teh students performed on test
    for (int i = 0; i < numofstudents; i++) {
      System.out.println(
          "student" + i + "answered Correctly: " + bystudent.CorrectlyAnswered[i] + " Questions");
      System.out.println(
          "student"
              + i
              + "answered Incorrectly: "
              + bystudent.IncorrectlyAnswered[i]
              + " Questions");
      System.out.println("student" + i + "answered: " + bystudent.RepliedTo[i] + " Questions");
    }

    // results by student and question
    for (int i = 0; i < Selections.length; i++) {
      for (int j = 0; j < Selections[0].length; j++) {
        System.out.println("Student (" + i + "," + j + ") selected = " + Selections[i][j]);
      }
    }

    // results by question and reply
    for (int i = 0; i < SelectionTotal.length; i++) {
      System.out.println(
          "Selection below = "
              + byquestion.SelectedWrongAnswer_0[i]
              + " "
              + byquestion.SelectedWrongAnswer_1[i]
              + " "
              + byquestion.SelectedWrongAnswer_2[i]
              + " "
              + byquestion.SelectedCorrectAnswer[i]
              + " ");
      System.out.println(
          "correctly answered = " + byquestion.CorrectlyAnswered[i] + " " + CorrectlyAnswered[i]);
      for (int j = 0; j < SelectionTotal[0].length; j++) {
        System.out.println("Quesetion (" + i + "," + j + ") selected = " + SelectionTotal[i][j]);
      }
    } // end of selctiontotal for loop

    byquestion.ComputePercentCorrectlyAnswered();
    byquestion.ComputePercentIncorrectlyAnswered();
    byquestion.ComputePercentCorrectSTD();
    byquestion.ComputeMeanScoreByQuestion(); // average score for any question by question
    //		byquestion.ComputeMeanScoreByStudent(); //average score for any one question by student
    byquestion.ComputeMeanbyQuestionSTD();
    bystudent.ComputeTotalScores(
        byquestion.Scoresbystudent); // compute the total scores for any student
    bystudent.ComputeMeanTotalScore(byquestion.Scoresbystudent);
    byTest bytest = new byTest(numofquestions, numofstudents, bystudent);
    bytest.ComputeMeanScoreTest();
    bytest.ComputeMeanScoreSTD();
    bytest.ComputePercentCorrecltyAnswered();
    bytest.ComputePercentIncorrecltyAnswered();

    // create Test Results by question
    ArrayList<BasicDBObject> TestResultbyQuestion =
        new ArrayList<BasicDBObject>(); // Array of the answer locations
    for (int j = 0; j < byquestion.CorrectlyAnswered.length; j++) {
      BasicDBObject ByQuestionVals = new BasicDBObject();
      ByQuestionVals.put("SelectedWrongAnswer_0", byquestion.SelectedWrongAnswer_0[j]);
      ByQuestionVals.put("SelectedWrongAnswer_1", byquestion.SelectedWrongAnswer_1[j]);
      ByQuestionVals.put("SelectedWrongAnswer_2", byquestion.SelectedWrongAnswer_2[j]);
      ByQuestionVals.put("SelectedCorrectAnswer", byquestion.SelectedCorrectAnswer[j]);
      ByQuestionVals.put("CorrectlyAnswered", byquestion.CorrectlyAnswered[j]);
      ByQuestionVals.put("IncorrectlyAnswered", byquestion.IncorrectlyAnswered[j]);
      ByQuestionVals.put("PercentCorrect", byquestion.PercentCorrectlyAnswered[j]);
      ByQuestionVals.put("PercentIncorrect", byquestion.PercentIncorrectlyAnswered[j]);
      ByQuestionVals.put("STD", byquestion.STD[j]);
      ByQuestionVals.put("Mean", byquestion.ScoreMean[j]); // means score for this question
      ByQuestionVals.put("_id", new ObjectId());
      TestResultbyQuestion.add(ByQuestionVals); // add Rvals into the Testresultarray listarray
      //			System.out.println("Question " + j + " numcorrect = " + CorrectlyAnswered[j]);
    }

    // create Test Results by test
    BasicDBObject ByTestVals = new BasicDBObject();
    ByTestVals.put("Mean", bytest.ScoreMean);
    ByTestVals.put("STD", bytest.ScoreSTD);
    ByTestVals.put("PercentCorrect", bytest.PercentCorrectlyAnswered);
    ByTestVals.put("PercentInorrect", bytest.PercentIncorrectlyAnswered);
    ByTestVals.put("_id", new ObjectId());

    // create graded exists
    BasicDBObject TestGradedVals = new BasicDBObject();
    TestGradedVals.put("WasGraded", 1);
    Date now = new Date();
    TestGradedVals.put("GradeOn", now);
    TestGradedVals.put("_id", new ObjectId());

    // create Test Results by  student
    ArrayList<BasicDBObject> TestResultbyStudent =
        new ArrayList<BasicDBObject>(); // Array of the answers by student
    for (int j = 0; j < bystudent.CorrectlyAnswered.length; j++) {
      BasicDBObject ByStudentVals = new BasicDBObject();
      ByStudentVals.put("CorrectlyAnswered", bystudent.CorrectlyAnswered[j]);
      ByStudentVals.put("IncorrectlyAnswered", bystudent.IncorrectlyAnswered[j]);
      ByStudentVals.put("RepliedTo", bystudent.RepliedTo[j]);
      ByStudentVals.put("ScoreTotal", bystudent.ScoreTotal[j]);
      //			ByStudentVals.put("ScoreMean", bystudent.ScoreMean[j]); //this is still wrong, unless i
      // want ot show the mean of score for any 1 question
      ByStudentVals.put("_id", new ObjectId());
      TestResultbyStudent.add(ByStudentVals); // add Rvals into the Testresultarray listarray
      //			System.out.println("Question " + j + " numcorrect = " + CorrectlyAnswered[j]);
    }

    // v1
    BasicDBObject TRbyQuestions = new BasicDBObject("TRbyQuestions", TestResultbyQuestion);
    BasicDBObject set = new BasicDBObject("$set", TRbyQuestions);
    //		System.out.println("Test result query = " + TRbyQuestions);
    coll.update(new BasicDBObject("_id", new ObjectId(message)), set);

    BasicDBObject TRbyTest = new BasicDBObject("TRbyTest", ByTestVals);
    BasicDBObject settest = new BasicDBObject("$set", TRbyTest);
    coll.update(new BasicDBObject("_id", new ObjectId(message)), settest);

    BasicDBObject TestGradedobject = new BasicDBObject("TestGraded", TestGradedVals);
    BasicDBObject settestgraded = new BasicDBObject("$set", TestGradedobject);
    coll.update(new BasicDBObject("_id", new ObjectId(message)), settestgraded);

    BasicDBObject TRbyStudent = new BasicDBObject("TRbyStudents", TestResultbyStudent);
    BasicDBObject set1 = new BasicDBObject("$set", TRbyStudent);
    coll.update(new BasicDBObject("_id", new ObjectId(message)), set1);

    // v2
    //		DBObject TestObject2 = coll.findOne(new BasicDBObject("_id", new ObjectId(message))); //the
    // actual mongo query
    //		TestObject2.put("CorrectlyAnswered", TestResultsarray);
    //		coll.save(TestObject2);

    System.out.println("Failed to grade " + numoffails + " questions");
    doc.close();
  } // end of Grader
  /**
   * Produces the thumbnail and/or per-page preview images for a PDF and hands them to the store.
   *
   * <p>The PDF is opened once just to read its page count, which determines how many preview temp
   * files are reserved. Image generation then runs either in a forked process or in-process,
   * depending on {@code PropsValues.DL_FILE_ENTRY_PREVIEW_FORK_PROCESS_ENABLED}. Afterwards each
   * generated temp file is stored and then deleted, even when storing it fails.
   *
   * @param fileVersion the file version the images are generated for
   * @param file the PDF file to read
   * @throws Exception if loading the PDF, generating the images, or storing them fails
   */
  private void _generateImagesPB(FileVersion fileVersion, File file) throws Exception {

    String tempFileId =
        DLUtil.getTempFileId(fileVersion.getFileEntryId(), fileVersion.getVersion());

    File thumbnailFile = getThumbnailTempFile(tempFileId);

    // Open the document only long enough to learn how many pages it has.
    int pageCount;

    PDDocument pdDocument = null;

    try {
      pdDocument = PDDocument.load(file);

      pageCount = pdDocument.getNumberOfPages();
    } finally {
      if (pdDocument != null) {
        pdDocument.close();
      }
    }

    // One temp file per page.
    File[] previewFiles = new File[pageCount];

    for (int page = 0; page < previewFiles.length; page++) {
      previewFiles[page] = getPreviewTempFile(tempFileId, page);
    }

    boolean generatePreview = _isGeneratePreview(fileVersion);
    boolean generateThumbnail = _isGenerateThumbnail(fileVersion);

    if (!PropsValues.DL_FILE_ENTRY_PREVIEW_FORK_PROCESS_ENABLED) {
      // In-process conversion.
      LiferayPDFBoxConverter converter =
          new LiferayPDFBoxConverter(
              file,
              thumbnailFile,
              previewFiles,
              getPreviewType(fileVersion),
              getThumbnailType(fileVersion),
              PropsValues.DL_FILE_ENTRY_PREVIEW_DOCUMENT_DPI,
              PropsValues.DL_FILE_ENTRY_PREVIEW_DOCUMENT_MAX_HEIGHT,
              PropsValues.DL_FILE_ENTRY_PREVIEW_DOCUMENT_MAX_WIDTH,
              generatePreview,
              generateThumbnail);

      converter.generateImagesPB();
    } else {
      // Forked conversion: submit the callable, register its future under the file version id,
      // then block until it completes.
      ProcessCallable<String> forkedTask =
          new LiferayPDFBoxProcessCallable(
              ServerDetector.getServerId(),
              PropsUtil.get(PropsKeys.LIFERAY_HOME),
              Log4JUtil.getCustomLogSettings(),
              file,
              thumbnailFile,
              previewFiles,
              getThumbnailType(fileVersion),
              getPreviewType(fileVersion),
              PropsValues.DL_FILE_ENTRY_PREVIEW_DOCUMENT_DPI,
              PropsValues.DL_FILE_ENTRY_PREVIEW_DOCUMENT_MAX_HEIGHT,
              PropsValues.DL_FILE_ENTRY_PREVIEW_DOCUMENT_MAX_WIDTH,
              generatePreview,
              generateThumbnail);

      Future<String> pending =
          ProcessExecutor.execute(ClassPathUtil.getPortalClassPath(), forkedTask);

      futures.put(String.valueOf(fileVersion.getFileVersionId()), pending);

      pending.get();
    }

    if (generateThumbnail) {
      // The temp file is removed whether or not storing it succeeds.
      try {
        storeThumbnailImages(fileVersion, thumbnailFile);
      } finally {
        FileUtil.delete(thumbnailFile);
      }

      if (_log.isInfoEnabled()) {
        _log.info("PDFBox generated a thumbnail for " + fileVersion.getFileVersionId());
      }
    }

    if (generatePreview) {
      // Stored preview paths are numbered from 1, while the temp file array is 0-based.
      for (int page = 0; page < previewFiles.length; page++) {
        File previewFile = previewFiles[page];

        try {
          addFileToStore(
              fileVersion.getCompanyId(),
              PREVIEW_PATH,
              getPreviewFilePath(fileVersion, page + 1),
              previewFile);
        } finally {
          FileUtil.delete(previewFile);
        }
      }

      if (_log.isInfoEnabled()) {
        _log.info(
            "PDFBox generated "
                + getPreviewFileCount(fileVersion)
                + " preview pages for "
                + fileVersion.getFileVersionId());
      }
    }
  }