コード例 #1
0
ファイル: ExtractPDF.java プロジェクト: sti-ipf/Corisco
  /**
   * Builds a fresh ImageRecord describing an already-decoded image.
   *
   * <p>Width, height, bit depth and channel count are read from the image and its color model;
   * both level fields are set to DEFAULT_LEVELS. The compositing layer count is not set here,
   * because 'bi' refers to just one page extracted from the PDF file.
   *
   * @param bi the decoded image to describe
   * @return a new ImageRecord populated from {@code bi}
   * @throws DjatokaException if {@code bi} is null, or wrapping any exception raised while the
   *     record is being populated
   */
  public final ImageRecord getMetadata(BufferedImage bi) throws DjatokaException {
    if (bi == null) {
      throw new DjatokaException("Image Does Not Exist");
    }
    logger.debug("getMetadata(BufferedImage): " + bi.getWidth());

    try {
      final ImageRecord record = new ImageRecord();

      // Pixel dimensions.
      record.setWidth(bi.getWidth());
      record.setHeight(bi.getHeight());

      // Both level fields get the same default value.
      record.setDWTLevels(DEFAULT_LEVELS);
      record.setLevels(DEFAULT_LEVELS);

      // Sample layout as reported by the image's color model.
      record.setBitDepth(bi.getColorModel().getPixelSize());
      record.setNumChannels(bi.getColorModel().getNumColorComponents());

      // TODO

      return record;
    } catch (Exception cause) {
      throw new DjatokaException(cause);
    }
  }
コード例 #2
0
ファイル: ExtractPDF.java プロジェクト: sti-ipf/Corisco
 /**
  * Placeholder for XML box extraction: no XML is read yet, so the result is always null.
  *
  * <p>The record is still inspected to tell a stream-backed source from a file-backed one; the
  * extraction call for each case is disabled. Any exception raised while inspecting the record is
  * logged and not propagated.
  *
  * @param r the record whose source would be inspected for XML boxes
  * @return always null in the current implementation
  * @throws DjatokaException declared by the signature; not thrown by the current body
  */
 @Override
 // TODO
 // FIXME
 public final String[] getXMLBox(ImageRecord r) throws DjatokaException {
   String[] xmlDocs = null;
   try {
     final boolean streamBacked =
         r.getImageFile() == null
             && r.getObject() != null
             && r.getObject() instanceof InputStream;
     if (!streamBacked) {
       // File-backed source (extraction disabled):
       // xmlDocs = new JP2ImageInfo(new File(r.getImageFile())).getXmlDocs();
     } else {
       // Stream-backed source (extraction disabled):
       // xmlDocs = new JP2ImageInfo((InputStream) r.getObject()).getXmlDocs();
     }
   } catch (Exception failure) {
     logger.error(failure, failure);
   }
   return xmlDocs;
 }
コード例 #3
0
ファイル: ExtractPDF.java プロジェクト: sti-ipf/Corisco
 /**
  * Extracts region defined in DjatokaDecodeParam as BufferedImage.
  *
  * <p>The image file path is preferred when the record has one; otherwise the record's object is
  * used if it is an InputStream.
  *
  * @param input ImageRecord wrapper containing file reference, inputstream, etc.
  * @param params DjatokaDecodeParam instance containing region and transform settings.
  * @return extracted region as a BufferedImage
  * @throws DjatokaException if the record has neither an image file nor an InputStream object, or
  *     if the delegated process call fails
  */
 @Override
 public BufferedImage process(ImageRecord input, DjatokaDecodeParam params)
     throws DjatokaException {
   logger.debug("in imagerecord;");

   if (input.getImageFile() != null) {
     return process(input.getImageFile(), params);
   }

   Object source = input.getObject();
   if (source instanceof InputStream) {
     return process((InputStream) source, params);
   }

   // The object may be null here (no file and no object at all); report that as an unsupported
   // input instead of failing with a NullPointerException while building the message.
   String typeName = (source == null) ? "null" : source.getClass().getName();
   throw new DjatokaException(
       "File not defined and Input Object Type " + typeName + " is not supported");
 }
コード例 #4
0
ファイル: ExtractPDF.java プロジェクト: sti-ipf/Corisco
  /**
   * Gets PDF information by running the pdfinfo command over all pages and parsing its output.
   *
   * <p>Two kinds of output lines are collected: the line matched by PAGES_PATT (number of pages)
   * and every line matched by MEDIABOX_PATT (page number followed by four coordinates, read here
   * as x0 y0 x1 y1). Each page's width and height are scaled by MAX_DPI / DEFAULT_DENSITY before
   * being stored.
   *
   * <p>When the record carries an InputStream instead of a file path, the stream is first copied
   * to a temporary ".pdf" file, which is deleted again once pdfinfo has finished.
   *
   * @param input record naming the PDF file, or carrying its content as an InputStream object
   * @return a map: - [Pages][n] - [Page 1][width height] - [Page i][width height] - [Page
   *     n][width height], with sizes already scaled; "Pages" is "0" when pdfinfo did not report a
   *     page count
   * @throws DjatokaException if the record has neither a file nor an InputStream, if the stream
   *     cannot be copied to a temporary file, or if running/parsing pdfinfo fails (including when
   *     no page size line is found)
   * @throws IllegalStateException rethrown as-is when setPDFCommandsPath() fails
   */
  private static Map<String, String> getPDFProperties(ImageRecord input) throws DjatokaException {
    logger.debug("Getting PDF info");

    try {
      setPDFCommandsPath();
    } catch (IllegalStateException e) {
      logger.error("Failed to set PDF commands path: ", e);
      throw e;
    }

    HashMap<String, String> pdfProperties = new HashMap<String, String>();

    String sourcePath;
    File tempCopy = null; // Non-null only when the PDF had to be spooled from a stream.

    if (input.getImageFile() != null) {
      logger.debug("PDFInfo image file: " + input.getImageFile());
      sourcePath = input.getImageFile();
    } else if (input.getObject() instanceof InputStream) {
      // Any InputStream is accepted; casting to FileInputStream would reject other stream types.
      tempCopy = copyToTempPdf((InputStream) input.getObject());
      sourcePath = tempCopy.getAbsolutePath();
    } else {
      throw new DjatokaException(
          "File not defined and Input Object Type "
              + input // .getObject().getClass().getName()
              + " is not supported");
    }

    String[] pdfinfoCmd = PDFINFO_COMMAND.clone();
    pdfinfoCmd[PDFINFO_COMMAND_POSITION_BIN] = pdfinfoPath;
    pdfinfoCmd[PDFINFO_COMMAND_POSITION_FIRSTPAGE] = "1";
    pdfinfoCmd[PDFINFO_COMMAND_POSITION_LASTPAGE] =
        "-1"; // Last page, even though we do not know its number.
    pdfinfoCmd[PDFINFO_COMMAND_POSITION_FILE] = sourcePath;

    Process pdfProc = null;
    try {
      ArrayList<MatchResult> pageSizes = new ArrayList<MatchResult>();
      MatchResult pages = null;

      // Merge stderr into stdout so that a child writing many diagnostics cannot block on a full,
      // unread stderr pipe. Diagnostic lines are simply ignored unless they match a pattern.
      ProcessBuilder builder = new ProcessBuilder(pdfinfoCmd);
      builder.redirectErrorStream(true);
      pdfProc = builder.start();

      BufferedReader lr = new BufferedReader(new InputStreamReader(pdfProc.getInputStream()));
      String line;
      for (line = lr.readLine(); line != null; line = lr.readLine()) {
        Matcher mm1 = PAGES_PATT.matcher(line);
        if (mm1.matches()) pages = mm1.toMatchResult();
        Matcher mm2 = MEDIABOX_PATT.matcher(line);
        if (mm2.matches()) pageSizes.add(mm2.toMatchResult());
      }

      int istatus = pdfProc.waitFor();
      if (istatus != 0)
        logger.error("pdfinfo proc failed, exit status=" + istatus + ", file=" + sourcePath);

      if (pages == null) {
        logger.error(
            "Did not find 'Pages' line in output of pdfinfo command: "
                + Arrays.deepToString(pdfinfoCmd));
        pdfProperties.put("Pages", "0");
      } else {
        pdfProperties.put("Pages", pages.group(1));
      }

      if (pageSizes.isEmpty()) {
        logger.error(
            "Did not find \"Page X size\" lines in output of pdfinfo command: "
                + Arrays.deepToString(pdfinfoCmd));
        throw new IllegalArgumentException("Failed to get pages size of PDF with pdfinfo.");
      } else {
        for (MatchResult mr : pageSizes) {
          String page = mr.group(1);

          float x0 = Float.parseFloat(mr.group(2));
          float y0 = Float.parseFloat(mr.group(3));
          float x1 = Float.parseFloat(mr.group(4));
          float y1 = Float.parseFloat(mr.group(5));
          float w = Math.abs(x1 - x0);
          float h = Math.abs(y1 - y0);
          // Have to scale page sizes by max dpi (MAX_DPI / DEFAULT_DENSITY). Otherwise,
          // BookReader.js will request the wrong zoom level (svc.level).
          float ws = w * MAX_DPI / DEFAULT_DENSITY;
          float hs = h * MAX_DPI / DEFAULT_DENSITY;
          String width = "" + ws;
          String height = "" + hs;
          pdfProperties.put("Page " + page, width + " " + height);
        }
      }

    } catch (InterruptedException e) {
      // Keep the interrupt visible to the caller's thread instead of silently clearing it.
      Thread.currentThread().interrupt();
      logger.error("Failed getting PDF information: ", e);
      throw new DjatokaException("Failed getting PDF information: ", e);
    } catch (Exception e) {
      logger.error("Failed getting PDF information: ", e);
      throw new DjatokaException("Failed getting PDF information: ", e);
    } finally {
      // Close every pipe of the child process to avoid leaking file descriptors.
      // http://mark.koli.ch/2011/01/leaky-pipes-remember-to-close-your-streams-when-using-javas-runtimegetruntimeexec.html
      // pdfProc is still null when the process could not be started at all.
      if (pdfProc != null) {
        org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getOutputStream());
        org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getInputStream());
        org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getErrorStream());
      }
      // The spooled copy is only needed while pdfinfo runs; deleteOnExit() stays as a fallback.
      if (tempCopy != null && !tempCopy.delete()) {
        logger.debug("Could not delete temporary PDF copy: " + tempCopy.getAbsolutePath());
      }
    }

    return pdfProperties;
  }

  /** Copies the stream into a new temporary ".pdf" file, placed in the cache dir when one is set. */
  private static File copyToTempPdf(InputStream pdfStream) throws DjatokaException {
    File tmp = null;
    FileOutputStream fos = null;
    try {
      String cacheDir = OpenURLJP2KService.getCacheDir();
      if (cacheDir != null) {
        tmp = File.createTempFile("tmp", ".pdf", new File(cacheDir));
      } else {
        tmp = File.createTempFile("tmp", ".pdf");
      }
      tmp.deleteOnExit();

      fos = new FileOutputStream(tmp);
      IOUtils.copyStream(pdfStream, fos);
      return tmp;
    } catch (IOException e) {
      logger.error(e, e);
      if (tmp != null) {
        tmp.delete(); // Best effort; deleteOnExit() above covers a failed delete.
      }
      throw new DjatokaException(e);
    } finally {
      // Always release the file handle; the input stream stays open because the caller owns it.
      org.apache.commons.io.IOUtils.closeQuietly(fos);
    }
  }
コード例 #5
0
ファイル: ExtractPDF.java プロジェクト: sti-ipf/Corisco
  /**
   * Returns PDF props in ImageRecord.
   *
   * <p>The record is decoded once with default parameters to obtain width, height, bit depth and
   * channel count. The PDF properties are then queried: the page count becomes the compositing
   * layer count, and the remaining entries (the per-page sizes) are stored as the record's
   * instProps.
   *
   * @param r ImageRecord containing absolute file path of PDF file, or an input object.
   * @return the same ImageRecord instance, populated
   * @throws DjatokaException if the record has no existing file and no object, or if decoding or
   *     querying the PDF fails
   */
  @Override
  public final ImageRecord getMetadata(ImageRecord r) throws DjatokaException {
    if ((r.getImageFile() == null || !new File(r.getImageFile()).exists()) && r.getObject() == null)
      throw new DjatokaException("Image Does Not Exist: " + r.toString());
    logger.debug("Get metadata: " + r.toString());
    try {
      DjatokaDecodeParam params = new DjatokaDecodeParam();
      BufferedImage bi = process(r, params);

      r.setWidth(bi.getWidth());
      r.setHeight(bi.getHeight());
      r.setDWTLevels(DEFAULT_LEVELS);
      r.setLevels(DEFAULT_LEVELS);
      r.setBitDepth(bi.getColorModel().getPixelSize());
      r.setNumChannels(bi.getColorModel().getNumColorComponents());

      // NOTE(review): when r carries an InputStream rather than a file, process(r, params) above
      // has presumably already read it, so getPDFProperties(r) may see an exhausted stream —
      // confirm against the stream-based process overload.
      HashMap<String, String> pdfProps = (HashMap<String, String>) getPDFProperties(r);

      // Semantics: the compositing layer count is the number of pages in the PDF file. "Pages" is
      // removed so that only the per-page size entries remain in the map.
      int n = Integer.parseInt(pdfProps.remove("Pages"));
      r.setCompositingLayerCount(n);

      // The viewer cannot query the width and height of a specific page, because from Djatoka's
      // point of view a PDF is just one image whose compositing layers are the pages. So the sizes
      // of all pages are collected here and handed over as instProps, which is returned to the
      // viewer JS as JSON. The viewer has to keep this information and consult it later instead of
      // calling getMetadata again.
      r.setInstProps(pdfProps);
      logger.debug("instProps: " + r.getInstProps());

      logger.debug("Get metadata: " + r.toString());
    } catch (DjatokaException e) {
      // Already the exception type callers expect; wrapping it again would only bury its message
      // one level down the cause chain.
      throw e;
    } catch (Exception e) {
      throw new DjatokaException(e);
    }

    return r;
  }