// * public final ImageRecord getMetadata(BufferedImage bi) throws DjatokaException { if (bi == null) throw new DjatokaException("Image Does Not Exist"); logger.debug("getMetadata(BufferedImage): " + bi.getWidth()); try { ImageRecord r = new ImageRecord(); r.setWidth(bi.getWidth()); r.setHeight(bi.getHeight()); r.setDWTLevels(DEFAULT_LEVELS); r.setLevels(DEFAULT_LEVELS); r.setBitDepth(bi.getColorModel().getPixelSize()); r.setNumChannels(bi.getColorModel().getNumColorComponents()); // r.setCompositingLayerCount(getNumberOfPages(r)); // 'bi' refers to just one page extracted // from the PDF file. // logger.debug("r2: "+r.toString()); // TODO return r; } catch (Exception e) { throw new DjatokaException(e); } }
@Override // TODO // FIXME public final String[] getXMLBox(ImageRecord r) throws DjatokaException { String[] xml = null; try { if (r.getImageFile() == null && r.getObject() != null && r.getObject() instanceof InputStream) { // xml = new JP2ImageInfo((InputStream) r.getObject()).getXmlDocs(); } else { // xml = new JP2ImageInfo(new File(r.getImageFile())).getXmlDocs(); } } catch (Exception e) { logger.error(e, e); } return xml; }
/**
 * Extracts the region defined in DjatokaDecodeParam as a BufferedImage.
 *
 * <p>The record's file path takes precedence; when it is absent, the record's object is used if
 * it is an {@link InputStream}. Any other combination is rejected.
 *
 * @param input ImageRecord wrapper containing file reference, inputstream, etc.
 * @param params DjatokaDecodeParam instance containing region and transform settings.
 * @return extracted region as a BufferedImage
 * @throws DjatokaException if the record has neither a file path nor an InputStream object
 *     (including the case where the object is {@code null}), or if the delegated extraction
 *     fails
 */
@Override
public BufferedImage process(ImageRecord input, DjatokaDecodeParam params)
    throws DjatokaException {
  logger.debug("in imagerecord;");

  if (input.getImageFile() != null) {
    return process(input.getImageFile(), params);
  }

  Object source = input.getObject();
  if (source instanceof InputStream) {
    return process((InputStream) source, params);
  }

  // The object may be null here; describe it without dereferencing so that callers get the
  // intended DjatokaException rather than a NullPointerException.
  String sourceType = (source == null) ? "null" : source.getClass().getName();
  throw new DjatokaException(
      "File not defined and Input Object Type " + sourceType + " is not supported");
}
/** * Get PDF information with pdfinfo: - "Pages: X": number of pages; - "Page X size: www.ww * hhh.hh": size of each page, in pts. * * @returns a map: - [Pages][n] - [Page 1][111.11 222.22] - [Page i][www.ww hhh.hh] - [Page * n][999.99 1000.00] */ private static Map<String, String> getPDFProperties(ImageRecord input) throws DjatokaException { logger.debug("Getting PDF info"); try { setPDFCommandsPath(); } catch (IllegalStateException e) { logger.error("Failed to set PDF commands path: ", e); throw e; } HashMap<String, String> pdfProperties = new HashMap<String, String>(); String sourcePath = null; if (input.getImageFile() != null) { logger.debug("PDFInfo image file: " + input.getImageFile()); sourcePath = input.getImageFile(); } else if (input.getObject() != null && (input.getObject() instanceof InputStream)) { FileInputStream fis = null; fis = (FileInputStream) input.getObject(); File in; // Copy to tmp file try { String cacheDir = OpenURLJP2KService.getCacheDir(); if (cacheDir != null) { in = File.createTempFile("tmp", ".pdf", new File(cacheDir)); } else { in = File.createTempFile("tmp", ".pdf"); } in.deleteOnExit(); FileOutputStream fos = new FileOutputStream(in); IOUtils.copyStream(fis, fos); } catch (IOException e) { logger.error(e, e); throw new DjatokaException(e); } sourcePath = in.getAbsolutePath(); } else { throw new DjatokaException( "File not defined and Input Object Type " + input // .getObject().getClass().getName() + " is not supported"); } String pdfinfoCmd[] = PDFINFO_COMMAND.clone(); pdfinfoCmd[PDFINFO_COMMAND_POSITION_BIN] = pdfinfoPath; pdfinfoCmd[PDFINFO_COMMAND_POSITION_FIRSTPAGE] = "1"; pdfinfoCmd[PDFINFO_COMMAND_POSITION_LASTPAGE] = "-1"; // Last page even we not knowing its number. 
pdfinfoCmd[PDFINFO_COMMAND_POSITION_FILE] = sourcePath; Process pdfProc = null; try { ArrayList<MatchResult> pageSizes = new ArrayList<MatchResult>(); MatchResult pages = null; pdfProc = Runtime.getRuntime().exec(pdfinfoCmd); BufferedReader lr = new BufferedReader(new InputStreamReader(pdfProc.getInputStream())); String line; for (line = lr.readLine(); line != null; line = lr.readLine()) { Matcher mm1 = PAGES_PATT.matcher(line); if (mm1.matches()) pages = mm1.toMatchResult(); Matcher mm2 = MEDIABOX_PATT.matcher(line); if (mm2.matches()) pageSizes.add(mm2.toMatchResult()); } int istatus = pdfProc.waitFor(); if (istatus != 0) logger.error("pdfinfo proc failed, exit status=" + istatus + ", file=" + sourcePath); if (pages == null) { logger.error( "Did not find 'Pages' line in output of pdfinfo command: " + Arrays.deepToString(pdfinfoCmd)); pdfProperties.put("Pages", "0"); } else { // int n = Integer.parseInteger(pages.group(1)); pdfProperties.put("Pages", pages.group(1)); } if (pageSizes.isEmpty()) { logger.error( "Did not find \"Page X size\" lines in output of pdfinfo command: " + Arrays.deepToString(pdfinfoCmd)); throw new IllegalArgumentException("Failed to get pages size of PDF with pdfinfo."); } else { for (MatchResult mr : pageSizes) { String page = mr.group(1); float x0 = Float.parseFloat(mr.group(2)); float y0 = Float.parseFloat(mr.group(3)); float x1 = Float.parseFloat(mr.group(4)); float y1 = Float.parseFloat(mr.group(5)); float w = Math.abs(x1 - x0); float h = Math.abs(y1 - y0); // Have to scale page sizes by max dpi (MAX_DPI / DEFAULT_DENSITY). Otherwise, // BookReader.js will request the wrong zoom level (svc.level). 
float ws = w * MAX_DPI / DEFAULT_DENSITY; float hs = h * MAX_DPI / DEFAULT_DENSITY; String width = "" + ws; // mr.group(2); String height = "" + hs; // mr.group(3); pdfProperties.put("Page " + page, width + " " + height); } } } catch (Exception e) { logger.error("Failed getting PDF information: ", e); throw new DjatokaException("Failed getting PDF information: ", e); } finally { // Our exec() should just consume one of the streams, but we want to stay safe. // http://mark.koli.ch/2011/01/leaky-pipes-remember-to-close-your-streams-when-using-javas-runtimegetruntimeexec.html org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getOutputStream()); org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getInputStream()); org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getErrorStream()); } return pdfProperties; }
/** * Returns PDF props in ImageRecord * * @param r ImageRecord containing absolute file path of PDF file. * @return a populated ImageRecord object * @throws DjatokaException */ @Override public final ImageRecord getMetadata(ImageRecord r) throws DjatokaException { if ((r.getImageFile() == null || !new File(r.getImageFile()).exists()) && r.getObject() == null) throw new DjatokaException("Image Does Not Exist: " + r.toString()); logger.debug("Get metadata: " + r.toString()); try { DjatokaDecodeParam params = new DjatokaDecodeParam(); BufferedImage bi = process(r, params); r.setWidth(bi.getWidth()); r.setHeight(bi.getHeight()); r.setDWTLevels(DEFAULT_LEVELS); r.setLevels(DEFAULT_LEVELS); r.setBitDepth(bi.getColorModel().getPixelSize()); r.setNumChannels(bi.getColorModel().getNumColorComponents()); // r.setCompositingLayerCount(getNumberOfPages(r)); // Semantics: number of pages in the PDF // file. HashMap<String, String> pdfProps = (HashMap<String, String>) getPDFProperties(r); int n = Integer.parseInt(pdfProps.remove("Pages")); r.setCompositingLayerCount(n); // Since it is not possible for the viewer to query about a specific page's width and height // (because in Djatoka's point of view a PDF is just one image with various compositing // layers, which are the pages), // at this point right here we query the PDF file about the size of all pages and store this // information in a Map. This map can be returned by getMetadata by setting it as the // instProps member of the // ImageRecord class, which Djatoka already implements and which is returned as JSON to the // viewer JS. // The viewer then has to store this information and later query it instead of asking Djatoka // (getMetadata) again. // Map<String, String> instProps = getPagesSizes(r); r.setInstProps(pdfProps); logger.debug("instProps: " + r.getInstProps()); logger.debug("Get metadata: " + r.toString()); } catch (Exception e) { throw new DjatokaException(e); } return r; }