protected String blobsToText(List<Blob> blobs, String docId) { List<String> strings = new LinkedList<String>(); for (Blob blob : blobs) { try { SimpleBlobHolder bh = new SimpleBlobHolder(blob); BlobHolder result = convert(bh); if (result == null) { continue; } blob = result.getBlob(); if (blob == null) { continue; } String string = new String(blob.getByteArray(), "UTF-8"); // strip '\0 chars from text if (string.indexOf('\0') >= 0) { string = string.replace("\0", " "); } strings.add(string); } catch (Exception e) { String msg = "Could not extract fulltext of file '" + blob.getFilename() + "' for document: " + docId + ": " + e; log.warn(msg); log.debug(msg, e); continue; } } return StringUtils.join(strings, " "); }
/**
 * Resolves the blob holder to convert from the current target.
 * <p>
 * The blob is taken from the target's {@code Blob} adapter or, failing that, from the
 * {@code BlobHolder} adapter of the target's {@code DocumentModel} adapter. When the target is an
 * instance of "blob", the holder exposed by the {@code BlobObject} takes precedence. If no holder
 * is available the blob is wrapped in a {@code SimpleBlobHolder}.
 *
 * @return the blob holder to convert
 * @throws IllegalParameterException if no blob can be found
 */
protected BlobHolder getBlobHolderToConvert() {
    BlobHolder holder = null;
    Blob blob = getTarget().getAdapter(Blob.class);
    if (blob == null) {
        DocumentModel doc = getTarget().getAdapter(DocumentModel.class);
        holder = doc == null ? null : doc.getAdapter(BlobHolder.class);
        if (holder != null) {
            blob = holder.getBlob();
        }
    }
    if (blob == null) {
        throw new IllegalParameterException("No Blob found");
    }

    if (getTarget().isInstanceOf("blob")) {
        holder = ((BlobObject) getTarget()).getBlobHolder();
    }
    return holder != null ? holder : new SimpleBlobHolder(blob);
}
protected DocumentModel doCreateLeafNode(DocumentModel parent, SourceNode node) throws IOException { if (!shouldImportDocument(node)) { return null; } Stopwatch stopwatch = SimonManager.getStopwatch("org.nuxeo.ecm.platform.importer.create_leaf"); Split split = stopwatch.start(); DocumentModel leaf = null; try { leaf = getFactory().createLeafNode(session, parent, node); } catch (IOException e) { String errMsg = "Unable to create leaf document for " + node.getSourcePath() + ":" + e + (e.getCause() != null ? e.getCause() : ""); fslog(errMsg, true); log.error(errMsg); // Process leaf node creation error and check if the global // import task should continue boolean shouldImportTaskContinue = getFactory().processLeafNodeCreationError(session, parent, node); if (!shouldImportTaskContinue) { throw new NuxeoException(e); } } finally { split.stop(); } BlobHolder bh = node.getBlobHolder(); if (leaf != null && bh != null) { Blob blob = bh.getBlob(); if (blob != null) { long fileSize = blob.getLength(); String fileName = blob.getFilename(); if (fileSize > 0) { long kbSize = fileSize / 1024; String parentPath = (parent == null) ? "null" : parent.getPathAsString(); fslog( "Created doc " + leaf.getName() + " at " + parentPath + " with file " + fileName + " of size " + kbSize + "KB", true); } uploadedKO += fileSize; } // save session if needed commit(); } return leaf; }
/**
 * Tells whether annotations are enabled for the given document.
 * <p>
 * Annotations are never enabled for a document without a blob holder, without a main blob, or
 * whose blob has no mime type. Otherwise they are enabled when the {@code TEXT_ANNOTATIONS_KEY}
 * framework property is true or when the blob mime type starts with "image".
 *
 * @param doc the document to check
 * @return {@code true} if annotations are enabled for {@code doc}
 */
public boolean isAnnotationsEnabled(DocumentModel doc) {
    BlobHolder blobHolder = doc.getAdapter(BlobHolder.class);
    if (blobHolder == null) {
        // not every document type can be adapted to a BlobHolder
        return false;
    }
    Blob blob = blobHolder.getBlob();
    if (blob == null) {
        return false;
    }
    String mimeType = blob.getMimeType();
    if (mimeType == null) {
        return false;
    }
    return Framework.isBooleanPropertyTrue(TEXT_ANNOTATIONS_KEY) || mimeType.startsWith("image");
}
/**
 * Converts the given blob holder to the requested mime type through the
 * {@code ConversionService} and returns the main converted blob.
 *
 * @param bh the blob holder to convert
 * @param mimeType the destination mime type
 * @param uriInfo the request URI info from which the conversion parameters are computed
 * @return the converted blob, never {@code null}
 * @throws WebResourceNotFoundException if the conversion produced no blob
 */
protected Blob convertWithMimeType(BlobHolder bh, String mimeType, UriInfo uriInfo) {
    Map<String, Serializable> parameters = computeConversionParameters(uriInfo);
    ConversionService conversionService = Framework.getService(ConversionService.class);
    BlobHolder blobHolder = conversionService.convertToMimeType(mimeType, bh, parameters);
    // a converter may return no holder at all: report it like a missing blob instead of an NPE
    Blob conversionBlob = blobHolder == null ? null : blobHolder.getBlob();
    if (conversionBlob == null) {
        throw new WebResourceNotFoundException(String.format("No converted Blob for '%s' mime type", mimeType));
    }
    return conversionBlob;
}
@Test public void testPagesWithoutPreviewConverter() throws ClientException { String converterName = cs.getConverterName("application/vnd.apple.pages", "application/pdf"); assertEquals("iwork2pdf", converterName); BlobHolder pagesBH = getBlobFromPath("test-docs/hello-without-preview.pages"); pagesBH.getBlob().setMimeType("application/vnd.apple.pages"); try { cs.convert(converterName, pagesBH, null); fail("pdf preview isn't available"); } catch (ConversionException e) { // ok } }
/**
 * Converts the given blob holder with the named converter through the
 * {@code ConversionService} and returns the main converted blob.
 *
 * @param bh the blob holder to convert
 * @param converter the name of the converter to use
 * @param uriInfo the request URI info from which the conversion parameters are computed
 * @return the converted blob, never {@code null}
 * @throws IllegalParameterException if the converter is not available
 * @throws WebResourceNotFoundException if the conversion produced no blob
 */
protected Blob convertWithConverter(BlobHolder bh, String converter, UriInfo uriInfo) {
    ConversionService conversionService = Framework.getService(ConversionService.class);
    if (!conversionService.isConverterAvailable(converter).isAvailable()) {
        throw new IllegalParameterException(String.format("The '%s' converter is not available", converter));
    }
    Map<String, Serializable> parameters = computeConversionParameters(uriInfo);
    BlobHolder blobHolder = conversionService.convert(converter, bh, parameters);
    // a converter may return no holder at all: report it like a missing blob instead of an NPE
    Blob conversionBlob = blobHolder == null ? null : blobHolder.getBlob();
    if (conversionBlob == null) {
        throw new WebResourceNotFoundException(String.format("No converted Blob using '%s' converter", converter));
    }
    return conversionBlob;
}
/**
 * Builds the blob parameters of the command line: the main blob of the holder is registered
 * under the "inFilePath" key.
 *
 * @param blobHolder the holder of the blob to convert
 * @param parameters the conversion parameters (not used here)
 * @return a new map holding the single "inFilePath" entry
 */
@Override
protected Map<String, Blob> getCmdBlobParameters(BlobHolder blobHolder, Map<String, Serializable> parameters)
        throws ConversionException {
    Blob inputBlob = blobHolder.getBlob();
    Map<String, Blob> blobParams = new HashMap<String, Blob>();
    blobParams.put("inFilePath", inputBlob);
    return blobParams;
}
/**
 * Renders the target document with the configured template.
 * <p>
 * When the document is not template based, its main blob is returned instead, or {@code null}
 * if it has no blob holder.
 *
 * @param targetDocument the document to render
 * @return the rendered blob, the document's main blob, or {@code null}
 */
@OperationMethod
public Blob run(DocumentModel targetDocument) throws Exception {
    TemplateBasedDocument renderable = targetDocument.getAdapter(TemplateBasedDocument.class);
    if (renderable == null) {
        // not template based: fall back on the main blob, if any
        BlobHolder bh = targetDocument.getAdapter(BlobHolder.class);
        return bh == null ? null : bh.getBlob();
    }
    if (store) {
        return renderable.renderAndStoreAsAttachment(templateName, save);
    }
    return renderable.renderWithTemplate(templateName);
}
/**
 * Converts a test document to PDF with the "any2pdf" converter and checks the extracted text.
 * <p>
 * The test is skipped (returning {@code null}) when the converter is not available.
 *
 * @param srcMT the source mime type
 * @param fileName the name of the file under "test-docs/"
 * @param pdfa whether PDF/A-1 output is requested and verified
 * @param updateIndex whether the update index parameter is passed to the converter
 * @return the text read from the generated PDF, or {@code null} if the test was skipped
 */
protected String doTestPDFConverter(String srcMT, String fileName, boolean pdfa, boolean updateIndex)
        throws Exception {
    ConversionService cs = Framework.getLocalService(ConversionService.class);

    String converterName = cs.getConverterName(srcMT, "application/pdf");
    assertEquals("any2pdf", converterName);

    ConverterCheckResult availability = cs.isConverterAvailable(converterName);
    assertNotNull(availability);
    if (!availability.isAvailable()) {
        log.warn("Skipping JOD based converter tests since OOo is not installed");
        log.warn("  converter check output : " + availability.getInstallationMessage());
        log.warn("  converter check output : " + availability.getErrorMessage());
        return null;
    }

    BlobHolder source = getBlobFromPath("test-docs/" + fileName, srcMT);

    Map<String, Serializable> parameters = new HashMap<String, Serializable>();
    if (pdfa) {
        parameters.put(JODBasedConverter.PDFA1_PARAM, Boolean.TRUE);
    }
    if (updateIndex) {
        parameters.put(JODBasedConverter.UPDATE_INDEX_PARAM, Boolean.TRUE);
    }

    BlobHolder result = cs.convert(converterName, source, parameters);
    assertNotNull(result);

    File pdfFile = File.createTempFile("testingPDFConverter", ".pdf");
    try {
        result.getBlob().transferTo(pdfFile);
        String text = readPdfText(pdfFile);
        assertTrue(text.contains("Hello") || text.contains("hello"));
        if (pdfa) {
            assertTrue("Output is not PDF/A", isPDFA(pdfFile));
        }
        return text;
    } finally {
        // always remove the temporary PDF
        pdfFile.delete();
    }
}
/**
 * Transcodes the main blob of the holder through {@code convert(Blob)} and wraps the result in a
 * new {@code SimpleBlobHolder}.
 *
 * @param blobHolder the holder of the blob to transcode
 * @param parameters the conversion parameters (not used here)
 * @return a holder for the transcoded blob
 * @throws ConversionException if the blob cannot be fetched or transcoded
 */
@Override
public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
    Blob source;
    String sourcePath;
    try {
        source = blobHolder.getBlob();
        sourcePath = blobHolder.getFilePath();
    } catch (ClientException e) {
        throw new ConversionException("Cannot fetch content of blob", e);
    }

    Blob utf8Blob;
    try {
        utf8Blob = convert(source);
    } catch (IOException e) {
        throw new ConversionException("Cannot transcode " + sourcePath + " to UTF-8", e);
    }
    return new SimpleBlobHolder(utf8Blob);
}
/**
 * Schedules an asynchronous conversion with the "identity" converter and checks that the single
 * resulting blob keeps the filename and mime type of the input.
 */
@Test
public void shouldDoAsyncConversionGivenConverterName() throws IOException {
    File docFile = FileUtils.getResourceFileFromContext("test-data/hello.doc");
    Blob input = Blobs.createBlob(docFile, "application/msword", null, "hello.doc");
    BlobHolder inputHolder = new SimpleBlobHolder(input);

    String conversionId = conversionService.scheduleConversion("identity", inputHolder, null);
    assertNotNull(conversionId);

    // let the asynchronous work finish before fetching the result
    eventService.waitForAsyncCompletion();

    BlobHolder outputHolder = conversionService.getConversionResult(conversionId, true);
    assertNotNull(outputHolder);

    List<Blob> outputs = outputHolder.getBlobs();
    assertEquals(1, outputs.size());

    Blob output = outputs.get(0);
    assertEquals(input.getFilename(), output.getFilename());
    assertEquals(input.getMimeType(), output.getMimeType());
}
/**
 * Converts a Pages document to HTML with the "iwork2html" converter and checks the two resulting
 * blobs. The test is skipped when the converter or the "pdftohtml" command is not available.
 */
@Test
public void testHTMLConverter() throws Exception {
    final String pagesMimeType = "application/vnd.apple.pages";

    String converterName = cs.getConverterName(pagesMimeType, "text/html");
    assertEquals("iwork2html", converterName);

    CommandLineExecutorService cles = Framework.getLocalService(CommandLineExecutorService.class);
    assertNotNull(cles);

    ConverterCheckResult availability = cs.isConverterAvailable(converterName);
    assertNotNull(availability);
    if (!availability.isAvailable()) {
        log.warn("Skipping PDF2Html tests since commandLine is not installed");
        log.warn(" converter check output : " + availability.getInstallationMessage());
        log.warn(" converter check output : " + availability.getErrorMessage());
        return;
    }

    if (!cles.getCommandAvailability("pdftohtml").isAvailable()) {
        log.warn("pdftohtml command is not available, skipping test");
        return;
    }

    BlobHolder pagesBH = getBlobFromPath("test-docs/hello.pages");
    pagesBH.getBlob().setMimeType(pagesMimeType);

    BlobHolder result = cs.convert(converterName, pagesBH, null);
    assertNotNull(result);

    List<Blob> blobs = result.getBlobs();
    assertNotNull(blobs);
    assertEquals(2, blobs.size());

    // the main blob is the HTML index, the second one a resource it refers to
    Blob mainBlob = result.getBlob();
    assertEquals("index.html", mainBlob.getFilename());

    Blob subBlob = blobs.get(1);
    assertTrue(subBlob.getFilename().startsWith("index001"));

    String htmlContent = mainBlob.getString();
    assertTrue(htmlContent.contains("hello"));
}
@Override public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException { try { // Make sure the toThumbnail command is available CommandLineExecutorService cles = Framework.getLocalService(CommandLineExecutorService.class); CommandAvailability commandAvailability = cles.getCommandAvailability(THUMBNAIL_COMMAND); if (!commandAvailability.isAvailable()) { return null; } // get the input and output of the command Blob blob = blobHolder.getBlob(); File inputFile = null; if (blob instanceof FileBlob) { inputFile = ((FileBlob) blob).getFile(); } else if (blob instanceof SQLBlob) { StreamSource source = ((SQLBlob) blob).getBinary().getStreamSource(); inputFile = ((FileSource) source).getFile(); } else if (blob instanceof StreamingBlob) { StreamingBlob streamingBlob = ((StreamingBlob) blob); if (!streamingBlob.isPersistent()) { streamingBlob.persist(); } StreamSource source = streamingBlob.getStreamSource(); inputFile = ((FileSource) source).getFile(); } if (inputFile == null) { return null; } CmdParameters params = new CmdParameters(); File outputFile = File.createTempFile("nuxeoImageTarget", "." + "png"); String size = THUMBNAIL_DEFAULT_SIZE; if (parameters != null) { if (parameters.containsKey(THUMBNAIL_SIZE_PARAMETER_NAME)) { size = (String) parameters.get(THUMBNAIL_SIZE_PARAMETER_NAME); } } params.addNamedParameter(THUMBNAIL_SIZE_PARAMETER_NAME, size); params.addNamedParameter("inputFilePath", inputFile); params.addNamedParameter("outputFilePath", outputFile); ExecResult res = cles.execCommand(THUMBNAIL_COMMAND, params); if (!res.isSuccessful()) { throw res.getError(); } Blob targetBlob = new FileBlob(outputFile); Framework.trackFile(outputFile, targetBlob); return new SimpleCachableBlobHolder(targetBlob); } catch (CommandNotAvailable | IOException | ClientException | CommandException e) { throw new ConversionException("Thumbnail conversion failed", e); } }
/**
 * Converts a plain text file to PDF with the "any2pdf" converter and checks that the single
 * resulting blob contains the expected text.
 */
@Test
public void testConverter() throws Exception {
    String converterName = cs.getConverterName("text/plain", "application/pdf");
    assertEquals("any2pdf", converterName);

    checkConverterAvailability(converterName);
    checkCommandAvailability("soffice");

    BlobHolder textHolder = getBlobFromPath("test-docs/hello.txt");
    Map<String, Serializable> parameters = new HashMap<>();

    BlobHolder result = cs.convert(converterName, textHolder, parameters);
    assertNotNull(result);

    List<Blob> blobs = result.getBlobs();
    assertNotNull(blobs);
    assertEquals(1, blobs.size());

    Blob mainBlob = result.getBlob();
    String pdfText = DocumentUTUtils.readPdfText(mainBlob.getFile());
    boolean saysHello = pdfText.contains("Hello") || pdfText.contains("hello");
    assertTrue(saysHello);
}
/**
 * Converts the iWork document at the given path to PDF with the "iwork2pdf" converter and
 * checks that the single resulting blob contains "hello".
 *
 * @param blobPath the path of the iWork test document
 */
public void testiWorkConverter(String blobPath) throws Exception {
    final String iworkMimeType = "application/vnd.apple.iwork";

    String converterName = cs.getConverterName(iworkMimeType, "application/pdf");
    assertEquals("iwork2pdf", converterName);

    BlobHolder iworkHolder = getBlobFromPath(blobPath);
    iworkHolder.getBlob().setMimeType(iworkMimeType);

    BlobHolder result = cs.convert(converterName, iworkHolder, null);
    assertNotNull(result);

    List<Blob> blobs = result.getBlobs();
    assertNotNull(blobs);
    assertEquals(1, blobs.size());

    File pdfFile = File.createTempFile("testingPDFConverter", ".pdf");
    try {
        result.getBlob().transferTo(pdfFile);
        String pdfText = BaseConverterTest.readPdfText(pdfFile);
        String lowered = pdfText.toLowerCase();
        assertTrue(lowered.contains("hello"));
    } finally {
        // always remove the temporary PDF
        pdfFile.delete();
    }
}
@Override public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException { try { // Make sure the toThumbnail command is available CommandLineExecutorService cles = Framework.getLocalService(CommandLineExecutorService.class); CommandAvailability commandAvailability = cles.getCommandAvailability(THUMBNAIL_COMMAND); if (!commandAvailability.isAvailable()) { return null; } // get the input and output of the command Blob blob = blobHolder.getBlob(); Blob targetBlob = Blobs.createBlobWithExtension(".png"); targetBlob.setMimeType("image/png"); try (CloseableFile source = blob.getCloseableFile()) { CmdParameters params = new CmdParameters(); String size; if (parameters != null && parameters.containsKey(THUMBNAIL_SIZE_PARAMETER_NAME)) { size = (String) parameters.get(THUMBNAIL_SIZE_PARAMETER_NAME); } else { size = THUMBNAIL_DEFAULT_SIZE; } params.addNamedParameter(THUMBNAIL_SIZE_PARAMETER_NAME, size); params.addNamedParameter("inputFilePath", source.getFile()); params.addNamedParameter("outputFilePath", targetBlob.getFile()); ExecResult res = cles.execCommand(THUMBNAIL_COMMAND, params); if (!res.isSuccessful()) { throw res.getError(); } } return new SimpleCachableBlobHolder(targetBlob); } catch (CommandNotAvailable | IOException | ClientException | CommandException e) { throw new ConversionException("Thumbnail conversion failed", e); } }
@Override public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException { blobHolder = new UTF8CharsetConverter().convert(blobHolder, parameters); Blob inputBlob = blobHolder.getBlob(); String blobPath = blobHolder.getFilePath(); if (inputBlob == null) { return null; } OfficeDocumentConverter documentConverter = newDocumentConverter(); // This plugin do deal only with one input source. String sourceMimetype = inputBlob.getMimeType(); boolean pdfa1 = parameters != null && Boolean.TRUE.equals(parameters.get(PDFA1_PARAM)); File sourceFile = null; File outFile = null; File[] files = null; try { // If the input blob has the HTML mime type, make sure the // charset meta is present, add it if not if ("text/html".equals(sourceMimetype)) { inputBlob = checkCharsetMeta(inputBlob); } // Get original file extension String ext = inputBlob.getFilename(); int dotPosition = ext.lastIndexOf('.'); if (dotPosition == -1) { ext = ".bin"; } else { ext = ext.substring(dotPosition); } // Copy in a file to be able to read it several time sourceFile = Framework.createTempFile("NXJOOoConverterDocumentIn", ext); InputStream stream = inputBlob.getStream(); FileUtils.copyToFile(stream, sourceFile); stream.close(); DocumentFormat sourceFormat = null; if (sourceMimetype != null) { // Try to fetch it from the registry. sourceFormat = getSourceFormat(documentConverter, sourceMimetype); } // If not found in the registry or not given as a parameter. // Try to sniff ! What does that smell ? :) if (sourceFormat == null) { sourceFormat = getSourceFormat(documentConverter, sourceFile); } // From plugin settings because we know the destination // mimetype. 
DocumentFormat destinationFormat = getDestinationFormat(documentConverter, sourceFormat, pdfa1); // allow HTML2PDF filtering List<Blob> blobs = new ArrayList<>(); if (descriptor.getDestinationMimeType().equals("text/html")) { String tmpDirPath = getTmpDirectory(); File myTmpDir = new File(tmpDirPath + "/JODConv_" + System.currentTimeMillis()); boolean created = myTmpDir.mkdir(); if (!created) { throw new IOException("Unable to create temp dir"); } outFile = new File( myTmpDir.getAbsolutePath() + "/" + "NXJOOoConverterDocumentOut." + destinationFormat.getExtension()); created = outFile.createNewFile(); if (!created) { throw new IOException("Unable to create temp file"); } log.debug("Input File = " + outFile.getAbsolutePath()); // Perform the actual conversion. documentConverter.convert(sourceFile, outFile, destinationFormat); files = myTmpDir.listFiles(); for (File file : files) { // copy the files to a new tmp location, as we'll delete them Blob blob; try (FileInputStream in = new FileInputStream(file)) { blob = Blobs.createBlob(in); } blob.setFilename(file.getName()); blobs.add(blob); // add a blob for the index if (file.getName().equals(outFile.getName())) { Blob indexBlob; try (FileInputStream in = new FileInputStream(file)) { indexBlob = Blobs.createBlob(in); } indexBlob.setFilename("index.html"); blobs.add(0, indexBlob); } } } else { outFile = Framework.createTempFile( "NXJOOoConverterDocumentOut", '.' + destinationFormat.getExtension()); // Perform the actual conversion. 
documentConverter.convert(sourceFile, outFile, destinationFormat, parameters); Blob blob; try (FileInputStream in = new FileInputStream(outFile)) { blob = Blobs.createBlob(in, getDestinationMimeType()); } blobs.add(blob); } return new SimpleCachableBlobHolder(blobs); } catch (IOException e) { String msg = String.format( "An error occurred trying to convert file %s to from %s to %s", blobPath, sourceMimetype, getDestinationMimeType()); throw new ConversionException(msg, e); } finally { if (sourceFile != null) { sourceFile.delete(); } if (outFile != null) { outFile.delete(); } if (files != null) { for (File file : files) { if (file.exists()) { file.delete(); } } } } }