/**
 * Converts every blob through {@code convert} and joins the resulting texts with a single space.
 *
 * <p>The converted content is decoded as UTF-8 and NUL characters are replaced by spaces. Blobs
 * whose conversion yields no holder or no blob are skipped. A blob whose conversion throws is
 * logged (warn without stack trace, debug with it) and skipped.
 *
 * @param blobs the blobs to extract text from
 * @param docId the document id, only used in log messages
 * @return the extracted texts joined with {@code " "}
 */
protected String blobsToText(List<Blob> blobs, String docId) {
  List<String> strings = new LinkedList<String>();
  for (Blob blob : blobs) {
    try {
      BlobHolder result = convert(new SimpleBlobHolder(blob));
      if (result == null) {
        continue;
      }
      // Keep the loop variable pointing at the source blob so that the error
      // message below always names the file that was being processed.
      Blob converted = result.getBlob();
      if (converted == null) {
        continue;
      }
      String string = new String(converted.getByteArray(), "UTF-8");
      // strip '\0' chars from text
      strings.add(string.replace('\0', ' '));
    } catch (Exception e) {
      String msg =
          "Could not extract fulltext of file '"
              + blob.getFilename()
              + "' for document: "
              + docId
              + ": "
              + e;
      log.warn(msg);
      log.debug(msg, e);
    }
  }
  return StringUtils.join(strings, " ");
}
Beispiel #2
0
  /**
   * Resolves the blob holder that should be converted for the current target.
   *
   * <p>The target is first adapted to a {@code Blob}; when that yields nothing, it is adapted to a
   * {@code DocumentModel} and the document's {@code BlobHolder} adapter is used instead. A target
   * that is an instance of {@code "blob"} always contributes its own blob holder.
   *
   * @return the target's blob holder, or a {@code SimpleBlobHolder} wrapping the resolved blob
   * @throws IllegalParameterException if no blob could be resolved
   */
  protected BlobHolder getBlobHolderToConvert() {
    BlobHolder holder = null;
    Blob content = getTarget().getAdapter(Blob.class);
    if (content == null) {
      // No direct blob: fall back on the document's blob holder, if any.
      DocumentModel document = getTarget().getAdapter(DocumentModel.class);
      if (document != null) {
        holder = document.getAdapter(BlobHolder.class);
      }
      if (holder != null) {
        content = holder.getBlob();
      }
    }
    if (content == null) {
      throw new IllegalParameterException("No Blob found");
    }

    if (getTarget().isInstanceOf("blob")) {
      holder = ((BlobObject) getTarget()).getBlobHolder();
    }

    return holder != null ? holder : new SimpleBlobHolder(content);
  }
  /**
   * Creates the leaf document for {@code node} under {@code parent}, timing the creation with a
   * stopwatch and logging the outcome.
   *
   * <p>When creation fails with an {@link IOException}, the error is logged and the factory is
   * asked whether the import task should go on. When a leaf was created and the node has a blob
   * holder, the blob length is added to {@code uploadedKO} and {@code commit()} is called.
   *
   * @param parent the parent document, may be {@code null}
   * @param node the source node to import
   * @return the created leaf, or {@code null} if the node is filtered out or creation failed
   * @throws IOException declared by the signature
   * @throws NuxeoException if creation failed and the factory asks to stop the import
   */
  protected DocumentModel doCreateLeafNode(DocumentModel parent, SourceNode node)
      throws IOException {
    if (!shouldImportDocument(node)) {
      return null;
    }
    DocumentModel leaf = null;
    Split split =
        SimonManager.getStopwatch("org.nuxeo.ecm.platform.importer.create_leaf").start();
    try {
      leaf = getFactory().createLeafNode(session, parent, node);
    } catch (IOException e) {
      String errMsg = "Unable to create leaf document for " + node.getSourcePath() + ":" + e;
      Throwable cause = e.getCause();
      if (cause != null) {
        errMsg += cause;
      }
      fslog(errMsg, true);
      log.error(errMsg);
      // Let the factory handle the creation error and decide whether the
      // global import task should continue.
      if (!getFactory().processLeafNodeCreationError(session, parent, node)) {
        throw new NuxeoException(e);
      }
    } finally {
      split.stop();
    }

    BlobHolder holder = node.getBlobHolder();
    if (leaf == null || holder == null) {
      return leaf;
    }

    Blob content = holder.getBlob();
    if (content != null) {
      long fileSize = content.getLength();
      String fileName = content.getFilename();
      if (fileSize > 0) {
        long kbSize = fileSize / 1024;
        String parentPath = "null";
        if (parent != null) {
          parentPath = parent.getPathAsString();
        }
        String report =
            "Created doc "
                + leaf.getName()
                + " at "
                + parentPath
                + " with file "
                + fileName
                + " of size "
                + kbSize
                + "KB";
        fslog(report, true);
      }
      uploadedKO += fileSize;
    }

    // save session if needed
    commit();
    return leaf;
  }
Beispiel #4
0
  /**
   * Tells whether annotations are enabled for the main blob of the given document.
   *
   * <p>Annotations are enabled when the document has a blob with a known mime type and either the
   * {@code TEXT_ANNOTATIONS_KEY} framework property is true or the mime type starts with
   * {@code "image"}.
   *
   * @param doc the document to check
   * @return {@code false} when the document has no blob holder, no blob or no mime type
   */
  public boolean isAnnotationsEnabled(DocumentModel doc) {
    BlobHolder blobHolder = doc.getAdapter(BlobHolder.class);
    if (blobHolder == null) {
      // The document cannot be adapted to a blob holder: nothing to annotate.
      return false;
    }
    Blob blob = blobHolder.getBlob();
    if (blob == null) {
      return false;
    }
    String mimeType = blob.getMimeType();
    if (mimeType == null) {
      return false;
    }

    return Framework.isBooleanPropertyTrue(TEXT_ANNOTATIONS_KEY) || mimeType.startsWith("image");
  }
Beispiel #5
0
 /**
  * Converts the given blob holder to the requested mime type through the conversion service.
  *
  * @param bh the blob holder to convert
  * @param mimeType the destination mime type
  * @param uriInfo passed to {@code computeConversionParameters} to build the conversion
  *     parameters
  * @return the main blob of the conversion result
  * @throws WebResourceNotFoundException if the conversion yields no holder or no blob
  */
 protected Blob convertWithMimeType(BlobHolder bh, String mimeType, UriInfo uriInfo) {
   Map<String, Serializable> parameters = computeConversionParameters(uriInfo);
   ConversionService conversionService = Framework.getService(ConversionService.class);
   BlobHolder blobHolder = conversionService.convertToMimeType(mimeType, bh, parameters);
   // A missing result holder is reported like a missing blob instead of
   // surfacing as a NullPointerException.
   Blob conversionBlob = blobHolder == null ? null : blobHolder.getBlob();
   if (conversionBlob == null) {
     throw new WebResourceNotFoundException(
         String.format("No converted Blob for '%s' mime type", mimeType));
   }
   return conversionBlob;
 }
  /**
   * Checks that converting a Pages document without an embedded preview to PDF fails with a
   * {@code ConversionException}.
   */
  @Test
  public void testPagesWithoutPreviewConverter() throws ClientException {
    final String pagesMimeType = "application/vnd.apple.pages";

    String converterName = cs.getConverterName(pagesMimeType, "application/pdf");
    assertEquals("iwork2pdf", converterName);

    BlobHolder source = getBlobFromPath("test-docs/hello-without-preview.pages");
    source.getBlob().setMimeType(pagesMimeType);
    try {
      cs.convert(converterName, source, null);
      fail("pdf preview isn't available");
    } catch (ConversionException expected) {
      // expected: the document embeds no pdf preview
    }
  }
Beispiel #7
0
 /**
  * Converts the given blob holder with the named converter through the conversion service.
  *
  * @param bh the blob holder to convert
  * @param converter the name of the converter to use
  * @param uriInfo passed to {@code computeConversionParameters} to build the conversion
  *     parameters
  * @return the main blob of the conversion result
  * @throws IllegalParameterException if the converter is not available
  * @throws WebResourceNotFoundException if the conversion yields no holder or no blob
  */
 protected Blob convertWithConverter(BlobHolder bh, String converter, UriInfo uriInfo) {
   ConversionService conversionService = Framework.getService(ConversionService.class);
   if (!conversionService.isConverterAvailable(converter).isAvailable()) {
     throw new IllegalParameterException(
         String.format("The '%s' converter is not available", converter));
   }
   Map<String, Serializable> parameters = computeConversionParameters(uriInfo);
   BlobHolder blobHolder = conversionService.convert(converter, bh, parameters);
   // A missing result holder is reported like a missing blob instead of
   // surfacing as a NullPointerException.
   Blob conversionBlob = blobHolder == null ? null : blobHolder.getBlob();
   if (conversionBlob == null) {
     throw new WebResourceNotFoundException(
         String.format("No converted Blob using '%s' converter", converter));
   }
   return conversionBlob;
 }
 /**
  * Builds the blob parameters of the command: the holder's main blob is registered under the
  * {@code "inFilePath"} key.
  *
  * @param blobHolder the holder providing the input blob
  * @param parameters the conversion parameters (not read here)
  * @return a new mutable map containing the single {@code "inFilePath"} entry
  */
 @Override
 protected Map<String, Blob> getCmdBlobParameters(
     BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
   Blob input = blobHolder.getBlob();
   Map<String, Blob> blobParams = new HashMap<String, Blob>();
   blobParams.put("inFilePath", input);
   return blobParams;
 }
 /**
  * Renders the target document with the configured template.
  *
  * <p>When the document is not template based, the blob of its {@code BlobHolder} adapter is
  * returned instead.
  *
  * @param targetDocument the document to render
  * @return the rendered blob, the document's own blob, or {@code null} when the document is
  *     neither template based nor adaptable to a blob holder
  */
 @OperationMethod
 public Blob run(DocumentModel targetDocument) throws Exception {
   TemplateBasedDocument renderable = targetDocument.getAdapter(TemplateBasedDocument.class);
   if (renderable == null) {
     // Not a template based document: hand back its main blob, if any.
     BlobHolder holder = targetDocument.getAdapter(BlobHolder.class);
     if (holder == null) {
       return null;
     }
     return holder.getBlob();
   }
   if (store) {
     return renderable.renderAndStoreAsAttachment(templateName, save);
   }
   return renderable.renderWithTemplate(templateName);
 }
  /**
   * Converts a test document to PDF with the {@code any2pdf} converter and checks the extracted
   * text contains "Hello" or "hello".
   *
   * @param srcMT the mime type of the source document
   * @param fileName the file name under {@code test-docs/}
   * @param pdfa whether to request PDF/A-1 output and assert the result is PDF/A
   * @param updateIndex whether to set the update-index conversion parameter
   * @return the text read from the generated PDF, or {@code null} when the converter is not
   *     available and the test is skipped
   */
  protected String doTestPDFConverter(
      String srcMT, String fileName, boolean pdfa, boolean updateIndex) throws Exception {
    ConversionService service = Framework.getLocalService(ConversionService.class);

    String converterName = service.getConverterName(srcMT, "application/pdf");
    assertEquals("any2pdf", converterName);

    ConverterCheckResult availability = service.isConverterAvailable(converterName);
    assertNotNull(availability);
    if (!availability.isAvailable()) {
      log.warn("Skipping JOD based converter tests since OOo is not installed");
      log.warn("  converter check output : " + availability.getInstallationMessage());
      log.warn("  converter check output : " + availability.getErrorMessage());
      return null;
    }

    BlobHolder source = getBlobFromPath("test-docs/" + fileName, srcMT);

    Map<String, Serializable> conversionParams = new HashMap<String, Serializable>();
    if (pdfa) {
      conversionParams.put(JODBasedConverter.PDFA1_PARAM, Boolean.TRUE);
    }
    if (updateIndex) {
      conversionParams.put(JODBasedConverter.UPDATE_INDEX_PARAM, Boolean.TRUE);
    }
    BlobHolder converted = service.convert(converterName, source, conversionParams);
    assertNotNull(converted);

    File pdfFile = File.createTempFile("testingPDFConverter", ".pdf");
    try {
      converted.getBlob().transferTo(pdfFile);
      String extracted = readPdfText(pdfFile);
      assertTrue(extracted.contains("Hello") || extracted.contains("hello"));
      if (pdfa) {
        assertTrue("Output is not PDF/A", isPDFA(pdfFile));
      }
      return extracted;
    } finally {
      // the temporary PDF is only needed for the checks above
      pdfFile.delete();
    }
  }
 /**
  * Transcodes the holder's main blob through {@code convert(Blob)} and wraps the result in a new
  * {@code SimpleBlobHolder}.
  *
  * @param blobHolder the holder providing the blob to transcode
  * @param parameters the conversion parameters (not read here)
  * @return a holder wrapping the transcoded blob
  * @throws ConversionException if the blob cannot be fetched or transcoding fails with an
  *     {@link IOException}
  */
 @Override
 public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters)
     throws ConversionException {
   final Blob source;
   final String sourcePath;
   try {
     source = blobHolder.getBlob();
     sourcePath = blobHolder.getFilePath();
   } catch (ClientException e) {
     throw new ConversionException("Cannot fetch content of blob", e);
   }

   final Blob transcoded;
   try {
     transcoded = convert(source);
   } catch (IOException e) {
     throw new ConversionException("Cannot transcode " + sourcePath + " to UTF-8", e);
   }
   return new SimpleBlobHolder(transcoded);
 }
Beispiel #12
0
  /**
   * Schedules an asynchronous conversion with the {@code "identity"} converter and checks the
   * single resulting blob keeps the source filename and mime type.
   */
  @Test
  public void shouldDoAsyncConversionGivenConverterName() throws IOException {
    File sourceFile = FileUtils.getResourceFileFromContext("test-data/hello.doc");
    Blob source = Blobs.createBlob(sourceFile, "application/msword", null, "hello.doc");

    String conversionId =
        conversionService.scheduleConversion("identity", new SimpleBlobHolder(source), null);
    assertNotNull(conversionId);

    // the conversion runs asynchronously: wait for it before fetching the result
    eventService.waitForAsyncCompletion();

    BlobHolder converted = conversionService.getConversionResult(conversionId, true);
    assertNotNull(converted);
    List<Blob> convertedBlobs = converted.getBlobs();
    assertEquals(1, convertedBlobs.size());
    Blob only = convertedBlobs.get(0);
    assertEquals(source.getFilename(), only.getFilename());
    assertEquals(source.getMimeType(), only.getMimeType());
  }
  /**
   * Converts a Pages document to HTML with the {@code iwork2html} converter and checks the two
   * resulting blobs: the {@code index.html} main blob and an {@code index001*} companion.
   *
   * <p>The test returns early when the converter or the {@code pdftohtml} command is not
   * available.
   */
  @Test
  public void testHTMLConverter() throws Exception {
    final String pagesMimeType = "application/vnd.apple.pages";

    String converterName = cs.getConverterName(pagesMimeType, "text/html");
    assertEquals("iwork2html", converterName);

    CommandLineExecutorService executor =
        Framework.getLocalService(CommandLineExecutorService.class);
    assertNotNull(executor);

    ConverterCheckResult converterCheck = cs.isConverterAvailable(converterName);
    assertNotNull(converterCheck);
    if (!converterCheck.isAvailable()) {
      log.warn("Skipping PDF2Html tests since commandLine is not installed");
      log.warn(" converter check output : " + converterCheck.getInstallationMessage());
      log.warn(" converter check output : " + converterCheck.getErrorMessage());
      return;
    }

    CommandAvailability pdfToHtml = executor.getCommandAvailability("pdftohtml");
    if (!pdfToHtml.isAvailable()) {
      log.warn("pdftohtml command is not available, skipping test");
      return;
    }

    BlobHolder source = getBlobFromPath("test-docs/hello.pages");
    source.getBlob().setMimeType(pagesMimeType);
    BlobHolder converted = cs.convert(converterName, source, null);
    assertNotNull(converted);

    List<Blob> convertedBlobs = converted.getBlobs();
    assertNotNull(convertedBlobs);
    assertEquals(2, convertedBlobs.size());

    Blob index = converted.getBlob();
    assertEquals("index.html", index.getFilename());

    Blob companion = convertedBlobs.get(1);
    assertTrue(companion.getFilename().startsWith("index001"));

    String html = index.getString();
    assertTrue(html.contains("hello"));
  }
  /**
   * Generates a PNG thumbnail of the holder's main blob by running {@code THUMBNAIL_COMMAND} on
   * the file backing the blob.
   *
   * <p>The thumbnail size is read from the {@code THUMBNAIL_SIZE_PARAMETER_NAME} parameter and
   * defaults to {@code THUMBNAIL_DEFAULT_SIZE}.
   *
   * @param blobHolder the holder providing the blob to convert
   * @param parameters the conversion parameters, may be {@code null}
   * @return a holder wrapping the generated thumbnail, or {@code null} when the command is not
   *     available or no backing file can be obtained for the blob
   * @throws ConversionException if the command execution fails
   */
  @Override
  public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters)
      throws ConversionException {
    try {
      // Make sure the toThumbnail command is available
      CommandLineExecutorService cles = Framework.getLocalService(CommandLineExecutorService.class);
      CommandAvailability commandAvailability = cles.getCommandAvailability(THUMBNAIL_COMMAND);
      if (!commandAvailability.isAvailable()) {
        return null;
      }
      // get the input of the command
      Blob blob = blobHolder.getBlob();
      File inputFile = null;
      if (blob instanceof FileBlob) {
        inputFile = ((FileBlob) blob).getFile();
      } else if (blob instanceof SQLBlob) {
        StreamSource source = ((SQLBlob) blob).getBinary().getStreamSource();
        inputFile = ((FileSource) source).getFile();
      } else if (blob instanceof StreamingBlob) {
        StreamingBlob streamingBlob = ((StreamingBlob) blob);
        if (!streamingBlob.isPersistent()) {
          streamingBlob.persist();
        }
        StreamSource source = streamingBlob.getStreamSource();
        inputFile = ((FileSource) source).getFile();
      }
      if (inputFile == null) {
        return null;
      }
      String size = THUMBNAIL_DEFAULT_SIZE;
      if (parameters != null && parameters.containsKey(THUMBNAIL_SIZE_PARAMETER_NAME)) {
        size = (String) parameters.get(THUMBNAIL_SIZE_PARAMETER_NAME);
      }

      File outputFile = File.createTempFile("nuxeoImageTarget", ".png");
      boolean converted = false;
      try {
        CmdParameters params = new CmdParameters();
        params.addNamedParameter(THUMBNAIL_SIZE_PARAMETER_NAME, size);
        params.addNamedParameter("inputFilePath", inputFile);
        params.addNamedParameter("outputFilePath", outputFile);

        ExecResult res = cles.execCommand(THUMBNAIL_COMMAND, params);
        if (!res.isSuccessful()) {
          throw res.getError();
        }
        Blob targetBlob = new FileBlob(outputFile);
        Framework.trackFile(outputFile, targetBlob);
        BlobHolder result = new SimpleCachableBlobHolder(targetBlob);
        converted = true;
        return result;
      } finally {
        // Do not leave the temporary output file behind when the conversion
        // did not produce a result.
        if (!converted) {
          outputFile.delete();
        }
      }
    } catch (CommandNotAvailable | IOException | ClientException | CommandException e) {
      throw new ConversionException("Thumbnail conversion failed", e);
    }
  }
Beispiel #15
0
  /**
   * Converts a plain text document to PDF with the {@code any2pdf} converter and checks the
   * single resulting blob contains "Hello" or "hello".
   */
  @Test
  public void testConverter() throws Exception {
    String converterName = cs.getConverterName("text/plain", "application/pdf");
    assertEquals("any2pdf", converterName);

    checkConverterAvailability(converterName);
    checkCommandAvailability("soffice");

    BlobHolder source = getBlobFromPath("test-docs/hello.txt");
    Map<String, Serializable> noParameters = new HashMap<>();

    BlobHolder converted = cs.convert(converterName, source, noParameters);
    assertNotNull(converted);

    List<Blob> convertedBlobs = converted.getBlobs();
    assertNotNull(convertedBlobs);
    assertEquals(1, convertedBlobs.size());

    File pdf = converted.getBlob().getFile();
    String extracted = DocumentUTUtils.readPdfText(pdf);
    assertTrue(extracted.contains("Hello") || extracted.contains("hello"));
  }
  /**
   * Converts the iWork document at {@code blobPath} to PDF with the {@code iwork2pdf} converter
   * and checks the lower-cased text of the single resulting blob contains "hello".
   *
   * @param blobPath the path of the iWork test document
   */
  public void testiWorkConverter(String blobPath) throws Exception {
    final String iworkMimeType = "application/vnd.apple.iwork";

    String converterName = cs.getConverterName(iworkMimeType, "application/pdf");
    assertEquals("iwork2pdf", converterName);

    BlobHolder source = getBlobFromPath(blobPath);
    source.getBlob().setMimeType(iworkMimeType);

    BlobHolder converted = cs.convert(converterName, source, null);
    assertNotNull(converted);

    List<Blob> convertedBlobs = converted.getBlobs();
    assertNotNull(convertedBlobs);
    assertEquals(1, convertedBlobs.size());

    File pdfFile = File.createTempFile("testingPDFConverter", ".pdf");
    try {
      converted.getBlob().transferTo(pdfFile);
      String extracted = BaseConverterTest.readPdfText(pdfFile);
      assertTrue(extracted.toLowerCase().contains("hello"));
    } finally {
      // the temporary PDF is only needed for the check above
      pdfFile.delete();
    }
  }
  /**
   * Generates a PNG thumbnail of the holder's main blob by running {@code THUMBNAIL_COMMAND}.
   *
   * <p>The thumbnail size is read from the {@code THUMBNAIL_SIZE_PARAMETER_NAME} parameter and
   * defaults to {@code THUMBNAIL_DEFAULT_SIZE}.
   *
   * @param blobHolder the holder providing the blob to convert
   * @param parameters the conversion parameters, may be {@code null}
   * @return a holder wrapping the generated {@code image/png} blob, or {@code null} when the
   *     command is not available or the holder has no blob
   * @throws ConversionException if the command execution fails
   */
  @Override
  public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters)
      throws ConversionException {
    try {
      // Make sure the toThumbnail command is available
      CommandLineExecutorService cles = Framework.getLocalService(CommandLineExecutorService.class);
      CommandAvailability commandAvailability = cles.getCommandAvailability(THUMBNAIL_COMMAND);
      if (!commandAvailability.isAvailable()) {
        return null;
      }
      // get the input and output of the command
      Blob blob = blobHolder.getBlob();
      if (blob == null) {
        // Nothing to convert: report "no result" rather than failing with a
        // NullPointerException further down.
        return null;
      }

      Blob targetBlob = Blobs.createBlobWithExtension(".png");
      targetBlob.setMimeType("image/png");
      try (CloseableFile source = blob.getCloseableFile()) {
        CmdParameters params = new CmdParameters();
        String size;
        if (parameters != null && parameters.containsKey(THUMBNAIL_SIZE_PARAMETER_NAME)) {
          size = (String) parameters.get(THUMBNAIL_SIZE_PARAMETER_NAME);
        } else {
          size = THUMBNAIL_DEFAULT_SIZE;
        }
        params.addNamedParameter(THUMBNAIL_SIZE_PARAMETER_NAME, size);
        params.addNamedParameter("inputFilePath", source.getFile());
        params.addNamedParameter("outputFilePath", targetBlob.getFile());

        ExecResult res = cles.execCommand(THUMBNAIL_COMMAND, params);
        if (!res.isSuccessful()) {
          throw res.getError();
        }
      }
      return new SimpleCachableBlobHolder(targetBlob);
    } catch (CommandNotAvailable | IOException | ClientException | CommandException e) {
      throw new ConversionException("Thumbnail conversion failed", e);
    }
  }
Beispiel #18
0
  /**
   * Converts the holder's main blob to the converter's destination mime type with an
   * {@code OfficeDocumentConverter}.
   *
   * <p>The input is first passed through {@code UTF8CharsetConverter} and copied to a temporary
   * file. For an HTML destination the conversion runs in a dedicated temporary directory and
   * every generated file is returned as a blob, with an extra {@code index.html} copy of the main
   * output placed first. For any other destination a single blob is returned. All temporary
   * files, and the temporary directory, are deleted before returning.
   *
   * @param blobHolder the holder providing the blob to convert
   * @param parameters the conversion parameters, may be {@code null}; {@code PDFA1_PARAM} is read
   *     here and the whole map is forwarded to the converter for non-HTML destinations
   * @return a holder wrapping the converted blob(s), or {@code null} when there is no input blob
   * @throws ConversionException if an {@link IOException} occurs during the conversion
   */
  @Override
  public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters)
      throws ConversionException {
    blobHolder = new UTF8CharsetConverter().convert(blobHolder, parameters);
    Blob inputBlob = blobHolder.getBlob();
    String blobPath = blobHolder.getFilePath();
    if (inputBlob == null) {
      return null;
    }

    OfficeDocumentConverter documentConverter = newDocumentConverter();
    // This plugin deals with only one input source.
    String sourceMimetype = inputBlob.getMimeType();

    boolean pdfa1 = parameters != null && Boolean.TRUE.equals(parameters.get(PDFA1_PARAM));

    File sourceFile = null;
    File outFile = null;
    File tmpDir = null;
    try {

      // If the input blob has the HTML mime type, make sure the
      // charset meta is present, add it if not
      if ("text/html".equals(sourceMimetype)) {
        inputBlob = checkCharsetMeta(inputBlob);
      }

      // Get original file extension; blobs without a filename or without an
      // extension fall back on ".bin"
      String filename = inputBlob.getFilename();
      int dotPosition = filename == null ? -1 : filename.lastIndexOf('.');
      String ext = dotPosition == -1 ? ".bin" : filename.substring(dotPosition);

      // Copy in a file to be able to read it several times
      sourceFile = Framework.createTempFile("NXJOOoConverterDocumentIn", ext);
      try (InputStream stream = inputBlob.getStream()) {
        FileUtils.copyToFile(stream, sourceFile);
      }

      DocumentFormat sourceFormat = null;
      if (sourceMimetype != null) {
        // Try to fetch it from the registry.
        sourceFormat = getSourceFormat(documentConverter, sourceMimetype);
      }
      // If not found in the registry or not given as a parameter,
      // try to sniff it from the file.
      if (sourceFormat == null) {
        sourceFormat = getSourceFormat(documentConverter, sourceFile);
      }

      // From plugin settings because we know the destination
      // mimetype.
      DocumentFormat destinationFormat =
          getDestinationFormat(documentConverter, sourceFormat, pdfa1);

      // allow HTML2PDF filtering

      List<Blob> blobs = new ArrayList<>();

      if (descriptor.getDestinationMimeType().equals("text/html")) {
        String tmpDirPath = getTmpDirectory();
        File myTmpDir = new File(tmpDirPath + "/JODConv_" + System.currentTimeMillis());
        boolean created = myTmpDir.mkdir();
        if (!created) {
          throw new IOException("Unable to create temp dir");
        }
        // Only remember the directory for cleanup once this call has created
        // it, so a pre-existing directory is never wiped.
        tmpDir = myTmpDir;

        outFile =
            new File(
                myTmpDir.getAbsolutePath()
                    + "/"
                    + "NXJOOoConverterDocumentOut."
                    + destinationFormat.getExtension());

        created = outFile.createNewFile();
        if (!created) {
          throw new IOException("Unable to create temp file");
        }

        log.debug("Output File = " + outFile.getAbsolutePath());
        // Perform the actual conversion.
        documentConverter.convert(sourceFile, outFile, destinationFormat);

        File[] files = myTmpDir.listFiles();
        if (files == null) {
          // listFiles() returns null on I/O error
          throw new IOException(
              "Unable to list converted files in " + myTmpDir.getAbsolutePath());
        }
        for (File file : files) {
          // copy the files to a new tmp location, as we'll delete them
          Blob blob;
          try (FileInputStream in = new FileInputStream(file)) {
            blob = Blobs.createBlob(in);
          }
          blob.setFilename(file.getName());
          blobs.add(blob);
          // add a blob for the index
          if (file.getName().equals(outFile.getName())) {
            Blob indexBlob;
            try (FileInputStream in = new FileInputStream(file)) {
              indexBlob = Blobs.createBlob(in);
            }
            indexBlob.setFilename("index.html");
            blobs.add(0, indexBlob);
          }
        }

      } else {
        outFile =
            Framework.createTempFile(
                "NXJOOoConverterDocumentOut", '.' + destinationFormat.getExtension());

        // Perform the actual conversion.
        documentConverter.convert(sourceFile, outFile, destinationFormat, parameters);

        Blob blob;
        try (FileInputStream in = new FileInputStream(outFile)) {
          blob = Blobs.createBlob(in, getDestinationMimeType());
        }
        blobs.add(blob);
      }
      return new SimpleCachableBlobHolder(blobs);
    } catch (IOException e) {
      String msg =
          String.format(
              "An error occurred trying to convert file %s to from %s to %s",
              blobPath, sourceMimetype, getDestinationMimeType());
      throw new ConversionException(msg, e);
    } finally {
      if (sourceFile != null) {
        sourceFile.delete();
      }
      if (outFile != null) {
        outFile.delete();
      }

      if (tmpDir != null) {
        // Remove whatever the conversion left in the working directory, then
        // the directory itself.
        File[] leftovers = tmpDir.listFiles();
        if (leftovers != null) {
          for (File file : leftovers) {
            file.delete();
          }
        }
        tmpDir.delete();
      }
    }
  }