/**
     * Creates the image-extracting parser for a single rendering.
     *
     * <p>Stores the rendering context, registers the image media types this parser expects
     * (bmp, gif, jpg, jpeg, png, tiff) and, when the {@code PARAM_IMAGES_SAME_FOLDER}
     * parameter is set, points {@code imgFolder} at the parent of the resolved rendition
     * location.
     *
     * @param renderingContext context supplying the source node, rendition definition,
     *     destination node and rendering parameters
     */
    private TikaImageExtractingParser(RenderingContext renderingContext) {
      this.renderingContext = renderingContext;

      // Register every image subtype we expect to be handed
      types = new HashSet<MediaType>();
      for (String subtype : new String[] {"bmp", "gif", "jpg", "jpeg", "png", "tiff"}) {
        types.add(MediaType.image(subtype));
      }

      // Nothing more to set up unless the images go in the same place as the HTML
      if (!renderingContext.getParamWithDefault(PARAM_IMAGES_SAME_FOLDER, false)) {
        return;
      }

      // The image folder is the parent of wherever the rendition itself resolves to
      imgFolder =
          resolveRenditionLocation(
                  renderingContext.getSourceNode(),
                  renderingContext.getDefinition(),
                  renderingContext.getDestinationNode())
              .getParentRef();
      if (logger.isDebugEnabled()) {
        logger.debug("Using imgFolder: " + imgFolder);
      }
    }
Ejemplo n.º 2
0
  /**
   * When Tesseract can be run, the TesseractOCRParser should offer its supported media
   * types (PNG among them) and DefaultParser should select it for PNG.
   */
  @Test
  public void offersTypesIfFound() throws Exception {
    TesseractOCRParser ocrParser = new TesseractOCRParser();
    DefaultParser compositeParser = new DefaultParser();

    MediaType pngType = MediaType.image("png");
    ParseContext context = new ParseContext();

    // The remaining checks only apply when canRun() reports Tesseract as usable.
    assumeTrue(canRun());

    // Five media types are expected to be offered, and PNG must be one of them.
    assertEquals(5, ocrParser.getSupportedTypes(context).size());
    assertTrue(ocrParser.getSupportedTypes(context).contains(pngType));

    // DefaultParser should now hand PNG to the TesseractOCRParser.
    Class<?> chosenForPng = compositeParser.getParsers(context).get(pngType).getClass();
    assertEquals(TesseractOCRParser.class, chosenForPng);
  }
Ejemplo n.º 3
0
  /**
   * When Tesseract cannot be found, the TesseractOCRParser should claim no supported
   * types, so that the standard image parser is the one chosen for PNG.
   */
  @Test
  public void offersNoTypesIfNotFound() throws Exception {
    TesseractOCRParser ocrParser = new TesseractOCRParser();
    DefaultParser compositeParser = new DefaultParser();
    MediaType pngType = MediaType.image("png");

    // Build a context whose Tesseract configuration points at a path that does not exist
    ParseContext context = new ParseContext();
    TesseractOCRConfig bogusConfig = new TesseractOCRConfig();
    bogusConfig.setTesseractPath("/made/up/path");
    context.set(TesseractOCRConfig.class, bogusConfig);

    // With that configuration the OCR parser should offer nothing at all
    assertEquals(0, ocrParser.getSupportedTypes(context).size());

    // ...so DefaultParser should pick the plain ImageParser for PNG
    Class<?> chosenForPng = compositeParser.getParsers(context).get(pngType).getClass();
    assertEquals(ImageParser.class, chosenForPng);
  }