/*
 * Builds the parser for one rendering run.
 *
 * Stores the supplied rendering context, registers the image media types
 * this parser expects (bmp, gif, jpg, jpeg, png, tiff) and, when the
 * PARAM_IMAGES_SAME_FOLDER parameter is set (it defaults to false), points
 * imgFolder at the parent of the resolved rendition location.
 */
private TikaImageExtractingParser(RenderingContext renderingContext) {
    this.renderingContext = renderingContext;

    // The image subtypes we expect to be handed
    types = new HashSet<MediaType>();
    for (String subtype : new String[] {"bmp", "gif", "jpg", "jpeg", "png", "tiff"}) {
        types.add(MediaType.image(subtype));
    }

    // Should the images be placed alongside the HTML?
    if (renderingContext.getParamWithDefault(PARAM_IMAGES_SAME_FOLDER, false)) {
        RenditionLocation renditionLocation = resolveRenditionLocation(
                renderingContext.getSourceNode(),
                renderingContext.getDefinition(),
                renderingContext.getDestinationNode());
        imgFolder = renditionLocation.getParentRef();

        if (logger.isDebugEnabled()) {
            logger.debug("Using imgFolder: " + imgFolder);
        }
    }
}
/* If Tesseract is found, test we retrieve the proper number of supporting Parsers. */ @Test public void offersTypesIfFound() throws Exception { TesseractOCRParser parser = new TesseractOCRParser(); DefaultParser defaultParser = new DefaultParser(); ParseContext parseContext = new ParseContext(); MediaType png = MediaType.image("png"); // Assuming that Tesseract is on the path, we should find 5 Parsers that support PNG. assumeTrue(canRun()); assertEquals(5, parser.getSupportedTypes(parseContext).size()); assertTrue(parser.getSupportedTypes(parseContext).contains(png)); // DefaultParser will now select the TesseractOCRParser. assertEquals( TesseractOCRParser.class, defaultParser.getParsers(parseContext).get(png).getClass()); }
/* Check that if Tesseract is not found, the TesseractOCRParser claims to not support any file types. So, the standard image parser is called instead. */ @Test public void offersNoTypesIfNotFound() throws Exception { TesseractOCRParser parser = new TesseractOCRParser(); DefaultParser defaultParser = new DefaultParser(); MediaType png = MediaType.image("png"); // With an invalid path, will offer no types TesseractOCRConfig invalidConfig = new TesseractOCRConfig(); invalidConfig.setTesseractPath("/made/up/path"); ParseContext parseContext = new ParseContext(); parseContext.set(TesseractOCRConfig.class, invalidConfig); // No types offered assertEquals(0, parser.getSupportedTypes(parseContext).size()); // And DefaultParser won't use us assertEquals(ImageParser.class, defaultParser.getParsers(parseContext).get(png).getClass()); }