/** * This tests if the Enhancements created by the Engine confirm to the rules defined for the * Stanbol Enhancement Structure. * * @throws IOException * @throws EngineException */ @Test public void testEngine() throws IOException, EngineException { EntityLinkerConfig linkerConfig = new EntityLinkerConfig(); linkerConfig.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW); KeywordLinkingEngine engine = KeywordLinkingEngine.createInstance( openNLP, searcher, new TextAnalyzerConfig(), linkerConfig); engine.referencedSiteName = TEST_REFERENCED_SITE_NAME; ContentItem ci = ciFactory.createContentItem(new StringSource(TEST_TEXT)); // tells the engine that this is an English text ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("en"))); // compute the enhancements engine.computeEnhancements(ci); // validate the enhancement results Map<UriRef, Resource> expectedValues = new HashMap<UriRef, Resource>(); expectedValues.put(ENHANCER_EXTRACTED_FROM, ci.getUri()); expectedValues.put( DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(engine.getClass().getName())); // adding null as expected for confidence makes it a required property expectedValues.put(Properties.ENHANCER_CONFIDENCE, null); // validate create fise:TextAnnotations int numTextAnnotations = validateAllTextAnnotations(ci.getMetadata(), TEST_TEXT, expectedValues); assertEquals("Four fise:TextAnnotations are expected by this Test", 4, numTextAnnotations); // validate create fise:EntityAnnotations int numEntityAnnotations = validateAllEntityAnnotations(ci, expectedValues); assertEquals("Five fise:EntityAnnotations are expected by this Test", 5, numEntityAnnotations); }
/**
 * Similar to {@link
 * EnhancementStructureHelper#validateAllEntityAnnotations(org.apache.clerezza.rdf.core.TripleCollection,
 * Map)} but in addition checks the entityhub:site property of every fise:EntityAnnotation
 * (STANBOL-625).
 *
 * <p>For each resource typed as fise:EntityAnnotation in the metadata of the parsed ContentItem
 * this method calls {@code validateEntityAnnotation} and then asserts that the annotation has
 * exactly one entityhub:site value, that this value is a {@link Literal} and that its lexical
 * form equals {@code TEST_REFERENCED_SITE_NAME}.
 *
 * <p>fise:confidence is not checked by this method itself; since STANBOL-630 that check is
 * performed by the EnhancementStructureHelper.
 *
 * @param ci the ContentItem whose metadata are validated
 * @param expectedValues the expected property values passed on to
 *     {@code validateEntityAnnotation}
 * @return the number of validated fise:EntityAnnotations
 */
private static int validateAllEntityAnnotations(
        ContentItem ci, Map<UriRef, Resource> expectedValues) {
    // the property URI is the same for every annotation, so it is created only once
    UriRef entityhubSite = new UriRef(RdfResourceEnum.site.getUri());
    Iterator<Triple> entityAnnotationIterator =
            ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_ENTITYANNOTATION);
    int entityAnnotationCount = 0;
    while (entityAnnotationIterator.hasNext()) {
        UriRef entityAnnotation = (UriRef) entityAnnotationIterator.next().getSubject();
        // structural validation of the annotation
        validateEntityAnnotation(ci.getMetadata(), entityAnnotation, expectedValues);
        // test the entityhub:site property (STANBOL-625)
        Iterator<Triple> entitySiteIterator =
                ci.getMetadata().filter(entityAnnotation, entityhubSite, null);
        assertTrue(
                "Expected entityhub:site value is missing (entityAnnotation "
                        + entityAnnotation
                        + ")",
                entitySiteIterator.hasNext());
        Resource siteResource = entitySiteIterator.next().getObject();
        assertTrue("entityhub:site values MUST BE Literals", siteResource instanceof Literal);
        assertEquals(
                "'" + TEST_REFERENCED_SITE_NAME + "' is expected as " + "entityhub:site value",
                TEST_REFERENCED_SITE_NAME,
                ((Literal) siteResource).getLexicalForm());
        // the iterator must be exhausted after the first value
        assertFalse("entityhub:site MUST HAVE only a single value", entitySiteIterator.hasNext());
        entityAnnotationCount++;
    }
    return entityAnnotationCount;
}
/** * Creates an {@link AnalysedText} instance for the parsed {@link Blob} and registers itself as * {@link ContentItem#addPart(org.apache.clerezza.rdf.core.UriRef, Object) ContentPart} with the * {@link UriRef} {@link AnalysedText#ANALYSED_TEXT_URI} to the parsed {@link ContentItem}. * * <p>If already a ContentPart with the given UriRef is registered this Method will throw an * {@link IllegalStateException}. * * @param ci the ContentItem to register the created {@link AnalysedText} instance * @param blob the analysed {@link Blob} * @return the created {@link AnalysedText} * @throws IllegalArgumentException of <code>null</code> is parsed as ContentItem or Blob * @throws IllegalStateException if there is already an ContentPart is registered for {@link * AnalysedText#ANALYSED_TEXT_URI} with the parsed ContentItem. * @throws IOException on any error while reading data from the parsed blob */ public final AnalysedText createAnalysedText(ContentItem ci, Blob blob) throws IOException { ci.getLock().readLock().lock(); try { AnalysedText existing = ci.getPart(AnalysedText.ANALYSED_TEXT_URI, AnalysedText.class); throw new IllegalStateException( "The AnalysedText ContentPart already exists (impl: " + existing.getClass().getSimpleName() + "| blob: " + existing.getBlob().getMimeType() + ")"); } catch (NoSuchPartException e) { // this is the expected case } catch (ClassCastException e) { throw new IllegalStateException( "A ContentPart with the URI '" + AnalysedText.ANALYSED_TEXT_URI + "' already exists but the parts " + "type is not compatible with " + AnalysedText.class.getSimpleName() + "!", e); } finally { ci.getLock().readLock().unlock(); } // create the Analysed text AnalysedText at = createAnalysedText(blob); ci.getLock().writeLock().lock(); try { // NOTE: there is a possibility that an other thread has added // the contentpart ci.addPart(AnalysedText.ANALYSED_TEXT_URI, at); } finally { ci.getLock().writeLock().unlock(); } return at; }