예제 #1
0
 /**
  * Runs the {@link KeywordLinkingEngine} on the test text and checks that the enhancements it
  * writes conform to the rules of the Stanbol Enhancement Structure.
  *
  * <p>The test expects exactly four fise:TextAnnotations and five fise:EntityAnnotations.
  *
  * @throws IOException declared by the test; propagated from the calls made in the body
  * @throws EngineException declared by the test; propagated from the calls made in the body
  */
 @Test
 public void testEngine() throws IOException, EngineException {
   // configure the linker to follow redirects and build the engine under test
   EntityLinkerConfig config = new EntityLinkerConfig();
   config.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
   TextAnalyzerConfig analyzerConfig = new TextAnalyzerConfig();
   KeywordLinkingEngine linkingEngine =
       KeywordLinkingEngine.createInstance(openNLP, searcher, analyzerConfig, config);
   linkingEngine.referencedSiteName = TEST_REFERENCED_SITE_NAME;

   // create the content item and mark its text as English
   ContentItem ci = ciFactory.createContentItem(new StringSource(TEST_TEXT));
   PlainLiteralImpl english = new PlainLiteralImpl("en");
   ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, english));

   // let the engine write its enhancements into the metadata of the content item
   linkingEngine.computeEnhancements(ci);

   // collect the property values every enhancement is expected to carry
   String engineClassName = linkingEngine.getClass().getName();
   Resource creator = LiteralFactory.getInstance().createTypedLiteral(engineClassName);
   Map<UriRef, Resource> expected = new HashMap<UriRef, Resource>();
   expected.put(ENHANCER_EXTRACTED_FROM, ci.getUri());
   expected.put(DC_CREATOR, creator);
   // a null value marks fise:confidence as required without fixing its value
   expected.put(Properties.ENHANCER_CONFIDENCE, null);

   // check the fise:TextAnnotations
   int textAnnotationCount = validateAllTextAnnotations(ci.getMetadata(), TEST_TEXT, expected);
   assertEquals("Four fise:TextAnnotations are expected by this Test", 4, textAnnotationCount);

   // check the fise:EntityAnnotations
   int entityAnnotationCount = validateAllEntityAnnotations(ci, expected);
   assertEquals(
       "Five fise:EntityAnnotations are expected by this Test", 5, entityAnnotationCount);
 }
예제 #2
0
 /**
  * Validates every fise:EntityAnnotation found in the metadata of the parsed ContentItem.
  *
  * <p>Similar to {@link
  * EnhancementStructureHelper#validateAllEntityAnnotations(org.apache.clerezza.rdf.core.TripleCollection,
  * Map)} but in addition checks the entityhub:site property (STANBOL-625): each annotation must
  * carry exactly one value, that value must be a {@link Literal}, and its lexical form must equal
  * {@code TEST_REFERENCED_SITE_NAME}.
  *
  * <p>The fise:confidence range [0..1] is not checked here; according to the STANBOL-630 note
  * that check is done by the EnhancementStructureHelper.
  *
  * @param ci the ContentItem whose metadata are validated
  * @param expectedValues the expected property values, passed on to {@code
  *     validateEntityAnnotation} for every annotation
  * @return the number of fise:EntityAnnotations that were validated
  */
 private static int validateAllEntityAnnotations(
     ContentItem ci, Map<UriRef, Resource> expectedValues) {
   // the entityhub:site property is the same for every annotation, so build it only once
   UriRef entityhubSite = new UriRef(RdfResourceEnum.site.getUri());
   Iterator<Triple> entityAnnotationIterator =
       ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_ENTITYANNOTATION);
   int entityAnnotationCount = 0;
   while (entityAnnotationIterator.hasNext()) {
     UriRef entityAnnotation = (UriRef) entityAnnotationIterator.next().getSubject();
     // generic structure checks, including the expected values
     validateEntityAnnotation(ci.getMetadata(), entityAnnotation, expectedValues);
     // Test the entityhub:site property (STANBOL-625)
     Iterator<Triple> entitySiteIterator =
         ci.getMetadata().filter(entityAnnotation, entityhubSite, null);
     assertTrue(
         "Expected entityhub:site value is missing (entityAnnotation " + entityAnnotation + ")",
         entitySiteIterator.hasNext());
     Resource siteResource = entitySiteIterator.next().getObject();
     assertTrue("entityhub:site values MUST BE Literals", siteResource instanceof Literal);
     assertEquals(
         "'" + TEST_REFERENCED_SITE_NAME + "' is expected as " + "entityhub:site value",
         TEST_REFERENCED_SITE_NAME,
         ((Literal) siteResource).getLexicalForm());
     // the iterator was advanced once above, so any further element is a second value
     assertFalse("entityhub:site MUST HAVE only a single value", entitySiteIterator.hasNext());
     entityAnnotationCount++;
   }
   return entityAnnotationCount;
 }
예제 #3
0
 /**
  * Creates an {@link AnalysedText} instance for the passed {@link Blob} and registers it as
  * {@link ContentItem#addPart(org.apache.clerezza.rdf.core.UriRef, Object) ContentPart} with the
  * {@link UriRef} {@link AnalysedText#ANALYSED_TEXT_URI} to the passed {@link ContentItem}.
  *
  * <p>If a ContentPart is already registered under that UriRef this method throws an {@link
  * IllegalStateException}. The check is done under the read lock of the ContentItem, the
  * registration afterwards under its write lock; the AnalysedText itself is created while no
  * lock is held.
  *
  * @param ci the ContentItem to register the created {@link AnalysedText} instance
  * @param blob the analysed {@link Blob}
  * @return the created {@link AnalysedText}
  * @throws IllegalArgumentException if <code>null</code> is passed as ContentItem or Blob
  * @throws IllegalStateException if a ContentPart is already registered for {@link
  *     AnalysedText#ANALYSED_TEXT_URI} with the passed ContentItem.
  * @throws IOException on any error while reading data from the passed blob
  */
 public final AnalysedText createAnalysedText(ContentItem ci, Blob blob) throws IOException {
   // enforce the documented contract instead of failing with a NullPointerException below
   if (ci == null) {
     throw new IllegalArgumentException("The parsed ContentItem MUST NOT be NULL!");
   }
   if (blob == null) {
     throw new IllegalArgumentException("The parsed Blob MUST NOT be NULL!");
   }
   ci.getLock().readLock().lock();
   try {
     // getPart(..) signals a missing part with a NoSuchPartException; reaching the throw
     // statement therefore means that an AnalysedText part is already registered
     AnalysedText existing = ci.getPart(AnalysedText.ANALYSED_TEXT_URI, AnalysedText.class);
     throw new IllegalStateException(
         "The AnalysedText ContentPart already exists (impl: "
             + existing.getClass().getSimpleName()
             + "| blob: "
             + existing.getBlob().getMimeType()
             + ")");
   } catch (NoSuchPartException e) {
     // this is the expected case
   } catch (ClassCastException e) {
     // a part is registered under the URI, but it is not an AnalysedText
     throw new IllegalStateException(
         "A ContentPart with the URI '"
             + AnalysedText.ANALYSED_TEXT_URI
             + "' already exists but the parts "
             + "type is not compatible with "
             + AnalysedText.class.getSimpleName()
             + "!",
         e);
   } finally {
     ci.getLock().readLock().unlock();
   }
   // create the Analysed text
   AnalysedText at = createAnalysedText(blob);
   ci.getLock().writeLock().lock();
   try {
     // NOTE: the read lock was released above, so another thread may have added the
     // ContentPart in the meantime; that case is not re-checked here
     ci.addPart(AnalysedText.ANALYSED_TEXT_URI, at);
   } finally {
     ci.getLock().writeLock().unlock();
   }
   return at;
 }