예제 #1
0
 /**
  * Looks up a mapped value for every annotation of the configured source type and hands it to
  * either the update or the create helper.
  *
  * <p>The lookup key is the annotation's covered text when no source feature is configured,
  * otherwise the string value of that feature. Annotations whose key is {@code null}, or whose
  * key has no entry in the mapping, are left untouched.
  *
  * @param cas the CAS whose annotations are visited
  * @throws AnalysisEngineProcessException declared by the signature; this body does not throw it
  *     directly
  */
 @Override
 public void process(JCas cas) throws AnalysisEngineProcessException {
   this.setSource(cas);
   this.setTarget(cas);
   FSIterator<Annotation> annotations = cas.getAnnotationIndex(this.getSourceType()).iterator();
   while (annotations.hasNext()) {
     Annotation current = annotations.next();
     String key =
         this.getSourceFeature() == null
             ? current.getCoveredText()
             : current.getStringValue(this.getSourceFeature());
     if (key == null) {
       continue;
     }
     String mapped = this.getMapping().get(key);
     if (mapped == null) {
       continue;
     }
     if (this.update().booleanValue()) {
       // update mode: write the mapped value onto the visited annotation
       this.update(cas, current, this.getTargetFeature(), mapped);
     } else {
       // create mode: pass the visited annotation's offsets to the create helper
       this.create(cas, this.getTargetFeature(), current.getBegin(), current.getEnd(), mapped);
     }
   }
 }
예제 #2
0
  /**
   * Processes the CAS generated by uploading documents in 'Nuovo pattern' (new pattern).
   *
   * <p>Builds one {@code Document} per {@code AttachmentAnnotation} found in the CAS and passes the
   * resulting list to {@code HpmDao.savePatternDocument} together with the author's email and the
   * pattern tag.
   *
   * @param cas CAS to analyse
   * @throws SQLException declared for failures while extracting the data
   */
  private void processPatternAttachmentCas(final JCas cas) throws SQLException {
    HpmDao dao = new HpmDao();

    // the value of the property is the hpmPatternId
    String patternTag = getKpeopleTagPattern(cas);

    // email of the user who generated the pattern
    String authorEmail = getUserAuthorAnnotation(cas).getEmail();

    // attachments associated with the pattern creation, collected here and then saved to the db
    List<Document> attachments = new ArrayList<Document>();
    for (FSIterator<Annotation> it = cas.getAnnotationIndex(AttachmentAnnotation.type).iterator();
        it.hasNext(); ) {
      AttachmentAnnotation attachment = (AttachmentAnnotation) it.next();

      Document doc = new Document();
      doc.setAttachmentType(new AttachmentType(2));
      doc.setGuid(attachment.getUrlAttachment());
      doc.setHashcode(attachment.getHashcode());
      doc.setTemplate(false);
      doc.setHpmAttachmentId(attachment.getId());
      doc.setName(attachment.getAttachmentName());
      attachments.add(doc);
    }

    // the list returned by the DAO is not used by this method
    dao.savePatternDocument(attachments, authorEmail, patternTag);
  }
예제 #3
0
 /**
  * Applies {@code clean(cas, annotation)} to every {@code SingleWordTermAnnotation} in the CAS
  * annotation index.
  *
  * @param cas the CAS whose single-word term annotations are visited
  */
 private void clean(JCas cas) {
   for (FSIterator<Annotation> terms =
           cas.getAnnotationIndex(SingleWordTermAnnotation.type).iterator();
       terms.hasNext(); ) {
     this.clean(cas, (SingleWordTermAnnotation) terms.next());
   }
 }
예제 #4
0
 /**
  * Logs, at INFO level, the URI of the first {@code SourceDocumentInformation} annotation in the
  * CAS. Nothing is logged when the CAS holds no such annotation.
  *
  * @param cas the CAS to read the source document information from
  */
 private void display(JCas cas) {
   FSIterator<Annotation> documents =
       cas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
   if (!documents.hasNext()) {
     return;
   }
   // only the first entry is reported; any further ones are ignored
   SourceDocumentInformation info = (SourceDocumentInformation) documents.next();
   this.getContext().getLogger().log(Level.INFO, "Cleaning terms of " + info.getUri());
 }
예제 #5
0
 /**
  * Collects every {@code TermAnnotation} whose occurrence count does not exceed the configured
  * threshold into the collection returned by {@code getAnnotations()}.
  *
  * @param cas the CAS whose term annotations are inspected
  */
 private void select(JCas cas) {
   for (FSIterator<Annotation> terms = cas.getAnnotationIndex(TermAnnotation.type).iterator();
       terms.hasNext(); ) {
     TermAnnotation term = (TermAnnotation) terms.next();
     // the threshold is inclusive: a term with exactly threshold occurrences is selected too
     if (term.getOccurrences() <= this.getThreshold().intValue()) {
       this.getAnnotations().add(term);
     }
   }
 }
예제 #6
0
 /**
  * Sets both the lemma and the stem of every {@code WordAnnotation} to the annotation's covered
  * text.
  *
  * @param cas the CAS whose word annotations are updated
  * @throws AnalysisEngineProcessException wrapping any exception raised while iterating or
  *     updating
  */
 @Override
 public void process(JCas cas) throws AnalysisEngineProcessException {
   try {
     for (FSIterator<Annotation> words = cas.getAnnotationIndex(WordAnnotation.type).iterator();
         words.hasNext(); ) {
       WordAnnotation word = (WordAnnotation) words.next();
       String surface = word.getCoveredText();
       word.setLemma(surface);
       word.setStem(surface);
     }
   } catch (Exception e) {
     // surface every failure through the analysis engine's own exception type
     throw new AnalysisEngineProcessException(e);
   }
 }
예제 #7
0
  /**
   * Adds the properties present in the CAS to the event.
   *
   * <p>Each {@code PropertyAnnotation} contributes one key/value entry to {@code
   * event.getProperties()}; every entry is also logged at debug level.
   *
   * @param event event to be saved, receiving the properties
   * @param cas CAS object the property annotations are read from
   */
  private void addProperties(final Event event, final JCas cas) {
    for (FSIterator<Annotation> properties =
            cas.getAnnotationIndex(PropertyAnnotation.type).iterator();
        properties.hasNext(); ) {
      PropertyAnnotation property = (PropertyAnnotation) properties.next();
      String propertyKey = property.getKey();
      String propertyValue = property.getValue();

      logger.debug("Aggiunta proprietà\nKey: " + propertyKey + "\nValue: " + propertyValue);

      event.getProperties().put(propertyKey, propertyValue);
    }
  }
예제 #8
0
 /**
  * Removes from the indexes every {@code TermComponentAnnotation} that a subiterator reports
  * under a component which is itself reported under the given annotation.
  *
  * <p>The components to drop are gathered in a set first and removed afterwards, so no index
  * entry is removed while the iterators are still in use.
  *
  * @param cas the CAS holding the term component index
  * @param annotation the single-word term whose nested components are cleaned
  */
 private void clean(JCas cas, SingleWordTermAnnotation annotation) {
   AnnotationIndex<Annotation> components = cas.getAnnotationIndex(TermComponentAnnotation.type);
   Set<TermComponentAnnotation> nested = new HashSet<TermComponentAnnotation>();

   for (FSIterator<Annotation> outer = components.subiterator(annotation); outer.hasNext(); ) {
     TermComponentAnnotation component = (TermComponentAnnotation) outer.next();
     for (FSIterator<Annotation> inner = components.subiterator(component); inner.hasNext(); ) {
       nested.add((TermComponentAnnotation) inner.next());
     }
   }

   for (TermComponentAnnotation obsolete : nested) {
     obsolete.removeFromIndexes();
   }
 }
예제 #9
0
  /**
   * Creates stem annotations for the annotations selected by each configured path.
   *
   * <p>A path has the form {@code typeName/featurePath}. The type name is resolved against the CAS
   * type system and the path segments are used to initialise {@code fp}. Every annotation of that
   * type is then passed to {@code createStemAnnotation}, unless a filter feature path is
   * configured and the annotation does not match the filter condition.
   *
   * @param jcas the JCas to read annotations from and to create stem annotations in
   * @throws AnalysisEngineProcessException declared by the signature; failures of this type raised
   *     inside the loop are rethrown as {@link IllegalStateException}
   * @throws FeaturePathException declared by the signature
   * @throws IllegalStateException if a type name is not in the type system, or if creating a stem
   *     annotation fails
   */
  @Override
  protected void generateAnnotations(JCas jcas)
      throws AnalysisEngineProcessException, FeaturePathException {
    // the underlying CAS gives access to the type system and the annotation indexes
    CAS cas = jcas.getCas();

    for (String path : paths) {
      // split once: the type name first, the remaining feature path (if any) second
      String[] parts = path.split("/", 2);
      String typeName = parts[0];

      Type type = cas.getTypeSystem().getType(typeName);
      if (type == null) {
        throw new IllegalStateException("Type [" + typeName + "] not found in type system");
      }

      // initialise the feature path info with the segments of this path
      initializeFeaturePathInfoFrom(fp, parts);

      FSIterator<?> candidates = cas.getAnnotationIndex(type).iterator();
      while (candidates.hasNext()) {
        AnnotationFS candidate = (AnnotationFS) candidates.next();
        try {
          // with no filter configured every annotation qualifies; otherwise it must match
          boolean accepted =
              this.filterFeaturePath == null
                  || this.filterFeaturePathInfo.match(candidate, this.filterCondition);
          if (accepted) {
            createStemAnnotation(jcas, candidate);
          }
        } catch (AnalysisEngineProcessException e) {
          throw new IllegalStateException("error occured while creating a stem annotation", e);
        }
      }
    }
  }
예제 #10
0
  /**
   * Retrieves the value of the {@code kpeopletagpattern} property.
   *
   * <p>Scans every {@code PropertyAnnotation} in the CAS. When several annotations carry the key,
   * the value of the last one visited is returned.
   *
   * @param cas CAS to analyse
   * @return value of the {@code kpeopletagpattern} property, or {@code null} when no property
   *     annotation has that key
   */
  private String getKpeopleTagPattern(final JCas cas) {
    AnnotationIndex<Annotation> annIdx = cas.getAnnotationIndex(PropertyAnnotation.type);
    FSIterator<Annotation> it = annIdx.iterator();

    String kpeopletagpattern = null;

    while (it.hasNext()) {
      PropertyAnnotation annotation = (PropertyAnnotation) it.next();

      // constant-first comparison: a property annotation whose key is unset (null) is skipped
      // instead of raising a NullPointerException
      if ("kpeopletagpattern".equals(annotation.getKey())) {
        kpeopletagpattern = annotation.getValue();
      }
    }

    return kpeopletagpattern;
  }
예제 #11
0
 /**
  * Adds the occurrences, frequency and specificity of each term's variants to the term's own
  * values. Terms whose variants array is {@code null} are left unchanged.
  *
  * @param cas the CAS whose term annotations are adjusted
  */
 private void adjust(JCas cas) {
   FSIterator<Annotation> terms = cas.getAnnotationIndex(TermAnnotation.type).iterator();
   while (terms.hasNext()) {
     TermAnnotation term = (TermAnnotation) terms.next();
     if (term.getVariants() == null) {
       continue;
     }
     // start from the term's own figures and accumulate the variants in index order
     int totalOccurrences = term.getOccurrences();
     double totalFrequency = term.getFrequency();
     double totalSpecificity = term.getSpecificity();
     for (int v = 0; v < term.getVariants().size(); v++) {
       totalOccurrences += term.getVariants(v).getOccurrences();
       totalFrequency += term.getVariants(v).getFrequency();
       totalSpecificity += term.getVariants(v).getSpecificity();
     }
     term.setOccurrences(totalOccurrences);
     term.setFrequency(totalFrequency);
     term.setSpecificity(totalSpecificity);
   }
 }
예제 #12
0
  /**
   * Adds {@code Document} objects to the event, one per {@code AttachmentAnnotation} in the CAS.
   * When an email is supplied (the Communication case), each document is also associated with
   * that email.
   *
   * @param event object the documents are attached to
   * @param email email to associate the documents with; may be {@code null}
   * @param cas CAS to process
   */
  private void addDocuments(final Event event, final Email email, final JCas cas) {
    for (FSIterator<Annotation> it = cas.getAnnotationIndex(AttachmentAnnotation.type).iterator();
        it.hasNext(); ) {
      AttachmentAnnotation attachment = (AttachmentAnnotation) it.next();

      Document doc = new Document();
      doc.setAttachmentType(new AttachmentType(2));
      doc.setGuid(attachment.getUrlAttachment());
      doc.setHashcode(attachment.getHashcode());
      doc.setAuthor(attachment.getAuthor());
      doc.setTemplate(false);
      doc.setHpmAttachmentId(attachment.getId());
      doc.setName(attachment.getAttachmentName());

      event.getAttachments().add(doc);

      if (email == null) {
        continue;
      }
      // link the document to the mail (for the EMAIL-DOCUMENT relation)
      email.getDocuments().add(doc);
    }
  }
예제 #13
0
  /**
   * Outputs trigrams for an input annotation.
   *
   * <p>Walks the tokens reported by a subiterator over {@code annotation} with a sliding window of
   * three. For each window whose two leading tokens pass {@code tokenIsInAnnotation}, an {@code
   * NGram} spanning from the first token's begin to the third token's end is created and added to
   * the indexes.
   *
   * @param annotation the annotation whose tokens are grouped into trigrams
   * @param aJCas the JCas the tokens are read from and the n-grams are created in
   */
  public void extractNgramsFromAnnotation(Annotation annotation, JCas aJCas) {

    AnnotationIndex<Annotation> tokenIndex = aJCas.getAnnotationIndex(Token.type);
    Iterator<Annotation> window = tokenIndex.subiterator(annotation);

    Annotation first = null;
    Annotation second = null;
    Annotation third = null;

    while (window.hasNext()) {
      // slide the window one token to the right
      first = second;
      second = third;
      third = window.next();

      // NOTE(review): during the first two iterations first/second are still null, so
      // tokenIsInAnnotation presumably returns false for null - confirm against its definition
      if (!tokenIsInAnnotation(annotation, second) || !tokenIsInAnnotation(annotation, first)) {
        continue;
      }

      NGram trigram = new NGram(aJCas);
      FSArray elements = new FSArray(aJCas, 3);
      elements.set(0, first);
      elements.set(1, second);
      elements.set(2, third);

      trigram.setBegin(first.getBegin());
      trigram.setEnd(third.getEnd());
      trigram.setElements(elements);
      trigram.setElementType("edu.cmu.deiis.types.Token");
      trigram.setConfidence(1D);
      trigram.setCasProcessorId(PROCESSOR_ID);
      trigram.addToIndexes();
    }
  }
예제 #14
0
  /**
   * Runs the HTML annotator followed by the HTML converter, configured with gap-inducing tags
   * {@code child1}, {@code child2}, {@code child3} and gap text {@code "$"}, and checks the
   * converted document text and the covered text of the four TAG annotations in the modified view.
   *
   * @throws Exception if a descriptor cannot be loaded or an engine fails
   */
  @Test
  public void test() throws Exception {
    String html =
        "<Parent>\n"
            + "<Child1>Some content</Child1>\n"
            + "<Child2 attribute=“someValue” />\n"
            + "<Child3>More content.</Child3>\n"
            + "</Parent>\n";

    // each descriptor is looked up by bare name first, then under the engine package path
    ClassLoader loader = HtmlAnnotator.class.getClassLoader();
    URL annotatorUrl = loader.getResource("HtmlAnnotator.xml");
    if (annotatorUrl == null) {
      annotatorUrl = loader.getResource("org/apache/uima/ruta/engine/HtmlAnnotator.xml");
    }
    URL converterUrl = loader.getResource("HtmlConverter.xml");
    if (converterUrl == null) {
      converterUrl = loader.getResource("org/apache/uima/ruta/engine/HtmlConverter.xml");
    }

    ResourceSpecifier annotatorSpec =
        UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(annotatorUrl));
    AnalysisEngine annotator = UIMAFramework.produceAnalysisEngine(annotatorSpec);
    annotator.setConfigParameterValue(HtmlAnnotator.PARAM_ONLY_CONTENT, false);
    annotator.reconfigure();

    ResourceSpecifier converterSpec =
        UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(converterUrl));
    AnalysisEngine converter = UIMAFramework.produceAnalysisEngine(converterSpec);
    converter.setConfigParameterValue(HtmlConverter.PARAM_SKIP_WHITESPACES, false);
    converter.setConfigParameterValue(HtmlConverter.PARAM_PROCESS_ALL, true);
    converter.setConfigParameterValue(
        HtmlConverter.PARAM_GAP_INDUCING_TAGS, new String[] {"child1", "child2", "child3"});
    converter.setConfigParameterValue(HtmlConverter.PARAM_GAP_TEXT, "$");
    converter.reconfigure();

    CAS cas = annotator.newCAS();
    Type tagType = cas.getTypeSystem().getType(HtmlAnnotator.NAMESPACE + "TAG");

    cas.setDocumentText(html);
    annotator.process(cas);
    converter.process(cas);

    CAS plainTextCas = cas.getView(HtmlConverter.DEFAULT_MODIFIED_VIEW);

    assertEquals("$Some content$$More content.", plainTextCas.getDocumentText());

    AnnotationIndex<AnnotationFS> tags = plainTextCas.getAnnotationIndex(tagType);
    FSIterator<AnnotationFS> tagIterator = tags.iterator();
    assertEquals(4, tags.size());
    assertEquals("$Some content$$More content.", tagIterator.next().getCoveredText());
    assertEquals("$Some content", tagIterator.next().getCoveredText());
    assertEquals("$", tagIterator.next().getCoveredText());
    assertEquals("$More content.", tagIterator.next().getCoveredText());

    cas.release();
  }
예제 #15
0
  /**
   * Runs the HTML annotator followed by the HTML converter with {@code PARAM_EXPAND_OFFSETS}
   * enabled and checks the converted text, the covered text of the four TAG annotations, and that
   * the {@code expandedOffsets} feature is set on one of the last two annotations.
   *
   * @throws Exception if a descriptor cannot be loaded or an engine fails
   */
  @Test
  public void testExpandOffsets() throws Exception {
    String html =
        "<Parent>\n"
            + "<Child1>Some content</Child1>\n"
            + "<Child2 attribute=“someValue” />\n"
            + "<Child3>More content.</Child3>\n"
            + "</Parent>\n";

    // each descriptor is looked up by bare name first, then under the engine package path
    ClassLoader loader = HtmlAnnotator.class.getClassLoader();
    URL annotatorUrl = loader.getResource("HtmlAnnotator.xml");
    if (annotatorUrl == null) {
      annotatorUrl = loader.getResource("org/apache/uima/ruta/engine/HtmlAnnotator.xml");
    }
    URL converterUrl = loader.getResource("HtmlConverter.xml");
    if (converterUrl == null) {
      converterUrl = loader.getResource("org/apache/uima/ruta/engine/HtmlConverter.xml");
    }

    ResourceSpecifier annotatorSpec =
        UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(annotatorUrl));
    AnalysisEngine annotator = UIMAFramework.produceAnalysisEngine(annotatorSpec);
    annotator.setConfigParameterValue(HtmlAnnotator.PARAM_ONLY_CONTENT, false);
    annotator.reconfigure();

    ResourceSpecifier converterSpec =
        UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(converterUrl));
    AnalysisEngine converter = UIMAFramework.produceAnalysisEngine(converterSpec);
    converter.setConfigParameterValue(HtmlConverter.PARAM_SKIP_WHITESPACES, false);
    converter.setConfigParameterValue(HtmlConverter.PARAM_PROCESS_ALL, true);
    converter.setConfigParameterValue(HtmlConverter.PARAM_EXPAND_OFFSETS, true);
    converter.reconfigure();

    CAS cas = annotator.newCAS();
    Type tagType = cas.getTypeSystem().getType(HtmlAnnotator.NAMESPACE + "TAG");
    Feature expandedFeature = tagType.getFeatureByBaseName("expandedOffsets");

    cas.setDocumentText(html);
    annotator.process(cas);
    converter.process(cas);

    CAS plainTextCas = cas.getView(HtmlConverter.DEFAULT_MODIFIED_VIEW);

    assertEquals("Some contentMore content.", plainTextCas.getDocumentText());

    AnnotationIndex<AnnotationFS> tags = plainTextCas.getAnnotationIndex(tagType);
    FSIterator<AnnotationFS> tagIterator = tags.iterator();
    assertEquals(4, tags.size());

    AnnotationFS parentTag = tagIterator.next();
    assertEquals(false, parentTag.getBooleanValue(expandedFeature));
    assertEquals("Some contentMore content.", parentTag.getCoveredText());

    AnnotationFS firstChildTag = tagIterator.next();
    assertEquals(false, firstChildTag.getBooleanValue(expandedFeature));
    assertEquals("Some content", firstChildTag.getCoveredText());

    AnnotationFS thirdTag = tagIterator.next();
    boolean thirdExpanded = thirdTag.getBooleanValue(expandedFeature);
    assertEquals("More content.", thirdTag.getCoveredText());

    AnnotationFS fourthTag = tagIterator.next();
    boolean fourthExpanded = fourthTag.getBooleanValue(expandedFeature);
    assertEquals("More content.", fourthTag.getCoveredText());

    // for one of these two annotations (with same offsets) the feature must be set to true
    assertEquals(true, thirdExpanded || fourthExpanded);

    cas.release();
  }