Java FSIterator.hasNext 예제들, org.apache.uima.cas.FSIterator.hasNext Java 예제들

예제 #1

0

파일 보기

파일: TreeTaggerWrapper.java 프로젝트: qwaider/heideltime

  /**
   * Merges pairs of adjacent HeidelTime sentences where the first one ends with one of the
   * abbreviations "av.", "apr." or "avant." (either capitalisation) and the second one starts
   * with one of the markers "J.-C.", "J-C." or "NSJC".
   *
   * <p>For every such pair a new sentence spanning from the begin of the first to the end of the
   * second is added to the indexes and the two originals are removed. The whole index is
   * re-scanned until a pass produces no merge.
   *
   * @param jcas the JCas whose sentence annotations are rewritten in place
   */
  private void improveFrenchSentences(JCas jcas) {
    HashSet<de.unihd.dbs.uima.types.heideltime.Sentence> hsRemoveAnnotations =
        new HashSet<de.unihd.dbs.uima.types.heideltime.Sentence>();
    HashSet<de.unihd.dbs.uima.types.heideltime.Sentence> hsAddAnnotations =
        new HashSet<de.unihd.dbs.uima.types.heideltime.Sentence>();

    HashSet<String> hsSentenceBeginnings = new HashSet<String>();
    hsSentenceBeginnings.add("J.-C.");
    hsSentenceBeginnings.add("J-C.");
    hsSentenceBeginnings.add("NSJC");

    // Primitive flag: no reason to box/unbox on every loop test.
    boolean changes = true;
    while (changes) {
      changes = false;
      FSIndex annoHeidelSentences =
          jcas.getAnnotationIndex(de.unihd.dbs.uima.types.heideltime.Sentence.type);
      FSIterator iterHeidelSent = annoHeidelSentences.iterator();
      while (iterHeidelSent.hasNext()) {
        de.unihd.dbs.uima.types.heideltime.Sentence s1 =
            (de.unihd.dbs.uima.types.heideltime.Sentence) iterHeidelSent.next();

        String s1Text = s1.getCoveredText();
        boolean endsWithAbbreviation =
            s1Text.endsWith("av.")
                || s1Text.endsWith("Av.")
                || s1Text.endsWith("apr.")
                || s1Text.endsWith("Apr.")
                || s1Text.endsWith("avant.")
                || s1Text.endsWith("Avant.");
        if (!endsWithAbbreviation || !iterHeidelSent.hasNext()) {
          continue;
        }

        // Peek at the following sentence without advancing. The previous next() +
        // moveToPrevious() round trip pushed the iterator past the end whenever s2 was the
        // last sentence and then relied on moveToPrevious() to recover from that state.
        de.unihd.dbs.uima.types.heideltime.Sentence s2 =
            (de.unihd.dbs.uima.types.heideltime.Sentence) iterHeidelSent.get();
        String s2Text = s2.getCoveredText();
        for (String beg : hsSentenceBeginnings) {
          if (s2Text.startsWith(beg)) {
            de.unihd.dbs.uima.types.heideltime.Sentence s3 =
                new de.unihd.dbs.uima.types.heideltime.Sentence(jcas);
            s3.setBegin(s1.getBegin());
            s3.setEnd(s2.getEnd());
            hsAddAnnotations.add(s3);
            hsRemoveAnnotations.add(s1);
            hsRemoveAnnotations.add(s2);
            changes = true;
            break;
          }
        }
      }

      // Index changes are applied only after the scan so the iterator is never used on an
      // index that is being modified.
      for (de.unihd.dbs.uima.types.heideltime.Sentence s : hsRemoveAnnotations) {
        s.removeFromIndexes(jcas);
      }
      hsRemoveAnnotations.clear();
      for (de.unihd.dbs.uima.types.heideltime.Sentence s : hsAddAnnotations) {
        s.addToIndexes(jcas);
      }
      hsAddAnnotations.clear();
    }
  }

예제 #2

0

파일 보기

파일: TermCleaner.java 프로젝트: sanju2010/ttc-project

 /**
  * Removes from the indexes every term component that the component index reports through a
  * sub-iterator of another component located under the given single-word term.
  *
  * @param cas the JCas holding the annotations
  * @param annotation the single-word term whose components are inspected
  */
 private void clean(JCas cas, SingleWordTermAnnotation annotation) {
   AnnotationIndex<Annotation> components = cas.getAnnotationIndex(TermComponentAnnotation.type);
   Set<TermComponentAnnotation> nested = new HashSet<TermComponentAnnotation>();

   for (FSIterator<Annotation> outer = components.subiterator(annotation); outer.hasNext(); ) {
     TermComponentAnnotation component = (TermComponentAnnotation) outer.next();
     for (FSIterator<Annotation> inner = components.subiterator(component); inner.hasNext(); ) {
       nested.add((TermComponentAnnotation) inner.next());
     }
   }

   // Removal happens only once both iterations are finished.
   for (TermComponentAnnotation doomed : nested) {
     doomed.removeFromIndexes();
   }
 }

예제 #3

0

파일 보기

파일: HpmAnnotator.java 프로젝트: webscience/K-people

  /**
   * Processes the CAS generated when documents are uploaded in 'Nuovo pattern'.
   *
   * <p>Each {@code AttachmentAnnotation} in the CAS is converted into a {@code Document} and the
   * resulting list is handed to {@code HpmDao.savePatternDocument} together with the author's
   * e-mail and the pattern tag.
   *
   * @param cas CAS to analyse
   * @throws SQLException error raised while extracting the data
   */
  private void processPatternAttachmentCas(final JCas cas) throws SQLException {
    final HpmDao patternDao = new HpmDao();

    // The value of this property is the hpmPatternId.
    final String patternTag = getKpeopleTagPattern(cas);

    // Attachments associated with the pattern creation; collected here, then saved to the db.
    final List<Document> attachments = new ArrayList<Document>();

    // E-mail of the user who generated the pattern.
    final String authorEmail = getUserAuthorAnnotation(cas).getEmail();

    for (FSIterator<Annotation> cursor =
            cas.getAnnotationIndex(AttachmentAnnotation.type).iterator();
        cursor.hasNext(); ) {
      final AttachmentAnnotation source = (AttachmentAnnotation) cursor.next();

      final Document attachment = new Document();
      attachment.setAttachmentType(new AttachmentType(2));
      attachment.setGuid(source.getUrlAttachment());
      attachment.setHashcode(source.getHashcode());
      attachment.setTemplate(false);
      attachment.setHpmAttachmentId(source.getId());
      attachment.setName(source.getAttachmentName());
      attachments.add(attachment);
    }

    // The list returned by the DAO is not used by this method.
    patternDao.savePatternDocument(attachments, authorEmail, patternTag);
  }

예제 #4

0

파일 보기

파일: JCasUtils.java 프로젝트: termsuite/termsuite-core

 /**
  * Looks up the first {@code SourceDocumentInformation} annotation of the given JCas.
  *
  * @param jCas the JCas to inspect
  * @return the first annotation in the index, or an absent optional when the index is empty
  */
 public static Optional<SourceDocumentInformation> getSourceDocumentAnnotation(JCas jCas) {
   FSIterator<Annotation> cursor =
       jCas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
   if (!cursor.hasNext()) {
     return Optional.absent();
   }
   return Optional.of((SourceDocumentInformation) cursor.next());
 }

예제 #5

0

파일 보기

파일: JCasUtils.java 프로젝트: termsuite/termsuite-core

 /**
  * Prints to standard output the total number of annotations in the JCas followed by one line
  * per annotation type name with its count, ordered by type name.
  *
  * @param jcas the JCas to summarise
  */
 public static void showJCas(JCas jcas) {
   Map<String, MutableInt> countsByType = new TreeMap<String, MutableInt>();
   int seen = 0;

   for (FSIterator<Annotation> cursor = jcas.getAnnotationIndex().iterator();
       cursor.hasNext(); ) {
     seen++;
     String typeName = "rien";
     try {
       typeName = ((Annotation) cursor.next()).getType().getName();
     } catch (NullPointerException e) {
       // NOTE(review): a NullPointerException is counted under the exception's class name and
       // the iterator is advanced once more. If the NPE was raised after next() had already
       // advanced, this skips an element - confirm which case this guards against.
       cursor.moveToNext();
       typeName = e.getClass().getCanonicalName();
     }

     MutableInt counter = countsByType.get(typeName);
     if (counter == null) {
       countsByType.put(typeName, new MutableInt(1));
     } else {
       counter.increment();
     }
   }

   System.out.println(
       "Total annotation in JCas (ID: " + System.identityHashCode(jcas) + "): " + seen);
   for (Map.Entry<String, MutableInt> entry : countsByType.entrySet()) {
     System.out.println(entry.getKey() + ": " + entry.getValue());
   }
 }

예제 #6

0

파일 보기

파일: Mapper.java 프로젝트: sanju2010/ttc-project

 /**
  * Maps every annotation of the source type through the configured mapping.
  *
  * <p>The lookup key is the covered text when no source feature is configured, otherwise the
  * string value of that feature. When the mapping has an entry for the key, the mapped value is
  * either written to the existing annotation or used to create a new one over the same span,
  * depending on {@code update()}.
  *
  * @param cas the JCas to process
  * @throws AnalysisEngineProcessException declared by the overridden method
  */
 @Override
 public void process(JCas cas) throws AnalysisEngineProcessException {
   this.setSource(cas);
   this.setTarget(cas);

   FSIterator<Annotation> cursor = cas.getAnnotationIndex(this.getSourceType()).iterator();
   while (cursor.hasNext()) {
     Annotation current = cursor.next();

     String key =
         this.getSourceFeature() == null
             ? current.getCoveredText()
             : current.getStringValue(this.getSourceFeature());
     if (key == null) {
       continue;
     }

     String mapped = this.getMapping().get(key);
     if (mapped == null) {
       continue;
     }

     if (this.update().booleanValue()) {
       this.update(cas, current, this.getTargetFeature(), mapped);
     } else {
       this.create(cas, this.getTargetFeature(), current.getBegin(), current.getEnd(), mapped);
     }
   }
 }

예제 #7

0

파일 보기

파일: DocumentAnnotationUtils.java 프로젝트: EUMSSI/EUMSSI-tools

  /**
   * Returns the input file recorded in the CAS.
   *
   * <p>The file is built from the path component of the URI stored in the first annotation of
   * the source-document-information type.
   *
   * @param aJCas the JCas to inspect
   * @return the input file, or {@code null} when the CAS holds no such annotation
   * @throws AnalysisEngineProcessException when the stored URI is not a valid URL
   */
  public static File retrieveSourceDocumentFile(JCas aJCas) throws AnalysisEngineProcessException {
    FSIterator<Annotation> sdiCursor =
        aJCas
            .getAnnotationIndex(
                JCasSofaViewUtils.getJCasType(
                    aJCas, DEFAULT_SOURCE_DOCUMENT_INFORMATION_ANNOTATION))
            .iterator();
    if (!sdiCursor.hasNext()) {
      return null;
    }

    SourceDocumentInformation sdi = (SourceDocumentInformation) sdiCursor.next();
    try {
      return new File(new URL(sdi.getUri()).getPath());
    } catch (MalformedURLException e) {
      // The stored URI cannot be parsed: report it with the original cause attached.
      throw new AnalysisEngineProcessException(
          "Error: MalformedURLException !", new Object[] {}, e);
    }
  }

예제 #8

0

파일 보기

파일: AbnerAnnotator.java 프로젝트: longcmu/11791_SoftwareEngineeringforInformationSystems

  /**
   * Runs the ABNER tagger over the given sentences and sets the ABNER confidence of every
   * {@code NameTag} annotation in the CAS: 1.0 when the tag's text equals an entity ABNER
   * returned, 0.0 otherwise.
   *
   * @param args sentences to be processed; element 0 is skipped by the loop
   *     (NOTE(review): presumably it is not a sentence - confirm against the caller)
   * @param arg0 the JCas holding the {@code NameTag} annotations to update
   */
  public static void abnerNER(String[] args, JCas arg0) {
    // Entities reported by ABNER; only the key set is consulted below.
    HashMap<String, Double> abnerHits = new HashMap<String, Double>();

    Tagger tagger = new Tagger();
    for (int idx = 1; idx < args.length; ++idx) {
      String[][] entities = tagger.getEntities(args[idx]);
      for (String mention : entities[0]) {
        abnerHits.put(mention, 1.0);
      }
    }

    FSIterator<org.apache.uima.jcas.tcas.Annotation> cursor =
        arg0.getAnnotationIndex(NameTag.type).iterator();
    while (cursor.hasNext()) {
      NameTag tag = (NameTag) cursor.get();
      tag.setConfidenceAbner(abnerHits.containsKey(tag.getText()) ? 1.0 : 0.0);
      cursor.moveToNext();
    }
  }

예제 #9

0

파일 보기

파일: ProductSaveToXMLAE.java 프로젝트: knowrob/uima_prolog

  /**
   * Serializes the CAS to an XML file in the output directory.
   *
   * <p>The file is named after the first {@code Product} annotation ({@code <name>.xml}); when
   * the CAS has none, a running {@code doc<N>.xml} name is used instead. The component is
   * lazily initialized on the first call.
   *
   * @param aJCas the JCas to serialize
   * @throws AnalysisEngineProcessException when lazy initialization fails or the output file
   *     cannot be written; the original exception is attached as the cause
   */
  public void process(JCas aJCas) throws AnalysisEngineProcessException {
    if (!init) {
      try {
        initialize();
      } catch (ResourceInitializationException e) {
        // Previously swallowed, after which init was still set to true. Propagate instead, so
        // the component is never marked initialized after a failed initialize().
        throw new AnalysisEngineProcessException(e);
      }
      init = true;
    }

    // Retrieve the file name of the input file from the CAS.
    FSIterator it = aJCas.getAnnotationIndex(Product.type).iterator();
    File outFile = null;
    if (it.hasNext()) {
      Product fileLoc = (Product) it.next();
      outFile = new File(mOutputDir, fileLoc.getName() + ".xml");
    }
    if (outFile == null) {
      outFile = new File(mOutputDir, "doc" + mDocNum++ + ".xml");
    }

    // Serialize XCAS and write to the output file. Failures are reported to the caller rather
    // than printed and ignored, so a missing output file cannot go unnoticed.
    try {
      writeXCas(aJCas.getCas(), outFile);
    } catch (IOException e) {
      throw new AnalysisEngineProcessException(e);
    } catch (SAXException e) {
      throw new AnalysisEngineProcessException(e);
    }
  }

예제 #10

0

파일 보기

파일: JCasUtils.java 프로젝트: termsuite/termsuite-core

  /**
   * Prints the word annotations of the JCas in rows of at most 20 words: one line with the
   * cells returned by {@code center} for the covered texts, one line with the cells for the
   * tags. Full rows are followed by a blank line; a trailing partial row is not.
   *
   * @param jcas the JCas to display
   */
  public static void showSdiWithCategory2(JCas jcas) {
    StringBuilder wordRow = new StringBuilder();
    StringBuilder tagRow = new StringBuilder();
    int pending = 0;

    for (FSIterator<Annotation> cursor = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
        cursor.hasNext(); ) {
      pending++;
      WordAnnotation word = (WordAnnotation) cursor.next();

      String[] cells = center(word.getCoveredText(), word.getTag());
      wordRow.append(cells[0]).append(" ");
      tagRow.append(cells[1]).append(" ");

      if (pending == 20) {
        System.out.println(wordRow);
        System.out.println(tagRow);
        System.out.println();

        wordRow.setLength(0);
        tagRow.setLength(0);
        pending = 0;
      }
    }

    // Flush the last, incomplete row.
    if (pending > 0) {
      System.out.println(wordRow);
      System.out.println(tagRow);
    }
  }

예제 #11

0

파일 보기

파일: TermCleaner.java 프로젝트: sanju2010/ttc-project

 /**
  * Applies {@code clean(JCas, SingleWordTermAnnotation)} to every single-word term annotation
  * in the JCas.
  *
  * @param cas the JCas to clean
  */
 private void clean(JCas cas) {
   for (FSIterator<Annotation> terms =
           cas.getAnnotationIndex(SingleWordTermAnnotation.type).iterator();
       terms.hasNext(); ) {
     this.clean(cas, (SingleWordTermAnnotation) terms.next());
   }
 }

예제 #12

0

파일 보기

파일: TermCleaner.java 프로젝트: sanju2010/ttc-project

 /**
  * Logs, at INFO level, the URI of the first {@code SourceDocumentInformation} annotation of
  * the JCas. Nothing is logged when the JCas has no such annotation.
  *
  * @param cas the JCas being cleaned
  */
 private void display(JCas cas) {
   FSIterator<Annotation> cursor =
       cas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
   if (!cursor.hasNext()) {
     return;
   }
   SourceDocumentInformation source = (SourceDocumentInformation) cursor.next();
   this.getContext().getLogger().log(Level.INFO, "Cleaning terms of " + source.getUri());
 }

예제 #13

0

파일 보기

파일: JCasUtils.java 프로젝트: termsuite/termsuite-core

 /**
  * Counts the annotations of the given type by walking the annotation index.
  *
  * @param jcas the JCas to inspect
  * @param type the JCas type id passed to {@code getAnnotationIndex}
  * @return the number of annotations the iterator yields
  */
 public static int countType(JCas jcas, int type) {
   int count = 0;
   for (FSIterator<Annotation> cursor = jcas.getAnnotationIndex(type).iterator();
       cursor.hasNext();
       cursor.next()) {
     count++;
   }
   return count;
 }

예제 #14

0

파일 보기

파일: TokenRegexAE.java 프로젝트: JuleStar/uima-tokens-regex

  /**
   * Runs all rules of the resource over the annotations of the iterated type.
   *
   * <p>Steps, in order: call {@code beforeRuleProcessing}; unless the label feature equals
   * {@code NO_SET_LABEL}, write the matching label string into that feature of every
   * annotation; configure, register a recognition handler on, and reset every rule automaton;
   * feed each annotation to every automaton, calling {@code allRulesFailed} when no automaton
   * has a current instance afterwards; finish the automata; unregister the handler; call
   * {@code afterRuleProcessing}.
   *
   * @param jCas the JCas to process
   * @throws AnalysisEngineProcessException declared by the overridden method
   */
  @Override
  public void process(final JCas jCas) throws AnalysisEngineProcessException {
    beforeRuleProcessing(jCas);

    final boolean mustSetLabels = !this.labelFeature.equals(NO_SET_LABEL);
    if (mustSetLabels) {
      FSIterator<Annotation> labelCursor =
          jCas.getAnnotationIndex(getIteratedType(jCas)).iterator();
      Feature labelFeat = this.getIteratedType(jCas).getFeatureByBaseName(this.labelFeature);
      while (labelCursor.hasNext()) {
        Annotation token = (Annotation) labelCursor.next();
        token.setStringValue(labelFeat, this.resource.getMatchingLabelString(token));
      }
    }

    // Forwards every recognized episode to ruleMatched for this JCas.
    final RecognitionHandler episodeForwarder =
        new RecognitionHandler() {
          @Override
          public void recognizedEpisode(RegexOccurrence episode) {
            ruleMatched(jCas, episode);
          }
        };
    for (final Rule rule : this.resource.getRules()) {
      rule.getAutomaton().setAllowOverlappingInstances(this.allowOverlappingOccurrences);
      rule.getAutomaton().addRecognitionHandler(episodeForwarder);
      rule.getAutomaton().reset();
    }

    for (FSIterator<Annotation> cursor =
            jCas.getAnnotationIndex(getIteratedType(jCas)).iterator();
        cursor.hasNext(); ) {
      Annotation token = (Annotation) cursor.next();
      boolean noRuleAlive = true;
      for (Rule rule : this.resource.getRules()) {
        rule.getAutomaton().nextAnnotation(token);
        if (rule.getAutomaton().currentInstancesNum() != 0) {
          noRuleAlive = false;
        }
      }
      if (noRuleAlive) {
        allRulesFailed(jCas);
      }
    }

    // Finish every automaton before the handler is detached from any of them.
    for (Rule rule : this.resource.getRules()) {
      rule.getAutomaton().finish();
    }
    for (final Rule rule : this.resource.getRules()) {
      rule.getAutomaton().removeRecognitionHandler(episodeForwarder);
    }

    afterRuleProcessing(jCas);
  }

예제 #15

0

파일 보기

파일: TermCleaner.java 프로젝트: sanju2010/ttc-project

 /**
  * Adds to {@code getAnnotations()} every term annotation whose occurrence count is less than
  * or equal to the configured threshold.
  *
  * @param cas the JCas to scan
  */
 private void select(JCas cas) {
   for (FSIterator<Annotation> terms = cas.getAnnotationIndex(TermAnnotation.type).iterator();
       terms.hasNext(); ) {
     TermAnnotation term = (TermAnnotation) terms.next();
     boolean aboveThreshold = term.getOccurrences() > this.getThreshold().intValue();
     if (aboveThreshold) {
       continue;
     }
     this.getAnnotations().add(term);
   }
 }

예제 #16

0

파일 보기

파일: JCasUtils.java 프로젝트: termsuite/termsuite-core

 /**
  * Returns the last "/"-separated segment of the URI stored in the first
  * {@code SourceDocumentInformation} annotation.
  *
  * @param jcas the JCas to inspect
  * @return the last segment of the URI (of the placeholder "(no uri)" when the URI is null),
  *     or {@code null} when the JCas has no such annotation
  */
 public static String getTermSuiteCasFileName(JCas jcas) {
   FSIterator<Annotation> cursor =
       jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
   if (!cursor.hasNext()) {
     return null;
   }

   SourceDocumentInformation sdi = (SourceDocumentInformation) cursor.next();
   String uri = sdi.getUri();
   if (uri == null) {
     uri = "(no uri)";
   }

   // Keep overwriting so that only the final segment survives.
   String lastSegment = null;
   for (String segment : Splitter.on("/").split(uri)) {
     lastSegment = segment;
   }
   return lastSegment;
 }

예제 #17

0

파일 보기

파일: HpmAnnotator.java 프로젝트: webscience/K-people

  /**
   * Returns the first {@code BodyAnnotation} of the CAS.
   *
   * @param cas CAS to process
   * @return the first annotation of type {@code BodyAnnotation}, or {@code null} when there is
   *     none
   */
  private BodyAnnotation getBodyAnnotation(final JCas cas) {
    final Type bodyType = cas.getTypeSystem().getType(BodyAnnotation.class.getCanonicalName());
    final FSIterator<Annotation> bodies = cas.getAnnotationIndex(bodyType).iterator();

    if (!bodies.hasNext()) {
      return null;
    }
    return (BodyAnnotation) bodies.next();
  }

예제 #18

0

파일 보기

파일: JCasUtils.java 프로젝트: termsuite/termsuite-core

 /**
  * Prints the {@code num}-th (1-based) {@code TermOccAnnotation} of the JCas. Nothing is
  * printed when the index yields fewer than {@code num} annotations.
  *
  * @param jcas the JCas to inspect
  * @param num 1-based position of the annotation to print
  */
 public static void showTermFreq(JCas jcas, int num) {
   int position = 0;
   for (FSIterator<Annotation> cursor =
           jcas.getAnnotationIndex(TermOccAnnotation.type).iterator();
       cursor.hasNext(); ) {
     position += 1;
     TermOccAnnotation current = (TermOccAnnotation) cursor.next();
     if (position != num) {
       continue;
     }
     System.out.println("TermOccAnnotation n°" + num + ": " + current);
     return;
   }
 }

예제 #19

0

파일 보기

파일: RetrievalEvaluator.java 프로젝트: parag1102/HW4-paraga

  /**
   * Records the first {@code Document} annotation of the CAS in the evaluator's collections.
   *
   * <p>A token-text to token-frequency map is built from the document's token list. A document
   * whose relevance value is 99 is treated as a question: its query id and relevance go to
   * {@code QuesqIdList} / {@code QuesrelList} and its map to {@code queryDictionary}. Any other
   * document is treated as an answer: its query id and relevance go to {@code AnsqIdList} /
   * {@code AnsrelList}, its map to {@code answerDictionary}, and, when its relevance value is
   * 1, its text is stored in {@code GoldAnswerStringList} under the query id.
   *
   * @param aCas the CAS to process
   * @throws ResourceProcessException when the JCas view cannot be obtained
   */
  @Override
  public void processCas(CAS aCas) throws ResourceProcessException {

    JCas jcas;
    try {
      jcas = aCas.getJCas();
    } catch (CASException e) {
      throw new ResourceProcessException(e);
    }

    FSIterator documents = jcas.getAnnotationIndex(Document.type).iterator();
    if (!documents.hasNext()) {
      return;
    }
    Document doc = (Document) documents.next();

    // Previous annotators are expected to have populated the token list in the CAS.
    ArrayList<Token> tokens = Utils.fromFSListToCollection(doc.getTokenList(), Token.class);

    final boolean isQuestion = doc.getRelevanceValue() == 99;

    // The id/relevance lists are updated before the frequency map is filled.
    if (isQuestion) {
      QuesqIdList.add(doc.getQueryID());
      QuesrelList.add(doc.getRelevanceValue());
    } else {
      AnsqIdList.add(doc.getQueryID());
      AnsrelList.add(doc.getRelevanceValue());
    }

    HashMap<String, Integer> frequencyByToken = new HashMap<String, Integer>();
    for (Token token : tokens) {
      frequencyByToken.put(token.getText(), token.getFrequency());
    }

    if (isQuestion) {
      queryDictionary.add(frequencyByToken);
      return;
    }

    answerDictionary.add(frequencyByToken);
    if (1 == doc.getRelevanceValue()) {
      GoldAnswerStringList.put(doc.getQueryID(), doc.getText());
    }
  }

예제 #20

0

파일 보기

파일: ChineseNormalizer.java 프로젝트: sanju2010/ttc-project

 /**
  * Sets both the lemma and the stem of every word annotation to its covered text.
  *
  * @param cas the JCas to process
  * @throws AnalysisEngineProcessException wrapping any exception raised while processing
  */
 @Override
 public void process(JCas cas) throws AnalysisEngineProcessException {
   try {
     for (FSIterator<Annotation> words = cas.getAnnotationIndex(WordAnnotation.type).iterator();
         words.hasNext(); ) {
       WordAnnotation word = (WordAnnotation) words.next();
       String surface = word.getCoveredText();
       word.setLemma(surface);
       word.setStem(surface);
     }
   } catch (Exception e) {
     throw new AnalysisEngineProcessException(e);
   }
 }

예제 #21

0

파일 보기

파일: HpmAnnotator.java 프로젝트: webscience/K-people

  /**
   * Adds the properties found in the CAS to the event.
   *
   * <p>Every {@code PropertyAnnotation} contributes one key/value entry to the event's
   * properties; each entry is also logged at debug level.
   *
   * @param event event to be saved
   * @param cas CAS object
   */
  private void addProperties(final Event event, final JCas cas) {
    for (FSIterator<Annotation> cursor =
            cas.getAnnotationIndex(PropertyAnnotation.type).iterator();
        cursor.hasNext(); ) {
      final PropertyAnnotation property = (PropertyAnnotation) cursor.next();
      final String key = property.getKey();
      final String value = property.getValue();

      logger.debug("Aggiunta proprietà\nKey: " + key + "\nValue: " + value);

      event.getProperties().put(key, value);
    }
  }

예제 #22

0

파일 보기

파일: HpmAnnotator.java 프로젝트: webscience/K-people

  /**
   * Reads the value used for the emailBody field of the Email object.
   *
   * @param cas CAS to process
   * @return the value of the first {@code BodyAnnotation}, or the empty string when there is
   *     no such annotation or its value is null
   */
  private String getEmailBody(final JCas cas) {
    final Type bodyType = cas.getTypeSystem().getType(BodyAnnotation.class.getCanonicalName());
    final FSIterator<Annotation> bodies = cas.getAnnotationIndex(bodyType).iterator();

    if (!bodies.hasNext()) {
      return "";
    }

    final String value = ((BodyAnnotation) bodies.next()).getValue();
    return value != null ? value : "";
  }

예제 #23

0

파일 보기

파일: JCasUtils.java 프로젝트: termsuite/termsuite-core

 /**
  * Prints every word annotation of the JCas as {@code coveredText_tag}, twelve per line, and
  * ends the output with a line break.
  *
  * @param jcas the JCas to display
  */
 public static void showSdiWithCategory(JCas jcas) {
   FSIterator<Annotation> it = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
   int wordCnt = 0;
   while (it.hasNext()) {
     wordCnt++;
     WordAnnotation a = (WordAnnotation) it.next();
     System.out.print(a.getCoveredText() + "_" + a.getTag());
     if (wordCnt < 12) {
       System.out.print(" ");
     } else {
       System.out.println();
       wordCnt = 0;
     }
   }
   // The iterator is exhausted at this point, so the former Joiner.on(" ").join(it) always
   // produced an empty string. Only the line break was ever emitted; print it directly.
   System.out.println();
 }

예제 #24

0

파일 보기

파일: SnowballStemmer.java 프로젝트: renaud/dkpro-core

  /**
   * Creates stem annotations for the annotations selected by each configured path.
   *
   * <p>Each path has the form {@code typeName[/featurePath]}. For every annotation of the
   * named type a stem annotation is created, unless a filter feature path is configured and
   * the annotation does not match the filter condition.
   *
   * @param jcas the JCas to process
   * @throws AnalysisEngineProcessException declared by the overridden method
   * @throws FeaturePathException declared by the overridden method
   * @throws IllegalStateException when a type is missing from the type system, or when
   *     matching or creating a stem annotation raises an AnalysisEngineProcessException
   */
  @Override
  protected void generateAnnotations(JCas jcas)
      throws AnalysisEngineProcessException, FeaturePathException {
    // The CAS is needed to retrieve values.
    CAS cas = jcas.getCas();

    for (String path : paths) {
      // Split into the type name and the (optional) feature path.
      String[] segments = path.split("/", 2);
      String typeName = segments[0];

      Type type = cas.getTypeSystem().getType(typeName);
      if (type == null) {
        throw new IllegalStateException("Type [" + typeName + "] not found in type system");
      }

      // Initialize the feature path info with the corresponding part of the path.
      initializeFeaturePathInfoFrom(fp, segments);

      FSIterator<?> cursor = cas.getAnnotationIndex(type).iterator();
      while (cursor.hasNext()) {
        AnnotationFS candidate = (AnnotationFS) cursor.next();
        try {
          // Without a filter every annotation is accepted; otherwise the filter decides.
          boolean accepted =
              this.filterFeaturePath == null
                  || this.filterFeaturePathInfo.match(candidate, this.filterCondition);
          if (accepted) {
            createStemAnnotation(jcas, candidate);
          }
        } catch (AnalysisEngineProcessException e) {
          throw new IllegalStateException("error occured while creating a stem annotation", e);
        }
      }
    }
  }

예제 #25

0

파일 보기

파일: MateLemmatizerTagger.java 프로젝트: Alpha34587/termsuite-core

  /**
   * Tags and lemmatizes the word annotations of the JCas with the Mate tagger and lemmatizer.
   *
   * <p>The covered texts of all word annotations, preceded by two artificial root tokens, are
   * passed as one sentence through the tagger and then the lemmatizer; the predicted tags and
   * lemmas are written back onto the word annotations.
   *
   * @param jcas the JCas to process
   * @throws AnalysisEngineProcessException declared by the overridden method
   */
  @Override
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    UIMAProfiler.getProfiler("AnalysisEngine").start(this, "process");

    Lemmatizer lemmatizer = mateLemmatizerModel.getEngine();
    Tagger tagger = mateTaggerModel.getEngine();

    // Keep the annotations in a list so they can be accessed by position later on.
    List<WordAnnotation> words = Lists.newArrayList();
    for (FSIterator<Annotation> cursor = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
        cursor.hasNext(); ) {
      words.add((WordAnnotation) cursor.next());
    }

    // Prepend two fake words to prevent Mate from failing on the first two real words.
    String[] forms = new String[words.size() + 2];
    forms[0] = "<root>";
    forms[1] = "<root2>";
    int slot = 2;
    for (WordAnnotation word : words) {
      forms[slot++] = word.getCoveredText();
    }

    SentenceData09 sentence = new SentenceData09();
    sentence.init(forms);
    sentence = tagger.apply(sentence); // POS tagging
    sentence = lemmatizer.apply(sentence); // lemmatization

    // NOTE(review): result position j is written to word j - 1 although two fake tokens were
    // prepended; this presumably relies on how apply() re-indexes the sentence - confirm.
    for (int j = 1; j < sentence.length(); j++) {
      WordAnnotation target = words.get(j - 1);
      target.setTag(sentence.ppos[j]);
      target.setLemma(sentence.plemmas[j]);
    }

    UIMAProfiler.getProfiler("AnalysisEngine").stop(this, "process");
  }

예제 #26

0

파일 보기

파일: HpmAnnotator.java 프로젝트: webscience/K-people

  /**
   * Retrieves the value of the kpeopletagpattern property.
   *
   * <p>All {@code PropertyAnnotation}s are scanned; when several carry the key
   * "kpeopletagpattern", the value of the last one wins.
   *
   * @param cas CAS to analyse
   * @return value of the kpeopletagpattern property, or {@code null} when no annotation
   *     carries that key
   */
  private String getKpeopleTagPattern(final JCas cas) {
    AnnotationIndex<Annotation> annIdx = cas.getAnnotationIndex(PropertyAnnotation.type);
    FSIterator<Annotation> it = annIdx.iterator();

    String kpeopletagpattern = null;

    while (it.hasNext()) {
      PropertyAnnotation annotation = (PropertyAnnotation) it.next();

      // Constant-first comparison: an annotation without a key is skipped instead of raising
      // a NullPointerException.
      if ("kpeopletagpattern".equals(annotation.getKey())) {
        kpeopletagpattern = annotation.getValue();
      }
    }

    return kpeopletagpattern;
  }

예제 #27

0

파일 보기

파일: RemoveLowConfidenceEntities.java 프로젝트: CASM-Consulting/baleen

  /**
   * Removes from the JCas index every entity whose confidence is below the configured
   * threshold. When {@code ignoreZeroConfidence} is set, entities whose confidence is not
   * greater than 0.0 are kept.
   *
   * @param aJCas the JCas to process
   * @throws AnalysisEngineProcessException declared by the overridden method
   */
  @Override
  public void doProcess(JCas aJCas) throws AnalysisEngineProcessException {
    List<Entity> lowConfidence = new ArrayList<Entity>();

    for (FSIterator<Annotation> cursor = aJCas.getAnnotationIndex(Entity.type).iterator();
        cursor.hasNext(); ) {
      Entity entity = (Entity) cursor.next();

      if (!(entity.getConfidence() < confidenceThreshold)) {
        continue; // not below the threshold
      }
      if (ignoreZeroConfidence && !(entity.getConfidence() > 0.0)) {
        continue; // zero-confidence entities are exempt
      }

      lowConfidence.add(entity);
      getMonitor()
          .debug(
              "Low confidence entity found (ID: {}) - this entity will be removed",
              entity.getInternalId());
    }

    removeFromJCasIndex(lowConfidence);
  }

예제 #28

0

파일 보기

파일: HpmAnnotator.java 프로젝트: webscience/K-people

  /**
   * Fills the "to" recipients of the Email object.
   *
   * <p>The e-mail address of every {@code UserReceiverToAnnotation} in the CAS is added once
   * to {@code email.getEmailTo()}.
   *
   * @param email e-mail to populate
   * @param cas CAS from which the annotations are extracted
   */
  private void addEmailTo(final Email email, final JCas cas) {
    final Type receiverType =
        cas.getTypeSystem().getType(UserReceiverToAnnotation.class.getCanonicalName());

    // The table is used only for its keys, to avoid inserting duplicate addresses.
    final Hashtable<String, String> uniqueAddresses = new Hashtable<String, String>();
    for (FSIterator<Annotation> cursor = cas.getAnnotationIndex(receiverType).iterator();
        cursor.hasNext(); ) {
      final UserReceiverToAnnotation receiver = (UserReceiverToAnnotation) cursor.next();
      uniqueAddresses.put(receiver.getEmail(), "Y");
    }

    for (final String address : uniqueAddresses.keySet()) {
      email.getEmailTo().add(address);
    }
  }

예제 #29

0

파일 보기

파일: TermCleaner.java 프로젝트: sanju2010/ttc-project

 /**
  * Adds the occurrences, frequency and specificity of every variant to the totals of its term
  * annotation. Terms without a variants array are left untouched.
  *
  * @param cas the JCas to adjust
  */
 private void adjust(JCas cas) {
   for (FSIterator<Annotation> terms = cas.getAnnotationIndex(TermAnnotation.type).iterator();
       terms.hasNext(); ) {
     TermAnnotation term = (TermAnnotation) terms.next();
     if (term.getVariants() == null) {
       continue;
     }

     int occurrences = term.getOccurrences();
     double frequency = term.getFrequency();
     double specificity = term.getSpecificity();

     final int variantCount = term.getVariants().size();
     for (int v = 0; v < variantCount; v++) {
       occurrences += term.getVariants(v).getOccurrences();
       frequency += term.getVariants(v).getFrequency();
       specificity += term.getVariants(v).getSpecificity();
     }

     term.setOccurrences(occurrences);
     term.setFrequency(frequency);
     term.setSpecificity(specificity);
   }
 }

예제 #30

0

파일 보기

파일: XmiWriterCasConsumer.java 프로젝트: jianlins/BMI6116

  /**
   * Processes the CAS which was populated by the TextAnalysisEngines. <br>
   * The CAS is converted to XMI and written into the output directory.
   *
   * <p>The output name is derived from the URI of the first {@code SourceDocumentInformation}
   * annotation (input file name, plus {@code _<offset>} when the offset in source is positive,
   * plus {@code .xmi}). When there is no such annotation or its URI is not a valid URL, a
   * running {@code doc<N>.xmi} name is used and no model file name is passed on.
   *
   * @param aCAS a CAS which has been populated by the TAEs
   * @throws ResourceProcessException if the JCas cannot be obtained or the file cannot be
   *     written
   * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS)
   */
  public void processCas(CAS aCAS) throws ResourceProcessException {
    JCas jcas;
    try {
      jcas = aCAS.getJCas();
    } catch (CASException e) {
      throw new ResourceProcessException(e);
    }

    File target = null;
    String ecorePath = null;

    // Retrieve the file name of the input file from the CAS.
    FSIterator sources = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    if (sources.hasNext()) {
      SourceDocumentInformation source = (SourceDocumentInformation) sources.next();
      try {
        File input = new File(new URL(source.getUri()).getPath());

        StringBuilder name = new StringBuilder(input.getName());
        if (source.getOffsetInSource() > 0) {
          name.append("_").append(source.getOffsetInSource());
        }
        name.append(".xmi");

        target = new File(mOutputDir, name.toString());
        ecorePath = mOutputDir.getAbsolutePath() + "/" + input.getName() + ".ecore";
      } catch (MalformedURLException ignored) {
        // Invalid URL: fall through to the default naming below.
      }
    }

    if (target == null) {
      target = new File(mOutputDir, "doc" + mDocNum++ + ".xmi");
    }

    // Serialize the CAS and write it to the output file.
    try {
      writeXmi(jcas.getCas(), target, ecorePath);
    } catch (IOException e) {
      throw new ResourceProcessException(e);
    } catch (SAXException e) {
      throw new ResourceProcessException(e);
    }
  }