Java Analyzer примеры использования

Язык программирования: Java

Пространство имен/Пакет: qa.qcri.qf.pipeline

Класс/Тип: Analyzer

Примеров на hotexamples.com: 4

Java Analyzer — найдено 4 примера. Это лучшие примеры Java-кода для qa.qcri.qf.pipeline.Analyzer, полученные из проектов с открытым исходным кодом. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

addAE(3)

analyze(1)

Пример #1

Показать файл

Файл: Baseline.java Проект: pakchoi/Iyas

  /**
   * Analyzes a sentence into the reusable preliminary CAS and then fills {@code emptyCas} from
   * it, using the normalized text found in the preliminary CAS as the document text.
   *
   * @param analyzer analyzer used to process the sentence
   * @param emptyCas CAS that is reset, populated and returned by this method
   * @param sentenceId identifier handed to the {@code SimpleContent} wrapping the sentence
   * @param sentence raw sentence text; every {@code '/'} and {@code '~'} is removed before analysis
   * @return the same {@code emptyCas} instance that was passed in
   */
  public JCas getPreliminarCas(
      Analyzer analyzer, JCas emptyCas, String sentenceId, String sentence) {
    this.preliminaryCas.reset();

    // The original author notes that the annotator fails badly unless these characters are
    // stripped first.
    String cleaned = sentence.replace("/", "").replace("~", "");

    // Preliminary analysis of the cleaned sentence, tagged with the Arabic language constant.
    Analyzable content = new SimpleContent(sentenceId, cleaned, ArabicAnalyzer.ARABIC_LAN);
    analyzer.analyze(this.preliminaryCas, content);

    // Start the output CAS from a clean state before copying the analysis results into it.
    emptyCas.reset();
    emptyCas.setDocumentLanguage(ArabicAnalyzer.ARABIC_LAN);

    // NOTE(review): the trailing `false` is presumably the "copy sofa" flag, left off so the
    // document text can be set separately below - confirm against the CasCopier documentation.
    CasCopier.copyCas(this.preliminaryCas.getCas(), emptyCas.getCas(), false);

    // The normalized text, not the raw sentence, becomes the document text of the returned CAS.
    NormalizedText normalized = JCasUtil.selectSingle(this.preliminaryCas, NormalizedText.class);
    emptyCas.setDocumentText(normalized.getText());

    return emptyCas;
  }

Пример #2

Показать файл

Файл: CommentSelectionDatasetCreatorV2.java Проект: pakchoi/Iyas

  /**
   * Initializes the English stopwords, the English pair-feature factory, the subtask A/B labels
   * and the analyzer with its four analysis engines.
   *
   * @throws IOException declared by the signature
   * @throws UIMAException declared by the signature; the engine factory calls are the likely
   *     source
   */
  private void setupUimaTools() throws IOException, UIMAException {
    /*
    Document docTrain = JsoupUtils.getDoc(CQA_QL_TRAIN_EN);
    Document docDevel = JsoupUtils.getDoc(CQA_QL_DEV_EN);
    Document docTest = JsoupUtils.getDoc(CQA_QL_TEST_EN);

    this.userProfiles = UserProfile.createUserProfiles(docTrain, docDevel, docTest);
    //////user profiles are built on the training+dev+test semeval2015 datasets!
    */

    // A disabled debug loop that printed the signatures of every user profile used to follow
    // the block above; it is only relevant if the user-profile code is re-enabled.

    this.stopwords = new Stopwords(Stopwords.STOPWORD_EN);

    this.pfEnglish = new PairFeatureFactoryEnglish(this.alphabet);
    this.pfEnglish.setupMeasures(RichNode.OUTPUT_PAR_LEMMA, this.stopwords);

    // Extend the stopword list with punctuation tokens.
    // NOTE(review): this happens after setupMeasures() received the stopwords; whether the
    // measures see the punctuation depends on setupMeasures() keeping the reference - confirm.
    String[] punctuation = ".|...|\\|,|?|!|#|(|)|$|%|&".split("\\|");
    for (String mark : punctuation) {
      this.stopwords.add(mark);
    }

    // Subtask A labels, added in this exact order.
    for (String label : new String[] {"Not English", "Good", "Potential", "Dialogue", "Bad"}) {
      this.a_labels.add(label);
    }

    // Subtask B labels, added in this exact order.
    for (String label : new String[] {"No", "Yes", "Unsure"}) {
      this.b_labels.add(label);
    }

    // Analysis pipeline: segmenter, POS tagger, chunker, lemmatizer (created in that order).
    AnalysisEngine[] engines = {
      createEngine(createEngineDescription(OpenNlpSegmenter.class)),
      createEngine(createEngineDescription(OpenNlpPosTagger.class)),
      createEngine(createEngineDescription(OpenNlpChunker.class)),
      createEngine(createEngineDescription(StanfordLemmatizer.class))
    };
    this.analysisEngineList = engines;

    // Register every engine with a new analyzer constructed over "CASes/semeval".
    this.analyzer = new Analyzer(new UIMAFilePersistence("CASes/semeval"));
    for (AnalysisEngine engine : engines) {
      this.analyzer.addAE(engine);
    }
  }

Пример #3

Показать файл

Файл: Baseline.java Проект: pakchoi/Iyas

  /**
   * Configures stopwords, pair features, labels and analysis engines for Arabic, then processes
   * the Arabic train and dev files.
   *
   * <p>{@code SimilarityException} and {@code IOException} raised while processing the files are
   * printed to standard error and not rethrown.
   *
   * @throws UIMAException declared by the signature; not caught inside this method
   */
  public void runForArabic() throws UIMAException {
    // NOTE(review): the first Stopwords instance is overwritten by the very next statement, so
    // only the corpus-specific list is in effect. Confirm whether the two lists were meant to be
    // combined or whether the first line can be dropped.
    this.stopwords = new Stopwords(Stopwords.STOPWORD_AR);
    this.stopwords = new Stopwords("semeval2015-3/arabic-corpus-specific-stopwords.txt");

    this.pfArabic = new PairFeatureFactoryArabic(this.alphabet);
    this.pfArabic.setupMeasures(RichNode.OUTPUT_PAR_TOKEN_LOWERCASE, this.stopwords);

    this.language = LANG_ARABIC;

    this.preliminaryCas = JCasFactory.createJCas();

    // For Arabic there is just one task, hence a single label set.
    for (String label : new String[] {"direct", "related", "irrelevant"}) {
      this.a_labels.add(label);
    }

    if (!USE_QCRI_ALT_TOOLS) {
      // Whitespace tokenizer. The original author notes that the Stanford Segmenter for Arabic
      // has a very bad bug and its tokenization is completely wrong.
      AnalysisEngine whitespaceTokenizer =
          createEngine(createEngineDescription(WhitespaceTokenizer.class));

      // Stanford POS tagger, Arabic language, "accurate" variant.
      AnalysisEngine stanfordTagger =
          createEngine(
              createEngineDescription(
                  StanfordPosTagger.class,
                  StanfordPosTagger.PARAM_LANGUAGE,
                  "ar",
                  StanfordPosTagger.PARAM_VARIANT,
                  "accurate"));

      // Putting together the UIMA DKPro annotators.
      // NOTE(review): `analyzer` is not assigned in this branch, so the calls below pass
      // whatever the field currently holds - confirm processArabicFile copes with that.
      this.analysisEngineList = new AnalysisEngine[] {whitespaceTokenizer, stanfordTagger};
    } else {
      // QCRI tools: an analyzer without persistence, driven by the ArabicAnalyzer engine.
      this.analyzer = new Analyzer(new UIMANoPersistence());
      AnalysisEngine arabicEngine =
          AnalysisEngineFactory.createEngine(createEngineDescription(ArabicAnalyzer.class));
      this.analyzer.addAE(arabicEngine);
    }

    try {
      processArabicFile(analyzer, CQA_QL_TRAIN_AR, "train");
      processArabicFile(analyzer, CQA_QL_DEV_AR, "dev");
    } catch (SimilarityException | IOException e) {
      // Best-effort: the failure is reported and the method returns normally. A failure on the
      // train file also skips the dev file.
      e.printStackTrace();
    }
  }

Пример #4

Показать файл

Файл: Baseline.java Проект: pakchoi/Iyas

  /**
   * Configures stopwords, pair features, labels and the analysis pipeline for English, then
   * processes the English train and dev files.
   *
   * <p>{@code UIMAException}, {@code IOException} and {@code SimilarityException} raised while
   * processing the files are printed to standard error and not rethrown.
   *
   * @throws UIMAException declared by the signature; can only escape from the setup steps that
   *     run before the file processing
   */
  public void runForEnglish() throws UIMAException {
    this.stopwords = new Stopwords(Stopwords.STOPWORD_EN);

    this.pfEnglish = new PairFeatureFactoryEnglish(this.alphabet);
    this.pfEnglish.setupMeasures(RichNode.OUTPUT_PAR_LEMMA, this.stopwords);

    this.language = LANG_ENGLISH;

    // Extend the stopword list with punctuation tokens.
    // NOTE(review): this happens after setupMeasures() received the stopwords; whether the
    // measures see the punctuation depends on setupMeasures() keeping the reference - confirm.
    String[] punctuation = ".|...|\\|,|?|!|#|(|)|$|%|&".split("\\|");
    for (String mark : punctuation) {
      this.stopwords.add(mark);
    }

    // Subtask A labels, added in this exact order.
    for (String label : new String[] {"Not English", "Good", "Potential", "Dialogue", "Bad"}) {
      this.a_labels.add(label);
    }

    // Subtask B labels, added in this exact order.
    for (String label : new String[] {"No", "Yes", "Unsure"}) {
      this.b_labels.add(label);
    }

    // Analysis pipeline: segmenter, POS tagger, chunker, lemmatizer (created in that order).
    AnalysisEngine[] engines = {
      createEngine(createEngineDescription(OpenNlpSegmenter.class)),
      createEngine(createEngineDescription(OpenNlpPosTagger.class)),
      createEngine(createEngineDescription(OpenNlpChunker.class)),
      createEngine(createEngineDescription(StanfordLemmatizer.class))
    };
    this.analysisEngineList = engines;

    // Register every engine with a new analyzer constructed over "CASes/semeval".
    this.analyzer = new Analyzer(new UIMAFilePersistence("CASes/semeval"));
    for (AnalysisEngine engine : engines) {
      this.analyzer.addAE(engine);
    }

    try {
      this.processEnglishFile(CQA_QL_TRAIN_EN, "train");
      this.processEnglishFile(CQA_QL_DEV_EN, "dev");
    } catch (UIMAException | IOException | SimilarityException e) {
      // Best-effort: the failure is reported and the method returns normally. A failure on the
      // train file also skips the dev file.
      e.printStackTrace();
    }
  }