/**
  * Returns the average number of tokens covered by a chunk in a view.
  *
  * <p>Every {@code Chunk} annotation in the view is counted; the method does not filter by chunk
  * type.
  *
  * @param view the view of the JCas
  * @return average token count per chunk, or {@code 0.0} when the view contains no chunks
  */
 private double getAverageNounPhraseTokenLength(JCas view) {
   int chunkCount = 0;
   int tokenCount = 0;
   for (Chunk chunk : JCasUtil.select(view, Chunk.class)) {
     chunkCount++;
     tokenCount += JCasUtil.selectCovered(view, Token.class, chunk).size();
   }
   // 0 / 0.0 would evaluate to NaN, so report an explicit zero for chunk-less views.
   if (chunkCount == 0) {
     return 0.0;
   }
   return tokenCount / (double) chunkCount;
 }
  /**
   * Computes the share of tokens in the view that carry a {@code SpellingAnomaly} annotation.
   *
   * @param view the view to inspect
   * @return a singleton feature set holding "SpellingErrorRatio"; the ratio is 0.0 when the view
   *     has no tokens
   */
  @Override
  public Set<Feature> extract(JCas view) throws TextClassificationException {
    int tokenCount = JCasUtil.select(view, Token.class).size();
    int anomalyCount = JCasUtil.select(view, SpellingAnomaly.class).size();

    // Token-less views keep the default ratio of zero instead of dividing by zero.
    double ratio = tokenCount > 0 ? (double) anomalyCount / tokenCount : 0.0;
    return new Feature("SpellingErrorRatio", ratio).asSet();
  }
  /**
   * Returns the next sentence, pulling and analysing further CASes from the reader whenever the
   * current sentence buffer is exhausted.
   *
   * @return the next sentence (run through the pre-processor when one is configured), an empty
   *     string when fetching the next CAS from the reader fails, or {@code null} when the reader
   *     has no more documents
   * @throws RuntimeException wrapping any other failure raised while reading or analysing a CAS
   */
  @Override
  public synchronized String nextSentence() {
    // Fast path: sentences of the previously analysed CAS are still buffered.
    if (sentences != null && sentences.hasNext()) {
      return preProcessSentence(sentences.next());
    }

    try {
      if (!getReader().hasNext()) {
        return null;
      }

      CAS cas = resource.retrieve();
      try {
        getReader().getNext(cas);
      } catch (Exception e) {
        // Keep the historical "empty string" contract, but do not lose the cause.
        log.warn("Done iterating returning an empty string", e);
        return "";
      }
      resource.getAnalysisEngine().process(cas);

      List<String> list = new ArrayList<>();
      for (Sentence sentence : JCasUtil.select(cas.getJCas(), Sentence.class)) {
        list.add(sentence.getCoveredText());
      }
      sentences = list.iterator();

      // The CAS produced no sentences: keep advancing until one does or the reader runs dry.
      while (!sentences.hasNext()) {
        if (!getReader().hasNext()) {
          return null;
        }
        cas.reset();
        getReader().getNext(cas);
        resource.getAnalysisEngine().process(cas);
        for (Sentence sentence : JCasUtil.select(cas.getJCas(), Sentence.class)) {
          list.add(sentence.getCoveredText());
        }
        sentences = list.iterator();
      }

      return preProcessSentence(sentences.next());
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  /** Applies the configured sentence pre-processor, if any, to {@code sentence}. */
  private String preProcessSentence(String sentence) {
    if (this.getPreProcessor() != null) {
      return this.getPreProcessor().preProcess(sentence);
    }
    return sentence;
  }
  //	@Test
  /**
   * Runs the book-index phrase aggregation annotator once per listed aggregation strategy on a
   * single-segment document and compares the emitted phrases, in order, with the expected list.
   */
  public void allAggregationStrategies_1segment_expectCorrectRanking() throws Exception {
    String testDocument = "foo bar baz";

    // Expected phrases in the order they should be selected from the CAS.
    List<String> expectedPhrases = new ArrayList<String>();
    expectedPhrases.add("bar");
    expectedPhrases.add("foo");
    expectedPhrases.add("baz");

    List<Class<? extends AggregationStrategy>> strategies =
        new ArrayList<Class<? extends AggregationStrategy>>();
    strategies.add(MaximumAggregation.class);

    for (Class<? extends AggregationStrategy> strategy : strategies) {
      AnalysisEngineDescription description =
          AnalysisEngineFactory.createPrimitiveDescription(
              BookIndexPhraseAggregationAnnotator.class);
      bindResource(description, RankedPhraseAggregationAnnotator.AGGREGATION_STRATEGY, strategy);

      AnalysisEngine engine = createPrimitive(description);
      JCas jcas = setup_1segment(testDocument, engine);
      engine.process(jcas);

      List<String> actualPhrases = new ArrayList<String>();
      for (BookIndexPhrase phrase : JCasUtil.select(jcas, BookIndexPhrase.class)) {
        actualPhrases.add(phrase.getPhrase());
      }

      assertEquals(expectedPhrases, actualPhrases);
    }
  }
Exemplo n.º 5
0
  /**
   * Scores every {@code Answer} in the CAS with the loaded tree model and stores the result as the
   * answer's confidence.
   *
   * <p>The raw score is the model prior plus the learning-rate-weighted output of each tree; it is
   * squashed through the logistic function before being stored.
   *
   * @param jcas the CAS holding the answers to score
   */
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    logger.debug("scoring with model {}", modelName);

    AnswerStats astats = new AnswerStats(jcas);
    List<AnswerScore> scored = new LinkedList<AnswerScore>();

    for (Answer answer : JCasUtil.select(jcas, Answer.class)) {
      double[] features = reorderByLabels(new AnswerFV(answer, astats).getFV());

      double rawScore = model.prior;
      for (Tree tree : model.forest) {
        rawScore += model.learning_rate * classifyWithOneTree(features, tree, 0);
      }
      double confidence = 1.0 / (1.0 + Math.exp(-rawScore));
      scored.add(new AnswerScore(answer, confidence));
    }

    // Confidence is written in a second pass: each answer is taken out of the indexes, updated
    // and re-added (presumably so the index is not modified while it is being iterated above).
    for (AnswerScore entry : scored) {
      entry.a.removeFromIndexes();
      entry.a.setConfidence(entry.score);
      entry.a.addToIndexes();
    }
  }
Exemplo n.º 6
0
  /**
   * Writes the document, its metadata and its entities to Postgres and commits; on an SQL or
   * Baleen failure the error is reported to the monitor and a rollback is attempted.
   *
   * @param jCas the document to persist
   */
  @Override
  protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
    Connection conn = postgresResource.getConnection();

    try {
      // Document row first: its key is referenced by the metadata and entity rows.
      Integer documentKey = executeDocInsert(jCas);
      for (Metadata metadata : JCasUtil.select(jCas, Metadata.class)) {
        executeDocMetadataInsert(documentKey, metadata);
      }

      processEntities(jCas, documentKey);

      conn.commit();
    } catch (SQLException | BaleenException e) {
      getMonitor().error("Unable to insert document into Postgres database", e);
      if (conn == null) {
        return;
      }
      try {
        conn.rollback();
      } catch (SQLException rollbackFailure) {
        getMonitor()
            .error(
                "Unable to rollback insertion - state of the database may have been left inconsistent",
                rollbackFailure);
      }
    }
  }
Exemplo n.º 7
0
 /**
  * Writes one HTML table cell listing the senses of {@code senses}, one per line as
  * "confidence TAB id".
  *
  * @param senses the senses to list; {@code null} or empty produces the NA placeholder
  * @param senseIdClass value written into the cell's {@code class} attribute
  * @param highlightSense when non-null, senses not contained in it are rendered in grey
  * @throws IOException if writing to the output fails
  */
 @Override
 protected void tableCellSenseArray(
     FSArray senses, SenseType senseIdClass, Collection<Sense> highlightSense) throws IOException {
   output.write("\t\t<td class='" + senseIdClass + "'>");
   if (senses != null && senses.size() != 0) {
     boolean needsLineBreak = false;
     for (Sense sense : JCasUtil.select(senses, Sense.class)) {
       if (needsLineBreak) {
         output.write("<br />");
       }
       needsLineBreak = true;

       // Senses outside the highlight set are wrapped in a grey span.
       boolean greyedOut = highlightSense != null && !highlightSense.contains(sense);
       if (greyedOut) {
         output.write("<span style='color: #888'>");
       }
       output.write(String.format("%.2f", sense.getConfidence()) + "\t" + sense.getId());
       if (greyedOut) {
         output.write("</span>");
       }
     }
   } else {
     output.write(NA);
   }
   output.write("</td>");
   output.newLine();
 }
Exemplo n.º 8
0
  /**
   * Classifies a test result against the gold senses and writes the corresponding table cell.
   *
   * <p>The cell is GOLDNA when there is no gold array, TESTNA when the test array is missing or
   * empty, INCORRECT when any best test sense has an id absent from the gold ids, and CORRECT
   * otherwise.
   *
   * @param goldSenseArray gold-standard senses, may be {@code null}
   * @param testSenseArray senses produced by the tested algorithm, may be {@code null}
   * @throws IOException if writing the cell fails
   */
  protected void tableCellTestResults(FSArray goldSenseArray, FSArray testSenseArray)
      throws IOException {
    // Nothing is highlighted for the two "not available" outcomes.
    final Set<Sense> noHighlight = null;

    if (goldSenseArray == null) {
      tableCellSenseArray(testSenseArray, SenseType.GOLDNA, noHighlight);
      return;
    }
    if (testSenseArray == null || testSenseArray.size() == 0) {
      tableCellSenseArray(testSenseArray, SenseType.TESTNA, noHighlight);
      return;
    }

    Set<String> goldIds = new TreeSet<String>();
    for (Sense goldSense : JCasUtil.select(goldSenseArray, Sense.class)) {
      goldIds.add(goldSense.getId());
    }

    Set<Sense> bestTestSenses = getBestSenses(testSenseArray);
    SenseType verdict = SenseType.CORRECT;
    for (Sense candidate : bestTestSenses) {
      if (!goldIds.contains(candidate.getId())) {
        verdict = SenseType.INCORRECT;
        break;
      }
    }
    tableCellSenseArray(testSenseArray, verdict, bestTestSenses);
  }
Exemplo n.º 9
0
  /**
   * Extracts token statistics from the CAS: the token count and the maximum, minimum and mean
   * token length in characters.
   *
   * @param jcas the CAS whose {@code Token} annotations are measured
   * @return four features (count, max, min, mean); for a CAS without tokens the count, max and
   *     mean are 0 and the min keeps its sentinel value of -1
   */
  @Override
  public List<Feature> extract(JCas jcas) throws TextClassificationException {

    double nbToken = 0;
    double minToken = -1; // sentinel: no token seen yet
    double maxToken = 0;
    double totalLength = 0;
    for (Token token : JCasUtil.select(jcas, Token.class)) {
      int length = token.getCoveredText().length();
      nbToken++;
      if (minToken < 0 || length < minToken) {
        minToken = length;
      }
      if (length > maxToken) {
        maxToken = length;
      }
      totalLength += length;
    }

    // Floating-point division never throws; 0.0 / 0.0 silently yields NaN, so the empty case
    // has to be checked explicitly to obtain the intended mean of 0.
    double meanToken = nbToken > 0 ? totalLength / nbToken : 0;

    List<Feature> featList = new ArrayList<Feature>();
    featList.add(new Feature("nb_" + TOKEN, nbToken));
    featList.add(new Feature("max_" + TOKEN + "_size", maxToken));
    featList.add(new Feature("min_" + TOKEN + "_size", minToken));
    featList.add(new Feature("mean_" + TOKEN + "_size", meanToken));
    return featList;
  }
  /**
   * Logs, at debug level, the document id and text followed by the covered text of every
   * {@code BrainRegion} annotation in the CAS.
   *
   * @param cas the CAS to dump
   */
  @Override
  public void process(JCas cas) throws AnalysisEngineProcessException {
    String documentLine = getHeaderDocId(cas) + "\t" + cas.getDocumentText();
    LOG.debug(documentLine);

    for (BrainRegion region : JCasUtil.select(cas, BrainRegion.class)) {
      String regionText = region.getCoveredText();
      LOG.debug(regionText);
    }
  }
  /**
   * Computes the average number of tokens per sentence.
   *
   * @param jcas the CAS to inspect
   * @return a single-element list holding the tokens-per-sentence feature; when the CAS has no
   *     sentences the feature carries a numeric missing value instead of a ratio
   */
  @Override
  public List<Feature> extract(JCas jcas) throws TextClassificationException {
    int sentenceCount = JCasUtil.select(jcas, Sentence.class).size();

    Feature tokensPerSentence;
    if (sentenceCount == 0) {
      tokensPerSentence =
          new Feature(
              FN_TOKENS_PER_SENTENCE, new MissingValue(MissingValueNonNominalType.NUMERIC));
    } else {
      int tokenCount = JCasUtil.select(jcas, Token.class).size();
      double ratio = (double) tokenCount / sentenceCount;
      tokensPerSentence = new Feature(FN_TOKENS_PER_SENTENCE, ratio);
    }

    List<Feature> featList = new ArrayList<Feature>();
    featList.add(tokensPerSentence);
    return featList;
  }
 /**
  * Groups all {@code Dependency} annotations of the CAS by their governor token.
  *
  * @param jCas the CAS whose dependencies are collected
  * @return a map from each governor to the set of dependencies it governs; tokens that govern
  *     no dependency have no entry
  */
 public static HashMap<Token, Set<Dependency>> getChildNodesMap(JCas jCas) {
   HashMap<Token, Set<Dependency>> childrenByGovernor = new HashMap<Token, Set<Dependency>>();
   for (Dependency dependency : JCasUtil.select(jCas, Dependency.class)) {
     Token governor = dependency.getGovernor();
     Set<Dependency> children = childrenByGovernor.get(governor);
     if (children == null) {
       // First dependency seen for this governor: start its set.
       children = new HashSet<Dependency>();
       childrenByGovernor.put(governor, children);
     }
     children.add(dependency);
   }
   return childrenByGovernor;
 }
Exemplo n.º 13
0
 /**
  * Creates parentheticals either per window annotation or, when no window class is configured,
  * once for the whole document text.
  *
  * @param jCas the CAS to annotate
  */
 @Override
 public void process(JCas jCas) throws AnalysisEngineProcessException {
   if (windowClass == null) {
     // No window type configured: treat the entire document as one window at offset 0.
     createParentheticals(jCas, jCas.getDocumentText(), 0);
     return;
   }

   for (Annotation window : JCasUtil.select(jCas, windowClass)) {
     // The window's begin is passed along as the offset of its text.
     createParentheticals(jCas, window.getCoveredText(), window.getBegin());
   }
 }
  /**
   * Writes one text file per non-empty argument component (claim, premise, backing, rebuttal,
   * refutation) containing the tokenized sentences that overlap the component, one sentence per
   * line with tokens separated by single spaces.
   *
   * <p>File names follow
   * {@code <documentId>_s<sentenceCount>_<ComponentType>_<begin>_<end>.txt}.
   *
   * @param aJCas the annotated document
   * @throws AnalysisEngineProcessException if a file cannot be written
   */
  @Override
  public void process(JCas aJCas) throws AnalysisEngineProcessException {
    String documentId = DocumentMetaData.get(aJCas).getDocumentId();

    Class[] componentTypes = {
      Claim.class, Premise.class, Backing.class, Rebuttal.class, Refutation.class
    };
    for (Class componentType : componentTypes) {
      for (Object candidate : JCasUtil.select(aJCas, componentType)) {
        ArgumentComponent component = (ArgumentComponent) candidate;

        int end = component.getEnd();
        int begin = component.getBegin();
        if (end <= begin) {
          // Zero-length (implicit) components have no text to export.
          continue;
        }

        List<Sentence> overlapping =
            JCasUtil2.selectOverlapping(Sentence.class, component, aJCas);

        String filename =
            documentId
                + "_s"
                + overlapping.size()
                + "_"
                + component.getClass().getSimpleName()
                + "_"
                + begin
                + "_"
                + end
                + ".txt";

        StringBuilder content = new StringBuilder();
        for (Sentence sentence : overlapping) {
          List<String> words = new ArrayList<>();
          for (Token token : JCasUtil.selectCovered(Token.class, sentence)) {
            words.add(token.getCoveredText());
          }
          content.append(StringUtils.join(words, " "));
          content.append("\n");
        }

        try {
          FileUtils.write(new File(outputFolder, filename), content.toString().trim());
        } catch (IOException e) {
          throw new AnalysisEngineProcessException(e);
        }
      }
    }
  }
 /**
  * Runs sentence boundary tokenization over every {@code Part} annotation whose covered text is
  * non-blank.
  *
  * @param jCas the CAS holding the parts to segment
  */
 public void initializeSentenceBoundaryDetection(JCas jCas) {
   // The parts are selected before initialize() runs, in the same order as before.
   Collection<Part> parts = JCasUtil.select(jCas, Part.class);
   initialize();
   for (Part part : parts) {
     if (part == null) {
       continue;
     }
     String scope = part.getCoveredText();
     if (scope == null || scope.trim().isEmpty()) {
       // Nothing to segment in a part without visible text.
       continue;
     }
     sentenceBoundaryTokenizer(jCas, part, scope);
   }
 }
Exemplo n.º 16
0
  /**
   * Demo entry point: segments a German sample text, runs the Stanford named entity recognizer
   * and a CAS dump writer over it, then prints every named entity found.
   *
   * @param args unused
   */
  public static void main(String[] args) throws Exception {
    String sampleText =
        "Die Fossillagerstätte Geiseltal befindet sich im ehemaligen Braunkohlerevier des Geiseltales südlich der Stadt Halle in Sachsen-Anhalt. Sie ist eine bedeutende Fundstelle heute ausgestorbener Pflanzen und Tiere aus der Zeit des Mittleren Eozäns vor 48 bis 41 Millionen Jahren. Im Geiseltal wurde nachweislich seit 1698 erstmals Kohle gefördert, die ersten Fossilien kamen aber erst Anfang des 20. Jahrhunderts eher zufällig zu Tage. Planmäßige wissenschaftliche Ausgrabungen begannen 1925 seitens der Martin-Luther-Universität Halle-Wittenberg. Unterbrochen durch den Zweiten Weltkrieg, können die Untersuchungen in zwei Forschungsphasen untergliedert werden. Aufgrund der zunehmenden Auskohlung der Rohstofflager kamen die Ausgrabungen Mitte der 1980er allmählich zum Erliegen und endeten endgültig zu Beginn des dritten Jahrtausends.";

    JCas document = JCasFactory.createJCas();
    document.setDocumentLanguage("de");
    document.setDocumentText(sampleText);

    // Pipeline order: segmentation, named entity recognition, CAS dump.
    SimplePipeline.runPipeline(
        document,
        AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class),
        AnalysisEngineFactory.createEngineDescription(StanfordNamedEntityRecognizer.class),
        AnalysisEngineFactory.createEngineDescription(CasDumpWriter.class));

    for (NamedEntity entity : JCasUtil.select(document, NamedEntity.class)) {
      System.out.println("Found NE: " + entity.getValue() + ", " + entity.getCoveredText());
    }
  }
Exemplo n.º 17
0
  /**
   * Checks that processing a simple sentence yields one dependency per token of the first
   * sentence.
   */
  @Test
  public void testProcess() throws AnalysisEngineProcessException, ResourceInitializationException {
    jCas.setDocumentText("The fox jumps over the dog.");

    processJCas();

    final Sentence firstSentence = JCasUtil.select(jCas, Sentence.class).iterator().next();
    final List<Dependency> dependencies =
        JCasUtil.selectCovered(jCas, Dependency.class, firstSentence);

    // The concrete dependency labels are model dependent, so only the count is verified:
    // 6 words + 1 punctuation mark = 7 dependencies.
    assertEquals(7, dependencies.size());
  }
Exemplo n.º 18
0
  /**
   * Checks that the expected phrase chunks, in order, are produced for the first sentence of a
   * simple document.
   */
  @Test
  public void test() throws AnalysisEngineProcessException, ResourceInitializationException {
    jCas.setDocumentText("The fox jumps over the dog.");

    processJCas();

    final Sentence firstSentence = JCasUtil.select(jCas, Sentence.class).iterator().next();
    final List<PhraseChunk> phrases =
        JCasUtil.selectCovered(jCas, PhraseChunk.class, firstSentence);

    final String[] expectedChunks = {
      "The fox", "jumps over the dog", "over the dog", "the dog"
    };
    Assert.assertEquals(expectedChunks.length, phrases.size());
    for (int i = 0; i < expectedChunks.length; i++) {
      Assert.assertEquals(expectedChunks[i], phrases.get(i).getCoveredText());
    }
  }
Exemplo n.º 19
0
  /**
   * Groups the entities of the CAS by their reference target and hands each group to
   * {@code processCoreferencedEntities}.
   *
   * <p>An entity without a referent is keyed by a freshly created {@code ReferenceTarget}.
   *
   * @param jCas the document whose entities are inserted
   * @param docKey key of the already inserted document row
   * @throws SQLException if inserting a group fails
   */
  private void processEntities(JCas jCas, Integer docKey) throws SQLException {
    Map<ReferenceTarget, List<Entity>> entitiesByTarget = new HashMap<>();

    for (Entity entity : JCasUtil.select(jCas, Entity.class)) {
      ReferenceTarget target = entity.getReferent();
      if (target == null) {
        target = new ReferenceTarget(jCas);
      }

      List<Entity> group = entitiesByTarget.get(target);
      if (group == null) {
        group = new ArrayList<>();
      }
      group.add(entity);
      entitiesByTarget.put(target, group);
    }

    for (List<Entity> group : entitiesByTarget.values()) {
      processCoreferencedEntities(docKey, group);
    }
  }
Exemplo n.º 20
0
  /**
   * Reads the single stored document back through the Mongo collection reader and verifies its
   * text, its dictionary term annotation and its header.
   */
  @Test
  public void testRead() throws Exception {
    List<JCas> documents =
        asList(createReader(MongoCollectionReader.class, BlueUima.PARAM_DB_CONNECTION, conn));
    assertEquals(1, documents.size());

    JCas jCas = documents.get(0);
    assertEquals(UimaTests.TEST_SENTENCE, jCas.getDocumentText());

    // Dump every annotation to stdout for manual inspection.
    for (Annotation annotation : JCasUtil.select(jCas, Annotation.class)) {
      System.out.println(annotation);
    }

    BiolexiconDictTerm term = selectSingle(jCas, BiolexiconDictTerm.class);
    assertNotNull(term);
    assertEquals("theId", term.getEntityId());

    Header header = selectSingle(jCas, Header.class);
    assertNotNull(header);
    assertEquals("17", header.getDocId());
  }
Exemplo n.º 21
0
  /**
   * Collects the WSD results of the CAS into a table (row = WSD item, column = disambiguation
   * method, cell = assigned senses) and writes it out through the document/table hook methods.
   *
   * <p>Results without a WSD item or without senses are skipped with an info log entry, as are
   * results for which {@code ignoreResult} returns true.
   *
   * @param aJCas the CAS holding the {@code WSDResult} annotations
   * @throws AnalysisEngineProcessException if writing the table raises an {@code IOException}
   */
  @Override
  public void process(JCas aJCas) throws AnalysisEngineProcessException {
    super.process(aJCas);
    // Rows and columns are kept sorted by the supplied comparators.
    Table<WSDItem, String, FSArray> testResults =
        TreeBasedTable.create(new WSDItemComparator(), new StringComparator());

    for (WSDResult r : JCasUtil.select(aJCas, WSDResult.class)) {
      if (r.getWsdItem() == null) {
        logger.info(
            "skipping "
                + r.getDisambiguationMethod()
                + " result for \""
                + r.getCoveredText()
                + "\" because it has no instance");
        continue;
      }
      if (r.getSenses() == null) {
        logger.info(
            "skipping "
                + r.getDisambiguationMethod()
                + " result for "
                + r.getWsdItem().getId()
                + " because no senses are assigned");
        continue;
      }
      if (ignoreResult(r)) {
        // NOTE(review): the message names goldStandardAlgorithm rather than the result's own
        // disambiguation method - confirm this is intended.
        logger.info(
            goldStandardAlgorithm
                + " result for "
                + r.getWsdItem().getId()
                + " matches the ignore pattern");
      } else {
        testResults.put(r.getWsdItem(), r.getDisambiguationMethod(), r.getSenses());
      }
    }

    try {
      beginDocument("Document");
      beginTable(testResults.rowKeySet().size(), testResults.columnKeySet().size());
      // Header row: instance, gold standard, then every other algorithm.
      beginTableRow();
      tableHeader("instance");
      tableHeader(goldStandardAlgorithm);
      for (String testAlgorithm : testResults.columnKeySet()) {
        if (!testAlgorithm.equals(goldStandardAlgorithm)) {
          tableHeader(testAlgorithm);
        }
      }
      endTableRow();
      // One row per WSD item, with columns in the same order as the header.
      for (WSDItem wsdItem : testResults.rowKeySet()) {
        // numItemsAttempted is declared outside this method and is only incremented when a
        // limit is set (maxItemsAttempted >= 0), because of the short-circuiting &&.
        if (maxItemsAttempted >= 0 && numItemsAttempted++ >= maxItemsAttempted) {
          break;
        }
        FSArray goldResults = testResults.get(wsdItem, goldStandardAlgorithm);
        beginTableRow();
        tableHeaderInstance(wsdItem);
        tableCellGoldResults(goldResults);
        for (String testAlgorithm : testResults.columnKeySet()) {
          if (!testAlgorithm.equals(goldStandardAlgorithm)) {
            tableCellTestResults(goldResults, testResults.get(wsdItem, testAlgorithm));
          }
        }
        endTableRow();
      }
      endTable();
      endDocument();
    } catch (IOException e) {
      throw new AnalysisEngineProcessException(e);
    }
  }
  /**
   * Extracts syntactic-complexity features from the constituents of every sentence: phrase,
   * clause, T-unit and coordination counts per sentence, average lengths in characters, average
   * parse tree depth, and several per-T-unit and per-clause ratios.
   *
   * <p>Divisors are clamped to at least 1 before use, so an empty CAS yields zeros rather than
   * NaN or infinity.
   *
   * @param jcas the parsed CAS
   * @return the set of extracted features
   */
  public Set<Feature> extract(JCas jcas) {

    // Counters that later act as divisors are doubles, so all ratios are floating-point.
    double npCount = 0.0;
    double vpCount = 0.0;
    double ppCount = 0.0;
    double clauseCount = 0.0;
    double tunitCount = 0.0;
    int sbarCount = 0;
    int verbPhraseCount = 0;
    int complexNominalCount = 0;
    int dependentClauseCount = 0;
    int complexTunitCount = 0;
    int coordCount = 0;

    // Accumulated covered-text lengths (in characters) and parse depths.
    int npLengthSum = 0;
    int vpLengthSum = 0;
    int ppLengthSum = 0;
    int clauseLengthSum = 0;
    int tunitLengthSum = 0;
    int treeDepthSum = 0;

    double sentenceCount = JCasUtil.select(jcas, Sentence.class).size() * 1.0;
    for (Sentence sentence : JCasUtil.select(jcas, Sentence.class)) {
      treeDepthSum += ParsePatternUtils.getParseDepth(sentence);
      for (Constituent constituent : JCasUtil.selectCovered(Constituent.class, sentence)) {
        // Mutually exclusive phrase categories.
        if (constituent instanceof NP) {
          npCount++;
          npLengthSum += constituent.getCoveredText().length();
        } else if (constituent instanceof VP) {
          vpCount++;
          vpLengthSum += constituent.getCoveredText().length();
        } else if (constituent instanceof PP) {
          ppCount++;
          ppLengthSum += constituent.getCoveredText().length();
        } else if (constituent instanceof SBAR) {
          sbarCount++;
          if (ParsePatternUtils.isDependentClause(constituent)) {
            dependentClauseCount++;
          }
        } else if (ParsePatternUtils.isClause(constituent)) {
          clauseCount++;
          clauseLengthSum += constituent.getCoveredText().length();
        }

        // Pattern-based categories, checked independently of the category above.
        if (ParsePatternUtils.isTunit(constituent)) {
          tunitCount++;
          tunitLengthSum += constituent.getCoveredText().length();
          if (ParsePatternUtils.isComplexTunit(constituent)) {
            complexTunitCount++;
          }
        }
        if (ParsePatternUtils.isCoordinate(constituent)) {
          coordCount++;
        }
        if (ParsePatternUtils.isComplexNominal(constituent)) {
          complexNominalCount++;
        }
        if (ParsePatternUtils.isVerbPhrase(constituent)) {
          verbPhraseCount++;
        }
      }
    }

    // Avoid division by zero; there should be at least one sentence in the CAS.
    sentenceCount = Math.max(1, sentenceCount);

    Set<Feature> featSet = new HashSet<Feature>();

    featSet.add(new Feature(NPS_PER_SENTENCE, npCount / sentenceCount));
    featSet.add(new Feature(VPS_PER_SENTENCE, vpCount / sentenceCount));
    featSet.add(new Feature(PPS_PER_SENTENCE, ppCount / sentenceCount));
    featSet.add(new Feature(SBARS_PER_SENTENCE, sbarCount / sentenceCount));
    featSet.add(new Feature(CLAUSES_PER_SENTENCE, clauseCount / sentenceCount));
    featSet.add(new Feature(DEP_CLAUSES_PER_SENTENCE, dependentClauseCount / sentenceCount));
    featSet.add(new Feature(TUNITS_PER_SENTENCE, tunitCount / sentenceCount));
    featSet.add(new Feature(COMPLEX_TUNITS_PER_SENTENCE, complexTunitCount / sentenceCount));
    featSet.add(new Feature(COORDS_PER_SENTENCE, coordCount / sentenceCount));

    // With zero phrases the length sum is zero too, so dividing by 1 yields an average of 0.
    npCount = Math.max(1, npCount);
    vpCount = Math.max(1, vpCount);
    ppCount = Math.max(1, ppCount);
    tunitCount = Math.max(1, tunitCount);

    featSet.add(new Feature(AVG_NP_LENGTH, npLengthSum / npCount));
    featSet.add(new Feature(AVG_VP_LENGTH, vpLengthSum / vpCount));
    featSet.add(new Feature(AVG_PP_LENGTH, ppLengthSum / ppCount));
    featSet.add(new Feature(AVG_TUNIT_LENGTH, tunitLengthSum / tunitCount));

    featSet.add(new Feature(AVG_TREE_DEPTH, treeDepthSum / sentenceCount));

    // Uses the unclamped clause count: the clamp below is applied only afterwards.
    featSet.add(new Feature(CLAUSES_PER_TUNIT, clauseCount / tunitCount));

    clauseCount = Math.max(1, clauseCount);
    featSet.add(new Feature(AVG_CLAUSE_LENGTH, clauseLengthSum / clauseCount));
    featSet.add(new Feature(COMPLEX_TUNITS_PER_TUNIT, complexTunitCount / tunitCount));
    featSet.add(new Feature(COORDS_PER_TUNIT, coordCount / tunitCount));
    featSet.add(new Feature(COMPLEXNOMINALS_PER_TUNIT, complexNominalCount / tunitCount));
    featSet.add(new Feature(VERBPHRASES_PER_TUNIT, verbPhraseCount / tunitCount));
    featSet.add(new Feature(DEPCLAUSE_TUNIT_RATIO, dependentClauseCount / tunitCount));

    featSet.add(new Feature(DEPCLAUSE_CLAUSE_RATIO, dependentClauseCount / clauseCount));
    featSet.add(new Feature(COORDS_PER_CLAUSE, coordCount / clauseCount));
    featSet.add(new Feature(COMPLEXNOMINALS_PER_CLAUSE, complexNominalCount / clauseCount));
    return featSet;
  }
Exemplo n.º 23
0
 /**
  * Prints the category assigned to the first {@code UsenetDocument} annotation of the CAS,
  * together with the CAS's view URI.
  *
  * @param jCas the classified document
  */
 @Override
 public void process(JCas jCas) throws AnalysisEngineProcessException {
   // The iterator's next() is called unconditionally, as before.
   UsenetDocument classified = JCasUtil.select(jCas, UsenetDocument.class).iterator().next();
   String message =
       "classified " + ViewUriUtil.getURI(jCas) + " as " + classified.getCategory() + ".";
   System.out.println(message);
 }
Exemplo n.º 24
0
  /**
   * Serializes the question analysis held in the CAS as one JSON object and passes it to
   * {@code output}.
   *
   * <p>Shape of the emitted line:
   * {@code {"qId": "...", "SV": ["..."], "LAT": [{...}], "Concept": [{...}]}}
   *
   * <p>String values are escaped (backslash and double quote), and no separator is written
   * after the last member, so the line is well-formed JSON.
   *
   * @param jcas the CAS holding the {@code QuestionInfo}, {@code SV}, {@code LAT} and
   *     {@code Concept} annotations
   */
  public synchronized void process(JCas jcas) throws AnalysisEngineProcessException {
    QuestionInfo qi = JCasUtil.selectSingle(jcas, QuestionInfo.class);

    StringBuilder line = new StringBuilder();
    line.append("{\"qId\": \"").append(escapeJson(qi.getQuestionId())).append("\", ");

    line.append("\"SV\": [");
    for (Iterator<SV> svIterator = JCasUtil.select(jcas, SV.class).iterator();
        svIterator.hasNext(); ) {
      SV sv = svIterator.next();
      line.append("\"").append(escapeJson(sv.getCoveredText())).append("\"");
      if (svIterator.hasNext()) {
        line.append(", ");
      }
    }
    line.append("], ");

    line.append("\"LAT\": [");
    for (Iterator<LAT> latIterator = JCasUtil.select(jcas, LAT.class).iterator();
        latIterator.hasNext(); ) {
      LAT l = latIterator.next();
      /* {"synset": "...", "text": "...", "specificity": "...", "type": "..."} */
      line.append("{");
      if (l.getSynset() != 0) { // only add synset when it is not zero
        line.append("\"synset\": \"").append(l.getSynset()).append("\", ");
      }
      line.append("\"text\": \"").append(escapeJson(l.getText())).append("\",");
      line.append(" \"specificity\": \"").append(escapeJson(l.getSpecificity())).append("\", ");
      line.append("\"type\": \"").append(escapeJson(l.getClass().getSimpleName())).append("\"}");
      // not last, add comma
      if (latIterator.hasNext()) {
        line.append(", ");
      }
    }
    line.append("], ");

    line.append("\"Concept\": [");
    for (Iterator<Concept> conceptIterator = JCasUtil.select(jcas, Concept.class).iterator();
        conceptIterator.hasNext(); ) {
      Concept c = conceptIterator.next();
      line.append("{");
      line.append("\"fullLabel\": \"").append(escapeJson(c.getFullLabel())).append("\", ");
      line.append("\"cookedLabel\": \"").append(escapeJson(c.getCookedLabel())).append("\", ");
      line.append("\"pageID\": \"").append(escapeJson(c.getPageID())).append("\"");
      line.append("}");
      // not last, add comma
      if (conceptIterator.hasNext()) {
        line.append(", ");
      }
    }
    // Last member of the object: no trailing separator before the closing brace.
    line.append("]");

    line.append("}");
    output(line.toString());
    // Question q = QuestionDashboard.getInstance().get(qi.getQuestionId());
    // QuestionDashboard.getInstance().finishQuestion(q);
  }

  /**
   * Renders {@code value} as text and escapes backslashes and double quotes for use inside a
   * JSON string literal. Uses literal {@code String.replace}; with {@code replaceAll} the
   * backslash in the replacement would be consumed as an escape and nothing would change.
   */
  private String escapeJson(Object value) {
    return String.valueOf(value).replace("\\", "\\\\").replace("\"", "\\\"");
  }