/**
  * Returns the average number of tokens per chunk (noun phrase) in a view.
  *
  * @param view the view of the JCas
  * @return average token count over all chunks, or 0.0 if the view contains no chunks
  */
 private double getAverageNounPhraseTokenLength(JCas view) {
   int totalNumber = 0;
   int chunkCount = 0;
   // Single pass: the original queried the chunk index a second time just to get the count.
   for (Chunk chunk : JCasUtil.select(view, Chunk.class)) {
     totalNumber += JCasUtil.selectCovered(view, Token.class, chunk).size();
     chunkCount++;
   }
   if (chunkCount == 0) {
     return 0.0; // avoid 0 / 0.0 -> NaN on documents without chunks
   }
   return totalNumber / (double) chunkCount;
 }
  /**
   * Computes the ratio of spelling anomalies to tokens in the given view.
   *
   * @param view the JCas view to inspect
   * @return a single "SpellingErrorRatio" feature (0.0 for token-less documents)
   */
  @Override
  public Set<Feature> extract(JCas view) throws TextClassificationException {
    final int tokenCount = JCasUtil.select(view, Token.class).size();
    final int errorCount = JCasUtil.select(view, SpellingAnomaly.class).size();

    // Guard against division by zero on empty documents.
    final double errorRatio = tokenCount > 0 ? (double) errorCount / tokenCount : 0.0;
    return new Feature("SpellingErrorRatio", errorRatio).asSet();
  }
  /**
   * Returns the next sentence from the corpus, or {@code null} when the reader is exhausted.
   *
   * <p>Sentences are served from a cached iterator. When the cache is empty, the next CAS is
   * pulled from the UIMA reader, run through the analysis engine, and its sentences are
   * extracted; CASes that yield no sentences are skipped. The optional pre-processor is applied
   * to every returned sentence. Returns {@code ""} (not null) when reading a CAS fails.
   */
  @Override
  public synchronized String nextSentence() {
    if (sentences == null || !sentences.hasNext()) {
      try {
        if (getReader().hasNext()) {
          CAS cas = resource.retrieve();

          try {
            getReader().getNext(cas);
          } catch (Exception e) {
            // Best effort: a failing read is treated as end-of-data instead of aborting.
            log.warn("Done iterating returning an empty string");
            return "";
          }

          resource.getAnalysisEngine().process(cas);

          List<String> list = new ArrayList<>();
          for (Sentence sentence : JCasUtil.select(cas.getJCas(), Sentence.class)) {
            list.add(sentence.getCoveredText());
          }

          sentences = list.iterator();
          // needs to be next cas
          while (!sentences.hasNext()) {
            // sentence is empty; go to another cas
            if (reader.hasNext()) {
              cas.reset();
              getReader().getNext(cas);
              resource.getAnalysisEngine().process(cas);
              for (Sentence sentence : JCasUtil.select(cas.getJCas(), Sentence.class)) {
                list.add(sentence.getCoveredText());
              }
              sentences = list.iterator();
            } else return null; // corpus exhausted without finding another sentence
          }

          String ret = sentences.next();
          if (this.getPreProcessor() != null) ret = this.getPreProcessor().preProcess(ret);
          return ret;
        }

        // Reader has no more CASes to offer.
        return null;

      } catch (Exception e) {
        throw new RuntimeException(e);
      }

    } else {
      // Cache still holds sentences; serve the next one directly.
      String ret = sentences.next();
      if (this.getPreProcessor() != null) ret = this.getPreProcessor().preProcess(ret);
      return ret;
    }
  }
  /**
   * Copies every {@code RelationCandidate} from the incoming CAS into the aggregate CAS,
   * shifting annotation offsets so candidates line up with the text accumulated in
   * {@code content}. Candidates covering a text range already aggregated are mapped onto the
   * existing copy instead of duplicating the snippet.
   *
   * @param aJCas the incoming JCas to harvest relation candidates from
   * @throws AnalysisEngineProcessException declared by the interface
   */
  @Override
  public void process(JCas aJCas) throws AnalysisEngineProcessException {

    // Keeping track of the ranges of different relation candidates so we wont have duplicate
    // text snippets for different candidates on the same text
    Map<IndexRange, IndexRange> rangeMappings = new HashMap<IndexRange, IndexRange>();

    if (aggregateJCas == null) aggregateJCas = getEmptyJCas();

    CasCopier copier = new CasCopier(aJCas.getCas(), aggregateJCas.getCas());
    Iterator<RelationCandidate> iter = JCasUtil.iterator(aJCas, RelationCandidate.class);

    while (iter.hasNext()) {

      RelationCandidate candidate = iter.next();
      RelationCandidate candidateCopy = (RelationCandidate) copier.copyFs(candidate);

      // See if we already have this candidate in the aggregate jcas
      IndexRange candidateRange = new IndexRange(candidate);
      // The offset between the old jcas and the new of this relation candidate
      int offset = 0;

      if (rangeMappings.containsKey(candidateRange)) {
        // Range seen before: reuse its position in the aggregate text.
        offset = rangeMappings.get(candidateRange).getStart() - candidateRange.getStart();
        updateAnnotation(candidateCopy, offset);
        // No need to copy features, has already been done

      } else {
        // New range: it will be appended at the current end of the aggregate text.
        offset = content.length() - candidateRange.getStart();
        updateAnnotation(candidateCopy, offset);
        rangeMappings.put(candidateRange, new IndexRange(candidateCopy));

        // For every feature we want to copy
        for (Class<? extends Annotation> feature : features) {

          // Iterating over the annotations of this feature type covered by this relation candidate
          for (Annotation annotation : JCasUtil.selectCovered(aJCas, feature, candidate)) {
            Annotation cAnnotation = (Annotation) copier.copyFs(annotation);
            // Updating the indices of the annotation
            updateAnnotation(cAnnotation, offset);
            aggregateJCas.addFsToIndexes(cAnnotation);
          }
        }
        // Adding the text content of the relation candidate to the new cas
        content.append(candidate.getCoveredText());
      }
      aggregateJCas.addFsToIndexes(candidateCopy);
    }
  }
  /**
   * Dumps every non-implicit argument component into its own plain-text file, one tokenized
   * sentence per line. File names encode document id, sentence count, component type and span.
   *
   * @param aJCas the document to export components from
   * @throws AnalysisEngineProcessException if a component file cannot be written
   */
  @Override
  public void process(JCas aJCas) throws AnalysisEngineProcessException {
    String documentId = DocumentMetaData.get(aJCas).getDocumentId();

    Class[] componentTypes = {
      Claim.class, Premise.class, Backing.class, Rebuttal.class, Refutation.class
    };
    for (Class componentType : componentTypes) {
      for (Object candidate : JCasUtil.select(aJCas, componentType)) {
        ArgumentComponent component = (ArgumentComponent) candidate;

        int begin = component.getBegin();
        int end = component.getEnd();

        // Implicit components have no text extent; skip them.
        if (end <= begin) {
          continue;
        }

        List<Sentence> overlappingSentences =
            JCasUtil2.selectOverlapping(Sentence.class, component, aJCas);

        String filename =
            String.format(
                "%s_s%d_%s_%d_%d.txt",
                documentId,
                overlappingSentences.size(),
                component.getClass().getSimpleName(),
                begin,
                end);

        StringBuilder text = new StringBuilder();
        for (Sentence sentence : overlappingSentences) {
          List<String> words = new ArrayList<>();
          for (Token token : JCasUtil.selectCovered(Token.class, sentence)) {
            words.add(token.getCoveredText());
          }
          text.append(StringUtils.join(words, " ")).append("\n");
        }

        try {
          FileUtils.write(new File(outputFolder, filename), text.toString().trim());
        } catch (IOException e) {
          throw new AnalysisEngineProcessException(e);
        }
      }
    }
  }
예제 #6
0
  /**
   * Scores every answer with the gradient-boosted forest model and writes the resulting
   * confidence back onto the answer annotations.
   *
   * @param jcas CAS containing the {@code Answer} annotations to score
   * @throws AnalysisEngineProcessException declared by the interface
   */
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    logger.debug("scoring with model {}", modelName);

    AnswerStats astats = new AnswerStats(jcas);
    List<AnswerScore> answers = new ArrayList<AnswerScore>();

    for (Answer a : JCasUtil.select(jcas, Answer.class)) {
      AnswerFV fv = new AnswerFV(a, astats);

      // Feature values must be in the label order the model was trained with.
      double[] fvec = reorderByLabels(fv.getFV());

      // prior + learning-rate-weighted sum of tree outputs, squashed through a
      // sigmoid to yield a confidence in (0, 1).
      double res = model.prior;
      for (Tree t : model.forest) {
        res += model.learning_rate * classifyWithOneTree(fvec, t, 0);
      }
      res = 1.0 / (1.0 + Math.exp(-res));
      answers.add(new AnswerScore(a, res));
    }

    /* Reindex the touched answer info(s): remove before mutating confidence,
     * re-add afterwards so the sorted index stays consistent. */
    for (AnswerScore as : answers) {
      as.a.removeFromIndexes();
      as.a.setConfidence(as.score);
      as.a.addToIndexes();
    }
  }
  //	@Test
  public void allAggregationStrategies_1segment_expectCorrectRanking() throws Exception {
    // NOTE(review): the @Test annotation above is commented out, so this test is
    // currently disabled.
    final String testDocument = "foo bar baz";

    List<Class<? extends AggregationStrategy>> strategies =
        new ArrayList<Class<? extends AggregationStrategy>>();
    strategies.add(MaximumAggregation.class);

    for (Class<? extends AggregationStrategy> strategy : strategies) {

      AnalysisEngineDescription description =
          AnalysisEngineFactory.createPrimitiveDescription(
              BookIndexPhraseAggregationAnnotator.class);

      bindResource(description, RankedPhraseAggregationAnnotator.AGGREGATION_STRATEGY, strategy);

      AnalysisEngine engine = createPrimitive(description);
      JCas jcas = setup_1segment(testDocument, engine);

      engine.process(jcas);

      List<String> expected = new ArrayList<String>();
      expected.add("bar");
      expected.add("foo");
      expected.add("baz");

      List<String> actual = new ArrayList<String>();
      for (BookIndexPhrase phrase : JCasUtil.select(jcas, BookIndexPhrase.class)) {
        actual.add(phrase.getPhrase());
      }

      assertEquals(expected, actual);
    }
  }
예제 #8
0
  /**
   * Extracts token-length statistics (in characters) over the document: count, minimum,
   * maximum and mean token length.
   *
   * @param jcas the document to analyze
   * @return four features: nb, max size, min size and mean size of tokens
   */
  @Override
  public List<Feature> extract(JCas jcas) throws TextClassificationException {

    double nbToken = 0;
    double minToken = -1; // -1 marks "no token seen yet"; emitted as-is for empty documents
    double maxToken = 0;
    double meanToken = 0;
    for (Token token : JCasUtil.select(jcas, Token.class)) {
      nbToken++;
      // Hoisted: the original recomputed getCoveredText().length() up to four times per token.
      int length = token.getCoveredText().length();
      if (minToken < 0 || minToken > length) {
        minToken = length;
      }
      if (maxToken < length) {
        maxToken = length;
      }
      meanToken += length;
    }
    // The original wrapped this division in try/catch, but dividing a double by zero
    // never throws in Java -- it yields NaN, so the intended 0 fallback never fired.
    // Check explicitly instead.
    if (nbToken > 0) {
      meanToken /= nbToken;
    } else {
      meanToken = 0;
    }

    List<Feature> featList = new ArrayList<Feature>();
    featList.add(new Feature("nb_" + TOKEN, nbToken));
    featList.add(new Feature("max_" + TOKEN + "_size", maxToken));
    featList.add(new Feature("min_" + TOKEN + "_size", minToken));
    featList.add(new Feature("mean_" + TOKEN + "_size", meanToken));
    return featList;
  }
예제 #9
0
파일: Postgres.java 프로젝트: n-/baleen
  /**
   * Persists the document, its metadata and its entities to Postgres inside a single
   * transaction. On failure the transaction is rolled back and the error is reported via the
   * monitor; the document is skipped rather than failing the pipeline.
   *
   * @param jCas the document to persist
   * @throws AnalysisEngineProcessException declared by the interface; DB failures are only logged
   */
  @Override
  protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
    Connection conn = postgresResource.getConnection();

    try {
      // Insert document and metadata into database
      Integer docKey = executeDocInsert(jCas);
      for (Metadata md : JCasUtil.select(jCas, Metadata.class)) {
        executeDocMetadataInsert(docKey, md);
      }

      processEntities(jCas, docKey);

      // Commit only after document, metadata and entities were all inserted.
      conn.commit();
    } catch (SQLException | BaleenException e) {
      getMonitor().error("Unable to insert document into Postgres database", e);
      if (conn != null) {
        try {
          conn.rollback();
        } catch (SQLException e2) {
          getMonitor()
              .error(
                  "Unable to rollback insertion - state of the database may have been left inconsistent",
                  e2);
        }
      }
    }
  }
예제 #10
0
 /**
  * Renders one HTML table cell listing the given senses as "confidence&lt;TAB&gt;id" entries
  * separated by {@code <br />}. Senses not contained in {@code highlightSense} are greyed out;
  * writes {@code NA} when the array is null or empty.
  *
  * @param senses senses to render (may be null or empty)
  * @param senseIdClass CSS class written on the cell
  * @param highlightSense senses to keep highlighted; others are dimmed (null dims nothing)
  * @throws IOException if writing to the output fails
  */
 @Override
 protected void tableCellSenseArray(
     FSArray senses, SenseType senseIdClass, Collection<Sense> highlightSense) throws IOException {
   output.write("\t\t<td class='" + senseIdClass + "'>");
   if (senses == null || senses.size() == 0) {
     output.write(NA);
   } else {
     boolean firstSense = true;
     for (Sense s : JCasUtil.select(senses, Sense.class)) {
       // Separate subsequent senses with a line break.
       if (!firstSense) {
         output.write("<br />");
       }
       // Evaluate the highlight predicate once instead of twice per sense.
       boolean dimmed = highlightSense != null && !highlightSense.contains(s);
       if (dimmed) {
         output.write("<span style='color: #888'>");
       }
       output.write(String.format("%.2f", s.getConfidence()) + "\t" + s.getId());
       if (dimmed) {
         output.write("</span>");
       }
       firstSense = false;
     }
   }
   output.write("</td>");
   output.newLine();
 }
예제 #11
0
파일: Baseline.java 프로젝트: pakchoi/Iyas
  /**
   * Runs the analyzer on the given sentence and copies the result into {@code emptyCas}, using
   * the normalized text as the document text.
   *
   * @param analyzer analyzer to run over the sentence
   * @param emptyCas target CAS; it is reset and then filled with the analysis result
   * @param sentenceId identifier stored with the analyzed content
   * @param sentence raw sentence text; '/' and '~' are stripped before analysis
   * @return {@code emptyCas}, populated with the copied analysis and the normalized text
   */
  public JCas getPreliminarCas(
      Analyzer analyzer, JCas emptyCas, String sentenceId, String sentence) {
    this.preliminaryCas.reset();

    // Without this the annotator fails badly
    sentence = sentence.replaceAll("/", "");
    sentence = sentence.replaceAll("~", "");

    // Carry out preliminary analysis
    Analyzable content = new SimpleContent(sentenceId, sentence, ArabicAnalyzer.ARABIC_LAN);

    analyzer.analyze(this.preliminaryCas, content);

    // Copy data to a new CAS and use normalized text as DocumentText
    emptyCas.reset();
    emptyCas.setDocumentLanguage(ArabicAnalyzer.ARABIC_LAN);

    CasCopier.copyCas(this.preliminaryCas.getCas(), emptyCas.getCas(), false);

    String normalizedText =
        JCasUtil.selectSingle(this.preliminaryCas, NormalizedText.class).getText();
    emptyCas.setDocumentText(normalizedText);

    return emptyCas;
  }
예제 #12
0
  /**
   * Writes the test-sense table cell, classifying the result against the gold senses.
   *
   * <p>The cell type is {@code GOLDNA} when no gold senses exist, {@code TESTNA} when no test
   * senses exist, {@code CORRECT} when every best test sense id appears among the gold sense
   * ids, and {@code INCORRECT} otherwise.
   *
   * @param goldSenseArray gold-standard senses (may be null)
   * @param testSenseArray system-produced senses (may be null or empty)
   * @throws IOException if writing the cell fails
   */
  protected void tableCellTestResults(FSArray goldSenseArray, FSArray testSenseArray)
      throws IOException {
    Set<Sense> bestTestSenses = null;
    SenseType senseType;

    if (goldSenseArray == null) {
      senseType = SenseType.GOLDNA;
    } else if (testSenseArray == null || testSenseArray.size() == 0) {
      senseType = SenseType.TESTNA;
    } else {
      senseType = SenseType.CORRECT;
      Set<String> goldSenseIds = new TreeSet<String>();
      for (Sense s : JCasUtil.select(goldSenseArray, Sense.class)) {
        goldSenseIds.add(s.getId());
      }
      // Any best test sense outside the gold id set makes the whole cell incorrect.
      bestTestSenses = getBestSenses(testSenseArray);
      for (Sense s : bestTestSenses) {
        if (!goldSenseIds.contains(s.getId())) {
          senseType = SenseType.INCORRECT;
          break;
        }
      }
    }
    tableCellSenseArray(testSenseArray, senseType, bestTestSenses);
  }
  /**
   * Debug-logs the document id with its text, then the covered text of every detected brain
   * region.
   *
   * @param cas the document to log
   * @throws AnalysisEngineProcessException declared by the interface
   */
  @Override
  public void process(JCas cas) throws AnalysisEngineProcessException {
    LOG.debug(getHeaderDocId(cas) + "\t" + cas.getDocumentText());

    for (BrainRegion region : JCasUtil.select(cas, BrainRegion.class)) {
      LOG.debug(region.getCoveredText());
    }
  }
  /**
   * Emits the average number of tokens per sentence; a missing numeric value is emitted when
   * the document contains no sentences at all.
   *
   * @param jcas the document to analyze
   * @return a single tokens-per-sentence feature
   */
  @Override
  public List<Feature> extract(JCas jcas) throws TextClassificationException {
    List<Feature> features = new ArrayList<Feature>();

    double sentenceCount = JCasUtil.select(jcas, Sentence.class).size();
    if (sentenceCount == 0) {
      // No denominator available: report an explicit missing value instead of NaN.
      features.add(
          new Feature(
              FN_TOKENS_PER_SENTENCE, new MissingValue(MissingValueNonNominalType.NUMERIC)));
      return features;
    }

    double tokenCount = JCasUtil.select(jcas, Token.class).size();
    features.add(new Feature(FN_TOKENS_PER_SENTENCE, tokenCount / sentenceCount));
    return features;
  }
예제 #15
0
  @Test
  public void testProcess() throws AnalysisEngineProcessException, ResourceInitializationException {
    jCas.setDocumentText("The fox jumps over the dog.");

    processJCas();

    final Sentence firstSentence = JCasUtil.select(jCas, Sentence.class).iterator().next();
    final List<Dependency> dependencies =
        JCasUtil.selectCovered(jCas, Dependency.class, firstSentence);

    // The concrete output is model-dependent, so only verify that a dependency was
    // created for each token: 6 words plus the final punctuation mark.
    assertEquals(7, dependencies.size());
  }
  @Test
  public void test() throws AnalysisEngineProcessException, ResourceInitializationException {
    jCas.setDocumentText("The fox jumps over the dog.");

    processJCas();

    final Sentence firstSentence = JCasUtil.select(jCas, Sentence.class).iterator().next();
    final List<PhraseChunk> chunks =
        JCasUtil.selectCovered(jCas, PhraseChunk.class, firstSentence);

    // Expect the four phrase chunks of the example sentence, in document order.
    Assert.assertEquals(4, chunks.size());
    Assert.assertEquals("The fox", chunks.get(0).getCoveredText());
    Assert.assertEquals("jumps over the dog", chunks.get(1).getCoveredText());
    Assert.assertEquals("over the dog", chunks.get(2).getCoveredText());
    Assert.assertEquals("the dog", chunks.get(3).getCoveredText());
  }
 /**
  * Groups all dependencies by their governor token.
  *
  * <p>Note: despite the name, the map values are the {@code Dependency} annotations whose
  * governor is the key token, not the child tokens themselves. Tokens that govern nothing are
  * absent from the map.
  *
  * @param jCas the JCas to read dependencies from
  * @return map from governor token to the set of dependencies it governs
  */
 public static HashMap<Token, Set<Dependency>> getChildNodesMap(JCas jCas) {
   HashMap<Token, Set<Dependency>> map = new HashMap<Token, Set<Dependency>>();
   for (Dependency dep : JCasUtil.select(jCas, Dependency.class)) {
     // computeIfAbsent replaces the containsKey/put dance of the original.
     map.computeIfAbsent(dep.getGovernor(), governor -> new HashSet<Dependency>()).add(dep);
   }
   return map;
 }
 /**
  * Detects parentheticals either inside each configured window annotation or, when no window
  * type is configured, over the whole document text.
  *
  * @param jCas the document to process
  * @throws AnalysisEngineProcessException declared by the interface
  */
 @Override
 public void process(JCas jCas) throws AnalysisEngineProcessException {
   if (windowClass == null) {
     // No window type configured: scan the entire document once.
     createParentheticals(jCas, jCas.getDocumentText(), 0);
     return;
   }
   for (Annotation window : JCasUtil.select(jCas, windowClass)) {
     createParentheticals(jCas, window.getCoveredText(), window.getBegin());
   }
 }
  /**
   * Emits a boolean feature telling whether the classification unit's word is a noun compound.
   * Non-nouns are always reported as non-compounds.
   *
   * @param view the JCas view (unused beyond the unit's annotations)
   * @param classificationUnit the unit whose first token and lemma are inspected
   * @return a single IS_COMPOUND feature
   * @throws TextClassificationException if the compound resource cannot be initialized
   */
  @Override
  public Set<Feature> extract(JCas view, TextClassificationUnit classificationUnit)
      throws TextClassificationException {

    // The unit's first token carries its POS; the first lemma supplies the word form.
    POS pos = JCasUtil.selectCovered(Token.class, classificationUnit).get(0).getPos();
    String word =
        JCasUtil.selectCovered(Lemma.class, classificationUnit).get(0).getValue().toLowerCase();

    boolean isCompound = false;
    // Compound detection is only meaningful for nouns.
    if (pos instanceof N) {
      try {
        isCompound = isCompound(word);
      } catch (ResourceInitializationException e) {
        throw new TextClassificationException(e);
      }
    }

    return new Feature(IS_COMPOUND, isCompound).asSet();
  }
 /**
  * Re-initializes the tokenizer state, then runs sentence boundary detection over the covered
  * text of every non-empty {@code Part} in the CAS. Intended to be invoked from a Ruta script.
  *
  * @param jCas the CAS whose parts are tokenized into sentences
  */
 public void initializeSentenceBoundaryDetection(JCas jCas) {
   Collection<Part> parts = JCasUtil.select(jCas, Part.class);
   initialize();
   for (Part part : parts) {
     if (part == null) {
       continue;
     }
     String scope = part.getCoveredText();
     // Skip parts whose covered text is empty or whitespace-only.
     if (scope != null && !scope.trim().isEmpty()) {
       sentenceBoundaryTokenizer(jCas, part, scope);
     }
   }
 }
예제 #21
0
  /**
   * Demo pipeline: builds a German JCas with a sample text, runs segmentation, Stanford NER and
   * a CAS dump, then prints every named entity that was found.
   */
  public static void main(String[] args) throws Exception {
    JCas jCas = JCasFactory.createJCas();
    jCas.setDocumentLanguage("de");
    jCas.setDocumentText(
        "Die Fossillagerstätte Geiseltal befindet sich im ehemaligen Braunkohlerevier des Geiseltales südlich der Stadt Halle in Sachsen-Anhalt. Sie ist eine bedeutende Fundstelle heute ausgestorbener Pflanzen und Tiere aus der Zeit des Mittleren Eozäns vor 48 bis 41 Millionen Jahren. Im Geiseltal wurde nachweislich seit 1698 erstmals Kohle gefördert, die ersten Fossilien kamen aber erst Anfang des 20. Jahrhunderts eher zufällig zu Tage. Planmäßige wissenschaftliche Ausgrabungen begannen 1925 seitens der Martin-Luther-Universität Halle-Wittenberg. Unterbrochen durch den Zweiten Weltkrieg, können die Untersuchungen in zwei Forschungsphasen untergliedert werden. Aufgrund der zunehmenden Auskohlung der Rohstofflager kamen die Ausgrabungen Mitte der 1980er allmählich zum Erliegen und endeten endgültig zu Beginn des dritten Jahrtausends.");

    AnalysisEngineDescription segmenter =
        AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription recognizer =
        AnalysisEngineFactory.createEngineDescription(StanfordNamedEntityRecognizer.class);
    AnalysisEngineDescription dumpWriter =
        AnalysisEngineFactory.createEngineDescription(CasDumpWriter.class);

    SimplePipeline.runPipeline(jCas, segmenter, recognizer, dumpWriter);

    for (NamedEntity entity : JCasUtil.select(jCas, NamedEntity.class)) {
      System.out.println("Found NE: " + entity.getValue() + ", " + entity.getCoveredText());
    }
  }
예제 #22
0
  /**
   * Classifies the question in the CAS by serializing its constituency tree and asking the
   * classifier for the most confident class; the result is attached to the CAS as an
   * annotation. CASes without a constituency parse are left untouched.
   *
   * @param cas the question CAS to classify
   * @throws AnalysisEngineProcessException declared by the interface
   */
  @Override
  public void process(JCas cas) throws AnalysisEngineProcessException {

    // Return early if no Constituent is present
    if (!JCasUtil.exists(cas, Constituent.class)) return;

    // Lazy loading
    if (this.classifier == null) {
      init();
    }

    String tree = ts.serializeTree(RichTree.getConstituencyTree(cas), this.parameterList);

    // The classifier expects the serialized tree wrapped in |BT| ... |ET| markers.
    String example = "|BT| " + tree + " |ET|";
    String questionClass = this.classifier.getMostConfidentModel(example);

    addQuestionClassAnnotation(cas, questionClass);
  }
예제 #23
0
파일: Postgres.java 프로젝트: n-/baleen
  /**
   * Groups the document's entities by their coreference target and inserts each coreference
   * cluster together. Entities without a referent are given a fresh singleton target so they
   * form their own cluster.
   *
   * @param jCas the document whose entities are inserted
   * @param docKey database key of the already-inserted document row
   * @throws SQLException if an insert fails
   */
  private void processEntities(JCas jCas, Integer docKey) throws SQLException {
    // Insert entities
    Map<ReferenceTarget, List<Entity>> coreferenceEntities = new HashMap<>();

    for (Entity ent : JCasUtil.select(jCas, Entity.class)) {
      ReferenceTarget rt = ent.getReferent();
      if (rt == null) {
        rt = new ReferenceTarget(jCas);
      }
      // computeIfAbsent avoids the getOrDefault + unconditional put of the original.
      coreferenceEntities.computeIfAbsent(rt, key -> new ArrayList<>()).add(ent);
    }

    for (List<Entity> entities : coreferenceEntities.values()) {
      processCoreferencedEntities(docKey, entities);
    }
  }
예제 #24
0
  /**
   * Writes the sentences of the CAS to {@code aOut} in a CoNLL-2006-style format: one token per
   * line with "TOKEN POS CHUNK" columns (chunks IOB-encoded), sentences separated by a blank
   * line.
   *
   * @param aJCas source CAS
   * @param aOut destination writer
   */
  private void convert(JCas aJCas, PrintWriter aOut) {
    Type chunkType = JCasUtil.getType(aJCas, Chunk.class);
    Feature chunkValue = chunkType.getFeatureByBaseName("chunkValue");

    for (Sentence sentence : select(aJCas, Sentence.class)) {
      HashMap<Token, Row> ctokens = new LinkedHashMap<Token, Row>();

      // Tokens
      List<Token> tokens = selectCovered(Token.class, sentence);

      // Chunks
      IobEncoder encoder = new IobEncoder(aJCas.getCas(), chunkType, chunkValue);

      for (int i = 0; i < tokens.size(); i++) {
        Row row = new Row();
        row.id = i + 1; // CoNLL token ids are 1-based
        row.token = tokens.get(i);
        row.chunk = encoder.encode(tokens.get(i));
        ctokens.put(row.token, row);
      }

      // Write sentence in CONLL 2006 format
      for (Row row : ctokens.values()) {
        String pos = UNUSED;
        if (writePos && (row.token.getPos() != null)) {
          POS posAnno = row.token.getPos();
          pos = posAnno.getPosValue();
        }

        String chunk = UNUSED;
        if (writeChunk && (row.chunk != null)) {
          // NOTE(review): the token is encoded a second time here rather than reusing
          // row.chunk -- presumably intentional (IobEncoder may be stateful); confirm
          // before simplifying.
          chunk = encoder.encode(row.token);
        }

        aOut.printf("%s %s %s\n", row.token.getCoveredText(), pos, chunk);
      }

      aOut.println();
    }
  }
예제 #25
0
  @Test
  public void testRead() throws Exception {

    // Read everything from the Mongo collection; exactly one document is expected.
    List<JCas> read =
        asList(createReader(MongoCollectionReader.class, BlueUima.PARAM_DB_CONNECTION, conn));
    assertEquals(1, read.size());

    JCas jCas = read.get(0);
    assertEquals(UimaTests.TEST_SENTENCE, jCas.getDocumentText());

    // Dump all annotations for manual inspection.
    for (Annotation annotation : JCasUtil.select(jCas, Annotation.class)) {
      System.out.println(annotation);
    }

    BiolexiconDictTerm term = selectSingle(jCas, BiolexiconDictTerm.class);
    assertNotNull(term);
    assertEquals("theId", term.getEntityId());

    Header header = selectSingle(jCas, Header.class);
    assertNotNull(header);
    assertEquals("17", header.getDocId());
  }
  /**
   * Annotates a {@code SemanticField} on every token covered by a matching cover annotation.
   * Covers failing the optional JXPath constraint are skipped; a cover that is itself a token
   * is used directly.
   *
   * @param aJCas the document to annotate
   * @throws AnalysisEngineProcessException if the semantic field resource cannot be accessed
   */
  @Override
  public void process(JCas aJCas) throws AnalysisEngineProcessException {
    CAS cas = aJCas.getCas();

    for (AnnotationFS cover : CasUtil.select(cas, CasUtil.getAnnotationType(cas, annotationType))) {

      // Skip covers that do not satisfy the optional JXPath constraint.
      if (constraint != null) {
        JXPathContext ctx = JXPathContext.newContext(cover);
        if (!ctx.iterate(constraint).hasNext()) {
          continue;
        }
      }

      // A token cover contributes itself; any other type contributes its covered tokens.
      Collection<Token> tokens;
      if (cover instanceof Token) {
        tokens = Collections.singleton((Token) cover);
      } else {
        tokens = JCasUtil.selectCovered(aJCas, Token.class, cover);
      }

      for (Token token : tokens) {
        try {
          String semanticField = semanticFieldResource.getSemanticTag(token);
          SemanticField annotation = new SemanticField(aJCas, token.getBegin(), token.getEnd());
          annotation.setValue(semanticField);
          annotation.addToIndexes();
        } catch (ResourceAccessException e) {
          throw new AnalysisEngineProcessException(e);
        }
      }
    }
  }
예제 #27
0
  /**
   * Serializes the question's SV, LAT and Concept annotations to a one-line JSON-like record
   * of the form {@code {"qId": "...", "SV": [...], "LAT": [...], "Concept": [...], }} and
   * emits it via {@code output}.
   *
   * @param jcas the question CAS to serialize
   * @throws AnalysisEngineProcessException declared by the interface
   */
  public synchronized void process(JCas jcas) throws AnalysisEngineProcessException {
    // The original wrapped this plain assignment in try/catch; an assignment cannot
    // throw, so the dead handler was removed.
    JCas questionView = jcas;

    QuestionInfo qi = JCasUtil.selectSingle(questionView, QuestionInfo.class);
    /*{"qId": "...", "sv": "...", "LAT" : [ {...}, {...}, {...}]} */
    StringBuilder line = new StringBuilder();
    line.append("{\"qId\": \"").append(qi.getQuestionId()).append("\", ");

    // SV (selection verb) covered texts.
    line.append("\"SV\": [");
    for (Iterator<SV> svIterator = JCasUtil.select(jcas, SV.class).iterator();
        svIterator.hasNext(); ) {
      SV sv = svIterator.next();
      line.append("\"").append(sv.getCoveredText()).append("\"");
      if (svIterator.hasNext()) {
        line.append(", ");
      }
    }
    line.append("], ");

    // LAT (lexical answer type) records.
    line.append("\"LAT\": [");
    for (Iterator<LAT> latIterator = JCasUtil.select(jcas, LAT.class).iterator();
        latIterator.hasNext(); ) {
      LAT l = latIterator.next();
      /*{"synset" : "...", "text" : "...", "specificity" : "..." "type" : "..."}*/
      line.append("{");
      if (l.getSynset() != 0) { // only add synset when it is not zero
        line.append("\"synset\": \"").append(l.getSynset()).append("\", ");
      }
      line.append("\"text\": \"").append(l.getText()).append("\",");
      line.append(" \"specificity\": \"").append(l.getSpecificity()).append("\", ");
      line.append("\"type\": \"").append(l.getClass().getSimpleName()).append("\"}");
      if (latIterator.hasNext()) {
        line.append(", ");
      }
    }
    line.append("], ");

    // Concept records.
    line.append("\"Concept\": [");
    for (Iterator<Concept> conceptIterator = JCasUtil.select(jcas, Concept.class).iterator();
        conceptIterator.hasNext(); ) {
      Concept c = conceptIterator.next();
      line.append("{");
      // BUG FIX: replaceAll("\"", "\\\"") was a no-op because backslash is an escape
      // character in the regex *replacement* string (the replacement \" denotes a
      // literal quote). Use the literal String.replace so quotes are actually
      // escaped as \" in the emitted record.
      line.append("\"fullLabel\": \"")
          .append(c.getFullLabel().replace("\"", "\\\""))
          .append("\", ");
      line.append("\"cookedLabel\": \"")
          .append(c.getCookedLabel().replace("\"", "\\\""))
          .append("\", ");
      line.append("\"pageID\": \"").append(c.getPageID()).append("\"");
      line.append("}");
      if (conceptIterator.hasNext()) {
        line.append(", ");
      }
    }
    line.append("], ");

    // NOTE(review): the trailing ", " before '}' makes this technically invalid JSON;
    // kept for byte-compatibility with the original output format.
    line.append("}");
    output(line.toString());
    // Question q = QuestionDashboard.getInstance().get(qi.getQuestionId());
    // QuestionDashboard.getInstance().finishQuestion(q);
  }
예제 #28
0
  /**
   * Recursively converts a UIMA constituency annotation into a Stanford {@code Tree}.
   *
   * <p>Constituent annotations become inner nodes labelled with the constituent type. Token
   * leaves are wrapped in a preterminal node carrying the POS value, and the leaf label records
   * the surrounding whitespace via CoreAnnotations Before/AfterAnnotation so the original text
   * can be reconstructed.
   *
   * @param root the annotation to convert (a non-leaf Constituent, or a Token)
   * @param tFact factory used to create tree nodes and leaves
   * @return the Stanford tree rooted at the converted annotation
   */
  public static Tree createStanfordTree(Annotation root, TreeFactory tFact) {
    JCas aJCas;
    try {
      aJCas = root.getCAS().getJCas();
    } catch (CASException e) {
      throw new IllegalStateException("Unable to get JCas from JCas wrapper");
    }

    // define the new (root) node
    Tree rootNode;

    // before we can create a node, we must check if we have any children (we have to know
    // whether to create a node or a leaf - not very dynamic)
    if (root instanceof Constituent && !isLeaf((Constituent) root)) {
      Constituent node = (Constituent) root;
      List<Tree> childNodes = new ArrayList<Tree>();

      // get childNodes from child annotations
      FSArray children = node.getChildren();
      for (int i = 0; i < children.size(); i++) {
        childNodes.add(createStanfordTree(node.getChildren(i), tFact));
      }

      // now create the node with its children
      rootNode = tFact.newTreeNode(node.getConstituentType(), childNodes);

    } else {
      // Handle leaf annotations
      // Leafs are always Token-annotations
      // We also have to insert a Preterminal node with the value of the
      // POS-Annotation on the token
      // because the POS is not directly stored within the tree
      Token wordAnnotation = (Token) root;

      // create leaf-node for the tree
      Tree wordNode = tFact.newLeaf(wordAnnotation.getCoveredText());

      // create information about preceding and trailing whitespaces in the leaf node
      StringBuilder preWhitespaces = new StringBuilder();
      StringBuilder trailWhitespaces = new StringBuilder();

      List<Token> precedingTokenList = selectPreceding(aJCas, Token.class, wordAnnotation, 1);
      List<Token> followingTokenList = selectFollowing(aJCas, Token.class, wordAnnotation, 1);

      // Gap between the previous token's end and this token's begin = leading whitespace.
      if (precedingTokenList.size() > 0) {
        Token precedingToken = precedingTokenList.get(0);
        int precedingWhitespaces = wordAnnotation.getBegin() - precedingToken.getEnd();
        for (int i = 0; i < precedingWhitespaces; i++) {
          preWhitespaces.append(" ");
        }
      }
      // Gap between this token's end and the next token's begin = trailing whitespace.
      if (followingTokenList.size() > 0) {
        Token followingToken = followingTokenList.get(0);
        int trailingWhitespaces = followingToken.getBegin() - wordAnnotation.getEnd();
        for (int i = 0; i < trailingWhitespaces; i++) {
          trailWhitespaces.append(" ");
        }
      }

      // write whitespace information as CoreAnnotation.BeforeAnnotation and
      // CoreAnnotation.AfterAnnotation to the node add annotation to list and write back to
      // node label
      ((CoreLabel) wordNode.label())
          .set(CoreAnnotations.BeforeAnnotation.class, preWhitespaces.toString());
      ((CoreLabel) wordNode.label())
          .set(CoreAnnotations.AfterAnnotation.class, trailWhitespaces.toString());

      // get POS-annotation
      // get the token that is covered by the POS
      List<POS> coveredPos = JCasUtil.selectCovered(aJCas, POS.class, wordAnnotation);
      // the POS should only cover one token
      assert coveredPos.size() == 1;
      POS pos = coveredPos.get(0);

      // create POS-Node in the tree and attach word-node to it
      rootNode = tFact.newTreeNode(pos.getPosValue(), Arrays.asList((new Tree[] {wordNode})));
    }

    return rootNode;
  }
  /**
   * Prints all answers from the "AnswerHitlist" view (sorted by the "SortedAnswers" index) to
   * stdout as a numbered list with confidences and resource IRIs, stores the same text in
   * {@code final_answer}, and marks the question finished on the dashboard.
   *
   * @param jcas CAS with "Question" and "AnswerHitlist" views
   * @throws AnalysisEngineProcessException if either view cannot be obtained
   */
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    String rtn = "";

    JCas questionView, answerHitlist;
    try {
      questionView = jcas.getView("Question");
      answerHitlist = jcas.getView("AnswerHitlist");
    } catch (Exception e) {
      throw new AnalysisEngineProcessException(e);
    }
    QuestionInfo qi = JCasUtil.selectSingle(questionView, QuestionInfo.class);
    // Answers come back pre-sorted via the dedicated index.
    FSIndex idx = answerHitlist.getJFSIndexRepository().getIndex("SortedAnswers");
    FSIterator answers = idx.iterator();
    if (answers.hasNext()) {
      // int counter = 0;
      int i = 1;
      while (answers.hasNext()) {
        Answer answer = (Answer) answers.next();
        StringBuilder sb = new StringBuilder();
        sb.append(i++);
        sb.append(". ");
        sb.append(answer.getText());
        sb.append(" (conf. ");
        sb.append(answer.getConfidence());
        sb.append(")");
        /* PRINT the passages assigned to this answer
        sb.append("\n");
        for(int ID: answer.getPassageIDs().toArray()){
        	sb.append("		");
        	sb.append(counter++);
        	sb.append(". ");
        	sb.append(QuestionDashboard.getInstance().getPassage(ID));
        	sb.append(" (");
        	sb.append(ID);
        	sb.append(")");
        	sb.append("\n");

        }
        counter = 0;
        */
        // Append any linked resource IRIs after the confidence.
        if (answer.getResources() != null) {
          for (FeatureStructure resfs : answer.getResources().toArray()) {
            sb.append(" ");
            sb.append(((AnswerResource) resfs).getIri());
          }
        }
        System.out.println(sb.toString());

        rtn = rtn + sb.toString() + "\n";
      }
    } else {
      System.out.println("No answer found.");

      rtn = "No answer found.";
    }
    Question q = QuestionDashboard.getInstance().get(qi.getQuestionId());
    // q.setAnswers(answers); XXX
    QuestionDashboard.getInstance().finishQuestion(q);

    final_answer = rtn;
  }
예제 #30
0
 /**
  * Reports the classification outcome for this document on stdout, pairing the document URI
  * with the assigned category.
  *
  * @param jCas the classified document
  * @throws AnalysisEngineProcessException declared by the interface
  */
 @Override
 public void process(JCas jCas) throws AnalysisEngineProcessException {
   UsenetDocument document = JCasUtil.select(jCas, UsenetDocument.class).iterator().next();
   String message =
       "classified " + ViewUriUtil.getURI(jCas) + " as " + document.getCategory() + ".";
   System.out.println(message);
 }