/**
 * Extracts key terms from a question by running the serialized chunker read from
 * {@code modelFile} over the text and wrapping each chunk's substring in a {@code Keyterm}.
 *
 * @param question the text to chunk
 * @return one {@code Keyterm} per chunk reported by the chunker, in the chunk set's iteration order
 * @throws IllegalStateException if the chunker model cannot be read or its class cannot be found
 */
@Override
  protected List<Keyterm> getKeyterms(String question) {
    // Debug trace retained so existing console output is unchanged.
    System.out.println("CHECK " + question);

    // NOTE(review): the model is deserialized on every call; consider caching it in a field.
    final Chunker extractor;
    try {
      extractor = (Chunker) AbstractExternalizable.readObject(modelFile);
    } catch (IOException e) {
      // Fail fast with the cause attached instead of continuing with a null chunker.
      throw new IllegalStateException("Cannot read chunker model from " + modelFile, e);
    } catch (ClassNotFoundException e) {
      throw new IllegalStateException("Cannot load chunker class for model " + modelFile, e);
    }

    List<Keyterm> keyterms = new ArrayList<Keyterm>();
    Chunking entities = extractor.chunk(question);
    for (Chunk ne : entities.chunkSet()) {
      // Chunk offsets index into the chunked text, i.e. the question itself.
      String keyWord = question.substring(ne.start(), ne.end());
      keyterms.add(new Keyterm(keyWord));
    }
    return keyterms;
  }
 /**
  * Manual smoke test: reads the chunker model from a hard-coded path, chunks a sample
  * question, and prints the text of every chunk to standard output.
  *
  * @param args ignored
  */
 public static void main(String[] args) {
   String modelFile = "src/main/resources/models/ne-en-bio-genetag.HmmChunker";
   String question =
       "What is the role of PrnP in mad cow disease (bovine spongiform encephalitis, BSE)?";
   try {
     Chunker achunker = (Chunker) AbstractExternalizable.readObject(new File(modelFile));
     Chunking chunking = achunker.chunk(question);
     for (Chunk chunk : chunking.chunkSet()) {
       System.out.println(question.substring(chunk.start(), chunk.end()));
     }
   } catch (Exception e) {
     // Top-level boundary: keep the broad catch, but report what actually failed
     // instead of hiding it behind a bare "oops".
     System.err.println("oops: could not chunk with model " + modelFile + ": " + e);
   }
 }
  /**
   * Collects key terms from two sources: every chunk reported by {@code chunker}, followed by
   * every space-delimited span of {@code text} that {@code geneDictionary} contains.
   *
   * @param text the text to scan
   * @return chunker terms first, then dictionary matches; duplicates are not removed
   */
  @Override
  protected List<Keyterm> getKeyterms(String text) {
    List<Keyterm> collected = new ArrayList<Keyterm>();

    // Pass 1: terms found by the chunker.
    for (Chunk found : chunker.chunk(text).chunkSet()) {
      collected.add(new Keyterm(text.substring(found.start(), found.end())));
    }

    // Pass 2: dictionary lookup over spans that begin at a word start.
    final int length = text.length();
    final int lastIndex = length - 1;
    // NOTE(review): 97 bounds the span end relative to its start; the origin of this
    // value is not visible here - confirm before changing it.
    final int spanLimit = 97;

    int spanStart = 0;
    while (spanStart < length) {
      boolean atWordStart = spanStart == 0 || text.charAt(spanStart - 1) == ' ';
      if (atWordStart) {
        int endBound = Math.min(length, spanStart + spanLimit);
        for (int spanEnd = spanStart + 1; spanEnd < endBound; spanEnd++) {
          // A span may end just before a space, or at the last index of the text.
          // NOTE(review): ending at the last index excludes the final character -
          // presumably trailing punctuation; confirm this is intended.
          boolean atBoundary = spanEnd == lastIndex || text.charAt(spanEnd) == ' ';
          if (atBoundary) {
            String candidate = text.substring(spanStart, spanEnd);
            if (geneDictionary.contains(candidate)) {
              collected.add(new Keyterm(candidate));
            }
          }
        }
      }
      spanStart++;
    }
    return collected;
  }
 /**
  * Appends one key term per chunk found in the question, each tagged with the
  * component id {@code "GENE"}, to the supplied list.
  *
  * @param question the text to chunk
  * @param keyterms the list to extend; it is modified in place
  * @return the same {@code keyterms} list that was passed in
  */
 @Override
 protected List<Keyterm> updateKeyterms(String question, List<Keyterm> keyterms) {
   for (Chunk geneChunk : chunker.chunk(question).chunkSet()) {
     final int from = geneChunk.start();
     final int to = geneChunk.end();
     Keyterm geneTerm = new Keyterm(question.substring(from, to));
     geneTerm.setComponentId("GENE");
     keyterms.add(geneTerm);
   }
   return keyterms;
 }
// Example #5
// 0
  /**
   * Chunks {@code in} with the given chunker and asserts that the resulting chunks, sorted
   * with {@code Chunk.TEXT_ORDER_COMPARATOR}, match the expected types and offsets pairwise.
   *
   * @param chunker the chunker under test
   * @param in the text to chunk
   * @param types expected chunk types, in sorted chunk order
   * @param starts expected chunk start offsets, parallel to {@code types}
   * @param ends expected chunk end offsets, parallel to {@code types}
   * @throws IOException declared for callers; nothing visible in this method throws it directly
   */
  void assertChunkingResult(Chunker chunker, String in, String[] types, int[] starts, int[] ends)
      throws IOException {

    Chunking chunking = chunker.chunk(in);
    Chunk[] chunks = chunking.chunkSet().toArray(new Chunk[0]);
    Arrays.sort(chunks, Chunk.TEXT_ORDER_COMPARATOR);

    // The three expectation arrays must be parallel.
    assertEquals(types.length, starts.length);
    assertEquals(starts.length, ends.length);

    // Expected value first, so a failure message reports expected/actual the right way round.
    assertEquals(types.length, chunks.length);
    for (int i = 0; i < chunks.length; ++i) {
      assertEquals(types[i], chunks[i].type());
      assertEquals(starts[i], chunks[i].start());
      assertEquals(ends[i], chunks[i].end());
    }
  }
// Example #6
// 0
  /**
   * Chunks the document text of the given CAS and adds one {@code GenTag} annotation to the
   * CAS indexes per chunk, spanning that chunk's start and end offsets.
   *
   * @param aJCas the CAS whose document text is chunked and annotated
   */
  public void process(JCas aJCas) {
    // Call initialize() when no chunker is set yet (presumably it assigns the field - confirm).
    if (null == chunker) {
      initialize();
    }

    final Chunking mentions = chunker.chunk(aJCas.getDocumentText());
    for (Chunk mention : mentions.chunkSet()) {
      GenTag tag = new GenTag(aJCas);
      tag.setBegin(mention.start());
      tag.setEnd(mention.end());
      tag.addToIndexes();
    }
  }