@Override protected List<Keyterm> getKeyterms(String question) { String document = question; System.out.println("CHECK " + question); // int indexSpace = documentLine.indexOf(' '); // String docID = documentLine.substring(0, indexSpace); // String document = documentLine.substring(indexSpace + 1); Chunker extractor = null; try { extractor = (Chunker) AbstractExternalizable.readObject(modelFile); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ClassNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } List<Keyterm> keyterms = new ArrayList<Keyterm>(); Chunking entities = extractor.chunk(document); for (Chunk ne : entities.chunkSet()) { // Annotations String keyWord = question.substring(ne.start(), ne.end()); keyterms.add(new Keyterm(keyWord)); } // store the keyterms from chunker here from annotator return keyterms; }
/**
 * Manual smoke test: loads the chunker model from a fixed relative path,
 * chunks a sample question and prints the text of every chunk to standard
 * output.
 *
 * <p>On any failure, prints "oops" followed by the stack trace to standard
 * error so the underlying cause is visible.
 *
 * @param args ignored
 */
public static void main(String[] args) {
  String modelFile = "src/main/resources/models/ne-en-bio-genetag.HmmChunker";
  String question =
      "What is the role of PrnP in mad cow disease (bovine spongiform encephalitis, BSE)?";
  try {
    Chunker achunker = (Chunker) AbstractExternalizable.readObject(new File(modelFile));
    Chunking chunking = achunker.chunk(question);
    for (Chunk chunk : chunking.chunkSet()) {
      System.out.println(question.substring(chunk.start(), chunk.end()));
    }
  } catch (Exception e) {
    System.err.println("oops");
    // Surface the cause; previously it was discarded, leaving "oops" as the
    // only diagnostic.
    e.printStackTrace();
  }
}
/**
 * Collects keyterms from {@code text} using two sources: the chunks reported
 * by {@code chunker}, and space-delimited spans found in
 * {@code geneDictionary}.
 *
 * <p>No de-duplication is performed, so a term found by both sources appears
 * twice.
 *
 * @param text the text to extract keyterms from
 * @return chunker keyterms followed by dictionary keyterms
 */
@Override
protected List<Keyterm> getKeyterms(String text) {
  List<Keyterm> keyTerms = new ArrayList<Keyterm>();

  // Pass 1: every chunk reported by the chunker.
  for (Chunk chunk : chunker.chunk(text).chunkSet()) {
    keyTerms.add(new Keyterm(text.substring(chunk.start(), chunk.end())));
  }

  // Pass 2: dictionary lookup over spans that start at the beginning of the
  // text or right after a space.
  final int length = text.length();
  // Upper bound on how far past `start` a span end may lie (value kept from
  // the original; its rationale is not visible here).
  final int maxSpan = 97;
  for (int start = 0; start < length; start++) {
    if (start != 0 && text.charAt(start - 1) != ' ') {
      continue;
    }
    // `end` is exclusive and may now reach `length`. Previously it stopped at
    // length - 1, so a term ending exactly at the end of the text was never
    // looked up in full.
    for (int end = start + 1; end <= length && end < start + maxSpan; end++) {
      boolean atTextEnd = end == length;
      // Span that drops the final character; accepted by the original code
      // (presumably to skip a trailing '?' or '.' -- TODO confirm) and kept so
      // existing matches are preserved.
      boolean beforeLastChar = end == length - 1;
      if (!atTextEnd && !beforeLastChar && text.charAt(end) != ' ') {
        continue;
      }
      String spanText = text.substring(start, end);
      if (geneDictionary.contains(spanText)) {
        keyTerms.add(new Keyterm(spanText));
      }
    }
  }
  return keyTerms;
}
/**
 * Appends one keyterm, with component id "GENE", for every chunk that
 * {@code chunker} reports in the question.
 *
 * @param question the text handed to the chunker
 * @param keyterms the list to extend; it is modified in place
 * @return the same {@code keyterms} list that was passed in
 */
@Override
protected List<Keyterm> updateKeyterms(String question, List<Keyterm> keyterms) {
  for (Chunk mention : chunker.chunk(question).chunkSet()) {
    int begin = mention.start();
    int finish = mention.end();
    Keyterm geneTerm = new Keyterm(question.substring(begin, finish));
    geneTerm.setComponentId("GENE");
    keyterms.add(geneTerm);
  }
  return keyterms;
}
/**
 * Asserts that {@code chunker} splits {@code in} into exactly the expected
 * chunks.
 *
 * <p>The produced chunks are sorted with {@code Chunk.TEXT_ORDER_COMPARATOR}
 * before comparison, so the expected arrays must be given in that order.
 *
 * @param chunker the chunker under test
 * @param in the input text
 * @param types expected chunk types
 * @param starts expected chunk start offsets, parallel to {@code types}
 * @param ends expected chunk end offsets, parallel to {@code types}
 * @throws IOException declared by the original signature; nothing in this
 *         body visibly throws it
 */
void assertChunkingResult(Chunker chunker, String in, String[] types, int[] starts, int[] ends)
    throws IOException {
  Chunking chunking = chunker.chunk(in);
  Chunk[] chunks = chunking.chunkSet().toArray(new Chunk[0]);
  Arrays.sort(chunks, Chunk.TEXT_ORDER_COMPARATOR);

  // Sanity-check the expectation arrays themselves.
  assertEquals(types.length, starts.length);
  assertEquals(starts.length, ends.length);

  // Expected value first, matching every other assertion here, so a failure
  // message reports the expected and actual chunk counts the right way round.
  assertEquals(types.length, chunks.length);

  for (int i = 0; i < chunks.length; ++i) {
    assertEquals(types[i], chunks[i].type());
    assertEquals(starts[i], chunks[i].start());
    assertEquals(ends[i], chunks[i].end());
  }
}
/** annotate on the cas where we have gene names */ public void process(JCas aJCas) { if (chunker == null) { initialize(); } // get document text String docText = aJCas.getDocumentText(); Chunking chunking = chunker.chunk(docText); for (Chunk ck : chunking.chunkSet()) { GenTag annotation = new GenTag(aJCas); annotation.setBegin(ck.start()); annotation.setEnd(ck.end()); annotation.addToIndexes(); } }