/**
 * Writes the trainer's state to the supplied object output.
 *
 * <p>Fields are written in this fixed order: the training character count, the language
 * model, a boolean telling whether a tokenizer factory is present, then (only when it is
 * present) the tokenizer factory followed by the token counter, and finally the edit
 * distance. The tokenizer factory and the edit distance are handed to
 * {@code AbstractExternalizable.serializeOrCompile}; the other objects are written with
 * {@code writeObject}.
 *
 * @param objOut destination the state is written to
 * @throws IOException if any of the underlying writes fails
 */
public void writeExternal(ObjectOutput objOut) throws IOException {
    objOut.writeLong(this.mTrainer.mNumTrainingChars);
    objOut.writeObject(this.mTrainer.mLM);

    if (this.mTrainer.mTokenizerFactory == null) {
        // No tokenizer: only the flag is written, nothing else for this section.
        objOut.writeBoolean(false);
    } else {
        objOut.writeBoolean(true);
        AbstractExternalizable.serializeOrCompile(this.mTrainer.mTokenizerFactory, objOut);
        objOut.writeObject(this.mTrainer.mTokenCounter);
    }

    AbstractExternalizable.serializeOrCompile(this.mTrainer.mEditDistance, objOut);
}
/**
 * Returns a naive Bayes classifier, either loaded from a previously compiled model or
 * freshly trained on the tweet corpus found on the classpath.
 *
 * <p>When training, every corpus line has its stop words removed; a line whose first
 * space-separated token equals {@code POSITIVE} is trained as positive and every other line
 * is trained as negative. The trained classifier is then compiled to a file named
 * {@code "naiveBayesModel"} (relative to the working directory) and returned.
 *
 * <p>NOTE(review): {@code trainingDataFile} is not read; the corpus always comes from
 * {@code INPUT_TWEETS_ORIG_TXT}. The {@code NEUTRAL} category is declared but no line is
 * ever trained under it. Both are kept as in the original; confirm whether that is intended.
 *
 * @param readExistingModel {@code true} to load the serialized model instead of training
 * @param trainingDataFile currently unused (see note above)
 * @return the loaded or newly trained classifier
 * @throws IOException if the corpus resource is missing or unreadable, or the model cannot
 *     be read or written
 * @throws ClassNotFoundException if the serialized model references an unknown class
 */
private LMClassifier<?, ?> createModelInputAndTrain(
    boolean readExistingModel, String trainingDataFile)
    throws IOException, ClassNotFoundException {
  final String serializedModelName = "naiveBayesModel";
  if (readExistingModel) {
    return deserialize(serializedModelName);
  }

  // Fail with a clear message instead of a NullPointerException inside IOUtils when the
  // corpus is not on the classpath.
  java.io.InputStream corpus = classLoader.getResourceAsStream(INPUT_TWEETS_ORIG_TXT);
  if (corpus == null) {
    throw new IOException(
        "Training resource not found on classpath: " + INPUT_TWEETS_ORIG_TXT);
  }
  List<String> lines;
  try {
    lines = IOUtils.readLines(corpus);
  } finally {
    // The stream was previously left open.
    corpus.close();
  }

  LMClassifier<?, ?> cls =
      NaiveBayesClassifier.createNGramProcess(new String[] {POSITIVE, NEGATIVE, NEUTRAL}, 7);
  Classification posClassification = new Classification(POSITIVE);
  Classification negClassification = new Classification(NEGATIVE);

  for (String line : lines) {
    // Stop words are removed once per line; the label is the first token of the result.
    String filtered = NLPUtils.removeStopWords(line);
    boolean positive = POSITIVE.equals(filtered.split(" ")[0]);
    Classified<String> classified =
        new Classified<String>(filtered, positive ? posClassification : negClassification);
    ((ObjectHandler) cls).handle(classified);
  }

  AbstractExternalizable.compileTo((Compilable) cls, new File(serializedModelName));
  return cls;
}
@Override protected List<Keyterm> getKeyterms(String question) { String document = question; System.out.println("CHECK " + question); // int indexSpace = documentLine.indexOf(' '); // String docID = documentLine.substring(0, indexSpace); // String document = documentLine.substring(indexSpace + 1); Chunker extractor = null; try { extractor = (Chunker) AbstractExternalizable.readObject(modelFile); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ClassNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } List<Keyterm> keyterms = new ArrayList<Keyterm>(); Chunking entities = extractor.chunk(document); for (Chunk ne : entities.chunkSet()) { // Annotations String keyWord = question.substring(ne.start(), ne.end()); keyterms.add(new Keyterm(keyWord)); } // store the keyterms from chunker here from annotator return keyterms; }
/**
 * Initializes this resource by loading the classifier from the configured model file.
 *
 * <p>{@code modelFileUrl} is converted to a local file, checked for existence, and then
 * deserialized into {@code classifier}.
 *
 * @return this resource
 * @throws ResourceInstantiationException if no model URL is set, the URL is malformed or
 *     does not denote a local file, the file does not exist, or the model cannot be read or
 *     deserialized
 */
@SuppressWarnings("rawtypes")
public Resource init() throws ResourceInstantiationException {
  if (modelFileUrl == null) {
    throw new ResourceInstantiationException("No model file provided!");
  }

  try {
    modelFile = new File(modelFileUrl.toURI());
  } catch (URISyntaxException e) {
    throw new ResourceInstantiationException(e);
  } catch (IllegalArgumentException e) {
    // File(URI) rejects URIs that are not hierarchical "file:" URIs (e.g. jar: or http:);
    // report that through the declared exception type instead of an unchecked one.
    throw new ResourceInstantiationException(e);
  }

  if (!modelFile.exists()) {
    throw new ResourceInstantiationException(
        "modelFile:" + modelFileUrl.toString() + " does not exists");
  }

  try {
    classifier = (LMClassifier) AbstractExternalizable.readObject(modelFile);
  } catch (IOException e) {
    throw new ResourceInstantiationException(e);
  } catch (ClassNotFoundException e) {
    throw new ResourceInstantiationException(e);
  }
  return this;
}
/**
 * Writes the compiled form of the trainer's state to the supplied object output.
 *
 * <p>Fields are written in this fixed order: the language model (via its own
 * {@code compileTo}), a boolean telling whether a tokenizer factory is present, then (only
 * when it is present) a {@code HashSet} copy of the token counter's key set, and finally the
 * edit distance via {@code AbstractExternalizable.compileOrSerialize}.
 *
 * @param objOut destination the state is written to
 * @throws IOException if any of the underlying writes fails
 */
public void writeExternal(ObjectOutput objOut) throws IOException {
    this.mTrainer.mLM.compileTo(objOut);

    if (this.mTrainer.mTokenizerFactory == null) {
        // No tokenizer: only the flag is written for this section.
        objOut.writeBoolean(false);
    } else {
        objOut.writeBoolean(true);
        // Only the keys of the counter are written, copied into a fresh HashSet.
        objOut.writeObject(new HashSet(this.mTrainer.mTokenCounter.keySet()));
    }

    AbstractExternalizable.compileOrSerialize(this.mTrainer.mEditDistance, objOut);
}
@Override public void initialize(UimaContext c) throws ResourceInitializationException { super.initialize(c); try { nerModelFile = new File( new URI( AustinMaKeytermExtractor.class .getResource("/ne-en-bio-genetag.HmmChunker") .toString())); chunker = (Chunker) AbstractExternalizable.readObject(nerModelFile); } catch (IOException e) { System.err.println("IOException in creating chunker"); throw new ResourceInitializationException( "Unable to load NER model file", "load_ner_model_error", new Object[] {nerModelFile}, e); } catch (ClassNotFoundException e) { System.err.println("ClassNotFoundException in creating chunker"); throw new ResourceInitializationException( "Unable to load NER model file", "load_ner_model_error", new Object[] {nerModelFile}, e); } catch (URISyntaxException e) { e.printStackTrace(); throw new ResourceInitializationException( "Unable to load NER model file", "load_ner_model_error", new Object[] {nerModelFile}, e); } // Read in the geneDictionary geneDictionary = new HashSet<String>(); try { File geneDictionaryFile = new File(new URI(AustinMaKeytermExtractor.class.getResource("/ref.dic").toString())); BufferedReader reader = new BufferedReader(new FileReader(geneDictionaryFile)); String line; try { while ((line = reader.readLine()) != null) geneDictionary.add(line); } catch (IOException e) { System.err.println("IOException in reading gene dictionary file"); throw new ResourceInitializationException( "Unable to load gene dictionary file", "load_gene_dic_error", new Object[] {}, e); } } catch (FileNotFoundException e) { System.err.println("FileNotFoundException in reading gene dictionary file"); throw new ResourceInitializationException( "Unable to find gene dictionary file", "load_gene_dic_error", new Object[] {}, e); } catch (URISyntaxException e1) { // TODO Auto-generated catch block e1.printStackTrace(); throw new ResourceInitializationException( "Unable to find gene dictionary file", "load_gene_dic_error", new Object[] {}, e1); } }
/**
 * Demo entry point: loads the chunker model from a fixed path, chunks a sample question and
 * prints the text of every chunk on its own line.
 *
 * @param args ignored
 */
public static void main(String[] args) {
  String modelFile = "src/main/resources/models/ne-en-bio-genetag.HmmChunker";
  String question =
      "What is the role of PrnP in mad cow disease (bovine spongiform encephalitis, BSE)?";
  try {
    Chunker achunker = (Chunker) AbstractExternalizable.readObject(new File(modelFile));
    Chunking chunking = achunker.chunk(question);
    for (Chunk chunk : chunking.chunkSet()) {
      System.out.println(question.substring(chunk.start(), chunk.end()));
    }
  } catch (Exception e) {
    // Report what failed and why; the bare "oops" hid the cause entirely.
    System.err.println("oops: could not chunk with model " + modelFile);
    e.printStackTrace();
  }
}
public ChunkerFeatureExtractor() throws ClassNotFoundException, IOException { @SuppressWarnings("unchecked") // req for deserialize CodeSource src = this.getClass().getProtectionDomain().getCodeSource(); String loc = src.getLocation().toString(); File hmmFile = new File( loc.substring(5, loc.length() - 10) + "/objects/pos-en-general-brown.HiddenMarkovModel"); // File hmmFile = new // File("C:/Users/D059348/dev/HU/MaschinelleSprachverarbeitung/objects/pos-en-general-brown.HiddenMarkovModel"); HiddenMarkovModel posHmm = (HiddenMarkovModel) AbstractExternalizable.readObject(hmmFile); FastCache<String, double[]> emissionCache = new FastCache<String, double[]>(100000); mPosTagger = new HmmDecoder(posHmm, null, emissionCache); }
@Override public void initialize(UimaContext aContext) throws ResourceInitializationException { super.initialize(aContext); // initialization part from annotator String model = (String) aContext.getConfigParameterValue("TrainedModel"); modelFile = new File(model); try { extractor = (Chunker) AbstractExternalizable.readObject(modelFile); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ClassNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
/**
 * Loads a serialized classifier from a classpath resource.
 *
 * @param fileName name of the resource, resolved through {@code classLoader}
 * @return the deserialized classifier
 * @throws ClassNotFoundException if the serialized data references an unknown class
 * @throws IOException if the resource is not on the classpath or cannot be read
 */
private LMClassifier<?, ?> deserialize(String fileName)
    throws ClassNotFoundException, IOException {
  java.net.URL resource = classLoader.getResource(fileName);
  if (resource == null) {
    // Previously a missing resource surfaced as a NullPointerException.
    throw new IOException("Serialized model not found on classpath: " + fileName);
  }

  // URL.getFile() returns the percent-encoded path (e.g. "%20" for a space), which does not
  // name a real file; going through the URI decodes it. Fall back to the raw path whenever
  // the URL cannot be turned into a file URI, which matches the earlier behaviour.
  File modelFile;
  try {
    modelFile = new File(resource.toURI());
  } catch (java.net.URISyntaxException e) {
    modelFile = new File(resource.getFile());
  } catch (IllegalArgumentException e) {
    modelFile = new File(resource.getFile());
  }
  return (LMClassifier<?, ?>) AbstractExternalizable.readObject(modelFile);
}
/**
 * Asserts the chunker's result for {@code in} and then round-trips the chunker through
 * serialization.
 *
 * @param chunker chunker under test
 * @param in input text
 * @param types expected chunk types, passed to {@code assertChunkingResult}
 * @param starts expected chunk start offsets, passed to {@code assertChunkingResult}
 * @param ends expected chunk end offsets, passed to {@code assertChunkingResult}
 * @throws IOException if the serialization round trip fails
 */
void assertChunking(TokenChunker chunker, String in, String[] types, int[] starts, int[] ends)
    throws IOException {
  assertChunkingResult(chunker, in, types, starts, ends);

  // NOTE(review): the round-tripped copy is only checked to be a Chunker (by the cast); its
  // output is never asserted on. Possibly an unfinished check - confirm.
  Object roundTripped = AbstractExternalizable.serializeDeserialize(chunker);
  Chunker deserialized = (Chunker) roundTripped;
}