/** * Loads the model from disk. * * @param path The location of model that was saved to disk * @throws ClassCastException if model is the wrong format * @throws IOException if the model file doesn't exist or is otherwise unavailable/incomplete * @throws ClassNotFoundException this would probably indicate a serious classpath problem */ public static BasicEntityExtractor load( String path, Class<? extends BasicEntityExtractor> entityClassifier, boolean preferDefaultGazetteer) throws ClassCastException, IOException, ClassNotFoundException { // load the additional arguments // try to load the extra file from the CLASSPATH first InputStream is = BasicEntityExtractor.class.getClassLoader().getResourceAsStream(path + ".extra"); // if not found in the CLASSPATH, load from the file system if (is == null) is = new FileInputStream(path + ".extra"); ObjectInputStream in = new ObjectInputStream(is); String gazetteerLocation = ErasureUtils.<String>uncheckedCast(in.readObject()); if (preferDefaultGazetteer) gazetteerLocation = DefaultPaths.DEFAULT_NFL_GAZETTEER; Set<String> annotationsToSkip = ErasureUtils.<Set<String>>uncheckedCast(in.readObject()); Boolean useSubTypes = ErasureUtils.<Boolean>uncheckedCast(in.readObject()); Boolean useBIO = ErasureUtils.<Boolean>uncheckedCast(in.readObject()); in.close(); is.close(); BasicEntityExtractor extractor = (BasicEntityExtractor) MachineReading.makeEntityExtractor(entityClassifier, gazetteerLocation); // load the CRF classifier (this works from any resource, e.g., classpath or file system) extractor.classifier = CRFClassifier.getClassifier(path); // copy the extra arguments extractor.annotationsToSkip = annotationsToSkip; extractor.useSubTypes = useSubTypes; extractor.useBIO = useBIO; return extractor; }
/** * Annotate an ExtractionDataSet with entities. This will modify the ExtractionDataSet in place. * * @param doc The dataset to label */ @Override public void annotate(Annotation doc) { if (SAVE_CONLL_2003) { // dump a file in CoNLL-2003 format try { PrintStream os = new PrintStream(new FileOutputStream("test.conll")); List<List<CoreLabel>> labels = AnnotationUtils.entityMentionsToCoreLabels(doc, annotationsToSkip, useSubTypes, useBIO); BasicEntityExtractor.saveCoNLL(os, labels, true); // saveCoNLLFiles("/tmp/ace/test", doc, useSubTypes, useBIO); os.close(); } catch (IOException e) { e.printStackTrace(); System.exit(1); } } List<CoreMap> sents = doc.get(CoreAnnotations.SentencesAnnotation.class); int sentCount = 1; for (CoreMap sentence : sents) { if (useNERTags) { this.makeAnnotationFromAllNERTags(sentence); } else extractEntities(sentence, sentCount); sentCount++; } /* if(SAVE_CONLL_2003){ try { saveCoNLLFiles("test_output/", doc, useSubTypes, useBIO); System.err.println("useBIO = " + useBIO); } catch (IOException e) { e.printStackTrace(); System.exit(1); } } */ }