/* * Model creation, saving, loading, and saving */ public void train(Annotation doc) { List<List<CoreLabel>> trainingSet = AnnotationUtils.entityMentionsToCoreLabels(doc, annotationsToSkip, useSubTypes, useBIO); if (SAVE_CONLL_2003) { // dump a file in CoNLL-2003 format try { PrintStream os = new PrintStream(new FileOutputStream("train.conll")); // saveCoNLLFiles("/tmp/ace/train/", doc, useSubTypes, useBIO); saveCoNLL(os, trainingSet, useBIO); os.close(); } catch (IOException e) { e.printStackTrace(); System.exit(1); } } this.classifier = createClassifier(); if (trainingSet.size() > 0) { this.classifier.train(Collections.unmodifiableCollection(trainingSet)); } }
/** * Annotate an ExtractionDataSet with entities. This will modify the ExtractionDataSet in place. * * @param doc The dataset to label */ @Override public void annotate(Annotation doc) { if (SAVE_CONLL_2003) { // dump a file in CoNLL-2003 format try { PrintStream os = new PrintStream(new FileOutputStream("test.conll")); List<List<CoreLabel>> labels = AnnotationUtils.entityMentionsToCoreLabels(doc, annotationsToSkip, useSubTypes, useBIO); BasicEntityExtractor.saveCoNLL(os, labels, true); // saveCoNLLFiles("/tmp/ace/test", doc, useSubTypes, useBIO); os.close(); } catch (IOException e) { e.printStackTrace(); System.exit(1); } } List<CoreMap> sents = doc.get(CoreAnnotations.SentencesAnnotation.class); int sentCount = 1; for (CoreMap sentence : sents) { if (useNERTags) { this.makeAnnotationFromAllNERTags(sentence); } else extractEntities(sentence, sentCount); sentCount++; } /* if(SAVE_CONLL_2003){ try { saveCoNLLFiles("test_output/", doc, useSubTypes, useBIO); System.err.println("useBIO = " + useBIO); } catch (IOException e) { e.printStackTrace(); System.exit(1); } } */ }