public static void main(String[] args) throws Exception { // Logger.getLogger(DocumentFeaturesDiff.class).setLevel(Level.ALL); GateUtils.initGateKeepLog(); GateUtils.registerCzsemPlugin(); ProcessingResource eval = new PRSetup.SinglePRSetup(LearningEvaluator.class) .putFeature("keyASName", ":-)") // .putFeature("responseASName", "lemma_flex") .putFeature("responseASName", "flex") .putFeature("keyAnnotationsAreInDocumentFeatures", true) .putFeatureList("annotationTypes", "Lookup") .putFeatureList("featureNames", "meshID") .createPR(); SerialAnalyserController controller = (SerialAnalyserController) Factory.createResource(SerialAnalyserController.class.getCanonicalName()); controller.add(eval); Corpus corpus = Factory.newCorpus(null); corpus.populate( new File("C:\\Users\\dedek\\Desktop\\bmc\\experiment\\analyzed").toURI().toURL(), // new File("C:\\Users\\dedek\\Desktop\\bmca_devel").toURI().toURL(), null, "utf8", false); System.err.println("populated"); controller.setCorpus(corpus); controller.execute(); }
public static void performExperiment( DatasetFactory ds_factory, double ds_reduce_retio, String[] eval_annot_types, int repeatCount, int numFolds, String results_file_name, TrainTest... engines) throws URISyntaxException, IOException, ExecutionException, ResourceInstantiationException, PersistenceException, JDOMException, BenchmarkReportInputFileFormatException { for (int a = 0; a < repeatCount; a++) { for (String annot_type : eval_annot_types) { logger.info(String.format("Performing evaluation for annotation type: %s", annot_type)); LearningEvaluator.CentralResultsRepository.repository.clear(); TimeBenchmarkUtils.enableGateTimeBenchmark(); final DataSet dataset = new DataSetReduce(ds_factory.createDataset(annot_type), ds_reduce_retio); MachineLearningExperiment experiment = new MachineLearningExperiment(dataset, engines); if (numFolds == 1) { Corpus testCorpus = ((CzechLawDataSet) ds_factory.createDataset(annot_type)).getTestCorpus(); experiment.trainOnly(); experiment.testOnly(testCorpus); logger.info("saving results, counting time statistics..."); MachineLearningExperimenter.saveResults(results_file_name); } else { // experiment.crossValidation(numFolds); experiment.controlledCrossValidation( 10, new File("../intlib/train-10-fold-cross/").toURI().toURL(), null, false); logger.info("saving results, counting time statistics..."); MachineLearningExperimenter.saveResults(results_file_name); } GateUtils.deleteAllPublicGateResources(); } } }
/**
 * Opens the data store referenced by {@code dataStore} and loads the test
 * corpus from it using a hard-coded persistence ID.
 *
 * <p>The opened data store is not closed by this method.
 *
 * @return the corpus loaded from the data store
 * @throws PersistenceException           declared by the data-store helpers
 * @throws ResourceInstantiationException declared by the data-store helpers
 */
public Corpus getTestCorpus() throws PersistenceException, ResourceInstantiationException {
    // Persistence ID of the test corpus inside the data store.
    final String testCorpusId = "test___1354799449330___3473";
    return GateUtils.loadCorpusFormDatastore(GateUtils.openDataStore(dataStore), testCorpusId);
}