/**
 * Runs the pipeline held in {@code controller} over a single temporary document built from
 * the given text and collects the {@code "sym"} feature of every annotation found in the
 * {@code "StockSymbols"} annotation set of that document.
 *
 * <p>The temporary corpus and document are released again before the method returns, whether
 * or not processing succeeded. Feature values are added to the result exactly as returned by
 * the feature map, without filtering.
 *
 * @param str the content passed to {@code Factory.newDocument}
 * @return a new, mutable set holding the collected {@code "sym"} values
 * @throws Exception anything thrown while creating the resources or executing the pipeline;
 *     it is propagated unchanged
 */
@SuppressWarnings("unchecked")
public Set<String> processDoc(String str) throws Exception {
  Set<String> toReturn = new HashSet<String>();
  Corpus c = null;
  Document aDoc = null;
  // Tracks whether the controller was pointed at the temporary corpus, so that the
  // finally block only detaches what this call attached.
  boolean corpusAttached = false;
  try {
    c = Factory.newCorpus("sample");
    aDoc = Factory.newDocument(str);
    c.add(aDoc);
    controller.setCorpus(c);
    corpusAttached = true;
    controller.execute();

    AnnotationSet aSet = aDoc.getAnnotations("StockSymbols");
    for (Annotation annot : aSet) {
      String symbol = (String) annot.getFeatures().get("sym");
      toReturn.add(symbol);
    }
  } finally {
    // Do not leave the controller holding on to a corpus that is about to be deleted.
    if (corpusAttached) {
      controller.setCorpus(null);
    }
    // Nested so that a failure while deleting the document cannot skip the corpus.
    try {
      if (aDoc != null) {
        Factory.deleteResource(aDoc);
      }
    } finally {
      if (c != null) {
        Factory.deleteResource(c);
      }
    }
  }
  return toReturn;
}
/**
 * Clear up the resources used after one test.
 *
 * <p>Releases, in order: every document still held by {@code corpus}, the corpus itself, the
 * learning PR, and finally the controller (after the learning PR has been taken out of it).
 */
private void clearOneTest() {
  // Emptying the corpus alone would only drop the list entries; each document is a GATE
  // resource of its own and has to be released explicitly, otherwise it stays loaded
  // after every test.
  while (!corpus.isEmpty()) {
    Document doc = (Document) corpus.remove(0);
    Factory.deleteResource(doc);
  }
  Factory.deleteResource(corpus);
  Factory.deleteResource(learningApi);
  controller.remove(learningApi);
  controller.cleanup();
  Factory.deleteResource(controller);
}
/** * Loading the configurationg file and corpus for testing. And make settings as in the GATE Gui. */ void loadSettings(String configFileName, String corpusDirName, String inputasN, String outputasN) throws GateException, IOException { LogService.minVerbosityLevel = 0; if (LogService.minVerbosityLevel > 0) System.out.println("Learning Home : " + learningHome.getAbsolutePath()); FeatureMap parameters = Factory.newFeatureMap(); URL configFileURL = new File(configFileName).toURI().toURL(); parameters.put("configFileURL", configFileURL); learningApi = (LearningAPIMain) Factory.createResource("gate.learning.LearningAPIMain", parameters); // Load the corpus corpus = Factory.newCorpus("DataSet"); ExtensionFileFilter fileFilter = new ExtensionFileFilter(); fileFilter.addExtension("xml"); File[] xmlFiles = new File(corpusDirName).listFiles(fileFilter); Arrays.sort( xmlFiles, new Comparator<File>() { public int compare(File a, File b) { return a.getName().compareTo(b.getName()); } }); for (File f : xmlFiles) { if (!f.isDirectory()) { Document doc = Factory.newDocument(f.toURI().toURL(), "UTF-8"); doc.setName(f.getName()); corpus.add(doc); } } // URL tempURL = new File(corpusDirName).toURI().toURL(); // corpus.populate(tempURL, fileFilter, "UTF-8", false); // Set the inputAS learningApi.setInputASName(inputasN); learningApi.setOutputASName(outputasN); controller = (gate.creole.SerialAnalyserController) Factory.createResource("gate.creole.SerialAnalyserController"); controller.setCorpus(corpus); controller.add(learningApi); }
public static void main(String[] args) throws Exception { // Logger.getLogger(DocumentFeaturesDiff.class).setLevel(Level.ALL); GateUtils.initGateKeepLog(); GateUtils.registerCzsemPlugin(); ProcessingResource eval = new PRSetup.SinglePRSetup(LearningEvaluator.class) .putFeature("keyASName", ":-)") // .putFeature("responseASName", "lemma_flex") .putFeature("responseASName", "flex") .putFeature("keyAnnotationsAreInDocumentFeatures", true) .putFeatureList("annotationTypes", "Lookup") .putFeatureList("featureNames", "meshID") .createPR(); SerialAnalyserController controller = (SerialAnalyserController) Factory.createResource(SerialAnalyserController.class.getCanonicalName()); controller.add(eval); Corpus corpus = Factory.newCorpus(null); corpus.populate( new File("C:\\Users\\dedek\\Desktop\\bmc\\experiment\\analyzed").toURI().toURL(), // new File("C:\\Users\\dedek\\Desktop\\bmca_devel").toURI().toURL(), null, "utf8", false); System.err.println("populated"); controller.setCorpus(corpus); controller.execute(); }
/** * Test the chunk learning by using the Naive Bayes method and a small part of the OntoNews * corpus. */ public void testNBChunkLearnng() throws IOException, GateException { // Initialisation System.out.print("Testing the Naive Bayes method on chunk learning..."); File chunklearningHome = new File(new File(learningHome, "test"), "chunklearning"); String configFileURL = new File(chunklearningHome, "engines-naivebayesweka.xml").getAbsolutePath(); String corpusDirName = new File(chunklearningHome, "data-ontonews").getAbsolutePath(); // Remove the label list file, feature list file and chunk length files. String wdResults = new File(chunklearningHome, ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath(); emptySavedFiles(wdResults); String inputASN = "Key"; loadSettings(configFileURL, corpusDirName, inputASN, inputASN); // Set the evaluation mode RunMode runM = RunMode.EVALUATION; learningApi.setLearningMode(runM); controller.execute(); // Using the evaluation mode for testing EvaluationBasedOnDocs evaluation = learningApi.getEvaluation(); // Compare the overall results with the correct numbers /*assertEquals(evaluation.macroMeasuresOfResults.correct, 3); assertEquals(evaluation.macroMeasuresOfResults.partialCor, 1); assertEquals(evaluation.macroMeasuresOfResults.spurious, 19); assertEquals(evaluation.macroMeasuresOfResults.missing, 68);*/ assertEquals( "Wrong value for correct: ", 27, (int) Math.floor(evaluation.macroMeasuresOfResults.correct)); assertEquals( "Wrong value for partial: ", 3, (int) Math.floor(evaluation.macroMeasuresOfResults.partialCor)); assertEquals( "Wrong value for spurious: ", 26, (int) Math.floor(evaluation.macroMeasuresOfResults.spurious)); assertEquals( "Wrong value for missing: ", 42, (int) Math.floor(evaluation.macroMeasuresOfResults.missing)); // Remove the resources clearOneTest(); System.out.println("completed"); }