/**
 * Runs the shared {@code controller} over a single document created from {@code str} and
 * collects the {@code "sym"} feature of every annotation in the {@code "StockSymbols"}
 * annotation set.
 *
 * <p>The temporary corpus and document are released before the method exits, whether processing
 * succeeded or failed.
 *
 * @param str the value passed to {@code Factory.newDocument} to build the document
 * @return the distinct, non-null {@code "sym"} feature values; empty if none were found
 * @throws Exception if creating the resources or executing the controller fails
 */
@SuppressWarnings("unchecked")
 public Set<String> processDoc(String str) throws Exception {
   Set<String> toReturn = new HashSet<String>();
   Corpus c = null;
   Document aDoc = null;
   try {
     c = Factory.newCorpus("sample");
     aDoc = Factory.newDocument(str);
     c.add(aDoc);
     controller.setCorpus(c);
     controller.execute();
     AnnotationSet aSet = aDoc.getAnnotations("StockSymbols");
     for (Annotation annot : aSet) {
       String symbol = (String) annot.getFeatures().get("sym");
       // An annotation without a "sym" feature yields null; skip it rather than handing callers
       // a set that contains a null entry.
       if (symbol != null) {
         toReturn.add(symbol);
       }
     }
   } finally {
     // Nested finally: the corpus is still released even if deleting the document throws.
     try {
       if (aDoc != null) {
         Factory.deleteResource(aDoc);
       }
     } finally {
       if (c != null) {
         Factory.deleteResource(c);
       }
     }
   }
   return toReturn;
 }
// Example #2
// 0
 /**
  * Clear up the resources used after one test.
  *
  * <p>Releases every document still held by {@code corpus}, then the corpus itself, the learning
  * PR and finally the controller.
  */
 private void clearOneTest() {
   // Merely clearing the corpus detaches its documents without deleting them, so each one is
   // removed and handed to Factory.deleteResource explicitly. Removing from the tail empties the
   // corpus without shifting the remaining elements.
   while (!corpus.isEmpty()) {
     Factory.deleteResource(corpus.remove(corpus.size() - 1));
   }
   Factory.deleteResource(corpus);
   Factory.deleteResource(learningApi);
   controller.remove(learningApi);
   controller.cleanup();
   Factory.deleteResource(controller);
 }
// Example #3
// 0
 /**
  * Loads the configuration file and the corpus for testing, and makes the same settings as would
  * be made in the GATE GUI.
  *
  * <p>On return, {@code learningApi}, {@code corpus} and {@code controller} are initialised: the
  * corpus holds one document per non-directory file accepted by the {@code xml} extension filter
  * in {@code corpusDirName} (added in file-name order), and the controller contains the learning
  * PR and has the corpus set.
  *
  * @param configFileName path of the learning configuration file
  * @param corpusDirName directory containing the corpus files
  * @param inputasN input annotation set name given to the learning PR
  * @param outputasN output annotation set name given to the learning PR
  * @throws GateException if a GATE resource cannot be created
  * @throws IOException if {@code corpusDirName} cannot be listed, or a file URL cannot be built
  */
 void loadSettings(String configFileName, String corpusDirName, String inputasN, String outputasN)
     throws GateException, IOException {
   LogService.minVerbosityLevel = 0;
   // Only prints when the verbosity level assigned above is raised for debugging.
   if (LogService.minVerbosityLevel > 0) {
     System.out.println("Learning Home : " + learningHome.getAbsolutePath());
   }
   FeatureMap parameters = Factory.newFeatureMap();
   URL configFileURL = new File(configFileName).toURI().toURL();
   parameters.put("configFileURL", configFileURL);
   learningApi =
       (LearningAPIMain) Factory.createResource("gate.learning.LearningAPIMain", parameters);
   // Load the corpus
   corpus = Factory.newCorpus("DataSet");
   ExtensionFileFilter fileFilter = new ExtensionFileFilter();
   fileFilter.addExtension("xml");
   File[] xmlFiles = new File(corpusDirName).listFiles(fileFilter);
   if (xmlFiles == null) {
     // listFiles returns null (not an empty array) when the path is not a directory or cannot
     // be read; fail with a clear message instead of a NullPointerException in Arrays.sort.
     throw new IOException("Cannot list corpus directory: " + corpusDirName);
   }
   // Documents are added one by one in file-name order rather than via corpus.populate(...),
   // presumably so that the corpus order does not depend on the file system's listing order.
   Arrays.sort(
       xmlFiles,
       new Comparator<File>() {
         public int compare(File a, File b) {
           return a.getName().compareTo(b.getName());
         }
       });
   for (File f : xmlFiles) {
     if (!f.isDirectory()) {
       Document doc = Factory.newDocument(f.toURI().toURL(), "UTF-8");
       doc.setName(f.getName());
       corpus.add(doc);
     }
   }
   // Set the input and output annotation sets
   learningApi.setInputASName(inputasN);
   learningApi.setOutputASName(outputasN);
   controller =
       (gate.creole.SerialAnalyserController)
           Factory.createResource("gate.creole.SerialAnalyserController");
   controller.setCorpus(corpus);
   controller.add(learningApi);
 }
  /**
   * Runs a {@code LearningEvaluator} PR over every document of a corpus directory.
   *
   * @param args optional; {@code args[0]}, when present, is the directory the corpus is
   *     populated from. Without arguments the original hard-coded directory is used.
   * @throws Exception if GATE initialisation, corpus population or execution fails
   */
  public static void main(String[] args) throws Exception {
    // Logger.getLogger(DocumentFeaturesDiff.class).setLevel(Level.ALL);

    GateUtils.initGateKeepLog();
    GateUtils.registerCzsemPlugin();

    // The default keeps the previous behaviour; an argument makes the tool usable on machines
    // that do not have the original author's directory layout.
    String corpusDir =
        (args != null && args.length > 0)
            ? args[0]
            : "C:\\Users\\dedek\\Desktop\\bmc\\experiment\\analyzed";

    // Note: "lemma_flex" was used as an alternative responseASName during development.
    ProcessingResource eval =
        new PRSetup.SinglePRSetup(LearningEvaluator.class)
            .putFeature("keyASName", ":-)")
            .putFeature("responseASName", "flex")
            .putFeature("keyAnnotationsAreInDocumentFeatures", true)
            .putFeatureList("annotationTypes", "Lookup")
            .putFeatureList("featureNames", "meshID")
            .createPR();

    SerialAnalyserController controller =
        (SerialAnalyserController)
            Factory.createResource(SerialAnalyserController.class.getCanonicalName());

    controller.add(eval);

    Corpus corpus = Factory.newCorpus(null);
    corpus.populate(new File(corpusDir).toURI().toURL(), null, "utf8", false);

    System.err.println("populated");

    controller.setCorpus(corpus);

    controller.execute();
  }
// Example #5
// 0
 /**
  * Tests chunk learning with the Naive Bayes method on a small part of the OntoNews corpus.
  *
  * <p>Runs the learning PR in evaluation mode and compares the floored macro-averaged counts
  * (correct, partial, spurious, missing) with the expected values.
  *
  * @throws IOException if the configuration or corpus files cannot be accessed
  * @throws GateException if a GATE resource cannot be created or execution fails
  */
 public void testNBChunkLearnng() throws IOException, GateException {
   // Initialisation
   System.out.print("Testing the Naive Bayes method on chunk learning...");
   File chunklearningHome = new File(new File(learningHome, "test"), "chunklearning");
   String configFilePath =
       new File(chunklearningHome, "engines-naivebayesweka.xml").getAbsolutePath();
   String corpusDirName = new File(chunklearningHome, "data-ontonews").getAbsolutePath();
   // Remove the label list file, feature list file and chunk length files.
   String wdResults =
       new File(chunklearningHome, ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
   emptySavedFiles(wdResults);
   String inputASN = "Key";
   loadSettings(configFilePath, corpusDirName, inputASN, inputASN);
   // The try starts only after loadSettings has succeeded, so the cleanup in finally never runs
   // against half-initialised fields; from here on the resources are released even when
   // execution or an assertion fails.
   try {
     // Set the evaluation mode
     RunMode runM = RunMode.EVALUATION;
     learningApi.setLearningMode(runM);
     controller.execute();
     // Using the evaluation mode for testing
     EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
     // Compare the overall results with the correct numbers
     assertEquals(
         "Wrong value for correct: ",
         27,
         (int) Math.floor(evaluation.macroMeasuresOfResults.correct));
     assertEquals(
         "Wrong value for partial: ",
         3,
         (int) Math.floor(evaluation.macroMeasuresOfResults.partialCor));
     assertEquals(
         "Wrong value for spurious: ",
         26,
         (int) Math.floor(evaluation.macroMeasuresOfResults.spurious));
     assertEquals(
         "Wrong value for missing: ",
         42,
         (int) Math.floor(evaluation.macroMeasuresOfResults.missing));
   } finally {
     // Remove the resources
     clearOneTest();
   }
   System.out.println("completed");
 }