コード例 #1
0
  /**
   * Runs a {@link LearningEvaluator} over every document of a corpus populated from a directory.
   *
   * <p>The evaluator compares the {@code "flex"} response annotation set against the key
   * annotation set named {@code ":-)"}, for {@code Lookup} annotations and their {@code meshID}
   * feature.
   *
   * @param args optional; when {@code args[0]} is present and non-empty it is used as the corpus
   *     directory, otherwise the original hard-coded location is used
   * @throws Exception if initialisation, corpus population or controller execution fails
   */
  public static void main(String[] args) throws Exception {
    // The corpus location used to be fixed to one developer's desktop; allow overriding it from
    // the command line while keeping the old location as the default.
    final String corpusDir =
        (args != null && args.length > 0 && !args[0].isEmpty())
            ? args[0]
            : "C:\\Users\\dedek\\Desktop\\bmc\\experiment\\analyzed";

    GateUtils.initGateKeepLog();
    GateUtils.registerCzsemPlugin();

    // An alternative response set tried previously was "lemma_flex".
    ProcessingResource eval =
        new PRSetup.SinglePRSetup(LearningEvaluator.class)
            .putFeature("keyASName", ":-)")
            .putFeature("responseASName", "flex")
            .putFeature("keyAnnotationsAreInDocumentFeatures", true)
            .putFeatureList("annotationTypes", "Lookup")
            .putFeatureList("featureNames", "meshID")
            .createPR();

    SerialAnalyserController controller =
        (SerialAnalyserController)
            Factory.createResource(SerialAnalyserController.class.getCanonicalName());

    controller.add(eval);

    // Arguments after the URL are passed exactly as before: null, "utf8", false.
    Corpus corpus = Factory.newCorpus(null);
    corpus.populate(new File(corpusDir).toURI().toURL(), null, "utf8", false);

    System.err.println("populated");

    controller.setCorpus(corpus);

    controller.execute();
  }
コード例 #2
0
  /**
   * Runs the given engines on every requested annotation type, {@code repeatCount} times over,
   * and saves the results after each individual run.
   *
   * <p>For each annotation type the central results repository is cleared, GATE time benchmarking
   * is enabled, and a reduced data set is wrapped in a {@link MachineLearningExperiment}. When
   * {@code numFolds == 1} the experiment is trained and then tested on the test corpus of a
   * freshly created data set (which is cast to {@code CzechLawDataSet}); for any other value a
   * controlled cross-validation is run.
   *
   * <p>NOTE(review): {@code numFolds} is only compared with 1. The cross-validation branch always
   * passes 10 folds and the fixed directory {@code ../intlib/train-10-fold-cross/} - confirm this
   * is intended.
   *
   * @param ds_factory factory producing the data set for an annotation type
   * @param ds_reduce_retio ratio handed to {@code DataSetReduce}
   * @param eval_annot_types annotation types to evaluate, processed in array order
   * @param repeatCount how many times the whole list of annotation types is processed
   * @param numFolds {@code 1} selects train/test mode; any other value selects cross-validation
   * @param results_file_name passed to {@code MachineLearningExperimenter.saveResults}
   * @param engines engines handed to the experiment
   */
  public static void performExperiment(
      DatasetFactory ds_factory,
      double ds_reduce_retio,
      String[] eval_annot_types,
      int repeatCount,
      int numFolds,
      String results_file_name,
      TrainTest... engines)
      throws URISyntaxException, IOException, ExecutionException, ResourceInstantiationException,
          PersistenceException, JDOMException, BenchmarkReportInputFileFormatException {
    for (int round = 0; round < repeatCount; round++) {
      for (int typeIdx = 0; typeIdx < eval_annot_types.length; typeIdx++) {
        final String annotType = eval_annot_types[typeIdx];
        logger.info(String.format("Performing evaluation for annotation type: %s", annotType));

        // Start every run from a clean results repository with benchmarking switched on.
        LearningEvaluator.CentralResultsRepository.repository.clear();
        TimeBenchmarkUtils.enableGateTimeBenchmark();

        final DataSet reducedDataSet =
            new DataSetReduce(ds_factory.createDataset(annotType), ds_reduce_retio);
        final MachineLearningExperiment mlExperiment =
            new MachineLearningExperiment(reducedDataSet, engines);

        if (numFolds != 1) {
          // Fold count and fold directory are fixed here; numFolds is not forwarded.
          final File foldsDir = new File("../intlib/train-10-fold-cross/");
          mlExperiment.controlledCrossValidation(10, foldsDir.toURI().toURL(), null, false);
        } else {
          // The test corpus is fetched from a second, freshly created data set before training.
          final CzechLawDataSet lawDataSet = (CzechLawDataSet) ds_factory.createDataset(annotType);
          final Corpus heldOutCorpus = lawDataSet.getTestCorpus();

          mlExperiment.trainOnly();
          mlExperiment.testOnly(heldOutCorpus);
        }

        // Both modes finish the same way: persist results, then release GATE resources.
        logger.info("saving results, counting time statistics...");
        MachineLearningExperimenter.saveResults(results_file_name);

        GateUtils.deleteAllPublicGateResources();
      }
    }
  }
コード例 #3
0
 /**
  * Opens the datastore referenced by {@code dataStore} and loads the test corpus from it.
  *
  * <p>The corpus is looked up by a fixed identifier string hard-coded in this method.
  *
  * @return the corpus returned by {@code GateUtils.loadCorpusFormDatastore}
  * @throws PersistenceException declared by the datastore helpers
  * @throws ResourceInstantiationException declared by the datastore helpers
  */
 public Corpus getTestCorpus() throws PersistenceException, ResourceInstantiationException {
   final String testCorpusId = "test___1354799449330___3473";
   return GateUtils.loadCorpusFormDatastore(GateUtils.openDataStore(dataStore), testCorpusId);
 }