/**
 * Runs a single {@code LearningEvaluator} processing resource over every document found in a
 * fixed, developer-local directory.
 *
 * <p>The evaluator is configured with key annotation set {@code ":-)"}, response annotation set
 * {@code "flex"}, annotation type {@code "Lookup"} and feature name {@code "meshID"}.
 *
 * @param args ignored
 * @throws Exception if GATE initialisation, resource creation, corpus population or pipeline
 *     execution fails
 */
public static void main(String[] args) throws Exception {
    // For verbose diff logging, enable:
    //   Logger.getLogger(DocumentFeaturesDiff.class).setLevel(Level.ALL);

    GateUtils.initGateKeepLog();
    GateUtils.registerCzsemPlugin();

    // Alternative response set used during development: "lemma_flex".
    ProcessingResource evaluator =
        new PRSetup.SinglePRSetup(LearningEvaluator.class)
            .putFeature("keyASName", ":-)")
            .putFeature("responseASName", "flex")
            .putFeature("keyAnnotationsAreInDocumentFeatures", true)
            .putFeatureList("annotationTypes", "Lookup")
            .putFeatureList("featureNames", "meshID")
            .createPR();

    String controllerClassName = SerialAnalyserController.class.getCanonicalName();
    SerialAnalyserController pipeline =
        (SerialAnalyserController) Factory.createResource(controllerClassName);
    pipeline.add(evaluator);

    Corpus documents = Factory.newCorpus(null);

    // Alternative input used during development: C:\Users\dedek\Desktop\bmca_devel
    File analyzedDir = new File("C:\\Users\\dedek\\Desktop\\bmc\\experiment\\analyzed");

    // No file filter, documents read as "utf8", last flag false
    // (presumably "do not recurse into sub-directories" -- confirm against Corpus.populate).
    documents.populate(analyzedDir.toURI().toURL(), null, "utf8", false);

    System.err.println("populated");

    pipeline.setCorpus(documents);
    pipeline.execute();
  }
Exemplo n.º 2
0
  /**
   * Command-line entry point: loads a corpus, tags it, and then performs exactly one of
   * training, extraction ("run") or evaluation on it.
   *
   * <p>Options (getopt string {@code "i:r:te"}):
   *
   * <ul>
   *   <li>{@code -i <path>}: directory the corpus is populated from (required)
   *   <li>{@code -r <path>}: run mode; the argument is the output path handed to each
   *       {@code OutputGenerator}
   *   <li>{@code -t}: train mode
   *   <li>{@code -e}: evaluation mode
   * </ul>
   *
   * <p>NOTE(review): the validation below relies on {@code usage(...)} not returning
   * (presumably it prints help and exits) -- confirm against its definition.
   *
   * @param args command-line arguments as described above
   * @throws Exception if the code location cannot be resolved, GATE cannot be initialised, the
   *     corpus cannot be loaded, or any pipeline stage fails
   */
  public static void main(String[] args) throws Exception {
    /* Parse command line arguments */
    Getopt g = new Getopt("gateExtractor", args, "i:r:te");
    g.setOpterr(false);

    String inputPath = "";
    String outputPath = "";

    boolean train = false;
    boolean eval = false;
    boolean run = false;

    int c;
    String arg;
    while ((c = g.getopt()) != -1) {
      switch (c) {
        case 'i':
          arg = g.getOptarg();
          if (arg == null || arg.isEmpty()) {
            usage("Please provide an input path");
          }
          inputPath = arg;
          break;
        case 'r':
          run = true;
          arg = g.getOptarg();
          if (arg == null || arg.isEmpty()) {
            usage("Please provide an output path");
          }
          outputPath = arg;
          break;
        case 't':
          train = true;
          break;
        case 'e':
          eval = true;
          break;
        case '?':
        default:
          usage(null);
      }
    }

    // An empty argument list leaves all three flags false, so no separate length check is needed.
    if (!run && !train && !eval) {
      usage("Nothing to do.");
    }

    if (inputPath == null || inputPath.isEmpty()) {
      usage("Please provide an input path");
    }

    if (run && (outputPath == null || outputPath.isEmpty())) {
      usage("Please provide an output directory!");
    }

    int selectedModes = (train ? 1 : 0) + (eval ? 1 : 0) + (run ? 1 : 0);
    if (selectedModes > 1) {
      usage("Only one mode allowed at a time");
    }

    /* Initialize GATE */
    // Go through toURI() rather than URL.getPath(): getPath() returns the still-encoded form
    // ("%20" for spaces, etc.), which yields a non-existent directory for such install paths.
    File codeLocation =
        new File(Main.class.getProtectionDomain().getCodeSource().getLocation().toURI());
    String resourcesFolder = codeLocation.getParent() + "/resources";
    Gate.setGateHome(new File(resourcesFolder));

    Corpus corpus = null;
    try {
      /* Create ml-config.xml with threads */
      createConfig(resourcesFolder + File.separator);
      Gate.init();

      /* Load Corpus */
      log.info("Loading Corpus ... ");
      corpus = Factory.newCorpus("Training Corpus");
      File directory = new File(inputPath);
      URL url = directory.toURI().toURL();
      corpus.populate(url, null, null, true);
      log.info("Done loading Corpus!");

      /* Do Tagging */
      Pipeline pipeline = new Tagger();
      pipeline.run(corpus, resourcesFolder);

      /* Train */
      if (train) {
        pipeline = new Trainer();
        pipeline.run(corpus, resourcesFolder);
      }

      /* Apply learned rules */
      if (run) {
        pipeline = new Extractor();
        pipeline.run(corpus, resourcesFolder);

        ExecutorService executorService = Executors.newFixedThreadPool(20);
        try {
          for (int i = 0; i < corpus.size(); i++) {
            executorService.execute(new OutputGenerator(outputPath, corpus.get(i)));
          }
        } finally {
          // Always stop the pool (its worker threads would otherwise keep the JVM alive) and
          // wait for already-submitted tasks so the corpus is not deleted underneath them.
          executorService.shutdown();
          executorService.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);
        }
      }

      /* Evaluate results */
      if (eval) {
        pipeline = new Evaluator();
        pipeline.run(corpus, resourcesFolder);
      }
    } finally {
      /* Clean up, even when a stage above failed */
      if (corpus != null) {
        Factory.deleteResource(corpus);
      }
      // Presumably assigned by createConfig(...); guard in case it failed before doing so.
      if (outputFile_mlConfigThreads != null) {
        outputFile_mlConfigThreads.delete();
      }
    }
  }