public static void main(String[] args) throws Exception { // Logger.getLogger(DocumentFeaturesDiff.class).setLevel(Level.ALL); GateUtils.initGateKeepLog(); GateUtils.registerCzsemPlugin(); ProcessingResource eval = new PRSetup.SinglePRSetup(LearningEvaluator.class) .putFeature("keyASName", ":-)") // .putFeature("responseASName", "lemma_flex") .putFeature("responseASName", "flex") .putFeature("keyAnnotationsAreInDocumentFeatures", true) .putFeatureList("annotationTypes", "Lookup") .putFeatureList("featureNames", "meshID") .createPR(); SerialAnalyserController controller = (SerialAnalyserController) Factory.createResource(SerialAnalyserController.class.getCanonicalName()); controller.add(eval); Corpus corpus = Factory.newCorpus(null); corpus.populate( new File("C:\\Users\\dedek\\Desktop\\bmc\\experiment\\analyzed").toURI().toURL(), // new File("C:\\Users\\dedek\\Desktop\\bmca_devel").toURI().toURL(), null, "utf8", false); System.err.println("populated"); controller.setCorpus(corpus); controller.execute(); }
/** * @param args * @throws Exception */ public static void main(String[] args) throws Exception { /* Parse command line arguments */ Getopt g = new Getopt("gateExtractor", args, "i:r:te"); g.setOpterr(false); String inputPath = ""; String outputPath = ""; boolean train = false; boolean eval = false; boolean run = false; int c; String arg; while ((c = g.getopt()) != -1) { switch (c) { case 'i': arg = g.getOptarg(); if (arg == null || arg.isEmpty()) { usage("Please provide an input path"); } inputPath = arg; break; case 'r': run = true; arg = g.getOptarg(); if (arg == null || arg.isEmpty()) { usage("Please provide an output path"); } outputPath = arg; break; case 't': train = true; break; case 'e': eval = true; break; case '?': default: usage(null); } } if (args.length == 0 || (!run && !train && !eval)) { usage("Nothing to do."); } if (inputPath == null || inputPath.isEmpty()) { usage("Please provide an input path"); } if (run && (outputPath == null || outputPath.isEmpty())) { usage("Please provide an output directory!"); } if (train && eval) { usage("Only one mode allowed at a time"); } if (train && run) { usage("Only one mode allowed at a time"); } if (eval && run) { usage("Only one mode allowed at a time"); } /* Initialize GATE */ String location = new File(Main.class.getProtectionDomain().getCodeSource().getLocation().getPath()) .getParent(); String resourcesFolder = location + "/resources"; Gate.setGateHome(new File(resourcesFolder)); /* Create ml-config.xml with threads */ createConfig(resourcesFolder + File.separator); Gate.init(); /* Load Corpus */ log.info("Loading Corpus ... "); Corpus corpus = Factory.newCorpus("Training Corpus"); File directory = new File(inputPath); URL url = directory.toURI().toURL(); corpus.populate(url, null, null, true); log.info("Done loading Corpus!"); Pipeline pipeline = null; /* Do Tagging */ pipeline = new Tagger(); pipeline.run(corpus, resourcesFolder); /* Train */ if (train) { pipeline = new Trainer(); pipeline.run(corpus, resourcesFolder); } /* Apply learned rules */ if (run) { pipeline = new Extractor(); pipeline.run(corpus, resourcesFolder); ExecutorService executorService = Executors.newFixedThreadPool(20); for (int i = 0; i < corpus.size(); i++) { executorService.execute(new OutputGenerator(outputPath, corpus.get(i))); } executorService.shutdown(); executorService.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS); } /* Evaluate results */ if (eval) { pipeline = new Evaluator(); pipeline.run(corpus, resourcesFolder); } /* Clean up */ Factory.deleteResource(corpus); outputFile_mlConfigThreads.delete(); }