private static Evaluation simpleParse(Parameters parameters, String model) throws IOException { DepTreebank tb; if (parameters.xconll) { tb = readXConllTreebank(parameters.input, -1); } else { tb = readTreebank(parameters.input); } ArcStandardTransitionBasedParserModel tbm = getModel(parameters, model); TransitionBasedSystem<DepTree> parser = new PerceptronTransitionBasedSystem<DepTree>(tbm); SimpleEvaluation eval = new SimpleEvaluation(); SentenceProcessComposition spc = new SentenceProcessComposition(); spc.add(TreebankProcesses.greedyParse(parser)); spc.add(TreebankIO.saveInXConll("tmp.conll")); if (parameters.fixedMweOnly) { spc.add(TreebankProcesses.removeRegularMWEs()); // in case treebank contains regular MWEs } spc.add(TreebankProcesses.unlabelMWEArcs()); spc.add(TreebankProcesses.mergeFixedMWEs()); spc.add(TreebankProcesses.mergeRegularMWEs()); spc.add(TreebankEvaluations.computeSegmentationAccuracy(false)); spc.add(TreebankEvaluations.computeSegmentationAccuracy(true)); // spc.add(TreebankEvaluations.computeSegmentationParsingScore()); spc.add(TreebankProcesses.unmergeFixedMWE()); // spc.add(TreebankIO.saveInXConll("unmerge.conll")); if (parameters.output != null) { spc.add(TreebankIO.saveInXConll(parameters.output)); } spc.add(TreebankEvaluations.computeParsingAccuracy()); TreebankProcesses.processTreebank(tb, spc, eval); return eval; }
private static void train(Parameters parameters) throws IOException { System.err.println("--------- New model -----"); // ArcStandardTransitionBasedParserModel tbm = null; DepTreebank tb; if (parameters.xconll) { tb = readXConllTreebank( parameters.train, parameters .trainSize); // treebank in conll format; MWEs with specific arc labels (MWE_LABEL // for fixed MWEs; REG_MWE for regular MWEs) } else { tb = readTreebank(parameters.train, parameters.trainSize); } SentenceProcessComposition spc = new SentenceProcessComposition(); spc.add(TreebankProcesses.copyGold()); spc.add(TreebankIO.saveInXConll("tmpx.conll")); if (!parameters.fixedMweOnly) { // trainFullSystem(parameters.train, parameters.model, parameters.iters, -1, true); if (parameters.baseline) { if (parameters.xconll) { spc.add(TreebankProcesses.unmergeRegularMWE()); } spc.add(TreebankIO.saveInXConll("tmp.conll")); } else { // spc.add(TreebankIO.saveInXConll("tmp.conll")); spc.add(TreebankProcesses.mergeFixedMWEs()); spc.add(TreebankProcesses.mergeRegularMWEs()); spc.add(TreebankProcesses.binarizeMWE(false)); spc.add(TreebankIO.saveInXConll("tmp.conll")); } } else { spc.add(TreebankProcesses.removeRegularMWEs()); // in case treebank contains regular MWEs // spc.add(TreebankProcesses.removeMwePosInLabels()); //put an option in case one wants to // keep mwe pos label if (parameters.baseline) { spc.add(TreebankIO.saveInXConll("tmp.conll")); } else { spc.add(TreebankProcesses.mergeFixedMWEs()); spc.add(TreebankProcesses.binarizeMWE(false)); spc.add(TreebankIO.saveInXConll("tmp.conll")); // trainWithMerge(parameters.train, parameters.model, parameters.iters, -1); } } TreebankProcesses.staticOracleTrain(tb, spc, parameters); }