/** * Create and train a CRF model from the given training data, optionally testing it on the given * test data. * * @param training training data * @param testing test data (possibly <code>null</code>) * @param eval accuracy evaluator (possibly <code>null</code>) * @param orders label Markov orders (main and backoff) * @param defaultLabel default label * @param forbidden regular expression specifying impossible label transitions <em>current</em> * <code>,</code><em>next</em> (<code>null</code> indicates no forbidden transitions) * @param allowed regular expression specifying allowed label transitions (<code>null</code> * indicates everything is allowed that is not forbidden) * @param connected whether to include even transitions not occurring in the training data. * @param iterations number of training iterations * @param var Gaussian prior variance * @return the trained model */ public static CRF train( InstanceList training, InstanceList testing, TransducerEvaluator eval, int[] orders, String defaultLabel, String forbidden, String allowed, boolean connected, int iterations, double var, CRF crf) { Pattern forbiddenPat = Pattern.compile(forbidden); Pattern allowedPat = Pattern.compile(allowed); if (crf == null) { crf = new CRF(training.getPipe(), (Pipe) null); String startName = crf.addOrderNStates( training, orders, null, defaultLabel, forbiddenPat, allowedPat, connected); CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood(crf); crft.setGaussianPriorVariance(var); for (int i = 0; i < crf.numStates(); i++) crf.getState(i).setInitialWeight(Transducer.IMPOSSIBLE_WEIGHT); crf.getState(startName).setInitialWeight(0.0); } logger.info("Training on " + training.size() + " instances"); if (testing != null) logger.info("Testing on " + testing.size() + " instances"); CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood(crf); if (featureInductionOption.value) { crft.trainWithFeatureInduction( training, null, testing, eval, iterations, 10, 20, 500, 0.5, false, null); } else { boolean converged; for (int i = 1; i <= iterations; i++) { converged = crft.train(training, 1); if (i % 1 == 0 && eval != null) // Change the 1 to higher integer to evaluate less often eval.evaluate(crft); if (viterbiOutputOption.value && i % 10 == 0) new ViterbiWriter( "", new InstanceList[] {training, testing}, new String[] {"training", "testing"}) .evaluate(crft); if (converged) break; } } return crf; }