public Classifier runLearner(Corpus trainCorpus, File workDir, String featSetName) throws IOException, FileNotFoundException { String modelName = cfg.getModelName(); String model = Utils.getWorkDirectory() + "/" + modelName; if (modelName == null) throw new RuntimeException("Model name needs to be specified (parameter MODEL_NAME)"); // SimpleDateFormat nameFormat = new SimpleDateFormat("yyyyMMdd"); // Date date = new Date(); // modelName = nameFormat.format(date) + "-" + modelName; Classifier classifier = Constructor.createClassifier(model); if (featSetName == null) throw new RuntimeException( "Feature set name needs to be specified (parameter FEAT_SET_NAME)"); if (modelName == null) throw new RuntimeException("Model name needs to be specified (parameter MODEL_NAME)"); // merge feature files together File mergedFeatureVector = File.createTempFile("mergedFeatureVector_", ".csv.gz", workDir); OutputStream trainFeatures = new FileOutputStream(mergedFeatureVector); FeatureMerger.combine(trainFeatures, trainCorpus); System.out.println("start training"); classifier.train(mergedFeatureVector, new File(workDir, classifier.getName() + ".model")); return classifier; }
public static void main(String[] args) { try { String corpusFile = args[0]; String goldSet = args[1]; File outputDir = new File(args[2]); SystemConfig systemConfig = DriverUtils.configure(args); systemConfig.setAnnotationSetName(Constants.GS_NP, goldSet); Trainer trainer = new Trainer(systemConfig); FeatureGenerator featureGenerator = new FeatureGenerator(systemConfig); // get corpus Corpus c = DriverUtils.loadFiles(corpusFile); Preprocessor preprocessor = new Preprocessor(systemConfig); preprocessor.preprocess(c, false); // generate features String featureSetName = featureGenerator.generateFeatures(c, true); // train classifier Classifier classifier = trainer.runLearner(c, outputDir, featureSetName); System.out.println("classifier trained: " + classifier.getName()); } catch (IOException e) { e.printStackTrace(); } catch (ConfigurationException e) { e.printStackTrace(); } }