public Classifier runLearner(Corpus trainCorpus, File workDir, String featSetName) throws IOException, FileNotFoundException { String modelName = cfg.getModelName(); String model = Utils.getWorkDirectory() + "/" + modelName; if (modelName == null) throw new RuntimeException("Model name needs to be specified (parameter MODEL_NAME)"); // SimpleDateFormat nameFormat = new SimpleDateFormat("yyyyMMdd"); // Date date = new Date(); // modelName = nameFormat.format(date) + "-" + modelName; Classifier classifier = Constructor.createClassifier(model); if (featSetName == null) throw new RuntimeException( "Feature set name needs to be specified (parameter FEAT_SET_NAME)"); if (modelName == null) throw new RuntimeException("Model name needs to be specified (parameter MODEL_NAME)"); // merge feature files together File mergedFeatureVector = File.createTempFile("mergedFeatureVector_", ".csv.gz", workDir); OutputStream trainFeatures = new FileOutputStream(mergedFeatureVector); FeatureMerger.combine(trainFeatures, trainCorpus); System.out.println("start training"); classifier.train(mergedFeatureVector, new File(workDir, classifier.getName() + ".model")); return classifier; }
/**
 * Trains this classifier on {@code trainFile} using the classifier's option array.
 *
 * <p>If no options have been set yet ({@code mOptions} is {@code null}), they are read from the
 * configuration under the key {@code "ClOptions." + getName()} and stored in {@code mOptions}.
 * The call is then forwarded to the three-argument {@code train} overload.
 *
 * @param trainFile file handed to the three-argument {@code train} as the training input
 * @param outputModelFile file handed to the three-argument {@code train} as the model output
 */
public void train(File trainFile, File outputModelFile) {
    final boolean optionsNotLoaded = (this.mOptions == null);
    if (optionsNotLoaded) {
        // Lazily pull the per-classifier options from the configuration on first use.
        this.mOptions = Utils.getConfig().getStringArray("ClOptions." + this.getName());
    }
    this.train(trainFile, outputModelFile, this.mOptions);
}