예제 #1
0
  /**
   * Creates the configured classifier and trains it on the merged feature vector of a corpus.
   *
   * <p>The method performs these steps in order:
   *
   * <ol>
   *   <li>reads the model name from {@code cfg} and validates it and {@code featSetName};
   *   <li>creates the classifier via {@code Constructor.createClassifier} from the path {@code
   *       Utils.getWorkDirectory() + "/" + modelName};
   *   <li>creates a temporary file {@code mergedFeatureVector_*.csv.gz} inside {@code workDir} and
   *       lets {@code FeatureMerger.combine} write into it;
   *   <li>calls {@code classifier.train} with that file and the target {@code
   *       workDir/<classifier.getName()>.model}.
   * </ol>
   *
   * @param trainCorpus corpus handed to {@code FeatureMerger.combine}
   * @param workDir directory that receives the temporary merged feature file and is the parent of
   *     the model file path passed to {@code train}
   * @param featSetName feature set name; must not be {@code null} (it is only validated here, not
   *     otherwise used by this method)
   * @return the classifier on which {@code train} was invoked
   * @throws IOException if the temporary file cannot be created, or the output stream fails while
   *     writing or closing
   * @throws FileNotFoundException if the temporary file cannot be opened for writing
   * @throws RuntimeException if the configured model name or {@code featSetName} is {@code null}
   */
  public Classifier runLearner(Corpus trainCorpus, File workDir, String featSetName)
      throws IOException, FileNotFoundException {
    // Validate all required names before doing any work, so no classifier is built and no
    // temporary file is created for a request that is going to be rejected anyway.
    String modelName = cfg.getModelName();
    if (modelName == null) {
      throw new RuntimeException("Model name needs to be specified (parameter MODEL_NAME)");
    }
    if (featSetName == null) {
      throw new RuntimeException(
          "Feature set name needs to be specified (parameter FEAT_SET_NAME)");
    }

    String model = Utils.getWorkDirectory() + "/" + modelName;
    Classifier classifier = Constructor.createClassifier(model);

    // merge feature files together
    File mergedFeatureVector = File.createTempFile("mergedFeatureVector_", ".csv.gz", workDir);

    // Close the stream before training so the file handle is not leaked and the file is no
    // longer held open while the classifier reads it. Closing a FileOutputStream that has
    // already been closed is a no-op, so this is safe even if combine() closes it itself.
    try (OutputStream trainFeatures = new FileOutputStream(mergedFeatureVector)) {
      FeatureMerger.combine(trainFeatures, trainCorpus);
    }

    System.out.println("start training");
    classifier.train(mergedFeatureVector, new File(workDir, classifier.getName() + ".model"));
    return classifier;
  }
 /**
  * Convenience overload that delegates to the three-argument {@code train}, supplying the
  * classifier options itself.
  *
  * <p>If {@code mOptions} has not been set yet, it is first filled from the configuration entry
  * {@code "ClOptions." + getName()} and kept in the field, so later calls reuse the same value.
  *
  * @param trainFile forwarded unchanged as the first argument of the three-argument {@code train}
  * @param outputModelFile forwarded unchanged as the second argument of the three-argument {@code
  *     train}
  */
 public void train(File trainFile, File outputModelFile) {
   // Lazily resolve the options from the config the first time they are needed.
   final boolean optionsUnset = (null == this.mOptions);
   if (optionsUnset) {
     this.mOptions =
         Utils.getConfig()
             .getStringArray("ClOptions." + this.getName());
   }

   this.train(trainFile, outputModelFile, this.mOptions);
 }