예제 #1
0
  /**
   * Trains the classifier named by the configured model name on the given corpus.
   *
   * <p>The corpus features are first written by {@code FeatureMerger.combine} into a temporary
   * {@code mergedFeatureVector_*.csv.gz} file created inside {@code workDir}; that file is then
   * handed to {@code Classifier.train} together with {@code <workDir>/<classifierName>.model}
   * (presumably the location where the trained model is stored - confirm against
   * {@code Classifier.train}).
   *
   * @param trainCorpus corpus whose features are merged into the training file
   * @param workDir directory that receives the temporary feature file and the {@code .model} file
   * @param featSetName feature set name; must not be {@code null} (it is only validated here and
   *     not otherwise used by this method)
   * @return the classifier after {@code train} has been invoked on it
   * @throws IOException if the temporary file cannot be created or written
   * @throws FileNotFoundException if the temporary file cannot be opened for writing
   * @throws RuntimeException if the configured model name or {@code featSetName} is {@code null}
   */
  public Classifier runLearner(Corpus trainCorpus, File workDir, String featSetName)
      throws IOException, FileNotFoundException {
    // Validate all inputs up front so nothing is constructed or written when a parameter is missing.
    String modelName = cfg.getModelName();
    if (modelName == null) {
      throw new RuntimeException("Model name needs to be specified (parameter MODEL_NAME)");
    }
    if (featSetName == null) {
      throw new RuntimeException(
          "Feature set name needs to be specified (parameter FEAT_SET_NAME)");
    }

    String model = Utils.getWorkDirectory() + "/" + modelName;
    Classifier classifier = Constructor.createClassifier(model);

    // Merge feature files together. The stream is closed before training starts so that the
    // file handle is released and all bytes are on disk when the classifier reads the file.
    File mergedFeatureVector = File.createTempFile("mergedFeatureVector_", ".csv.gz", workDir);
    try (OutputStream trainFeatures = new FileOutputStream(mergedFeatureVector)) {
      FeatureMerger.combine(trainFeatures, trainCorpus);
    }

    System.out.println("start training");
    classifier.train(mergedFeatureVector, new File(workDir, classifier.getName() + ".model"));
    return classifier;
  }
예제 #2
0
  /**
   * Command-line entry point: loads a corpus, preprocesses it, generates features and trains a
   * classifier.
   *
   * <p>Expected arguments:
   *
   * <ol>
   *   <li>{@code args[0]} - corpus file passed to {@code DriverUtils.loadFiles}
   *   <li>{@code args[1]} - annotation set name registered for {@code Constants.GS_NP}
   *   <li>{@code args[2]} - output directory handed to {@code Trainer.runLearner}
   * </ol>
   *
   * <p>The full argument array is also forwarded to {@code DriverUtils.configure}.
   *
   * @param args command-line arguments; at least three are required
   */
  public static void main(String[] args) {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 3) {
      System.err.println("Usage: <corpusFile> <goldSetName> <outputDir>");
      return;
    }

    try {
      String corpusFile = args[0];
      String goldSet = args[1];
      File outputDir = new File(args[2]);

      SystemConfig systemConfig = DriverUtils.configure(args);
      systemConfig.setAnnotationSetName(Constants.GS_NP, goldSet);

      Trainer trainer = new Trainer(systemConfig);
      FeatureGenerator featureGenerator = new FeatureGenerator(systemConfig);

      // get corpus
      Corpus c = DriverUtils.loadFiles(corpusFile);

      Preprocessor preprocessor = new Preprocessor(systemConfig);
      preprocessor.preprocess(c, false);

      // generate features
      String featureSetName = featureGenerator.generateFeatures(c, true);

      // train classifier
      Classifier classifier = trainer.runLearner(c, outputDir, featureSetName);
      System.out.println("classifier trained: " + classifier.getName());

    } catch (IOException e) {
      System.err.println("Training failed due to an I/O error");
      e.printStackTrace();
    } catch (ConfigurationException e) {
      System.err.println("Training failed due to a configuration error");
      e.printStackTrace();
    }
  }