예제 #1
0
 /**
  * Runs the three preprocessing steps, in order, against the given configuration: a
  * {@code DatasetBuilder}, then a {@code MetadataWriter}, then a {@code FeatureExtractorRunner}.
  * Each step is started through its {@code runFromScratch(props, dictionaries)} method.
  *
  * @param props configuration passed to every step; for a training set it is also read for the
  *     {@code minClassImbalance} and {@code minTrainExamplesPerDocument} settings
  * @param dictionaries dictionaries handed to every step
  * @param isTrainSet if {@code true}, the dataset builder is constructed with the two settings
  *     above, otherwise with its no-argument constructor; the flag is also forwarded to the
  *     metadata writer
  * @throws Exception if any of the steps throws
  */
 private static void preprocess(Properties props, Dictionaries dictionaries, boolean isTrainSet)
     throws Exception {
   // Step 1: build the dataset. Only the training set uses the configured thresholds.
   final DatasetBuilder datasetBuilder;
   if (isTrainSet) {
     datasetBuilder =
         new DatasetBuilder(
             StatisticalCorefProperties.minClassImbalance(props),
             StatisticalCorefProperties.minTrainExamplesPerDocument(props));
   } else {
     datasetBuilder = new DatasetBuilder();
   }
   datasetBuilder.runFromScratch(props, dictionaries);

   // Step 2: write metadata. The writer is created only after the dataset step has finished.
   final MetadataWriter metadataWriter = new MetadataWriter(isTrainSet);
   metadataWriter.runFromScratch(props, dictionaries);

   // Step 3: run feature extraction.
   final FeatureExtractorRunner featureRunner = new FeatureExtractorRunner(props, dictionaries);
   featureRunner.runFromScratch(props, dictionaries);
 }
예제 #2
0
 /**
  * Initializes the static path fields used for training output and passes the two model
  * sub-directories to {@code makeDir}.
  *
  * <p>{@code trainingPath} is taken from {@code StatisticalCorefProperties.trainingPath(props)};
  * {@code pairwiseModelsPath} and {@code clusteringModelsPath} are that value with
  * {@code "pairwise_models/"} and {@code "clustering_models/"} appended.
  *
  * <p>NOTE(review): the sub-directory names are appended by plain string concatenation, so the
  * configured training path presumably has to end with a path separator - confirm against the
  * property's documentation.
  *
  * @param props configuration from which the training path is read
  */
 public static void setTrainingPath(Properties props) {
   final String root = StatisticalCorefProperties.trainingPath(props);

   // Publish all three paths before touching the file system.
   trainingPath = root;
   pairwiseModelsPath = root + "pairwise_models/";
   clusteringModelsPath = root + "clustering_models/";

   makeDir(pairwiseModelsPath);
   makeDir(clusteringModelsPath);
 }
예제 #3
0
  /**
   * Runs the whole training pipeline: preprocessing of the train and dev inputs, training of the
   * three pairwise models, testing of those models with the data path set to {@code "dev"}, and
   * finally clusterer training.
   *
   * <p>The method works through static state ({@code setTrainingPath}, {@code setDataPath},
   * {@code wordCountsFile}), so the order of the statements below is significant.
   *
   * @param props base configuration; it is first passed through
   *     {@code StatisticalCorefProperties.addHcorefProps} and the result is used from then on
   * @throws Exception if any stage of the pipeline throws
   */
  public static void doTraining(Properties props) throws Exception {
    final Properties hcorefProps = StatisticalCorefProperties.addHcorefProps(props);
    setTrainingPath(hcorefProps);
    Dictionaries dictionaries = new Dictionaries(hcorefProps);

    // --- Preprocess the training input ---
    setDataPath("train");
    wordCountsFile = "train/word_counts.ser";
    StatisticalCorefProperties.setInput(hcorefProps, Dataset.TRAIN);
    preprocess(hcorefProps, dictionaries, true);

    // --- Preprocess the dev input ---
    setDataPath("dev");
    StatisticalCorefProperties.setInput(hcorefProps, Dataset.DEV);
    preprocess(hcorefProps, dictionaries, false);

    // --- Build and train the pairwise models with the data path set to "train" ---
    setDataPath("train");
    // The dictionaries are not used past this point; drop the reference before training.
    dictionaries = null;

    PairwiseModel classifier =
        PairwiseModel.newBuilder(CLASSIFICATION_MODEL, MetaFeatureExtractor.newBuilder().build())
            .build();
    PairwiseModel ranker =
        PairwiseModel.newBuilder(RANKING_MODEL, MetaFeatureExtractor.newBuilder().build())
            .build();
    PairwiseModel anaphoricity =
        PairwiseModel.newBuilder(ANAPHORICITY_MODEL, MetaFeatureExtractor.anaphoricityMFE())
            .trainingExamples(5000000)
            .build();

    // The ranking model is trained first, then the two classification-style models.
    // NOTE(review): the boolean flag is true only for the anaphoricity model; presumably it
    // selects anaphoricity mode - confirm in PairwiseModelTrainer.
    PairwiseModelTrainer.trainRanking(ranker);
    PairwiseModelTrainer.trainClassification(classifier, false);
    PairwiseModelTrainer.trainClassification(anaphoricity, true);

    // --- Test all three models with the data path set to "dev" ---
    setDataPath("dev");
    PairwiseModelTrainer.test(classifier, predictionsName, false);
    PairwiseModelTrainer.test(ranker, predictionsName, false);
    PairwiseModelTrainer.test(anaphoricity, predictionsName, true);

    // --- Train the clusterer ---
    new Clusterer().doTraining(CLUSTERING_MODEL_NAME);
  }