/**
 * Runs the preprocessing stages in a fixed order: a {@code DatasetBuilder}, then a
 * {@code MetadataWriter}, then a {@code FeatureExtractorRunner}, each invoked through its
 * {@code runFromScratch(props, dictionaries)} entry point.
 *
 * @param props configuration handed to every stage
 * @param dictionaries dictionaries handed to every stage
 * @param isTrainSet when {@code true}, the dataset builder is constructed with the
 *     {@code minClassImbalance} and {@code minTrainExamplesPerDocument} settings read from
 *     {@code props}; when {@code false}, its no-argument constructor is used. The flag is
 *     also forwarded to the metadata writer.
 * @throws Exception if any stage throws
 */
private static void preprocess(Properties props, Dictionaries dictionaries, boolean isTrainSet)
    throws Exception {
  // Stage 1: dataset construction (only the training set uses the tuned constructor).
  DatasetBuilder datasetBuilder;
  if (isTrainSet) {
    datasetBuilder = new DatasetBuilder(
        StatisticalCorefProperties.minClassImbalance(props),
        StatisticalCorefProperties.minTrainExamplesPerDocument(props));
  } else {
    datasetBuilder = new DatasetBuilder();
  }
  datasetBuilder.runFromScratch(props, dictionaries);

  // Stage 2: metadata.
  MetadataWriter metadataWriter = new MetadataWriter(isTrainSet);
  metadataWriter.runFromScratch(props, dictionaries);

  // Stage 3: feature extraction.
  FeatureExtractorRunner featureRunner = new FeatureExtractorRunner(props, dictionaries);
  featureRunner.runFromScratch(props, dictionaries);
}
/**
 * Reads the training root from {@code props} and derives the model output directories
 * from it, calling {@code makeDir} on each.
 *
 * <p>Sets the static fields {@code trainingPath}, {@code pairwiseModelsPath} and
 * {@code clusteringModelsPath}. The sub-directory names are appended by plain string
 * concatenation, so the configured training path is expected to end with a path
 * separator (NOTE(review): confirm against the property's documented format).
 *
 * @param props configuration from which the training path is read
 */
public static void setTrainingPath(Properties props) {
  trainingPath = StatisticalCorefProperties.trainingPath(props);

  pairwiseModelsPath = trainingPath + "pairwise_models/";
  clusteringModelsPath = trainingPath + "clustering_models/";

  // Same order as the assignments above: pairwise first, then clustering.
  for (String modelDir : new String[] {pairwiseModelsPath, clusteringModelsPath}) {
    makeDir(modelDir);
  }
}
/**
 * Runs the full training pipeline: preprocesses the train and dev inputs, trains the
 * ranking, classification and anaphoricity pairwise models, tests them with the data path
 * set to {@code "dev"}, and finally trains the clusterer.
 *
 * <p>This method mutates static state (the training/data path fields and
 * {@code wordCountsFile}) through {@code setTrainingPath}, {@code setDataPath} and a direct
 * assignment, so the order of the steps below matters.
 *
 * @param props base configuration; it is passed through
 *     {@code StatisticalCorefProperties.addHcorefProps} and the returned properties are used
 *     for the rest of the run
 * @throws Exception if preprocessing, training or testing fails
 */
public static void doTraining(Properties props) throws Exception {
  props = StatisticalCorefProperties.addHcorefProps(props);
  setTrainingPath(props);
  Dictionaries dictionaries = new Dictionaries(props);

  // --- Preprocess the training input ---
  setDataPath("train");
  // Anchor the word-counts file under the training root, like the model directories set up
  // by setTrainingPath. A bare relative path would resolve against the JVM working
  // directory instead of the training directory.
  wordCountsFile = trainingPath + "train/word_counts.ser";
  StatisticalCorefProperties.setInput(props, Dataset.TRAIN);
  preprocess(props, dictionaries, true);

  // --- Preprocess the dev input ---
  setDataPath("dev");
  StatisticalCorefProperties.setInput(props, Dataset.DEV);
  preprocess(props, dictionaries, false);

  // --- Train the pairwise models with the data path set to "train" ---
  setDataPath("train");
  // Drop the local reference; presumably so the dictionaries can be garbage-collected
  // before training starts.
  dictionaries = null;
  PairwiseModel classificationModel =
      PairwiseModel.newBuilder(CLASSIFICATION_MODEL, MetaFeatureExtractor.newBuilder().build())
          .build();
  PairwiseModel rankingModel =
      PairwiseModel.newBuilder(RANKING_MODEL, MetaFeatureExtractor.newBuilder().build())
          .build();
  PairwiseModel anaphoricityModel =
      PairwiseModel.newBuilder(ANAPHORICITY_MODEL, MetaFeatureExtractor.anaphoricityMFE())
          .trainingExamples(5000000)
          .build();
  PairwiseModelTrainer.trainRanking(rankingModel);
  PairwiseModelTrainer.trainClassification(classificationModel, false);
  PairwiseModelTrainer.trainClassification(anaphoricityModel, true);

  // --- Test with the data path set to "dev" ---
  setDataPath("dev");
  PairwiseModelTrainer.test(classificationModel, predictionsName, false);
  PairwiseModelTrainer.test(rankingModel, predictionsName, false);
  PairwiseModelTrainer.test(anaphoricityModel, predictionsName, true);

  // --- Train the clustering model ---
  new Clusterer().doTraining(CLUSTERING_MODEL_NAME);
}