/**
 * Trains an LDA topic model on the training instances and writes the topic output files.
 *
 * <p>The training instance list is read from {@code TopicConstants.TRAININGMALLETFILENAME}
 * inside {@code TopicConstants.INPUTDIRPATH}. The model is built with the settings held in
 * {@code TopicConstants} (number of topics, alpha, beta, number of iterations), and the resulting
 * model is handed to {@code generateTopicOutputFiles} together with
 * {@code TopicConstants.OUTPUTDIRPATH}.
 *
 * <p>Only the training split is consumed by this method, so the full and testing instance files
 * are not loaded here.
 *
 * @throws SQLException declared for callers; presumably raised by the helper calls (confirm)
 * @throws FileNotFoundException declared for callers; presumably when an input file is missing
 * @throws IOException declared for callers; presumably on read/write failures in the helpers
 * @throws ClassNotFoundException declared for callers; presumably while loading the instance file
 */
private static void generateTopicFromAbstractsInDB()
    throws SQLException, FileNotFoundException, IOException, ClassNotFoundException {
  final String trainingFilePath =
      TopicConstants.INPUTDIRPATH + File.separator + TopicConstants.TRAININGMALLETFILENAME;
  final InstanceList trainingInstances = Utilities.readInstancesFromMalletFile(trainingFilePath);

  // Extraction of topics from the training data using the configured (optimal) parameters.
  final TopicTrainingModel ldaTrainModel = new TopicTrainingModel();
  ldaTrainModel.buildTopicModelUsingLDA(
      trainingInstances,
      TopicConstants.NUMTOPICS,
      // NOTE(review): alpha is scaled by the topic count, presumably because the helper expects
      // the summed alpha over all topics -- confirm against buildTopicModelUsingLDA.
      TopicConstants.NUMTOPICS * TopicConstants.ALPHA,
      TopicConstants.BETA,
      TopicConstants.NUMITERATIONS);

  final ParallelTopicModel topicModel = ldaTrainModel.getTopicModel();
  ldaTrainModel.generateTopicOutputFiles(topicModel, TopicConstants.OUTPUTDIRPATH);
}
/** Best values for SE: 80 topics, 20000 iterations, alpha= 0.001, beta= 0.01 */ private static void optimizeLDAParameters() throws IOException, ClassNotFoundException { /** ******** Selection of optimal Parameters ********** */ // double[] alphasToBeConsidered= {0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1}; // double[] betasToBeConsidered= {0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1}; // int[] topicSizesToBeConsidered= {3, 5, 10, 15, 20, 30, 40, 50, 60, 80, 100}; // int[] iterationsToBeConsidered= {5, 10, 50, 100, 500, 1000, 2000, 2500, 3000}; double[] alphasToBeConsidered = {0.001}; double[] betasToBeConsidered = {0.01}; int[] topicSizesToBeConsidered = {90}; int[] iterationsToBeConsidered = {20000}; InstanceList trainingInstances = Utilities.readInstancesFromMalletFile( TopicConstants.INPUTDIRPATH + File.separator + TopicConstants.ALLMALLETFILENAME); TopicTrainingModel ldaTrainModel = new TopicTrainingModel(); String res = ldaTrainModel.selectOptimalParameters( trainingInstances, TopicConstants.OUTPUTDIRPATH, alphasToBeConsidered, betasToBeConsidered, iterationsToBeConsidered, topicSizesToBeConsidered); System.out.println(res); /* ldaTrainModel.selectOptimalAlpha(trainingInstances, inputDir,outputDir, alphasToBeConsidered); ldaTrainModel.selectOptimalBeta(trainingInstances, inputDir, outputDir, betasToBeConsidered); ldaTrainModel.selectOptimalIterationsSize(trainingInstances, inputDir, outputDir, iterationsToBeConsidered); ldaTrainModel.selectOptimalTopicSize(trainingInstances, inputDir, outputDir, topicSizesToBeConsidered); */ }