/** * @param targetTerm * @param sourceFile * @param trainingAlgo * @param outputFileClassifier * @param outputFileResults * @param termWindowSize * @param pipe * @return */ private static List<ClassificationResult> runTrainingAndClassification( String targetTerm, String sourceFile, String trainingAlgo, String outputFileClassifier, String outputFileResults, int termWindowSize, Pipe pipe, boolean useCollocationalVector) { // Read in concordance file and create list of Mallet training instances // TODO: Remove duplication of code (see execConvertToMalletFormat(...)) String vectorType = useCollocationalVector ? "coll" : "bow"; InstanceList instanceList = readConcordanceFileToInstanceList( targetTerm, sourceFile, termWindowSize, pipe, useCollocationalVector); // Creating splits for training and testing double[] proportions = {0.9, 0.1}; InstanceList[] splitLists = instanceList.split(proportions); InstanceList trainingList = splitLists[0]; InstanceList testList = splitLists[1]; // Train the classifier ClassifierTrainer classifierTrainer = getClassifierTrainerForAlgorithm(trainingAlgo); Classifier classifier = classifierTrainer.train(trainingList); if (classifier.getLabelAlphabet() != null) { // TODO: Make sure this is not null in RandomClassifier System.out.println("Labels:\n" + classifier.getLabelAlphabet()); System.out.println( "Size of data alphabet (= type count of training list): " + classifier.getAlphabet().size()); } // Run tests and get results Trial trial = new Trial(classifier, testList); List<ClassificationResult> results = new ArrayList<ClassificationResult>(); for (int i = 0; i < classifier.getLabelAlphabet().size(); i++) { Label label = classifier.getLabelAlphabet().lookupLabel(i); ClassificationResult result = new MalletClassificationResult( trainingAlgo, targetTerm, vectorType, label.toString(), termWindowSize, trial, sourceFile); results.add(result); System.out.println(result.toString()); } // Save classifier saveClassifierToFile(outputFileClassifier, 
classifier, trainingAlgo, termWindowSize); return results; }
/**
 * Trains a classifier on the complete instance list and hands it to {@code saveClassifier}.
 *
 * <p>The target path is {@code "Models/"} followed by the trained classifier's
 * {@code toString()} value.
 *
 * @param ilist instances to train on
 * @param trainer trainer that produces the classifier
 * @throws IOException declared for the save step
 */
public static void trainAndSaveOnWholeData(InstanceList ilist, ClassifierTrainer trainer)
    throws IOException {
    final Classifier trained = trainer.train(ilist);
    final File modelFile = new File("Models/" + trained.toString());
    saveClassifier(trained, modelFile);
}
// in the training feature table // Lines should be formatted as: // // [name] [label] [data ... ] // public static Classifier TrainMaxent(String trainingFilename, File modelFile) throws IOException { // build data input pipe ArrayList<Pipe> pipes = new ArrayList<Pipe>(); // define pipe // the features in [data ...] should like: feature:value pipes.add(new Target2Label()); pipes.add(new Csv2FeatureVector()); Pipe pipe = new SerialPipes(pipes); pipe.setTargetProcessing(true); // read data InstanceList trainingInstances = new InstanceList(pipe); FileReader training_file_reader = new FileReader(trainingFilename); CsvIterator reader = new CsvIterator( training_file_reader, "(\\w+)\\s+([^\\s]+)\\s+(.*)", 3, 2, 1); // (data, label, name) field indices trainingInstances.addThruPipe(reader); training_file_reader.close(); // calculate running time long startTime = System.currentTimeMillis(); PrintStream temp = System.err; System.setErr(System.out); // train a Maxent classifier (could be other classifiers) ClassifierTrainer trainer = new MaxEntTrainer(Gaussian_Variance); Classifier classifier = trainer.train(trainingInstances); System.setErr(temp); // calculate running time long endTime = System.currentTimeMillis(); long totalTime = endTime - startTime; System.out.println("Total training time: " + totalTime); // write model ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(modelFile)); oos.writeObject(classifier); oos.close(); return classifier; }
/**
 * Trains the classifier and prints its performance.
 *
 * <p>Performance on the training instances is always printed; performance on the testing
 * instances is printed only when a testing list is supplied.
 *
 * @param trainingInstances instances to train on
 * @param testingInstances optional held-out instances; may be {@code null}
 * @return the classifier stored in this object's {@code classifier} field after training
 */
public Classifier train(InstanceList trainingInstances, InstanceList testingInstances) {
    this.classifier = this.trainer.train(trainingInstances);

    printPerformance(trainingInstances, "Training\n=======");

    // Nothing more to report without a held-out set.
    if (testingInstances == null) {
        return this.classifier;
    }

    printPerformance(testingInstances, "Testing\n=======");
    return this.classifier;
}
/**
 * Builds the training instance list for {@code dir}, trains a classifier with the trainer
 * factory named by {@code args[0]}, and serializes the classifier into {@code dir}.
 *
 * <p>The instance list is first saved as {@code training-data.ser} inside {@code dir}. The
 * factory name is tried as given and, if that yields no class, again with the
 * {@code org.cleartk.ml.mallet.factory.} prefix and {@code TrainerFactory} suffix.
 *
 * @param dir directory holding the training data and receiving the outputs
 * @param args {@code args[0]} is the trainer factory name; the remaining elements are passed to
 *     the factory's {@code createTrainer}
 * @throws IllegalArgumentException if no factory class is found for the name, or the factory
 *     fails to create a trainer
 * @throws Exception for any other failure while instantiating the factory, training or writing
 */
public void trainClassifier(File dir, String... args) throws Exception {
    InstanceListCreator instanceListCreator = new InstanceListCreator();
    InstanceList instanceList = instanceListCreator.createInstanceList(getTrainingDataFile(dir));
    instanceList.save(new File(dir, "training-data.ser"));

    // Resolve the factory class: first by the given name, then by the short-name convention.
    String factoryName = args[0];
    Class<ClassifierTrainerFactory<?>> factoryClass = createTrainerFactory(factoryName);
    if (factoryClass == null) {
        String factoryName2 = "org.cleartk.ml.mallet.factory." + factoryName + "TrainerFactory";
        factoryClass = createTrainerFactory(factoryName2);
    }
    if (factoryClass == null) {
        throw new IllegalArgumentException(
            String.format(
                "name for classifier trainer factory is not valid: name given ='%s'. Valid classifier names include: %s, %s, %s, and %s",
                factoryName,
                ClassifierTrainerFactory.NAMES[0],
                ClassifierTrainerFactory.NAMES[1],
                ClassifierTrainerFactory.NAMES[2],
                ClassifierTrainerFactory.NAMES[3]));
    }

    // Everything after the factory name belongs to the factory.
    String[] factoryArgs = new String[args.length - 1];
    System.arraycopy(args, 1, factoryArgs, 0, factoryArgs.length);

    ClassifierTrainerFactory<?> factory = factoryClass.newInstance();
    ClassifierTrainer<?> trainer = null;
    try {
        trainer = factory.createTrainer(factoryArgs);
    } catch (Throwable t) {
        throw new IllegalArgumentException(
            "Unable to create trainer.  Usage for "
                + factoryClass.getCanonicalName()
                + ": "
                + factory.getUsageMessage(),
            t);
    }

    this.classifier = trainer.train(instanceList);

    // Close the underlying file stream on every path so a failed write does not leak it.
    FileOutputStream modelOut = new FileOutputStream(new File(dir, MODEL_NAME));
    try {
        ObjectOutputStream oos = new ObjectOutputStream(modelOut);
        oos.writeObject(classifier);
        oos.close(); // flushes buffered object data before the file stream is released
    } finally {
        modelOut.close();
    }
}
/**
 * Trains the classifier on the given instances and then writes out the extreme features.
 *
 * @param trainingInstances instances to train on
 * @return the classifier stored in this object's {@code classifier} field after training
 */
public Classifier train(InstanceList trainingInstances) {
    this.classifier = this.trainer.train(trainingInstances);

    // Runs after training, as in the original statement order.
    writeExtremeFeatures();

    return this.classifier;
}
public static ArrayList<Double> getAverageCrossValidationScore( InstanceList ilist, int i, ClassifierTrainer trainer) { double crossValidAccSum = 0; double crossValidPrcSum = 0; double crossValidRecSum = 0; double crossValidF1Sum = 0; int count = 0; // get gross validation folds CrossValidationIterator cvIlists = ilist.crossValidationIterator(i); while (cvIlists.hasNext()) { System.out.println("#############Performing " + count + " iteration###########"); InstanceList[] ilists = cvIlists.next(); System.out.println("The train set size is " + ilists[0].size()); System.out.println("The test set size is " + ilists[1].size()); Classifier classifier = trainer.train(ilists[0]); System.out.println("The training accuracy is " + classifier.getAccuracy(ilists[0])); System.out.println("The testing accuracy is " + classifier.getAccuracy(ilists[1])); System.out.println("The testing precision is " + classifier.getPrecision(ilists[1], 1)); System.out.println("The testing recall is " + classifier.getRecall(ilists[1], 1)); System.out.println("The testing f1score is " + classifier.getF1(ilists[1], 1)); crossValidAccSum += classifier.getAccuracy(ilists[1]); crossValidPrcSum += classifier.getPrecision(ilists[1], 1); crossValidRecSum += classifier.getRecall(ilists[1], 1); crossValidF1Sum += classifier.getF1(ilists[1], 1); count++; // additional calculations ArrayList<Classification> outClassifications = classifier.classify(ilists[1]); int p1l1 = 0; int p1l0 = 0; int p0l1 = 0; int p0l0 = 0; int countCorrect = 0; int countIncorrect = 0; System.out.println("Outclassification size " + outClassifications.size()); for (int k = 0; k < outClassifications.size(); k++) { // System.out.println("Data "+outClassifications.get(k).getInstance().getName()); // System.out.println("Labeling "+outClassifications.get(k).getLabeling()); uncomment to get // score double predictedLabel = outClassifications.get(k).getLabeling().getBestIndex(); // System.out.println("Predicted label "+ predictedLabel); double 
targetLabel = Double.valueOf(outClassifications.get(k).getInstance().getTarget().toString()); // System.out.println("Target "+ targetLabel); boolean bestlabelIsCorrect = outClassifications.get(k).bestLabelIsCorrect(); // System.out.println("Prediction "+bestlabelIsCorrect); if (bestlabelIsCorrect) countCorrect++; else countIncorrect++; if ((predictedLabel == 1.0) && (targetLabel == 1.0)) p1l1++; else if ((predictedLabel == 1.0) && (targetLabel == 0.0)) p1l0++; else if ((predictedLabel == 0.0) && (targetLabel == 1.0)) p0l1++; else if ((predictedLabel == 0.0) && (targetLabel == 0.0)) p0l0++; } System.out.println("Count Correct " + countCorrect); System.out.println("Count Incorrect " + countIncorrect); System.out.println("p1l1 " + p1l1); System.out.println("p1l0 " + p1l0); System.out.println("p0l1 " + p0l1); System.out.println("p0l0 " + p0l0); } ArrayList<Double> results = new ArrayList<Double>(); double crossValidAccAvg = crossValidAccSum / count; double crossValidPrcAvg = crossValidPrcSum / count; double crossValidRecAvg = crossValidRecSum / count; double crossValidF1Avg = crossValidF1Sum / count; results.add(crossValidAccAvg); results.add(crossValidPrcAvg); results.add(crossValidRecAvg); results.add(crossValidF1Avg); return results; }