/**
 * Train a classifier with a sigma tuned on a validation set. In this case the validation set
 * is the last 30% of the training data.
 *
 * @param train The data to train (and validate) on.
 * @param min   The lower bound of the sigma search range.
 * @param max   The upper bound of the sigma search range.
 * @return The constructed classifier
 */
public LinearClassifier<L, F> trainClassifierV(GeneralDataset<L, F> train,
                                               double min, double max, boolean accuracy) {
  labelIndex = train.labelIndex();
  featureIndex = train.featureIndex();
  tuneSigmaHeldOut = true;
  this.min = min;
  this.max = max;
  heldOutSetSigma(train);
  double[][] weights = trainWeights(train);
  return new LinearClassifier<L, F>(weights, train.featureIndex(), train.labelIndex());
}
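// Usage sketch (hypothetical helper, not part of the original API): tune sigma over
// [0.1, 10.0] on the held-out split of the training data, then train on the full set.
// The bounds and the accuracy flag are illustrative values, not recommended defaults.
private LinearClassifier<L, F> exampleTrainWithTunedSigma(GeneralDataset<L, F> trainSet) {
  return trainClassifierV(trainSet, 0.1, 10.0, true);
}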
public LinearClassifier<L, F> trainClassifier(GeneralDataset<L, F> dataset, double[] initial) {
  if (dataset instanceof RVFDataset) {
    ((RVFDataset<L, F>) dataset).ensureRealValues();
  }
  double[][] weights = trainWeights(dataset, initial, false);
  LinearClassifier<L, F> classifier =
      new LinearClassifier<L, F>(weights, dataset.featureIndex(), dataset.labelIndex());
  return classifier;
}
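// Usage sketch (hypothetical helper, not part of the original API): start the optimizer from
// an explicit flat initial weight vector, here all zeros. The numFeatures * numClasses layout
// is an assumption about how trainWeights flattens the weight matrix.
private LinearClassifier<L, F> exampleTrainFromZeroInit(GeneralDataset<L, F> dataset) {
  double[] initialWeights = new double[dataset.numFeatures() * dataset.numClasses()];
  return trainClassifier(dataset, initialWeights);
}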
/**
 * IMPORTANT: {@code data} and {@code biasedData} must share the same featureIndex and labelIndex.
 */
public Classifier<L, F> trainClassifierSemiSup(GeneralDataset<L, F> data,
                                               GeneralDataset<L, F> biasedData,
                                               double[][] confusionMatrix,
                                               double[] initial) {
  double[][] weights = trainWeightsSemiSup(data, biasedData, confusionMatrix, initial);
  LinearClassifier<L, F> classifier =
      new LinearClassifier<L, F>(weights, data.featureIndex(), data.labelIndex());
  return classifier;
}
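// Usage sketch (hypothetical helper, not part of the original API): the confusion matrix is
// assumed to be numClasses x numClasses, and the flat initial vector is assumed to follow the
// same numFeatures * numClasses layout as the other training methods.
private Classifier<L, F> exampleTrainSemiSup(GeneralDataset<L, F> data,
                                             GeneralDataset<L, F> biasedData) {
  double[][] confusion = new double[data.numClasses()][data.numClasses()];
  // (fill confusion with estimated probabilities of each biased label given each true label)
  double[] initialWeights = new double[data.numFeatures() * data.numClasses()];
  return trainClassifierSemiSup(data, biasedData, confusion, initialWeights);
}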
/**
 * Trains a classifier from a dataset with per-datum weights, using the given prior.
 */
public Classifier<L, F> trainClassifier(GeneralDataset<L, F> dataset, float[] dataWeights,
                                        LogPrior prior) {
  Minimizer<DiffFunction> minimizer = getMinimizer();
  if (dataset instanceof RVFDataset) {
    ((RVFDataset<L, F>) dataset).ensureRealValues();
  }
  LogConditionalObjectiveFunction<L, F> objective =
      new LogConditionalObjectiveFunction<L, F>(dataset, dataWeights, prior);
  double[] initial = objective.initial();
  double[] weights = minimizer.minimize(objective, TOL, initial);
  LinearClassifier<L, F> classifier = new LinearClassifier<L, F>(
      objective.to2D(weights), dataset.featureIndex(), dataset.labelIndex());
  return classifier;
}
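// Usage sketch (hypothetical helper, not part of the original API; assumes
// LogPrior.LogPriorType.QUADRATIC exists alongside the NULL type used elsewhere in this class):
// down-weight an individual example while training under a quadratic (Gaussian) prior.
private Classifier<L, F> exampleTrainWithDatumWeights(GeneralDataset<L, F> dataset) {
  float[] exampleWeights = new float[dataset.size()];
  java.util.Arrays.fill(exampleWeights, 1.0f);
  exampleWeights[0] = 0.5f; // trust the first example less than the rest
  return trainClassifier(dataset, exampleWeights, new LogPrior(LogPrior.LogPriorType.QUADRATIC));
}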
/**
 * Trains the linear classifier using Generalized Expectation criteria as described in
 * <tt>Generalized Expectation Criteria for Semi-Supervised Learning of Conditional Random
 * Fields</tt>, Mann and McCallum, ACL 2008. The original algorithm is proposed for CRFs, but it
 * has been adapted to LinearClassifier (which is a simpler special case of a CRF). IMPORTANT:
 * the labeled features that are passed as an argument are assumed to be binary valued, although
 * other features are allowed to be real valued.
 */
public LinearClassifier<L, F> trainSemiSupGE(GeneralDataset<L, F> labeledDataset,
                                             List<? extends Datum<L, F>> unlabeledDataList,
                                             List<F> GEFeatures, double convexComboCoeff) {
  Minimizer<DiffFunction> minimizer = minimizerCreator.create();
  LogConditionalObjectiveFunction<L, F> objective = new LogConditionalObjectiveFunction<L, F>(
      labeledDataset, new LogPrior(LogPrior.LogPriorType.NULL));
  GeneralizedExpectationObjectiveFunction<L, F> geObjective =
      new GeneralizedExpectationObjectiveFunction<L, F>(
          labeledDataset, unlabeledDataList, GEFeatures);
  SemiSupervisedLogConditionalObjectiveFunction semiSupObjective =
      new SemiSupervisedLogConditionalObjectiveFunction(
          objective, geObjective, null, convexComboCoeff);
  double[] initial = objective.initial();
  double[] weights = minimizer.minimize(semiSupObjective, TOL, initial);
  return new LinearClassifier<L, F>(
      objective.to2D(weights), labeledDataset.featureIndex(), labeledDataset.labelIndex());
}
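// Usage sketch (hypothetical helper, not part of the original API): the GE features passed in
// must be binary valued, per the note above; 0.5 is an illustrative convex-combination
// coefficient between the supervised and GE objectives, not a recommended default.
private LinearClassifier<L, F> exampleTrainSemiSupGE(GeneralDataset<L, F> labeled,
                                                     List<? extends Datum<L, F>> unlabeled,
                                                     List<F> geFeatures) {
  return trainSemiSupGE(labeled, unlabeled, geFeatures, 0.5);
}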