/** * Sets the sigma parameter to a value that optimizes the cross-validation score given by <code> * scorer</code>. Search for an optimal value is carried out by <code>minimizer</code> * * @param dataset the data set to optimize sigma on. */ public void crossValidateSetSigma( GeneralDataset<L, F> dataset, int kfold, final Scorer<L> scorer, LineSearcher minimizer) { System.err.println("##in Cross Validate, folds = " + kfold); System.err.println("##Scorer is " + scorer); featureIndex = dataset.featureIndex; labelIndex = dataset.labelIndex; final CrossValidator<L, F> crossValidator = new CrossValidator<L, F>(dataset, kfold); final Function< Triple<GeneralDataset<L, F>, GeneralDataset<L, F>, CrossValidator.SavedState>, Double> score = new Function< Triple<GeneralDataset<L, F>, GeneralDataset<L, F>, CrossValidator.SavedState>, Double>() { public Double apply( Triple<GeneralDataset<L, F>, GeneralDataset<L, F>, CrossValidator.SavedState> fold) { GeneralDataset<L, F> trainSet = fold.first(); GeneralDataset<L, F> devSet = fold.second(); double[] weights = (double[]) fold.third().state; double[][] weights2D; weights2D = trainWeights( trainSet, weights, true); // must of course bypass sigma tuning here. fold.third().state = ArrayUtils.flatten(weights2D); LinearClassifier<L, F> classifier = new LinearClassifier<L, F>( weights2D, trainSet.featureIndex, trainSet.labelIndex); double score = scorer.score(classifier, devSet); // System.out.println("score: "+score); System.out.print("."); return score; } }; Function<Double, Double> negativeScorer = new Function<Double, Double>() { public Double apply(Double sigmaToTry) { // sigma = sigmaToTry; setSigma(sigmaToTry); Double averageScore = crossValidator.computeAverage(score); System.err.print("##sigma = " + getSigma() + " "); System.err.println("-> average Score: " + averageScore); return -averageScore; } }; double bestSigma = minimizer.minimize(negativeScorer); System.err.println("##best sigma: " + bestSigma); setSigma(bestSigma); }
/** * Sets the sigma parameter to a value that optimizes the held-out score given by <code>scorer * </code>. Search for an optimal value is carried out by <code>minimizer</code> dataset the data * set to optimize sigma on. kfold * * @return an interim set of optimal weights: the weights */ public double[] heldOutSetSigma( final GeneralDataset<L, F> trainSet, final GeneralDataset<L, F> devSet, final Scorer<L> scorer, LineSearcher minimizer) { featureIndex = trainSet.featureIndex; labelIndex = trainSet.labelIndex; // double[] resultWeights = null; Timing timer = new Timing(); NegativeScorer negativeScorer = new NegativeScorer(trainSet, devSet, scorer, timer); timer.start(); double bestSigma = minimizer.minimize(negativeScorer); System.err.println("##best sigma: " + bestSigma); setSigma(bestSigma); return ArrayUtils.flatten( trainWeights( trainSet, negativeScorer.weights, true)); // make sure it's actually the interim weights from best sigma }