/**
 * Builds a classifier from a flat weight vector.
 *
 * <p>The flat vector is first reshaped into a two-dimensional array: by {@code objective.to2D}
 * when {@code objective} is non-null, otherwise by {@code ArrayUtils.to2D} using the current
 * sizes of {@code featureIndex} and {@code labelIndex}.
 *
 * <p>NOTE(review): the declared return type is the raw {@code LinearClassifier}; it is left
 * untouched here so that existing callers keep compiling exactly as before.
 *
 * @param weights the flat weight vector to reshape
 * @return a classifier over the reshaped weights and the current feature and label indices
 */
public LinearClassifier createLinearClassifier(double[] weights) {
  final double[][] reshaped =
      (objective == null)
          ? ArrayUtils.to2D(weights, featureIndex.size(), labelIndex.size())
          : objective.to2D(weights);
  return new LinearClassifier<L, F>(reshaped, featureIndex, labelIndex);
}
public Double apply(Double sigmaToTry) { double[][] weights2D; setSigma(sigmaToTry); weights2D = trainWeights(trainSet, weights, true); // bypass. weights = ArrayUtils.flatten(weights2D); LinearClassifier<L, F> classifier = new LinearClassifier<L, F>(weights2D, trainSet.featureIndex, trainSet.labelIndex); double score = scorer.score(classifier, devSet); // System.out.println("score: "+score); // System.out.print("."); System.err.print("##sigma = " + getSigma() + " "); System.err.println("-> average Score: " + score); System.err.println("##time elapsed: " + timer.stop() + " milliseconds."); timer.restart(); return -score; }
/** * Sets the sigma parameter to a value that optimizes the held-out score given by <code>scorer * </code>. Search for an optimal value is carried out by <code>minimizer</code> dataset the data * set to optimize sigma on. kfold * * @return an interim set of optimal weights: the weights */ public double[] heldOutSetSigma( final GeneralDataset<L, F> trainSet, final GeneralDataset<L, F> devSet, final Scorer<L> scorer, LineSearcher minimizer) { featureIndex = trainSet.featureIndex; labelIndex = trainSet.labelIndex; // double[] resultWeights = null; Timing timer = new Timing(); NegativeScorer negativeScorer = new NegativeScorer(trainSet, devSet, scorer, timer); timer.start(); double bestSigma = minimizer.minimize(negativeScorer); System.err.println("##best sigma: " + bestSigma); setSigma(bestSigma); return ArrayUtils.flatten( trainWeights( trainSet, negativeScorer.weights, true)); // make sure it's actually the interim weights from best sigma }
/** Make a clique over the provided relativeIndices. relativeIndices should be sorted. */ public static Clique valueOf(int[] relativeIndices) { checkSorted(relativeIndices); // copy the array so as to be safe return valueOfHelper(ArrayUtils.copy(relativeIndices)); }
/** * Return a WordsToSentencesAnnotator that splits on newlines (only), which are then deleted. This * constructor counts the lines by putting in empty token lists for empty lines. It tells the * underlying splitter to return empty lists of tokens and then treats those empty lists as empty * lines. We don't actually include empty sentences in the annotation, though. But they are used * in numbering the sentence. Only this constructor leads to empty sentences. * * @param verbose Whether it is verbose. * @param nlToken Zero or more new line tokens, which might be a {@literal \n} or the fake newline * tokens returned from the tokenizer. * @return A WordsToSentenceAnnotator. */ public static WordsToSentencesAnnotator newlineSplitter(boolean verbose, String... nlToken) { // this constructor will keep empty lines as empty sentences WordToSentenceProcessor<CoreLabel> wts = new WordToSentenceProcessor<CoreLabel>(ArrayUtils.asImmutableSet(nlToken)); return new WordsToSentencesAnnotator(verbose, true, wts); }
/**
 * Trains a classifier on {@code dataset}, optionally starting from two-dimensional initial
 * weights.
 *
 * <p>Non-null initial weights are flattened with {@code ArrayUtils.flatten} before being handed
 * to {@code trainClassifier}; a null argument is forwarded as null.
 *
 * @param dataset the data set to train on
 * @param initialWeights2D the initial weights as a two-dimensional array, or null for none
 * @return the classifier returned by {@code trainClassifier}
 */
public LinearClassifier<L, F> trainClassifierWithInitialWeights(
    GeneralDataset<L, F> dataset, double[][] initialWeights2D) {
  // Typed local (rather than a bare null literal) so the same trainClassifier
  // overload is selected whether or not initial weights were supplied.
  double[] flatInitialWeights = null;
  if (initialWeights2D != null) {
    flatInitialWeights = ArrayUtils.flatten(initialWeights2D);
  }
  return trainClassifier(dataset, flatInitialWeights);
}