/**
 * Builds a {@link LinearClassifier} from a flat weight vector.
 *
 * <p>The flat vector is reshaped into a [feature][label] weight matrix before constructing
 * the classifier over this factory's feature and label indices.
 *
 * @param weights flattened weight vector
 * @return a linear classifier backed by the reshaped weights
 */
public LinearClassifier createLinearClassifier(double[] weights) {
  // Let the objective reshape when available (it knows its own weight layout);
  // otherwise reshape by the index sizes.
  double[][] weights2D =
      (objective != null)
          ? objective.to2D(weights)
          : ArrayUtils.to2D(weights, featureIndex.size(), labelIndex.size());
  return new LinearClassifier<L, F>(weights2D, featureIndex, labelIndex);
}
    /**
     * Trains interim weights at the candidate sigma and returns the negated held-out score,
     * so that a minimizer over this function effectively maximizes the held-out score.
     *
     * <p>Side effects: updates the stored flattened {@code weights} with the interim result,
     * and logs the sigma, score, and elapsed time to stderr before restarting the timer.
     *
     * @param sigmaToTry the regularization sigma to evaluate
     * @return the negative of the held-out score obtained with this sigma
     */
    public Double apply(Double sigmaToTry) {
      setSigma(sigmaToTry);

      double[][] trained = trainWeights(trainSet, weights, true); // bypass.

      // Keep the flattened interim weights so the caller can seed a final retrain from them.
      weights = ArrayUtils.flatten(trained);

      LinearClassifier<L, F> candidate =
          new LinearClassifier<L, F>(trained, trainSet.featureIndex, trainSet.labelIndex);

      double heldOutScore = scorer.score(candidate, devSet);
      System.err.print("##sigma = " + getSigma() + " ");
      System.err.println("-> average Score: " + heldOutScore);
      System.err.println("##time elapsed: " + timer.stop() + " milliseconds.");
      timer.restart();
      // Negated: the surrounding search minimizes, so smaller means a better held-out score.
      return -heldOutScore;
    }
  /**
   * Sets the sigma parameter to the value that optimizes the held-out score given by
   * {@code scorer}. The search for an optimal value is carried out by {@code minimizer}.
   *
   * @param trainSet the data set used to train interim classifiers for each candidate sigma
   * @param devSet the held-out data set on which each candidate classifier is scored
   * @param scorer scores a trained classifier against {@code devSet}
   * @param minimizer line searcher that minimizes the negated held-out score over sigma
   * @return an interim set of optimal weights: the flattened weights retrained at the best sigma
   */
  public double[] heldOutSetSigma(
      final GeneralDataset<L, F> trainSet,
      final GeneralDataset<L, F> devSet,
      final Scorer<L> scorer,
      LineSearcher minimizer) {

    featureIndex = trainSet.featureIndex;
    labelIndex = trainSet.labelIndex;
    // double[] resultWeights = null;
    Timing timer = new Timing();

    // Wraps the scorer so that minimizing its output maximizes the held-out score.
    NegativeScorer negativeScorer = new NegativeScorer(trainSet, devSet, scorer, timer);

    timer.start();
    double bestSigma = minimizer.minimize(negativeScorer);
    System.err.println("##best sigma: " + bestSigma);
    setSigma(bestSigma);

    // Retrain at the chosen sigma, seeded from the interim weights the scorer left behind,
    // so the returned weights actually correspond to bestSigma.
    return ArrayUtils.flatten(
        trainWeights(
            trainSet,
            negativeScorer.weights,
            true)); // make sure it's actually the interim weights from best sigma
  }
// ===== Example #4 (extraction artifact: original marker "예제 #4" / "0") =====
 /**
  * Make a clique over the provided relativeIndices. relativeIndices should be sorted.
  *
  * @param relativeIndices sorted relative positions that define the clique
  * @return the clique corresponding to these indices
  */
 public static Clique valueOf(int[] relativeIndices) {
   checkSorted(relativeIndices);
   // Defensive copy: the caller may mutate its array after this returns.
   int[] safeIndices = ArrayUtils.copy(relativeIndices);
   return valueOfHelper(safeIndices);
 }
 /**
  * Return a WordsToSentencesAnnotator that splits on newlines (only), which are then deleted.
  *
  * <p>This factory counts lines by asking the underlying splitter to emit empty token lists
  * for empty lines; those empty lists are treated as empty lines for sentence numbering,
  * though empty sentences are not actually included in the annotation. Only this factory
  * leads to empty sentences.
  *
  * @param verbose Whether it is verbose.
  * @param nlToken Zero or more newline tokens, which might be a {@literal \n} or the fake
  *     newline tokens returned from the tokenizer.
  * @return A WordsToSentencesAnnotator.
  */
 public static WordsToSentencesAnnotator newlineSplitter(boolean verbose, String... nlToken) {
   // The processor built from a newline-token set keeps empty lines as empty sentences.
   WordToSentenceProcessor<CoreLabel> newlineOnlyProcessor =
       new WordToSentenceProcessor<CoreLabel>(ArrayUtils.asImmutableSet(nlToken));
   return new WordsToSentencesAnnotator(verbose, true, newlineOnlyProcessor);
 }
 /**
  * Trains a classifier on {@code dataset}, seeding the optimizer with the given 2-D weights.
  *
  * @param dataset the training data
  * @param initialWeights2D initial [feature][label] weight matrix, or {@code null} to train
  *     without an initial weight vector
  * @return the trained linear classifier
  */
 public LinearClassifier<L, F> trainClassifierWithInitialWeights(
     GeneralDataset<L, F> dataset, double[][] initialWeights2D) {
   // trainClassifier expects a flat vector, so flatten (null stays null).
   double[] flatInitialWeights = null;
   if (initialWeights2D != null) {
     flatInitialWeights = ArrayUtils.flatten(initialWeights2D);
   }
   return trainClassifier(dataset, flatInitialWeights);
 }