Example #1
  /**
   * Inserts the classifier into the population. First, it looks for a classifier in the
   * population with the same action and condition and, if one exists, simply increments its
   * numerosity. It then checks whether the number of micro-classifiers exceeds the maximum
   * population size; if it does, it deletes one classifier from the population by calling
   * deleteClFromPopulation. The action set, if it is not null, is kept consistent with these
   * updates.
   *
   * @param cl is the classifier that has to be inserted into the population.
   * @param ASet is the action set, which is updated alongside the population; it may be null.
   */
  public void insertInPopulation(Classifier cl, Population ASet) {
    boolean found = false;
    int i = 0;

    while (i < macroClSum && !found) {
      if (set[i].equals(cl)) {
        set[i].increaseNumerosity(cl.getNumerosity());
        microClSum += cl.getNumerosity();
        if (ASet != null) {
          if (ASet.isThereClassifier(set[i]) >= 0) ASet.microClSum += cl.getNumerosity();
        }
        found = true;
      }
      i++;
    }
    if (!found) {
      addClassifier(cl);
    }

    // At this point, the classifier has been added to the population.
    if (microClSum > Config.popSize) {
      // If we have inserted too many classifiers, we have to delete one.
      deleteClFromPopulation(ASet);
    }
  } // end insertInPopulation
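A minimal, self-contained sketch of the macro/micro bookkeeping idea above: an equal rule only grows in numerosity, so the macro-classifier count stays fixed while the micro count grows. Rule and the map are hypothetical stand-ins, not the actual Classifier/Population API.

  import java.util.LinkedHashMap;
  import java.util.Map;

  class MicroMacroDemo {
    record Rule(String condition, int action) {} // hypothetical stand-in

    static final Map<Rule, Integer> numerosity = new LinkedHashMap<>();
    static int microClSum = 0; // total number of micro-classifiers

    static void insert(Rule r, int num) {
      numerosity.merge(r, num, Integer::sum); // equal rule: bump numerosity only
      microClSum += num;
    }

    public static void main(String[] args) {
      insert(new Rule("01#1", 0), 1);
      insert(new Rule("01#1", 0), 2); // duplicate: one macro rule, numerosity 3
      System.out.println("macro=" + numerosity.size() + " micro=" + microClSum);
    }
  }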
Example #2
  @Override
  public Map<LookupElement, StringBuilder> getRelevanceStrings() {
    final LinkedHashMap<LookupElement, StringBuilder> map =
        new LinkedHashMap<LookupElement, StringBuilder>();
    for (LookupElement item : myItems) {
      map.put(item, new StringBuilder());
    }
    final MultiMap<CompletionSorterImpl, LookupElement> inputBySorter =
        groupItemsBySorter(new ArrayList<LookupElement>(map.keySet()));

    if (inputBySorter.size() > 1) {
      for (LookupElement element : map.keySet()) {
        map.get(element).append(obtainSorter(element)).append(": ");
      }
    }

    for (CompletionSorterImpl sorter : inputBySorter.keySet()) {
      final LinkedHashMap<LookupElement, StringBuilder> subMap =
          new LinkedHashMap<LookupElement, StringBuilder>();
      for (LookupElement element : inputBySorter.get(sorter)) {
        subMap.put(element, map.get(element));
      }
      Classifier<LookupElement> classifier = myClassifiers.get(sorter);
      if (classifier != null) {
        classifier.describeItems(subMap, createContext(false));
      }
    }

    return map;
  }
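The grouping step above relies on IntelliJ's MultiMap; a hedged sketch of the same order-preserving grouping using only the JDK (plain strings stand in for LookupElement and CompletionSorterImpl):

  import java.util.ArrayList;
  import java.util.LinkedHashMap;
  import java.util.List;
  import java.util.Map;
  import java.util.function.Function;

  class GroupBySorterDemo {
    // Order-preserving grouping: groups appear in first-seen order, and items
    // keep their relative order inside each group.
    static <K, V> Map<K, List<V>> groupBy(List<V> items, Function<V, K> key) {
      Map<K, List<V>> grouped = new LinkedHashMap<>();
      for (V item : items) {
        grouped.computeIfAbsent(key.apply(item), k -> new ArrayList<>()).add(item);
      }
      return grouped;
    }

    public static void main(String[] args) {
      System.out.println(groupBy(List.of("alpha", "beta", "avocado"), s -> s.charAt(0)));
      // {a=[alpha, avocado], b=[beta]}
    }
  }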
Example #3
  /**
   * Inserts the classifier into the population using subsumption. First, it looks for the most
   * general classifier in the population that can subsume the new one (in which case it increments
   * the subsumer's numerosity), or for an equal classifier (incrementing that one instead). Only
   * if neither exists is the classifier actually added. Note that, unlike insertInPopulation, this
   * method never deletes a classifier, even if the maximum population size is exceeded.
   *
   * @param cl is the classifier that has to be inserted into the population.
   * @param ASet Population where the classifier will be inserted.
   */
  public void insertInPSubsumingCl(Classifier cl, Population ASet) {
    int i = 0;
    Classifier bestSubsumer = null;
    Classifier equalClassifier = null;

    // We look for the best subsumer or for an equal classifier.
    while (i < macroClSum) {
      if (set[i].couldSubsume() && set[i].isMoreGeneral(cl)) {
        if (bestSubsumer == null) bestSubsumer = set[i];
        else if (set[i].isMoreGeneral(bestSubsumer)) bestSubsumer = set[i];
      }
      if (set[i].equals(cl)) equalClassifier = set[i];
      i++;
    }

    // If there is a subsumer, its numerosity is increased.
    if (bestSubsumer != null) {
      bestSubsumer.increaseNumerosity(cl.getNumerosity());
      microClSum += cl.getNumerosity();
    } else if (equalClassifier != null) {
      equalClassifier.increaseNumerosity(cl.getNumerosity());
      microClSum += cl.getNumerosity();
    } else {
      addClassifier(cl);
    }
    // There is no classifier deletion here, regardless of whether the maximum
    // population size has been exceeded.
  } // end insertInPSubsumingCl
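To make the "best subsumer" scan concrete, here is a hedged standalone sketch over ternary condition strings, where generality is coverage via '#' don't-cares; the String rules and the covers test are illustrative assumptions, not the actual Classifier API.

  import java.util.List;

  class BestSubsumerDemo {
    // A condition covers another if every gene is '#' or matches exactly.
    static boolean covers(String gen, String spec) {
      for (int i = 0; i < gen.length(); i++) {
        if (gen.charAt(i) != '#' && gen.charAt(i) != spec.charAt(i)) return false;
      }
      return true;
    }

    // Among rules that cover the candidate, keep the most general one.
    static String bestSubsumer(List<String> pop, String cand) {
      String best = null;
      for (String r : pop) {
        if (covers(r, cand) && (best == null || covers(r, best))) best = r;
      }
      return best;
    }

    public static void main(String[] args) {
      System.out.println(bestSubsumer(List.of("01#1", "0##1", "1###"), "0101")); // 0##1
    }
  }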
Example #4
  /**
   * Calculates the class membership probabilities for the given test instance.
   *
   * @param instance the instance to be classified
   * @return predicted class probability distribution
   * @exception Exception if distribution can't be computed successfully
   */
  public double[] distributionForInstance(Instance instance) throws Exception {
    if (instance.classAttribute().isNumeric()) {
      throw new UnsupportedClassTypeException("Decorate can't handle a numeric class!");
    }
    double[] sums = new double[instance.numClasses()], newProbs;
    Classifier curr;

    for (int i = 0; i < m_Committee.size(); i++) {
      curr = (Classifier) m_Committee.get(i);
      newProbs = curr.distributionForInstance(instance);
      for (int j = 0; j < newProbs.length; j++) sums[j] += newProbs[j];
    }
    if (Utils.eq(Utils.sum(sums), 0)) {
      return sums;
    } else {
      Utils.normalize(sums);
      return sums;
    }
  }
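The committee vote above is just a sum of per-member class distributions followed by normalization; a minimal sketch of that step, with hypothetical names in place of the Weka utilities:

  class CommitteeVoteDemo {
    // Sum each member's distribution, then normalize so the result sums to 1.
    static double[] average(double[][] memberProbs, int numClasses) {
      double[] sums = new double[numClasses];
      for (double[] p : memberProbs) {
        for (int j = 0; j < numClasses; j++) sums[j] += p[j];
      }
      double total = 0;
      for (double s : sums) total += s;
      if (total == 0) return sums; // degenerate case: keep the zero vector
      for (int j = 0; j < numClasses; j++) sums[j] /= total;
      return sums;
    }

    public static void main(String[] args) {
      double[] probs = average(new double[][] {{0.9, 0.1}, {0.6, 0.4}}, 2);
      System.out.println(probs[0] + " " + probs[1]); // 0.75 0.25
    }
  }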
Example #5
  /**
   * Deletes one classifier from the population. After that, if the population passed as a
   * parameter is not null, it looks for the deleted classifier there as well; if found, the
   * deletion is mirrored in that set.
   *
   * @param aSet is the population in which the deleted classifier has to be searched for.
   * @return the deleted classifier.
   */
  public Classifier deleteClFromPopulation(Population aSet) {
    // A classifier has been deleted from the population
    Classifier clDeleted = deleteClassifier();

    // Now, this classifier has to be deleted from the action set (if it exists there).
    if (aSet != null) {
      int pos = aSet.isThereClassifier(clDeleted); // It is searched in the action set.
      if (pos >= 0) { // It has to be deleted from the action set too.
        aSet.microClSum--;

        // If the classifier has 0 numerosity, we remove it from the action set as well.
        if (clDeleted.getNumerosity() == 0) { // It has to be completely deleted from the action set.
          aSet.macroClSum--; // Decrements the number of macroclassifiers
          aSet.set[pos] = aSet.set[aSet.macroClSum]; // Moves the last classifier to the deleted one
          aSet.set[aSet.macroClSum] = null; // Puts the last classifier to null.
        }
      }
    }
    return clDeleted;
  } // end deleteClFromPopulation
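The array manipulation on aSet.set above is a classic swap-with-last removal; a small sketch of the idiom in isolation (all names hypothetical):

  import java.util.Arrays;

  class SwapRemoveDemo {
    static Object[] set = {"a", "b", "c", "d"};
    static int macroClSum = 4; // number of live entries

    // O(1) removal from an unordered array-backed set: move the last element
    // into the freed slot and null out the old tail.
    static void removeAt(int pos) {
      macroClSum--;
      set[pos] = set[macroClSum];
      set[macroClSum] = null;
    }

    public static void main(String[] args) {
      removeAt(1);
      System.out.println(Arrays.toString(set) + " macro=" + macroClSum); // [a, d, c, null] macro=3
    }
  }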
Example #6
  /** This method applies action-set subsumption. */
  public void doActionSetSubsumption() {
    int i, pos = 0;
    Classifier cl = null;

    for (i = 0; i < macroClSum; i++) {
      if (set[i].couldSubsume()) {
        if (cl == null
            || set[i].numberOfDontCareSymbols() > cl.numberOfDontCareSymbols()
            || (set[i].numberOfDontCareSymbols() == cl.numberOfDontCareSymbols()
                && Config.rand() < 0.5)) {
          cl = set[i];
          pos = i;
        }
      }
    }
    if (cl != null) {
      for (i = 0; i < macroClSum; i++) {
        if (cl != set[i] && cl.isMoreGeneral(set[i])) {
          cl.increaseNumerosity(set[i].getNumerosity());
          // Now, the classifier has to be removed from the actionSet and the population.
          // It's deleted from the action set.
          Classifier clDeleted = set[i];
          deleteClassifier(i);

          // And now, it's deleted from the population
          Population p = parentRef;
          while (p.parentRef != null) {
            p = p.parentRef;
          }

          // The classifier is searched for in the initial population.
          pos = p.isThereClassifier(clDeleted);

          if (pos >= 0) p.deleteClassifier(pos);
        }
      }
    }
  } // end doActionSetSubsumption
Example #7
  @Override
  public void addElement(
      Lookup lookup, LookupElement element, LookupElementPresentation presentation) {
    StatisticsWeigher.clearBaseStatisticsInfo(element);

    final String invariant =
        presentation.getItemText()
            + "###"
            + getTailTextOrSpace(presentation)
            + "###"
            + presentation.getTypeText();
    element.putUserData(PRESENTATION_INVARIANT, invariant);

    CompletionSorterImpl sorter = obtainSorter(element);
    Classifier<LookupElement> classifier = myClassifiers.get(sorter);
    if (classifier == null) {
      myClassifiers.put(sorter, classifier = sorter.buildClassifier(new AlphaClassifier(lookup)));
    }
    classifier.addElement(element);

    super.addElement(lookup, element, presentation);
  }
Example #8
  /**
   * Indicates whether this classifier subsumes the classifier passed as a parameter.
   *
   * @param cl is the classifier that may be subsumed.
   * @return a boolean indicating whether this classifier subsumes it.
   */
  public boolean doesSubsume(Classifier cl) {
    int i;

    // First, check if the action is the same
    if (action != cl.getAction()) return false;

    // Then, check that this classifier is more general
    if (parameters.couldSubsume()) {
      for (i = 0; i < rep.length; i++) {
        if (!rep[i].isMoreGeneral(cl.rep[i])) return false;
      }
      return true;
    }
    return false;
  } // end doesSubsume
Example #9
 /**
  * Adds a classifier to the population.
  *
  * @param cl is the new classifier to be added.
  */
 public void addClassifier(Classifier cl) {
   try {
     set[macroClSum] = cl;
     microClSum += cl.getNumerosity();
     macroClSum++;
   } catch (Exception e) {
     System.out.println(
         "Exception in the insertion of a new classifier. The macroClSum is : "
             + macroClSum
             + " and the microClsum: "
             + microClSum);
     System.out.println(
         "And the maximum number of classifiers in the population is: " + Config.popSize);
     e.printStackTrace();
   }
 } // end addClassifier
Example #10
  /**
   * Indicates whether this classifier is equal to the classifier given as a parameter.
   *
   * @param cl is a classifier.
   * @return a boolean indicating whether they are equal.
   */
  public boolean equals(Classifier cl) {
    int i;

    try {
      // Checking the action
      if (action != cl.getAction()) return false;

      // Checking the condition
      for (i = 0; i < rep.length; i++) {
        if (!rep[i].equals(cl.rep[i])) return false;
      }
      return true;

    } catch (Exception e) {
      return false;
    }
  } // end equals
Example #11
  /**
   * Applies crossover. It generates two children.
   *
   * @param parent1 is the first parent.
   * @param parent2 is the second parent
   * @param child1 is the first child
   * @param child2 is the second child.
   */
  public void makeCrossover(
      Classifier parent1, Classifier parent2, Classifier child1, Classifier child2) {
    int i = 0;

    // cross1 is a number between [0.. clLength-1]
    int cross1 = (int) (Config.rand() * (double) Config.clLength);

    // cross2 is a number between [1..clLength].
    int cross2 = (int) (Config.rand() * (double) Config.clLength) + 1;

    if (cross1 > cross2) {
      int aux = cross2;
      cross2 = cross1;
      cross1 = aux;
      // In the else-if branch it is not necessary to check that (cross2 < clLength)
      // before incrementing the point, because cross1 is in [0..clLength-1].
    } else if (cross1 == cross2) cross2++;

    // All the intervals (real representation) or genes (ternary representation) that
    // are not in the cross point are crossed.
    if (!Config.ternaryRep) {
      for (i = cross1 + 1; i < cross2 - 1; i++) {
        child2.setAllele(i, parent1);
        child1.setAllele(i, parent2);
      }

      // Now we have to cross the border allele
      child1.crossAllele(cross1, parent1, parent2);
      child1.crossAllele(cross2 - 1, parent2, parent1);
      child2.crossAllele(cross1, parent2, parent1);
      child2.crossAllele(cross2 - 1, parent1, parent2);
    } else {
      for (i = cross1; i < cross2 - 1; i++) {
        child2.setAllele(i, parent1);
        child1.setAllele(i, parent2);
      }
    }
  } // end makeCrossover
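A standalone sketch of the same two-point crossover on a ternary string: the cut-point conventions (cross1 in [0..len-1], cross2 in [1..len], swap or bump on collision) mirror the method above, while the fixed seed and the flat char arrays are illustrative scaffolding.

  import java.util.Random;

  class TwoPointCrossoverDemo {
    public static void main(String[] args) {
      Random rnd = new Random(42);
      char[] p1 = "00000000".toCharArray(), p2 = "11111111".toCharArray();
      int len = p1.length;
      int cross1 = rnd.nextInt(len);     // [0 .. len-1]
      int cross2 = rnd.nextInt(len) + 1; // [1 .. len]
      if (cross1 > cross2) {
        int aux = cross2;
        cross2 = cross1;
        cross1 = aux;
      } else if (cross1 == cross2) cross2++;
      char[] c1 = p1.clone(), c2 = p2.clone();
      for (int i = cross1; i < cross2 - 1; i++) { // ternary case: swap the genes between the cuts
        c1[i] = p2[i];
        c2[i] = p1[i];
      }
      System.out.println(new String(c1) + " " + new String(c2));
    }
  }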
Example #12
  /**
   * Builds the Decorate classifier.
   *
   * @param data the training data to be used for generating the classifier
   * @exception Exception if the classifier could not be built successfully
   */
  public void buildClassifier(Instances data) throws Exception {
    if (m_Classifier == null) {
      throw new Exception("A base classifier has not been specified!");
    }
    if (data.checkForStringAttributes()) {
      throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
    }
    if (data.classAttribute().isNumeric()) {
      throw new UnsupportedClassTypeException("Decorate can't handle a numeric class!");
    }
    if (m_NumIterations < m_DesiredSize)
      throw new Exception("Max number of iterations must be >= desired ensemble size!");

    // initialize random number generator
    if (m_Seed == -1) m_Random = new Random();
    else m_Random = new Random(m_Seed);

    int i = 1; // current committee size
    int numTrials = 1; // number of Decorate iterations
    Instances divData = new Instances(data); // local copy of data - diversity data
    divData.deleteWithMissingClass();
    Instances artData = null; // artificial data

    // compute the number of artificial instances to add at each iteration
    int artSize = (int) (Math.abs(m_ArtSize) * divData.numInstances());
    if (artSize == 0) artSize = 1; // add at least one random example
    computeStats(data); // Compute training data stats for creating artificial examples

    // initialize new committee
    m_Committee = new Vector();
    Classifier newClassifier = m_Classifier;
    newClassifier.buildClassifier(divData);
    m_Committee.add(newClassifier);
    double eComm = computeError(divData); // compute ensemble error
    if (m_Debug)
      System.out.println(
          "Initialize:\tClassifier " + i + " added to ensemble. Ensemble error = " + eComm);

    // repeat till desired committee size is reached OR the max number of iterations is exceeded
    while (i < m_DesiredSize && numTrials < m_NumIterations) {
      // Generate artificial training examples
      artData = generateArtificialData(artSize, data);

      // Label artificial examples
      labelData(artData);
      addInstances(divData, artData); // Add new artificial data

      // Build new classifier
      Classifier[] tmp = Classifier.makeCopies(m_Classifier, 1);
      newClassifier = tmp[0];
      newClassifier.buildClassifier(divData);
      // Remove all the artificial data
      removeInstances(divData, artSize);

      // Test if the new classifier should be added to the ensemble
      m_Committee.add(newClassifier); // add new classifier to current committee
      double currError = computeError(divData);
      if (currError <= eComm) { // adding the new member did not increase the error
        i++;
        eComm = currError;
        if (m_Debug)
          System.out.println(
              "Iteration: "
                  + (1 + numTrials)
                  + "\tClassifier "
                  + i
                  + " added to ensemble. Ensemble error = "
                  + eComm);
      } else { // reject the current classifier because it increased the ensemble error
        m_Committee.removeElementAt(m_Committee.size() - 1); // pop the last member
      }
      numTrials++;
    }
  }
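The accept/reject rule that grows the committee can be isolated as follows; this is a toy sketch in which a member's contribution to the ensemble "error" is faked by a random number, standing in for computeError on divData.

  import java.util.ArrayList;
  import java.util.List;
  import java.util.Random;

  class DecorateLoopDemo {
    public static void main(String[] args) {
      Random rnd = new Random(1);
      List<Double> committee = new ArrayList<>(List.of(0.30)); // fake member "errors"
      double eComm = 0.30;
      int desiredSize = 4, maxIters = 20, trials = 1;
      while (committee.size() < desiredSize && trials < maxIters) {
        double candidate = rnd.nextDouble() * 0.5; // "build" a new member
        committee.add(candidate);
        double eNew = committee.stream().mapToDouble(d -> d).average().orElse(1.0);
        if (eNew <= eComm) eComm = eNew;             // accept: error did not grow
        else committee.remove(committee.size() - 1); // reject: pop the last member
        trials++;
      }
      System.out.println("committee=" + committee + " error=" + eComm);
    }
  }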
Example #13
  /**
   * The core implementation of the search.
   *
   * @param root The root word to search from. Traditionally, this is the root of the sentence.
   * @param candidateFragments The callback for the resulting sentence fragments. This is a
   *     predicate of a triple of values. The return value of the predicate determines whether we
   *     should continue searching. The triple is a triple of
   *     <ol>
   *       <li>The log probability of the sentence fragment, according to the featurizer and the
   *           weights
   *       <li>The features along the path to this fragment. The last element of this is the
   *           features from the most recent step.
   *       <li>The sentence fragment. Because it is relatively expensive to compute the resulting
   *           tree, this is returned as a lazy {@link Supplier}.
   *     </ol>
   *
   * @param classifier The classifier for whether an arc should be on the path to a clause split, a
   *     clause split itself, or neither.
   * @param featurizer The featurizer to use. Make sure this matches the weights!
   * @param actionSpace The action space we are allowed to take. Each action defines a means of
   *     splitting a clause on a dependency boundary.
   */
  protected void search(
      // The root to search from
      IndexedWord root,
      // The output specs
      final Predicate<Triple<Double, List<Counter<String>>, Supplier<SentenceFragment>>>
          candidateFragments,
      // The learning specs
      final Classifier<ClauseSplitter.ClauseClassifierLabel, String> classifier,
      Map<String, ? extends List<String>> hardCodedSplits,
      final Function<Triple<State, Action, State>, Counter<String>> featurizer,
      final Collection<Action> actionSpace,
      final int maxTicks) {
    // (the fringe)
    PriorityQueue<Pair<State, List<Counter<String>>>> fringe = new FixedPrioritiesPriorityQueue<>();
    // (avoid duplicate work)
    Set<IndexedWord> seenWords = new HashSet<>();

    State firstState =
        new State(null, null, -9000, null, x -> {}, true); // First state is implicitly "done"
    fringe.add(Pair.makePair(firstState, new ArrayList<>(0)), -0.0);
    int ticks = 0;

    while (!fringe.isEmpty()) {
      if (++ticks > maxTicks) {
        //        System.err.println("WARNING! Timed out on search with " + ticks + " ticks");
        return;
      }
      // Useful variables
      double logProbSoFar = fringe.getPriority();
      assert logProbSoFar <= 0.0;
      Pair<State, List<Counter<String>>> lastStatePair = fringe.removeFirst();
      State lastState = lastStatePair.first;
      List<Counter<String>> featuresSoFar = lastStatePair.second;
      IndexedWord rootWord = lastState.edge == null ? root : lastState.edge.getDependent();

      // Register thunk
      if (lastState.isDone) {
        if (!candidateFragments.test(
            Triple.makeTriple(
                logProbSoFar,
                featuresSoFar,
                () -> {
                  SemanticGraph copy = new SemanticGraph(tree);
                  lastState
                      .thunk
                      .andThen(
                          x -> {
                            // Add the extra edges back in, if they don't break the tree-ness of the
                            // extraction
                            for (IndexedWord newTreeRoot : x.getRoots()) {
                              if (newTreeRoot != null) { // what a strange thing to have happen...
                                for (SemanticGraphEdge extraEdge :
                                    extraEdgesByGovernor.get(newTreeRoot)) {
                                  assert Util.isTree(x);
                                  //noinspection unchecked
                                  addSubtree(
                                      x,
                                      newTreeRoot,
                                      extraEdge.getRelation().toString(),
                                      tree,
                                      extraEdge.getDependent(),
                                      tree.getIncomingEdgesSorted(newTreeRoot));
                                  assert Util.isTree(x);
                                }
                              }
                            }
                          })
                      .accept(copy);
                  return new SentenceFragment(copy, assumedTruth, false);
                }))) {
          break;
        }
      }

      // Find relevant auxiliary terms
      SemanticGraphEdge subjOrNull = null;
      SemanticGraphEdge objOrNull = null;
      for (SemanticGraphEdge auxEdge : tree.outgoingEdgeIterable(rootWord)) {
        String relString = auxEdge.getRelation().toString();
        if (relString.contains("obj")) {
          objOrNull = auxEdge;
        } else if (relString.contains("subj")) {
          subjOrNull = auxEdge;
        }
      }

      // Iterate over children
      // For each outgoing edge...
      for (SemanticGraphEdge outgoingEdge : tree.outgoingEdgeIterable(rootWord)) {
        // Prohibit indirect speech verbs from splitting off clauses
        // (e.g., 'said', 'think')
        // This fires if the governor is an indirect speech verb, and the outgoing edge is a ccomp
        if (outgoingEdge.getRelation().toString().equals("ccomp")
            && ((outgoingEdge.getGovernor().lemma() != null
                    && INDIRECT_SPEECH_LEMMAS.contains(outgoingEdge.getGovernor().lemma()))
                || INDIRECT_SPEECH_LEMMAS.contains(outgoingEdge.getGovernor().word()))) {
          continue;
        }
        // Get some variables
        String outgoingEdgeRelation = outgoingEdge.getRelation().toString();
        List<String> forcedArcOrder = hardCodedSplits.get(outgoingEdgeRelation);
        if (forcedArcOrder == null && outgoingEdgeRelation.contains(":")) {
          forcedArcOrder =
              hardCodedSplits.get(
                  outgoingEdgeRelation.substring(0, outgoingEdgeRelation.indexOf(":")) + ":*");
        }
        boolean doneForcedArc = false;
        // For each action...
        for (Action action :
            (forcedArcOrder == null ? actionSpace : orderActions(actionSpace, forcedArcOrder))) {
          // Check the prerequisite
          if (!action.prerequisitesMet(tree, outgoingEdge)) {
            continue;
          }
          if (forcedArcOrder != null && doneForcedArc) {
            break;
          }
          // 1. Compute the child state
          Optional<State> candidate =
              action.applyTo(tree, lastState, outgoingEdge, subjOrNull, objOrNull);
          if (candidate.isPresent()) {
            double logProbability;
            ClauseClassifierLabel bestLabel;
            Counter<String> features =
                featurizer.apply(Triple.makeTriple(lastState, action, candidate.get()));
            if (forcedArcOrder != null && !doneForcedArc) {
              logProbability = 0.0;
              bestLabel = ClauseClassifierLabel.CLAUSE_SPLIT;
              doneForcedArc = true;
            } else if (features.containsKey("__undocumented_junit_no_classifier")) {
              logProbability = Double.NEGATIVE_INFINITY;
              bestLabel = ClauseClassifierLabel.CLAUSE_INTERM;
            } else {
              Counter<ClauseClassifierLabel> scores = classifier.scoresOf(new RVFDatum<>(features));
              if (scores.size() > 0) {
                Counters.logNormalizeInPlace(scores);
              }
              String rel = outgoingEdge.getRelation().toString();
              if ("nsubj".equals(rel) || "dobj".equals(rel)) {
                scores.remove(
                    ClauseClassifierLabel.NOT_A_CLAUSE); // Always at least yield on nsubj and dobj
              }
              logProbability = Counters.max(scores, Double.NEGATIVE_INFINITY);
              bestLabel = Counters.argmax(scores, (x, y) -> 0, ClauseClassifierLabel.CLAUSE_SPLIT);
            }

            if (bestLabel != ClauseClassifierLabel.NOT_A_CLAUSE) {
              Pair<State, List<Counter<String>>> childState =
                  Pair.makePair(
                      candidate.get().withIsDone(bestLabel),
                      new ArrayList<Counter<String>>(featuresSoFar) {
                        {
                          add(features);
                        }
                      });
              // 2. Register the child state
              if (!seenWords.contains(childState.first.edge.getDependent())) {
                //            System.err.println("  pushing " + action.signature() + " with " +
                // argmax.first.edge);
                fringe.add(childState, logProbability);
              }
            }
          }
        }
      }

      seenWords.add(rootWord);
    }
    //    System.err.println("Search finished in " + ticks + " ticks and " + classifierEvals + "
    // classifier evaluations.");
  }
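Stripped of the clause-splitting specifics, the loop above is a best-first search whose consumer can abort early; a hedged skeleton of that control flow, with hypothetical integer states in place of State and SentenceFragment:

  import java.util.Comparator;
  import java.util.HashSet;
  import java.util.PriorityQueue;
  import java.util.Set;
  import java.util.function.Predicate;

  class BestFirstDemo {
    record Scored(int state, double logProb) {} // stand-in for (State, features)

    // Pop the highest-scoring state, let the consumer veto further work, then
    // push the scored children -- the same shape as the fringe loop above.
    static void search(int root, Predicate<Scored> callback) {
      PriorityQueue<Scored> fringe =
          new PriorityQueue<>(Comparator.comparingDouble((Scored s) -> -s.logProb()));
      Set<Integer> seen = new HashSet<>(); // avoid duplicate work
      fringe.add(new Scored(root, 0.0));
      while (!fringe.isEmpty()) {
        Scored cur = fringe.poll();
        if (!seen.add(cur.state())) continue;
        if (!callback.test(cur)) return; // the consumer asked us to stop early
        for (int child : new int[] {cur.state() * 2, cur.state() * 2 + 1}) {
          if (child < 16) fringe.add(new Scored(child, cur.logProb() - 1.0));
        }
      }
    }

    public static void main(String[] args) {
      search(1, s -> { System.out.println(s); return s.logProb() > -3; });
    }
  }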
Example #14
  @Override
  public void trainC(final ClassificationDataSet dataSet, final ExecutorService threadPool) {
    final PriorityQueue<ClassificationModelEvaluation> bestModels =
        new PriorityQueue<ClassificationModelEvaluation>(
            folds,
            new Comparator<ClassificationModelEvaluation>() {
              @Override
              public int compare(
                  ClassificationModelEvaluation t, ClassificationModelEvaluation t1) {
                double v0 = t.getScoreStats(classificationTargetScore).getMean();
                double v1 = t1.getScoreStats(classificationTargetScore).getMean();
                int order = classificationTargetScore.lowerIsBetter() ? 1 : -1;
                return order * Double.compare(v0, v1);
              }
            });

    /**
     * Use this to keep track of which parameter we are altering. Each index corresponds to a
     * parameter, and its value records which candidate value is currently in use. Increment with
     * carrying to iterate over all possible combinations.
     */
    int[] setTo = new int[searchParams.size()];

    /**
     * Each model is set to a different combination of parameters. We then train each model to
     * determine the best one.
     */
    final List<Classifier> paramsToEval = new ArrayList<Classifier>();

    while (true) {
      setParameters(setTo);

      paramsToEval.add(baseClassifier.clone());

      if (incrementCombination(setTo)) break;
    }
    /*
     * This is the Executor used for training the models in parallel. If we
     * are not supposed to do that, it will be an executor that executes
     * them sequentially.
     */
    final ExecutorService modelService;
    if (trainModelsInParallel) modelService = threadPool;
    else modelService = new FakeExecutor();

    final CountDownLatch latch; // used for stopping in both cases

    // if we are doing our CV splits ahead of time, get them done now
    final List<ClassificationDataSet> preFolded;

    /** Pre-combine our training combinations so that any caching can be re-used */
    final List<ClassificationDataSet> trainCombinations;

    if (reuseSameCVFolds) {
      preFolded = dataSet.cvSet(folds);
      trainCombinations = new ArrayList<ClassificationDataSet>(preFolded.size());
      for (int i = 0; i < preFolded.size(); i++)
        trainCombinations.add(ClassificationDataSet.comineAllBut(preFolded, i));
    } else {
      preFolded = null;
      trainCombinations = null;
    }

    boolean considerWarm = useWarmStarts && baseClassifier instanceof WarmClassifier;

    /**
     * Make sure we don't do a warm start if it is only supported when trained on the same data,
     * but we aren't reusing the same CV splits. So we get the truth table
     *
     * <pre>
     * a | b | (a && b) || ¬a
     * T | T | T
     * T | F | F
     * F | T | T
     * F | F | T
     * </pre>
     *
     * where a = warmFromSameDataOnly and b = reuseSameCVFolds. So we can instead use ¬a || b.
     */
    if (considerWarm
        && (!((WarmClassifier) baseClassifier).warmFromSameDataOnly() || reuseSameCVFolds)) {
      /* we want all of the first parameter (which is the warm parameter,
       * taken care of for us) values done in a group. So we can get this
       * by just dividing up the larger list into sub-lists; each sub-list
       * is adjacent in the original and is the number of parameter values
       * we wanted to try
       */

      int stepSize = searchValues.get(0).size();
      int totalJobs = paramsToEval.size() / stepSize;
      latch = new CountDownLatch(totalJobs);
      for (int startPos = 0; startPos < paramsToEval.size(); startPos += stepSize) {
        final List<Classifier> subSet = paramsToEval.subList(startPos, startPos + stepSize);
        modelService.submit(
            new Runnable() {

              @Override
              public void run() {
                Classifier[] prevModels = null;

                for (Classifier c : subSet) {
                  ClassificationModelEvaluation cme =
                      trainModelsInParallel
                          ? new ClassificationModelEvaluation(c, dataSet)
                          : new ClassificationModelEvaluation(c, dataSet, threadPool);
                  cme.setKeepModels(true); // we need these to do warm starts!
                  cme.setWarmModels(prevModels);
                  cme.addScorer(classificationTargetScore.clone());
                  if (reuseSameCVFolds) cme.evaluateCrossValidation(preFolded, trainCombinations);
                  else cme.evaluateCrossValidation(folds);
                  prevModels = cme.getKeptModels();
                  synchronized (bestModels) {
                    bestModels.add(cme);
                  }
                }
                latch.countDown();
              }
            });
      }
    } else { // regular CV, train a new model from scratch at every step
      latch = new CountDownLatch(paramsToEval.size());

      for (final Classifier toTrain : paramsToEval) {

        modelService.submit(
            new Runnable() {

              @Override
              public void run() {
                ClassificationModelEvaluation cme =
                    trainModelsInParallel
                        ? new ClassificationModelEvaluation(toTrain, dataSet)
                        : new ClassificationModelEvaluation(toTrain, dataSet, threadPool);
                cme.addScorer(classificationTargetScore.clone());
                if (reuseSameCVFolds) cme.evaluateCrossValidation(preFolded, trainCombinations);
                else cme.evaluateCrossValidation(folds);
                synchronized (bestModels) {
                  bestModels.add(cme);
                }

                latch.countDown();
              }
            });
      }
    }

    // now wait for everyone to finish
    try {
      latch.await();
      // Now we know the best classifier, we need to train one on the whole data set.
      Classifier bestClassifier =
          bestModels.peek().getClassifier(); // Just re-train it on the whole set
      if (trainFinalModel) {
        // try and warm start the final model if we can
        if (useWarmStarts
            && bestClassifier instanceof WarmClassifier
            && !((WarmClassifier) bestClassifier).warmFromSameDataOnly()) {
          // The last condition is needed to make sure we can do this warm train.
          WarmClassifier wc = (WarmClassifier) bestClassifier;
          if (threadPool instanceof FakeExecutor) wc.trainC(dataSet, wc.clone());
          else wc.trainC(dataSet, wc.clone(), threadPool);
        } else {
          if (threadPool instanceof FakeExecutor) bestClassifier.trainC(dataSet);
          else bestClassifier.trainC(dataSet, threadPool);
        }
      }
      trainedClassifier = bestClassifier;

    } catch (InterruptedException ex) {
      Logger.getLogger(GridSearch.class.getName()).log(Level.SEVERE, null, ex);
    }
  }
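The setTo counter documented above behaves like a mixed-radix odometer; a hedged sketch of the increment-and-carry step (incrementCombination itself is not shown in this excerpt, so the explicit radix argument is an assumption):

  import java.util.Arrays;

  class OdometerDemo {
    // Treat setTo as a mixed-radix number: digit i ranges over the candidate
    // values of parameter i. Returns true once every combination has been seen.
    static boolean increment(int[] setTo, int[] radix) {
      for (int i = 0; i < setTo.length; i++) {
        if (++setTo[i] < radix[i]) return false; // no carry: combinations remain
        setTo[i] = 0;                            // carry into the next digit
      }
      return true; // wrapped all the way around
    }

    public static void main(String[] args) {
      int[] setTo = new int[2];
      do {
        System.out.println(Arrays.toString(setTo)); // all 2 x 3 combinations
      } while (!increment(setTo, new int[] {2, 3}));
    }
  }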