/**
  * Collect the correct segments for this example: every segment carrying a
  * non-negative label, plus a unit-length negative segment at each position that
  * is not covered by such a segment.
  */
 private Segmentation correctSegments(
     CandidateSegmentGroup g, ExampleSchema schema, int maxSegmentSize) {
   Segmentation result = new Segmentation(schema);
   int start = 0;
   while (start < g.getSequenceLength()) {
     // length of the non-negative segment found at this position, or 0 if none
     int matchedLength = 0;
     for (int size = 1; matchedLength == 0 && size <= maxSegmentSize; size++) {
       Instance candidate = g.getSubsequenceInstance(start, start + size);
       ClassLabel label = g.getSubsequenceLabel(start, start + size);
       if (candidate != null && !label.isNegative()) {
         int classIndex = schema.getClassIndex(label.bestClassName());
         result.add(new Segmentation.Segment(start, start + size, classIndex));
         matchedLength = size;
       }
     }
     if (matchedLength == 0) {
       // no non-negative segment starts here: emit a unit-length negative segment
       result.add(
           new Segmentation.Segment(
               start, start + 1, schema.getClassIndex(ExampleSchema.NEG_CLASS_NAME)));
       start += 1;
     } else {
       start += matchedLength;
     }
   }
   return result;
 }
 /**
  * Compare the target segments to the 'otherSegments', and update the classifier by sum_x
  * [delta*x], for each example x corresponding to a target segment that's not in otherSegments.
  */
 private int compareSegmentsAndIncrement(
     ExampleSchema schema,
     Segmentation segments,
     Segmentation otherSegments,
     Hyperplane[] accum,
     double delta,
     CandidateSegmentGroup g) {
   int errors = 0;
   // first, work out the name of the previous class for each segment
   Map<Segment, String> map = previousClassMap(segments, schema);
   Map<Segment, String> otherMap = previousClassMap(otherSegments, schema);
   String[] history = new String[1];
   for (Iterator<Segment> j = segments.iterator(); j.hasNext(); ) {
     Segmentation.Segment seg = j.next();
     String previousClass = map.get(seg);
     if (seg.lo >= 0
         && (!otherSegments.contains(seg) || !otherMap.get(seg).equals(previousClass))) {
       errors++;
       history[0] = previousClass;
       Instance instance =
           new InstanceFromSequence(g.getSubsequenceExample(seg.lo, seg.hi), history);
       if (DEBUG)
         log.debug(
             "class "
                 + schema.getClassName(seg.y)
                 + " update "
                 + delta
                 + " for: "
                 + instance.getSource());
       accum[seg.y].increment(instance, delta);
     }
   }
   return errors;
 }
 @Override
 public void collectionProcessComplete() throws AnalysisEngineProcessException {
   Segmentation segmentation;
   for (Word word : termIndexResource.getTermIndex().getWords()) {
     segmentation = manualCompositions.getSegmentation(word.getLemma());
     if (segmentation != null) if (segmentation.size() <= 1) word.resetComposition();
   }
 }
 /**
  * Build a mapping from segment to string name of previous segment. This should let you look up
  * segments which are logically equivalent, as well as ones which are pointer-equivalent (==)
  */
 private Map<Segment, String> previousClassMap(Segmentation segments, ExampleSchema schema) {
   // use a treemap so that logically equivalent segments be mapped to same previousClass
   Map<Segment, String> map = new TreeMap<Segment, String>();
   Segmentation.Segment previousSeg = null;
   for (Iterator<Segment> j = segments.iterator(); j.hasNext(); ) {
     Segmentation.Segment seg = j.next();
     String previousClassName =
         previousSeg == null ? NULL_CLASS_NAME : schema.getClassName(previousSeg.y);
     map.put(seg, previousClassName);
     previousSeg = seg;
   }
   return map;
 }
  @Override
  public Segmenter batchTrain(SegmentDataset dataset) {
    ExampleSchema schema = dataset.getSchema();
    innerLearner =
        SequenceUtils.duplicatePrototypeLearner(innerLearnerPrototype, schema.getNumberOfClasses());

    ProgressCounter pc =
        new ProgressCounter(
            "training segments " + innerLearnerPrototype.toString(),
            "sequence",
            numberOfEpochs * dataset.getNumberOfSegmentGroups());

    for (int epoch = 0; epoch < numberOfEpochs; epoch++) {
      // dataset.shuffle();

      // statistics for curious researchers
      int sequenceErrors = 0;
      int transitionErrors = 0;
      int transitions = 0;

      for (Iterator<CandidateSegmentGroup> i = dataset.candidateSegmentGroupIterator();
          i.hasNext(); ) {
        Classifier c = new SequenceUtils.MultiClassClassifier(schema, innerLearner);
        if (DEBUG) log.debug("classifier is: " + c);

        CandidateSegmentGroup g = i.next();
        Segmentation viterbi =
            new SegmentCollinsPerceptronLearner.ViterbiSearcher(c, schema, maxSegmentSize)
                .bestSegments(g);
        if (DEBUG) log.debug("viterbi " + maxSegmentSize + "\n" + viterbi);
        Segmentation correct = correctSegments(g, schema, maxSegmentSize);
        if (DEBUG) log.debug("correct segments:\n" + correct);

        boolean errorOnThisSequence = false;

        // accumulate weights for transitions associated with each class k
        Hyperplane[] accumPos = new Hyperplane[schema.getNumberOfClasses()];
        Hyperplane[] accumNeg = new Hyperplane[schema.getNumberOfClasses()];
        for (int k = 0; k < schema.getNumberOfClasses(); k++) {
          accumPos[k] = new Hyperplane();
          accumNeg[k] = new Hyperplane();
        }

        int fp = compareSegmentsAndIncrement(schema, viterbi, correct, accumNeg, +1, g);
        if (fp > 0) errorOnThisSequence = true;
        int fn = compareSegmentsAndIncrement(schema, correct, viterbi, accumPos, +1, g);
        if (fn > 0) errorOnThisSequence = true;
        if (errorOnThisSequence) sequenceErrors++;
        transitionErrors += fp + fn;

        if (errorOnThisSequence) {
          sequenceErrors++;
          String subPopId = g.getSubpopulationId();
          Object source = "no source";
          for (int k = 0; k < schema.getNumberOfClasses(); k++) {
            // System.out.println("adding class="+k+" example: "+accumPos[k]);
            innerLearner[k].addExample(
                new Example(
                    new HyperplaneInstance(accumPos[k], subPopId, source),
                    ClassLabel.positiveLabel(+1.0)));
            innerLearner[k].addExample(
                new Example(
                    new HyperplaneInstance(accumNeg[k], subPopId, source),
                    ClassLabel.negativeLabel(-1.0)));
          }
        }

        transitions += correct.size();
        pc.progress();
      } // sequence i

      System.out.println(
          "Epoch "
              + epoch
              + ": sequenceErr="
              + sequenceErrors
              + " transitionErrors="
              + transitionErrors
              + "/"
              + transitions);

      if (transitionErrors == 0) break;
    } // epoch
    pc.finished();

    for (int k = 0; k < schema.getNumberOfClasses(); k++) {
      innerLearner[k].completeTraining();
    }

    Classifier c = new SequenceUtils.MultiClassClassifier(schema, innerLearner);
    return new SegmentCollinsPerceptronLearner.ViterbiSegmenter(c, schema, maxSegmentSize);
  }