/** Creates a new, more compact holder for the sliding-window instances of a CandidateSegmentGroup. */
public CompactCandidateSegmentGroup(FeatureFactory factory, CandidateSegmentGroup group) {
    // The length of the original sequence.
    this.sequenceLength = group.getSequenceLength();
    // The maximum length of any sliding window.
    this.maxWindowSize = group.getMaxWindowSize();
    this.totalSize = group.size();
    this.classNameSet = group.classNameSet();
    this.subPopId = group.getSubpopulationId();

    unitInstance = new Instance[sequenceLength];
    delta = new Delta[sequenceLength][maxWindowSize];
    label = new ClassLabel[sequenceLength][maxWindowSize];
    segmentSource = new Object[sequenceLength][maxWindowSize];

    // Compress each single-token (unit) instance once.
    for (int i = 0; i < sequenceLength; i++) {
        unitInstance[i] = factory.compress(group.getSubsequenceInstance(i, i + 1));
    }

    // For each candidate window [i,j) of length at most maxWindowSize, store its label,
    // its source, and a Delta-compressed copy of its instance. Windows that run past the
    // end of the sequence are null in the original group and are skipped.
    for (int i = 0; i < sequenceLength; i++) {
        for (int j = i + 1; j - i <= maxWindowSize; j++) {
            Instance subsequence = group.getSubsequenceInstance(i, j);
            if (subsequence != null) {
                label[i][j - i - 1] = group.getSubsequenceLabel(i, j);
                segmentSource[i][j - i - 1] = subsequence.getSource();
                delta[i][j - i - 1] = new Delta(factory, i, j, subsequence);
            }
        }
    }
}
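/*
 * Usage sketch (illustrative only, not part of the original source): the compact group
 * is meant to stand in for an existing CandidateSegmentGroup while using less memory.
 * This assumes Minorthird's FeatureFactory has a no-arg constructor, that
 * CompactCandidateSegmentGroup implements the CandidateSegmentGroup interface, and that
 * "originalGroup" is an already-populated group; wrapping it might look like:
 *
 *   FeatureFactory factory = new FeatureFactory();
 *   CandidateSegmentGroup compact = new CompactCandidateSegmentGroup(factory, originalGroup);
 *   Instance firstToken = compact.getSubsequenceInstance(0, 1);
 */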
@Override
public Segmenter batchTrain(SegmentDataset dataset) {
    ExampleSchema schema = dataset.getSchema();
    innerLearner = SequenceUtils.duplicatePrototypeLearner(innerLearnerPrototype, schema.getNumberOfClasses());

    ProgressCounter pc = new ProgressCounter(
        "training segments " + innerLearnerPrototype.toString(),
        "sequence",
        numberOfEpochs * dataset.getNumberOfSegmentGroups());

    for (int epoch = 0; epoch < numberOfEpochs; epoch++) {
        // dataset.shuffle();

        // statistics for curious researchers
        int sequenceErrors = 0;
        int transitionErrors = 0;
        int transitions = 0;

        for (Iterator<CandidateSegmentGroup> i = dataset.candidateSegmentGroupIterator(); i.hasNext(); ) {
            Classifier c = new SequenceUtils.MultiClassClassifier(schema, innerLearner);
            if (DEBUG) log.debug("classifier is: " + c);

            CandidateSegmentGroup g = i.next();
            // Best-scoring segmentation under the current model...
            Segmentation viterbi =
                new SegmentCollinsPerceptronLearner.ViterbiSearcher(c, schema, maxSegmentSize).bestSegments(g);
            if (DEBUG) log.debug("viterbi " + maxSegmentSize + "\n" + viterbi);

            // ...and the correct segmentation from the labels.
            Segmentation correct = correctSegments(g, schema, maxSegmentSize);
            if (DEBUG) log.debug("correct segments:\n" + correct);

            boolean errorOnThisSequence = false;

            // accumulate weights for transitions associated with each class k
            Hyperplane[] accumPos = new Hyperplane[schema.getNumberOfClasses()];
            Hyperplane[] accumNeg = new Hyperplane[schema.getNumberOfClasses()];
            for (int k = 0; k < schema.getNumberOfClasses(); k++) {
                accumPos[k] = new Hyperplane();
                accumNeg[k] = new Hyperplane();
            }

            // False positives: segments predicted by viterbi that are not in the correct segmentation.
            int fp = compareSegmentsAndIncrement(schema, viterbi, correct, accumNeg, +1, g);
            if (fp > 0) errorOnThisSequence = true;

            // False negatives: correct segments that viterbi missed.
            int fn = compareSegmentsAndIncrement(schema, correct, viterbi, accumPos, +1, g);
            if (fn > 0) errorOnThisSequence = true;

            if (errorOnThisSequence) sequenceErrors++;
            transitionErrors += fp + fn;

            if (errorOnThisSequence) {
                String subPopId = g.getSubpopulationId();
                Object source = "no source";
                for (int k = 0; k < schema.getNumberOfClasses(); k++) {
                    // System.out.println("adding class="+k+" example: "+accumPos[k]);
                    innerLearner[k].addExample(
                        new Example(new HyperplaneInstance(accumPos[k], subPopId, source), ClassLabel.positiveLabel(+1.0)));
                    innerLearner[k].addExample(
                        new Example(new HyperplaneInstance(accumNeg[k], subPopId, source), ClassLabel.negativeLabel(-1.0)));
                }
            }

            transitions += correct.size();
            pc.progress();
        } // sequence i

        System.out.println(
            "Epoch " + epoch + ": sequenceErr=" + sequenceErrors
                + " transitionErrors=" + transitionErrors + "/" + transitions);

        if (transitionErrors == 0) break;
    } // epoch

    pc.finished();

    for (int k = 0; k < schema.getNumberOfClasses(); k++) {
        innerLearner[k].completeTraining();
    }

    Classifier c = new SequenceUtils.MultiClassClassifier(schema, innerLearner);
    return new SegmentCollinsPerceptronLearner.ViterbiSegmenter(c, schema, maxSegmentSize);
}
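/*
 * Usage sketch (illustrative only, not part of the original source): batchTrain runs the
 * Collins-style perceptron updates for numberOfEpochs passes over the dataset and returns
 * a Viterbi-based Segmenter. Assuming "learner" is an instance of this class, that
 * "trainData" and "testGroup" are a hypothetical SegmentDataset and CandidateSegmentGroup,
 * and that Minorthird's Segmenter interface exposes segmentation(CandidateSegmentGroup),
 * applying the trained model might look like:
 *
 *   Segmenter segmenter = learner.batchTrain(trainData);
 *   Segmentation predicted = segmenter.segmentation(testGroup);
 */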