/** * Builds a Trellis over a sentence, by starting at the state State, and advancing through all * legal extensions of each state already in the trellis. You should not have to modify this * code (or even read it, really). */ private Trellis<State> buildTrellis(List<String> sentence) { Trellis<State> trellis = new Trellis<State>(); trellis.setStartState(State.getStartState()); State stopState = State.getStopState(sentence.size() + 2); trellis.setStopState(stopState); Set<State> states = Collections.singleton(State.getStartState()); for (int position = 0; position <= sentence.size() + 1; position++) { Set<State> nextStates = new HashSet<State>(); for (State state : states) { if (state.equals(stopState)) continue; LocalTrigramContext localTrigramContext = new LocalTrigramContext( sentence, position, state.getPreviousPreviousTag(), state.getPreviousTag()); Counter<String> tagScores = localTrigramScorer.getLogScoreCounter(localTrigramContext); for (String tag : tagScores.keySet()) { double score = tagScores.getCount(tag); State nextState = state.getNextState(tag); trellis.setTransitionCount(state, nextState, score); nextStates.add(nextState); } } // System.out.println("States: "+nextStates); states = nextStates; } return trellis; }
/**
 * Computes the log score of a complete tagging of a sentence.
 *
 * <p>The tagged sentence is broken into labeled local trigram contexts; each context is scored
 * by the local trigram scorer and the log score of its actual tag is added to the total. When
 * the scorer's counter has no entry for a context's tag, that context contributes
 * {@code Double.NEGATIVE_INFINITY}, so a tag sequence the scorer does not accept ends up with a
 * score of negative infinity.
 *
 * @param taggedSentence the sentence together with the tags to be scored
 * @return the sum of the per-context log scores
 */
public double scoreTagging(TaggedSentence taggedSentence) {
    double total = 0.0;
    for (LabeledLocalTrigramContext context : extractLabeledLocalTrigramContexts(taggedSentence)) {
        Counter<String> scoresByTag = localTrigramScorer.getLogScoreCounter(context);
        String actualTag = context.getCurrentTag();
        // A tag missing from the counter is treated as impossible in this context.
        total += scoresByTag.containsKey(actualTag)
            ? scoresByTag.getCount(actualTag)
            : Double.NEGATIVE_INFINITY;
    }
    return total;
}
// chop up the validation instances into local contexts and pass them on to the local scorer. public void validate(List<TaggedSentence> taggedSentences) { localTrigramScorer.validate(extractLabeledLocalTrigramContexts(taggedSentences)); }
// chop up the training instances into local contexts and pass them on to the local scorer. public void train(List<TaggedSentence> taggedSentences) { localTrigramScorer.train(extractLabeledLocalTrigramContexts(taggedSentences)); }