/** * Builds a Trellis over a sentence, by starting at the state State, and advancing through all * legal extensions of each state already in the trellis. You should not have to modify this * code (or even read it, really). */ private Trellis<State> buildTrellis(List<String> sentence) { Trellis<State> trellis = new Trellis<State>(); trellis.setStartState(State.getStartState()); State stopState = State.getStopState(sentence.size() + 2); trellis.setStopState(stopState); Set<State> states = Collections.singleton(State.getStartState()); for (int position = 0; position <= sentence.size() + 1; position++) { Set<State> nextStates = new HashSet<State>(); for (State state : states) { if (state.equals(stopState)) continue; LocalTrigramContext localTrigramContext = new LocalTrigramContext( sentence, position, state.getPreviousPreviousTag(), state.getPreviousTag()); Counter<String> tagScores = localTrigramScorer.getLogScoreCounter(localTrigramContext); for (String tag : tagScores.keySet()) { double score = tagScores.getCount(tag); State nextState = state.getNextState(tag); trellis.setTransitionCount(state, nextState, score); nextStates.add(nextState); } } // System.out.println("States: "+nextStates); states = nextStates; } return trellis; }
public void act() // sean { if (!played) { burningSteppes.playLoop(); played = !played; } // makeSmokeFireball(); counterDelay++; if (Greenfoot.isKeyDown("h") && delay > 10) { clickSound.play(); Menu menu = new Menu(getThisWorld()); Greenfoot.setWorld(menu); delay = 0; } if (getObjects(Ninja.class).size() != 0 && counterDelay >= 10) { healthCounter.setValue(ninja.getNINJAHP()); shurikenCounter.setValue(ninja.getSHURIKENNUMBER()); powerCounter.setValue(ninja.getPOWERBAR()); checkDoor(); counterDelay -= 10; /**/ // TEMPORAY FUNCTIONS FOR HAYDEN TO CHANGE LEVELS TO MAKE THEM /**/ /**/ // TEMPORAY FUNCTIONS FOR HAYDEN TO CHANGE LEVELS TO MAKE THEM /**/ } delay++; fireballDelay++; }
/**
 * Adds {@code v} at position ({@code r}, {@code c}) by wrapping it in a single-entry row and
 * delegating to {@code addRow}, then records the row and column indices.
 *
 * @param r row index
 * @param c column index
 * @param v value to add
 */
public void add(int r, int c, double v) {
  Counter singleEntry = new Counter();
  singleEntry.add(c, v);
  addRow(r, singleEntry);
  cols.add(c);
  rows.add(r);
}
/* * Takes a set of sketch nodes, and returns an ArrayList<Integer> such that * arr.get(i) gives the index of the sketch node that node i is closest too. * * Need to work the return values a little bit. Make a proper data * structure. */ public ArrayList<ArrayList<Integer>> distSketch(int len, Counter sketchNodes) { ArrayList<Integer> closestIndex = new ArrayList<Integer>(); for (int i = 0; i < len; i++) closestIndex.set(i, -1); ArrayList<Double> closestDist = new ArrayList<Double>(); for (int i = 0; i < len; i++) closestDist.set(i, Double.MAX_VALUE); ArrayList<ArrayList<Integer>> sketchReverseIndex = new ArrayList<ArrayList<Integer>>(); for (int index : sketchNodes.keySet()) { Counter distances = this.bfs(index); for (int j = 0; j < len; j++) { double curDist = closestDist.get(j); double dist = distances.getPath(index); if (dist < curDist) { closestIndex.set(j, index); } } sketchReverseIndex.add(new ArrayList<Integer>()); } for (int j = 0; j < len; j++) { int closest = closestIndex.get(j); sketchReverseIndex.get(closest).add(j); } // Return sketchReverseIndex, closestIndex forward index, and index // correspondence bimap return sketchReverseIndex; }
/**
 * Scans the chunks lying between the chunk at {@code tg.pos} and the chunk at {@code pos2} for a
 * trigger word positioned after {@code tg}. The first such word that is not in {@code validTG} is
 * returned. Words that are in {@code validTG} are instead tallied in {@code sharedTG} under the
 * key built from {@code tg} and the search continues.
 *
 * @param tg the word to search after
 * @param pos2 position delimiting the end of the search range
 * @param bs chunk container supplying the chunks and their trigger words
 * @return the first qualifying word, or {@code null} if none is found or the two positions are
 *     more than 10 apart
 */
private Word findTrg(Word tg, int pos2, BasicChunk bs) {
  final int pos1 = tg.pos;
  if (pos2 - pos1 > 10) {
    return null;
  }

  Chunk first = bs.getChunk(pos1);
  Chunk last = bs.getChunk(pos2);
  final int begin = first.begin;
  final int end = last.end;

  for (Chunk c : bs.chunkList) {
    if (c.begin < begin || c.end > end) {
      continue; // chunk is not fully inside the search window
    }
    for (Word w : c.trigs) {
      if (w.pos <= pos1) {
        continue;
      }
      if (!validTG.contains(w)) {
        return w;
      }
      // w is a valid trigger: count its co-occurrence with tg and keep looking.
      String outerKey = tg.word + tg.pos_tag;
      Map<String, Counter> tallies = sharedTG.get(outerKey);
      if (tallies == null) {
        tallies = new HashMap<String, Counter>();
        sharedTG.put(outerKey, tallies);
      }
      String innerKey = w.word + w.pos_tag;
      Counter tally = tallies.get(innerKey);
      if (tally != null) {
        tally.inc();
      } else {
        tallies.put(innerKey, new Counter(1));
      }
    }
  }
  return null;
}
/**
 * Looks up the stored count for a token.
 *
 * @param token the token to look up
 * @return the count held in {@code lm} for {@code token}
 * @throws RuntimeException if {@code token} is not a key of {@code lm}; the key-set size is
 *     printed to stderr first
 */
public double getCount(K token) {
  if (lm.keySet().contains(token)) {
    return lm.getCount(token);
  }
  System.err.println(lm.keySet().size());
  throw new RuntimeException("token not in keyset");
}
/**
 * Recursively counts the grammar rules used in a tree. Leaves and pre-terminals are skipped.
 * Each remaining node contributes one count for its label plus one count for the unary or binary
 * rule it expands with.
 *
 * @param tree the (sub)tree to tally
 * @param symbolCounter receives one count per internal node label
 * @param unaryRuleCounter receives one count per unary expansion
 * @param binaryRuleCounter receives one count per binary expansion
 * @throws RuntimeException if an internal node has fewer than one or more than two children
 */
private void tallyTree(
    Tree<String> tree,
    Counter<String> symbolCounter,
    Counter<UnaryRule> unaryRuleCounter,
    Counter<BinaryRule> binaryRuleCounter) {
  if (tree.isLeaf() || tree.isPreTerminal()) {
    return;
  }

  switch (tree.getChildren().size()) {
    case 1:
      UnaryRule unaryRule = makeUnaryRule(tree);
      symbolCounter.incrementCount(tree.getLabel(), 1.0);
      unaryRuleCounter.incrementCount(unaryRule, 1.0);
      break;
    case 2:
      BinaryRule binaryRule = makeBinaryRule(tree);
      symbolCounter.incrementCount(tree.getLabel(), 1.0);
      binaryRuleCounter.incrementCount(binaryRule, 1.0);
      break;
    default:
      throw new RuntimeException(
          "Attempted to construct a Grammar with an illegal tree: " + tree);
  }

  for (Tree<String> subtree : tree.getChildren()) {
    tallyTree(subtree, symbolCounter, unaryRuleCounter, binaryRuleCounter);
  }
}
/*
 * All-pairs shortest paths by iterated min-plus products. Starting from the
 * matrix's own entries (with a zero diagonal), each round extends every pair
 * improved in the previous round by one more edge of this matrix. A pair is
 * carried into the next round only if the extension shortened its best known
 * path. The loop stops when a round improves nothing.
 *
 * Returns a new matrix of shortest path lengths; this matrix is only read
 * through getRow.
 */
public SparseMatrix apsp() {
  SparseMatrix shortestPaths = new SparseMatrix(this);
  // Pairs whose distance improved in the latest round; initially every entry.
  SparseMatrix newPairs = new SparseMatrix(this);
  for (int d = 0; d < this.rowDim; d++) {
    shortestPaths.set(d, d, 0.0);
    newPairs.set(d, d, 0.0);
  }

  while (!newPairs.isEmpty()) {
    // Hand the frontier over directly; newPairs is rebound to a fresh matrix right away, so no
    // copy is needed.
    SparseMatrix currentPairs = newPairs;
    newPairs = new SparseMatrix(this.rowDim, this.colDim);

    for (int r : currentPairs.rows) {
      Counter row = currentPairs.getRow(r);
      for (int c : row.keySet()) {
        double prefix = row.get(c);
        Counter oRow = this.getRow(c);
        for (int oc : oRow.keySet()) {
          double pathLength = prefix + oRow.get(oc);
          if (pathLength < shortestPaths.getPath(r, oc)) {
            newPairs.set(r, oc, pathLength);
            shortestPaths.set(r, oc, pathLength);
          }
        }
      }
    }
  }
  return shortestPaths;
}
/**
 * Computes the Dice coefficient {@code 2 * c(f, e) / (c(f) + c(e))} from the sentence-level
 * counts.
 *
 * @param f French word
 * @param e English word
 * @return the Dice coefficient, or {@code 0.0} when neither word has been counted (which would
 *     otherwise produce {@code NaN} from 0/0)
 */
private double getDiceCoefficient(String f, String e) {
  double intersection = collocationCountSentences.getCount(f, e);
  double denominator = fCountSentences.getCount(f) + eCountSentences.getCount(e);
  if (denominator == 0.0) {
    return 0.0;
  }
  return 2 * intersection / denominator;
}
/**
 * Removes from this matrix every (row, column) position that has an entry in {@code redundant}.
 *
 * @param redundant matrix whose populated positions are to be removed from this one
 */
public void removeEntries(SparseMatrix redundant) {
  for (int rowIndex : redundant.getRows()) {
    for (int colIndex : redundant.getRow(rowIndex).keySet()) {
      remove(rowIndex, colIndex);
    }
  }
}
/**
 * Builds a new matrix with the same dimensions in which each row is the result of
 * {@code multiplyImmutable(f)} on the corresponding row of this matrix.
 *
 * @param f the scalar factor
 * @return the newly built matrix
 */
public SparseMatrix multiply(double f) {
  SparseMatrix scaled = new SparseMatrix(rowDim, colDim);
  for (int r : rows) {
    scaled.addRow(r, getRow(r).multiplyImmutable(f));
  }
  return scaled;
}
/** {@inheritDoc} */
@Override
public synchronized void incrAllCounters(AbstractCounters<Counter, CounterGroup> other) {
  for (CounterGroup otherGroup : other) {
    for (Counter otherCounter : otherGroup) {
      // Locate the counter with the same group and name, then add the other side's value.
      findCounter(otherGroup.getName(), otherCounter.getName())
          .increment(otherCounter.getValue());
    }
  }
}
/**
 * Collects column {@code c} into a new Counter keyed by row index. Only rows that contain an
 * entry for {@code c} contribute.
 *
 * @param c column index
 * @return a Counter mapping row index to the value at (row, {@code c})
 */
public Counter getCol(int c) {
  Counter column = new Counter();
  for (int r : rows) {
    if (!getRow(r).containsKey(c)) {
      continue;
    }
    column.put(r, get(r, c));
  }
  return column;
}
/**
 * Stores {@code v} at ({@code r}, {@code c}). If the row obtained from {@code getRow} is empty it
 * is first registered in {@code mat}; the row and column indices are recorded afterwards.
 *
 * @param r row index
 * @param c column index
 * @param v value to store
 */
public void set(int r, int c, double v) {
  Counter target = getRow(r);
  if (target.isEmpty()) {
    mat.put(r, target);
  }
  target.put(c, v);
  cols.add(c);
  rows.add(r);
}
/**
 * Records one (word, tag) observation. When {@code isKnown(word)} is false at the time of the
 * call, the word-type total and the per-tag type count are bumped as well. The token, tag, word
 * and word-to-tag counts are always incremented.
 *
 * @param word the observed word
 * @param tag the tag it was observed with
 */
private void tallyTagging(String word, String tag) {
  // Must be evaluated before any counter below is touched.
  boolean known = isKnown(word);
  if (!known) {
    typeTagCounter.incrementCount(tag, 1.0);
    totalWordTypes += 1.0;
  }

  wordToTagCounters.incrementCount(word, tag, 1.0);
  wordCounter.incrementCount(word, 1.0);
  tagCounter.incrementCount(tag, 1.0);
  totalTokens += 1.0;
}
/**
 * Repeatedly replaces the current vector with a copy of the new vector until the two are within
 * 0.00001 of each other according to {@code dist}.
 *
 * <p>NOTE(review): the new vector is never changed inside the loop, so the only thing the loop
 * does is copy {@code vector}. This looks like an unfinished iteration; confirm the intended
 * update step.
 *
 * @param vector the starting vector
 * @return the current vector when the loop ends (an empty Counter if the loop never runs)
 */
public Counter harmonicAvg(Counter vector) {
  Counter previous = new Counter();
  Counter next = new Counter(vector);
  for (double gap = previous.dist(next); gap > 0.00001; gap = previous.dist(next)) {
    previous = new Counter(next);
  }
  return previous;
}
/** * GT smoothing with least squares interpolation. This follows the procedure in Jurafsky and * Martin sect. 4.5.3. */ public void smoothAndNormalize() { Counter<Integer> cntCounter = new Counter<Integer>(); for (K tok : lm.keySet()) { int cnt = (int) lm.getCount(tok); cntCounter.incrementCount(cnt); } final double[] coeffs = runLogSpaceRegression(cntCounter); UNK_PROB = cntCounter.getCount(1) / lm.totalCount(); for (K tok : lm.keySet()) { double tokCnt = lm.getCount(tok); if (tokCnt <= unkCutoff) // Treat as unknown unkTokens.add(tok); if (tokCnt <= kCutoff) { // Smooth double cSmooth = katzEstimate(cntCounter, tokCnt, coeffs); lm.setCount(tok, cSmooth); } } // Normalize // Counters.normalize(lm); // MY COUNTER IS ALWAYS NORMALIZED AND AWESOME }
/**
 * Returns the transpose of this matrix: every entry (r, c) of this matrix appears at (c, r) in
 * the result, and the result's dimensions are swapped ({@code colDim} rows, {@code rowDim}
 * columns) so that non-square matrices transpose correctly.
 *
 * @return a new matrix holding the transposed entries
 */
public SparseMatrix transpose() {
  // Dimensions are swapped: an m x n matrix transposes to n x m.
  SparseMatrix transp = new SparseMatrix(this.colDim, this.rowDim);
  for (int r : rows) {
    Counter row = this.getRow(r);
    for (int c : row.keySet()) {
      transp.set(c, r, row.get(c));
    }
  }
  return transp;
}
/**
 * Builds the Laplacian {@code L = D - A} of this matrix, where {@code D} is the diagonal matrix
 * of row sums. Off-diagonal entries are the negated entries of this matrix. Each diagonal entry
 * is the row sum minus the row's own diagonal entry, so a self-loop no longer overwrites the
 * degree.
 *
 * @return a new matrix holding the Laplacian
 */
public SparseMatrix makeLaplacian() {
  SparseMatrix laplacian = new SparseMatrix(this.rowDim, this.colDim);
  for (int r : this.getRows()) {
    Counter row = this.getRow(r);
    double diagonal = row.sum();
    for (int c : row.keySet()) {
      double weight = row.get(c);
      if (c == r) {
        // Self-loop: fold it into the diagonal instead of clobbering the degree.
        diagonal -= weight;
      } else {
        laplacian.set(r, c, -1 * weight);
      }
    }
    laplacian.set(r, r, diagonal);
  }
  return laplacian;
}
/**
 * Walks the trellis from its start state, always following the forward transition chosen by
 * {@code argMax()}, until the trellis's end state is reached.
 *
 * @param trellis the trellis to walk
 * @return the visited states in order, beginning with the start state
 */
public List<S> getBestPath(Trellis<S> trellis) {
  List<S> path = new ArrayList<S>();
  S cursor = trellis.getStartState();
  path.add(cursor);
  while (true) {
    if (cursor.equals(trellis.getEndState())) {
      return path;
    }
    cursor = trellis.getForwardTransitions(cursor).argMax();
    path.add(cursor);
  }
}
public void addRow(int r, Counter other) { // System.out.println("MSG: added row "+r); Counter row = this.getRow(r); if (row.isEmpty()) { mat.put(r, row); rows.add(r); } for (int c : other.keySet()) { cols.add(c); } row.addAll(other); }
/* Returns a smoothed estimate of P(word|tag) */ public double scoreTagging(String word, String tag) { double p_tag = tagCounter.getCount(tag) / totalTokens; double c_word = wordCounter.getCount(word); double c_tag_and_word = wordToTagCounters.getCount(word, tag); if (c_word < 10) { // rare or unknown c_word += 1.0; c_tag_and_word += typeTagCounter.getCount(tag) / totalWordTypes; } double p_word = (1.0 + c_word) / (totalTokens + totalWordTypes); double p_tag_given_word = c_tag_and_word / c_word; return p_tag_given_word / p_tag * p_word; }
private double[] runLogSpaceRegression(Counter<Integer> cntCounter) { SimpleRegression reg = new SimpleRegression(); for (int cnt : cntCounter.keySet()) { reg.addData(cnt, Math.log(cntCounter.getCount(cnt))); } // System.out.println(reg.getIntercept()); // System.out.println(reg.getSlope()); // System.out.println(regression.getSlopeStdErr()); double[] coeffs = new double[] {reg.getIntercept(), reg.getSlope()}; return coeffs; }
/**
 * Increments the tally stored in {@code TGCount} for trigger {@code tg} under the key
 * {@code pos + type}, creating the inner map and the counter (starting at 1) when missing.
 *
 * @param pos first half of the inner key
 * @param tg outer key into {@code TGCount}
 * @param type second half of the inner key
 */
private void add2Map(String pos, String tg, String type) {
  Map<String, Counter> perTrigger = TGCount.get(tg);
  if (perTrigger == null) {
    perTrigger = new HashMap<String, Counter>();
    TGCount.put(tg, perTrigger);
  }

  final String key = pos + type;
  Counter tally = perTrigger.get(key);
  if (tally != null) {
    tally.inc();
    return;
  }
  perTrigger.put(key, new Counter(1));
}
/**
 * Computes the Katz-corrected Good-Turing count for a raw count {@code c}. A count-of-counts
 * value of zero (at {@code c + 1} or at {@code kCutoff + 1}) is replaced by the regression
 * estimate {@code exp(coeffs[0] + coeffs[1] * x)}.
 *
 * @param cnt count-of-counts table
 * @param c the raw count being smoothed (truncated to int for table lookups)
 * @param coeffs regression coefficients, {@code [intercept, slope]}
 * @return the smoothed count
 */
private double katzEstimate(Counter<Integer> cnt, double c, double[] coeffs) {
  final int count = (int) c;
  final int cutoff = (int) kCutoff;

  double freqAtCount = cnt.getCount(count);
  double freqNext = cnt.getCount(count + 1);
  if (freqNext == 0.0) {
    freqNext = Math.exp(coeffs[0] + (coeffs[1] * (c + 1.0)));
  }

  double freqOfOnes = cnt.getCount(1);
  double freqAboveCutoff = cnt.getCount(cutoff + 1);
  if (freqAboveCutoff == 0.0) {
    freqAboveCutoff = Math.exp(coeffs[0] + (coeffs[1] * (kCutoff + 1.0)));
  }

  double kTerm = (kCutoff + 1.0) * (freqAboveCutoff / freqOfOnes);
  double cTerm = (c + 1.0) * (freqNext / freqAtCount);
  return (cTerm - (c * kTerm)) / (1.0 - kTerm);
}
public Alignment alignSentencePair(SentencePair sentencePair) { Alignment alignment = new Alignment(); List<String> frenchWords = sentencePair.getFrenchWords(); List<String> englishWords = sentencePair.getEnglishWords(); int numFrenchWords = frenchWords.size(); int numEnglishWords = englishWords.size(); for (int frenchPosition = 0; frenchPosition < numFrenchWords; frenchPosition++) { String f = frenchWords.get(frenchPosition); int englishMaxPosition = frenchPosition; if (englishMaxPosition >= numEnglishWords) englishMaxPosition = -1; // map French word to BASELINE if c(f,e) = 0 for all English words double maxConditionalProb = 0; for (int englishPosition = 0; englishPosition < numEnglishWords; englishPosition++) { String e = englishWords.get(englishPosition); double conditionalGivenEnglish = collocationCounts.getCount(f, e) / (eCounts.getCount(e)); if (conditionalGivenEnglish > maxConditionalProb) { maxConditionalProb = conditionalGivenEnglish; englishMaxPosition = englishPosition; } } alignment.addAlignment(englishMaxPosition, frenchPosition, true); } return alignment; }
/**
 * Scores a tagging for a sentence by summing, over every labeled local trigram context, the log
 * score the local scorer assigns to the context's current tag. A tag the scorer does not offer
 * contributes {@code Double.NEGATIVE_INFINITY}, so a tag sequence not accepted by the markov
 * process receives a log score of negative infinity.
 *
 * @param taggedSentence the tagged sentence to score
 * @return the summed log score
 */
public double scoreTagging(TaggedSentence taggedSentence) {
  double total = 0.0;
  for (LabeledLocalTrigramContext context :
      extractLabeledLocalTrigramContexts(taggedSentence)) {
    Counter<String> tagScores = localTrigramScorer.getLogScoreCounter(context);
    String tag = context.getCurrentTag();
    total += tagScores.containsKey(tag) ? tagScores.getCount(tag) : Double.NEGATIVE_INFINITY;
  }
  return total;
}
/**
 * Accumulates sentence-level counts over the training pairs. Within one pair each distinct
 * French word, each distinct English word, and each distinct (French, English) combination is
 * counted once. Prints "Trained!" when done.
 */
private void trainCounters() {
  for (SentencePair pair : trainingSentencePairs) {
    // De-duplicate so a word counts at most once per sentence.
    Set<String> frenchTypes = new HashSet<String>(pair.getFrenchWords());
    Set<String> englishTypes = new HashSet<String>(pair.getEnglishWords());

    fCountSentences.incrementAll(frenchTypes, 1.0);
    eCountSentences.incrementAll(englishTypes, 1.0);

    for (String f : frenchTypes) {
      for (String e : englishTypes) {
        collocationCountSentences.incrementCount(f, e, 1.0);
      }
    }
  }
  System.out.println("Trained!");
}
public Counter multiply(Counter vector) { Counter resVec = new Counter(); // for(int r: vector.keySet()){ // Counter row = this.getRow(r); // double sum = 0; // for(int c: row.keySet()){ //// resVec.add(r, row.get(c)*vector.get(c)); // sum += row.get(c)*vector.get(c); // } // resVec.add(r, sum); // } for (int r : rows) { resVec.set(r, this.getRow(r).dot(vector)); } return resVec; }
public SparseMatrix stochasticizeRows() { SparseMatrix stochasticMat = new SparseMatrix(this.rowDim, this.colDim); double[] rowSums = new double[this.rowDim]; for (int r : this.rows) { Counter row = this.getRow(r); for (int c : row.keySet()) { rowSums[r] += row.get(c); } } for (int r : this.rows) { Counter row = this.getRow(r); for (int c : row.keySet()) { double value = 0; if (rowSums[r] != 0) { // if(true){ value = this.get(r, c) / rowSums[r]; } stochasticMat.set(r, c, value); } } return stochasticMat; }