void ComputeEmissionProbsForDoc(_Doc d) { for (int i = 0; i < d.getSenetenceSize(); i++) { _Stn stn = d.getSentence(i); Arrays.fill(emission[i], 0); int start = 0, end = this.number_of_topics; if (i == 0 && d.getSourceType() == 2) { // first sentence is specially handled for newEgg // get the sentiment label of the first sentence int sentimentLabel = stn.getSentenceSenitmentLabel(); if (sentimentLabel == 0) { // positive sentiment in the first half end = this.number_of_topics / 2; for (int k = end; k < this.number_of_topics; k++) emission[i][k] = Double.NEGATIVE_INFINITY; } else if (sentimentLabel == 1) { // negative sentiment in the second half start = this.number_of_topics / 2; for (int k = 0; k < start; k++) emission[i][k] = Double.NEGATIVE_INFINITY; } } for (int k = start; k < end; k++) { for (_SparseFeature w : stn.getFv()) { emission[i][k] += w.getValue() * topic_term_probabilty[k][w.getIndex()]; // all in log-space } } } }
// probabilities of topic assignment void accPhiStat(_Doc d) { double prob; for (int t = 0; t < d.getSenetenceSize(); t++) { _Stn s = d.getSentence(t); for (_SparseFeature f : s.getFv()) { int wid = f.getIndex(); double v = f.getValue(); // frequency for (int i = 0; i < number_of_topics; i++) { prob = this.p_dwzpsi[t][i]; for (int j = 1; j < constant; j++) prob += this.p_dwzpsi[t][i + j * number_of_topics]; this.sstat[i][wid] += v * prob; } } } }
public void estStnThetaInParentDoc(_ParentDoc2 d) { _SparseFeature[] fv = d.getSparse(); double[][] phi = new double[fv.length][number_of_topics]; HashMap<Integer, Integer> indexMap = new HashMap<Integer, Integer>(); // ///computeWordTopicProportionInDoc for (int i = 0; i < fv.length; i++) { int index = fv[i].getIndex(); indexMap.put(index, i); } for (int n = 0; n < d.m_words.length; n++) { int index = d.m_words[n]; int topic = d.m_topicAssignment[n]; phi[indexMap.get(index)][topic]++; } for (int i = 0; i < fv.length; i++) { Utils.L1Normalization(phi[i]); } for (int i = 0; i < d.m_sentenceMap.size(); i++) { _Stn stnObject = d.m_sentenceMap.get(i); stnObject.setTopicsVct(number_of_topics); Arrays.fill(stnObject.m_topics, 0); if (stnObject.m_stnLength != 0) { int[] indexInCV = stnObject.m_words; for (int j = 0; j < indexInCV.length; j++) { int index = indexInCV[j]; for (int k = 0; k < number_of_topics; k++) { stnObject.m_topics[k] += phi[indexMap.get(index)][k]; } } Utils.L1Normalization(stnObject.m_topics); } } }