예제 #1
0
    /**
     * Fills {@code emission[i][k]} for every sentence {@code i} of the document and
     * every admissible topic {@code k} with the sum, over the sentence's sparse
     * features, of {@code value * topic_term_probabilty[k][featureIndex]}.
     *
     * <p>Each row is reset to zero before accumulation. For the first sentence of a
     * document whose source type is 2 (NewEgg, per the original note), the topic range
     * is restricted by the sentence's sentiment label: label 0 keeps only the first
     * half of the topics, label 1 keeps only the second half, and the excluded half is
     * set to {@code Double.NEGATIVE_INFINITY}. Any other label leaves the full range.
     *
     * @param d document whose sentences are scored
     */
    void ComputeEmissionProbsForDoc(_Doc d) {
      for (int stnIdx = 0; stnIdx < d.getSenetenceSize(); stnIdx++) {
        _Stn sentence = d.getSentence(stnIdx);
        double[] row = emission[stnIdx];
        Arrays.fill(row, 0);

        // Topic range [firstTopic, lastTopic) that receives real scores.
        int firstTopic = 0;
        int lastTopic = this.number_of_topics;

        boolean sentimentConstrained = stnIdx == 0 && d.getSourceType() == 2;
        if (sentimentConstrained) {
          switch (sentence.getSentenceSenitmentLabel()) {
            case 0:
              // Positive: keep the first half, rule out the second half.
              lastTopic = this.number_of_topics / 2;
              for (int k = lastTopic; k < this.number_of_topics; k++) {
                row[k] = Double.NEGATIVE_INFINITY;
              }
              break;
            case 1:
              // Negative: keep the second half, rule out the first half.
              firstTopic = this.number_of_topics / 2;
              for (int k = 0; k < firstTopic; k++) {
                row[k] = Double.NEGATIVE_INFINITY;
              }
              break;
            default:
              // Other labels: no restriction.
              break;
          }
        }

        for (int k = firstTopic; k < lastTopic; k++) {
          for (_SparseFeature term : sentence.getFv()) {
            // Original note: all quantities are in log-space.
            row[k] += term.getValue() * topic_term_probabilty[k][term.getIndex()];
          }
        }
      }
    }
예제 #2
0
 /**
  * Accumulates word-topic sufficient statistics from the topic-assignment
  * probabilities of one document.
  *
  * <p>For sentence {@code t}, row {@code p_dwzpsi[t]} is read as {@code constant}
  * consecutive blocks of {@code number_of_topics} entries. The entries for topic
  * {@code i} are summed across all blocks, and that sum, weighted by the feature
  * value (a frequency, per the original note), is added to {@code sstat[i][wid]}.
  *
  * <p>The per-topic sum does not depend on the feature, so it is computed once per
  * sentence instead of once per feature. It is computed lazily on the first feature,
  * so a sentence without features never reads {@code p_dwzpsi[t]}.
  *
  * @param d document whose sentences contribute to {@code sstat}
  */
 void accPhiStat(_Doc d) {
   for (int t = 0; t < d.getSenetenceSize(); t++) {
     _Stn s = d.getSentence(t);
     double[] topicProb = null; // per-topic sum for sentence t, built on first use

     for (_SparseFeature f : s.getFv()) {
       if (topicProb == null) {
         topicProb = new double[number_of_topics];
         for (int i = 0; i < number_of_topics; i++) {
           // Same summation order as before: block 0 first, then blocks 1..constant-1.
           double prob = this.p_dwzpsi[t][i];
           for (int j = 1; j < constant; j++) prob += this.p_dwzpsi[t][i + j * number_of_topics];
           topicProb[i] = prob;
         }
       }

       int wid = f.getIndex();
       double v = f.getValue(); // frequency
       for (int i = 0; i < number_of_topics; i++) {
         this.sstat[i][wid] += v * topicProb[i];
       }
     }
   }
 }
예제 #3
0
  /**
   * Estimates a topic vector for every sentence of a parent document from the
   * document's word-level topic assignments.
   *
   * <p>Steps:
   * <ol>
   *   <li>Map each feature index in {@code d.getSparse()} to its position in that array.
   *   <li>Count, per distinct word, how often each topic was assigned to it
   *       ({@code d.m_words} / {@code d.m_topicAssignment}), then pass each count row
   *       to {@code Utils.L1Normalization} (presumably in-place normalization to unit
   *       L1 norm; confirm against {@code Utils}).
   *   <li>For each sentence, reset its topic vector, add up the rows of its words and
   *       normalize the result. Sentences with {@code m_stnLength == 0} keep an
   *       all-zero vector.
   * </ol>
   *
   * <p>A word that is missing from the sparse vector still fails with a
   * {@code NullPointerException} when the map result is unboxed, as before.
   *
   * @param d parent document whose sentences receive topic vectors
   */
  public void estStnThetaInParentDoc(_ParentDoc2 d) {
    _SparseFeature[] fv = d.getSparse();
    double[][] phi = new double[fv.length][number_of_topics];
    HashMap<Integer, Integer> indexMap = new HashMap<Integer, Integer>();

    // Feature index -> row of phi.
    for (int i = 0; i < fv.length; i++) {
      indexMap.put(fv[i].getIndex(), i);
    }

    // Per-word topic counts taken from the token-level assignments.
    for (int n = 0; n < d.m_words.length; n++) {
      int index = d.m_words[n];
      int topic = d.m_topicAssignment[n];
      phi[indexMap.get(index)][topic]++;
    }

    for (int i = 0; i < fv.length; i++) {
      Utils.L1Normalization(phi[i]);
    }

    for (int i = 0; i < d.m_sentenceMap.size(); i++) {
      _Stn stnObject = d.m_sentenceMap.get(i);
      stnObject.setTopicsVct(number_of_topics);
      Arrays.fill(stnObject.m_topics, 0);

      if (stnObject.m_stnLength == 0) {
        continue; // empty sentence: leave the zeroed vector untouched
      }

      int[] indexInCV = stnObject.m_words;
      for (int j = 0; j < indexInCV.length; j++) {
        // Resolve the word's row once instead of once per topic.
        double[] wordTopics = phi[indexMap.get(indexInCV[j])];
        for (int k = 0; k < number_of_topics; k++) {
          stnObject.m_topics[k] += wordTopics[k];
        }
      }
      Utils.L1Normalization(stnObject.m_topics);
    }
  }