Exemplo n.º 1
0
  /**
   * Dumps the per-word topic assignment of a parent document to a text file.
   *
   * <p>The file is written to
   * {@code ./data/results/0108_9/parentTopicAssignment/topicAssignment_<name>_.txt}, where
   * {@code <name>} is {@code d.getName()}. One output line is produced for each of the entries
   * {@code 0 .. d.m_sentenceMap.size() - 1}; every word of a sentence with non-zero
   * {@code m_stnLength} is printed as {@code featureName:topic} followed by a tab. A sentence
   * with zero length yields an empty line.
   *
   * <p>If the output file cannot be opened, the stack trace is printed and nothing is written.
   * The writer is always closed, even when an unchecked exception interrupts the dump.
   *
   * @param d parent document whose topic assignments are written out
   */
  public void printParentTopicAssignment(_ParentDoc2 d) {
    String topicAssignmentFile =
        "./data/results/0108_9/parentTopicAssignment/topicAssignment_" + d.getName() + "_.txt";
    PrintWriter pw = null;
    try {
      pw = new PrintWriter(new File(topicAssignmentFile));
      for (int i = 0; i < d.m_sentenceMap.size(); i++) {
        _Stn stnObject = d.m_sentenceMap.get(i);
        if (stnObject.m_stnLength != 0) {
          int[] indexInCV = stnObject.m_words;
          int[] positionInDoc = stnObject.m_wordPositionInDoc;
          for (int j = 0; j < indexInCV.length; j++) {
            // positionInDoc[j] maps the j-th word of the sentence to its slot in the
            // document-level topic assignment array.
            int topic = d.m_topicAssignment[positionInDoc[j]];
            String featureName = m_corpus.getFeature(indexInCV[j]);
            pw.print(featureName + ":" + topic + "\t");
          }
        }
        // One line per sentence, including empty ones.
        pw.println();
      }
      pw.flush();
    } catch (FileNotFoundException e) {
      e.printStackTrace();
    } finally {
      // Release the file handle even if the loop above threw an unchecked exception.
      if (pw != null) {
        pw.close();
      }
    }
  }
Exemplo n.º 2
0
  /**
   * Computes a log-likelihood score of a parent document from the current count statistics.
   *
   * <p>For every word position {@code n} (word id {@code w = pDoc.m_words[n]}) and every topic
   * {@code k} the method adds {@code log(term1) + log(term2)} to the result, where
   *
   * <pre>
   *   term1 = (m_parentWordTopicSstat[k][w] + m_childWordTopicSstat[k][w])
   *           / (m_parentSstat[k] + m_childSstat[k])
   *   term2 = pDoc.m_sstat[k] / pDoc.getTotalDocLength()
   * </pre>
   *
   * A term that is exactly zero is replaced by {@code 1e-9} so that the logarithm stays finite.
   *
   * <p>NOTE(review): the score sums the logs over all topics for each word instead of taking the
   * log of the topic-marginalized probability; this is kept as-is — confirm it is intended.
   *
   * @param pDoc parent document to score
   * @return the accumulated log score, or {@code 0} for a document of total length zero
   */
  public double calParentLogLikelihood2(_ParentDoc2 pDoc) {
    final int docLength = pDoc.getTotalDocLength();
    if (docLength == 0) {
      return 0;
    }

    // Substitute for zero-valued terms to avoid log(0).
    final double eps = 1e-9;
    double likelihood = 0.0;

    for (int n = 0; n < docLength; n++) {
      // The word id only depends on the position, not on the topic.
      int wid = pDoc.m_words[n];
      for (int k = 0; k < number_of_topics; k++) {
        double term1 =
            (m_parentWordTopicSstat[k][wid] + m_childWordTopicSstat[k][wid])
                / (double) (m_parentSstat[k] + m_childSstat[k]);
        double term2 = pDoc.m_sstat[k] / (double) docLength;

        if (term1 == 0) {
          term1 = eps;
        }
        if (term2 == 0) {
          term2 = eps;
        }

        likelihood += Math.log(term1) + Math.log(term2);
      }
    }

    return likelihood;
  }
Exemplo n.º 3
0
  /**
   * Evaluates, in log space, the topic-count term of a parent document:
   *
   * <pre>
   *   lgamma(K * alpha) - K * lgamma(alpha)
   *     + sum_k lgamma(pDoc.m_sstat[k] + alpha)
   *     - lgamma(K * alpha + pDoc.getDocLength())
   * </pre>
   *
   * with {@code K = number_of_topics} and {@code alpha = d_alpha}. The expression has the form
   * of a Dirichlet-multinomial log marginal with a symmetric parameter.
   *
   * <p>NOTE(review): the last term uses {@code getDocLength()}, not {@code getTotalDocLength()}
   * — confirm this is the intended length.
   *
   * @param pDoc parent document whose topic counts ({@code m_sstat}) are scored
   * @return the log-space value described above
   */
  public double calParentLogLikelihood(_ParentDoc2 pDoc) {
    // Part that depends only on the number of topics and alpha.
    final double priorPart =
        Utils.lgamma(number_of_topics * d_alpha) - number_of_topics * Utils.lgamma(d_alpha);

    // Part that depends on the document's per-topic counts.
    double countPart = 0.0;
    int topic = 0;
    while (topic < number_of_topics) {
      countPart += Utils.lgamma(pDoc.m_sstat[topic] + d_alpha);
      topic++;
    }

    double normalizerArg = number_of_topics * d_alpha + pDoc.getDocLength();
    countPart -= Utils.lgamma(normalizerArg);

    return priorPart + countPart;
  }
Exemplo n.º 4
0
  /**
   * Estimates a topic vector for every sentence of a parent document from the document's
   * current word-level topic assignments.
   *
   * <p>Steps:
   *
   * <ol>
   *   <li>Build a map from feature index to its row in the document's sparse vector
   *       ({@code d.getSparse()}).
   *   <li>For each row, count how often the feature was assigned to each topic
   *       ({@code d.m_words} / {@code d.m_topicAssignment}) and pass the row through
   *       {@code Utils.L1Normalization}.
   *   <li>For each of the sentences {@code 0 .. d.m_sentenceMap.size() - 1}, reset its topic
   *       vector to {@code number_of_topics} zeros; if the sentence has non-zero
   *       {@code m_stnLength}, sum the rows of its words and pass the result through
   *       {@code Utils.L1Normalization}.
   * </ol>
   *
   * <p>Every word id occurring in {@code d.m_words} or in a sentence must be present in the
   * sparse vector; otherwise the index lookup fails with a {@link NullPointerException}.
   *
   * @param d parent document whose sentences' {@code m_topics} are overwritten
   */
  public void estStnThetaInParentDoc(_ParentDoc2 d) {
    _SparseFeature[] fv = d.getSparse();
    double[][] phi = new double[fv.length][number_of_topics];
    HashMap<Integer, Integer> indexMap = new HashMap<Integer, Integer>();

    // Feature index -> row of phi.
    for (int i = 0; i < fv.length; i++) {
      indexMap.put(fv[i].getIndex(), i);
    }

    // Per-feature topic counts within this document.
    for (int n = 0; n < d.m_words.length; n++) {
      int index = d.m_words[n];
      int topic = d.m_topicAssignment[n];
      phi[indexMap.get(index)][topic]++;
    }

    for (int i = 0; i < fv.length; i++) {
      Utils.L1Normalization(phi[i]);
    }

    for (int i = 0; i < d.m_sentenceMap.size(); i++) {
      _Stn stnObject = d.m_sentenceMap.get(i);
      stnObject.setTopicsVct(number_of_topics);
      Arrays.fill(stnObject.m_topics, 0);

      // Empty sentences keep an all-zero topic vector.
      if (stnObject.m_stnLength == 0) {
        continue;
      }

      for (int index : stnObject.m_words) {
        // Resolve the row once per word instead of once per (word, topic) pair.
        double[] wordPhi = phi[indexMap.get(index)];
        for (int k = 0; k < number_of_topics; k++) {
          stnObject.m_topics[k] += wordPhi[k];
        }
      }
      Utils.L1Normalization(stnObject.m_topics);
    }
  }