public void printParentTopicAssignment(_ParentDoc2 d) {
	String topicAssignmentFile = "./data/results/0108_9/parentTopicAssignment/topicAssignment_"
			+ d.getName() + "_.txt";

	// try-with-resources guarantees the writer is flushed and closed even
	// if an exception is thrown mid-write.
	try (PrintWriter pw = new PrintWriter(new File(topicAssignmentFile))) {
		// One output line per sentence; each token is printed as "feature:topic".
		for (int i = 0; i < d.m_sentenceMap.size(); i++) {
			_Stn stnObject = d.m_sentenceMap.get(i);
			if (stnObject.m_stnLength != 0) {
				int[] indexInCV = stnObject.m_words;
				int[] positionInDoc = stnObject.m_wordPositionInDoc;
				for (int j = 0; j < indexInCV.length; j++) {
					int index = indexInCV[j];
					int topic = d.m_topicAssignment[positionInDoc[j]];
					String featureName = m_corpus.getFeature(index);
					pw.print(featureName + ":" + topic + "\t");
				}
			}
			// Empty sentences still emit a blank line so that output line
			// numbers stay aligned with sentence indices.
			pw.println();
		}
	} catch (FileNotFoundException e) {
		e.printStackTrace();
	}
}
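// Example output line (feature names are illustrative only, not taken from
// any real run): tab-separated "feature:topic" pairs, one line per sentence,
// with blank lines marking empty sentences:
//
//   government:3    policy:3    reform:7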
/**
 * Document log-likelihood under the current estimates: for each token,
 * p(w|d) = sum_k p(w|k) * p(k|d), where p(w|k) is estimated from the
 * pooled parent/child sufficient statistics and p(k|d) from the
 * document's topic counts.
 */
public double calParentLogLikelihood2(_ParentDoc2 pDoc) {
	int docLength = pDoc.getTotalDocLength();
	if (docLength == 0)
		return 0;

	double likelihood = 0.0;
	for (int n = 0; n < docLength; n++) {
		int wid = pDoc.m_words[n];
		double wordLikelihood = 0.0;
		for (int k = 0; k < number_of_topics; k++) {
			// term1 = p(w|k) from the combined parent/child counts
			double term1 = (m_parentWordTopicSstat[k][wid] + m_childWordTopicSstat[k][wid])
					/ (double) (m_parentSstat[k] + m_childSstat[k]);
			// term2 = p(k|d), the document's topic proportion
			double term2 = pDoc.m_sstat[k] / (double) docLength;
			wordLikelihood += term1 * term2;
		}
		// Guard against log(0) when a word has zero mass under every topic.
		if (wordLikelihood == 0)
			wordLikelihood = 1e-9;
		likelihood += Math.log(wordLikelihood);
	}
	return likelihood;
}
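// A numerically safer variant (sketch only, not wired into the original
// code): when the per-topic products underflow, accumulate the per-word
// mixture in log space via log-sum-exp instead of flooring with an epsilon.
private double logSumExp(double[] logTerms) {
	// Shift by the maximum so the largest exponent is exp(0) = 1.
	double max = Double.NEGATIVE_INFINITY;
	for (double v : logTerms)
		max = Math.max(max, v);
	if (max == Double.NEGATIVE_INFINITY)
		return max; // all terms were zero probability
	double sum = 0.0;
	for (double v : logTerms)
		sum += Math.exp(v - max);
	return max + Math.log(sum);
}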
/**
 * Collapsed log-likelihood of the document's topic assignments under the
 * symmetric Dirichlet prior, computed in log space.
 */
public double calParentLogLikelihood(_ParentDoc2 pDoc) {
	// Normalizing constant of the Dirichlet prior.
	double term1 = Utils.lgamma(number_of_topics * d_alpha)
			- number_of_topics * Utils.lgamma(d_alpha);
	// Posterior-count term over the per-topic counts n_k.
	double term2 = 0.0;
	for (int k = 0; k < number_of_topics; k++)
		term2 += Utils.lgamma(pDoc.m_sstat[k] + d_alpha);
	term2 -= Utils.lgamma(number_of_topics * d_alpha + pDoc.getDocLength());
	return term1 + term2;
}
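// Derivation note: integrating out theta ~ Dirichlet(alpha) gives the
// Dirichlet-multinomial marginal
//   p(z | alpha) = Gamma(K*alpha) / Gamma(K*alpha + N)
//                  * prod_k Gamma(n_k + alpha) / Gamma(alpha),
// whose log is exactly term1 + term2 above, with n_k = pDoc.m_sstat[k],
// K = number_of_topics, and N = pDoc.getDocLength().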
public void estStnThetaInParentDoc(_ParentDoc2 d) {
	_SparseFeature[] fv = d.getSparse();
	double[][] phi = new double[fv.length][number_of_topics];
	HashMap<Integer, Integer> indexMap = new HashMap<Integer, Integer>();

	// Map each vocabulary index to its row in the document's sparse vector.
	for (int i = 0; i < fv.length; i++)
		indexMap.put(fv[i].getIndex(), i);

	// Accumulate word-level topic assignment counts, then normalize each
	// word's row into a topic proportion (phi).
	for (int n = 0; n < d.m_words.length; n++) {
		int index = d.m_words[n];
		int topic = d.m_topicAssignment[n];
		phi[indexMap.get(index)][topic]++;
	}
	for (int i = 0; i < fv.length; i++)
		Utils.L1Normalization(phi[i]);

	// A sentence's topic vector is the normalized sum of the phi rows of
	// the words it contains.
	for (int i = 0; i < d.m_sentenceMap.size(); i++) {
		_Stn stnObject = d.m_sentenceMap.get(i);
		stnObject.setTopicsVct(number_of_topics);
		Arrays.fill(stnObject.m_topics, 0);
		if (stnObject.m_stnLength != 0) {
			int[] indexInCV = stnObject.m_words;
			for (int j = 0; j < indexInCV.length; j++) {
				int index = indexInCV[j];
				for (int k = 0; k < number_of_topics; k++)
					stnObject.m_topics[k] += phi[indexMap.get(index)][k];
			}
			Utils.L1Normalization(stnObject.m_topics);
		}
	}
}
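// Sketch only (this helper is not part of the original class): once
// estStnThetaInParentDoc has filled each sentence's m_topics with a
// normalized distribution, the dominant topic of a sentence can be read
// off with a simple argmax.
private int dominantTopic(_Stn stnObject) {
	int best = 0;
	for (int k = 1; k < number_of_topics; k++) {
		if (stnObject.m_topics[k] > stnObject.m_topics[best])
			best = k;
	}
	return best;
}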