/**
 * Builds an unlabeled feature vector for one citation from the LDA topic data.
 *
 * <p>The vector is identified by the string form of the citation's PubMed ID. The same
 * string is used to look up the citation's topics via {@code lda.getTopics}. For every
 * (key, value) pair in the returned multimap, a {@code DoubleFeature} is stored under the
 * decimal string of the value (the topic number), carrying the key as its numeric value.
 *
 * @param c the citation to build the vector for
 * @return the feature vector populated from the citation's topic multimap
 */
protected UnlabeledFeatureVector<Integer> createUnlabeledVector(Citation c) {
    String pmid = c.getPmid().toString();
    UnlabeledFeatureVector<Integer> vector = new UnlabeledFeatureVector<Integer>(pmid);

    Multimap<Double, Integer> topicsByWeight = lda.getTopics(pmid);
    for (double weight : topicsByWeight.keySet()) {
        for (int topicId : topicsByWeight.get(weight)) {
            // The topic number doubles as both the map key and the feature's name.
            String featureName = Integer.toString(topicId);
            vector.put(featureName, new DoubleFeature(featureName, weight));
        }
    }
    return vector;
}
@Override protected Map<PubmedId, FeatureVector<Integer>> createFeatureVectors( Collection<Citation> citations) { StringBuilder sb = new StringBuilder(); for (Citation c : citations) { sb.append(c.getPmid() + "\tX\t\"" + c.getTitle() + " " + c.getAbstr() + "\"\n"); } this.lda = new BasicLDA(NUM_TOPICS, ALPHA_SUM_PRIOR, BETA_PRIOR); try { lda.train(sb.toString(), NUM_LDA_IT); } catch (IOException e) { LOG.error(e); } // create the feature vectors Map<PubmedId, FeatureVector<Integer>> fvs = new HashMap<>(); for (Citation c : citations) { fvs.put(c.getPmid(), createUnlabeledVector(c)); } return fvs; }