コード例 #1
0
  /**
   * Builds the unlabeled feature vector for a single citation from the LDA topic data.
   *
   * <p>The vector is identified by the string form of the citation's PMID. For every
   * (value, topic) pair that {@code lda.getTopics} reports for that PMID, a
   * {@code DoubleFeature} named after the topic number and carrying the value is stored
   * in the vector under the topic number's string form.
   *
   * @param c the citation whose topics are looked up by PMID
   * @return the feature vector populated from the citation's topic data
   */
  protected UnlabeledFeatureVector<Integer> createUnlabeledVector(Citation c) {
    final String pmid = c.getPmid().toString();
    final UnlabeledFeatureVector<Integer> vector = new UnlabeledFeatureVector<>(pmid);

    // Keys are the per-topic values; each key maps to the topic numbers sharing that value.
    final Multimap<Double, Integer> valueToTopics = lda.getTopics(pmid);
    for (double value : valueToTopics.keySet()) {
      for (int topicId : valueToTopics.get(value)) {
        final String featureName = String.valueOf(topicId);
        vector.put(featureName, new DoubleFeature(featureName, value));
      }
    }
    return vector;
  }
コード例 #2
0
  /**
   * Trains a fresh LDA model on the given citations and builds one feature vector per
   * citation from the trained model.
   *
   * <p>Each citation contributes one training line of the form
   * {@code <pmid> TAB X TAB "<title> <abstract>"} terminated by a newline. The model is
   * stored in the {@code lda} field before the vectors are created, because
   * {@code createUnlabeledVector} reads it from there.
   *
   * @param citations the citations to train on and to create vectors for
   * @return a map from each citation's PMID to its feature vector
   */
  @Override
  protected Map<PubmedId, FeatureVector<Integer>> createFeatureVectors(
      Collection<Citation> citations) {
    // Assemble the training corpus, one tab-separated line per citation.
    StringBuilder corpus = new StringBuilder();
    for (Citation c : citations) {
      corpus.append(c.getPmid())
          .append("\tX\t\"")
          .append(c.getTitle())
          .append(' ')
          .append(c.getAbstr())
          .append("\"\n");
    }

    this.lda = new BasicLDA(NUM_TOPICS, ALPHA_SUM_PRIOR, BETA_PRIOR);
    try {
      lda.train(corpus.toString(), NUM_LDA_IT);
    } catch (IOException e) {
      // Pass the exception as the throwable argument so the stack trace is logged,
      // not just its toString().
      // NOTE(review): processing continues with the untrained model, as before;
      // confirm that lda.getTopics behaves sensibly in that state.
      LOG.error("Failed to train the LDA model on " + citations.size() + " citations", e);
    }

    // Create the feature vectors from the model stored in this.lda.
    Map<PubmedId, FeatureVector<Integer>> fvs = new HashMap<>();
    for (Citation c : citations) {
      fvs.put(c.getPmid(), createUnlabeledVector(c));
    }

    return fvs;
  }