Exemple #1
0
 /**
  * Computes the log-scale score of a document for the given category.
  *
  * <p>The score starts from {@code getCategoryProbability(category)} and adds
  * {@code getFeatureProbability(word, category)} for every word of the document that the
  * model reports as a known feature. Words that are not known features are skipped.
  *
  * @param category the category to score the document against
  * @param doc the document instance whose words are scored
  * @return the accumulated log-scale score (log prior plus the log terms of known features)
  */
 public double getProbability(String category, Instance doc) {
   double logScore = getCategoryProbability(category);
   for (String word : doc.getWords()) {
     // Only features known to the model contribute to the score.
     if (!VARIABLE.containFeature(word)) {
       continue;
     }
     logScore += getFeatureProbability(word, category);
   }
   return logScore;
 }
Exemple #2
0
 /**
  * Determines which category the given document instance belongs to.
  *
  * <p>Every category known to the model is scored with {@code getProbability} and the one
  * with the strictly highest score wins; on a tie the category encountered first is kept.
  * The category collection and each per-category score are printed to standard output.
  *
  * @param doc the document instance to classify
  * @return the name of the best-scoring category, or {@code null} when no category scores
  *     above negative infinity (for example when there are no categories)
  */
 public String getCategory(Instance doc) {
   Collection<String> categories = VARIABLE.getCategories();
   System.out.println(categories);
   String winner = null;
   double winnerScore = Double.NEGATIVE_INFINITY;
   for (String candidate : categories) {
     double score = getProbability(candidate, doc);
     System.out.println(candidate + ":" + score);
     // Strict comparison: an equal score never displaces the current winner.
     if (score > winnerScore) {
       winnerScore = score;
       winner = candidate;
     }
   }
   return winner;
 }
Exemple #3
0
 /**
  * Trains the model on a single document by adding the instance to the model state.
  *
  * @param doc the document instance to add to the model
  */
 public void training(Instance doc) {
   VARIABLE.addInstance(doc);
 }
Exemple #4
0
 /**
  * Saves the training result (the model state) to the given file.
  *
  * <p>The output stream is always closed, even when writing fails, so the file handle is
  * released and the written data is flushed to the file.
  *
  * @param file the file the model state is written to
  * @throws IOException if the file cannot be opened or the model cannot be written
  */
 void save(File file) throws IOException {
   try (DataOutputStream out = new DataOutputStream(new FileOutputStream(file))) {
     VARIABLE.write(out);
   }
 }
Exemple #5
0
 /**
  * Loads a previously saved training result (the model state) from the given file.
  *
  * <p>The input stream is always closed once reading finishes or fails, so the file handle
  * is not leaked. The current model state is replaced only when reading succeeds.
  *
  * @param file the file the model state is read from
  * @throws IOException if the file cannot be opened or the model cannot be read
  */
 public void load(File file) throws IOException {
   try (DataInputStream in = new DataInputStream(new FileInputStream(file))) {
     VARIABLE = Variable.read(in);
   }
 }
Exemple #6
0
 /**
  * Computes P(feature | category) and returns its natural logarithm.
  *
  * <p>The estimate uses add-one smoothing:
  * {@code (docCount(feature, category) + 1) / (docCount(category) + featureCount)}.
  *
  * @param feature the feature (word) whose conditional probability is estimated
  * @param category the category the probability is conditioned on
  * @return the natural logarithm of the smoothed estimate
  */
 public double getFeatureProbability(String feature, String category) {
   int featureCount = VARIABLE.getFeatureCount();
   // Adding 1.0 moves the numerator to double so the division below is not integral.
   double numerator = VARIABLE.getDocCount(feature, category) + 1.0;
   double denominator = VARIABLE.getDocCount(category) + featureCount;
   return Math.log(numerator / denominator);
 }
Exemple #7
0
 /**
  * Computes the prior P(C) = (documents in the category) / (all documents) and returns its
  * natural logarithm.
  *
  * <p>The ratio is computed in double precision, so large counts are not rounded to
  * single precision before the logarithm is taken.
  *
  * @param category the category whose prior is estimated
  * @return the natural logarithm of the category's share of all documents
  */
 public double getCategoryProbability(String category) {
   // Cast before dividing: avoids both integer division and float rounding.
   return Math.log((double) VARIABLE.getDocCount(category) / VARIABLE.getDocCount());
 }