public CondClassDistribution info_attr(Domain attr_domain) { Domain target_domain = insts_by_target.getClassDomain(); // flog.debug("What is the attributeToSplit? " + attr_domain); /* initialize the hashtable */ CondClassDistribution insts_by_attr = new CondClassDistribution(attr_domain, target_domain); insts_by_attr.setTotal(insts_by_target.getSum()); // flog.debug("Cond distribution for "+ attr_domain + " \n"+ insts_by_attr); for (int category = 0; category < target_domain.getCategoryCount(); category++) { Object targetCategory = target_domain.getCategory(category); for (Instance inst : insts_by_target.getSupportersFor(targetCategory)) { Object inst_attr_category = inst.getAttrValue(attr_domain.getFReferenceName()); Object inst_class = inst.getAttrValue(target_domain.getFReferenceName()); if (!targetCategory.equals(inst_class)) { if (flog.error() != null) flog.error() .log("How the f**k they are not the same ? " + targetCategory + " " + inst_class); System.exit(0); } insts_by_attr.change(inst_attr_category, targetCategory, inst.getWeight()); // +1 } } return insts_by_attr; }
// public static double info_contattr(InstanceList data, Domain targetDomain, QuantitativeDomain // splitDomain) { public CondClassDistribution info_contattr(Categorizer visitor) { List<Instance> data = visitor.getSortedInstances(); QuantitativeDomain splitDomain = visitor.getSplitDomain(); Domain targetDomain = insts_by_target.getClassDomain(); String targetAttr = targetDomain.getFReferenceName(); CondClassDistribution instances_by_attr = new CondClassDistribution(splitDomain, targetDomain); instances_by_attr.setTotal(data.size()); int index = 0; int split_index = 0; Object attr_key = splitDomain.getCategory(split_index); for (Instance i : data) { if (index == splitDomain.getSplit(split_index).getIndex() + 1) { attr_key = splitDomain.getCategory(split_index + 1); split_index++; } Object targetKey = i.getAttrValue(targetAttr); instances_by_attr.change(attr_key, targetKey, i.getWeight()); // +1 index++; } return instances_by_attr; // double sum = calc_info_attr(instances_by_attr); // return sum; }
/* * for both */ public static double calc_info_attr(CondClassDistribution instances_by_attr) { // Collection<Object> attributeValues = instances_by_attr.getAttributes(); double data_size = instances_by_attr.getTotal(); double sum = 0.0; if (data_size > 0) for (int attr_idx = 0; attr_idx < instances_by_attr.getNumCondClasses(); attr_idx++) { Object attr_category = instances_by_attr.getCondClass(attr_idx); double total_num_attr = instances_by_attr.getTotal_AttrCategory(attr_category); if (total_num_attr > 0) { double prob = total_num_attr / data_size; // flog.debug("{("+total_num_attr +"/"+data_size +":"+prob +")* ["); double info = calc_info(instances_by_attr.getDistributionOf(attr_category)); sum += prob * info; // flog.debug("]} "); } } // flog.debug("\n == "+sum); return sum; }