public Instances transformInstances( Instances inst, Instances output, Map<FV, Collection<FV>> map) { Set<FV> set = map.keySet(); Double[] substitution = calculateAverage(inst); // Prepare the list // First level indicate which attribute the FVS resides in List<List<Value>> list = new ArrayList<>(); for (int i = 0; i < inst.numAttributes(); i++) { list.add(new ArrayList<Value>()); } // Build the data structure for (FV fv : set) { list.get(fv.getFeature()).add(new Value(fv.getValue().toString())); } for (int i = 0; i < inst.numInstances(); i++) { Instance instance = getFVSFilteredInstance(output, inst.instance(i), list, substitution); output.add(instance); } return output; }
public Map<FV, Collection<FV>> extractValuesFromData(Instances inst) { Multimap<FV, FV> fv_list = ArrayListMultimap.create(); // Instances outFormat = getOutputFormat(); for (int i = 0; i < inst.numInstances(); i++) { Instance ins = inst.instance(i); // Skip the class label for (int x = 0; x < ins.numAttributes() - 1; x++) { Object value = null; try { value = ins.stringValue(x); } catch (Exception e) { value = ins.value(x); } FV fv = new FV(x, value, ins.classValue()); fv.setNumLabels(inst.numClasses()); if (!fv_list.put(fv, fv)) { System.err.println("Couldn't put duplicates: " + fv); } } } Map<FV, Collection<FV>> original_map = fv_list.asMap(); return original_map; }
/**
 * Assigns a frequency and an entropy to every key of {@code fv_list} and returns the
 * entropies in the map's iteration order.
 *
 * <p>For each entry, the key's frequency is set to the size of its value collection
 * divided by {@code numInstances}. The collection's members are then tallied by
 * {@code (int) getLabel()} into an array of length {@code key.getNumLabels()}, and the
 * tallies plus the collection size are passed to {@code calculateEntropy}; the result is
 * stored on the key with {@code setEntropy}.
 *
 * <p>NOTE(review): the keys are mutated while they are in the map; this is only safe if
 * {@code FV.equals}/{@code hashCode} ignore frequency and entropy - confirm.
 *
 * @param fv_list      grouped feature values; keys are updated in place
 * @param numInstances divisor for the frequency; the division is floating-point, so a
 *                     value of 0 yields a non-finite frequency rather than an exception
 * @return one entropy per map entry, read back through {@code getEntropy()}
 * @throws ArrayIndexOutOfBoundsException if a member's label, cast to {@code int}, falls
 *                                        outside {@code [0, key.getNumLabels())}
 */
public List<Double> generateEntropy(Map<FV, Collection<FV>> fv_list, int numInstances) {
    // Diamond instead of the raw ArrayList: no unchecked conversion, and presized.
    List<Double> entropies = new ArrayList<>(fv_list.size());

    for (Entry<FV, Collection<FV>> entry : fv_list.entrySet()) {
        FV key = entry.getKey();
        Collection<FV> occurrences = entry.getValue();
        int occurrenceCount = occurrences.size();

        key.setFrequency((double) occurrenceCount / numInstances);

        // Tally how many occurrences carry each label.
        double[] labelCounts = new double[key.getNumLabels()];
        for (FV occurrence : occurrences) {
            labelCounts[(int) occurrence.getLabel()]++;
        }

        key.setEntropy(calculateEntropy(labelCounts, occurrenceCount));
        entropies.add(key.getEntropy());
    }
    return entropies;
}