private double distance(Metadata m1, Metadata m2) { double wAttribute_type = 1; double wDefault_task = 1; double wMissing_values = 1; double wNumber_of_attributes = 1; double wNumber_of_instances = 1; // can be null double dAttribute_type = dCategory(m1.getAttribute_type(), m2.getAttribute_type()); double dDefault_task = dCategory(m1.getDefault_task(), m2.getDefault_task()); // default false - always set double dMissing_values = dBoolean(m1.getMissing_values(), m2.getMissing_values()); // mandatory attributes - always set double dNumber_of_attributes = d(m1.getNumber_of_attributes(), m2.getNumber_of_attributes(), minAttributes, maxAttributes); double dNumber_of_instances = d(m1.getNumber_of_instances(), m2.getNumber_of_instances(), minInstances, maxInstances); double distance = wAttribute_type * dAttribute_type + wDefault_task * dDefault_task + wMissing_values * dMissing_values + wNumber_of_attributes * dNumber_of_attributes + wNumber_of_instances * dNumber_of_instances; return distance; }
@Override protected Agent chooseBestAgent(Data data) { Metadata metadata = data.getMetadata(); GetAllMetadata gm = new GetAllMetadata(); gm.setResults_required(true); // 1. choose the nearest training data List allMetadata = DataManagerService.getAllMetadata(this, gm); // set the min, max instances and attributes first Iterator itr = allMetadata.iterator(); while (itr.hasNext()) { Metadata next_md = (Metadata) itr.next(); int na = next_md.getNumber_of_attributes(); minAttributes = Math.min(minAttributes, na); maxAttributes = Math.max(maxAttributes, na); int ni = next_md.getNumber_of_instances(); minInstances = Math.min(ni, minInstances); maxInstances = Math.max(ni, maxInstances); } ArrayList<MetadataDistancePair> distances = new ArrayList<MetadataDistancePair>(); itr = allMetadata.iterator(); while (itr.hasNext()) { Metadata next_md = (Metadata) itr.next(); double dNew = distance(metadata, next_md); distances.add(new MetadataDistancePair(next_md, dNew)); } Collections.sort(distances); List agents = new LinkedList(); for (int i = 0; i < M; i++) { log(distances.get(i).m.getExternal_name() + ": " + distances.get(i).d); List ag = DataManagerService.getTheBestAgents(this, distances.get(i).m.getInternal_name(), N); Iterator it = ag.iterator(); while (it.hasNext()) { agents.add(it.next()); } } HashMap<String, Integer> counts = new HashMap<String, Integer>(); Iterator it = agents.iterator(); while (it.hasNext()) { Agent a = (Agent) it.next(); if (counts.containsKey(a.getType())) { counts.put(a.getType(), counts.get(a.getType()) + 1); } else { counts.put(a.getType(), 1); } } int maxCount = 0; String bestAgentType = null; for (String s : counts.keySet()) { log(s + ": " + counts.get(s)); if (counts.get(s) > maxCount) { maxCount = counts.get(s); bestAgentType = s; } } log("Best agent: " + bestAgentType); ArrayList<Agent> bestAgentOptions = new ArrayList<Agent>(); it = agents.iterator(); while (it.hasNext()) { Agent a = (Agent) it.next(); if (a.getType().equals(bestAgentType)) { bestAgentOptions.add(a); } } List optionSamples = getAgentOptions(bestAgentType); List options = new LinkedList(); it = optionSamples.iterator(); while (it.hasNext()) { Option o = (Option) it.next(); Option newOpt = o.copyOption(); // ignore boolean and set options for now, set their value to the one of the best agent on // closest file if (o.getData_type().equals("BOOLEAN") || o.getData_type().equals("MIXED")) { if (bestAgentOptions.get(0).getOptionByName(o.getName()) == null) { continue; } newOpt.setValue(bestAgentOptions.get(0).getOptionByName(o.getName()).getValue()); } else { double sum = 0; int count = 0; String optionName = o.getName(); for (Agent a : bestAgentOptions) { if (a.getOptionByName(optionName) != null) { sum += Double.parseDouble(a.getOptionByName(optionName).getValue()); } count++; } double avg = sum / count; double stdDev = 0; for (Agent a : bestAgentOptions) { if (a.getOptionByName(optionName) != null) { stdDev += Math.pow(Double.parseDouble(a.getOptionByName(optionName).getValue()) - avg, 2); } } stdDev = Math.sqrt(stdDev / count); if (stdDev > 0) { newOpt.setValue("?"); newOpt.setUser_value("?"); newOpt.setMutable(true); Interval range = new Interval(); range.setMin((float) Math.max(avg - 2 * stdDev, o.getRange().getMin())); range.setMax((float) Math.min(avg + 2 * stdDev, o.getRange().getMax())); newOpt.setRange(range); } else { if (o.getData_type().equals("FLOAT")) { newOpt.setValue(Double.toString(avg)); } if (o.getData_type().equals("INT")) { newOpt.setValue(Integer.toString((int) avg)); } } } options.add(newOpt); } Agent agent = new Agent(); agent.setName(null); agent.setType(bestAgentType); agent.setOptions(options); return agent; }