예제 #1
0
  private double distance(Metadata m1, Metadata m2) {

    double wAttribute_type = 1;
    double wDefault_task = 1;
    double wMissing_values = 1;
    double wNumber_of_attributes = 1;
    double wNumber_of_instances = 1;

    // can be null
    double dAttribute_type = dCategory(m1.getAttribute_type(), m2.getAttribute_type());
    double dDefault_task = dCategory(m1.getDefault_task(), m2.getDefault_task());
    // default false - always set
    double dMissing_values = dBoolean(m1.getMissing_values(), m2.getMissing_values());
    // mandatory attributes - always set
    double dNumber_of_attributes =
        d(m1.getNumber_of_attributes(), m2.getNumber_of_attributes(), minAttributes, maxAttributes);
    double dNumber_of_instances =
        d(m1.getNumber_of_instances(), m2.getNumber_of_instances(), minInstances, maxInstances);

    double distance =
        wAttribute_type * dAttribute_type
            + wDefault_task * dDefault_task
            + wMissing_values * dMissing_values
            + wNumber_of_attributes * dNumber_of_attributes
            + wNumber_of_instances * dNumber_of_instances;

    return distance;
  }
예제 #2
0
  @Override
  protected Agent chooseBestAgent(Data data) {

    Metadata metadata = data.getMetadata();

    GetAllMetadata gm = new GetAllMetadata();
    gm.setResults_required(true);

    // 1. choose the nearest training data
    List allMetadata = DataManagerService.getAllMetadata(this, gm);

    // set the min, max instances and attributes first
    Iterator itr = allMetadata.iterator();
    while (itr.hasNext()) {
      Metadata next_md = (Metadata) itr.next();

      int na = next_md.getNumber_of_attributes();
      minAttributes = Math.min(minAttributes, na);
      maxAttributes = Math.max(maxAttributes, na);

      int ni = next_md.getNumber_of_instances();
      minInstances = Math.min(ni, minInstances);
      maxInstances = Math.max(ni, maxInstances);
    }

    ArrayList<MetadataDistancePair> distances = new ArrayList<MetadataDistancePair>();

    itr = allMetadata.iterator();
    while (itr.hasNext()) {
      Metadata next_md = (Metadata) itr.next();
      double dNew = distance(metadata, next_md);

      distances.add(new MetadataDistancePair(next_md, dNew));
    }

    Collections.sort(distances);

    List agents = new LinkedList();
    for (int i = 0; i < M; i++) {
      log(distances.get(i).m.getExternal_name() + ": " + distances.get(i).d);
      List ag = DataManagerService.getTheBestAgents(this, distances.get(i).m.getInternal_name(), N);
      Iterator it = ag.iterator();
      while (it.hasNext()) {
        agents.add(it.next());
      }
    }

    HashMap<String, Integer> counts = new HashMap<String, Integer>();

    Iterator it = agents.iterator();
    while (it.hasNext()) {
      Agent a = (Agent) it.next();

      if (counts.containsKey(a.getType())) {
        counts.put(a.getType(), counts.get(a.getType()) + 1);
      } else {
        counts.put(a.getType(), 1);
      }
    }

    int maxCount = 0;
    String bestAgentType = null;
    for (String s : counts.keySet()) {
      log(s + ": " + counts.get(s));
      if (counts.get(s) > maxCount) {
        maxCount = counts.get(s);
        bestAgentType = s;
      }
    }

    log("Best agent: " + bestAgentType);

    ArrayList<Agent> bestAgentOptions = new ArrayList<Agent>();

    it = agents.iterator();
    while (it.hasNext()) {
      Agent a = (Agent) it.next();

      if (a.getType().equals(bestAgentType)) {
        bestAgentOptions.add(a);
      }
    }

    List optionSamples = getAgentOptions(bestAgentType);
    List options = new LinkedList();
    it = optionSamples.iterator();
    while (it.hasNext()) {
      Option o = (Option) it.next();

      Option newOpt = o.copyOption();

      // ignore boolean and set options for now, set their value to the one of the best agent on
      // closest file
      if (o.getData_type().equals("BOOLEAN") || o.getData_type().equals("MIXED")) {
        if (bestAgentOptions.get(0).getOptionByName(o.getName()) == null) {
          continue;
        }
        newOpt.setValue(bestAgentOptions.get(0).getOptionByName(o.getName()).getValue());
      } else {
        double sum = 0;
        int count = 0;
        String optionName = o.getName();
        for (Agent a : bestAgentOptions) {
          if (a.getOptionByName(optionName) != null) {
            sum += Double.parseDouble(a.getOptionByName(optionName).getValue());
          }
          count++;
        }
        double avg = sum / count;

        double stdDev = 0;
        for (Agent a : bestAgentOptions) {
          if (a.getOptionByName(optionName) != null) {
            stdDev +=
                Math.pow(Double.parseDouble(a.getOptionByName(optionName).getValue()) - avg, 2);
          }
        }

        stdDev = Math.sqrt(stdDev / count);

        if (stdDev > 0) {
          newOpt.setValue("?");
          newOpt.setUser_value("?");
          newOpt.setMutable(true);
          Interval range = new Interval();
          range.setMin((float) Math.max(avg - 2 * stdDev, o.getRange().getMin()));
          range.setMax((float) Math.min(avg + 2 * stdDev, o.getRange().getMax()));
          newOpt.setRange(range);
        } else {
          if (o.getData_type().equals("FLOAT")) {
            newOpt.setValue(Double.toString(avg));
          }
          if (o.getData_type().equals("INT")) {
            newOpt.setValue(Integer.toString((int) avg));
          }
        }
      }
      options.add(newOpt);
    }

    Agent agent = new Agent();
    agent.setName(null);
    agent.setType(bestAgentType);
    agent.setOptions(options);

    return agent;
  }