Пример #1
0
  /**
   * Loads the instances of every activity and splits them into a test list and a
   * training map.
   *
   * <p>The activity names are obtained from {@code Utils.getActivityNames(prefix)} and a
   * test selection is drawn for them with {@link #getTestSet(List)}. Each activity's
   * instances are then read from {@code data/input/<activity>.lisp}. An instance whose
   * {@code id()} appears in the activity's test selection is appended to {@code _testing};
   * every other instance is appended to the {@code _training} list stored under its
   * {@code name()}.
   *
   * <p>{@code _classNames}, {@code _testMap}, {@code _training} and {@code _testing} are
   * all replaced by this call.
   *
   * @param prefix passed to {@code Utils.getActivityNames} to determine the activity names
   * @param type passed to {@code Utils.sequences} when loading each activity's instances
   */
  public void prepare(String prefix, SequenceType type) {
    _classNames = Utils.getActivityNames(prefix);
    _testMap = getTestSet(_classNames);

    // Start from empty containers for the training and test partitions.
    _training = new HashMap<String, List<Instance>>();
    _testing = new ArrayList<Instance>();

    for (String activity : _classNames) {
      String path = new File("data/input/" + activity + ".lisp").getAbsolutePath();
      List<Instance> loaded = Utils.sequences(activity, path, type);
      List<Integer> heldOutIds = _testMap.get(activity);

      for (Instance candidate : loaded) {
        // NOTE(review): linear scan of the id list per instance; fine for small test sets.
        if (heldOutIds.contains(candidate.id())) {
          _testing.add(candidate);
          continue;
        }

        // Training instances are grouped by the instance's own name.
        String key = candidate.name();
        List<Instance> bucket = _training.get(key);
        if (bucket == null) {
          bucket = new ArrayList<Instance>();
          _training.put(key, bucket);
        }
        bucket.add(candidate);
      }
    }
  }
Пример #2
0
  /**
   * Randomly selects roughly one third of each class's episodes to be its test set.
   *
   * <p>For every class name the episode ids are the keys of the map returned by
   * {@code Utils.load} for {@code data/input/<className>.lisp}. The ids are shuffled and
   * the first {@code round(size / 3)} of them are kept, so a class with a single episode
   * contributes an empty list.
   *
   * @param classNames the class names to draw test episodes for
   * @return a map from each class name to the episode ids chosen for testing
   */
  public Map<String, List<Integer>> getTestSet(List<String> classNames) {
    // Use the default seeding: seeding with System.currentTimeMillis() hands the same
    // seed to calls made within one millisecond, which can repeat the same "random" split.
    Random r = new Random();

    // 33% of the instances will be part of the test set
    final double pct = 1.0 / 3.0;

    Map<String, List<Integer>> testSet = new HashMap<String, List<Integer>>();

    for (String className : classNames) {
      String f = "data/input/" + className + ".lisp";
      Map<Integer, List<Interval>> map = Utils.load(new File(f));
      List<Integer> episodes = new ArrayList<Integer>(map.keySet());
      Collections.shuffle(episodes, r);

      int number = (int) Math.round((double) episodes.size() * pct);

      // Copy the leading slice so the stored list does not stay a view onto episodes.
      testSet.put(className, new ArrayList<Integer>(episodes.subList(0, number)));
    }

    return testSet;
  }