/** * Determine the list of test instances and the list of training instances. * * @param prefix * @param type * @param prune */ public void prepare(String prefix, SequenceType type) { _classNames = Utils.getActivityNames(prefix); _testMap = getTestSet(_classNames); // build a training set and a test set. _training = new HashMap<String, List<Instance>>(); _testing = new ArrayList<Instance>(); // Now train up the signatures... for (String className : _classNames) { File dataFile = new File("data/input/" + className + ".lisp"); List<Instance> instances = Utils.sequences(className, dataFile.getAbsolutePath(), type); List<Integer> testSet = _testMap.get(className); for (Instance instance : instances) { if (testSet.contains(instance.id())) _testing.add(instance); else { List<Instance> list = _training.get(instance.name()); if (list == null) { list = new ArrayList<Instance>(); _training.put(instance.name(), list); } list.add(instance); } } } }
/** * Select a random set to be the test set. * * @param classNames * @return */ public Map<String, List<Integer>> getTestSet(List<String> classNames) { Random r = new Random(System.currentTimeMillis()); Map<String, List<Integer>> testSet = new HashMap<String, List<Integer>>(); for (String className : classNames) { String f = "data/input/" + className + ".lisp"; Map<Integer, List<Interval>> map = Utils.load(new File(f)); List<Integer> episodes = new ArrayList<Integer>(map.keySet()); Collections.shuffle(episodes, r); // 33% of the instances will be part of the test set double pct = 1.0 / 3.0; int number = (int) Math.round((double) episodes.size() * pct); List<Integer> list = new ArrayList<Integer>(); for (int i = 0; i < number; ++i) { list.add(episodes.get(i)); } testSet.put(className, list); } return testSet; }