/** * Generate artificial training examples. * * @param artSize size of examples set to create * @param data training data * @return the set of unlabeled artificial examples */ protected Instances generateArtificialData(int artSize, Instances data) { int numAttributes = data.numAttributes(); Instances artData = new Instances(data, artSize); double[] att; Instance artInstance; for (int i = 0; i < artSize; i++) { att = new double[numAttributes]; for (int j = 0; j < numAttributes; j++) { if (data.attribute(j).isNominal()) { // Select nominal value based on the frequency of occurence in the training data double[] stats = (double[]) m_AttributeStats.get(j); att[j] = (double) selectIndexProbabilistically(stats); } else if (data.attribute(j).isNumeric()) { // Generate numeric value from the Guassian distribution // defined by the mean and std dev of the attribute double[] stats = (double[]) m_AttributeStats.get(j); att[j] = (m_Random.nextGaussian() * stats[1]) + stats[0]; } else System.err.println("Decorate can only handle numeric and nominal values."); } artInstance = new Instance(1.0, att); artData.add(artInstance); } return artData; }
/** * Add new instances to the given set of instances. * * @param data given instances * @param newData set of instances to add to given instances */ protected void addInstances(Instances data, Instances newData) { for (int i = 0; i < newData.numInstances(); i++) data.add(newData.instance(i)); }