/** * Adds this tree recursively to the buffer. * * @param id the unqiue id for the method * @param buffer the buffer to add the source code to * @return the last ID being used * @throws Exception if something goes wrong */ protected int toSource(int id, StringBuffer buffer) throws Exception { int result; int i; int newID; StringBuffer[] subBuffers; buffer.append("\n"); buffer.append(" protected static double node" + id + "(Object[] i) {\n"); // leaf? if (m_Attribute == null) { result = id; if (Double.isNaN(m_ClassValue)) buffer.append(" return Double.NaN;"); else buffer.append(" return " + m_ClassValue + ";"); if (m_ClassAttribute != null) buffer.append(" // " + m_ClassAttribute.value((int) m_ClassValue)); buffer.append("\n"); buffer.append(" }\n"); } else { buffer.append(" // " + m_Attribute.name() + "\n"); // subtree calls subBuffers = new StringBuffer[m_Attribute.numValues()]; newID = id; for (i = 0; i < m_Attribute.numValues(); i++) { newID++; buffer.append(" "); if (i > 0) buffer.append("else "); buffer.append( "if (((String) i[" + m_Attribute.index() + "]).equals(\"" + m_Attribute.value(i) + "\"))\n"); buffer.append(" return node" + newID + "(i);\n"); subBuffers[i] = new StringBuffer(); newID = m_Successors[i].toSource(newID, subBuffers[i]); } buffer.append(" else\n"); buffer.append( " throw new IllegalArgumentException(\"Value '\" + i[" + m_Attribute.index() + "] + \"' is not allowed!\");\n"); buffer.append(" }\n"); // output subtree code for (i = 0; i < m_Attribute.numValues(); i++) { buffer.append(subBuffers[i].toString()); } subBuffers = null; result = newID; } return result; }
public double classifyInstance(Instance inst) throws Exception { if (m_attribute == null) { return m_intercept; } else { if (inst.isMissing(m_attribute.index())) { throw new Exception("UnivariateLinearRegression: No missing values!"); } return m_intercept + m_slope * inst.value(m_attribute.index()); } }
/** * Method for building an Id3 tree. * * @param data the training data * @exception Exception if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = Utils.missingValue(); m_Distribution = new double[data.numClasses()]; return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); // Make leaf if information gain is zero. // Otherwise create successors. if (Utils.eq(infoGains[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { Instances[] splitData = splitData(data, m_Attribute); m_Successors = new Id3[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new Id3(); m_Successors[j].makeTree(splitData[j]); } } }
/** Main method for testing this class. */ public static void main(String[] options) { try { // Create numeric attributes "length" and "weight" Attribute length = new Attribute("length"); Attribute weight = new Attribute("weight"); // Create vector to hold nominal values "first", "second", "third" ArrayList<String> my_nominal_values = new ArrayList<String>(3); my_nominal_values.add("first"); my_nominal_values.add("second"); my_nominal_values.add("third"); // Create nominal attribute "position" Attribute position = new Attribute("position", my_nominal_values); // Create vector of the above attributes ArrayList<Attribute> attributes = new ArrayList<Attribute>(3); attributes.add(length); attributes.add(weight); attributes.add(position); // Create the empty dataset "race" with above attributes Instances race = new Instances("race", attributes, 0); // Make position the class attribute race.setClassIndex(position.index()); // Create empty instance with three attribute values SparseInstance inst = new SparseInstance(3); // Set instance's values for the attributes "length", "weight", and "position" inst.setValue(length, 5.3); inst.setValue(weight, 300); inst.setValue(position, "first"); // Set instance's dataset to be the dataset "race" inst.setDataset(race); // Print the instance System.out.println("The instance: " + inst); // Print the first attribute System.out.println("First attribute: " + inst.attribute(0)); // Print the class attribute System.out.println("Class attribute: " + inst.classAttribute()); // Print the class index System.out.println("Class index: " + inst.classIndex()); // Say if class is missing System.out.println("Class is missing: " + inst.classIsMissing()); // Print the instance's class value in internal format System.out.println("Class value (internal format): " + inst.classValue()); // Print a shallow copy of this instance SparseInstance copy = (SparseInstance) inst.copy(); System.out.println("Shallow copy: " + copy); // Set dataset for shallow copy copy.setDataset(inst.dataset()); System.out.println("Shallow copy with dataset set: " + copy); // Print out all values in internal format System.out.print("All stored values in internal format: "); for (int i = 0; i < inst.numValues(); i++) { if (i > 0) { System.out.print(","); } System.out.print(inst.valueSparse(i)); } System.out.println(); // Set all values to zero System.out.print("All values set to zero: "); while (inst.numValues() > 0) { inst.setValueSparse(0, 0); } for (int i = 0; i < inst.numValues(); i++) { if (i > 0) { System.out.print(","); } System.out.print(inst.valueSparse(i)); } System.out.println(); // Set all values to one System.out.print("All values set to one: "); for (int i = 0; i < inst.numAttributes(); i++) { inst.setValue(i, 1); } for (int i = 0; i < inst.numValues(); i++) { if (i > 0) { System.out.print(","); } System.out.print(inst.valueSparse(i)); } System.out.println(); // Unset dataset for copy, delete first attribute, and insert it again copy.setDataset(null); copy.deleteAttributeAt(0); copy.insertAttributeAt(0); copy.setDataset(inst.dataset()); System.out.println("Copy with first attribute deleted and inserted: " + copy); // Same for second attribute copy.setDataset(null); copy.deleteAttributeAt(1); copy.insertAttributeAt(1); copy.setDataset(inst.dataset()); System.out.println("Copy with second attribute deleted and inserted: " + copy); // Same for last attribute copy.setDataset(null); copy.deleteAttributeAt(2); copy.insertAttributeAt(2); copy.setDataset(inst.dataset()); System.out.println("Copy with third attribute deleted and inserted: " + copy); // Enumerate attributes (leaving out the class attribute) System.out.println("Enumerating attributes (leaving out class):"); Enumeration enu = inst.enumerateAttributes(); while (enu.hasMoreElements()) { Attribute att = (Attribute) enu.nextElement(); System.out.println(att); } // Headers are equivalent? System.out.println("Header of original and copy equivalent: " + inst.equalHeaders(copy)); // Test for missing values System.out.println("Length of copy missing: " + copy.isMissing(length)); System.out.println("Weight of copy missing: " + copy.isMissing(weight.index())); System.out.println("Length of copy missing: " + Utils.isMissingValue(copy.value(length))); // Prints number of attributes and classes System.out.println("Number of attributes: " + copy.numAttributes()); System.out.println("Number of classes: " + copy.numClasses()); // Replace missing values double[] meansAndModes = {2, 3, 0}; copy.replaceMissingValues(meansAndModes); System.out.println("Copy with missing value replaced: " + copy); // Setting and getting values and weights copy.setClassMissing(); System.out.println("Copy with missing class: " + copy); copy.setClassValue(0); System.out.println("Copy with class value set to first value: " + copy); copy.setClassValue("third"); System.out.println("Copy with class value set to \"third\": " + copy); copy.setMissing(1); System.out.println("Copy with second attribute set to be missing: " + copy); copy.setMissing(length); System.out.println("Copy with length set to be missing: " + copy); copy.setValue(0, 0); System.out.println("Copy with first attribute set to 0: " + copy); copy.setValue(weight, 1); System.out.println("Copy with weight attribute set to 1: " + copy); copy.setValue(position, "second"); System.out.println("Copy with position set to \"second\": " + copy); copy.setValue(2, "first"); System.out.println("Copy with last attribute set to \"first\": " + copy); System.out.println("Current weight of instance copy: " + copy.weight()); copy.setWeight(2); System.out.println("Current weight of instance copy (set to 2): " + copy.weight()); System.out.println("Last value of copy: " + copy.toString(2)); System.out.println("Value of position for copy: " + copy.toString(position)); System.out.println("Last value of copy (internal format): " + copy.value(2)); System.out.println("Value of position for copy (internal format): " + copy.value(position)); } catch (Exception e) { e.printStackTrace(); } }