/** * Set cutpoints for a single attribute using MDL. * * @param index the index of the attribute to set cutpoints for * @param data the data to work with */ protected void calculateCutPointsByMDL(int index, Instances data) { // Sort instances data.sort(data.attribute(index)); // Find first instances that's missing int firstMissing = data.numInstances(); for (int i = 0; i < data.numInstances(); i++) { if (data.instance(i).isMissing(index)) { firstMissing = i; break; } } m_CutPoints[index] = cutPointsForSubset(data, index, 0, firstMissing); }
/** * Creates a C4.5-type split on the given data. * * @exception Exception if something goes wrong */ public void buildClassifier(Instances trainInstances) throws Exception { // Initialize the remaining instance variables. m_numSubsets = 0; m_splitPoint = Double.MAX_VALUE; m_infoGain = 0; m_gainRatio = 0; // Different treatment for enumerated and numeric // attributes. if (trainInstances.attribute(m_attIndex).isNominal()) { handleEnumeratedAttribute(trainInstances); } else { trainInstances.sort(trainInstances.attribute(m_attIndex)); handleNumericAttribute(trainInstances); } }