/**
 * Computes, for every nominal attribute of the input format, the weighted average class value
 * observed for each of its values, and stores the index ordering that {@code M5StaticUtils.sort}
 * returns for those averages in {@code m_Indices}.
 *
 * <p>Instances whose class value or whose value for the attribute is missing are skipped. A
 * value with no positive accumulated weight receives the attribute-wide weighted average
 * instead. Non-nominal attributes keep an empty entry in {@code m_Indices}.
 *
 * @throws Exception declared by the signature; no statement in this method throws explicitly
 */
private void computeAverageClassValues() throws Exception {
  final int numAttributes = getInputFormat().numAttributes();
  m_Indices = new int[numAttributes][0];

  for (int attIndex = 0; attIndex < numAttributes; attIndex++) {
    final M5Attribute attribute = getInputFormat().attribute(attIndex);
    if (!attribute.isNominal()) {
      continue;
    }

    final int numValues = attribute.numValues();
    // Per attribute value: sum of weight * classValue (later turned into the average) ...
    final double[] averages = new double[numValues];
    // ... and the sum of instance weights that contributed to it.
    final double[] weights = new double[numValues];

    final int numInstances = getInputFormat().numInstances();
    for (int row = 0; row < numInstances; row++) {
      final M5Instance current = getInputFormat().instance(row);
      if (current.classIsMissing() || current.isMissing(attIndex)) {
        continue;
      }
      // The stored attribute value is used directly as the index of the nominal value.
      final int valueIndex = (int) current.value(attIndex);
      weights[valueIndex] += current.weight();
      averages[valueIndex] += current.weight() * current.classValue();
    }

    // Totals are taken before any per-value division so the fallback is the overall average.
    final double overallSum = M5StaticUtils.sum(averages);
    final double overallWeight = M5StaticUtils.sum(weights);

    if (M5StaticUtils.gr(overallWeight, 0)) {
      for (int v = 0; v < numValues; v++) {
        averages[v] =
            M5StaticUtils.gr(weights[v], 0)
                ? averages[v] / weights[v]
                : overallSum / overallWeight;
      }
    }

    m_Indices[attIndex] = M5StaticUtils.sort(averages);
  }
}
/** Set the output format if the class is numeric. */ private void setOutputFormatNumeric() { if (m_Indices == null) { setOutputFormat(null); return; } M5Vector newAtts; int newClassIndex; StringBuffer attributeName; M5Instances outputFormat; M5Vector vals; // Compute new attributes newClassIndex = getInputFormat().classIndex(); newAtts = new M5Vector(); for (int j = 0; j < getInputFormat().numAttributes(); j++) { M5Attribute att = getInputFormat().attribute(j); if ((!att.isNominal()) || (j == getInputFormat().classIndex())) { newAtts.addElement(att.copy()); } else { if (j < getInputFormat().classIndex()) { newClassIndex += att.numValues() - 2; } // Compute values for new attributes for (int k = 1; k < att.numValues(); k++) { attributeName = new StringBuffer(att.name() + "="); for (int l = k; l < att.numValues(); l++) { if (l > k) { attributeName.append(','); } attributeName.append(att.value(m_Indices[j][l])); } if (m_Numeric) { newAtts.addElement(new M5Attribute(attributeName.toString())); } else { vals = new M5Vector(2); vals.addElement("f"); vals.addElement("t"); newAtts.addElement(new M5Attribute(attributeName.toString(), vals)); } } } } outputFormat = new M5Instances(getInputFormat().relationName(), newAtts, 0); outputFormat.setClassIndex(newClassIndex); setOutputFormat(outputFormat); }
/**
 * Converts a single instance for the case where the class is nominal and pushes the converted
 * instance onto the output queue.
 *
 * <p>Non-nominal attributes, the class attribute, and nominal attributes with at most two values
 * are copied through as a single value. Every other nominal attribute with {@code n} values is
 * expanded into {@code n} consecutive slots: a 1 in the slot whose position equals the
 * instance's value index and 0 elsewhere. If the instance's value for that attribute is missing,
 * all {@code n} slots receive the instance's stored value for the attribute instead.
 *
 * @param instance the instance to convert
 */
private void convertInstanceNominal(M5Instance instance) {
  final double[] converted = new double[outputFormatPeek().numAttributes()];
  final int numAttributes = getInputFormat().numAttributes();
  final int classIndex = getInputFormat().classIndex();
  int next = 0; // next free slot in 'converted'

  for (int attIndex = 0; attIndex < numAttributes; attIndex++) {
    final M5Attribute att = getInputFormat().attribute(attIndex);
    final boolean expand = att.isNominal() && attIndex != classIndex && att.numValues() > 2;
    if (!expand) {
      converted[next] = instance.value(attIndex);
      next++;
      continue;
    }

    final int numValues = att.numValues();
    if (instance.isMissing(attIndex)) {
      // Propagate the stored (missing) value to every slot of the expansion.
      for (int slot = 0; slot < numValues; slot++) {
        converted[next + slot] = instance.value(attIndex);
      }
    } else {
      for (int slot = 0; slot < numValues; slot++) {
        converted[next + slot] = (slot == (int) instance.value(attIndex)) ? 1 : 0;
      }
    }
    next += numValues;
  }

  // Keep the sparse representation when the input instance was sparse.
  final M5Instance result =
      (instance instanceof M5SparseInstance)
          ? new M5SparseInstance(instance.weight(), converted)
          : new M5Instance(instance.weight(), converted);

  copyStringValues(
      result,
      false,
      instance.dataset(),
      getInputStringIndex(),
      getOutputFormat(),
      getOutputStringIndex());
  result.setDataset(getOutputFormat());
  push(result);
}