/**
 * Accepts an instance event from an upstream component. A FORMAT_AVAILABLE event resets the
 * chart state (legend text, scale, data-point buffer, x counter); any other event is treated
 * as a data point and forwarded to {@code acceptDataPoint}.
 *
 * @param e the instance event to process
 */
public void acceptInstance(InstanceEvent e) {
  if (e.getStatus() == InstanceEvent.FORMAT_AVAILABLE) {
    // New stream format: rebuild the legend from the attribute names and reset the scale.
    Instances structure = e.getStructure();
    m_legendText = new Vector();
    m_max = 1.0;
    m_min = 0;
    int i = 0;
    for (i = 0; i < structure.numAttributes(); i++) {
      // Cap the number of plotted attributes (legend entries stop after index 10).
      // NOTE(review): when the cap triggers, 11 legend entries (indices 0-10) have already
      // been added but i is decremented to 10, so m_dataPoint below gets length 10 — the
      // last legend entry has no matching data slot. Looks like an off-by-one; confirm
      // against the intended attribute cap before changing.
      if (i > 10) {
        i--;
        break;
      }
      m_legendText.addElement(structure.attribute(i).name());
      // Repaint per added entry so the legend/scale update as names arrive.
      m_legendPanel.repaint();
      m_scalePanel.repaint();
    }
    // i is either numAttributes() (loop ran out) or the capped count after the break.
    m_dataPoint = new double[i];
    m_xCount = 0;
    return;
  }
  // process data point
  Instance inst = e.getInstance();
  for (int i = 0; i < m_dataPoint.length; i++) {
    // Missing values keep the previous value in m_dataPoint rather than being zeroed.
    if (!inst.isMissing(i)) {
      m_dataPoint[i] = inst.value(i);
    }
  }
  acceptDataPoint(m_dataPoint);
  m_xCount++;
}
/**
 * Builds the ensemble of perceptrons.
 *
 * @exception Exception if something goes wrong during building
 */
public void buildClassifier(Instances insts) throws Exception {
  // Validate the dataset: this learner supports only two-class nominal problems
  // without string attributes.
  if (insts.checkForStringAttributes()) {
    throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
  }
  if (insts.numClasses() > 2) {
    throw new Exception("Can only handle two-class datasets!");
  }
  if (insts.classAttribute().isNumeric()) {
    throw new UnsupportedClassTypeException("Can't handle a numeric class!");
  }
  // Filter data: drop instances with a missing class, impute remaining missing
  // values, and binarize nominal attributes. The fitted filters are kept in fields
  // (presumably reapplied at prediction time — confirm against the classify path).
  m_Train = new Instances(insts);
  m_Train.deleteWithMissingClass();
  m_ReplaceMissingValues = new ReplaceMissingValues();
  m_ReplaceMissingValues.setInputFormat(m_Train);
  m_Train = Filter.useFilter(m_Train, m_ReplaceMissingValues);
  m_NominalToBinary = new NominalToBinary();
  m_NominalToBinary.setInputFormat(m_Train);
  m_Train = Filter.useFilter(m_Train, m_NominalToBinary);
  // Randomize training data (seeded for reproducibility).
  m_Train.randomize(new Random(m_Seed));
  // Make space to store perceptrons: each "perceptron" is recorded as the index of the
  // training instance that was added/subtracted, plus a survival weight.
  m_Additions = new int[m_MaxK + 1];
  m_IsAddition = new boolean[m_MaxK + 1];
  m_Weights = new int[m_MaxK + 1];
  // Compute perceptrons: iterate over the training data up to m_NumIterations times,
  // or until m_MaxK perceptrons have been created.
  m_K = 0;
  out:
  for (int it = 0; it < m_NumIterations; it++) {
    for (int i = 0; i < m_Train.numInstances(); i++) {
      Instance inst = m_Train.instance(i);
      if (!inst.classIsMissing()) {
        int prediction = makePrediction(m_K, inst);
        int classValue = (int) inst.classValue();
        if (prediction == classValue) {
          // Correct prediction: the current perceptron survives another instance.
          m_Weights[m_K]++;
        } else {
          // Mistake: record this instance as a correction (added for class 1,
          // subtracted for class 0) and start a new perceptron with weight 1.
          // Order matters: m_K is incremented BEFORE the weight bump so the new
          // perceptron, not the old one, receives the initial weight.
          m_IsAddition[m_K] = (classValue == 1);
          m_Additions[m_K] = i;
          m_K++;
          m_Weights[m_K]++;
        }
        if (m_K == m_MaxK) {
          break out;
        }
      }
    }
  }
}
/**
 * Builds a string listing the attribute values in a specified range of indices, separated by
 * commas and enclosed in brackets.
 *
 * @param instance the instance to print the values from
 * @param attRange the range of the attributes to list; if {@code null}, an empty string is
 *     returned
 * @return a string listing values of the attributes in the range, e.g. {@code "(a,b,c)"}, or
 *     the empty string when the range is null or matches no attribute
 */
private static String attributeValuesString(Instance instance, Range attRange) {
  // StringBuilder: the buffer is method-local, so the synchronized StringBuffer is unneeded.
  StringBuilder text = new StringBuilder();
  if (attRange != null) {
    boolean firstOutput = true;
    attRange.setUpper(instance.numAttributes() - 1);
    for (int i = 0; i < instance.numAttributes(); i++) {
      if (attRange.isInRange(i)) {
        // Open the bracket on the first value, otherwise separate with a comma.
        if (firstOutput) {
          text.append("(");
        } else {
          text.append(",");
        }
        text.append(instance.toString(i));
        firstOutput = false;
      }
    }
    // Close the bracket only if at least one value was written.
    if (!firstOutput) {
      text.append(")");
    }
  }
  return text.toString();
}
/** Computes the inner product of two instances */ private double innerProduct(Instance i1, Instance i2) throws Exception { // we can do a fast dot product double result = 0; int n1 = i1.numValues(); int n2 = i2.numValues(); int classIndex = m_Train.classIndex(); for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2; ) { int ind1 = i1.index(p1); int ind2 = i2.index(p2); if (ind1 == ind2) { if (ind1 != classIndex) { result += i1.valueSparse(p1) * i2.valueSparse(p2); } p1++; p2++; } else if (ind1 > ind2) { p2++; } else { p1++; } } result += 1.0; if (m_Exponent != 1) { return Math.pow(result, m_Exponent); } else { return result; } }
/**
 * Calculates the class membership probabilities for the given test instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution (for a numeric class, the averaged
 *     prediction in element 0)
 * @exception Exception if distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
  // Evaluate the class type once instead of on every iteration, and drop the "== true"
  // anti-idiom from the original.
  boolean numericClass = instance.classAttribute().isNumeric();
  double[] sums = new double[instance.numClasses()];
  for (int i = 0; i < m_NumIterations; i++) {
    if (numericClass) {
      // Numeric class: accumulate raw predictions for averaging below.
      sums[0] += m_Classifiers[i].classifyInstance(instance);
    } else {
      // Nominal class: accumulate each member's probability distribution.
      double[] newProbs = m_Classifiers[i].distributionForInstance(instance);
      for (int j = 0; j < newProbs.length; j++) {
        sums[j] += newProbs[j];
      }
    }
  }
  if (numericClass) {
    sums[0] /= m_NumIterations;
    return sums;
  } else if (Utils.eq(Utils.sum(sums), 0)) {
    // All members abstained: return the all-zero distribution unnormalized.
    return sums;
  } else {
    Utils.normalize(sums);
    return sums;
  }
}
/** * Bagging method. * * @param data the training data to be used for generating the bagged classifier. * @exception Exception if the classifier could not be built successfully */ @Override public void buildClassifier(Instances data) throws Exception { super.buildClassifier(data); if (m_CalcOutOfBag && (m_BagSizePercent != 100)) { throw new IllegalArgumentException( "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!"); } double outOfBagCount = 0.0; double errorSum = 0.0; int bagSize = data.numInstances() * m_BagSizePercent / 100; Random random = new Random(m_Seed); for (int j = 0; j < m_Classifiers.length; j++) { Instances bagData = null; boolean[] inBag = null; // create the in-bag dataset if (m_CalcOutOfBag) { inBag = new boolean[data.numInstances()]; bagData = resampleWithWeights(data, random, inBag); } else { bagData = data.resampleWithWeights(random); if (bagSize < data.numInstances()) { bagData.randomize(random); Instances newBagData = new Instances(bagData, 0, bagSize); bagData = newBagData; } } if (m_Classifier instanceof Randomizable) { ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt()); } // build the classifier m_Classifiers[j].buildClassifier(bagData); if (m_CalcOutOfBag) { // calculate out of bag error for (int i = 0; i < inBag.length; i++) { if (!inBag[i]) { Instance outOfBagInst = data.instance(i); outOfBagCount += outOfBagInst.weight(); if (data.classAttribute().isNumeric()) { errorSum += outOfBagInst.weight() * Math.abs( m_Classifiers[j].classifyInstance(outOfBagInst) - outOfBagInst.classValue()); } else { if (m_Classifiers[j].classifyInstance(outOfBagInst) != outOfBagInst.classValue()) { errorSum += outOfBagInst.weight(); } } } } } } m_OutOfBagError = errorSum / outOfBagCount; }