/** * Generates the classifier. * * @param instances set of instances serving as training data * @throws Exception if the classifier has not been generated successfully */ public void buildClassifier(Instances instances) throws Exception { if (!(m_Classifier instanceof WeightedInstancesHandler)) { throw new IllegalArgumentException("Classifier must be a " + "WeightedInstancesHandler!"); } // can classifier handle the data? getCapabilities().testWithFail(instances); // remove instances with missing class instances = new Instances(instances); instances.deleteWithMissingClass(); // only class? -> build ZeroR model if (instances.numAttributes() == 1) { System.err.println( "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!"); m_ZeroR = new weka.classifiers.rules.ZeroR(); m_ZeroR.buildClassifier(instances); return; } else { m_ZeroR = null; } m_Train = new Instances(instances, 0, instances.numInstances()); m_NNSearch.setInstances(m_Train); }
/** * Adds the supplied instance to the training set. * * @param instance the instance to add * @throws Exception if instance could not be incorporated successfully */ public void updateClassifier(Instance instance) throws Exception { if (m_Train.equalHeaders(instance.dataset()) == false) { throw new Exception("Incompatible instance types"); } if (instance.classIsMissing()) { return; } m_Train.add(instance); m_NNSearch.update(instance); m_kNNValid = false; if ((m_WindowSize > 0) && (m_Train.numInstances() > m_WindowSize)) { boolean deletedInstance = false; while (m_Train.numInstances() > m_WindowSize) { m_Train.delete(0); deletedInstance = true; } // rebuild datastructure KDTree currently can't delete if (deletedInstance == true) m_NNSearch.setInstances(m_Train); } }
/** * Generates the classifier. * * @param instances set of instances serving as training data * @throws Exception if the classifier has not been generated successfully */ public void buildClassifier(Instances instances) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(instances); // remove instances with missing class instances = new Instances(instances); instances.deleteWithMissingClass(); m_NumClasses = instances.numClasses(); m_ClassType = instances.classAttribute().type(); m_Train = new Instances(instances, 0, instances.numInstances()); // Throw away initial instances until within the specified window size if ((m_WindowSize > 0) && (instances.numInstances() > m_WindowSize)) { m_Train = new Instances(m_Train, m_Train.numInstances() - m_WindowSize, m_WindowSize); } m_NumAttributesUsed = 0.0; for (int i = 0; i < m_Train.numAttributes(); i++) { if ((i != m_Train.classIndex()) && (m_Train.attribute(i).isNominal() || m_Train.attribute(i).isNumeric())) { m_NumAttributesUsed += 1.0; } } m_NNSearch.setInstances(m_Train); // Invalidate any currently cross-validation selected k m_kNNValid = false; m_defaultModel = new ZeroR(); m_defaultModel.buildClassifier(instances); m_defaultModel.setOptions(getOptions()); // System.out.println("hello world"); }
/** * Calculates the class membership probabilities for the given test instance. * * @param instance the instance to be classified * @return predicted class probability distribution * @throws Exception if an error occurred during the prediction */ public double[] distributionForInstance(Instance instance) throws Exception { NaiveBayes nb = new NaiveBayes(); // System.out.println("number of instances "+m_Train.numInstances()); if (m_Train.numInstances() == 0) { // throw new Exception("No training instances!"); return m_defaultModel.distributionForInstance(instance); } if ((m_WindowSize > 0) && (m_Train.numInstances() > m_WindowSize)) { m_kNNValid = false; boolean deletedInstance = false; while (m_Train.numInstances() > m_WindowSize) { m_Train.delete(0); } // rebuild datastructure KDTree currently can't delete if (deletedInstance == true) m_NNSearch.setInstances(m_Train); } // Select k by cross validation if (!m_kNNValid && (m_CrossValidate) && (m_kNNUpper >= 1)) { crossValidate(); } m_NNSearch.addInstanceInfo(instance); m_kNN = 1000; Instances neighbours = m_NNSearch.kNearestNeighbours(instance, m_kNN); double[] distances = m_NNSearch.getDistances(); // System.out.println("--------------classify instance--------- "); // System.out.println("neighbours.numInstances"+neighbours.numInstances()); // System.out.println("distances.length"+distances.length); // System.out.println("--------------classify instance--------- "); /* for (int k = 0; k < distances.length; k++) { //System.out.println("-------"); //System.out.println("distance of "+k+" "+distances[k]); //System.out.println("instance of "+k+" "+neighbours.instance(k)); //distances[k] = distances[k]+0.1; //System.out.println("------- after add 0.1"); //System.out.println("distance of "+k+" "+distances[k]); } */ Instances instances = new Instances(m_Train); // int attrnum = instances.numAttributes(); instances.deleteWithMissingClass(); Instances newm_Train = new Instances(instances, 0, instances.numInstances()); for (int k = 0; k < neighbours.numInstances(); k++) { // System.out.println("-------"); // Instance in = new Instance(); Instance insk = neighbours.instance(k); // System.out.println("instance "+k+" "+neighbours.instance(k)); // System.out.println("-------"); double dis = distances[k] + 0.1; // System.out.println("dis "+dis); dis = (1 / dis) * 10; // System.out.println("1/dis "+dis); int weightnum = (int) dis; // System.out.println("weightnum "+weightnum); for (int s = 0; s < weightnum; s++) { newm_Train.add(insk); } } // System.out.println("number of instances "+newm_Train.numInstances()); /* for (int k = 0; k < newm_Train.numInstances(); k++) { System.out.println("-------"); System.out.println("instance "+k+" "+newm_Train.instance(k)); System.out.println("-------"); } /* for (int k = 0; k < distances.length; k++) { System.out.println("-------"); System.out.println("distance of "+k+" "+distances[k]); System.out.println("-------"); }*/ nb.buildClassifier(newm_Train); double[] dis = nb.distributionForInstance(instance); // double[] distribution = makeDistribution(neighbours, distances); return dis; // return distribution; }