/** * loading training data * * @param mergedAlgSimVec algebra instances normally for applying the remove useless and * standardization filtering * @return * @throws Exception */ public static AttributeFilterMeta refineInstances(Instances mergedAlgSimVec) throws Exception { AttributeFilterMeta res = new AttributeFilterMeta(); Map<String, Integer> attributeIndex = new HashMap<String, Integer>(); // List<Attribute> atts = new ArrayList<Attribute>(); // mergedAlgSimVecClass.attribute(0). for (int i = 0; i < mergedAlgSimVec.numAttributes(); i++) { attributeIndex.put(mergedAlgSimVec.attribute(i).name(), i); } Instances mergedUselessFilteredAlgSimVecClass = removeUseless(mergedAlgSimVec); Standardize stdFilter = new Standardize(); // kepp in meta stdFilter.setInputFormat( mergedUselessFilteredAlgSimVecClass); // initializing the filter once with training set Instances stdFilterdInstances = standardize(mergedUselessFilteredAlgSimVecClass, stdFilter); // record removed attributes/columns in a matrix Set<String> selectedAtt = new HashSet<String>(); for (int i = 0; i < stdFilterdInstances.numAttributes(); i++) { selectedAtt.add(stdFilterdInstances.attribute(i).name()); } List<Integer> deletedAttIndex = new ArrayList<Integer>(); for (Entry<String, Integer> e : attributeIndex.entrySet()) { if (selectedAtt.contains(e.getKey()) == false) { deletedAttIndex.add(e.getValue()); } } int[] removedAttributes = Ints.toArray(deletedAttIndex); res.setInstances(stdFilterdInstances); res.setRemovedAttributes(removedAttributes); res.setStandardizeFilter(stdFilter); return res; }
/** * Method for building the classifier. * * @param instances the set of training instances * @throws Exception if the classifier can't be built successfully */ public void buildClassifier(Instances instances) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(instances); // remove instances with missing class instances = new Instances(instances); instances.deleteWithMissingClass(); // Removes all the instances with weight equal to 0. // MUST be done since condition (8) of Keerthi's paper // is made with the assertion Ci > 0 (See equation (3a). Instances data = new Instances(instances, 0); for (int i = 0; i < instances.numInstances(); i++) { if (instances.instance(i).weight() > 0) { data.add(instances.instance(i)); } } if (data.numInstances() == 0) { throw new Exception( "No training instances left after removing " + "instance with either a weight null or a missing class!"); } instances = data; m_onlyNumeric = true; for (int i = 0; i < instances.numAttributes(); i++) { if (i != instances.classIndex()) { if (!instances.attribute(i).isNumeric()) { m_onlyNumeric = false; break; } } } m_Missing = new ReplaceMissingValues(); m_Missing.setInputFormat(instances); instances = Filter.useFilter(instances, m_Missing); if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) { if (!m_onlyNumeric) { m_NominalToBinary = new NominalToBinary(); m_NominalToBinary.setInputFormat(instances); instances = Filter.useFilter(instances, m_NominalToBinary); } else { m_NominalToBinary = null; } } else { m_NominalToBinary = null; } // retrieve two different class values used to determine filter transformation double y0 = instances.instance(0).classValue(); int index = 1; while (index < instances.numInstances() && instances.instance(index).classValue() == y0) { index++; } if (index == instances.numInstances()) { // degenerate case, all class values are equal // we don't want to deal with this, too much hassle throw new Exception( "All class values are the same. At least two class values should be different"); } double y1 = instances.instance(index).classValue(); // apply filters if (m_filterType == FILTER_STANDARDIZE) { m_Filter = new Standardize(); ((Standardize) m_Filter).setIgnoreClass(true); m_Filter.setInputFormat(instances); instances = Filter.useFilter(instances, m_Filter); } else if (m_filterType == FILTER_NORMALIZE) { m_Filter = new Normalize(); ((Normalize) m_Filter).setIgnoreClass(true); m_Filter.setInputFormat(instances); instances = Filter.useFilter(instances, m_Filter); } else { m_Filter = null; } if (m_Filter != null) { double z0 = instances.instance(0).classValue(); double z1 = instances.instance(index).classValue(); m_x1 = (y0 - y1) / (z0 - z1); // no division by zero, since y0 != y1 guaranteed => z0 != z1 ??? m_x0 = (y0 - m_x1 * z0); // = y1 - m_x1 * z1 } else { m_x1 = 1.0; m_x0 = 0.0; } m_optimizer.setSMOReg(this); m_optimizer.buildClassifier(instances); }