/** * Returns the Capabilities of this filter, customized based on the data. I.e., if removes all * class capabilities, in case there's not class attribute present or removes the NO_CLASS * capability, in case that there's a class present. * * @param data the data to use for customization * @return the capabilities of this object, based on the data * @see #getCapabilities() */ public Capabilities getCapabilities(Instances data) { Capabilities result; Capabilities classes; Iterator iter; Capability cap; result = getCapabilities(); // no class? -> remove all class capabilites apart from NO_CLASS if (data.classIndex() == -1) { classes = result.getClassCapabilities(); iter = classes.capabilities(); while (iter.hasNext()) { cap = (Capability) iter.next(); if (cap != Capability.NO_CLASS) { result.disable(cap); result.disableDependency(cap); } } } // class? -> remove NO_CLASS else { result.disable(Capability.NO_CLASS); result.disableDependency(Capability.NO_CLASS); } return result; }
/** * loads the given dataset and prints the Capabilities necessary to process it. * * <p>Valid parameters: * * <p>-file filename <br> * the file to load * * <p>-c index the explicit index of the class attribute (default: none) * * @param args the commandline arguments * @throws Exception if something goes wrong */ public static void main(String[] args) throws Exception { String tmpStr; String filename; DataSource source; Instances data; int classIndex; Capabilities cap; Iterator iter; if (args.length == 0) { System.out.println( "\nUsage: " + Capabilities.class.getName() + " -file <dataset> [-c <class index>]\n"); return; } // get parameters tmpStr = Utils.getOption("file", args); if (tmpStr.length() == 0) throw new Exception("No file provided with option '-file'!"); else filename = tmpStr; tmpStr = Utils.getOption("c", args); if (tmpStr.length() != 0) { if (tmpStr.equals("first")) classIndex = 0; else if (tmpStr.equals("last")) classIndex = -2; // last else classIndex = Integer.parseInt(tmpStr) - 1; } else { classIndex = -3; // not set } // load data source = new DataSource(filename); if (classIndex == -3) data = source.getDataSet(); else if (classIndex == -2) data = source.getDataSet(source.getStructure().numAttributes() - 1); else data = source.getDataSet(classIndex); // determine and print capabilities cap = forInstances(data); System.out.println("File: " + filename); System.out.println( "Class index: " + ((data.classIndex() == -1) ? "not set" : "" + (data.classIndex() + 1))); System.out.println("Capabilities:"); iter = cap.capabilities(); while (iter.hasNext()) System.out.println("- " + iter.next()); }
/** * Tests a certain range of attributes of the given data, whether it can be processed by the * handler, given its capabilities. Classifiers implementing the <code> * MultiInstanceCapabilitiesHandler</code> interface are checked automatically for their * multi-instance Capabilities (if no bags, then only the bag-structure, otherwise only the first * bag). * * @param data the data to test * @param fromIndex the range of attributes - start (incl.) * @param toIndex the range of attributes - end (incl.) * @return true if all the tests succeeded * @see MultiInstanceCapabilitiesHandler * @see #m_InstancesTest * @see #m_MissingValuesTest * @see #m_MissingClassValuesTest * @see #m_MinimumNumberInstancesTest */ public boolean test(Instances data, int fromIndex, int toIndex) { int i; int n; int m; Attribute att; Instance inst; boolean testClass; Capabilities cap; boolean missing; Iterator iter; // shall we test the data? if (!m_InstancesTest) return true; // no Capabilities? -> warning if ((m_Capabilities.size() == 0) || ((m_Capabilities.size() == 1) && handles(Capability.NO_CLASS))) System.err.println(createMessage("No capabilities set!")); // any attributes? if (toIndex - fromIndex < 0) { m_FailReason = new WekaException(createMessage("No attributes!")); return false; } // do wee need to test the class attribute, i.e., is the class attribute // within the range of attributes? testClass = (data.classIndex() > -1) && (data.classIndex() >= fromIndex) && (data.classIndex() <= toIndex); // attributes for (i = fromIndex; i <= toIndex; i++) { att = data.attribute(i); // class is handled separately if (i == data.classIndex()) continue; // check attribute types if (!test(att)) return false; } // class if (!handles(Capability.NO_CLASS) && (data.classIndex() == -1)) { m_FailReason = new UnassignedClassException(createMessage("Class attribute not set!")); return false; } // special case: no class attribute can be handled if (handles(Capability.NO_CLASS) && (data.classIndex() > -1)) { cap = getClassCapabilities(); cap.disable(Capability.NO_CLASS); iter = cap.capabilities(); if (!iter.hasNext()) { m_FailReason = new WekaException(createMessage("Cannot handle any class attribute!")); return false; } } if (testClass && !handles(Capability.NO_CLASS)) { att = data.classAttribute(); if (!test(att, true)) return false; // special handling of RELATIONAL class // TODO: store additional Capabilities for this case // missing class labels if (m_MissingClassValuesTest) { if (!handles(Capability.MISSING_CLASS_VALUES)) { for (i = 0; i < data.numInstances(); i++) { if (data.instance(i).classIsMissing()) { m_FailReason = new WekaException(createMessage("Cannot handle missing class values!")); return false; } } } else { if (m_MinimumNumberInstancesTest) { int hasClass = 0; for (i = 0; i < data.numInstances(); i++) { if (!data.instance(i).classIsMissing()) hasClass++; } // not enough instances with class labels? if (hasClass < getMinimumNumberInstances()) { m_FailReason = new WekaException( createMessage( "Not enough training instances with class labels (required: " + getMinimumNumberInstances() + ", provided: " + hasClass + ")!")); return false; } } } } } // missing values if (m_MissingValuesTest) { if (!handles(Capability.MISSING_VALUES)) { missing = false; for (i = 0; i < data.numInstances(); i++) { inst = data.instance(i); if (inst instanceof SparseInstance) { for (m = 0; m < inst.numValues(); m++) { n = inst.index(m); // out of scope? if (n < fromIndex) continue; if (n > toIndex) break; // skip class if (n == inst.classIndex()) continue; if (inst.isMissing(n)) { missing = true; break; } } } else { for (n = fromIndex; n <= toIndex; n++) { // skip class if (n == inst.classIndex()) continue; if (inst.isMissing(n)) { missing = true; break; } } } if (missing) { m_FailReason = new NoSupportForMissingValuesException( createMessage("Cannot handle missing values!")); return false; } } } } // instances if (m_MinimumNumberInstancesTest) { if (data.numInstances() < getMinimumNumberInstances()) { m_FailReason = new WekaException( createMessage( "Not enough training instances (required: " + getMinimumNumberInstances() + ", provided: " + data.numInstances() + ")!")); return false; } } // Multi-Instance? -> check structure (regardless of attribute range!) if (handles(Capability.ONLY_MULTIINSTANCE)) { // number of attributes? if (data.numAttributes() != 3) { m_FailReason = new WekaException( createMessage("Incorrect Multi-Instance format, must be 'bag-id, bag, class'!")); return false; } // type of attributes and position of class? if (!data.attribute(0).isNominal() || !data.attribute(1).isRelationValued() || (data.classIndex() != data.numAttributes() - 1)) { m_FailReason = new WekaException( createMessage( "Incorrect Multi-Instance format, must be 'NOMINAL att, RELATIONAL att, CLASS att'!")); return false; } // check data immediately if (getOwner() instanceof MultiInstanceCapabilitiesHandler) { MultiInstanceCapabilitiesHandler handler = (MultiInstanceCapabilitiesHandler) getOwner(); cap = handler.getMultiInstanceCapabilities(); boolean result; if (data.numInstances() > 0) result = cap.test(data.attribute(1).relation(0)); else result = cap.test(data.attribute(1).relation()); if (!result) { m_FailReason = cap.m_FailReason; return false; } } } // passed all tests! return true; }