예제 #1
0
  /**
   * Returns the capabilities reported by {@link #getCapabilities()}, customized for the given
   * data.
   *
   * <p>If the data has no class attribute set (class index of -1), every class capability other
   * than {@code NO_CLASS} is disabled, together with its dependency. If a class attribute is set,
   * {@code NO_CLASS} and its dependency are disabled instead. The adjustments are applied directly
   * to the object obtained from {@link #getCapabilities()}.
   *
   * @param data the data to use for customization
   * @return the capabilities of this object, based on the data
   * @see #getCapabilities()
   */
  public Capabilities getCapabilities(Instances data) {
    Capabilities result = getCapabilities();

    // class attribute present -> the "no class" capability does not apply
    if (data.classIndex() != -1) {
      result.disable(Capability.NO_CLASS);
      result.disableDependency(Capability.NO_CLASS);
      return result;
    }

    // no class attribute -> drop every class capability except NO_CLASS
    for (Iterator it = result.getClassCapabilities().capabilities(); it.hasNext(); ) {
      Capability current = (Capability) it.next();
      if (current == Capability.NO_CLASS) {
        continue;
      }
      result.disable(current);
      result.disableDependency(current);
    }

    return result;
  }
예제 #2
0
  /**
   * Loads the given dataset and prints the capabilities necessary to process it.
   *
   * <p>Valid parameters:
   *
   * <p>-file filename <br>
   * the file to load
   *
   * <p>-c index <br>
   * the explicit index of the class attribute: {@code first}, {@code last} or a 1-based number
   * (default: none)
   *
   * <p>If no arguments are supplied at all, only a usage message is printed.
   *
   * @param args the commandline arguments
   * @throws Exception if no file was provided via -file or if something else goes wrong
   */
  public static void main(String[] args) throws Exception {
    // nothing to do without arguments, just explain the usage
    if (args.length == 0) {
      System.out.println(
          "\nUsage: " + Capabilities.class.getName() + " -file <dataset> [-c <class index>]\n");
      return;
    }

    // Sentinel class indices. They live in the same int range as the parsed (1-based minus one)
    // index, so a parsed value equal to a sentinel is treated like that sentinel.
    final int useLastAttribute = -2;
    final int classNotSet = -3;

    // -file is mandatory
    String filename = Utils.getOption("file", args);
    if (filename.length() == 0) {
      throw new Exception("No file provided with option '-file'!");
    }

    // -c is optional: "first", "last" or a 1-based number
    String classOption = Utils.getOption("c", args);
    int classIndex;
    if (classOption.length() == 0) {
      classIndex = classNotSet;
    } else if (classOption.equals("first")) {
      classIndex = 0;
    } else if (classOption.equals("last")) {
      classIndex = useLastAttribute;
    } else {
      classIndex = Integer.parseInt(classOption) - 1;
    }

    // load the data, applying the requested class index (if any)
    DataSource source = new DataSource(filename);
    Instances data;
    if (classIndex == classNotSet) {
      data = source.getDataSet();
    } else if (classIndex == useLastAttribute) {
      data = source.getDataSet(source.getStructure().numAttributes() - 1);
    } else {
      data = source.getDataSet(classIndex);
    }

    // determine the capabilities and report them, one per line
    Capabilities cap = forInstances(data);
    System.out.println("File: " + filename);
    String shownClassIndex;
    if (data.classIndex() == -1) {
      shownClassIndex = "not set";
    } else {
      shownClassIndex = "" + (data.classIndex() + 1);
    }
    System.out.println("Class index: " + shownClassIndex);
    System.out.println("Capabilities:");
    for (Iterator it = cap.capabilities(); it.hasNext(); ) {
      System.out.println("- " + it.next());
    }
  }
예제 #3
0
  /**
   * Tests a certain range of attributes of the given data, whether it can be processed by the
   * handler, given its capabilities. Classifiers implementing the <code>
   * MultiInstanceCapabilitiesHandler</code> interface are checked automatically for their
   * multi-instance Capabilities (if no bags, then only the bag-structure, otherwise only the first
   * bag).
   *
   * <p>The checks run in this order and stop at the first failure: attribute types in the range,
   * presence/admissibility of the class attribute, the class attribute itself (including missing
   * class values), missing attribute values, the minimum number of instances and finally the
   * multi-instance structure. Whenever this method itself detects a problem, it stores the
   * corresponding exception in {@code m_FailReason} before returning {@code false}. If testing of
   * instances is turned off, {@code true} is returned immediately.
   *
   * @param data the data to test
   * @param fromIndex the range of attributes - start (incl.)
   * @param toIndex the range of attributes - end (incl.)
   * @return true if all the tests succeeded
   * @see MultiInstanceCapabilitiesHandler
   * @see #m_InstancesTest
   * @see #m_MissingValuesTest
   * @see #m_MissingClassValuesTest
   * @see #m_MinimumNumberInstancesTest
   */
  public boolean test(Instances data, int fromIndex, int toIndex) {
    // testing of datasets turned off? -> accept right away
    if (!m_InstancesTest) {
      return true;
    }

    // effectively no capabilities set? -> only a warning, testing continues
    boolean nothingSet =
        (m_Capabilities.size() == 0)
            || ((m_Capabilities.size() == 1) && handles(Capability.NO_CLASS));
    if (nothingSet) {
      System.err.println(createMessage("No capabilities set!"));
    }

    // empty attribute range?
    if (toIndex - fromIndex < 0) {
      m_FailReason = new WekaException(createMessage("No attributes!"));
      return false;
    }

    // the class attribute only needs testing if it lies within the range
    boolean classInRange =
        (data.classIndex() > -1)
            && (data.classIndex() >= fromIndex)
            && (data.classIndex() <= toIndex);

    // attribute types; the class attribute is handled separately further down
    for (int attIndex = fromIndex; attIndex <= toIndex; attIndex++) {
      Attribute current = data.attribute(attIndex);
      if ((attIndex != data.classIndex()) && !test(current)) {
        return false;
      }
    }

    // a class attribute is required, but none is set
    if (!handles(Capability.NO_CLASS) && (data.classIndex() == -1)) {
      m_FailReason = new UnassignedClassException(createMessage("Class attribute not set!"));
      return false;
    }

    // NO_CLASS can be handled, but a class is set: some other class capability must be left
    if (handles(Capability.NO_CLASS) && (data.classIndex() > -1)) {
      Capabilities classCaps = getClassCapabilities();
      classCaps.disable(Capability.NO_CLASS);
      if (!classCaps.capabilities().hasNext()) {
        m_FailReason = new WekaException(createMessage("Cannot handle any class attribute!"));
        return false;
      }
    }

    if (classInRange && !handles(Capability.NO_CLASS)) {
      if (!test(data.classAttribute(), true)) {
        return false;
      }

      // special handling of RELATIONAL class
      // TODO: store additional Capabilities for this case

      // missing class labels
      if (m_MissingClassValuesTest) {
        if (handles(Capability.MISSING_CLASS_VALUES)) {
          // missing labels are acceptable, but enough labelled instances have to be available
          if (m_MinimumNumberInstancesTest) {
            int labelled = 0;
            for (int row = 0; row < data.numInstances(); row++) {
              if (!data.instance(row).classIsMissing()) {
                labelled++;
              }
            }

            if (labelled < getMinimumNumberInstances()) {
              m_FailReason =
                  new WekaException(
                      createMessage(
                          "Not enough training instances with class labels (required: "
                              + getMinimumNumberInstances()
                              + ", provided: "
                              + labelled
                              + ")!"));
              return false;
            }
          }
        } else {
          // not a single missing class value is tolerated
          for (int row = 0; row < data.numInstances(); row++) {
            if (data.instance(row).classIsMissing()) {
              m_FailReason =
                  new WekaException(createMessage("Cannot handle missing class values!"));
              return false;
            }
          }
        }
      }
    }

    // missing values (only relevant if they cannot be handled)
    if (m_MissingValuesTest && !handles(Capability.MISSING_VALUES)) {
      for (int row = 0; row < data.numInstances(); row++) {
        Instance inst = data.instance(row);
        boolean foundMissing = false;

        if (inst instanceof SparseInstance) {
          // walk over the stored values only
          for (int slot = 0; slot < inst.numValues(); slot++) {
            int attIndex = inst.index(slot);

            // before the range? -> keep looking
            if (attIndex < fromIndex) {
              continue;
            }
            // beyond the range? -> done with this instance
            if (attIndex > toIndex) {
              break;
            }

            // the class attribute is not considered here
            if ((attIndex != inst.classIndex()) && inst.isMissing(attIndex)) {
              foundMissing = true;
              break;
            }
          }
        } else {
          for (int attIndex = fromIndex; attIndex <= toIndex; attIndex++) {
            // the class attribute is not considered here
            if ((attIndex != inst.classIndex()) && inst.isMissing(attIndex)) {
              foundMissing = true;
              break;
            }
          }
        }

        if (foundMissing) {
          m_FailReason =
              new NoSupportForMissingValuesException(
                  createMessage("Cannot handle missing values!"));
          return false;
        }
      }
    }

    // number of instances
    if (m_MinimumNumberInstancesTest && (data.numInstances() < getMinimumNumberInstances())) {
      m_FailReason =
          new WekaException(
              createMessage(
                  "Not enough training instances (required: "
                      + getMinimumNumberInstances()
                      + ", provided: "
                      + data.numInstances()
                      + ")!"));
      return false;
    }

    // Multi-Instance? -> check structure (regardless of attribute range!)
    if (handles(Capability.ONLY_MULTIINSTANCE)) {
      // exactly three attributes expected
      if (data.numAttributes() != 3) {
        m_FailReason =
            new WekaException(
                createMessage("Incorrect Multi-Instance format, must be 'bag-id, bag, class'!"));
        return false;
      }

      // nominal bag-id first, relational bag second, class attribute last
      boolean layoutOk =
          data.attribute(0).isNominal()
              && data.attribute(1).isRelationValued()
              && (data.classIndex() == data.numAttributes() - 1);
      if (!layoutOk) {
        m_FailReason =
            new WekaException(
                createMessage(
                    "Incorrect Multi-Instance format, must be 'NOMINAL att, RELATIONAL att, CLASS att'!"));
        return false;
      }

      // check the bag data immediately: first bag if there is data, else the bag structure
      if (getOwner() instanceof MultiInstanceCapabilitiesHandler) {
        MultiInstanceCapabilitiesHandler handler = (MultiInstanceCapabilitiesHandler) getOwner();
        Capabilities bagCaps = handler.getMultiInstanceCapabilities();
        boolean bagOk =
            (data.numInstances() > 0)
                ? bagCaps.test(data.attribute(1).relation(0))
                : bagCaps.test(data.attribute(1).relation());

        if (!bagOk) {
          // pass on the reason recorded by the multi-instance capabilities
          m_FailReason = bagCaps.m_FailReason;
          return false;
        }
      }
    }

    // passed all tests!
    return true;
  }