/**
   * Parses a given list of options. Valid options are:
   *
   * <p>-I num <br>
   * The number of iterations to be performed. (default 1)
   *
   * <p>-E num <br>
   * The exponent for the polynomial kernel. (default 1)
   *
   * <p>-S num <br>
   * The seed for the random number generator. (default 1)
   *
   * <p>-M num <br>
   * The maximum number of alterations allowed. (default 10000)
   *
   * <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    String iterationsString = Utils.getOption('I', options);
    if (iterationsString.length() != 0) {
      m_NumIterations = Integer.parseInt(iterationsString);
    } else {
      m_NumIterations = 1;
    }
    String exponentsString = Utils.getOption('E', options);
    if (exponentsString.length() != 0) {
      m_Exponent = (new Double(exponentsString)).doubleValue();
    } else {
      m_Exponent = 1.0;
    }
    String seedString = Utils.getOption('S', options);
    if (seedString.length() != 0) {
      m_Seed = Integer.parseInt(seedString);
    } else {
      m_Seed = 1;
    }
    String alterationsString = Utils.getOption('M', options);
    if (alterationsString.length() != 0) {
      m_MaxK = Integer.parseInt(alterationsString);
    } else {
      m_MaxK = 10000;
    }
  }
Exemplo n.º 2
0
  /**
   * Parses a given list of options. Valid options are:
   *
   * <p>-W classname <br>
   * Specify the full class name of a weak classifier as the basis for bagging (required).
   *
   * <p>-I num <br>
   * Set the number of bagging iterations (default 10).
   *
   * <p>-S seed <br>
   * Random number seed for resampling (default 1).
   *
   * <p>-P num <br>
   * Size of each bag, as a percentage of the training size (default 100).
   *
   * <p>-O <br>
   * Compute out of bag error.
   *
   * <p>Options after -- are passed to the designated classifier.
   *
   * <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {

    String bagSize = Utils.getOption('P', options);
    if (bagSize.length() != 0) {
      setBagSizePercent(Integer.parseInt(bagSize));
    } else {
      setBagSizePercent(100);
    }

    setCalcOutOfBag(Utils.getFlag('O', options));

    super.setOptions(options);
  }
  /**
   * Evaluates a clusterer with the options given in an array of strings. It takes the string
   * indicated by "-t" as training file, the string indicated by "-T" as test file. If the test file
   * is missing, a stratified ten-fold cross-validation is performed (distribution clusterers only).
   * Using "-x" you can change the number of folds to be used, and using "-s" the random seed. If
   * the "-p" option is present it outputs the classification for each test instance. If you provide
   * the name of an object file using "-l", a clusterer will be loaded from the given file. If you
   * provide the name of an object file using "-d", the clusterer built from the training data will
   * be saved to the given file.
   *
   * @param clusterer machine learning clusterer
   * @param options the array of string containing the options
   * @exception Exception if model could not be evaluated successfully
   * @return a string describing the results
   */
  public static String evaluateClusterer(Clusterer clusterer, String[] options) throws Exception {
    int seed = 1, folds = 10;
    boolean doXval = false;
    Instances train = null;
    Instances test = null;
    Random random;
    String trainFileName,
        testFileName,
        seedString,
        foldsString,
        objectInputFileName,
        objectOutputFileName,
        attributeRangeString;
    String[] savedOptions = null;
    boolean printClusterAssignments = false;
    Range attributesToOutput = null;
    ObjectInputStream objectInputStream = null;
    ObjectOutputStream objectOutputStream = null;
    StringBuffer text = new StringBuffer();
    int theClass = -1; // class based evaluation of clustering

    try {
      if (Utils.getFlag('h', options)) {
        throw new Exception("Help requested.");
      }

      // Get basic options (options the same for all clusterers
      // printClusterAssignments = Utils.getFlag('p', options);
      objectInputFileName = Utils.getOption('l', options);
      objectOutputFileName = Utils.getOption('d', options);
      trainFileName = Utils.getOption('t', options);
      testFileName = Utils.getOption('T', options);

      // Check -p option
      try {
        attributeRangeString = Utils.getOption('p', options);
      } catch (Exception e) {
        throw new Exception(
            e.getMessage()
                + "\nNOTE: the -p option has changed. "
                + "It now expects a parameter specifying a range of attributes "
                + "to list with the predictions. Use '-p 0' for none.");
      }
      if (attributeRangeString.length() != 0) {
        printClusterAssignments = true;
        if (!attributeRangeString.equals("0")) attributesToOutput = new Range(attributeRangeString);
      }

      if (trainFileName.length() == 0) {
        if (objectInputFileName.length() == 0) {
          throw new Exception("No training file and no object " + "input file given.");
        }

        if (testFileName.length() == 0) {
          throw new Exception("No training file and no test file given.");
        }
      } else {
        if ((objectInputFileName.length() != 0) && (printClusterAssignments == false)) {
          throw new Exception("Can't use both train and model file " + "unless -p specified.");
        }
      }

      seedString = Utils.getOption('s', options);

      if (seedString.length() != 0) {
        seed = Integer.parseInt(seedString);
      }

      foldsString = Utils.getOption('x', options);

      if (foldsString.length() != 0) {
        folds = Integer.parseInt(foldsString);
        doXval = true;
      }
    } catch (Exception e) {
      throw new Exception('\n' + e.getMessage() + makeOptionString(clusterer));
    }

    try {
      if (trainFileName.length() != 0) {
        train = new Instances(new BufferedReader(new FileReader(trainFileName)));

        String classString = Utils.getOption('c', options);
        if (classString.length() != 0) {
          if (classString.compareTo("last") == 0) {
            theClass = train.numAttributes();
          } else if (classString.compareTo("first") == 0) {
            theClass = 1;
          } else {
            theClass = Integer.parseInt(classString);
          }
          if (doXval || testFileName.length() != 0) {
            throw new Exception("Can only do class based evaluation on the " + "training data");
          }

          if (objectInputFileName.length() != 0) {
            throw new Exception("Can't load a clusterer and do class based " + "evaluation");
          }
        }

        if (theClass != -1) {
          if (theClass < 1 || theClass > train.numAttributes()) {
            throw new Exception("Class is out of range!");
          }
          if (!train.attribute(theClass - 1).isNominal()) {
            throw new Exception("Class must be nominal!");
          }
          train.setClassIndex(theClass - 1);
        }
      }

      if (objectInputFileName.length() != 0) {
        objectInputStream = new ObjectInputStream(new FileInputStream(objectInputFileName));
      }

      if (objectOutputFileName.length() != 0) {
        objectOutputStream = new ObjectOutputStream(new FileOutputStream(objectOutputFileName));
      }
    } catch (Exception e) {
      throw new Exception("ClusterEvaluation: " + e.getMessage() + '.');
    }

    // Save options
    if (options != null) {
      savedOptions = new String[options.length];
      System.arraycopy(options, 0, savedOptions, 0, options.length);
    }

    if (objectInputFileName.length() != 0) {
      Utils.checkForRemainingOptions(options);
    }

    // Set options for clusterer
    if (clusterer instanceof OptionHandler) {
      ((OptionHandler) clusterer).setOptions(options);
    }

    Utils.checkForRemainingOptions(options);

    if (objectInputFileName.length() != 0) {
      // Load the clusterer from file
      clusterer = (Clusterer) objectInputStream.readObject();
      objectInputStream.close();
    } else {
      // Build the clusterer if no object file provided
      if (theClass == -1) {
        clusterer.buildClusterer(train);
      } else {
        Remove removeClass = new Remove();
        removeClass.setAttributeIndices("" + theClass);
        removeClass.setInvertSelection(false);
        removeClass.setInputFormat(train);
        Instances clusterTrain = Filter.useFilter(train, removeClass);
        clusterer.buildClusterer(clusterTrain);
        ClusterEvaluation ce = new ClusterEvaluation();
        ce.setClusterer(clusterer);
        ce.evaluateClusterer(train);

        return "\n\n=== Clustering stats for training data ===\n\n" + ce.clusterResultsToString();
      }
    }

    /* Output cluster predictions only (for the test data if specified,
    otherwise for the training data */
    if (printClusterAssignments) {
      return printClusterings(clusterer, train, testFileName, attributesToOutput);
    }

    text.append(clusterer.toString());
    text.append(
        "\n\n=== Clustering stats for training data ===\n\n"
            + printClusterStats(clusterer, trainFileName));

    if (testFileName.length() != 0) {
      text.append(
          "\n\n=== Clustering stats for testing data ===\n\n"
              + printClusterStats(clusterer, testFileName));
    }

    if ((clusterer instanceof DensityBasedClusterer)
        && (doXval == true)
        && (testFileName.length() == 0)
        && (objectInputFileName.length() == 0)) {
      // cross validate the log likelihood on the training data
      random = new Random(seed);
      random.setSeed(seed);
      train.randomize(random);
      text.append(
          crossValidateModel(clusterer.getClass().getName(), train, folds, savedOptions, random));
    }

    // Save the clusterer if an object output file is provided
    if (objectOutputFileName.length() != 0) {
      objectOutputStream.writeObject(clusterer);
      objectOutputStream.flush();
      objectOutputStream.close();
    }

    return text.toString();
  }
  /** Main method. */
  public static void main(String[] args) {
    try {
      String[] options = args;
      StRipShort classifier = new StRipShort();
      InstancesShort train = null, tempTrain, test = null, template = null;
      int seed = 1, folds = 10, classIndex = -1;
      String trainFileName,
          testFileName,
          sourceClass,
          classIndexString,
          seedString,
          foldsString,
          objectInputFileName,
          objectOutputFileName,
          attributeRangeString;
      boolean IRstatistics = false,
          noOutput = false,
          printClassifications = false,
          trainStatistics = true,
          printMargins = false,
          printComplexityStatistics = false,
          printGraph = false,
          classStatistics = false,
          printSource = false;
      StringBuffer text = new StringBuffer();
      BufferedReader trainReader = null, testReader = null;
      ObjectInputStream objectInputStream = null;
      CostMatrix costMatrix = null;
      StringBuffer schemeOptionsText = null;
      Range attributesToOutput = null;
      long trainTimeStart = 0, trainTimeElapsed = 0, testTimeStart = 0, testTimeElapsed = 0;

      classIndexString = Utils.getOption('c', options);
      if (classIndexString.length() != 0) {
        classIndex = Integer.parseInt(classIndexString);
      }
      trainFileName = Utils.getOption('t', options);
      objectInputFileName = Utils.getOption('l', options);
      objectOutputFileName = Utils.getOption('d', options);
      testFileName = Utils.getOption('T', options);
      if (trainFileName.length() == 0) {
        if (objectInputFileName.length() == 0) {
          throw new Exception("No training file and no object " + "input file given.");
        }
        if (testFileName.length() == 0) {
          throw new Exception("No training file and no test " + "file given.");
        }
      }
      try {
        if (trainFileName.length() != 0) {
          trainReader = new BufferedReader(new FileReader(trainFileName));
        }
        if (testFileName.length() != 0) {
          testReader = new BufferedReader(new FileReader(testFileName));
        }
        if (objectInputFileName.length() != 0) {
          InputStream is = new FileInputStream(objectInputFileName);
          if (objectInputFileName.endsWith(".gz")) {
            is = new GZIPInputStream(is);
          }
          objectInputStream = new ObjectInputStream(is);
        }
      } catch (Exception e) {
        throw new Exception("Can't open file " + e.getMessage() + '.');
      }
      if (testFileName.length() != 0) {
        template = test = new InstancesShort(testReader, 1);
        if (classIndex != -1) {
          test.setClassIndex(classIndex - 1);
        } else {
          test.setClassIndex(test.numAttributes() - 1);
        }
        if (classIndex > test.numAttributes()) {
          throw new Exception("Index of class attribute too large.");
        }
      }
      seedString = Utils.getOption('s', options);
      if (seedString.length() != 0) {
        seed = Integer.parseInt(seedString);
      }
      foldsString = Utils.getOption('x', options);
      if (foldsString.length() != 0) {
        folds = Integer.parseInt(foldsString);
      }

      classStatistics = Utils.getFlag('i', options);
      noOutput = Utils.getFlag('o', options);
      trainStatistics = !Utils.getFlag('v', options);
      printComplexityStatistics = Utils.getFlag('k', options);
      printMargins = Utils.getFlag('r', options);
      printGraph = Utils.getFlag('g', options);
      sourceClass = Utils.getOption('z', options);
      printSource = (sourceClass.length() != 0);
      for (int i = 0; i < options.length; i++) {
        if (options[i].length() != 0) {
          if (schemeOptionsText == null) {
            schemeOptionsText = new StringBuffer();
          }
          if (options[i].indexOf(' ') != -1) {
            schemeOptionsText.append('"' + options[i] + "\" ");
          } else {
            schemeOptionsText.append(options[i] + " ");
          }
        }
      }
      classifier.setOptions(options);
      Utils.checkForRemainingOptions(options);
      train = new ModifiedInstancesShort(trainReader);
      if (classIndex != -1) {
        train.setClassIndex(classIndex - 1);
      } else {
        train.setClassIndex(train.numAttributes() - 1);
      }
      train.cleanUpValues();
      // System.err.println(train);
      classifier.buildClassifier(train);
    } catch (Exception e) {
      e.printStackTrace();
      System.err.println(e.getMessage());
    }
  }