/**
   * Parses a given list of options. Valid options are:
   *
   * <p>-I num <br>
   * The number of iterations to be performed. (default 1)
   *
   * <p>-E num <br>
   * The exponent for the polynomial kernel. (default 1)
   *
   * <p>-S num <br>
   * The seed for the random number generator. (default 1)
   *
   * <p>-M num <br>
   * The maximum number of alterations allowed. (default 10000)
   *
   * <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    // -I: number of iterations (1 when the option is absent).
    String iterationsString = Utils.getOption('I', options);
    m_NumIterations = (iterationsString.length() != 0) ? Integer.parseInt(iterationsString) : 1;

    // -E: exponent of the polynomial kernel (1.0 when absent). Double.parseDouble parses the
    // text exactly like the deprecated new Double(String) constructor did, without creating a
    // wrapper object first.
    String exponentsString = Utils.getOption('E', options);
    m_Exponent = (exponentsString.length() != 0) ? Double.parseDouble(exponentsString) : 1.0;

    // -S: seed for the random number generator (1 when absent).
    String seedString = Utils.getOption('S', options);
    m_Seed = (seedString.length() != 0) ? Integer.parseInt(seedString) : 1;

    // -M: maximum number of alterations (10000 when absent).
    String alterationsString = Utils.getOption('M', options);
    m_MaxK = (alterationsString.length() != 0) ? Integer.parseInt(alterationsString) : 10000;
  }
Ejemplo n.º 2
0
  /**
   * Parses a given list of options. Valid options are:
   *
   * <p>-W classname <br>
   * Specify the full class name of a weak classifier as the basis for bagging (required).
   *
   * <p>-I num <br>
   * Set the number of bagging iterations (default 10).
   *
   * <p>-S seed <br>
   * Random number seed for resampling (default 1).
   *
   * <p>-P num <br>
   * Size of each bag, as a percentage of the training size (default 100).
   *
   * <p>-O <br>
   * Compute out of bag error.
   *
   * <p>Options after -- are passed to the designated classifier.
   *
   * <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {

    // -P: bag size as a percentage of the training set; 100 when the option is absent.
    String percentText = Utils.getOption('P', options);
    int percent = 100;
    if (percentText.length() > 0) {
      percent = Integer.parseInt(percentText);
    }
    setBagSizePercent(percent);

    // -O: flag that switches on the out-of-bag error computation.
    boolean outOfBagRequested = Utils.getFlag('O', options);
    setCalcOutOfBag(outOfBagRequested);

    // Whatever is left in the array is handled by the superclass.
    super.setOptions(options);
  }
  /**
   * Performs a cross-validation for a DensityBasedClusterer clusterer on a set of instances.
   *
   * @param clustererString a string naming the class of the clusterer
   * @param data the data on which the cross-validation is to be performed
   * @param numFolds the number of folds for the cross-validation
   * @param options the options to the clusterer
   * @param random a random number generator
   * @return a string containing the cross validated log likelihood
   * @exception Exception if a clusterer could not be generated
   */
  public static String crossValidateModel(
      String clustererString, Instances data, int numFolds, String[] options, Random random)
      throws Exception {

    // Work on a copy of the dataset (presumably so the caller's instances are not reordered or
    // otherwise modified by the cross-validation; confirm against the Instances copy constructor).
    data = new Instances(data);

    // Instantiate the clusterer by name. Any failure (unknown class, wrong type, no usable
    // no-arg constructor) is reported with the same message as before, now carrying the cause.
    Clusterer clusterer;
    try {
      clusterer =
          (Clusterer) Class.forName(clustererString).getDeclaredConstructor().newInstance();
    } catch (Exception e) {
      throw new Exception("Can't find class with name " + clustererString + '.', e);
    }

    if (!(clusterer instanceof DensityBasedClusterer)) {
      throw new Exception(clustererString + " must be a distrinbution " + "clusterer.");
    }

    // Hand the clusterer a copy of the options so the caller's array is never touched; a null
    // option array is passed through as null, exactly as before.
    if (clusterer instanceof OptionHandler) {
      String[] savedOptions = (options == null) ? null : options.clone();
      try {
        ((OptionHandler) clusterer).setOptions(savedOptions);
        Utils.checkForRemainingOptions(savedOptions);
      } catch (Exception e) {
        throw new Exception("Can't parse given options in " + "cross-validation!", e);
      }
    }

    double cvAverage =
        crossValidateModel((DensityBasedClusterer) clusterer, data, numFolds, random);

    return "\n"
        + numFolds
        + " fold CV Log Likelihood: "
        + Utils.doubleToString(cvAverage, 6, 4)
        + "\n";
  }
  /**
   * Evaluates cluster assignments with respect to actual class labels. Assumes that m_Clusterer has
   * been trained and tested on inst (minus the class).
   *
   * @param inst the instances (including class) to evaluate with respect to
   * @exception Exception if something goes wrong
   */
  private void evaluateClustersWithRespectToClass(Instances inst) throws Exception {
    int classCount = inst.classAttribute().numValues();
    int[][] classCountsByCluster = new int[m_numClusters][classCount];
    int[] clusterSizes = new int[m_numClusters];
    // One entry per cluster plus a trailing slot; the trailing slot of bestMapping is what gets
    // reported below as the number of incorrectly clustered instances.
    double[] bestMapping = new double[m_numClusters + 1];
    double[] workingMapping = new double[m_numClusters + 1];

    // Tally every instance under its assigned cluster and its actual class value.
    for (int row = 0; row < inst.numInstances(); row++) {
      int cluster = (int) m_clusterAssignments[row];
      int[] clusterRow = classCountsByCluster[cluster];
      clusterRow[(int) inst.instance(row).classValue()]++;
      clusterSizes[cluster]++;
    }

    // Start from the worst possible error so that any real mapping improves on it.
    // NOTE(review): mapClasses is not visible here; presumably it searches for the
    // cluster-to-class mapping with the fewest errors and stores it in bestMapping. Confirm.
    bestMapping[m_numClusters] = Double.MAX_VALUE;
    mapClasses(0, classCountsByCluster, clusterSizes, workingMapping, bestMapping, 0);

    String classAttributeName = inst.classAttribute().name();
    m_clusteringResults.append("\n\nClass attribute: " + classAttributeName + "\n");
    m_clusteringResults.append("Classes to Clusters:\n");
    String confusionMatrix = toMatrixString(classCountsByCluster, clusterSizes, inst);
    m_clusteringResults.append(confusionMatrix).append("\n");

    // Width of the cluster label column, derived from the number of clusters.
    int labelWidth = 1 + (int) (Math.log(m_numClusters) / Math.log(10));

    // List the class chosen for every non-empty cluster.
    for (int cluster = 0; cluster < m_numClusters; cluster++) {
      if (clusterSizes[cluster] <= 0) {
        continue;
      }
      String clusterLabel = Utils.doubleToString((double) cluster, labelWidth, 0);
      m_clusteringResults.append("Cluster " + clusterLabel);
      m_clusteringResults.append(" <-- ");

      if (bestMapping[cluster] < 0) {
        m_clusteringResults.append("No class\n");
      } else {
        String className = inst.classAttribute().value((int) bestMapping[cluster]);
        m_clusteringResults.append(className).append("\n");
      }
    }

    double errorCount = bestMapping[m_numClusters];
    String errorPercent =
        Utils.doubleToString((errorCount / inst.numInstances() * 100.0), 8, 4);
    m_clusteringResults.append(
        "\nIncorrectly clustered instances :\t" + errorCount + "\t" + errorPercent + " %\n");

    // Keep the chosen mapping (without the trailing error slot) for later use.
    m_classToCluster = new int[m_numClusters];
    for (int cluster = 0; cluster < m_numClusters; cluster++) {
      m_classToCluster[cluster] = (int) bestMapping[cluster];
    }
  }
  /**
   * Returns a "confusion" style matrix of classes to clusters assignments
   *
   * @param counts the counts of classes for each cluster
   * @param clusterTotals total number of examples in each cluster
   * @param inst the training instances (with class)
   * @exception Exception if matrix can't be generated
   */
  private String toMatrixString(int[][] counts, int[] clusterTotals, Instances inst)
      throws Exception {

    // The largest cell value decides how many digits a column has to hold.
    int largestCount = 0;
    for (int cluster = 0; cluster < m_numClusters; cluster++) {
      for (int cell : counts[cluster]) {
        largestCount = Math.max(largestCount, cell);
      }
    }

    // Columns must fit both the widest count and the widest cluster index.
    int digitsForCounts = (int) (Math.log(largestCount) / Math.log(10));
    int digitsForClusters = (int) (Math.log(m_numClusters) / Math.log(10));
    int columnWidth = 1 + Math.max(digitsForCounts, digitsForClusters);

    StringBuilder matrix = new StringBuilder();
    matrix.append("\n");

    // Header row: one column per non-empty cluster.
    for (int cluster = 0; cluster < m_numClusters; cluster++) {
      if (clusterTotals[cluster] > 0) {
        matrix.append(" ");
        matrix.append(Utils.doubleToString((double) cluster, columnWidth, 0));
      }
    }
    matrix.append("  <-- assigned to cluster\n");

    // One row per class value, showing its count in every non-empty cluster.
    for (int classValue = 0; classValue < counts[0].length; classValue++) {
      for (int cluster = 0; cluster < m_numClusters; cluster++) {
        if (clusterTotals[cluster] > 0) {
          matrix.append(" ");
          matrix.append(Utils.doubleToString((double) counts[cluster][classValue], columnWidth, 0));
        }
      }
      matrix.append(" | ");
      matrix.append(inst.classAttribute().value(classValue));
      matrix.append("\n");
    }

    return matrix.toString();
  }
Ejemplo n.º 6
0
  /**
   * Creates a new dataset of the same size using random sampling with replacement according to the
   * weights of the instances in the given dataset. The weights of the instances in the new dataset
   * are set to one. The instance weights must not be negative.
   *
   * @param data the data to be sampled from
   * @param random a random number generator
   * @param sampled indicating which instance has been sampled
   * @return the new dataset
   * @exception IllegalArgumentException if an instance with a negative weight is encountered
   *     while sampling.
   */
  public final Instances resampleWithWeights(Instances data, Random random, boolean[] sampled) {

    final int size = data.numInstances();

    // Snapshot of the per-instance weights that drive the sampling.
    double[] weights = new double[size];
    for (int idx = 0; idx < size; idx++) {
      weights[idx] = data.instance(idx).weight();
    }

    Instances resampled = new Instances(data, size);
    if (size == 0) {
      return resampled;
    }

    // Increasing thresholds built from cumulative random draws, then passed to Utils.normalize
    // (presumably dividing each entry so the thresholds span 0..totalWeight; confirm).
    double totalWeight = Utils.sum(weights);
    double[] thresholds = new double[size];
    double runningDraws = 0;
    for (int idx = 0; idx < size; idx++) {
      runningDraws += random.nextDouble();
      thresholds[idx] = runningDraws;
    }
    Utils.normalize(thresholds, runningDraws / totalWeight);

    // Pin the last threshold so rounding errors cannot push it past the total weight.
    thresholds[size - 1] = totalWeight;

    // Walk the source instances while accumulating their weights; every threshold that falls
    // within the accumulated weight yields one copy of the current source instance.
    int drawn = 0;
    double cumulativeWeight = 0;
    for (int source = 0; drawn < size && source < size; source++) {
      if (weights[source] < 0) {
        throw new IllegalArgumentException("Weights have to be positive.");
      }
      cumulativeWeight += weights[source];
      while (drawn < size && thresholds[drawn] <= cumulativeWeight) {
        resampled.add(data.instance(source));
        sampled[source] = true;
        resampled.instance(drawn).setWeight(1);
        drawn++;
      }
    }
    return resampled;
  }
Ejemplo n.º 7
0
  /**
   * Calculates the class membership probabilities for the given test instance.
   *
   * @param instance the instance to be classified
   * @return predicted class probability distribution
   * @exception Exception if distribution can't be computed successfully
   */
  @Override
  public double[] distributionForInstance(Instance instance) throws Exception {

    double[] sums = new double[instance.numClasses()], newProbs;

    for (int i = 0; i < m_NumIterations; i++) {
      if (instance.classAttribute().isNumeric() == true) {
        sums[0] += m_Classifiers[i].classifyInstance(instance);
      } else {
        newProbs = m_Classifiers[i].distributionForInstance(instance);
        for (int j = 0; j < newProbs.length; j++) sums[j] += newProbs[j];
      }
    }
    if (instance.classAttribute().isNumeric() == true) {
      sums[0] /= m_NumIterations;
      return sums;
    } else if (Utils.eq(Utils.sum(sums), 0)) {
      return sums;
    } else {
      Utils.normalize(sums);
      return sums;
    }
  }
Ejemplo n.º 8
0
  /**
   * Returns description of the bagged classifier.
   *
   * @return description of the bagged classifier as a string
   */
  @Override
  public String toString() {

    if (m_Classifiers == null) {
      return "Bagging: No model built yet.";
    }
    StringBuffer text = new StringBuffer();
    text.append("All the base classifiers: \n\n");
    for (int i = 0; i < m_Classifiers.length; i++)
      text.append(m_Classifiers[i].toString() + "\n\n");

    if (m_CalcOutOfBag) {
      text.append("Out of bag error: " + Utils.doubleToString(m_OutOfBagError, 4) + "\n\n");
    }

    return text.toString();
  }
  private static String numToString(double num) {
    // Split the magnitude into its integer and fractional parts.
    double magnitude = Math.abs(num);
    int whole = (int) magnitude;
    double fraction = magnitude - whole;

    // Digit estimate for the integer part; 1 when the integer part is zero.
    int nondecimal = 1;
    if (whole > 0) {
      nondecimal = (int) (Math.log(whole) / Math.log(10));
    }

    // Digits after the decimal point: 1 for whole numbers, otherwise derived from the
    // magnitude's base-10 logarithm, falling back to 1 when that would exceed 5.
    int precision = 1;
    if (fraction > 0) {
      precision = (int) Math.abs(Math.log(magnitude) / Math.log(10)) + 2;
    }
    if (precision > 5) {
      precision = 1;
    }

    int width = nondecimal + 1 + precision;
    return reconcile.weka.core.Utils.doubleToString(num, width, precision);
  }
  /**
   * Print the cluster statistics for either the training or the testing data.
   *
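   * @param clusterer the clusterer to use for generating statistics.
   * @param fileName the name of the file holding the instances to cluster; if it is empty, an
   *     empty string is returned.
   * @return a string containing cluster statistics.
   * @exception Exception if statistics can't be generated.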
   */
  private static String printClusterStats(Clusterer clusterer, String fileName) throws Exception {
    StringBuffer text = new StringBuffer();
    int cc = clusterer.numberOfClusters();
    // instanceStats[c] counts the instances that were assigned to cluster c.
    double[] instanceStats = new double[cc];

    if (fileName.length() == 0) {
      // No file to read: the report stays empty.
      return text.toString();
    }

    BufferedReader inStream;
    try {
      inStream = new BufferedReader(new FileReader(fileName));
    } catch (Exception e) {
      // Same message as before, now keeping the original exception as the cause.
      throw new Exception("Can't open file " + e.getMessage() + '.', e);
    }

    int numRead = 0;
    int unclusteredInstances = 0;
    double loglk = 0.0;
    try {
      // Instances are read, clustered and discarded one at a time, so at most one instance is
      // held in the dataset at any point.
      Instances inst = new Instances(inStream, 1);

      while (inst.readInstance(inStream)) {
        try {
          int cnum = clusterer.clusterInstance(inst.instance(0));

          if (clusterer instanceof DensityBasedClusterer) {
            loglk += ((DensityBasedClusterer) clusterer).logDensityForInstance(inst.instance(0));
          }
          instanceStats[cnum]++;
        } catch (Exception e) {
          // An instance that cannot be clustered is only counted, never fatal.
          unclusteredInstances++;
        }
        inst.delete(0);
        numRead++;
      }
    } finally {
      // Always release the file handle. A failure while closing a stream that was only read
      // from must not hide the statistics or an exception that is already propagating.
      try {
        inStream.close();
      } catch (Exception ignored) {
        // best effort
      }
    }

    // Column widths follow the number of clusters and the number of instances read.
    int clustFieldWidth = (int) ((Math.log(cc) / Math.log(10)) + 1);
    int numInstFieldWidth = (int) ((Math.log(numRead) / Math.log(10)) + 1);

    // The log likelihood is averaged over the instances that were actually clustered.
    double sum = Utils.sum(instanceStats);
    loglk /= sum;
    text.append("Clustered Instances\n");

    // One line per non-empty cluster: index, instance count and share of the total.
    for (int c = 0; c < cc; c++) {
      if (instanceStats[c] > 0) {
        text.append(
            Utils.doubleToString((double) c, clustFieldWidth, 0)
                + "      "
                + Utils.doubleToString(instanceStats[c], numInstFieldWidth, 0)
                + " ("
                + Utils.doubleToString((instanceStats[c] / sum * 100.0), 3, 0)
                + "%)\n");
      }
    }
    if (unclusteredInstances > 0) {
      text.append("\nUnclustered Instances : " + unclusteredInstances);
    }

    if (clusterer instanceof DensityBasedClusterer) {
      text.append("\n\nLog likelihood: " + Utils.doubleToString(loglk, 1, 5) + "\n");
    }

    return text.toString();
  }
  /**
   * Evaluates a clusterer with the options given in an array of strings. It takes the string
   * indicated by "-t" as training file, the string indicated by "-T" as test file. If the test file
   * is missing, a stratified ten-fold cross-validation is performed (distribution clusterers only).
   * Using "-x" you can change the number of folds to be used, and using "-s" the random seed. If
   * the "-p" option is present it outputs the classification for each test instance. If you provide
   * the name of an object file using "-l", a clusterer will be loaded from the given file. If you
   * provide the name of an object file using "-d", the clusterer built from the training data will
   * be saved to the given file.
   *
   * @param clusterer machine learning clusterer
   * @param options the array of string containing the options
   * @exception Exception if model could not be evaluated successfully
   * @return a string describing the results
   */
  public static String evaluateClusterer(Clusterer clusterer, String[] options) throws Exception {
    // Defaults used when -s / -x are not supplied.
    int seed = 1, folds = 10;
    // Set only when -x is given; cross-validation below runs only in that case.
    boolean doXval = false;
    Instances train = null;
    // NOTE(review): test is never assigned or read in this method.
    Instances test = null;
    Random random;
    String trainFileName,
        testFileName,
        seedString,
        foldsString,
        objectInputFileName,
        objectOutputFileName,
        attributeRangeString;
    String[] savedOptions = null;
    boolean printClusterAssignments = false;
    Range attributesToOutput = null;
    ObjectInputStream objectInputStream = null;
    ObjectOutputStream objectOutputStream = null;
    StringBuffer text = new StringBuffer();
    int theClass = -1; // class based evaluation of clustering (1-based index, -1 = none)

    // Phase 1: read the general options. Any failure here is rethrown with the option summary
    // from makeOptionString appended to the message.
    // NOTE(review): presumably Utils.getOption/getFlag consume the entries they match, which is
    // why the parse order and the later checkForRemainingOptions calls matter. Confirm against
    // Utils.
    try {
      if (Utils.getFlag('h', options)) {
        throw new Exception("Help requested.");
      }

      // Get basic options (options the same for all clusterers)
      // printClusterAssignments = Utils.getFlag('p', options);
      objectInputFileName = Utils.getOption('l', options);
      objectOutputFileName = Utils.getOption('d', options);
      trainFileName = Utils.getOption('t', options);
      testFileName = Utils.getOption('T', options);

      // Check -p option
      try {
        attributeRangeString = Utils.getOption('p', options);
      } catch (Exception e) {
        throw new Exception(
            e.getMessage()
                + "\nNOTE: the -p option has changed. "
                + "It now expects a parameter specifying a range of attributes "
                + "to list with the predictions. Use '-p 0' for none.");
      }
      // Any -p value requests cluster assignments; a value other than "0" also selects the
      // attributes to print alongside them.
      if (attributeRangeString.length() != 0) {
        printClusterAssignments = true;
        if (!attributeRangeString.equals("0")) attributesToOutput = new Range(attributeRangeString);
      }

      // Without a training file, both a stored clusterer (-l) and a test file (-T) are required.
      if (trainFileName.length() == 0) {
        if (objectInputFileName.length() == 0) {
          throw new Exception("No training file and no object " + "input file given.");
        }

        if (testFileName.length() == 0) {
          throw new Exception("No training file and no test file given.");
        }
      } else {
        // A training file together with a stored clusterer is only accepted when -p is given.
        if ((objectInputFileName.length() != 0) && (printClusterAssignments == false)) {
          throw new Exception("Can't use both train and model file " + "unless -p specified.");
        }
      }

      seedString = Utils.getOption('s', options);

      if (seedString.length() != 0) {
        seed = Integer.parseInt(seedString);
      }

      foldsString = Utils.getOption('x', options);

      if (foldsString.length() != 0) {
        folds = Integer.parseInt(foldsString);
        doXval = true;
      }
    } catch (Exception e) {
      throw new Exception('\n' + e.getMessage() + makeOptionString(clusterer));
    }

    // Phase 2: load the training data, resolve the optional class attribute (-c) and open the
    // object streams. Failures are rethrown with a "ClusterEvaluation: " prefix.
    try {
      if (trainFileName.length() != 0) {
        // NOTE(review): the reader created here is never closed in this method.
        train = new Instances(new BufferedReader(new FileReader(trainFileName)));

        // -c accepts "first", "last" or a 1-based attribute index.
        String classString = Utils.getOption('c', options);
        if (classString.length() != 0) {
          if (classString.compareTo("last") == 0) {
            theClass = train.numAttributes();
          } else if (classString.compareTo("first") == 0) {
            theClass = 1;
          } else {
            theClass = Integer.parseInt(classString);
          }
          if (doXval || testFileName.length() != 0) {
            throw new Exception("Can only do class based evaluation on the " + "training data");
          }

          if (objectInputFileName.length() != 0) {
            throw new Exception("Can't load a clusterer and do class based " + "evaluation");
          }
        }

        if (theClass != -1) {
          if (theClass < 1 || theClass > train.numAttributes()) {
            throw new Exception("Class is out of range!");
          }
          // theClass is 1-based; attribute() and setClassIndex() take 0-based indices.
          if (!train.attribute(theClass - 1).isNominal()) {
            throw new Exception("Class must be nominal!");
          }
          train.setClassIndex(theClass - 1);
        }
      }

      if (objectInputFileName.length() != 0) {
        objectInputStream = new ObjectInputStream(new FileInputStream(objectInputFileName));
      }

      // NOTE(review): the output stream is opened here, before the clusterer is built. The early
      // returns further down (class-based evaluation, -p output) leave it open and never write
      // to it.
      if (objectOutputFileName.length() != 0) {
        objectOutputStream = new ObjectOutputStream(new FileOutputStream(objectOutputFileName));
      }
    } catch (Exception e) {
      throw new Exception("ClusterEvaluation: " + e.getMessage() + '.');
    }

    // Save options (the array as it stands after the general options above have been read);
    // the copy is passed to crossValidateModel below.
    if (options != null) {
      savedOptions = new String[options.length];
      System.arraycopy(options, 0, savedOptions, 0, options.length);
    }

    // When a stored clusterer is loaded, the remaining options are checked before any
    // clusterer-specific options are parsed.
    if (objectInputFileName.length() != 0) {
      Utils.checkForRemainingOptions(options);
    }

    // Set options for clusterer
    if (clusterer instanceof OptionHandler) {
      ((OptionHandler) clusterer).setOptions(options);
    }

    Utils.checkForRemainingOptions(options);

    if (objectInputFileName.length() != 0) {
      // Load the clusterer from file
      clusterer = (Clusterer) objectInputStream.readObject();
      objectInputStream.close();
    } else {
      // Build the clusterer if no object file provided
      if (theClass == -1) {
        clusterer.buildClusterer(train);
      } else {
        // Class-based evaluation: build the clusterer on the data with the class attribute
        // removed, then evaluate against the full data (class included) and return directly.
        Remove removeClass = new Remove();
        removeClass.setAttributeIndices("" + theClass);
        removeClass.setInvertSelection(false);
        removeClass.setInputFormat(train);
        Instances clusterTrain = Filter.useFilter(train, removeClass);
        clusterer.buildClusterer(clusterTrain);
        ClusterEvaluation ce = new ClusterEvaluation();
        ce.setClusterer(clusterer);
        ce.evaluateClusterer(train);

        return "\n\n=== Clustering stats for training data ===\n\n" + ce.clusterResultsToString();
      }
    }

    /* Output cluster predictions only (for the test data if specified,
    otherwise for the training data) */
    if (printClusterAssignments) {
      return printClusterings(clusterer, train, testFileName, attributesToOutput);
    }

    // Regular report: the clusterer's description followed by statistics for the training file
    // and, if given, the test file.
    text.append(clusterer.toString());
    text.append(
        "\n\n=== Clustering stats for training data ===\n\n"
            + printClusterStats(clusterer, trainFileName));

    if (testFileName.length() != 0) {
      text.append(
          "\n\n=== Clustering stats for testing data ===\n\n"
              + printClusterStats(clusterer, testFileName));
    }

    // Cross-validation runs only for density-based clusterers, only when -x was given, and only
    // when neither a test file nor a stored clusterer is in use.
    if ((clusterer instanceof DensityBasedClusterer)
        && (doXval == true)
        && (testFileName.length() == 0)
        && (objectInputFileName.length() == 0)) {
      // cross validate the log likelihood on the training data
      random = new Random(seed);
      // NOTE(review): redundant; the constructor above already seeds the generator with seed.
      random.setSeed(seed);
      train.randomize(random);
      text.append(
          crossValidateModel(clusterer.getClass().getName(), train, folds, savedOptions, random));
    }

    // Save the clusterer if an object output file is provided
    if (objectOutputFileName.length() != 0) {
      objectOutputStream.writeObject(clusterer);
      objectOutputStream.flush();
      objectOutputStream.close();
    }

    return text.toString();
  }
  /**
   * Evaluate the clusterer on a set of instances. Calculates clustering statistics and stores
   * cluster assignments for the instances in m_clusterAssignments
   *
   * @param test the set of instances to cluster
   * @exception Exception if something goes wrong
   */
  public void evaluateClusterer(Instances test) throws Exception {
    int clusterCount = m_Clusterer.numberOfClusters();
    m_numClusters = clusterCount;
    int numInstFieldWidth = (int) ((Math.log(test.numInstances()) / Math.log(10)) + 1);
    // perClusterCounts[c] is the number of instances assigned to cluster c.
    double[] perClusterCounts = new double[clusterCount];
    m_clusterAssignments = new double[test.numInstances()];
    Instances unlabeled = test;
    boolean hasClass = (unlabeled.classIndex() >= 0);

    // With a class attribute set, cluster on a copy that has the class removed; the class
    // itself is only used for the class-based evaluation at the end.
    if (hasClass) {
      if (unlabeled.classAttribute().isNumeric()) {
        throw new Exception("ClusterEvaluation: Class must be nominal!");
      }
      Remove removeClass = new Remove();
      removeClass.setAttributeIndices(String.valueOf(unlabeled.classIndex() + 1));
      removeClass.setInvertSelection(false);
      removeClass.setInputFormat(unlabeled);
      unlabeled = Filter.useFilter(unlabeled, removeClass);
    }

    boolean densityBased = m_Clusterer instanceof DensityBasedClusterer;
    double logLikelihood = 0.0;
    int unclusteredInstances = 0;

    for (int row = 0; row < unlabeled.numInstances(); row++) {
      int assigned = -1;
      try {
        // Density-based clusterers contribute to the log likelihood before being asked for the
        // cluster; a failure in either call leaves the instance unclustered.
        if (densityBased) {
          logLikelihood +=
              ((DensityBasedClusterer) m_Clusterer).logDensityForInstance(unlabeled.instance(row));
        }
        assigned = m_Clusterer.clusterInstance(unlabeled.instance(row));
        m_clusterAssignments[row] = (double) assigned;
      } catch (Exception e) {
        // NOTE(review): m_clusterAssignments[row] keeps its default 0.0 here, so
        // evaluateClustersWithRespectToClass, which reads every entry, counts this instance
        // under cluster 0.
        unclusteredInstances++;
      }

      if (assigned != -1) {
        perClusterCounts[assigned]++;
      }
    }

    // Average the log likelihood over the instances that were actually clustered.
    double clusteredTotal = Utils.sum(perClusterCounts);
    logLikelihood /= clusteredTotal;
    m_logL = logLikelihood;

    m_clusteringResults.append(m_Clusterer.toString());
    m_clusteringResults.append("Clustered Instances\n\n");
    int clustFieldWidth = (int) ((Math.log(clusterCount) / Math.log(10)) + 1);

    // One line per non-empty cluster: index, instance count and share of the total.
    for (int cluster = 0; cluster < clusterCount; cluster++) {
      if (perClusterCounts[cluster] <= 0) {
        continue;
      }
      String clusterLabel = Utils.doubleToString((double) cluster, clustFieldWidth, 0);
      String countLabel = Utils.doubleToString(perClusterCounts[cluster], numInstFieldWidth, 0);
      String shareLabel =
          Utils.doubleToString((perClusterCounts[cluster] / clusteredTotal * 100.0), 3, 0);
      m_clusteringResults.append(
          clusterLabel + "      " + countLabel + " (" + shareLabel + "%)\n");
    }

    if (unclusteredInstances > 0) {
      m_clusteringResults.append("\nUnclustered instances : " + unclusteredInstances);
    }

    if (densityBased) {
      String formattedLogLikelihood = Utils.doubleToString(logLikelihood, 1, 5);
      m_clusteringResults.append("\n\nLog likelihood: " + formattedLogLikelihood + "\n");
    }

    // The class-based evaluation needs the original data, class attribute included.
    if (hasClass) {
      evaluateClustersWithRespectToClass(test);
    }
  }
Ejemplo n.º 13
0
  /**
   * Creates a new instance of an associator given its class name and (optional) arguments to pass
   * to its setOptions method. If the associator implements OptionHandler and the options parameter
   * is non-null, the associator will have its options set.
   *
   * @param associatorName the fully qualified class name of the associator
   * @param options an array of options suitable for passing to setOptions. May be null.
   * @return the newly created associator, ready for use.
   * @exception Exception if the associator name is invalid, or the options supplied are not
   *     acceptable to the associator
   */
  public static Associator forName(String associatorName, String[] options) throws Exception {
    // Utils.forName does the instantiation, restricted to Associator; the result only needs
    // narrowing to the declared return type.
    Object created = Utils.forName(Associator.class, associatorName, options);
    return (Associator) created;
  }
  /** Main method. */
  public static void main(String[] args) {
    // Parses the command-line options, loads the training data and builds a StRipShort
    // classifier on it. Any failure is reported on standard error.
    // NOTE(review): many locals below are declared, and some assigned, without ever being read
    // in this method (e.g. tempTrain, template, text, costMatrix, attributesToOutput, the
    // timing variables, seed, folds and the print/statistics flags).
    // NOTE(review): presumably the Utils.getOption/getFlag calls consume the entries they
    // match, so the values that are never read still matter for checkForRemainingOptions.
    // Confirm against Utils.
    try {
      String[] options = args;
      StRipShort classifier = new StRipShort();
      InstancesShort train = null, tempTrain, test = null, template = null;
      int seed = 1, folds = 10, classIndex = -1;
      String trainFileName,
          testFileName,
          sourceClass,
          classIndexString,
          seedString,
          foldsString,
          objectInputFileName,
          objectOutputFileName,
          attributeRangeString;
      boolean IRstatistics = false,
          noOutput = false,
          printClassifications = false,
          trainStatistics = true,
          printMargins = false,
          printComplexityStatistics = false,
          printGraph = false,
          classStatistics = false,
          printSource = false;
      StringBuffer text = new StringBuffer();
      BufferedReader trainReader = null, testReader = null;
      ObjectInputStream objectInputStream = null;
      CostMatrix costMatrix = null;
      StringBuffer schemeOptionsText = null;
      Range attributesToOutput = null;
      long trainTimeStart = 0, trainTimeElapsed = 0, testTimeStart = 0, testTimeElapsed = 0;

      // -c: 1-based index of the class attribute; -1 means "use the last attribute".
      classIndexString = Utils.getOption('c', options);
      if (classIndexString.length() != 0) {
        classIndex = Integer.parseInt(classIndexString);
      }
      trainFileName = Utils.getOption('t', options);
      objectInputFileName = Utils.getOption('l', options);
      objectOutputFileName = Utils.getOption('d', options);
      testFileName = Utils.getOption('T', options);
      // Without a training file, both a stored model (-l) and a test file (-T) are required.
      if (trainFileName.length() == 0) {
        if (objectInputFileName.length() == 0) {
          throw new Exception("No training file and no object " + "input file given.");
        }
        if (testFileName.length() == 0) {
          throw new Exception("No training file and no test " + "file given.");
        }
      }
      // Open whichever input files were named.
      // NOTE(review): none of these readers/streams is closed in this method, and
      // objectInputStream is never read from.
      try {
        if (trainFileName.length() != 0) {
          trainReader = new BufferedReader(new FileReader(trainFileName));
        }
        if (testFileName.length() != 0) {
          testReader = new BufferedReader(new FileReader(testFileName));
        }
        if (objectInputFileName.length() != 0) {
          InputStream is = new FileInputStream(objectInputFileName);
          // Object files ending in ".gz" are read through a GZIP decompressing stream.
          if (objectInputFileName.endsWith(".gz")) {
            is = new GZIPInputStream(is);
          }
          objectInputStream = new ObjectInputStream(is);
        }
      } catch (Exception e) {
        throw new Exception("Can't open file " + e.getMessage() + '.');
      }
      if (testFileName.length() != 0) {
        template = test = new InstancesShort(testReader, 1);
        if (classIndex != -1) {
          test.setClassIndex(classIndex - 1);
        } else {
          test.setClassIndex(test.numAttributes() - 1);
        }
        // NOTE(review): this range check runs only after setClassIndex has already been called
        // with the possibly out-of-range index.
        if (classIndex > test.numAttributes()) {
          throw new Exception("Index of class attribute too large.");
        }
      }
      seedString = Utils.getOption('s', options);
      if (seedString.length() != 0) {
        seed = Integer.parseInt(seedString);
      }
      foldsString = Utils.getOption('x', options);
      if (foldsString.length() != 0) {
        folds = Integer.parseInt(foldsString);
      }

      classStatistics = Utils.getFlag('i', options);
      noOutput = Utils.getFlag('o', options);
      trainStatistics = !Utils.getFlag('v', options);
      printComplexityStatistics = Utils.getFlag('k', options);
      printMargins = Utils.getFlag('r', options);
      printGraph = Utils.getFlag('g', options);
      sourceClass = Utils.getOption('z', options);
      printSource = (sourceClass.length() != 0);
      // Collect the non-empty entries still in the array into one text, quoting entries that
      // contain a space. The resulting buffer is not used afterwards in this method.
      for (int i = 0; i < options.length; i++) {
        if (options[i].length() != 0) {
          if (schemeOptionsText == null) {
            schemeOptionsText = new StringBuffer();
          }
          if (options[i].indexOf(' ') != -1) {
            schemeOptionsText.append('"' + options[i] + "\" ");
          } else {
            schemeOptionsText.append(options[i] + " ");
          }
        }
      }
      // Hand the remaining options to the classifier, then verify nothing was left over.
      classifier.setOptions(options);
      Utils.checkForRemainingOptions(options);
      // NOTE(review): trainReader is still null when no -t file was given (the -l plus -T
      // combination passes the checks above); how ModifiedInstancesShort handles a null reader
      // is not visible here.
      train = new ModifiedInstancesShort(trainReader);
      if (classIndex != -1) {
        train.setClassIndex(classIndex - 1);
      } else {
        train.setClassIndex(train.numAttributes() - 1);
      }
      train.cleanUpValues();
      // System.err.println(train);
      classifier.buildClassifier(train);
    } catch (Exception e) {
      // The stack trace is printed first, followed by the bare message on its own line.
      e.printStackTrace();
      System.err.println(e.getMessage());
    }
  }