Beispiel #1
0
  /**
   * Parses the given options and applies them to this object.
   *
   * <p>The superclass options are handled first. Then -a (number of attributes, falling back to
   * {@code defaultNumAttributes()}), the -c flag, -b (boolean attribute indices) and -m (nominal
   * attribute indices) are read. Both index ranges get the last attribute index as upper bound
   * before the indices are validated via {@code checkIndices()}.
   *
   * @param options the options
   * @throws Exception if an option is invalid; an IllegalArgumentException carrying the message
   *     of {@code checkIndices()} is thrown when that message is non-empty
   */
  @Override
  public void setOptions(String[] options) throws Exception {
    super.setOptions(options);

    // number of attributes: explicit value or the default
    final String numAttsOpt = Utils.getOption('a', options);
    if (numAttsOpt.length() == 0) {
      setNumAttributes(defaultNumAttributes());
    } else {
      setNumAttributes(Integer.parseInt(numAttsOpt));
    }

    setClassFlag(Utils.getFlag('c', options));

    // the ranges need the (already set) attribute count as their upper bound
    setBooleanIndices(Utils.getOption('b', options));
    m_booleanCols.setUpper(getNumAttributes() - 1);

    setNominalIndices(Utils.getOption('m', options));
    m_nominalCols.setUpper(getNumAttributes() - 1);

    // a non-empty message from checkIndices() signals a problem with the index ranges
    final String indexProblem = checkIndices();
    if (indexProblem.length() > 0) {
      throw new IllegalArgumentException(indexProblem);
    }
  }
  /**
   * Process a classifier's prediction for an instance and update a set of plotting instances and
   * additional plotting info. m_PlotShapes for nominal class datasets holds shape types (actual
   * data points have automatic shape type assignment; classifier error data points have box shape
   * type). For numeric class datasets, the actual data points are stored in m_PlotInstances and
   * m_PlotSizes stores the error (which is later converted to shape size values).
   *
   * <p>Nothing is recorded for plotting when m_SaveForVisualization is false or m_PlotInstances is
   * null; the prediction is still recorded in the evaluation object. Any exception raised while
   * processing is printed to standard error and not propagated.
   *
   * @param toPredict the actual data point
   * @param classifier the classifier
   * @param eval the evaluation object to use for evaluating the classifier on the instance to
   *     predict
   * @see #m_PlotShapes
   * @see #m_PlotSizes
   * @see #m_PlotInstances
   */
  public void process(Instance toPredict, Classifier classifier, Evaluation eval) {
    try {
      double pred = eval.evaluateModelOnceAndRecordPrediction(classifier, toPredict);

      // work on the mapped instance so attribute positions match the classifier's view
      if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) {
        toPredict =
            ((weka.classifiers.misc.InputMappedClassifier) classifier)
                .constructMappedInstance(toPredict);
      }

      if (!m_SaveForVisualization) {
        return;
      }

      if (m_PlotInstances != null) {
        // Plot instance layout: attributes before the class are copied as-is, the class position
        // holds the prediction followed by the actual class value, and the remaining attributes
        // are shifted right by one.
        double[] values = new double[m_PlotInstances.numAttributes()];
        for (int i = 0; i < m_PlotInstances.numAttributes(); i++) {
          if (i < toPredict.classIndex()) {
            values[i] = toPredict.value(i);
          } else if (i == toPredict.classIndex()) {
            values[i] = pred;
            values[i + 1] = toPredict.value(i);
            i++; // skip the slot that was just filled with the actual class value
          } else {
            values[i] = toPredict.value(i - 1);
          }
        }

        m_PlotInstances.add(new DenseInstance(1.0, values));

        if (toPredict.classAttribute().isNominal()) {
          if (toPredict.isMissing(toPredict.classIndex()) || Utils.isMissingValue(pred)) {
            m_PlotShapes.addElement(Integer.valueOf(Plot2D.MISSING_SHAPE));
          } else if (pred != toPredict.classValue()) {
            // set to default error point shape
            m_PlotShapes.addElement(Integer.valueOf(Plot2D.ERROR_SHAPE));
          } else {
            // otherwise set to constant (automatically assigned) point shape
            m_PlotShapes.addElement(Integer.valueOf(Plot2D.CONST_AUTOMATIC_SHAPE));
          }
          m_PlotSizes.addElement(Integer.valueOf(Plot2D.DEFAULT_SHAPE_SIZE));
        } else {
          // store the error (to be converted to a point size later); stays null when either the
          // actual class or the prediction is missing
          Double errd = null;
          if (!toPredict.isMissing(toPredict.classIndex()) && !Utils.isMissingValue(pred)) {
            errd = Double.valueOf(pred - toPredict.classValue());
            m_PlotShapes.addElement(Integer.valueOf(Plot2D.CONST_AUTOMATIC_SHAPE));
          } else {
            // missing shape if actual class not present or prediction is missing
            m_PlotShapes.addElement(Integer.valueOf(Plot2D.MISSING_SHAPE));
          }
          m_PlotSizes.addElement(errd);
        }
      }
    } catch (Exception ex) {
      // the signature declares no checked exception, so failures are only reported
      ex.printStackTrace();
    }
  }
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -M
   *  Minimize expected misclassification cost. Default is to
   *  reweight training instances according to costs per class</pre>
   *
   * <pre> -C &lt;cost file name&gt;
   *  File name of a cost matrix to use. If this is not supplied,
   *  a cost matrix will be loaded on demand. The name of the
   *  on-demand file is the relation name of the training data
   *  plus ".cost", and the path to the on-demand file is
   *  specified with the -N option.</pre>
   *
   * <pre> -N &lt;directory&gt;
   *  Name of a directory to search for cost files when loading
   *  costs on demand (default current directory).</pre>
   *
   * <pre> -cost-matrix &lt;matrix&gt;
   *  The cost matrix in Matlab single line format.</pre>
   *
   * <pre> -S &lt;num&gt;
   *  Random number seed.
   *  (default 1)</pre>
   *
   * <pre> -D
   *  If set, classifier is run in debug mode and
   *  may output additional info to the console</pre>
   *
   * <pre> -W
   *  Full name of base classifier.
   *  (default: weka.classifiers.rules.ZeroR)</pre>
   *
   * <pre>
   * Options specific to classifier weka.classifiers.rules.ZeroR:
   * </pre>
   *
   * <pre> -D
   *  If set, classifier is run in debug mode and
   *  may output additional info to the console</pre>
   *
   * <!-- options-end -->
   * Options after -- are passed to the designated classifier.
   *
   * <p>
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    setMinimizeExpectedCost(Utils.getFlag('M', options));

    // -C: cost matrix file supplied by the user
    String costFile = Utils.getOption('C', options);
    if (costFile.length() != 0) {
      try {
        BufferedReader reader = new BufferedReader(new FileReader(costFile));
        try {
          setCostMatrix(new CostMatrix(reader));
        } finally {
          // always release the file handle, whether or not parsing succeeded
          try {
            reader.close();
          } catch (Exception ignored) {
            // a failure to close must not discard a successfully parsed matrix
          }
        }
      } catch (Exception ex) {
        // now flag as possible old format cost matrix. Delay cost matrix
        // loading until buildClassifer is called
        setCostMatrix(null);
      }
      setCostMatrixSource(new SelectedTag(MATRIX_SUPPLIED, TAGS_MATRIX_SOURCE));
      m_CostFile = costFile;
    } else {
      setCostMatrixSource(new SelectedTag(MATRIX_ON_DEMAND, TAGS_MATRIX_SOURCE));
    }

    // -N: directory searched for on-demand cost files
    String demandDir = Utils.getOption('N', options);
    if (demandDir.length() != 0) {
      setOnDemandDirectory(new File(demandDir));
    }

    // -cost-matrix: matrix given inline in Matlab single line format; it is parsed, written out
    // and re-read through the Reader-based constructor, and takes precedence over -C because it
    // is applied afterwards
    String cost_matrix = Utils.getOption("cost-matrix", options);
    if (cost_matrix.length() != 0) {
      StringWriter writer = new StringWriter();
      CostMatrix.parseMatlab(cost_matrix).write(writer);
      setCostMatrix(new CostMatrix(new StringReader(writer.toString())));
      setCostMatrixSource(new SelectedTag(MATRIX_SUPPLIED, TAGS_MATRIX_SOURCE));
    }

    super.setOptions(options);
  }
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -L &lt;double&gt;
   *  The epsilon parameter in epsilon-insensitive loss function.
   *  (default 1.0e-3)</pre>
   *
   * <pre> -W &lt;double&gt;
   *  The random number seed.
   *  (default 1)</pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    // -L: epsilon parameter, 1.0e-3 when not given
    final String epsilonOpt = Utils.getOption('L', options);
    setEpsilonParameter(epsilonOpt.length() != 0 ? Double.parseDouble(epsilonOpt) : 1.0e-3);

    // Parsing of a loss type option (-S) is disabled in this implementation, so the loss type is
    // left untouched here.

    // -W: random number seed, 1 when not given
    final String seedOpt = Utils.getOption('W', options);
    setSeed(seedOpt.length() != 0 ? Integer.parseInt(seedOpt) : 1);
  }
  /**
   * Returns a textual description of this classifier: the size of the dictionary merged over all
   * classes, the stored value of m_probOfClass for every class, and a table with one row per
   * dictionary word holding the stored count of that word for every class.
   *
   * @return a textual description of this classifier, or a "no model built yet" message while
   *     m_probOfClass is null
   */
  @Override
  public String toString() {

    if (m_probOfClass == null) {
      return "NaiveBayesMultinomialText: No model built yet.\n";
    }

    final int numClasses = m_data.numClasses();

    // merge the per-class dictionaries into one vocabulary
    HashSet<String> vocabulary = new HashSet<String>();
    for (int cls = 0; cls < numClasses; cls++) {
      LinkedHashMap<String, Count> wordsOfClass = m_probOfWordGivenClass.get(cls);
      vocabulary.addAll(wordsOfClass.keySet());
    }

    StringBuilder out = new StringBuilder();
    out.append("Dictionary size: " + vocabulary.size()).append("\n\n");

    out.append("The independent frequency of a class\n");
    out.append("--------------------------------------\n");
    for (int cls = 0; cls < numClasses; cls++) {
      out.append(m_data.classAttribute().value(cls));
      out.append("\t");
      out.append(Double.toString(m_probOfClass[cls]));
      out.append("\n");
    }

    out.append("\nThe frequency of a word given the class\n");
    out.append("-----------------------------------------\n");

    // header row: one padded column per class label
    for (int cls = 0; cls < numClasses; cls++) {
      out.append(Utils.padLeft(m_data.classAttribute().value(cls), 11)).append("\t");
    }
    out.append("\n");

    // one row per word; words a class has no entry for are shown with a placeholder
    for (String word : vocabulary) {
      for (int cls = 0; cls < numClasses; cls++) {
        Count count = m_probOfWordGivenClass.get(cls).get(word);
        if (count != null) {
          out.append(Utils.padLeft(Double.toString(count.m_count), 11)).append("\t");
        } else {
          out.append("<laplace=1>\t");
        }
      }
      out.append(word);
      out.append("\n");
    }

    return out.toString();
  }
Beispiel #6
0
  /**
   * Returns description of the bias-variance decomposition results.
   *
   * <p>The classifier line shows the classifier's class name; when the classifier is an
   * OptionHandler with a non-empty option string, the options follow the class name separated by
   * a single space. A class index of 0 is reported as "last".
   *
   * @return the bias-variance decomposition results as a string, or "Invalid setup" if no
   *     classifier is set
   */
  @Override
  public String toString() {

    if (getClassifier() == null) {
      return "Invalid setup";
    }

    StringBuilder result = new StringBuilder("\nBias-Variance Decomposition\n");

    result.append("\nClassifier   : ").append(getClassifier().getClass().getName());
    if (getClassifier() instanceof OptionHandler) {
      String classifierOptions =
          Utils.joinOptions(((OptionHandler) getClassifier()).getOptions());
      // keep the class name and its options apart; previously they ran together
      if (classifierOptions.length() > 0) {
        result.append(' ').append(classifierOptions);
      }
    }
    result.append("\nData File    : ").append(getDataFileName());
    result.append("\nClass Index  : ");
    if (getClassIndex() == 0) {
      result.append("last");
    } else {
      result.append(getClassIndex());
    }
    result.append("\nTraining Pool: ").append(getTrainPoolSize());
    result.append("\nIterations   : ").append(getTrainIterations());
    result.append("\nSeed         : ").append(getSeed());
    result.append("\nError        : ").append(Utils.doubleToString(getError(), 6, 4));
    result.append("\nSigma^2      : ").append(Utils.doubleToString(getSigma(), 6, 4));
    result.append("\nBias^2       : ").append(Utils.doubleToString(getBias(), 6, 4));
    result.append("\nVariance     : ").append(Utils.doubleToString(getVariance(), 6, 4));

    return result.append("\n").toString();
  }
Beispiel #7
0
  /**
   * Builds a textual summary of the clustering: the number of clusters and, for every cluster,
   * its size, its prior probability (bestT.Pt) and, for every attribute, the attribute name with
   * the matching entry of bestT.Py_t.
   *
   * @return the summary as a string
   */
  @Override
  public String toString() {
    StringBuilder summary = new StringBuilder();
    summary.append("\nsIB\n===\n");
    summary.append("\nNumber of clusters: " + m_numCluster + "\n");

    for (int cluster = 0; cluster < m_numCluster; cluster++) {
      // cluster header line
      summary.append("\nCluster: " + cluster + " Size : " + bestT.size(cluster));
      summary.append(
          " Prior probability: " + Utils.doubleToString(bestT.Pt[cluster], 4) + "\n\n");

      // one name line and one probability line per attribute
      for (int att = 0; att < m_numAttributes; att++) {
        summary.append("Attribute: " + m_data.attribute(att).name() + "\n");
        summary.append(
            "Probability given the cluster = "
                + Utils.doubleToString(bestT.Py_t.get(att, cluster), 4)
                + "\n");
      }
    }
    return summary.toString();
  }
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -F &lt;filter specification&gt;
   *  Full class name of filter to use, followed
   *  by filter options.
   *  eg: "weka.filters.unsupervised.attribute.Remove -V -R 1,2"
   *  (default: weka.filters.MultiFilter with
   *  weka.filters.unsupervised.attribute.ReplaceMissingValues)</pre>
   *
   * <pre> -c &lt;the class index&gt;
   *  The class index.
   *  (default: -1, i.e. unset)</pre>
   *
   * <pre> -W
   *  Full name of base associator.
   *  (default: weka.associations.Apriori)</pre>
   *
   * <pre>
   * Options specific to associator weka.associations.Apriori:
   * </pre>
   *
   * <pre> -N &lt;required number of rules output&gt;
   *  The required number of rules. (default = 10)</pre>
   *
   * <pre> -T &lt;0=confidence | 1=lift | 2=leverage | 3=Conviction&gt;
   *  The metric type by which to rank rules. (default = confidence)</pre>
   *
   * <pre> -C &lt;minimum metric score of a rule&gt;
   *  The minimum confidence of a rule. (default = 0.9)</pre>
   *
   * <pre> -D &lt;delta for minimum support&gt;
   *  The delta by which the minimum support is decreased in
   *  each iteration. (default = 0.05)</pre>
   *
   * <pre> -U &lt;upper bound for minimum support&gt;
   *  Upper bound for minimum support. (default = 1.0)</pre>
   *
   * <pre> -M &lt;lower bound for minimum support&gt;
   *  The lower bound for the minimum support. (default = 0.1)</pre>
   *
   * <pre> -S &lt;significance level&gt;
   *  If used, rules are tested for significance at
   *  the given level. Slower. (default = no significance testing)</pre>
   *
   * <pre> -I
   *  If set the itemsets found are also output. (default = no)</pre>
   *
   * <pre> -R
   *  Remove columns that contain all missing values (default = no)</pre>
   *
   * <pre> -V
   *  Report progress iteratively. (default = no)</pre>
   *
   * <pre> -A
   *  If set class association rules are mined. (default = no)</pre>
   *
   * <pre> -c &lt;the class index&gt;
   *  The class index. (default = last)</pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    // -F: filter class name followed by the filter's own options
    final String filterOpt = Utils.getOption('F', options);
    if (filterOpt.length() == 0) {
      // NOTE(review): the option documentation advertises a MultiFilter with
      // ReplaceMissingValues as default, but a supervised Discretize filter is installed here;
      // confirm which of the two is intended.
      setFilter(new weka.filters.supervised.attribute.Discretize());
    } else {
      final String[] filterSpec = Utils.splitOptions(filterOpt);
      if (filterSpec.length == 0) {
        throw new IllegalArgumentException("Invalid filter specification string");
      }
      // first element is the class name, the remainder are the filter's options
      final String filterName = filterSpec[0];
      filterSpec[0] = "";
      setFilter((Filter) Utils.forName(Filter.class, filterName, filterSpec));
    }

    // -c: class index; "last" maps to 0, "first" to 1, no option to -1
    final String classIndexOpt = Utils.getOption('c', options);
    final int classIndex;
    if (classIndexOpt.length() == 0) {
      classIndex = -1;
    } else if (classIndexOpt.equalsIgnoreCase("last")) {
      classIndex = 0;
    } else if (classIndexOpt.equalsIgnoreCase("first")) {
      classIndex = 1;
    } else {
      classIndex = Integer.parseInt(classIndexOpt);
    }
    setClassIndex(classIndex);

    super.setOptions(options);
  }
Beispiel #9
0
  /**
   * Parses a given list of options.
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre>
   * -S &lt;num&gt;
   *  Specifies the random number seed
   *  (default 1)
   * </pre>
   *
   * <pre>
   * -P &lt;percentage&gt;
   *  Specifies percentage of SMOTE instances to create.
   *  (default 100.0)
   * </pre>
   *
   * <pre>
   * -K &lt;nearest-neighbors&gt;
   *  Specifies the number of nearest neighbors to use.
   *  (default 5)
   * </pre>
   *
   * <pre>
   * -C &lt;value-index&gt;
   *  Specifies the index of the nominal class value to SMOTE
   *  (default 0: auto-detect non-empty minority class))
   * </pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    // -S: random number seed (default 1)
    String seedStr = Utils.getOption('S', options);
    if (seedStr.length() != 0) {
      setRandomSeed(Integer.parseInt(seedStr));
    } else {
      setRandomSeed(1);
    }

    // -P: percentage of instances to create (default 100.0); parsed straight to a primitive
    // instead of going through the deprecated Double(String) constructor
    String percentageStr = Utils.getOption('P', options);
    if (percentageStr.length() != 0) {
      setPercentage(Double.parseDouble(percentageStr));
    } else {
      setPercentage(100.0);
    }

    // -K: number of nearest neighbors (default 5)
    String nnStr = Utils.getOption('K', options);
    if (nnStr.length() != 0) {
      setNearestNeighbors(Integer.parseInt(nnStr));
    } else {
      setNearestNeighbors(5);
    }

    // -C: index of the class value; without it the minority class is auto-detected
    String classValueIndexStr = Utils.getOption('C', options);
    if (classValueIndexStr.length() != 0) {
      setClassValue(classValueIndexStr);
    } else {
      m_DetectMinorityClass = true;
    }
  }
Beispiel #10
0
  /**
   * Parses the options for this object.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -D
   *  Turns on output of debugging information.</pre>
   *
   * <pre> -A &lt;Haar&gt;
   *  The algorithm to use.
   *  (default: HAAR)</pre>
   *
   * <pre> -P &lt;Zero&gt;
   *  The padding to use.
   *  (default: ZERO)</pre>
   *
   * <pre> -F &lt;filter specification&gt;
   *  The filter to use as preprocessing step (classname and options).
   *  (default: MultiFilter with ReplaceMissingValues and Normalize)</pre>
   *
   * <pre>
   * Options specific to filter weka.filters.MultiFilter ('-F'):
   * </pre>
   *
   * <pre> -D
   *  Turns on output of debugging information.</pre>
   *
   * <pre> -F &lt;classname [options]&gt;
   *  A filter to apply (can be specified multiple times).</pre>
   *
   * <!-- options-end -->
   *
   * @param options the options to use
   * @throws Exception if the option setting fails
   */
  public void setOptions(String[] options) throws Exception {
    super.setOptions(options);

    // -A: algorithm, HAAR when not given
    final String algorithmOpt = Utils.getOption("A", options);
    if (algorithmOpt.length() == 0) {
      setAlgorithm(new SelectedTag(ALGORITHM_HAAR, TAGS_ALGORITHM));
    } else {
      setAlgorithm(new SelectedTag(algorithmOpt, TAGS_ALGORITHM));
    }

    // -P: padding, ZERO when not given
    final String paddingOpt = Utils.getOption("P", options);
    if (paddingOpt.length() == 0) {
      setPadding(new SelectedTag(PADDING_ZERO, TAGS_PADDING));
    } else {
      setPadding(new SelectedTag(paddingOpt, TAGS_PADDING));
    }

    // -F: preprocessing filter given as class name plus options
    final String[] filterSpec = Utils.splitOptions(Utils.getOption("F", options));
    if (filterSpec.length == 0) {
      // default: MultiFilter chaining ReplaceMissingValues and Normalize
      final MultiFilter defaultFilter = new MultiFilter();
      defaultFilter.setFilters(
          new Filter[] {
            new weka.filters.unsupervised.attribute.ReplaceMissingValues(),
            new weka.filters.unsupervised.attribute.Normalize()
          });
      setFilter(defaultFilter);
    } else {
      // first element is the class name, the remainder are the filter's options
      final String filterClass = filterSpec[0];
      filterSpec[0] = "";
      setFilter((Filter) Utils.forName(Filter.class, filterClass, filterSpec));
    }
  }
Beispiel #11
0
  /**
   * Gets the current settings of the classifier.
   *
   * <p>The array holds -C, -N, -I (optimizer class name followed by its options) and -K (kernel
   * class name followed by its options), followed by the options of the superclass.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    final Vector<String> opts = new Vector<String>();

    Collections.addAll(opts, "-C", "" + getC(), "-N", "" + m_filterType);

    // nested schemes are encoded as "<class name> <options>"
    final String optimizerSpec =
        getRegOptimizer().getClass().getName()
            + " "
            + Utils.joinOptions(getRegOptimizer().getOptions());
    Collections.addAll(opts, "-I", optimizerSpec);

    final String kernelSpec =
        getKernel().getClass().getName() + " " + Utils.joinOptions(getKernel().getOptions());
    Collections.addAll(opts, "-K", kernelSpec);

    Collections.addAll(opts, super.getOptions());

    return opts.toArray(new String[opts.size()]);
  }
Beispiel #12
0
  /**
   * Parses a given list of options. The options are first handed to
   * Option.setOptionsForHierarchy for this object, then the following flags are read:
   *
   * <p>-output-debug-info <br>
   * If set, clusterer is run in debug mode and may output additional info to the console.
   *
   * <p>-do-not-check-capabilities <br>
   * If set, clusterer capabilities are not checked before clusterer is built (use with caution).
   *
   * <p>
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {
    Option.setOptionsForHierarchy(options, this, AbstractClusterer.class);

    final boolean debugRequested = Utils.getFlag("output-debug-info", options);
    setDebug(debugRequested);

    final boolean skipCapabilitiesCheck = Utils.getFlag("do-not-check-capabilities", options);
    setDoNotCheckCapabilities(skipCapabilitiesCheck);
  }
Beispiel #13
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -A
   *  The nearest neighbour search algorithm to use (default: weka.core.neighboursearch.LinearNNSearch).
   * </pre>
   *
   * <pre> -K &lt;number of neighbours&gt;
   *  Set the number of neighbours used to set the kernel bandwidth.
   *  (default all)</pre>
   *
   * <pre> -U &lt;number of weighting method&gt;
   *  Set the weighting kernel shape to use. 0=Linear, 1=Epanechnikov,
   *  2=Tricube, 3=Inverse, 4=Gaussian.
   *  (default 0 = Linear)</pre>
   *
   * <pre> -D
   *  If set, classifier is run in debug mode and
   *  may output additional info to the console</pre>
   *
   * <pre> -W
   *  Full name of base classifier.
   *  (default: weka.classifiers.trees.DecisionStump)</pre>
   *
   * <pre>
   * Options specific to classifier weka.classifiers.trees.DecisionStump:
   * </pre>
   *
   * <pre> -D
   *  If set, classifier is run in debug mode and
   *  may output additional info to the console</pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    // -K: number of neighbours, -1 when not given
    final String knnOpt = Utils.getOption('K', options);
    setKNN(knnOpt.length() != 0 ? Integer.parseInt(knnOpt) : -1);

    // -U: weighting kernel, LINEAR when not given
    final String kernelOpt = Utils.getOption('U', options);
    if (kernelOpt.length() == 0) {
      setWeightingKernel(LINEAR);
    } else {
      setWeightingKernel(Integer.parseInt(kernelOpt));
    }

    // -A: nearest neighbour search class name followed by its options
    final String searchOpt = Utils.getOption('A', options);
    if (searchOpt.length() == 0) {
      this.setNearestNeighbourSearchAlgorithm(new LinearNNSearch());
    } else {
      final String[] searchSpec = Utils.splitOptions(searchOpt);
      if (searchSpec.length == 0) {
        throw new Exception("Invalid NearestNeighbourSearch algorithm specification string.");
      }
      // first element is the class name, the remainder are the algorithm's options
      final String searchClass = searchSpec[0];
      searchSpec[0] = "";
      setNearestNeighbourSearchAlgorithm(
          (NearestNeighbourSearch)
              Utils.forName(NearestNeighbourSearch.class, searchClass, searchSpec));
    }

    super.setOptions(options);
  }
Beispiel #14
0
  /**
   * Classifies the given test instance by delegating to distributionForInstance(). The instance
   * has to belong to a dataset when it's being classified. Note that a classifier MUST implement
   * either this or distributionForInstance().
   *
   * <p>For a nominal class the index of the largest strictly positive distribution entry is
   * returned (the first one on ties); for a numeric or date class the first distribution entry is
   * returned.
   *
   * @param instance the instance to be classified
   * @return the predicted most likely class for the instance or Utils.missingValue() if no
   *     prediction is made (no positive entry for a nominal class, or any other class type)
   * @throws Exception if an error occurred during the prediction, including a null distribution
   */
  @Override
  public double classifyInstance(Instance instance) throws Exception {
    final double[] dist = distributionForInstance(instance);
    if (dist == null) {
      throw new Exception("Null distribution predicted");
    }

    final int classType = instance.classAttribute().type();

    if (classType == Attribute.NUMERIC || classType == Attribute.DATE) {
      return dist[0];
    }
    if (classType != Attribute.NOMINAL) {
      return Utils.missingValue();
    }

    // nominal class: pick the entry with the highest probability
    int bestIndex = 0;
    double bestProb = 0;
    for (int i = 0; i < dist.length; i++) {
      if (dist[i] > bestProb) {
        bestProb = dist[i];
        bestIndex = i;
      }
    }
    // an all-zero distribution means no prediction was made
    return bestProb > 0 ? bestIndex : Utils.missingValue();
  }
Beispiel #15
0
  /**
   * Outputs the linear regression model as a string: the class attribute name followed by one
   * "coefficient * attribute" term per selected non-class attribute and a final term that
   * contains only the coefficient stored after those of the attributes.
   *
   * @return the model as string, a "no model built yet" message while m_TransformedData is null,
   *     or a fixed error message if building the text fails
   */
  public String toString() {

    if (m_TransformedData == null) {
      return "Linear Regression: No model built yet.";
    }
    try {
      StringBuilder model = new StringBuilder();
      model.append("\nLinear Regression Model\n\n");
      model.append(m_TransformedData.classAttribute().name() + " =\n\n");

      // index into m_Coefficients; also the number of terms written so far
      int term = 0;
      for (int i = 0; i < m_TransformedData.numAttributes(); i++) {
        if ((i == m_ClassIndex) || !m_SelectedAttributes[i]) {
          continue;
        }
        if (term > 0) {
          model.append(" +\n");
        }
        model.append(Utils.doubleToString(m_Coefficients[term], 12, 4) + " * ");
        model.append(m_TransformedData.attribute(i).name());
        term++;
      }
      // the coefficient following the attribute coefficients is printed on its own
      model.append(" +\n" + Utils.doubleToString(m_Coefficients[term], 12, 4));
      return model.toString();
    } catch (Exception e) {
      return "Can't print Linear Regression!";
    }
  }
  /**
   * Parses a given list of options. Valid options are:
   *
   * <p>-B classifierstring <br>
   * Classifierstring should contain the full class name of a scheme included for selection followed
   * by options to the classifier (option should be used once for each classifier; if it is not
   * given at all, a single weka.classifiers.rules.ZeroR is used).
   *
   * <p>
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    // Iterate through the schemes: every -B occurrence contributes one classifier
    Vector<Classifier> classifiers = new Vector<Classifier>();
    while (true) {
      String classifierString = Utils.getOption('B', options);
      if (classifierString.length() == 0) {
        break;
      }
      String[] classifierSpec = Utils.splitOptions(classifierString);
      if (classifierSpec.length == 0) {
        throw new IllegalArgumentException("Invalid classifier specification string");
      }
      // first element is the class name, the remainder are the classifier's options
      String classifierName = classifierSpec[0];
      classifierSpec[0] = "";
      classifiers.addElement(Classifier.forName(classifierName, classifierSpec));
    }
    if (classifiers.isEmpty()) {
      classifiers.addElement(new weka.classifiers.rules.ZeroR());
    }
    setClassifiers(classifiers.toArray(new Classifier[classifiers.size()]));
  }
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -C &lt;col&gt;
   *  Sets the attribute index (default last).</pre>
   *
   * <pre> -F &lt;value index&gt;
   *  Sets the first value's index (default first).</pre>
   *
   * <pre> -S &lt;value index&gt;
   *  Sets the second value's index (default last).</pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    // -C: attribute index, "last" when not given
    final String attributeOpt = Utils.getOption('C', options);
    setAttributeIndex(attributeOpt.length() != 0 ? attributeOpt : "last");

    // -F: index of the first value, "first" when not given
    final String firstValueOpt = Utils.getOption('F', options);
    setFirstValueIndex(firstValueOpt.length() != 0 ? firstValueOpt : "first");

    // -S: index of the second value, "last" when not given
    final String secondValueOpt = Utils.getOption('S', options);
    setSecondValueIndex(secondValueOpt.length() != 0 ? secondValueOpt : "last");

    // re-apply an already known input format so that the new settings are picked up
    if (getInputFormat() != null) {
      setInputFormat(getInputFormat());
    }
  }
Beispiel #18
0
        /**
         * Tests the left-hand attribute of the instance for equality (via Utils.eq) with either
         * another attribute of the same instance or the numeric operand.
         *
         * <p>When comparing against an attribute, two missing values count as equal and exactly
         * one missing value counts as not equal. When comparing against the numeric operand, a
         * missing left-hand value never matches. The rhsOperand and regexPattern arguments are
         * not used by this implementation.
         *
         * @return true if the comparison holds
         */
        @Override
        boolean evaluate(
            Instance inst,
            int lhsAttIndex,
            String rhsOperand,
            double numericOperand,
            Pattern regexPattern,
            boolean rhsIsAttribute,
            int rhsAttIndex) {

          final boolean lhsMissing = inst.isMissing(lhsAttIndex);

          if (!rhsIsAttribute) {
            // constant on the right-hand side
            return !lhsMissing && Utils.eq(inst.value(lhsAttIndex), numericOperand);
          }

          final boolean rhsMissing = inst.isMissing(rhsAttIndex);
          if (lhsMissing || rhsMissing) {
            // equal only if both sides are missing
            return lhsMissing && rhsMissing;
          }
          return Utils.eq(inst.value(lhsAttIndex), inst.value(rhsAttIndex));
        }
Beispiel #19
0
  /**
   * Computes, for every nominal attribute of the input format, the weighted average class value
   * of each attribute value and stores the result of Utils.sort on those averages in m_Indices.
   * Instances with a missing class or a missing value for the attribute are ignored. Attribute
   * values without any weight receive the overall weighted average instead. Entries of m_Indices
   * for non-nominal attributes remain empty arrays.
   */
  private void computeAverageClassValues() {

    final int numAtts = getInputFormat().numAttributes();
    final double[][] avgClassValues = new double[numAtts][0];
    m_Indices = new int[numAtts][0];

    for (int att = 0; att < numAtts; att++) {
      final Attribute attribute = getInputFormat().attribute(att);
      if (!attribute.isNominal()) {
        continue;
      }

      final int numValues = attribute.numValues();
      final double[] classSums = new double[numValues];
      final double[] weights = new double[numValues];
      avgClassValues[att] = classSums;

      // accumulate weight and weighted class value per attribute value
      final int numInstances = getInputFormat().numInstances();
      for (int n = 0; n < numInstances; n++) {
        final Instance current = getInputFormat().instance(n);
        if (current.classIsMissing() || current.isMissing(att)) {
          continue;
        }
        final int valueIndex = (int) current.value(att);
        weights[valueIndex] += current.weight();
        classSums[valueIndex] += current.weight() * current.classValue();
      }

      // totals are taken before the sums are turned into averages
      final double overallSum = Utils.sum(classSums);
      final double overallWeight = Utils.sum(weights);
      if (Utils.gr(overallWeight, 0)) {
        for (int v = 0; v < numValues; v++) {
          classSums[v] =
              Utils.gr(weights[v], 0) ? classSums[v] / weights[v] : overallSum / overallWeight;
        }
      }
      m_Indices[att] = Utils.sort(avgClassValues[att]);
    }
  }
  /**
   * Create the options array to pass to the classifier. The parameter values and positions are
   * taken from m_ClassifierOptions and m_CVParams: parameters flagged with m_AddAtEnd are placed
   * at the end of the array (filled from the back), all others at the front, and the static
   * classifier options are copied in directly after the front parameters.
   *
   * @return the options array
   */
  protected String[] createOptions() {

    final int numStatic = m_ClassifierOptions.length;
    final String[] options = new String[numStatic + 2 * m_CVParams.size()];
    int head = 0;
    int tail = options.length;

    // Add the cross-validation parameters and their values
    for (int i = 0; i < m_CVParams.size(); i++) {
      final CVParameter cvParam = (CVParameter) m_CVParams.elementAt(i);
      // Math.rint rounds to the nearest integral value (ties go to the even neighbour)
      final double paramValue =
          cvParam.m_RoundParam ? Math.rint(cvParam.m_ParamValue) : cvParam.m_ParamValue;
      final String flag = "-" + cvParam.m_ParamChar;
      final String valueText = "" + Utils.doubleToString(paramValue, 4);

      if (cvParam.m_AddAtEnd) {
        // filled back to front, so the value goes in before its flag
        options[--tail] = valueText;
        options[--tail] = flag;
      } else {
        options[head++] = flag;
        options[head++] = valueText;
      }
    }

    // Add the static parameters
    System.arraycopy(m_ClassifierOptions, 0, options, head, numStatic);

    return options;
  }
  /**
   * Parses the given options. The options read directly here are -existing-header, -A,
   * -names-file and -header-file-name; then the superclass options are processed. Finally the
   * options are handed to a temporary CSVToARFFHeaderMapTask and whatever settings that task
   * reports back are stored, if non-empty, as the CSV to ARFF task options.
   *
   * @param options the list of options as an array of strings
   * @throws Exception if setting the options fails
   */
  @Override
  public void setOptions(String[] options) throws Exception {

    // these are options to the hadoop map task (especially the -names-file)
    setPathToExistingHeader(Utils.getOption("existing-header", options));
    setAttributeNames(Utils.getOption('A', options));
    setAttributeNamesFile(Utils.getOption("names-file", options));
    setOutputHeaderFileName(Utils.getOption("header-file-name", options));

    super.setOptions(options);

    // any options to pass on to the underlying Weka csv to arff map task?
    final CSVToARFFHeaderMapTask headerTask = new CSVToARFFHeaderMapTask();
    headerTask.setOptions(options);

    final String headerTaskOptions = Utils.joinOptions(headerTask.getOptions());
    if (!DistributedJobConfig.isEmpty(headerTaskOptions)) {
      setCsvToArffTaskOptions(headerTaskOptions);
    }
  }
Beispiel #22
0
  /**
   * Parses a given list of options. Options that are not supplied leave the corresponding setting
   * untouched, except for the -E flag, which is always applied. Only the first character of the -M
   * argument is inspected; any character other than d, m or n selects the average treatment.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -B &lt;num&gt;
   *  Manual blend setting (default 20%)
   * </pre>
   *
   * <pre> -E
   *  Enable entropic auto-blend setting (symbolic class only)
   * </pre>
   *
   * <pre> -M &lt;char&gt;
   *  Specify the missing value treatment mode (default a)
   *  Valid options are: a(verage), d(elete), m(axdiff), n(ormal)
   * </pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    final String blend = Utils.getOption('B', options);
    if (blend.length() > 0) {
      setGlobalBlend(Integer.parseInt(blend));
    }

    setEntropicAutoBlend(Utils.getFlag('E', options));

    final String missingMode = Utils.getOption('M', options);
    if (missingMode.length() > 0) {
      final char modeChar = missingMode.charAt(0);
      if (modeChar == 'd') {
        setMissingMode(new SelectedTag(M_DELETE, TAGS_MISSING));
      } else if (modeChar == 'm') {
        setMissingMode(new SelectedTag(M_MAXDIFF, TAGS_MISSING));
      } else if (modeChar == 'n') {
        setMissingMode(new SelectedTag(M_NORMAL, TAGS_MISSING));
      } else {
        // 'a' as well as every unrecognised character
        setMissingMode(new SelectedTag(M_AVERAGE, TAGS_MISSING));
      }
    }

    Utils.checkForRemainingOptions(options);
  }
Beispiel #23
0
  /**
   * Parses a given list of options. All options are reset via resetOptions() before the supplied
   * ones are applied. Besides the options listed below, the flag -Z is read into the debug field.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -P &lt;start set&gt;
   *  Specify a starting set of attributes.
   *  Eg. 1,3,5-7.</pre>
   *
   * <pre> -D &lt;0 = backward | 1 = forward | 2 = bi-directional&gt;
   *  Direction of search. (default = 1).</pre>
   *
   * <pre> -N &lt;num&gt;
   *  Number of non-improving nodes to
   *  consider before terminating search.</pre>
   *
   * <pre> -S &lt;num&gt;
   *  Size of lookup cache for evaluated subsets.
   *  Expressed as a multiple of the number of
   *  attributes in the data set. (default = 1)</pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    resetOptions();

    final String startSet = Utils.getOption('P', options);
    if (startSet.length() > 0) {
      setStartSet(startSet);
    }

    // the direction is always set: forward unless stated otherwise
    final String direction = Utils.getOption('D', options);
    if (direction.length() == 0) {
      setDirection(new SelectedTag(SELECTION_FORWARD, TAGS_SELECTION));
    } else {
      setDirection(new SelectedTag(Integer.parseInt(direction), TAGS_SELECTION));
    }

    final String termination = Utils.getOption('N', options);
    if (termination.length() > 0) {
      setSearchTermination(Integer.parseInt(termination));
    }

    final String cacheSize = Utils.getOption('S', options);
    if (cacheSize.length() > 0) {
      setLookupCacheSize(Integer.parseInt(cacheSize));
    }

    m_debug = Utils.getFlag('Z', options);
  }
  /**
   * Gets the current settings of FuzzyRoughSubsetEval.
   *
   * <p>The result holds four flag/value pairs, in this order: -Z (fuzzy measure), -I (implicator),
   * -T (t-norm) and -R (similarity). Each value is the class name of the component followed by
   * the component's own options, joined into one string and trimmed.
   *
   * @return an array of strings suitable for passing to setOptions()
   */
  public String[] getOptions() {
    return new String[] {
      "-Z",
      (m_FuzzyMeasure.getClass().getName() + " " + Utils.joinOptions(m_FuzzyMeasure.getOptions()))
          .trim(),
      "-I",
      (m_Implicator.getClass().getName() + " " + Utils.joinOptions(m_Implicator.getOptions()))
          .trim(),
      "-T",
      (m_TNorm.getClass().getName() + " " + Utils.joinOptions(m_TNorm.getOptions())).trim(),
      "-R",
      (m_Similarity.getClass().getName() + " " + Utils.joinOptions(m_Similarity.getOptions()))
          .trim()
    };
  }
Beispiel #25
0
  /**
   * Calculate the potential to decrease the DL of the ruleset, i.e. the possible DL that could be
   * saved by deleting the rule whose index and simple statistics are given. The potential is the
   * data DL with the rule, plus the theory DL of the rule, minus the data DL without the rule. It
   * is returned (and {@code rulesetStat} is updated) when it is greater than or equal to zero, or
   * when {@code checkErr} is set and the rule's error rate is at least 0.5. Otherwise NaN is
   * returned and {@code rulesetStat} is left unchanged.
   *
   * <p>The procedure is copied from the original RIPPER implementation and is quite bizarre,
   * because it no longer updates the following rules' stats recursively when testing each rule.
   * This means it assumes that after deletion no data is covered by the following rules (or it
   * regards the deleted rule as the last rule). Reasonable assumption?
   *
   * @param index the index of the rule in m_Ruleset to be deleted
   * @param expFPOverErr expected FP/(FP+FN)
   * @param rulesetStat the simple statistics of the ruleset; entries 0, 1, 4 and 5 are overwritten
   *     if the rule should be deleted
   * @param ruleStat the simple statistics of the rule to be deleted; entries 0, 2 and 4 are read
   * @param checkErr whether to check if the error rate is &gt;= 0.5
   * @return the potential DL that could be decreased, or NaN if there is none
   */
  public double potential(
      int index, double expFPOverErr, double[] rulesetStat, double[] ruleStat, boolean checkErr) {
    // Ruleset statistics as they would look with the rule removed
    final double covWithout = rulesetStat[0] - ruleStat[0];
    final double uncovWithout = rulesetStat[1] + ruleStat[0];
    final double fpWithout = rulesetStat[4] - ruleStat[4];
    final double fnWithout = rulesetStat[5] + ruleStat[2];

    final double dataWith =
        dataDL(expFPOverErr, rulesetStat[0], rulesetStat[1], rulesetStat[4], rulesetStat[5]);
    final double theoryWith = theoryDL(index);
    final double dataWithout =
        dataDL(expFPOverErr, covWithout, uncovWithout, fpWithout, fnWithout);

    final double gain = dataWith + theoryWith - dataWithout;
    final double errorRate = ruleStat[4] / ruleStat[0];
    // the error rate only counts when the caller asked for the check
    final boolean tooManyErrors = checkErr && Utils.grOrEq(errorRate, 0.5);

    if (!Utils.grOrEq(gain, 0.0) && !tooManyErrors) {
      return Double.NaN;
    }

    // The rule is to be deleted: update ruleset stats. Other stats do not matter
    rulesetStat[0] = covWithout;
    rulesetStat[1] = uncovWithout;
    rulesetStat[4] = fpWithout;
    rulesetStat[5] = fnWithout;
    return gain;
  }
Beispiel #26
0
  /**
   * Parses a given list of options. Every option is set or reset by this call: an option that is
   * not supplied falls back to its default (attribute index "last", missing values not treated as
   * an extra value, 10 percent noise, seed 1).
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -C &lt;col&gt;
   *  Index of the attribute to be changed
   *  (default last attribute)</pre>
   *
   * <pre> -M
   *  Treat missing values as an extra value
   * </pre>
   *
   * <pre> -P &lt;num&gt;
   *  Specify the percentage of noise introduced
   *  to the data (default 10)</pre>
   *
   * <pre> -S &lt;num&gt;
   *  Specify the random number seed (default 1)</pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    String indexString = Utils.getOption('C', options);
    if (indexString.length() != 0) {
      setAttributeIndex(indexString);
    } else {
      setAttributeIndex("last");
    }

    // Always assign the flag so that a missing -M resets a previously enabled setting,
    // in line with the other options of this method.
    setUseMissing(Utils.getFlag('M', options));

    String percentString = Utils.getOption('P', options);
    if (percentString.length() != 0) {
      // fractional percentages are accepted and truncated towards zero
      setPercent((int) Double.parseDouble(percentString));
    } else {
      setPercent(10);
    }

    String seedString = Utils.getOption('S', options);
    if (seedString.length() != 0) {
      setRandomSeed(Integer.parseInt(seedString));
    } else {
      setRandomSeed(1);
    }
  }
  /**
   * Appends the prediction intervals as additional attributes at the end of the plot instances.
   * For every interval three attributes are added: lower boundary, upper boundary and width (upper
   * minus lower). Since classifiers can return a varying number of intervals per instance, the
   * columns of intervals that an instance does not have are filled with missing values. The
   * result replaces m_PlotInstances.
   */
  protected void addPredictionIntervals() {
    final FastVector predictions = m_Evaluation.predictions();

    // determine the maximum number of intervals over all predictions
    int maxIntervals = 0;
    for (int p = 0; p < predictions.size(); p++) {
      final int count =
          ((NumericPrediction) predictions.elementAt(p)).predictionIntervals().length;
      if (count > maxIntervals) {
        maxIntervals = count;
      }
    }

    // create new header: existing attributes first, then three attributes per interval
    final int numOldAtts = m_PlotInstances.numAttributes();
    final FastVector header = new FastVector();
    for (int a = 0; a < numOldAtts; a++) {
      header.addElement(m_PlotInstances.attribute(a));
    }
    for (int k = 1; k <= maxIntervals; k++) {
      final String prefix = "predictionInterval_" + k;
      header.addElement(new Attribute(prefix + "-lowerBoundary"));
      header.addElement(new Attribute(prefix + "-upperBoundary"));
      header.addElement(new Attribute(prefix + "-width"));
    }
    final Instances extended =
        new Instances(m_PlotInstances.relationName(), header, m_PlotInstances.numInstances());
    extended.setClassIndex(m_PlotInstances.classIndex());

    // transfer every instance, widened by its interval data
    for (int row = 0; row < m_PlotInstances.numInstances(); row++) {
      final Instance source = m_PlotInstances.instance(row);

      // copy old values
      final double[] rowValues = new double[extended.numAttributes()];
      System.arraycopy(source.toDoubleArray(), 0, rowValues, 0, source.numAttributes());

      // add interval data
      final double[][] intervals =
          ((NumericPrediction) predictions.elementAt(row)).predictionIntervals();
      for (int k = 0; k < maxIntervals; k++) {
        final int offset = numOldAtts + k * 3;
        if (k < intervals.length) {
          rowValues[offset] = intervals[k][0];
          rowValues[offset + 1] = intervals[k][1];
          rowValues[offset + 2] = intervals[k][1] - intervals[k][0];
        } else {
          rowValues[offset] = Utils.missingValue();
          rowValues[offset + 1] = Utils.missingValue();
          rowValues[offset + 2] = Utils.missingValue();
        }
      }

      extended.add(new DenseInstance(source.weight(), rowValues));
    }

    m_PlotInstances = extended;
  }
Beispiel #28
0
  /**
   * Convert a single instance over. The converted instance is added to the end of the output queue.
   *
   * <p>Attributes that are outside the discretize range or not numeric are copied unchanged. For
   * the others, a missing value stays missing. Otherwise the value becomes 0 when no cut points
   * exist for the attribute. In non-binary mode it becomes the index of the first cut point that
   * the value does not exceed, or the number of cut points if it exceeds them all. In binary mode
   * it becomes one 0/1 output value per cut point (0 if the value does not exceed that cut point).
   *
   * @param instance the instance to convert
   */
  protected void convertInstance(Instance instance) {

    final double[] converted = new double[outputFormatPeek().numAttributes()];
    int out = 0;

    // Copy and convert the values
    for (int att = 0; att < getInputFormat().numAttributes(); att++) {
      final boolean discretized =
          m_DiscretizeCols.isInRange(att) && getInputFormat().attribute(att).isNumeric();
      if (!discretized) {
        // attribute is passed through unchanged
        converted[out++] = instance.value(att);
        continue;
      }

      final double raw = instance.value(att);
      final boolean missing = instance.isMissing(att);

      if (m_CutPoints[att] == null) {
        // no cut points: a single output value
        converted[out++] = missing ? Utils.missingValue() : 0;
      } else if (m_MakeBinary) {
        // one output value per cut point
        for (int cut = 0; cut < m_CutPoints[att].length; cut++) {
          if (missing) {
            converted[out] = Utils.missingValue();
          } else {
            converted[out] = (raw <= m_CutPoints[att][cut]) ? 0 : 1;
          }
          out++;
        }
      } else {
        if (missing) {
          converted[out] = Utils.missingValue();
        } else {
          // advance to the first cut point that the value does not exceed
          int bin = 0;
          while (bin < m_CutPoints[att].length && !(raw <= m_CutPoints[att][bin])) {
            bin++;
          }
          converted[out] = bin;
        }
        out++;
      }
    }

    // keep the sparse/dense representation of the incoming instance
    final Instance result =
        (instance instanceof SparseInstance)
            ? new SparseInstance(instance.weight(), converted)
            : new DenseInstance(instance.weight(), converted);
    result.setDataset(getOutputFormat());
    copyValues(result, false, instance.dataset(), getOutputFormat());
    result.setDataset(getOutputFormat());
    push(result);
  }
 /**
  * Parses a given list of options. The numeric options -A (alpha star), -S (sigma) and -R (r) are
  * applied only when they are supplied; the flag -M (use sparse matrix) is always applied.
  *
  * @param options the list of options as an array of strings
  * @throws Exception if an option is not supported
  */
 public void setOptions(String[] options) throws Exception {
   final String alphaStar = Utils.getOption('A', options);
   if (alphaStar.length() > 0) {
     setAlphaStar(Double.parseDouble(alphaStar));
   }

   final String sigma = Utils.getOption('S', options);
   if (sigma.length() > 0) {
     setSigma(Double.parseDouble(sigma));
   }

   final String r = Utils.getOption('R', options);
   if (r.length() > 0) {
     setR(Double.parseDouble(r));
   }

   setUseSparseMatrix(Utils.getFlag('M', options));
 }
  /**
   * Parses a given list of options. The exponent (-E) and the lower-order flag (-L) are handled
   * here; the array is then passed on to the superclass.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -D
   *  Enables debugging output (if available) to be printed.
   *  (default: off)</pre>
   *
   * <pre> -no-checks
   *  Turns off all checks - use with caution!
   *  (default: checks on)</pre>
   *
   * <pre> -C &lt;num&gt;
   *  The size of the cache (a prime number), 0 for full cache and
   *  -1 to turn it off.
   *  (default: 250007)</pre>
   *
   * <pre> -E &lt;num&gt;
   *  The Exponent to use.
   *  (default: 1.0)</pre>
   *
   * <pre> -L
   *  Use lower-order terms.
   *  (default: no)</pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    final String exponent = Utils.getOption('E', options);
    // fall back to the documented default when -E is absent
    setExponent(exponent.length() != 0 ? Double.parseDouble(exponent) : 1.0);

    setUseLowerOrder(Utils.getFlag('L', options));

    super.setOptions(options);
  }