예제 #1
0
  /**
   * Returns description of the bias-variance decomposition results.
   *
   * <p>The report lists the classifier class name (followed by its options when the classifier is
   * an {@code OptionHandler}), the data file name, the class index (printed as "last" when
   * {@code getClassIndex()} returns 0), the training pool size, the number of iterations, the seed
   * and the error, sigma^2, bias^2 and variance values.
   *
   * @return the bias-variance decomposition results as a string, or {@code "Invalid setup"} when
   *     no classifier is set
   */
  public String toString() {

    // Fetch once so the null check, the instanceof test and the cast all see the same object.
    Object classifier = getClassifier();
    if (classifier == null) {
      return "Invalid setup";
    }

    StringBuilder result = new StringBuilder("\nBias-Variance Decomposition\n");

    result.append("\nClassifier   : ").append(classifier.getClass().getName());
    if (classifier instanceof OptionHandler) {
      // A blank is required here, otherwise the first option is glued to the class name.
      result.append(" ").append(Utils.joinOptions(((OptionHandler) classifier).getOptions()));
    }
    result.append("\nData File    : ").append(getDataFileName());
    result.append("\nClass Index  : ");
    if (getClassIndex() == 0) {
      result.append("last");
    } else {
      result.append(getClassIndex());
    }
    result.append("\nTraining Pool: ").append(getTrainPoolSize());
    result.append("\nIterations   : ").append(getTrainIterations());
    result.append("\nSeed         : ").append(getSeed());
    result.append("\nError        : ").append(Utils.doubleToString(getError(), 6, 4));
    result.append("\nSigma^2      : ").append(Utils.doubleToString(getSigma(), 6, 4));
    result.append("\nBias^2       : ").append(Utils.doubleToString(getBias(), 6, 4));
    result.append("\nVariance     : ").append(Utils.doubleToString(getVariance(), 6, 4));

    return result.append("\n").toString();
  }
  /**
   * Builds the option array handed to the classifier by combining the fixed options in
   * m_ClassifierOptions with one flag/value pair for every entry of m_CVParams.
   *
   * <p>Pairs whose parameter has m_AddAtEnd set are written backwards from the end of the array;
   * all other pairs are written forwards from the start. The fixed options are then copied in
   * directly behind the leading pairs.
   *
   * @return the options array
   */
  protected String[] createOptions() {

    final int numParams = m_CVParams.size();
    String[] result = new String[m_ClassifierOptions.length + 2 * numParams];
    int head = 0;
    int tail = result.length;

    // One "-<char> <value>" pair per cross-validated parameter
    for (int idx = 0; idx < numParams; idx++) {
      CVParameter param = (CVParameter) m_CVParams.elementAt(idx);
      double value = param.m_ParamValue;
      if (param.m_RoundParam) {
        value = Math.rint(value);
      }
      String flag = "-" + param.m_ParamChar;
      String valueText = "" + Utils.doubleToString(value, 4);

      if (param.m_AddAtEnd) {
        // filled back to front, so the value goes in before its flag
        result[--tail] = valueText;
        result[--tail] = flag;
      } else {
        result[head++] = flag;
        result[head++] = valueText;
      }
    }

    // The static options follow the pairs placed at the front
    System.arraycopy(m_ClassifierOptions, 0, result, head, m_ClassifierOptions.length);

    return result;
  }
예제 #3
0
  /**
   * Renders the linear regression model as text.
   *
   * <p>Every selected non-class attribute contributes one "coefficient * name" term; the
   * coefficient that follows the last attribute term (presumably the intercept) is appended as the
   * final summand.
   *
   * @return the model as a string, a placeholder message when no model has been built, or an
   *     error message when rendering throws
   */
  public String toString() {

    if (m_TransformedData == null) {
      return "Linear Regression: No model built yet.";
    }
    try {
      StringBuilder out = new StringBuilder("\nLinear Regression Model\n\n");
      out.append(m_TransformedData.classAttribute().name() + " =\n\n");

      // index into m_Coefficients; only advances for attributes that are actually printed
      int coefIdx = 0;
      for (int att = 0; att < m_TransformedData.numAttributes(); att++) {
        if (att == m_ClassIndex || !m_SelectedAttributes[att]) {
          continue;
        }
        if (coefIdx > 0) {
          out.append(" +\n");
        }
        out.append(Utils.doubleToString(m_Coefficients[coefIdx], 12, 4) + " * ");
        out.append(m_TransformedData.attribute(att).name());
        coefIdx++;
      }
      out.append(" +\n" + Utils.doubleToString(m_Coefficients[coefIdx], 12, 4));
      return out.toString();
    } catch (Exception e) {
      return "Can't print Linear Regression!";
    }
  }
예제 #4
0
  /**
   * Describes the clustering held in bestT: the number of clusters and, for each cluster, its
   * size, its prior probability and the probability of every attribute given that cluster.
   *
   * @return a textual description of the clusterer
   */
  @Override
  public String toString() {
    StringBuilder out = new StringBuilder("\nsIB\n===\n");
    out.append("\nNumber of clusters: " + m_numCluster + "\n");

    for (int cluster = 0; cluster < m_numCluster; cluster++) {
      out.append("\nCluster: " + cluster);
      out.append(" Size : " + bestT.size(cluster));
      out.append(" Prior probability: " + Utils.doubleToString(bestT.Pt[cluster], 4) + "\n\n");

      for (int att = 0; att < m_numAttributes; att++) {
        out.append("Attribute: " + m_data.attribute(att).name() + "\n");
        String conditional = Utils.doubleToString(bestT.Py_t.get(att, cluster), 4);
        out.append("Probability given the cluster = " + conditional + "\n");
      }
    }
    return out.toString();
  }
예제 #5
0
 /**
  * Describes the current partition: its score, the number of changes counted so far and, for
  * every cluster, the cluster size and prior probability.
  *
  * @return a string that describes the partition
  */
 @Override
 public String toString() {
   StringBuilder out = new StringBuilder();
   out.append("score (L) : " + Utils.doubleToString(L, 4) + "\n");
   out.append("number of changes : " + counter + "\n");

   int cluster = 0;
   while (cluster < m_numCluster) {
     out.append("\nCluster " + cluster + "\n");
     out.append("size : " + size(cluster) + "\n");
     out.append("prior prob : " + Utils.doubleToString(Pt[cluster], 4) + "\n");
     cluster++;
   }
   return out.toString();
 }
예제 #6
0
  /**
   * Returns the condition satisfied by instances in a subset.
   *
   * <p>For a nominal split attribute the condition is " = value"; otherwise subset 0 yields
   * " &lt;= splitPoint" and every other subset yields " &gt; splitPoint".
   *
   * @param index of subset
   * @param data training set.
   * @return the right-hand side of the split condition as text
   */
  @Override
  public final String rightSide(int index, Instances data) {

    if (data.attribute(m_attIndex).isNominal()) {
      return " = " + data.attribute(m_attIndex).value(index);
    }

    String comparison = (index == 0) ? " <= " : " > ";
    return comparison + Utils.doubleToString(m_splitPoint, 6);
  }
예제 #7
0
  /**
   * Describes the best-first search: start set, search direction, stale-search limit, number of
   * evaluated subsets and the absolute merit of the best subset.
   *
   * @return a description of the search
   */
  public String toString() {
    StringBuilder out = new StringBuilder("\tBest first.\n\tStart set: ");

    String startSet = (m_starting == null) ? "no attributes\n" : startSetToString() + "\n";
    out.append(startSet);

    out.append("\tSearch direction: ");
    if (m_searchDirection == SELECTION_BACKWARD) {
      out.append("backward\n");
    } else if (m_searchDirection == SELECTION_FORWARD) {
      out.append("forward\n");
    } else {
      out.append("bi-directional\n");
    }

    out.append("\tStale search after " + m_maxStale + " node expansions\n");
    out.append("\tTotal number of subsets evaluated: " + m_totalEvals + "\n");

    String merit = Utils.doubleToString(Math.abs(m_bestMerit), 8, 3);
    out.append("\tMerit of best subset found: " + merit + "\n");
    return out.toString();
  }
예제 #8
0
  /**
   * Computes the arithmetic mean of the currently selected column and shows it in a message box.
   * Does nothing when no column is selected or the column's attribute is not numeric.
   */
  private void calcMean() {
    // nothing selected, nothing to do
    if (m_CurrentCol == -1) {
      return;
    }

    ArffSortedTableModel tableModel = (ArffSortedTableModel) m_TableArff.getModel();

    // only numeric attributes have a mean
    if (!tableModel.getAttributeAt(m_CurrentCol).isNumeric()) {
      return;
    }

    // values are read at attribute index m_CurrentCol - 1
    // (presumably the first table column is not an attribute - confirm)
    double total = 0;
    int row = 0;
    while (row < tableModel.getRowCount()) {
      total += tableModel.getInstances().instance(row).value(m_CurrentCol - 1);
      row++;
    }
    double average = total / tableModel.getRowCount();

    // show result
    String message =
        "Mean for attribute '"
            + m_TableArff.getPlainColumnName(m_CurrentCol)
            + "':\n\t"
            + Utils.doubleToString(average, 3);
    ComponentHelper.showMessageBox(
        getParent(),
        "Mean for attribute...",
        message,
        JOptionPane.OK_CANCEL_OPTION,
        JOptionPane.PLAIN_MESSAGE);
  }
  /**
   * Finds the best parameter combination. (recursive for each parameter being optimised).
   *
   * <p>While {@code depth} is below the number of entries in m_CVParams, the parameter at that
   * index is stepped from its lower bound up to its upper bound and the method recurses for the
   * next parameter. Once every parameter has a value, the classifier is configured with the
   * options from createOptions(), evaluated over m_NumFolds cross-validation folds, and the
   * options are stored in m_BestClassifierOptions when the error rate beats m_BestPerformance.
   *
   * @param depth the index of the parameter to be optimised at this level
   * @param trainData the data the search is based on
   * @param random a random number generator; within this method it is only forwarded to the
   *     recursive call, the training folds are drawn with a fresh {@code new Random(1)}
   * @throws Exception if an error occurs
   */
  protected void findParamsByCrossValidation(int depth, Instances trainData, Random random)
      throws Exception {

    if (depth < m_CVParams.size()) {
      CVParameter cvParam = (CVParameter) m_CVParams.elementAt(depth);

      // The truncated value of (lower - upper + 0.5) selects where the upper bound comes from:
      // 1 -> number of attributes, 2 -> training fold size, anything else -> the explicit m_Upper.
      double upper;
      switch ((int) (cvParam.m_Lower - cvParam.m_Upper + 0.5)) {
        case 1:
          upper = m_NumAttributes;
          break;
        case 2:
          upper = m_TrainFoldSize;
          break;
        default:
          upper = cvParam.m_Upper;
          break;
      }
      // NOTE(review): the divisor is zero when m_Steps is 1, and the increment is zero when
      // upper equals m_Lower - confirm both are ruled out where the CVParameter is created.
      double increment = (upper - cvParam.m_Lower) / (cvParam.m_Steps - 1);
      // The loop variable is the parameter's own m_ParamValue field, which createOptions() reads
      // when the recursion reaches its base case.
      for (cvParam.m_ParamValue = cvParam.m_Lower;
          cvParam.m_ParamValue <= upper;
          cvParam.m_ParamValue += increment) {
        findParamsByCrossValidation(depth + 1, trainData, random);
      }
    } else {

      // All parameters have a value: evaluate this combination.
      Evaluation evaluation = new Evaluation(trainData);

      // Set the classifier options
      String[] options = createOptions();
      if (m_Debug) {
        System.err.print("Setting options for " + m_Classifier.getClass().getName() + ":");
        for (int i = 0; i < options.length; i++) {
          System.err.print(" " + options[i]);
        }
        System.err.println("");
      }
      ((OptionHandler) m_Classifier).setOptions(options);
      for (int j = 0; j < m_NumFolds; j++) {

        // We want to randomize the data the same way for every
        // learning scheme.
        Instances train = trainData.trainCV(m_NumFolds, j, new Random(1));
        Instances test = trainData.testCV(m_NumFolds, j);
        m_Classifier.buildClassifier(train);
        evaluation.setPriors(train);
        evaluation.evaluateModel(m_Classifier, test);
      }
      double error = evaluation.errorRate();
      if (m_Debug) {
        System.err.println("Cross-validated error rate: " + Utils.doubleToString(error, 6, 4));
      }
      // -99 appears to be the "no result yet" marker for m_BestPerformance - confirm against its
      // initialisation.
      if ((m_BestPerformance == -99) || (error < m_BestPerformance)) {

        m_BestPerformance = error;
        m_BestClassifierOptions = createOptions();
      }
    }
  }
예제 #10
0
 /**
  * Formats this metric as "name:(value)", where the value is the result of {@code compute} for
  * the given supports rendered with two decimal places.
  *
  * @param premiseSupport passed through to {@code compute}
  * @param consequenceSupport passed through to {@code compute}
  * @param totalSupport passed through to {@code compute}
  * @param totalTransactions passed through to {@code compute}
  * @return the metric name followed by its value in parentheses
  */
 public String toStringMetric(
     int premiseSupport, int consequenceSupport, int totalSupport, int totalTransactions) {
   String prefix = m_stringVal + ":(";
   String value =
       Utils.doubleToString(
           compute(premiseSupport, consequenceSupport, totalSupport, totalTransactions), 2);
   return prefix + value + ")";
 }
예제 #11
0
  /**
   * Get a bin range string for a specified bin of some attribute's cut points.
   *
   * <p>The first bin is rendered as "(-inf-c0]", the last bin as "(c[n-1]-inf)" and every bin in
   * between as "(c[j-1]-c[j]]".
   *
   * @param cutPoints The attribute's cut points; never null.
   * @param j The bin number (zero based); never out of range.
   * @param precision the precision for the range values
   * @return The bin range string.
   */
  private static String binRangeString(double[] cutPoints, int j, int precision) {
    assert cutPoints != null;

    final int numCuts = cutPoints.length;
    assert 0 <= j && j <= numCuts;

    if (j == 0) {
      // open-ended on the left
      return "(-inf-" + Utils.doubleToString(cutPoints[0], precision) + "]";
    }
    if (j == numCuts) {
      // open-ended on the right
      return "(" + Utils.doubleToString(cutPoints[numCuts - 1], precision) + "-inf)";
    }
    String lower = Utils.doubleToString(cutPoints[j - 1], precision);
    String upper = Utils.doubleToString(cutPoints[j], precision);
    return "(" + lower + "-" + upper + "]";
  }
예제 #12
0
  /**
   * Describes the genetic search: start set, population size, number of generations, crossover
   * and mutation probabilities, report frequency and seed, followed by the collected generation
   * reports.
   *
   * @return a description of the search as a String
   */
  public String toString() {
    StringBuilder out = new StringBuilder("\tGenetic search.\n\tStart set: ");

    String startSet = (m_starting == null) ? "no attributes\n" : startSetToString() + "\n";
    out.append(startSet);

    out.append("\tPopulation size: " + m_popSize);
    out.append("\n\tNumber of generations: " + m_maxGenerations);

    String crossover = Utils.doubleToString(m_pCrossover, 6, 3);
    out.append("\n\tProbability of crossover: " + crossover);
    String mutation = Utils.doubleToString(m_pMutation, 6, 3);
    out.append("\n\tProbability of mutation: " + mutation);

    out.append("\n\tReport frequency: " + m_reportFrequency);
    out.append("\n\tRandom number seed: " + m_seed + "\n");
    out.append(m_generationReports.toString());
    return out.toString();
  }
예제 #13
0
  /**
   * Returns a description of the classifier.
   *
   * <p>For every class the prior is printed, followed by one section per attribute: nominal
   * attributes list their values and the matching entries of m_Counts, all other attributes list
   * their mean and standard deviation.
   *
   * @return a description of the classifier as a string, a placeholder when no model has been
   *     built, or an error message when rendering throws
   */
  @Override
  public String toString() {

    if (m_Instances == null) {
      return "Naive Bayes (simple): No model built yet.";
    }
    try {
      StringBuilder out = new StringBuilder("Naive Bayes (simple)");

      for (int cls = 0; cls < m_Instances.numClasses(); cls++) {
        out.append("\n\nClass " + m_Instances.classAttribute().value(cls));
        out.append(": P(C) = " + Utils.doubleToString(m_Priors[cls], 10, 8) + "\n\n");

        // attPos counts the enumerated attributes and indexes the per-class statistics
        int attPos = 0;
        for (Enumeration<Attribute> atts = m_Instances.enumerateAttributes();
            atts.hasMoreElements();
            attPos++) {
          Attribute att = atts.nextElement();
          out.append("Attribute " + att.name() + "\n");

          if (!att.isNominal()) {
            out.append("Mean: " + Utils.doubleToString(m_Means[cls][attPos], 10, 8) + "\t");
            out.append("Standard Deviation: " + Utils.doubleToString(m_Devs[cls][attPos], 10, 8));
          } else {
            // header row with the value labels, then a row with the matching numbers
            for (int v = 0; v < att.numValues(); v++) {
              out.append(att.value(v) + "\t");
            }
            out.append("\n");
            for (int v = 0; v < att.numValues(); v++) {
              out.append(Utils.doubleToString(m_Counts[cls][attPos][v], 10, 8) + "\t");
            }
          }
          out.append("\n\n");
        }
      }

      return out.toString();
    } catch (Exception e) {
      return "Can't print Naive Bayes classifier!";
    }
  }
예제 #14
0
  /**
   * Prints out the classifier.
   *
   * <p>When m_weights is set, one signed weight per non-class attribute is listed (tagged as
   * standardized or normalized according to the filter type). Otherwise every instance with a
   * positive m_alpha or m_alphaStar is listed as a support vector term. The m_b term and the
   * kernel evaluation statistics follow.
   *
   * @return a description of the classifier as a string
   */
  @Override
  public String toString() {
    StringBuilder out = new StringBuilder("SMOreg\n\n");

    if (m_weights == null) {
      // non linear, print out all supportvectors
      out.append("Support vectors:\n");
      for (int inst = 0; inst < m_nInstances; inst++) {
        if (m_alpha[inst] > 0) {
          out.append("+" + m_alpha[inst] + " * k[" + inst + "]\n");
        }
        if (m_alphaStar[inst] > 0) {
          out.append("-" + m_alphaStar[inst] + " * k[" + inst + "]\n");
        }
      }
    } else {
      // it's a linear machine
      out.append("weights (not support vectors):\n");
      for (int att = 0; att < m_data.numAttributes(); att++) {
        if (att == m_classIndex) {
          continue;
        }
        String sign = m_weights[att] >= 0 ? " + " : " - ";
        out.append(sign + Utils.doubleToString(Math.abs(m_weights[att]), 12, 4) + " * ");
        if (m_SVM.getFilterType().getSelectedTag().getID() == SMOreg.FILTER_STANDARDIZE) {
          out.append("(standardized) ");
        } else if (m_SVM.getFilterType().getSelectedTag().getID() == SMOreg.FILTER_NORMALIZE) {
          out.append("(normalized) ");
        }
        out.append(m_data.attribute(att).name() + "\n");
      }
    }

    // note the sign convention: a non-positive m_b is printed with " + "
    String biasSign = m_b <= 0 ? " + " : " - ";
    out.append(biasSign + Utils.doubleToString(Math.abs(m_b), 12, 4) + "\n\n");

    out.append("\n\nNumber of kernel evaluations: " + m_nEvals);
    if (m_nCacheHits >= 0 && m_nEvals > 0) {
      double hitRatio = 1 - m_nEvals * 1.0 / (m_nCacheHits + m_nEvals);
      String percent = Utils.doubleToString(hitRatio * 100, 7, 3).trim();
      out.append(" (" + percent + "% cached)");
    }

    return out.toString();
  }
예제 #15
0
파일: Test.java 프로젝트: paolopavan/cfr
  /**
   * Returns the test represented by a string in Prolog notation.
   *
   * <p>Numeric attributes are rendered as "name &gt;= split" (negated test) or "name &lt; split".
   * For other attributes the value at index {@code (int) m_Split} is looked up: the value
   * "false" is rendered as "not(name)", any other value as the bare attribute name.
   *
   * @return a string representing the test in Prolog notation
   */
  public String toPrologString() {
    Attribute att = m_Dataset.attribute(m_AttIndex);
    String attName = att.name();
    StringBuilder str = new StringBuilder();

    if (att.isNumeric()) {
      str.append(attName + " ");
      String comparison = m_Not ? ">= " : "< ";
      str.append(comparison + Utils.doubleToString(m_Split, 3));
    } else {
      String value = att.value((int) m_Split);

      // Compare by content: "==" only matches when both sides are the same String object, which
      // is not guaranteed for values read from a dataset.
      if ("false".equals(value)) {
        str.append("not(" + attName + ")");
      } else {
        str.append(attName);
      }
    }
    return str.toString();
  }
예제 #16
0
파일: Test.java 프로젝트: paolopavan/cfr
 /**
  * Gives a string representation of the test in Prolog notation, starting from the comparison
  * symbol.
  *
  * <p>Numeric attributes yield "&gt;= split" or "&lt; split". Attributes with exactly two values
  * always yield "= value", where a negated test names the value at the other index (0 or 1).
  * All remaining attributes yield "!= value" or "= value".
  *
  * @return a string representing the test in Prolog notation
  */
 private String testPrologComparisonString() {
   Attribute att = m_Dataset.attribute(m_AttIndex);

   if (att.isNumeric()) {
     String comparison = m_Not ? ">= " : "< ";
     return comparison + Utils.doubleToString(m_Split, 3);
   }

   if (att.numValues() != 2) {
     String comparison = m_Not ? "!= " : "= ";
     return comparison + att.value((int) m_Split);
   }

   // two-valued attribute: express a negated test as equality with the other value
   String shown;
   if (m_Not) {
     shown = att.value((int) m_Split == 0 ? 1 : 0);
   } else {
     shown = att.value((int) m_Split);
   }
   return "= " + shown;
 }
예제 #17
0
  /**
   * Selects one classifier from m_Classifiers by minimising the error rate.
   *
   * <p>The data is copied, stripped of instances with a missing class and shuffled with m_Seed.
   * With more than one fold each candidate is cross-validated (stratified for a nominal class)
   * and the winner is rebuilt on the full data afterwards; otherwise each candidate is trained
   * and tested on the same full data. The first candidate with the lowest error wins.
   *
   * @param data the training data to be used for selecting the classifier.
   * @exception Exception if no base classifiers are set or a classifier could not be built
   */
  public void buildClassifier(Instances data) throws Exception {

    if (m_Classifiers.length == 0) {
      throw new Exception("No base classifiers have been set!");
    }

    Instances cleaned = new Instances(data);
    cleaned.deleteWithMissingClass();
    cleaned.randomize(new Random(m_Seed));
    if (cleaned.classAttribute().isNominal() && (m_NumXValFolds > 1)) {
      cleaned.stratify(m_NumXValFolds);
    }

    Classifier winner = null;
    int winnerIndex = -1;
    double lowestError = Double.NaN;
    final int total = m_Classifiers.length;

    for (int idx = 0; idx < total; idx++) {
      Classifier candidate = getClassifier(idx);
      Evaluation eval;

      if (m_NumXValFolds > 1) {
        // accumulate the statistics of all folds in one evaluation object
        eval = new Evaluation(cleaned);
        for (int fold = 0; fold < m_NumXValFolds; fold++) {
          Instances foldTrain = cleaned.trainCV(m_NumXValFolds, fold);
          Instances foldTest = cleaned.testCV(m_NumXValFolds, fold);
          candidate.buildClassifier(foldTrain);
          eval.setPriors(foldTrain);
          eval.evaluateModel(candidate, foldTest);
        }
      } else {
        // no cross-validation: train and test on the complete data
        candidate.buildClassifier(cleaned);
        eval = new Evaluation(cleaned);
        eval.evaluateModel(candidate, cleaned);
      }

      double error = eval.errorRate();
      if (m_Debug) {
        String formatted = Utils.doubleToString(error, 6, 4);
        System.err.println(
            "Error rate: " + formatted + " for classifier " + candidate.getClass().getName());
      }

      // the first candidate always becomes the initial best
      if ((idx == 0) || (error < lowestError)) {
        winner = candidate;
        lowestError = error;
        winnerIndex = idx;
      }
    }

    m_ClassifierIndex = winnerIndex;
    m_Classifier = winner;
    if (m_NumXValFolds > 1) {
      // the cross-validated winner was last trained on a fold only
      m_Classifier.buildClassifier(cleaned);
    }
  }
  /**
   * Returns a description of the regression model.
   *
   * <p>The output names the attribute the regression is based on, followed by the line
   * "slope * attribute + intercept" (or "- |intercept|" when the intercept is not positive).
   *
   * @return the model as a string, or {@code "No model built yet."} when m_attribute is null
   */
  public String toString() {

    if (m_attribute == null) {
      return "No model built yet.";
    }

    // m_attribute is known to be non-null from here on; the former "Predicting constant" branch
    // could never run after the early return above and has been removed.
    StringBuilder text = new StringBuilder();
    text.append("Linear regression on " + m_attribute.name() + "\n\n");
    text.append(Utils.doubleToString(m_slope, 2) + " * " + m_attribute.name());
    if (m_intercept > 0) {
      text.append(" + " + Utils.doubleToString(m_intercept, 2));
    } else {
      text.append(" - " + Utils.doubleToString((-m_intercept), 2));
    }
    text.append("\n");
    return text.toString();
  }
예제 #19
0
  /**
   * Determines the output format based on the input format and returns this. In case the output
   * format cannot be returned immediately, i.e., immediateOutputFormat() returns false, then this
   * method will be called from batchFinished().
   *
   * <p>Attributes outside m_Cols, and non-numeric attributes, are carried over unchanged. Every
   * other attribute is replaced by an attribute of the same name whose values are the sorted,
   * distinct, non-missing values found in the data (the string value for date attributes, the
   * number formatted with MAX_DECIMALS otherwise).
   *
   * @param inputFormat the input format to base the output format on
   * @return the output format
   * @throws Exception in case the determination goes wrong
   * @see #hasImmediateOutputFormat()
   * @see #batchFinished()
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {

    m_Cols.setUpper(inputFormat.numAttributes() - 1);
    Instances copy = new Instances(inputFormat);
    FastVector newAtts = new FastVector();

    for (int col = 0; col < copy.numAttributes(); col++) {
      boolean convert = m_Cols.isInRange(col) && copy.attribute(col).isNumeric();
      if (!convert) {
        newAtts.addElement(copy.attribute(col));
        continue;
      }

      // date attribute?
      boolean dateAtt = (copy.attribute(col).type() == Attribute.DATE);

      // collect the distinct, non-missing values present in this column
      HashSet distinct = new HashSet();
      for (int row = 0; row < copy.numInstances(); row++) {
        Instance current = copy.instance(row);
        if (current.isMissing(col)) {
          continue;
        }
        if (dateAtt) {
          distinct.add(current.stringValue(col));
        } else {
          distinct.add(Double.valueOf(current.value(col)));
        }
      }

      // sort values
      Vector ordered = new Vector(distinct);
      Collections.sort(ordered);

      // create attribute from sorted values
      FastVector labels = new FastVector();
      for (Object element : ordered) {
        if (dateAtt) {
          labels.addElement(element.toString());
        } else {
          labels.addElement(Utils.doubleToString(((Double) element).doubleValue(), MAX_DECIMALS));
        }
      }
      newAtts.addElement(new Attribute(copy.attribute(col).name(), labels));
    }

    Instances format = new Instances(inputFormat.relationName(), newAtts, 0);
    format.setClassIndex(inputFormat.classIndex());

    return format;
  }
예제 #20
0
    /**
     * Formats this metric as a {@code CRITERE} XML element whose {@code name} attribute is the
     * metric name and whose {@code value} attribute is the result of {@code compute} for the
     * given supports, rendered with two decimal places and preceded by a single blank.
     *
     * @param premiseSupport passed through to {@code compute}
     * @param consequenceSupport passed through to {@code compute}
     * @param totalSupport passed through to {@code compute}
     * @param totalTransactions passed through to {@code compute}
     * @return the XML element as a string
     */
    public String toXML(
        int premiseSupport, int consequenceSupport, int totalSupport, int totalTransactions) {
      StringBuilder xml = new StringBuilder();
      xml.append("<CRITERE name=\"" + m_stringVal);
      xml.append("\" value=\" ");
      xml.append(
          Utils.doubleToString(
              compute(premiseSupport, consequenceSupport, totalSupport, totalTransactions), 2));
      xml.append("\"/>");
      return xml.toString();
    }
예제 #21
0
  /**
   * Describes the rank search: the attribute evaluator with its options, the attribute ranking
   * (one-based positions with attribute names) and the absolute merit of the best subset.
   *
   * @return a description of the search
   */
  public String toString() {
    StringBuilder out = new StringBuilder("\tRankSearch :\n");
    out.append("\tAttribute evaluator : " + getAttributeEvaluator().getClass().getName() + " ");
    if (m_ASEval instanceof OptionHandler) {
      for (String option : ((OptionHandler) m_ASEval).getOptions()) {
        out.append(option + ' ');
      }
    }
    out.append("\n");

    out.append("\tAttribute ranking : \n");
    // field width for the rank numbers, derived from the number of ranked attributes
    int rankWidth = (int) (Math.log(m_Ranking.length) / Math.log(10) + 1);
    for (int pos = 0; pos < m_Ranking.length; pos++) {
      String rank = Utils.doubleToString((double) (m_Ranking[pos] + 1), rankWidth, 0);
      out.append("\t " + rank + " " + m_Instances.attribute(m_Ranking[pos]).name() + '\n');
    }

    out.append("\tMerit of best subset found : ");

    // width and decimals are derived from the magnitude of the merit and of its fractional part
    int width = 3;
    if (Math.abs(m_bestMerit) > 0) {
      width = (int) Math.abs((Math.log(Math.abs(m_bestMerit)) / Math.log(10))) + 2;
    }
    double fraction = (m_bestMerit - (int) m_bestMerit);
    int decimals = 2;
    if (Math.abs(fraction) > 0) {
      decimals = (int) (Math.abs((Math.log(Math.abs(fraction)) / Math.log(10))) + 3);
    }

    out.append(Utils.doubleToString(Math.abs(m_bestMerit), width + decimals, decimals) + "\n");
    return out.toString();
  }
예제 #22
0
  /**
   * Generates a report on the current population: a heading, a column header and one line per
   * population member with its absolute objective value, its fitness and its printed chromosome.
   *
   * @param genNum the generation number; 0 produces the "Initial population" heading
   * @return a report as a String
   */
  private String populationReport(int genNum) {
    StringBuilder report = new StringBuilder();

    String heading = (genNum == 0) ? "\nInitial population\n" : "\nGeneration: " + genNum + "\n";
    report.append(heading);
    report.append("merit   \tscaled  \tsubset\n");

    for (int member = 0; member < m_popSize; member++) {
      String merit = Utils.doubleToString(Math.abs(m_population[member].getObjective()), 8, 5);
      String scaled = Utils.doubleToString(m_population[member].getFitness(), 8, 5);
      report.append(merit + "\t" + scaled + "\t");
      report.append(printPopMember(m_population[member].getChromosome()) + "\n");
    }
    return report.toString();
  }
예제 #23
0
  /**
   * Outputs the association rules as a numbered list of "premise ==&gt; consequence acc:(value)"
   * lines.
   *
   * @return the formatted rules, or a notice when the first rule list is empty
   */
  public String toString() {

    if (m_allTheRules[0].size() == 0) {
      return "\nNo large itemsets and rules found!\n";
    }

    StringBuilder out = new StringBuilder();
    out.append("\nPredictiveApriori\n===================\n\n");
    out.append("\nBest rules found:\n\n");

    for (int rule = 0; rule < m_allTheRules[0].size(); rule++) {
      // rule number, right-aligned to the width of the configured number of rules
      out.append(
          Utils.doubleToString(
              (double) rule + 1, (int) (Math.log(m_numRules) / Math.log(10) + 1), 0));
      out.append(". " + ((ItemSet) m_allTheRules[0].elementAt(rule)).toString(m_instances));
      out.append(" ==> " + ((ItemSet) m_allTheRules[1].elementAt(rule)).toString(m_instances));
      out.append(
          "    acc:("
              + Utils.doubleToString(((Double) m_allTheRules[2].elementAt(rule)).doubleValue(), 5)
              + ")");
      out.append('\n');
    }

    return out.toString();
  }
  /**
   * Determines the "K" for the neighbors from the training set, initializes the labels of the test
   * set to "missing" and generates the neighbors for all instances in the test set.
   *
   * <p>Copies of the training and test sets are passed through the m_Missing filter before the
   * neighbor search; the unfiltered test instance is handed to each Neighbors object alongside its
   * filtered counterpart.
   *
   * @throws Exception if initialization fails
   */
  protected void initialize() throws Exception {
    // determine K
    if (getVerbose()) {
      System.out.println("\nOriginal KNN = " + m_KNN);
    }
    IBk knn = (IBk) m_Classifier;
    knn.setKNN(m_KNN);
    knn.setCrossValidate(true);
    m_Classifier.buildClassifier(m_TrainsetNew);
    m_Classifier.toString(); // necessary to crossvalidate IBk!
    knn.setCrossValidate(false);
    m_KNNdetermined = knn.getKNN();
    if (getVerbose()) {
      System.out.println("Determined KNN = " + m_KNNdetermined);
    }

    // set class labels in test set to "missing"
    for (int row = 0; row < m_TestsetNew.numInstances(); row++) {
      m_TestsetNew.instance(row).setClassMissing();
    }

    // copy data
    Instances filteredTrain = new Instances(m_TrainsetNew);
    Instances filteredTest = new Instances(m_TestsetNew);

    // filter data
    m_Missing.setInputFormat(filteredTrain);
    filteredTrain = Filter.useFilter(filteredTrain, m_Missing);
    filteredTest = Filter.useFilter(filteredTest, m_Missing);

    // create the list of neighbors for the instances in the test set
    m_NeighborsTestset = new Neighbors[m_TestsetNew.numInstances()];
    double started = System.currentTimeMillis();
    for (int row = 0; row < filteredTest.numInstances(); row++) {
      Neighbors neighbors =
          new Neighbors(
              filteredTest.instance(row),
              m_TestsetNew.instance(row),
              m_KNNdetermined,
              filteredTrain,
              filteredTest);
      m_NeighborsTestset[row] = neighbors;
      neighbors.setVerbose(getVerbose() || getDebug());
      neighbors.setUseNaiveSearch(getUseNaiveSearch());
      neighbors.find();
    }
    double finished = System.currentTimeMillis();

    if (getVerbose()) {
      String seconds = Utils.doubleToString((finished - started) / 1000.0, 3);
      System.out.println("Time for finding neighbors: " + seconds);
    }
  }
예제 #25
0
  /**
   * Optimize the partition.
   *
   * <p>Repeatedly sweeps over all instances, drawing each one out of its cluster and
   * re-clustering it, until checkConvergence reports that the process is done. Instances that
   * are the only member of their cluster are left alone. The number of reassignments is added to
   * the partition's counter after every sweep, and the partition's score L is computed at the
   * end.
   *
   * @param tmpT partition to be optimized
   * @param input object describing the statistics of the training dataset
   * @return the optimized partition (the same object as tmpT)
   */
  private Partition sIB_OptimizeT(Partition tmpT, Input input) {
    if (m_verbose) {
      System.out.println("Optimizing...");
      System.out.println("-------------");
    }

    int sweep = 0;
    boolean converged;
    do {
      int moved = 0;
      for (int inst = 0; inst < m_numInstances; inst++) {
        int previous = tmpT.Pt_x[inst];

        // If the current cluster only has one instance left, leave it.
        if (tmpT.size(previous) == 1) {
          if (m_verbose) {
            System.out.format("cluster %s has only 1 doc remain\n", previous);
          }
          continue;
        }

        // draw the instance out from its previous cluster
        reduce_x(inst, previous, tmpT, input);

        // re-cluster the instance
        int assigned = clusterInstance(inst, input, tmpT);
        if (assigned != previous) {
          moved++;
          updateAssignment(inst, assigned, tmpT, input.Px[inst], input.Py_x);
        }
      }

      tmpT.counter += moved;
      if (m_verbose) {
        System.out.format("iteration %s , changes : %s\n", sweep, moved);
      }
      converged = checkConvergence(moved, sweep);
      sweep++;
    } while (!converged);

    // compute the sIB score
    tmpT.L = sIB_local_MI(tmpT.Py_t, tmpT.Pt);
    if (m_verbose) {
      System.out.format("score (L) : %s \n", Utils.doubleToString(tmpT.L, 4));
    }
    return tmpT;
  }
예제 #26
0
  /**
   * Returns a string representation of the classifier.
   *
   * <p>The output starts with the simple class name underlined with '=' characters. If a ZeroR
   * fallback model is present, a warning and that model follow. Otherwise the text reports that
   * no model has been built, lists the mutual information of every non-class attribute (when
   * getInternals() is true), or states that the model was built successfully.
   *
   * @return string representation of the classifier
   */
  public String toString() {
    // strip the package prefix, then build an underline of the same length
    String shortName = this.getClass().getName().replaceAll(".*\\.", "");
    StringBuilder out = new StringBuilder();
    out.append(shortName + "\n");
    out.append(shortName.replaceAll(".", "=") + "\n\n");

    // only ZeroR model?
    if (m_ZeroR != null) {
      out.append("Warning: No model could be built, hence ZeroR model is used:\n\n");
      out.append(m_ZeroR.toString());
      return out.toString();
    }

    if (m_Header == null) {
      out.append("No Model built yet.\n");
      return out.toString();
    }

    if (!getInternals()) {
      out.append("Model built successfully.\n");
      return out.toString();
    }

    out.append("Mutual information of attributes with class attribute:\n");
    for (int att = 0; att < m_Header.numAttributes(); att++) {
      // skip class
      if (att == m_Header.classIndex()) {
        continue;
      }
      out.append((att + 1) + ". " + m_Header.attribute(att).name() + ": ");
      out.append(Utils.doubleToString(m_mutualInformation[att], 6) + "\n");
    }

    return out.toString();
  }
예제 #27
0
  /**
   * Processes the given data (may change the provided dataset) and returns the modified version.
   * This method is called in batchFinished().
   *
   * <p>For every non-missing numeric value inside m_Cols the value is replaced by the index of
   * its label in the corresponding output attribute. The label is the string value for date
   * attributes and the number formatted with MAX_DECIMALS otherwise.
   *
   * @param instances the data to process
   * @return the modified data
   * @throws Exception in case the processing goes wrong
   * @see #batchFinished()
   */
  protected Instances process(Instances instances) throws Exception {
    // we need the complete input data!
    if (!isFirstBatchDone()) {
      setOutputFormat(determineOutputFormat(getInputFormat()));
    }

    Instances converted = new Instances(getOutputFormat());

    for (int row = 0; row < instances.numInstances(); row++) {
      Instance source = instances.instance(row);
      double[] vals = source.toDoubleArray();

      for (int col = 0; col < vals.length; col++) {
        boolean applies =
            m_Cols.isInRange(col)
                && instances.attribute(col).isNumeric()
                && !source.isMissing(col);
        if (!applies) {
          continue;
        }

        // look the value's label up in the output attribute
        String label;
        if (instances.attribute(col).type() == Attribute.DATE) {
          label = source.stringValue(col);
        } else {
          label = Utils.doubleToString(source.value(col), MAX_DECIMALS);
        }
        vals[col] = converted.attribute(col).indexOfValue(label);
      }

      // generate new instance of the same kind as the source
      Instance target;
      if (source instanceof SparseInstance) {
        target = new SparseInstance(source.weight(), vals);
      } else {
        target = new DenseInstance(source.weight(), vals);
      }

      // copy possible string, relational values
      target.setDataset(getOutputFormat());
      copyValues(target, false, source.dataset(), getOutputFormat());

      converted.add(target);
    }

    return converted;
  }
예제 #28
0
  /**
   * Describes the attribute ranking: the ignored attributes when a start set is given, and the
   * discarding threshold when it differs from {@code -Double.MAX_VALUE}.
   *
   * @return a description of the search
   */
  public String toString() {
    StringBuilder out = new StringBuilder("\tAttribute ranking.\n");

    if (m_starting != null) {
      out.append("\tIgnored attributes: ");
      out.append(startSetToString());
      out.append("\n");
    }

    boolean hasThreshold = m_threshold != -Double.MAX_VALUE;
    if (hasThreshold) {
      String threshold = Utils.doubleToString(m_threshold, 8, 4);
      out.append("\tThreshold for discarding attributes: " + threshold + "\n");
    }

    return out.toString();
  }
예제 #29
0
  /**
   * Describes the linear forward selection search: start set, forward selection method,
   * stale-search limit, selection type, number of top-ranked attributes used, number of evaluated
   * subsets and the absolute merit of the best subset.
   *
   * @return a description of the search
   */
  public String toString() {
    StringBuilder out = new StringBuilder("\tLinear Forward Selection.\n\tStart set: ");

    String startSet = (m_starting == null) ? "no attributes\n" : startSetToString() + "\n";
    out.append(startSet);

    out.append("\tForward selection method: ");
    out.append(
        m_forwardSearchMethod == SEARCH_METHOD_FORWARD
            ? "forward selection\n"
            : "floating forward selection\n");

    out.append("\tStale search after " + m_maxStale + " node expansions\n");

    out.append("\tLinear Forward Selection Type: ");
    out.append(m_linearSelectionType == TYPE_FIXED_SET ? "fixed-set\n" : "fixed-width\n");

    out.append("\tNumber of top-ranked attributes that are used: " + m_numUsedAttributes + "\n");
    out.append("\tTotal number of subsets evaluated: " + m_totalEvals + "\n");

    String merit = Utils.doubleToString(Math.abs(m_bestMerit), 8, 3);
    out.append("\tMerit of best subset found: " + merit + "\n");

    return out.toString();
  }
예제 #30
0
  /**
   * Returns a string summarising the stats so far: count, min, max, sum, sum of squares, mean and
   * standard deviation, one labelled value per line.
   *
   * @return the summary string
   */
  public String toString() {

    StringBuilder out = new StringBuilder();
    out.append("Count   ").append(Utils.doubleToString(count, 8)).append('\n');
    out.append("Min     ").append(Utils.doubleToString(min, 8)).append('\n');
    out.append("Max     ").append(Utils.doubleToString(max, 8)).append('\n');
    out.append("Sum     ").append(Utils.doubleToString(sum, 8)).append('\n');
    out.append("SumSq   ").append(Utils.doubleToString(sumSq, 8)).append('\n');
    out.append("Mean    ").append(Utils.doubleToString(mean, 8)).append('\n');
    out.append("StdDev  ").append(Utils.doubleToString(stdDev, 8)).append('\n');
    return out.toString();
  }