Exemple #1
0
  /**
   * Runs a genetic-algorithm search over the attribute subset space.
   *
   * <p>An initial population is created, evaluated and scaled, and is then evolved for at most
   * {@code m_maxGenerations} generations. The loop ends early as soon as {@code checkBest()}
   * reports convergence. A population report is recorded for generation 0, for every generation
   * that is a multiple of {@code m_reportFrequency}, for the last generation, and for the
   * generation at which the search converges.
   *
   * @param ASEval the subset evaluator used to score candidate subsets
   * @param data the training instances
   * @return an array (not necessarily ordered) of selected attribute indexes
   * @throws Exception if {@code ASEval} is not a subset evaluator or the search can't be
   *     completed
   */
  @Override
  public int[] search(ASEvaluation ASEval, Instances data) throws Exception {

    // discard whatever a previous run left behind
    m_generationReports = new StringBuffer();
    m_best = null;

    if (!(ASEval instanceof SubsetEvaluator)) {
      throw new Exception(ASEval.getClass().getName() + " is not a Subset evaluator!");
    }

    // only non-unsupervised evaluators work with a class attribute
    final boolean supervised = !(ASEval instanceof UnsupervisedSubsetEvaluator);
    m_hasClass = supervised;
    if (supervised) {
      m_classIndex = data.classIndex();
    }

    final SubsetEvaluator subsetEvaluator = (SubsetEvaluator) ASEval;
    m_numAttribs = data.numAttributes();

    // resolve the user-supplied start set (if any) against the real attribute count
    m_startRange.setUpper(m_numAttribs - 1);
    final boolean startSetGiven = !getStartSet().equals("");
    if (startSetGiven) {
      m_starting = m_startRange.getSelection();
    }

    // fresh lookup table, random number generator and population storage
    m_lookupTable = new Hashtable<BitSet, GABitSet>(m_lookupTableSize);
    m_random = new Random(m_seed);
    m_population = new GABitSet[m_popSize];

    // generation 0: build, score and scale the initial population
    initPopulation();
    evaluatePopulation(subsetEvaluator);
    populationStatistics();
    scalePopulation();
    checkBest();
    m_generationReports.append(populationReport(0));

    int generationNumber = 1;
    while (generationNumber <= m_maxGenerations) {
      generation();
      evaluatePopulation(subsetEvaluator);
      populationStatistics();
      scalePopulation();

      // track the best population member; checkBest() also signals convergence
      final boolean converged = checkBest();

      if (generationNumber == m_maxGenerations
          || generationNumber % m_reportFrequency == 0
          || converged) {
        m_generationReports.append(populationReport(generationNumber));
      }
      if (converged) {
        break;
      }
      generationNumber++;
    }

    return attributeList(m_best.getChromosome());
  }
Exemple #2
0
  /**
   * Lists the command-line options understood by this search method.
   *
   * <p>The three options of the search itself ({@code -A}, {@code -S}, {@code -R}) come first.
   * If the configured attribute evaluator is an {@code OptionHandler}, a heading entry and the
   * evaluator's own options are appended.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    final String evaluatorHelp =
        "\tclass name of attribute evaluator to use for ranking. Place any\n"
            + "\tevaluator options LAST on the command line following a \"--\".\n"
            + "\teg.:\n"
            + "\t\t-A weka.attributeSelection.GainRatioAttributeEval ... -- -M\n"
            + "\t(default: weka.attributeSelection.GainRatioAttributeEval)";
    final String stepSizeHelp =
        "\tnumber of attributes to be added from the\n\tranking in each iteration (default = 1).";
    final String startPointHelp =
        "\tpoint in the ranking to start evaluating from. "
            + "\n\t(default = 0, ie. the head of the ranking).";

    Vector options = new Vector(4);
    options.addElement(new Option(evaluatorHelp, "A", 1, "-A <attribute evaluator>"));
    options.addElement(new Option(stepSizeHelp, "S", 1, "-S <step size>"));
    options.addElement(new Option(startPointHelp, "R", 1, "-R <start point>"));

    // instanceof is false for null, so no separate null check is needed
    if (m_ASEval instanceof OptionHandler) {
      final String heading =
          "\nOptions specific to evaluator " + m_ASEval.getClass().getName() + ":";
      options.addElement(new Option("", "", 0, heading));

      // append the evaluator's own options after the heading
      for (Enumeration nested = ((OptionHandler) m_ASEval).listOptions();
          nested.hasMoreElements(); ) {
        options.addElement(nested.nextElement());
      }
    }

    return options.elements();
  }
Exemple #3
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -A &lt;attribute evaluator&gt;
   *  class name of attribute evaluator to use for ranking. Place any
   *  evaluator options LAST on the command line following a "--".
   *  eg.:
   *   -A weka.attributeSelection.GainRatioAttributeEval ... -- -M
   *  (default: weka.attributeSelection.GainRatioAttributeEval)</pre>
   *
   * <pre> -S &lt;step size&gt;
   *  number of attributes to be added from the
   *  ranking in each iteration (default = 1).</pre>
   *
   * <pre> -R &lt;start point&gt;
   *  point in the ranking to start evaluating from.
   *  (default = 0, ie. the head of the ranking).</pre>
   *
   * <pre>
   * Options specific to evaluator weka.attributeSelection.GainRatioAttributeEval:
   * </pre>
   *
   * <pre> -M
   *  treat missing values as a seperate value.</pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    // start from the defaults so options that are not supplied do not keep stale values
    resetOptions();

    // -S <step size>
    final String stepSize = Utils.getOption('S', options);
    if (!stepSize.isEmpty()) {
      setStepSize(Integer.parseInt(stepSize));
    }

    // -R <start point>
    final String startPoint = Utils.getOption('R', options);
    if (!startPoint.isEmpty()) {
      setStartPoint(Integer.parseInt(startPoint));
    }

    // -A <attribute evaluator>; defaults to GainRatioAttributeEval when not given
    String evaluatorName = Utils.getOption('A', options);
    if (evaluatorName.isEmpty()) {
      evaluatorName = GainRatioAttributeEval.class.getName();
    }

    // the partitioned remainder of the option array is handed to the evaluator itself
    final String[] evaluatorOptions = Utils.partitionOptions(options);
    setAttributeEvaluator(ASEvaluation.forName(evaluatorName, evaluatorOptions));
  }
Exemple #4
0
  /**
   * Ranks attributes using the configured attribute evaluator and then searches the ranking using
   * the supplied subset evaluator.
   *
   * <p>The ranking is produced by a {@code Ranker} when the configured evaluator is an
   * {@code AttributeEvaluator}, and by a ranking-generating {@code GreedyStepwise} search
   * otherwise. Starting at position {@code m_startPoint} and advancing {@code m_add} positions
   * at a time, each prefix of the ranking is evaluated as a subset; the first prefix with the
   * highest merit is returned and its merit is stored in {@code m_bestMerit}.
   *
   * @param ASEval the subset evaluator to guide the search
   * @param data the training instances.
   * @return an array (not necessarily ordered) of selected attribute indexes
   * @throws Exception if {@code ASEval} is not a subset evaluator, if the start point lies
   *     beyond the end of the ranking so that no subset can be evaluated, or if the search can't
   *     be completed
   */
  public int[] search(ASEvaluation ASEval, Instances data) throws Exception {

    if (!(ASEval instanceof SubsetEvaluator)) {
      throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator!");
    }
    SubsetEvaluator subsetEvaluator = (SubsetEvaluator) ASEval;

    m_SubsetEval = ASEval;
    m_Instances = data;
    m_numAttribs = m_Instances.numAttributes();

    if (m_ASEval instanceof UnsupervisedAttributeEvaluator
        || m_ASEval instanceof UnsupervisedSubsetEvaluator) {
      m_hasClass = false;
    } else {
      m_hasClass = true;
      m_classIndex = m_Instances.classIndex();
    }

    if (m_ASEval instanceof AttributeEvaluator) {
      // generate the attribute ranking first
      Ranker ranker = new Ranker();
      m_ASEval.buildEvaluator(m_Instances);
      if (m_ASEval instanceof AttributeTransformer) {
        // get the transformed data and rebuild the subset evaluator on it
        m_Instances = ((AttributeTransformer) m_ASEval).transformedData(m_Instances);
        ASEval.buildEvaluator(m_Instances);
      }
      m_Ranking = ranker.search(m_ASEval, m_Instances);
    } else {
      // subset evaluators are ranked through a greedy stepwise search
      GreedyStepwise fs = new GreedyStepwise();
      fs.setGenerateRanking(true);
      m_ASEval.buildEvaluator(m_Instances);
      fs.search(m_ASEval, m_Instances);
      double[][] rankres = fs.rankedAttributes();
      m_Ranking = new int[rankres.length];
      for (int i = 0; i < rankres.length; i++) {
        // column 0 of each ranked row is read as the attribute index
        m_Ranking[i] = (int) rankres[i][0];
      }
    }

    // now evaluate the attribute ranking: each candidate is the prefix m_Ranking[0..i]
    double bestMerit = -Double.MAX_VALUE;
    BitSet bestGroup = null;
    for (int i = m_startPoint; i < m_Ranking.length; i += m_add) {
      BitSet candidate = new BitSet(m_numAttribs);
      for (int j = 0; j <= i; j++) {
        candidate.set(m_Ranking[j]);
      }
      double merit = subsetEvaluator.evaluateSubset(candidate);

      // strict comparison keeps the earliest (smallest) prefix among equal merits
      if (merit > bestMerit) {
        bestMerit = merit;
        bestGroup = candidate;
      }
    }

    // Nothing was evaluated when the start point is at or beyond the end of the ranking (or the
    // ranking is empty). Report that explicitly instead of handing a null subset onwards.
    if (bestGroup == null) {
      throw new Exception(
          "No attribute subset could be evaluated: start point "
              + m_startPoint
              + " is beyond the end of the ranking ("
              + m_Ranking.length
              + " attributes).");
    }

    m_bestMerit = bestMerit;
    return attributeList(bestGroup);
  }
Exemple #5
0
  /**
   * Searches the attribute subset space by best first search.
   *
   * <p>The search starts from the configured start set. Without a start set it starts from all
   * non-class attributes when the direction is backward, and from the empty set otherwise. It
   * then repeatedly removes the subset at the head of an open list and expands it by adding
   * and/or removing single non-class attributes, depending on the search direction. The loop
   * ends once {@code m_maxStale} consecutive expansions have failed to improve on the best
   * subset, or when the open list is empty. Merits of evaluated subsets are cached in a
   * hashtable keyed by the subset's string form.
   *
   * <p>Fields written by this method: {@code m_totalEvals}, {@code m_hasClass},
   * {@code m_classIndex} (only when the evaluator is not unsupervised), {@code m_numAttribs},
   * {@code m_starting} and {@code m_bestMerit}. A backward search without a start set also calls
   * {@code setStartSet("1-last")}.
   *
   * @param ASEval the attribute evaluator to guide the search
   * @param data the training instances.
   * @return an array (not necessarily ordered) of selected attribute indexes
   * @throws Exception if {@code ASEval} is not a subset evaluator or the search can't be
   *     completed
   */
  public int[] search(ASEvaluation ASEval, Instances data) throws Exception {
    m_totalEvals = 0;
    if (!(ASEval instanceof SubsetEvaluator)) {
      throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator!");
    }

    // NOTE(review): m_classIndex is not reset in the unsupervised branch, yet it is still
    // compared against below - confirm it cannot hold a stale value from an earlier run.
    if (ASEval instanceof UnsupervisedSubsetEvaluator) {
      m_hasClass = false;
    } else {
      m_hasClass = true;
      m_classIndex = data.classIndex();
    }

    SubsetEvaluator ASEvaluator = (SubsetEvaluator) ASEval;
    m_numAttribs = data.numAttributes();
    int i, j;
    // number of attributes in the best subset found so far
    int best_size = 0;
    // number of attributes in the subset currently being expanded
    int size = 0;
    // remaining expansion passes for the current node (2 for bi-directional, else 1)
    int done;
    // direction of the current expansion pass
    int sd = m_searchDirection;
    BitSet best_group, temp_group;
    // number of consecutive expansions that did not improve on the best subset
    int stale;
    double best_merit;
    double merit;
    boolean z;
    // true once the current expansion has produced a new best subset
    boolean added;
    Link2 tl;
    // cache of subset merits, keyed by BitSet.toString()
    Hashtable lookup = new Hashtable(m_cacheSize * m_numAttribs);
    int insertCount = 0;
    // incremented on every cache hit but never read inside this method
    int cacheHits = 0;
    // open list of subsets awaiting expansion; presumably kept ordered by merit - TODO confirm
    LinkedList2 bfList = new LinkedList2(m_maxStale);
    best_merit = -Double.MAX_VALUE;
    stale = 0;
    best_group = new BitSet(m_numAttribs);

    m_startRange.setUpper(m_numAttribs - 1);
    if (!(getStartSet().equals(""))) {
      m_starting = m_startRange.getSelection();
    }
    // If a starting subset has been supplied, then initialise the bitset
    if (m_starting != null) {
      for (i = 0; i < m_starting.length; i++) {
        if ((m_starting[i]) != m_classIndex) {
          best_group.set(m_starting[i]);
        }
      }

      // NOTE(review): this counts every entry of m_starting, including the class index if it
      // was listed, although the class index is not set in best_group above.
      best_size = m_starting.length;
      m_totalEvals++;
    } else {
      if (m_searchDirection == SELECTION_BACKWARD) {
        setStartSet("1-last");
        m_starting = new int[m_numAttribs];

        // init initial subset to all attributes
        for (i = 0, j = 0; i < m_numAttribs; i++) {
          if (i != m_classIndex) {
            best_group.set(i);
            m_starting[j++] = i;
          }
        }

        best_size = m_numAttribs - 1;
        m_totalEvals++;
      }
    }

    // evaluate the initial subset
    best_merit = ASEvaluator.evaluateSubset(best_group);
    // add the initial group to the list and the hash table
    Object[] best = new Object[1];
    best[0] = best_group.clone();
    bfList.addToList(best, best_merit);
    BitSet tt = (BitSet) best_group.clone();
    String hashC = tt.toString();
    lookup.put(hashC, new Double(best_merit));

    while (stale < m_maxStale) {
      added = false;

      if (m_searchDirection == SELECTION_BIDIRECTIONAL) {
        // bi-directional search: a forward pass followed by a backward pass
        done = 2;
        sd = SELECTION_FORWARD;
      } else {
        done = 1;
      }

      // finished search?
      if (bfList.size() == 0) {
        stale = m_maxStale;
        break;
      }

      // copy the attribute set at the head of the list
      tl = bfList.getLinkAt(0);
      temp_group = (BitSet) (tl.getData()[0]);
      temp_group = (BitSet) temp_group.clone();
      // remove the head of the list
      bfList.removeLinkAt(0);
      // count the number of bits set (attributes)
      int kk;

      for (kk = 0, size = 0; kk < m_numAttribs; kk++) {
        if (temp_group.get(kk)) {
          size++;
        }
      }

      do {
        for (i = 0; i < m_numAttribs; i++) {
          // forward: consider attributes not yet in the subset;
          // backward: consider attributes currently in it. The class index is always skipped.
          if (sd == SELECTION_FORWARD) {
            z = ((i != m_classIndex) && (!temp_group.get(i)));
          } else {
            z = ((i != m_classIndex) && (temp_group.get(i)));
          }

          if (z) {
            // set the bit (attribute to add/delete)
            if (sd == SELECTION_FORWARD) {
              temp_group.set(i);
              size++;
            } else {
              temp_group.clear(i);
              size--;
            }

            /* if this subset has been seen before, then it is already
            in the list (or has been fully expanded) */
            tt = (BitSet) temp_group.clone();
            hashC = tt.toString();

            if (lookup.containsKey(hashC) == false) {
              merit = ASEvaluator.evaluateSubset(temp_group);
              m_totalEvals++;

              // insert this one in the hashtable; once the insert count exceeds the
              // configured capacity the whole cache is dropped and rebuilt from scratch
              if (insertCount > m_cacheSize * m_numAttribs) {
                lookup = new Hashtable(m_cacheSize * m_numAttribs);
                insertCount = 0;
              }
              hashC = tt.toString();
              lookup.put(hashC, new Double(merit));
              insertCount++;
            } else {
              // cached: reuse the stored merit instead of re-evaluating
              merit = ((Double) lookup.get(hashC)).doubleValue();
              cacheHits++;
            }

            // insert this one in the list (done for cached subsets as well)
            Object[] add = new Object[1];
            add[0] = tt.clone();
            bfList.addToList(add, merit);

            if (m_debug) {
              System.out.print("Group: ");
              printGroup(tt, m_numAttribs);
              System.out.println("Merit: " + merit);
            }

            // is this better than the best?
            // forward: the merit must improve by more than 0.00001;
            // backward: a higher merit wins, and an equal merit wins if the subset is smaller
            if (sd == SELECTION_FORWARD) {
              z = ((merit - best_merit) > 0.00001);
            } else {
              if (merit == best_merit) {
                z = (size < best_size);
              } else {
                z = (merit > best_merit);
              }
            }

            if (z) {
              added = true;
              stale = 0;
              best_merit = merit;
              // best_size = (size + best_size);
              best_size = size;
              best_group = (BitSet) (temp_group.clone());
            }

            // unset this addition(deletion) so the next candidate starts from the same node
            if (sd == SELECTION_FORWARD) {
              temp_group.clear(i);
              size--;
            } else {
              temp_group.set(i);
              size++;
            }
          }
        }

        // bi-directional search: the second pass over this node runs backward
        if (done == 2) {
          sd = SELECTION_BACKWARD;
        }

        done--;
      } while (done > 0);

      /* if we haven't added a new attribute subset then full expansion
      of this node hasn't resulted in anything better */
      if (!added) {
        stale++;
      }
    }

    m_bestMerit = best_merit;
    return attributeList(best_group);
  }
Exemple #6
0
  /**
   * Kind of a dummy search algorithm. Calls an attribute evaluator to evaluate each attribute not
   * included in the start set and then sorts them to produce a ranked list of attributes.
   *
   * <p>Fields written by this method: {@code m_numAttribs}, {@code m_hasClass},
   * {@code m_classIndex}, {@code m_starting}, {@code m_attributeList} and
   * {@code m_attributeMerit}.
   *
   * @param ASEval the attribute evaluator to guide the search
   * @param data the training instances.
   * @return the evaluated attribute indexes, in the order produced by
   *     {@code rankedAttributes()}
   * @throws Exception if {@code ASEval} is not an attribute evaluator or the search can't be
   *     completed
   */
  public int[] search(ASEvaluation ASEval, Instances data) throws Exception {
    if (!(ASEval instanceof AttributeEvaluator)) {
      // the original message lacked a space and read "is not aAttribute evaluator!"
      throw new Exception(ASEval.getClass().getName() + " is not an Attribute evaluator!");
    }

    m_numAttribs = data.numAttributes();

    if (ASEval instanceof UnsupervisedAttributeEvaluator) {
      m_hasClass = false;
    } else {
      m_classIndex = data.classIndex();
      m_hasClass = m_classIndex >= 0;
    }

    // get the transformed data and check to see if the transformer
    // preserves a class index
    if (ASEval instanceof AttributeTransformer) {
      data = ((AttributeTransformer) ASEval).transformedHeader();
      if (m_classIndex >= 0 && data.classIndex() >= 0) {
        m_classIndex = data.classIndex();
        m_hasClass = true;
      }
    }

    m_startRange.setUpper(m_numAttribs - 1);
    if (!(getStartSet().equals(""))) {
      m_starting = m_startRange.getSelection();
    }

    // Count the attributes left out of the ranking: every start-set entry, plus the class
    // attribute when there is one and the start set does not already list it.
    int omitted = 0;
    boolean classListed = false;
    if (m_starting != null) {
      omitted = m_starting.length;
      for (int index : m_starting) {
        if (index == m_classIndex) {
          classListed = true;
          break;
        }
      }
    }
    if (m_hasClass && !classListed) {
      omitted++;
    }

    m_attributeList = new int[m_numAttribs - omitted];
    m_attributeMerit = new double[m_numAttribs - omitted];

    // add in those attributes not in the starting (omit) list
    for (int i = 0, j = 0; i < m_numAttribs; i++) {
      if (!inStarting(i)) {
        m_attributeList[j++] = i;
      }
    }

    AttributeEvaluator ASEvaluator = (AttributeEvaluator) ASEval;
    for (int i = 0; i < m_attributeList.length; i++) {
      m_attributeMerit[i] = ASEvaluator.evaluateAttribute(m_attributeList[i]);
    }

    // column 0 of each ranked row is read as the attribute index
    double[][] tempRanked = rankedAttributes();
    int[] rankedAttributes = new int[m_attributeList.length];
    for (int i = 0; i < m_attributeList.length; i++) {
      rankedAttributes[i] = (int) tempRanked[i][0];
    }

    return rankedAttributes;
  }
  /**
   * Searches the attribute subset space by linear forward selection.
   *
   * <p>The evaluator is built on {@code data}, the optional start set is turned into a bit set
   * (leaving out the class index), and the attributes are either ranked through
   * {@code LFSMethods.rankAttributes} or kept in their natural order. The actual search is then
   * delegated to {@code LFSMethods}, using either the forward or the floating forward method.
   *
   * @param ASEval the attribute evaluator to guide the search
   * @param data the training instances.
   * @return an array (not necessarily ordered) of selected attribute indexes
   * @exception Exception if the search can't be completed
   */
  public int[] search(ASEvaluation ASEval, Instances data) throws Exception {
    m_totalEvals = 0;

    if (!(ASEval instanceof SubsetEvaluator)) {
      throw new Exception(ASEval.getClass().getName() + " is not a Subset evaluator!");
    }
    final SubsetEvaluator subsetEvaluator = (SubsetEvaluator) ASEval;

    // only non-unsupervised evaluators work with a class attribute
    m_hasClass = !(ASEval instanceof UnsupervisedSubsetEvaluator);
    if (m_hasClass) {
      m_classIndex = data.classIndex();
    }

    ASEval.buildEvaluator(data);

    m_numAttribs = data.numAttributes();

    // never consider more top-ranked attributes than there are attributes
    if (m_numUsedAttributes > m_numAttribs) {
      System.out.println(
          "Decreasing number of top-ranked attributes to total number of attributes: "
              + data.numAttributes());
      m_numUsedAttributes = m_numAttribs;
    }

    m_startRange.setUpper(m_numAttribs - 1);
    if (!getStartSet().equals("")) {
      m_starting = m_startRange.getSelection();
    }

    // seed the search with the supplied start set, leaving out the class attribute
    final BitSet startGroup = new BitSet(m_numAttribs);
    if (m_starting != null) {
      for (int attributeIndex : m_starting) {
        if (attributeIndex != m_classIndex) {
          startGroup.set(attributeIndex);
        }
      }
    }

    final LFSMethods lfs = new LFSMethods();

    // either rank the attributes with the evaluator or keep their natural order
    final int[] ranking;
    if (m_performRanking) {
      ranking = lfs.rankAttributes(data, subsetEvaluator, m_verbose);
    } else {
      ranking = new int[m_numAttribs];
      for (int position = 0; position < ranking.length; position++) {
        ranking[position] = position;
      }
    }

    final boolean fixedWidth = m_linearSelectionType == TYPE_FIXED_WIDTH;

    if (m_forwardSearchMethod == SEARCH_METHOD_FORWARD) {
      lfs.forwardSearch(
          m_cacheSize,
          startGroup,
          ranking,
          m_numUsedAttributes,
          fixedWidth,
          m_maxStale,
          -1,
          data,
          subsetEvaluator,
          m_verbose);
    } else if (m_forwardSearchMethod == SEARCH_METHOD_FLOATING) {
      lfs.floatingForwardSearch(
          m_cacheSize,
          startGroup,
          ranking,
          m_numUsedAttributes,
          fixedWidth,
          m_maxStale,
          data,
          subsetEvaluator,
          m_verbose);
    }

    // publish the statistics gathered by the helper
    m_totalEvals = lfs.getNumEvalsTotal();
    m_bestMerit = lfs.getBestMerit();

    return attributeList(lfs.getBestGroup());
  }