/** * Searches the attribute subset space using a genetic algorithm. * * @param ASEval the attribute evaluator to guide the search * @param data the training instances. * @return an array (not necessarily ordered) of selected attribute indexes * @throws Exception if the search can't be completed */ @Override public int[] search(ASEvaluation ASEval, Instances data) throws Exception { m_best = null; m_generationReports = new StringBuffer(); if (!(ASEval instanceof SubsetEvaluator)) { throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator!"); } if (ASEval instanceof UnsupervisedSubsetEvaluator) { m_hasClass = false; } else { m_hasClass = true; m_classIndex = data.classIndex(); } SubsetEvaluator ASEvaluator = (SubsetEvaluator) ASEval; m_numAttribs = data.numAttributes(); m_startRange.setUpper(m_numAttribs - 1); if (!(getStartSet().equals(""))) { m_starting = m_startRange.getSelection(); } // initial random population m_lookupTable = new Hashtable<BitSet, GABitSet>(m_lookupTableSize); m_random = new Random(m_seed); m_population = new GABitSet[m_popSize]; // set up random initial population initPopulation(); evaluatePopulation(ASEvaluator); populationStatistics(); scalePopulation(); checkBest(); m_generationReports.append(populationReport(0)); boolean converged; for (int i = 1; i <= m_maxGenerations; i++) { generation(); evaluatePopulation(ASEvaluator); populationStatistics(); scalePopulation(); // find the best pop member and check for convergence converged = checkBest(); if ((i == m_maxGenerations) || ((i % m_reportFrequency) == 0) || (converged == true)) { m_generationReports.append(populationReport(i)); if (converged == true) { break; } } } return attributeList(m_best.getChromosome()); }
/**
 * Describes the command-line options understood by this search.
 *
 * <p>The list always contains the {@code -A}, {@code -S} and {@code -R} options. When the
 * current attribute evaluator is an {@code OptionHandler}, a heading entry naming the
 * evaluator's class is added, followed by every option the evaluator itself lists.
 *
 * @return an enumeration of all the available options.
 */
public Enumeration listOptions() {
  Vector optionList = new Vector(4);

  optionList.addElement(
      new Option(
          "\tclass name of attribute evaluator to use for ranking. Place any\n"
              + "\tevaluator options LAST on the command line following a \"--\".\n"
              + "\teg.:\n"
              + "\t\t-A weka.attributeSelection.GainRatioAttributeEval ... -- -M\n"
              + "\t(default: weka.attributeSelection.GainRatioAttributeEval)",
          "A",
          1,
          "-A <attribute evaluator>"));

  optionList.addElement(
      new Option(
          "\tnumber of attributes to be added from the"
              + "\n\tranking in each iteration (default = 1).",
          "S",
          1,
          "-S <step size>"));

  optionList.addElement(
      new Option(
          "\tpoint in the ranking to start evaluating from. "
              + "\n\t(default = 0, ie. the head of the ranking).",
          "R",
          1,
          "-R <start point>"));

  // instanceof is false for null, so no separate null check is needed.
  if (m_ASEval instanceof OptionHandler) {
    optionList.addElement(
        new Option(
            "",
            "",
            0,
            "\nOptions specific to " + "evaluator " + m_ASEval.getClass().getName() + ":"));

    for (Enumeration evaluatorOptions = ((OptionHandler) m_ASEval).listOptions();
        evaluatorOptions.hasMoreElements(); ) {
      optionList.addElement(evaluatorOptions.nextElement());
    }
  }

  return optionList.elements();
}
/**
 * Configures this search from a list of command-line options.
 *
 * <p>All options are first reset via {@code resetOptions()}. {@code -S} and {@code -R} are
 * applied only when present; when {@code -A} is absent the evaluator defaults to
 * {@code GainRatioAttributeEval}. Whatever {@code Utils.partitionOptions} extracts from the
 * option array is passed on to the evaluator.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <p>
 *
 * <pre> -A &lt;attribute evaluator&gt;
 *  class name of attribute evaluator to use for ranking. Place any
 *  evaluator options LAST on the command line following a "--".
 *  eg.:
 *   -A weka.attributeSelection.GainRatioAttributeEval ... -- -M
 *  (default: weka.attributeSelection.GainRatioAttributeEval)</pre>
 *
 * <pre> -S &lt;step size&gt;
 *  number of attributes to be added from the
 *  ranking in each iteration (default = 1).</pre>
 *
 * <pre> -R &lt;start point&gt;
 *  point in the ranking to start evaluating from.
 *  (default = 0, ie. the head of the ranking).</pre>
 *
 * <pre>
 * Options specific to evaluator weka.attributeSelection.GainRatioAttributeEval:
 * </pre>
 *
 * <pre> -M
 *  treat missing values as a seperate value.</pre>
 *
 * <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  resetOptions();

  final String stepSizeArg = Utils.getOption('S', options);
  if (stepSizeArg.length() > 0) {
    setStepSize(Integer.parseInt(stepSizeArg));
  }

  final String startPointArg = Utils.getOption('R', options);
  if (startPointArg.length() > 0) {
    setStartPoint(Integer.parseInt(startPointArg));
  }

  String evaluatorName = Utils.getOption('A', options);
  if (evaluatorName.length() == 0) {
    // No evaluator given on the command line: fall back to the default.
    evaluatorName = GainRatioAttributeEval.class.getName();
  }
  setAttributeEvaluator(ASEvaluation.forName(evaluatorName, Utils.partitionOptions(options)));
}
/** * Ranks attributes using the specified attribute evaluator and then searches the ranking using * the supplied subset evaluator. * * @param ASEval the subset evaluator to guide the search * @param data the training instances. * @return an array (not necessarily ordered) of selected attribute indexes * @throws Exception if the search can't be completed */ public int[] search(ASEvaluation ASEval, Instances data) throws Exception { double best_merit = -Double.MAX_VALUE; double temp_merit; BitSet temp_group, best_group = null; if (!(ASEval instanceof SubsetEvaluator)) { throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator!"); } m_SubsetEval = ASEval; m_Instances = data; m_numAttribs = m_Instances.numAttributes(); /* if (m_ASEval instanceof AttributeTransformer) { throw new Exception("Can't use an attribute transformer " +"with RankSearch"); } */ if (m_ASEval instanceof UnsupervisedAttributeEvaluator || m_ASEval instanceof UnsupervisedSubsetEvaluator) { m_hasClass = false; /* if (!(m_SubsetEval instanceof UnsupervisedSubsetEvaluator)) { throw new Exception("Must use an unsupervised subset evaluator."); } */ } else { m_hasClass = true; m_classIndex = m_Instances.classIndex(); } if (m_ASEval instanceof AttributeEvaluator) { // generate the attribute ranking first Ranker ranker = new Ranker(); m_ASEval.buildEvaluator(m_Instances); if (m_ASEval instanceof AttributeTransformer) { // get the transformed data a rebuild the subset evaluator m_Instances = ((AttributeTransformer) m_ASEval).transformedData(m_Instances); ((ASEvaluation) m_SubsetEval).buildEvaluator(m_Instances); } m_Ranking = ranker.search(m_ASEval, m_Instances); } else { GreedyStepwise fs = new GreedyStepwise(); double[][] rankres; fs.setGenerateRanking(true); ((ASEvaluation) m_ASEval).buildEvaluator(m_Instances); fs.search(m_ASEval, m_Instances); rankres = fs.rankedAttributes(); m_Ranking = new int[rankres.length]; for (int i = 0; i < rankres.length; i++) { m_Ranking[i] = (int) 
rankres[i][0]; } } // now evaluate the attribute ranking for (int i = m_startPoint; i < m_Ranking.length; i += m_add) { temp_group = new BitSet(m_numAttribs); for (int j = 0; j <= i; j++) { temp_group.set(m_Ranking[j]); } temp_merit = ((SubsetEvaluator) m_SubsetEval).evaluateSubset(temp_group); if (temp_merit > best_merit) { best_merit = temp_merit; ; best_group = temp_group; } } m_bestMerit = best_merit; return attributeList(best_group); }
/** * Searches the attribute subset space by best first search * * @param ASEval the attribute evaluator to guide the search * @param data the training instances. * @return an array (not necessarily ordered) of selected attribute indexes * @throws Exception if the search can't be completed */ public int[] search(ASEvaluation ASEval, Instances data) throws Exception { m_totalEvals = 0; if (!(ASEval instanceof SubsetEvaluator)) { throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator!"); } if (ASEval instanceof UnsupervisedSubsetEvaluator) { m_hasClass = false; } else { m_hasClass = true; m_classIndex = data.classIndex(); } SubsetEvaluator ASEvaluator = (SubsetEvaluator) ASEval; m_numAttribs = data.numAttributes(); int i, j; int best_size = 0; int size = 0; int done; int sd = m_searchDirection; BitSet best_group, temp_group; int stale; double best_merit; double merit; boolean z; boolean added; Link2 tl; Hashtable lookup = new Hashtable(m_cacheSize * m_numAttribs); int insertCount = 0; int cacheHits = 0; LinkedList2 bfList = new LinkedList2(m_maxStale); best_merit = -Double.MAX_VALUE; stale = 0; best_group = new BitSet(m_numAttribs); m_startRange.setUpper(m_numAttribs - 1); if (!(getStartSet().equals(""))) { m_starting = m_startRange.getSelection(); } // If a starting subset has been supplied, then initialise the bitset if (m_starting != null) { for (i = 0; i < m_starting.length; i++) { if ((m_starting[i]) != m_classIndex) { best_group.set(m_starting[i]); } } best_size = m_starting.length; m_totalEvals++; } else { if (m_searchDirection == SELECTION_BACKWARD) { setStartSet("1-last"); m_starting = new int[m_numAttribs]; // init initial subset to all attributes for (i = 0, j = 0; i < m_numAttribs; i++) { if (i != m_classIndex) { best_group.set(i); m_starting[j++] = i; } } best_size = m_numAttribs - 1; m_totalEvals++; } } // evaluate the initial subset best_merit = ASEvaluator.evaluateSubset(best_group); // add the initial group to the list and 
the hash table Object[] best = new Object[1]; best[0] = best_group.clone(); bfList.addToList(best, best_merit); BitSet tt = (BitSet) best_group.clone(); String hashC = tt.toString(); lookup.put(hashC, new Double(best_merit)); while (stale < m_maxStale) { added = false; if (m_searchDirection == SELECTION_BIDIRECTIONAL) { // bi-directional search done = 2; sd = SELECTION_FORWARD; } else { done = 1; } // finished search? if (bfList.size() == 0) { stale = m_maxStale; break; } // copy the attribute set at the head of the list tl = bfList.getLinkAt(0); temp_group = (BitSet) (tl.getData()[0]); temp_group = (BitSet) temp_group.clone(); // remove the head of the list bfList.removeLinkAt(0); // count the number of bits set (attributes) int kk; for (kk = 0, size = 0; kk < m_numAttribs; kk++) { if (temp_group.get(kk)) { size++; } } do { for (i = 0; i < m_numAttribs; i++) { if (sd == SELECTION_FORWARD) { z = ((i != m_classIndex) && (!temp_group.get(i))); } else { z = ((i != m_classIndex) && (temp_group.get(i))); } if (z) { // set the bit (attribute to add/delete) if (sd == SELECTION_FORWARD) { temp_group.set(i); size++; } else { temp_group.clear(i); size--; } /* if this subset has been seen before, then it is already in the list (or has been fully expanded) */ tt = (BitSet) temp_group.clone(); hashC = tt.toString(); if (lookup.containsKey(hashC) == false) { merit = ASEvaluator.evaluateSubset(temp_group); m_totalEvals++; // insert this one in the hashtable if (insertCount > m_cacheSize * m_numAttribs) { lookup = new Hashtable(m_cacheSize * m_numAttribs); insertCount = 0; } hashC = tt.toString(); lookup.put(hashC, new Double(merit)); insertCount++; } else { merit = ((Double) lookup.get(hashC)).doubleValue(); cacheHits++; } // insert this one in the list Object[] add = new Object[1]; add[0] = tt.clone(); bfList.addToList(add, merit); if (m_debug) { System.out.print("Group: "); printGroup(tt, m_numAttribs); System.out.println("Merit: " + merit); } // is this better than the best? 
if (sd == SELECTION_FORWARD) { z = ((merit - best_merit) > 0.00001); } else { if (merit == best_merit) { z = (size < best_size); } else { z = (merit > best_merit); } } if (z) { added = true; stale = 0; best_merit = merit; // best_size = (size + best_size); best_size = size; best_group = (BitSet) (temp_group.clone()); } // unset this addition(deletion) if (sd == SELECTION_FORWARD) { temp_group.clear(i); size--; } else { temp_group.set(i); size++; } } } if (done == 2) { sd = SELECTION_BACKWARD; } done--; } while (done > 0); /* if we haven't added a new attribute subset then full expansion of this node hasen't resulted in anything better */ if (!added) { stale++; } } m_bestMerit = best_merit; return attributeList(best_group); }
/** * Kind of a dummy search algorithm. Calls a Attribute evaluator to evaluate each attribute not * included in the startSet and then sorts them to produce a ranked list of attributes. * * @param ASEval the attribute evaluator to guide the search * @param data the training instances. * @return an array (not necessarily ordered) of selected attribute indexes * @throws Exception if the search can't be completed */ public int[] search(ASEvaluation ASEval, Instances data) throws Exception { int i, j; if (!(ASEval instanceof AttributeEvaluator)) { throw new Exception(ASEval.getClass().getName() + " is not a" + "Attribute evaluator!"); } m_numAttribs = data.numAttributes(); if (ASEval instanceof UnsupervisedAttributeEvaluator) { m_hasClass = false; } else { m_classIndex = data.classIndex(); if (m_classIndex >= 0) { m_hasClass = true; } else { m_hasClass = false; } } // get the transformed data and check to see if the transformer // preserves a class index if (ASEval instanceof AttributeTransformer) { data = ((AttributeTransformer) ASEval).transformedHeader(); if (m_classIndex >= 0 && data.classIndex() >= 0) { m_classIndex = data.classIndex(); m_hasClass = true; } } m_startRange.setUpper(m_numAttribs - 1); if (!(getStartSet().equals(""))) { m_starting = m_startRange.getSelection(); } int sl = 0; if (m_starting != null) { sl = m_starting.length; } if ((m_starting != null) && (m_hasClass == true)) { // see if the supplied list contains the class index boolean ok = false; for (i = 0; i < sl; i++) { if (m_starting[i] == m_classIndex) { ok = true; break; } } if (ok == false) { sl++; } } else { if (m_hasClass == true) { sl++; } } m_attributeList = new int[m_numAttribs - sl]; m_attributeMerit = new double[m_numAttribs - sl]; // add in those attributes not in the starting (omit list) for (i = 0, j = 0; i < m_numAttribs; i++) { if (!inStarting(i)) { m_attributeList[j++] = i; } } AttributeEvaluator ASEvaluator = (AttributeEvaluator) ASEval; for (i = 0; i < m_attributeList.length; 
i++) { m_attributeMerit[i] = ASEvaluator.evaluateAttribute(m_attributeList[i]); } double[][] tempRanked = rankedAttributes(); int[] rankedAttributes = new int[m_attributeList.length]; for (i = 0; i < m_attributeList.length; i++) { rankedAttributes[i] = (int) tempRanked[i][0]; } return rankedAttributes; }
/** * Searches the attribute subset space by linear forward selection * * @param ASEval the attribute evaluator to guide the search * @param data the training instances. * @return an array (not necessarily ordered) of selected attribute indexes * @exception Exception if the search can't be completed */ public int[] search(ASEvaluation ASEval, Instances data) throws Exception { m_totalEvals = 0; if (!(ASEval instanceof SubsetEvaluator)) { throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator!"); } if (ASEval instanceof UnsupervisedSubsetEvaluator) { m_hasClass = false; } else { m_hasClass = true; m_classIndex = data.classIndex(); } ((ASEvaluation) ASEval).buildEvaluator(data); m_numAttribs = data.numAttributes(); if (m_numUsedAttributes > m_numAttribs) { System.out.println( "Decreasing number of top-ranked attributes to total number of attributes: " + data.numAttributes()); m_numUsedAttributes = m_numAttribs; } BitSet start_group = new BitSet(m_numAttribs); m_startRange.setUpper(m_numAttribs - 1); if (!(getStartSet().equals(""))) { m_starting = m_startRange.getSelection(); } // If a starting subset has been supplied, then initialise the bitset if (m_starting != null) { for (int i = 0; i < m_starting.length; i++) { if ((m_starting[i]) != m_classIndex) { start_group.set(m_starting[i]); } } } LFSMethods LFS = new LFSMethods(); int[] ranking; if (m_performRanking) { ranking = LFS.rankAttributes(data, (SubsetEvaluator) ASEval, m_verbose); } else { ranking = new int[m_numAttribs]; for (int i = 0; i < ranking.length; i++) { ranking[i] = i; } } if (m_forwardSearchMethod == SEARCH_METHOD_FORWARD) { LFS.forwardSearch( m_cacheSize, start_group, ranking, m_numUsedAttributes, m_linearSelectionType == TYPE_FIXED_WIDTH, m_maxStale, -1, data, (SubsetEvaluator) ASEval, m_verbose); } else if (m_forwardSearchMethod == SEARCH_METHOD_FLOATING) { LFS.floatingForwardSearch( m_cacheSize, start_group, ranking, m_numUsedAttributes, m_linearSelectionType == 
TYPE_FIXED_WIDTH, m_maxStale, data, (SubsetEvaluator) ASEval, m_verbose); } m_totalEvals = LFS.getNumEvalsTotal(); m_bestMerit = LFS.getBestMerit(); return attributeList(LFS.getBestGroup()); }