/** * Initializes the ranges using all instances of the dataset. Sets m_Ranges. * * @return the ranges */ public double[][] initializeRanges() { if (m_Data == null) { m_Ranges = null; return m_Ranges; } int numAtt = m_Data.numAttributes(); double[][] ranges = new double[numAtt][3]; if (m_Data.numInstances() <= 0) { initializeRangesEmpty(numAtt, ranges); m_Ranges = ranges; return m_Ranges; } else { // initialize ranges using the first instance updateRangesFirst(m_Data.instance(0), numAtt, ranges); } // update ranges, starting from the second for (int i = 1; i < m_Data.numInstances(); i++) { updateRanges(m_Data.instance(i), numAtt, ranges); } m_Ranges = ranges; return m_Ranges; }
/** * initializes the algorithm * * @param data the data to work with * @throws Exception if m_SVM is null */ protected void init(Instances data) throws Exception { if (m_SVM == null) { throw new Exception("SVM not initialized in optimizer. Use RegOptimizer.setSVMReg()"); } m_C = m_SVM.getC(); m_data = data; m_classIndex = data.classIndex(); m_nInstances = data.numInstances(); // Initialize kernel m_kernel = Kernel.makeCopy(m_SVM.getKernel()); m_kernel.buildKernel(data); // init m_target m_target = new double[m_nInstances]; for (int i = 0; i < m_nInstances; i++) { m_target[i] = data.instance(i).classValue(); } m_random = new Random(m_nSeed); // initialize alpha and alpha* array to all zero m_alpha = new double[m_target.length]; m_alphaStar = new double[m_target.length]; m_supportVectors = new SMOset(m_nInstances); m_b = 0.0; m_nEvals = 0; m_nCacheHits = -1; }
/** * Generates the classifier. * * @param instances set of instances serving as training data * @throws Exception if the classifier has not been generated successfully */ public void buildClassifier(Instances instances) throws Exception { if (!(m_Classifier instanceof WeightedInstancesHandler)) { throw new IllegalArgumentException("Classifier must be a " + "WeightedInstancesHandler!"); } // can classifier handle the data? getCapabilities().testWithFail(instances); // remove instances with missing class instances = new Instances(instances); instances.deleteWithMissingClass(); // only class? -> build ZeroR model if (instances.numAttributes() == 1) { System.err.println( "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!"); m_ZeroR = new weka.classifiers.rules.ZeroR(); m_ZeroR.buildClassifier(instances); return; } else { m_ZeroR = null; } m_Train = new Instances(instances, 0, instances.numInstances()); m_NNSearch.setInstances(m_Train); }
// 构造一个tri-trainer分类器。 public Tritrainer( String classifier, String trainingIns_File, String testIns_File, double precentage) { try { this.classifier1 = (Classifier) Class.forName(classifier).newInstance(); this.classifier2 = (Classifier) Class.forName(classifier).newInstance(); this.classifier3 = (Classifier) Class.forName(classifier).newInstance(); Instances trainingInstances = Util.getInstances(trainingIns_File); // 将trainIns_File按照precentage和(1-precentage)的比例切割成labeledIns和unlabeledIns; int length = trainingInstances.numInstances(); int i = new Double(length * precentage).intValue(); labeledIns = new Instances(trainingInstances, 0); for (int j = 0; j < i; j++) { labeledIns.add(trainingInstances.firstInstance()); trainingInstances.delete(0); } unlabeledIns = trainingInstances; testIns = Util.getInstances(testIns_File); Init(); } catch (Exception e) { } }
// 将样本集中裁剪提取成m个样本组成的集合; public void SubSample(Instances inst, int m) { inst.randomize(new Random()); while (inst.numInstances() != m) { inst.delete(0); } // System.out.println("subsample:=" + inst.numInstances() + " m:=" + m ); }
// 计算h1,h2分类器共同的分类错误率; public double measureBothError(Classifier h1, Classifier h2, Instances test) { int m = test.numInstances(); double value1, value2, value; int error = 0, total = 0; try { for (int i = 0; i < m; i++) { value = test.instance(i).classValue(); value1 = h1.classifyInstance(test.instance(i)); value2 = h2.classifyInstance(test.instance(i)); // 两分类器做出相同决策 if (value1 == value2) { // 两分类器做出相同决策的样本数量 total++; // 两分类器做出相同错误决策 if (value != value1) { // 两分类器做出相同错误决策的样本数量 error++; } } } } catch (Exception e) { System.out.println(e); } // System.out.println("m:=" + m); // System.out.println("error:=" + error +"; total:=" + total); // 两个分类器的分类错误率= 两分类器做出相同错误决策的样本数量/两分类器做出相同决策的样本数量 return (error * 1.0) / total; }
/**
 * Clusters the agents with the given Weka clusterer and groups them by assigned cluster.
 *
 * <p>The agents are converted to a Weka dataset over the given attributes, the clusterer is
 * built on it, and each agent is placed in the list matching the cluster of its instance.
 *
 * @param scope the current scope
 * @param clusterer the Weka clusterer to build and query
 * @param attributes names of the attributes used for the conversion
 * @param agents the agents to cluster
 * @return one list of agents per cluster, or {@code null} if building or querying the clusterer
 *     throws
 * @throws GamaRuntimeException declared by the signature
 */
private static IList<IList<IAgent>> clusteringUsingWeka(
    final IScope scope,
    final Clusterer clusterer,
    final IList<String> attributes,
    final IAddressableContainer<Integer, IAgent, Integer, IAgent> agents)
    throws GamaRuntimeException {
  final Instances dataset = convertToInstances(scope, attributes, agents);
  try {
    clusterer.buildClusterer(dataset);

    // One (initially empty) group per cluster.
    final IList<IList<IAgent>> groupes = GamaListFactory.create(Types.LIST.of(Types.AGENT));
    for (int c = 0; c < clusterer.numberOfClusters(); c++) {
      groupes.add(GamaListFactory.<IAgent>create(Types.AGENT));
    }

    // Instance i of the dataset corresponds to agent i.
    for (int row = 0; row < dataset.numInstances(); row++) {
      final int clusterIndex = clusterer.clusterInstance(dataset.instance(row));
      groupes.get(clusterIndex).add(agents.get(scope, row));
    }
    return groupes;
  } catch (Exception e) {
    return null;
  }
}
/**
 * Sums the weights of the instances whose class equals the consequent of the rule.
 *
 * @param data the data in question
 * @return the total weight of the instances whose class value (truncated to an int) matches
 *     m_Consequent
 */
private double computeDefAccu(Instances data) {
  final int wantedClass = (int) m_Consequent;
  double total = 0;
  for (int idx = 0; idx < data.numInstances(); idx++) {
    final Instance candidate = data.instance(idx);
    if ((int) candidate.classValue() != wantedClass) {
      continue;
    }
    total += candidate.weight();
  }
  return total;
}
/**
 * Computes the entropy of the class distribution of a dataset.
 *
 * <p>With class counts c_j and N instances the value is log2(N) - (sum_j c_j * log2(c_j)) / N.
 * Instance weights are not taken into account; every instance counts as one.
 *
 * @param data the data for which entropy is to be computed
 * @return the entropy of the data's class distribution
 * @throws Exception if computation fails
 */
private double computeEntropy(Instances data) throws Exception {
  // Count the instances of each class.
  final double[] tally = new double[data.numClasses()];
  for (int row = 0; row < data.numInstances(); row++) {
    final Instance inst = data.instance(row);
    tally[(int) inst.classValue()]++;
  }

  double entropy = 0;
  for (int cls = 0; cls < data.numClasses(); cls++) {
    final double count = tally[cls];
    if (count > 0) {
      entropy -= count * Utils.log2(count);
    }
  }

  entropy /= (double) data.numInstances();
  return entropy + Utils.log2(data.numInstances());
}
/**
 * Returns the cluster assigned to the sequence with the given ID.
 *
 * <p>The ID is compared with the string value of attribute 0 of every instance in m_Sequences.
 * If several instances carry the same ID, the last one in the dataset is used.
 *
 * @param objectID the ID to look up
 * @return the entry of the cluster array for the matching instance
 * @throws ArrayIndexOutOfBoundsException if no instance carries the given ID
 */
public int getClusterNumber(String objectID) {
  // Scan backwards and stop at the first hit: this yields the last matching instance without
  // visiting the rest of the dataset.
  for (int i = m_Sequences.numInstances() - 1; i >= 0; i--) {
    if (objectID.equals(m_Sequences.instance(i).stringValue(0))) {
      return cluster[i];
    }
  }

  // Fail with a clear message rather than an opaque out-of-bounds access at index -1; the
  // exception type is the one that lookup would have raised.
  throw new ArrayIndexOutOfBoundsException(
      "No sequence with ID '" + objectID + "' in the clustered data");
}
/** * Determines the output format based on the input format and returns this. In case the output * format cannot be returned immediately, i.e., immediateOutputFormat() returns false, then this * method will be called from batchFinished(). * * @param inputFormat the input format to base the output format on * @return the output format * @throws Exception in case the determination goes wrong * @see #hasImmediateOutputFormat() * @see #batchFinished() */ protected Instances determineOutputFormat(Instances inputFormat) throws Exception { Instances data; Instances result; FastVector atts; FastVector values; HashSet hash; int i; int n; boolean isDate; Instance inst; Vector sorted; m_Cols.setUpper(inputFormat.numAttributes() - 1); data = new Instances(inputFormat); atts = new FastVector(); for (i = 0; i < data.numAttributes(); i++) { if (!m_Cols.isInRange(i) || !data.attribute(i).isNumeric()) { atts.addElement(data.attribute(i)); continue; } // date attribute? isDate = (data.attribute(i).type() == Attribute.DATE); // determine all available attribtues in dataset hash = new HashSet(); for (n = 0; n < data.numInstances(); n++) { inst = data.instance(n); if (inst.isMissing(i)) continue; if (isDate) hash.add(inst.stringValue(i)); else hash.add(new Double(inst.value(i))); } // sort values sorted = new Vector(); for (Object o : hash) sorted.add(o); Collections.sort(sorted); // create attribute from sorted values values = new FastVector(); for (Object o : sorted) { if (isDate) values.addElement(o.toString()); else values.addElement(Utils.doubleToString(((Double) o).doubleValue(), MAX_DECIMALS)); } atts.addElement(new Attribute(data.attribute(i).name(), values)); } result = new Instances(inputFormat.relationName(), atts, 0); result.setClassIndex(inputFormat.classIndex()); return result; }
/**
 * Resets the per-attribute minimum and maximum arrays and recomputes them from the data.
 *
 * <p>m_Min and m_Max get one slot per attribute, each initialized to NaN, and are then updated
 * with every instance through updateMinMax.
 *
 * @param data the instances to scan
 */
protected void initMinMax(Instances data) {
  final int attCount = data.numAttributes();
  m_Min = new double[attCount];
  m_Max = new double[attCount];

  // NaN marks "no value seen yet" for each attribute.
  java.util.Arrays.fill(m_Min, Double.NaN);
  java.util.Arrays.fill(m_Max, Double.NaN);

  int row = 0;
  while (row < data.numInstances()) {
    updateMinMax(data.instance(row));
    row++;
  }
}
/** * Labels the artificially generated data. * * @param artData the artificially generated instances * @exception Exception if instances cannot be labeled successfully */ protected void labelData(Instances artData) throws Exception { Instance curr; double[] probs; for (int i = 0; i < artData.numInstances(); i++) { curr = artData.instance(i); // compute the class membership probs predicted by the current ensemble probs = distributionForInstance(curr); // select class label inversely proportional to the ensemble predictions curr.setClassValue(inverseLabel(probs)); } }
/** * Computes the error in classification on the given data. * * @param data the instances to be classified * @return classification error * @exception Exception if error can not be computed successfully */ protected double computeError(Instances data) throws Exception { double error = 0.0; int numInstances = data.numInstances(); Instance curr; for (int i = 0; i < numInstances; i++) { curr = data.instance(i); // Check if the instance has been misclassified if (curr.classValue() != ((int) classifyInstance(curr))) error++; } return (error / numInstances); }
/**
 * Computes the information gain of splitting the data on an attribute.
 *
 * <p>The gain is the entropy of the whole set minus the entropy of each non-empty subset
 * produced by the split, weighted by the share of instances that fall into that subset.
 *
 * @param data the data for which info gain is to be computed
 * @param att the attribute
 * @return the information gain for the given attribute and data
 * @throws Exception if computation fails
 */
private double computeInfoGain(Instances data, Attribute att) throws Exception {
  double gain = computeEntropy(data);
  final Instances[] subsets = splitData(data, att);

  for (int v = 0; v < att.numValues(); v++) {
    final Instances subset = subsets[v];
    if (subset.numInstances() <= 0) {
      continue; // empty subsets contribute nothing
    }
    final double share = (double) subset.numInstances() / (double) data.numInstances();
    gain -= share * computeEntropy(subset);
  }
  return gain;
}
/** * Implements the splitData function. This procedure is to split the data into bags according to * the nominal attribute value The infoGain for each bag is also calculated. * * @param data the data to be split * @param defAcRt the default accuracy rate for data * @param cl the class label to be predicted * @return the array of data after split */ public Instances[] splitData(Instances data, double defAcRt, double cl) { int bag = att.numValues(); Instances[] splitData = new Instances[bag]; for (int x = 0; x < bag; x++) { splitData[x] = new Instances(data, data.numInstances()); accurate[x] = 0; coverage[x] = 0; } for (int x = 0; x < data.numInstances(); x++) { Instance inst = data.instance(x); if (!inst.isMissing(att)) { int v = (int) inst.value(att); splitData[v].add(inst); coverage[v] += inst.weight(); if ((int) inst.classValue() == (int) cl) accurate[v] += inst.weight(); } } for (int x = 0; x < bag; x++) { double t = coverage[x] + 1.0; double p = accurate[x] + 1.0; double infoGain = // Utils.eq(defAcRt, 1.0) ? // accurate[x]/(double)numConds : accurate[x] * (Utils.log2(p / t) - Utils.log2(defAcRt)); if (infoGain > maxInfoGain) { maxInfoGain = infoGain; cover = coverage[x]; accu = accurate[x]; accuRate = p / t; value = (double) x; } } return splitData; }
/** * Initializes a gain ratio attribute evaluator. Discretizes all attributes that are numeric. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been generated successfully */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); m_trainInstances = data; m_classIndex = m_trainInstances.classIndex(); m_numAttribs = m_trainInstances.numAttributes(); m_numInstances = m_trainInstances.numInstances(); Discretize disTransform = new Discretize(); disTransform.setUseBetterEncoding(true); disTransform.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, disTransform); m_numClasses = m_trainInstances.attribute(m_classIndex).numValues(); }
/**
 * Splits a dataset according to the values of a nominal attribute.
 *
 * <p>Each instance is added to the subset whose index is the instance's value for the attribute
 * (truncated to an int). Every subset is compactified before being returned.
 *
 * @param data the data which is to be split
 * @param att the attribute to be used for splitting
 * @return the sets of instances produced by the split, one per attribute value
 */
private Instances[] splitData(Instances data, Attribute att) {
  final Instances[] subsets = new Instances[att.numValues()];
  for (int v = 0; v < att.numValues(); v++) {
    subsets[v] = new Instances(data, data.numInstances());
  }

  for (int row = 0; row < data.numInstances(); row++) {
    final Instance inst = data.instance(row);
    final int bucket = (int) inst.value(att);
    subsets[bucket].add(inst);
  }

  for (Instances subset : subsets) {
    subset.compactify();
  }
  return subsets;
}
/** * Generates a clusterer by the mean of spectral clustering algorithm. * * @param data set of instances serving as training data * @exception Exception if the clusterer has not been generated successfully */ public void buildClusterer(Instances data) throws java.lang.Exception { m_Sequences = new Instances(data); int n = data.numInstances(); int k = data.numAttributes(); DoubleMatrix2D w; if (useSparseMatrix) w = DoubleFactory2D.sparse.make(n, n); else w = DoubleFactory2D.dense.make(n, n); double[][] v1 = new double[n][]; for (int i = 0; i < n; i++) v1[i] = data.instance(i).toDoubleArray(); v = DoubleFactory2D.dense.make(v1); double sigma_sq = sigma * sigma; // Sets up similarity matrix for (int i = 0; i < n; i++) for (int j = i; j < n; j++) { /*double dist = distnorm2(v.viewRow(i), v.viewRow(j)); if((r == -1) || (dist < r)) { double sim = Math.exp(- (dist * dist) / (2 * sigma_sq)); w.set(i, j, sim); w.set(j, i, sim); }*/ /* String [] key = {data.instance(i).stringValue(0), data.instance(j).stringValue(0)}; System.out.println(key[0]); System.out.println(key[1]); System.out.println(simScoreMap.containsKey(key)); Double simValue = simScoreMap.get(key);*/ double sim = sim_matrix[i][j]; w.set(i, j, sim); w.set(j, i, sim); } // Partitions points int[][] p = partition(w, alpha_star); // Deploys results numOfClusters = p.length; cluster = new int[n]; for (int i = 0; i < p.length; i++) for (int j = 0; j < p[i].length; j++) cluster[p[i][j]] = i; // System.out.println("Final partition:"); // UtilsJS.printMatrix(p); // System.out.println("Cluster:\n"); // UtilsJS.printArray(cluster); this.numOfClusters = cluster[Utils.maxIndex(cluster)] + 1; // System.out.println("Num clusters:\t"+this.numOfClusters); }
// 通过h1,h2分类器学习样本集,将h1,h2分类决策相同的样本放入L中,得到标记集合; public void updateL(Classifier h1, Classifier h2, Instances L, Instances test) { int length = unlabeledIns.numInstances(); double value1 = 0.0, value2 = 0.0; try { for (int i = 0; i < length; i++) { value1 = h1.classifyInstance(test.instance(i)); value2 = h2.classifyInstance(test.instance(i)); if (value1 == value2) { // 当两个分类器做出相同决策时重新标记样本的类别; test.instance(i).setClassValue(value1); L.add(test.instance(i)); } } } catch (Exception e) { System.out.println(e); } // return false; }
/** * Processes the given data (may change the provided dataset) and returns the modified version. * This method is called in batchFinished(). * * @param instances the data to process * @return the modified data * @throws Exception in case the processing goes wrong * @see #batchFinished() */ protected Instances process(Instances instances) throws Exception { Instances result; int i; int n; double[] values; String value; Instance inst; Instance newInst; // we need the complete input data! if (!isFirstBatchDone()) setOutputFormat(determineOutputFormat(getInputFormat())); result = new Instances(getOutputFormat()); for (i = 0; i < instances.numInstances(); i++) { inst = instances.instance(i); values = inst.toDoubleArray(); for (n = 0; n < values.length; n++) { if (!m_Cols.isInRange(n) || !instances.attribute(n).isNumeric() || inst.isMissing(n)) continue; // get index of value if (instances.attribute(n).type() == Attribute.DATE) value = inst.stringValue(n); else value = Utils.doubleToString(inst.value(n), MAX_DECIMALS); values[n] = result.attribute(n).indexOfValue(value); } // generate new instance if (inst instanceof SparseInstance) newInst = new SparseInstance(inst.weight(), values); else newInst = new DenseInstance(inst.weight(), values); // copy possible string, relational values newInst.setDataset(getOutputFormat()); copyValues(newInst, false, inst.dataset(), getOutputFormat()); result.add(newInst); } return result; }
/** * Method for building an Id3 tree. * * @param data the training data * @exception Exception if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = Utils.missingValue(); m_Distribution = new double[data.numClasses()]; return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); // Make leaf if information gain is zero. // Otherwise create successors. if (Utils.eq(infoGains[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { Instances[] splitData = splitData(data, m_Attribute); m_Successors = new Id3[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new Id3(); m_Successors[j].makeTree(splitData[j]); } } }
/** * Generates a clusterer. Has to initialize all fields of the clusterer that are not being set via * options. * * @param data set of instances serving as training data * @exception Exception if the clusterer has not been generated successfully */ public void buildClusterer(Instances data) throws Exception { // long start = System.currentTimeMillis(); if (data.checkForStringAttributes()) { throw new Exception("Can't handle string attributes!"); } m_ReplaceMissingFilter = new ReplaceMissingValues(); m_ReplaceMissingFilter.setInputFormat(data); m_instances = Filter.useFilter(data, m_ReplaceMissingFilter); initMinMax(m_instances); m_ClusterCentroids = new Instances(m_instances, m_NumClusters); int n = m_instances.numInstances(); Random r = new Random(m_Seed); boolean[] selected = new boolean[n]; double[] minDistance = new double[n]; for (int i = 0; i < n; i++) minDistance[i] = Double.MAX_VALUE; int firstI = r.nextInt(n); m_ClusterCentroids.add(m_instances.instance(firstI)); selected[firstI] = true; updateMinDistance(minDistance, selected, m_instances, m_instances.instance(firstI)); if (m_NumClusters > n) m_NumClusters = n; for (int i = 1; i < m_NumClusters; i++) { int nextI = farthestAway(minDistance, selected); m_ClusterCentroids.add(m_instances.instance(nextI)); selected[nextI] = true; updateMinDistance(minDistance, selected, m_instances, m_instances.instance(nextI)); } m_instances = new Instances(m_instances, 0); // long end = System.currentTimeMillis(); // System.out.println("Clustering Time = " + (end-start)); }
/** * Initializes the ranges of a subset of the instances of this dataset. Therefore m_Ranges is not * set. * * @param instList list of indexes of the subset * @return the ranges * @throws Exception if something goes wrong */ public double[][] initializeRanges(int[] instList) throws Exception { if (m_Data == null) { throw new Exception("No instances supplied."); } int numAtt = m_Data.numAttributes(); double[][] ranges = new double[numAtt][3]; if (m_Data.numInstances() <= 0) { initializeRangesEmpty(numAtt, ranges); return ranges; } else { // initialize ranges using the first instance updateRangesFirst(m_Data.instance(instList[0]), numAtt, ranges); // update ranges, starting from the second for (int i = 1; i < instList.length; i++) { updateRanges(m_Data.instance(instList[i]), numAtt, ranges); } } return ranges; }
/** * Generates an attribute evaluator. Has to initialise all fields of the evaluator that are not * being set via options. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been generated successfully */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); m_trainInstances = new Instances(data); m_trainInstances.deleteWithMissingClass(); m_numAttribs = m_trainInstances.numAttributes(); m_numInstances = m_trainInstances.numInstances(); // if the data has no decision feature, m_classIndex is negative m_classIndex = m_trainInstances.classIndex(); // supervised if (m_classIndex >= 0) { m_isNumeric = m_trainInstances.attribute(m_classIndex).isNumeric(); if (m_isNumeric) { m_DecisionSimilarity = m_Similarity; } else m_DecisionSimilarity = m_SimilarityEq; } m_Similarity.setInstances(m_trainInstances); m_DecisionSimilarity.setInstances(m_trainInstances); m_SimilarityEq.setInstances(m_trainInstances); m_composition = m_Similarity.getTNorm(); m_FuzzyMeasure.set( m_Similarity, m_DecisionSimilarity, m_TNorm, m_composition, m_Implicator, m_SNorm, m_numInstances, m_numAttribs, m_classIndex, m_trainInstances); }
public void calculateConfidences(Instances data) throws Exception { RipperRule tempRule = (RipperRule) this.copy(); while (tempRule.hasAntds()) { double acc = 0; double cov = 0; for (int i = 0; i < data.numInstances(); i++) { double membershipValue = tempRule.coverageDegree(data.instance(i)); cov += membershipValue; if (m_Consequent == data.instance(i).classValue()) { acc += membershipValue; } } // m-estimate double m = 2.0; ((Antd) this.m_Antds.elementAt((int) tempRule.size() - 1)).m_confidence = (acc + m * (aprioriDistribution[(int) m_Consequent] / Utils.sum(aprioriDistribution))) / (cov + m); tempRule.m_Antds.removeElementAt(tempRule.m_Antds.size() - 1); } }
/**
 * Appends every instance of one set to another set.
 *
 * @param data given instances (receives the additions)
 * @param newData set of instances to add to given instances
 */
protected void addInstances(Instances data, Instances newData) {
  int idx = 0;
  while (idx < newData.numInstances()) {
    data.add(newData.instance(idx));
    idx++;
  }
}
/**
 * Removes a specified number of instances from the end of the given set of instances.
 *
 * @param data given instances
 * @param numRemove number of instances to delete from the given instances
 */
protected void removeInstances(Instances data, int numRemove) {
  final int lastIndex = data.numInstances() - 1;
  // Delete back to front so the indexes still to be removed stay valid.
  for (int removed = 0; removed < numRemove; removed++) {
    data.delete(lastIndex - removed);
  }
}
/** * Build Decorate classifier * * @param data the training data to be used for generating the classifier * @exception Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { if (m_Classifier == null) { throw new Exception("A base classifier has not been specified!"); } if (data.checkForStringAttributes()) { throw new UnsupportedAttributeTypeException("Cannot handle string attributes!"); } if (data.classAttribute().isNumeric()) { throw new UnsupportedClassTypeException("Decorate can't handle a numeric class!"); } if (m_NumIterations < m_DesiredSize) throw new Exception("Max number of iterations must be >= desired ensemble size!"); // initialize random number generator if (m_Seed == -1) m_Random = new Random(); else m_Random = new Random(m_Seed); int i = 1; // current committee size int numTrials = 1; // number of Decorate iterations Instances divData = new Instances(data); // local copy of data - diversity data divData.deleteWithMissingClass(); Instances artData = null; // artificial data // compute number of artficial instances to add at each iteration int artSize = (int) (Math.abs(m_ArtSize) * divData.numInstances()); if (artSize == 0) artSize = 1; // atleast add one random example computeStats(data); // Compute training data stats for creating artificial examples // initialize new committee m_Committee = new Vector(); Classifier newClassifier = m_Classifier; newClassifier.buildClassifier(divData); m_Committee.add(newClassifier); double eComm = computeError(divData); // compute ensemble error if (m_Debug) System.out.println( "Initialize:\tClassifier " + i + " added to ensemble. 
Ensemble error = " + eComm); // repeat till desired committee size is reached OR the max number of iterations is exceeded while (i < m_DesiredSize && numTrials < m_NumIterations) { // Generate artificial training examples artData = generateArtificialData(artSize, data); // Label artificial examples labelData(artData); addInstances(divData, artData); // Add new artificial data // Build new classifier Classifier tmp[] = Classifier.makeCopies(m_Classifier, 1); newClassifier = tmp[0]; newClassifier.buildClassifier(divData); // Remove all the artificial data removeInstances(divData, artSize); // Test if the new classifier should be added to the ensemble m_Committee.add(newClassifier); // add new classifier to current committee double currError = computeError(divData); if (currError <= eComm) { // adding the new member did not increase the error i++; eComm = currError; if (m_Debug) System.out.println( "Iteration: " + (1 + numTrials) + "\tClassifier " + i + " added to ensemble. Ensemble error = " + eComm); } else { // reject the current classifier because it increased the ensemble error m_Committee.removeElementAt(m_Committee.size() - 1); // pop the last member } numTrials++; } }
public static void main(String args[]) { Timers timer = new Timers(); try { // Get the data set path. String referenceFile = Utils.getOption('r', args); String queryFile = Utils.getOption('q', args); if (referenceFile.length() == 0) throw new IllegalArgumentException( "Required option: File containing" + "the reference dataset."); // Load input dataset. DataSource source = new DataSource(referenceFile); Instances referenceData = source.getDataSet(); Instances queryData = null; if (queryFile.length() != 0) { source = new DataSource(queryFile); queryData = source.getDataSet(); } timer.StartTimer("total_time"); // Get all the parameters. String leafSize = Utils.getOption('l', args); String neighbors = Utils.getOption('k', args); // Validate options. int k = 0; if (neighbors.length() == 0) { throw new IllegalArgumentException( "Required option: Number of " + "furthest neighbors to find."); } else { k = Integer.parseInt(neighbors); if (k < 1 || k > referenceData.numInstances()) throw new IllegalArgumentException("[Fatal] Invalid k"); } int l = 20; if (leafSize.length() != 0) l = Integer.parseInt(leafSize); // Create KDTree. KDTree tree = new KDTree(); tree.setMaxInstInLeaf(l); tree.setInstances(referenceData); // Perform All K-Nearest-Neighbors. if (queryFile.length() != 0) { for (int i = 0; i < queryData.numInstances(); i++) { Instances out = tree.kNearestNeighbours(queryData.instance(i), k); } } else { for (int i = 0; i < referenceData.numInstances(); i++) { Instances out = tree.kNearestNeighbours(referenceData.instance(i), k); } } timer.StopTimer("total_time"); timer.PrintTimer("total_time"); } catch (IOException e) { System.err.println(USAGE); } catch (Exception e) { e.printStackTrace(); } }