// 构造一个tri-trainer分类器。 public Tritrainer( String classifier, String trainingIns_File, String testIns_File, double precentage) { try { this.classifier1 = (Classifier) Class.forName(classifier).newInstance(); this.classifier2 = (Classifier) Class.forName(classifier).newInstance(); this.classifier3 = (Classifier) Class.forName(classifier).newInstance(); Instances trainingInstances = Util.getInstances(trainingIns_File); // 将trainIns_File按照precentage和(1-precentage)的比例切割成labeledIns和unlabeledIns; int length = trainingInstances.numInstances(); int i = new Double(length * precentage).intValue(); labeledIns = new Instances(trainingInstances, 0); for (int j = 0; j < i; j++) { labeledIns.add(trainingInstances.firstInstance()); trainingInstances.delete(0); } unlabeledIns = trainingInstances; testIns = Util.getInstances(testIns_File); Init(); } catch (Exception e) { } }
// 将样本集中裁剪提取成m个样本组成的集合; public void SubSample(Instances inst, int m) { inst.randomize(new Random()); while (inst.numInstances() != m) { inst.delete(0); } // System.out.println("subsample:=" + inst.numInstances() + " m:=" + m ); }
// 计算h1,h2分类器共同的分类错误率; public double measureBothError(Classifier h1, Classifier h2, Instances test) { int m = test.numInstances(); double value1, value2, value; int error = 0, total = 0; try { for (int i = 0; i < m; i++) { value = test.instance(i).classValue(); value1 = h1.classifyInstance(test.instance(i)); value2 = h2.classifyInstance(test.instance(i)); // 两分类器做出相同决策 if (value1 == value2) { // 两分类器做出相同决策的样本数量 total++; // 两分类器做出相同错误决策 if (value != value1) { // 两分类器做出相同错误决策的样本数量 error++; } } } } catch (Exception e) { System.out.println(e); } // System.out.println("m:=" + m); // System.out.println("error:=" + error +"; total:=" + total); // 两个分类器的分类错误率= 两分类器做出相同错误决策的样本数量/两分类器做出相同决策的样本数量 return (error * 1.0) / total; }
private static IList<IList<IAgent>> clusteringUsingWeka( final IScope scope, final Clusterer clusterer, final IList<String> attributes, final IAddressableContainer<Integer, IAgent, Integer, IAgent> agents) throws GamaRuntimeException { Instances dataset = convertToInstances(scope, attributes, agents); try { clusterer.buildClusterer(dataset); IList<IList<IAgent>> groupes = GamaListFactory.create(Types.LIST.of(Types.AGENT)); for (int i = 0; i < clusterer.numberOfClusters(); i++) { groupes.add(GamaListFactory.<IAgent>create(Types.AGENT)); } for (int i = 0; i < dataset.numInstances(); i++) { Instance inst = dataset.instance(i); int clusterIndex = -1; clusterIndex = clusterer.clusterInstance(inst); IList<IAgent> groupe = groupes.get(clusterIndex); groupe.add(agents.get(scope, i)); } return groupes; } catch (Exception e) { return null; } }
/** * Generate artificial training examples. * * @param artSize size of examples set to create * @param data training data * @return the set of unlabeled artificial examples */ protected Instances generateArtificialData(int artSize, Instances data) { int numAttributes = data.numAttributes(); Instances artData = new Instances(data, artSize); double[] att; Instance artInstance; for (int i = 0; i < artSize; i++) { att = new double[numAttributes]; for (int j = 0; j < numAttributes; j++) { if (data.attribute(j).isNominal()) { // Select nominal value based on the frequency of occurence in the training data double[] stats = (double[]) m_AttributeStats.get(j); att[j] = (double) selectIndexProbabilistically(stats); } else if (data.attribute(j).isNumeric()) { // Generate numeric value from the Guassian distribution // defined by the mean and std dev of the attribute double[] stats = (double[]) m_AttributeStats.get(j); att[j] = (m_Random.nextGaussian() * stats[1]) + stats[0]; } else System.err.println("Decorate can only handle numeric and nominal values."); } artInstance = new Instance(1.0, att); artData.add(artInstance); } return artData; }
/** * initializes the algorithm * * @param data the data to work with * @throws Exception if m_SVM is null */ protected void init(Instances data) throws Exception { if (m_SVM == null) { throw new Exception("SVM not initialized in optimizer. Use RegOptimizer.setSVMReg()"); } m_C = m_SVM.getC(); m_data = data; m_classIndex = data.classIndex(); m_nInstances = data.numInstances(); // Initialize kernel m_kernel = Kernel.makeCopy(m_SVM.getKernel()); m_kernel.buildKernel(data); // init m_target m_target = new double[m_nInstances]; for (int i = 0; i < m_nInstances; i++) { m_target[i] = data.instance(i).classValue(); } m_random = new Random(m_nSeed); // initialize alpha and alpha* array to all zero m_alpha = new double[m_target.length]; m_alphaStar = new double[m_target.length]; m_supportVectors = new SMOset(m_nInstances); m_b = 0.0; m_nEvals = 0; m_nCacheHits = -1; }
/** * Generates the classifier. * * @param instances set of instances serving as training data * @throws Exception if the classifier has not been generated successfully */ public void buildClassifier(Instances instances) throws Exception { if (!(m_Classifier instanceof WeightedInstancesHandler)) { throw new IllegalArgumentException("Classifier must be a " + "WeightedInstancesHandler!"); } // can classifier handle the data? getCapabilities().testWithFail(instances); // remove instances with missing class instances = new Instances(instances); instances.deleteWithMissingClass(); // only class? -> build ZeroR model if (instances.numAttributes() == 1) { System.err.println( "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!"); m_ZeroR = new weka.classifiers.rules.ZeroR(); m_ZeroR.buildClassifier(instances); return; } else { m_ZeroR = null; } m_Train = new Instances(instances, 0, instances.numInstances()); m_NNSearch.setInstances(m_Train); }
/** * wrap up various variables to save memeory and do some housekeeping after optimization has * finished. * * @throws Exception if something goes wrong */ protected void wrapUp() throws Exception { m_target = null; m_nEvals = m_kernel.numEvals(); m_nCacheHits = m_kernel.numCacheHits(); if ((m_SVM.getKernel() instanceof PolyKernel) && ((PolyKernel) m_SVM.getKernel()).getExponent() == 1.0) { // convert alpha's to weights double[] weights = new double[m_data.numAttributes()]; for (int k = m_supportVectors.getNext(-1); k != -1; k = m_supportVectors.getNext(k)) { for (int j = 0; j < weights.length; j++) { if (j != m_classIndex) { weights[j] += (m_alpha[k] - m_alphaStar[k]) * m_data.instance(k).value(j); } } } m_weights = weights; // release memory m_alpha = null; m_alphaStar = null; m_kernel = null; } m_bModelBuilt = true; }
private static Instances convertToInstances( final IScope scope, final IList<String> attributes, final IAddressableContainer<Integer, IAgent, Integer, IAgent> agents) throws GamaRuntimeException { FastVector attribs = new FastVector(); for (String att : attributes) { attribs.addElement(new Attribute(att)); } Instances dataset = new Instances(scope.getAgentScope().getName(), attribs, agents.length(scope)); for (IAgent ag : agents.iterable(scope)) { int nb = attributes.size(); double vals[] = new double[nb]; for (int i = 0; i < nb; i++) { String attrib = attributes.get(i); Double var = Cast.asFloat(scope, ag.getDirectVarValue(scope, attrib)); vals[i] = var; } Instance instance = new Instance(1, vals); dataset.add(instance); } return dataset; }
/** * Initializes the ranges using all instances of the dataset. Sets m_Ranges. * * @return the ranges */ public double[][] initializeRanges() { if (m_Data == null) { m_Ranges = null; return m_Ranges; } int numAtt = m_Data.numAttributes(); double[][] ranges = new double[numAtt][3]; if (m_Data.numInstances() <= 0) { initializeRangesEmpty(numAtt, ranges); m_Ranges = ranges; return m_Ranges; } else { // initialize ranges using the first instance updateRangesFirst(m_Data.instance(0), numAtt, ranges); } // update ranges, starting from the second for (int i = 1; i < m_Data.numInstances(); i++) { updateRanges(m_Data.instance(i), numAtt, ranges); } m_Ranges = ranges; return m_Ranges; }
/** initializes the attribute indices. */ protected void initializeAttributeIndices() { m_AttributeIndices.setUpper(m_Data.numAttributes() - 1); m_ActiveIndices = new boolean[m_Data.numAttributes()]; for (int i = 0; i < m_ActiveIndices.length; i++) { m_ActiveIndices[i] = m_AttributeIndices.isInRange(i); } }
/** * Private function to compute default number of accurate instances in the specified data for the * consequent of the rule * * @param data the data in question * @return the default accuracy number */ private double computeDefAccu(Instances data) { double defAccu = 0; for (int i = 0; i < data.numInstances(); i++) { Instance inst = data.instance(i); if ((int) inst.classValue() == (int) m_Consequent) defAccu += inst.weight(); } return defAccu; }
/** * GetKs - return [K_1,K_2,...,K_L] where each Y_j \in {1,...,K_j}. In the multi-label case, K[j] * = 2 for all j = 1,...,L. * * @param D a dataset * @return an array of the number of values that each label can take */ private static int[] getKs(Instances D) { int L = D.classIndex(); int K[] = new int[L]; for (int k = 0; k < L; k++) { K[k] = D.attribute(k).numValues(); } return K; }
public int getClusterNumber(String objectID) { int datasetIndex = -1; for (int i = 0; i < m_Sequences.numInstances(); i++) { if (objectID.equals(m_Sequences.instance(i).stringValue(0))) datasetIndex = i; } return cluster[datasetIndex]; }
/** * Discretizes an attribute using bins. * * @param instances the dataset to discretize. * @param attIndex the attribute index. * @param bins the bins. */ public static void discretize(Instances instances, int attIndex, Bins bins) { Attribute attribute = instances.getAttributes().get(attIndex); BinnedAttribute binnedAttribute = new BinnedAttribute(attribute.getName(), bins); binnedAttribute.setIndex(attribute.getIndex()); instances.getAttributes().set(attIndex, binnedAttribute); for (Instance instance : instances) { int v = bins.getIndex(instance.getValue(attribute.getIndex())); instance.setValue(attribute.getIndex(), v); } }
protected void initMinMax(Instances data) { m_Min = new double[data.numAttributes()]; m_Max = new double[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { m_Min[i] = m_Max[i] = Double.NaN; } for (int i = 0; i < data.numInstances(); i++) { updateMinMax(data.instance(i)); } }
public static Instances getDatasetDB(String tableName) throws Exception { Statement st = null; ResultSet rs = null; Connection conn = getDBConn(); String query; switch (tableName) { case "all": query = Config.selectAll; break; case "chiller1": query = Config.selectChiller1; break; case "chiller2": query = Config.selectChiller2; break; case "consumption": query = Config.selectConsumption; break; default: query = Config.selectAll; break; } st = conn.createStatement(); rs = st.executeQuery(query); ResultSetMetaData rsmd = rs.getMetaData(); ArrayList<Attribute> attributes = new ArrayList<Attribute>(); int numAtts = rsmd.getColumnCount(); for (int i = 1; i <= numAtts; i++) { String attName = (rsmd.getColumnName(i)); Attribute att = new Attribute(attName); attributes.add(att); } Instances data = new Instances(tableName, attributes, 0); weka.filters.unsupervised.attribute.Add addAtt = new weka.filters.unsupervised.attribute.Add(); addAtt.setOptions(weka.core.Utils.splitOptions("-T NOM -N class -L T,F -C last")); addAtt.setInputFormat(data); data = Filter.useFilter(data, addAtt); while (rs.next()) { double[] values = new double[numAtts + 1]; for (int i = 1; i <= numAtts; i++) { values[i - 1] = rs.getDouble(i); } data.add(new DenseInstance(1.0, values)); } return data; }
/** * Computes the error in classification on the given data. * * @param data the instances to be classified * @return classification error * @exception Exception if error can not be computed successfully */ protected double computeError(Instances data) throws Exception { double error = 0.0; int numInstances = data.numInstances(); Instance curr; for (int i = 0; i < numInstances; i++) { curr = data.instance(i); // Check if the instance has been misclassified if (curr.classValue() != ((int) classifyInstance(curr))) error++; } return (error / numInstances); }
/** * Parses a given list of options. * * <p> * <!-- options-start --> * Valid options are: * * <p> * * <pre> -i <the input file> * The input file</pre> * * <pre> -o <the output file> * The output file</pre> * * <pre> -c <the class index> * The class index</pre> * * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String outputString = Utils.getOption('o', options); String inputString = Utils.getOption('i', options); String indexString = Utils.getOption('c', options); ArffLoader loader = new ArffLoader(); resetOptions(); // parse index int index = -1; if (indexString.length() != 0) { if (indexString.equals("first")) index = 0; else { if (indexString.equals("last")) index = -1; else index = Integer.parseInt(indexString); } } if (inputString.length() != 0) { try { File input = new File(inputString); loader.setFile(input); Instances inst = loader.getDataSet(); if (index == -1) inst.setClassIndex(inst.numAttributes() - 1); else inst.setClassIndex(index); setInstances(inst); } catch (Exception ex) { throw new IOException( "No data set loaded. Data set has to be arff format (Reason: " + ex.toString() + ")."); } } else throw new IOException("No data set to save."); if (outputString.length() != 0) { // add appropriate file extension if (!outputString.endsWith(getFileExtension())) { if (outputString.lastIndexOf('.') != -1) outputString = (outputString.substring(0, outputString.lastIndexOf('.'))) + getFileExtension(); else outputString = outputString + getFileExtension(); } try { File output = new File(outputString); setFile(output); } catch (Exception ex) { throw new IOException("Cannot create output file."); } } if (index == -1) index = getInstances().numAttributes() - 1; getInstances().setClassIndex(index); }
/** * Labels the artificially generated data. * * @param artData the artificially generated instances * @exception Exception if instances cannot be labeled successfully */ protected void labelData(Instances artData) throws Exception { Instance curr; double[] probs; for (int i = 0; i < artData.numInstances(); i++) { curr = artData.instance(i); // compute the class membership probs predicted by the current ensemble probs = distributionForInstance(curr); // select class label inversely proportional to the ensemble predictions curr.setClassValue(inverseLabel(probs)); } }
/** * Inserts an instance into the hash table * * @param inst instance to be inserted * @param instA to create the hash key from * @throws Exception if the instance can't be inserted */ private void insertIntoTable(Instance inst, double[] instA) throws Exception { double[] tempClassDist2; double[] newDist; DecisionTableHashKey thekey; if (instA != null) { thekey = new DecisionTableHashKey(instA); } else { thekey = new DecisionTableHashKey(inst, inst.numAttributes(), false); } // see if this one is already in the table tempClassDist2 = (double[]) m_entries.get(thekey); if (tempClassDist2 == null) { if (m_classIsNominal) { newDist = new double[m_theInstances.classAttribute().numValues()]; // Leplace estimation for (int i = 0; i < m_theInstances.classAttribute().numValues(); i++) { newDist[i] = 1.0; } newDist[(int) inst.classValue()] = inst.weight(); // add to the table m_entries.put(thekey, newDist); } else { newDist = new double[2]; newDist[0] = inst.classValue() * inst.weight(); newDist[1] = inst.weight(); // add to the table m_entries.put(thekey, newDist); } } else { // update the distribution for this instance if (m_classIsNominal) { tempClassDist2[(int) inst.classValue()] += inst.weight(); // update the table m_entries.put(thekey, tempClassDist2); } else { tempClassDist2[0] += (inst.classValue() * inst.weight()); tempClassDist2[1] += inst.weight(); // update the table m_entries.put(thekey, tempClassDist2); } } }
/** * Adds the supplied instance to the training set. * * @param instance the instance to add * @throws Exception if instance could not be incorporated successfully */ public void updateClassifier(Instance instance) throws Exception { if (m_Train == null) { throw new Exception("No training instance structure set!"); } else if (m_Train.equalHeaders(instance.dataset()) == false) { throw new Exception( "Incompatible instance types\n" + m_Train.equalHeadersMsg(instance.dataset())); } if (!instance.classIsMissing()) { m_NNSearch.update(instance); m_Train.add(instance); } }
/** * Initializes a gain ratio attribute evaluator. Discretizes all attributes that are numeric. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been generated successfully */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); m_trainInstances = data; m_classIndex = m_trainInstances.classIndex(); m_numAttribs = m_trainInstances.numAttributes(); m_numInstances = m_trainInstances.numInstances(); Discretize disTransform = new Discretize(); disTransform.setUseBetterEncoding(true); disTransform.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, disTransform); m_numClasses = m_trainInstances.attribute(m_classIndex).numValues(); }
/** * Splits a dataset according to the values of a nominal attribute. * * @param data the data which is to be split * @param att the attribute to be used for splitting * @return the sets of instances produced by the split */ private Instances[] splitData(Instances data, Attribute att) { Instances[] splitData = new Instances[att.numValues()]; for (int j = 0; j < att.numValues(); j++) { splitData[j] = new Instances(data, data.numInstances()); } Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); splitData[(int) inst.value(att)].add(inst); } for (int i = 0; i < splitData.length; i++) { splitData[i].compactify(); } return splitData; }
/** * SVMOutput of an instance in the training set, m_data This uses the cache, unlike * SVMOutput(Instance) * * @param index index of the training instance in m_data * @return the SVM output * @throws Exception if something goes wrong */ protected double SVMOutput(int index) throws Exception { double result = -m_b; for (int i = m_supportVectors.getNext(-1); i != -1; i = m_supportVectors.getNext(i)) { result += (m_alpha[i] - m_alphaStar[i]) * m_kernel.eval(index, i, m_data.instance(index)); } return result; }
/** * Compute the value of the objective function. * * @return the score * @throws Exception if something goes wrong */ protected double getScore() throws Exception { double res = 0; double t = 0, t2 = 0; double sumAlpha = 0.0; for (int i = 0; i < m_nInstances; i++) { sumAlpha += (m_alpha[i] - m_alphaStar[i]); for (int j = 0; j < m_nInstances; j++) { t += (m_alpha[i] - m_alphaStar[i]) * (m_alpha[j] - m_alphaStar[j]) * m_kernel.eval(i, j, m_data.instance(i)); } // switch(m_nLossType) { // case L1: // t2 += m_data.instance(i).classValue() * (m_alpha[i] - m_alpha_[i]); // break; // case L2: // t2 += m_data.instance(i).classValue() * (m_alpha[i] - m_alpha_[i]) - (0.5/m_SVM.getC()) // * (m_alpha[i]*m_alpha[i] + m_alpha_[i]*m_alpha_[i]); // break; // case HUBER: // t2 += m_data.instance(i).classValue() * (m_alpha[i] - m_alpha_[i]) - // (0.5*m_SVM.getEpsilon()/m_SVM.getC()) * (m_alpha[i]*m_alpha[i] + m_alpha_[i]*m_alpha_[i]); // break; // case EPSILON: // t2 += m_data.instance(i).classValue() * (m_alpha[i] - m_alphaStar[i]) - m_epsilon * // (m_alpha[i] + m_alphaStar[i]); t2 += m_target[i] * (m_alpha[i] - m_alphaStar[i]) - m_epsilon * (m_alpha[i] + m_alphaStar[i]); // break; // } } res += -0.5 * t + t2; return res; }
/** * * * <pre> * Usage: Discretizer * -r attribute file path * -i input dataset path * -o output dataset path * [-d] discretized attribute file path * [-m] output attribute file path * [-n] maximum num of bins (default: 256) * [-t] training file path * </pre> * * @param args the command line arguments. * @throws Exception */ public static void main(String[] args) throws Exception { Options app = new Options(); CmdLineParser parser = new CmdLineParser(Discretizer.class, app); try { parser.parse(args); if (app.maxNumBins < 0) { throw new IllegalArgumentException(); } } catch (IllegalArgumentException e) { parser.printUsage(); System.exit(1); } List<Attribute> attributes = null; if (app.trainPath != null) { Instances trainSet = InstancesReader.read(app.attPath, app.trainPath); attributes = trainSet.getAttributes(); for (int i = 0; i < attributes.size(); i++) { Attribute attribute = attributes.get(i); if (attribute.getType() == Type.NUMERIC) { // Only discretize numeric attributes Discretizer.discretize(trainSet, i, app.maxNumBins); } } } else if (app.disAttPath != null) { attributes = AttributesReader.read(app.disAttPath).v1; } else { parser.printUsage(); System.exit(1); } Instances instances = InstancesReader.read(app.attPath, app.inputPath); List<Attribute> attrs = instances.getAttributes(); for (int i = 0; i < attrs.size(); i++) { Attribute attr = attrs.get(i); if (attr.getType() == Type.NUMERIC) { BinnedAttribute binnedAttr = (BinnedAttribute) attributes.get(i); // Only discretize numeric attributes Discretizer.discretize(instances, i, binnedAttr.getBins()); } } if (app.outputAttPath != null) { InstancesWriter.write(instances, app.outputAttPath, app.outputPath); } else { InstancesWriter.write(instances, app.outputPath); } }
/** * Computes the entropy of a dataset. * * @param data the data for which entropy is to be computed * @return the entropy of the data's class distribution * @throws Exception if computation fails */ private double computeEntropy(Instances data) throws Exception { double[] classCounts = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); classCounts[(int) inst.classValue()]++; } double entropy = 0; for (int j = 0; j < data.numClasses(); j++) { if (classCounts[j] > 0) { entropy -= classCounts[j] * Utils.log2(classCounts[j]); } } entropy /= (double) data.numInstances(); return entropy + Utils.log2(data.numInstances()); }
/** * Generates a clusterer by the mean of spectral clustering algorithm. * * @param data set of instances serving as training data * @exception Exception if the clusterer has not been generated successfully */ public void buildClusterer(Instances data) throws java.lang.Exception { m_Sequences = new Instances(data); int n = data.numInstances(); int k = data.numAttributes(); DoubleMatrix2D w; if (useSparseMatrix) w = DoubleFactory2D.sparse.make(n, n); else w = DoubleFactory2D.dense.make(n, n); double[][] v1 = new double[n][]; for (int i = 0; i < n; i++) v1[i] = data.instance(i).toDoubleArray(); v = DoubleFactory2D.dense.make(v1); double sigma_sq = sigma * sigma; // Sets up similarity matrix for (int i = 0; i < n; i++) for (int j = i; j < n; j++) { /*double dist = distnorm2(v.viewRow(i), v.viewRow(j)); if((r == -1) || (dist < r)) { double sim = Math.exp(- (dist * dist) / (2 * sigma_sq)); w.set(i, j, sim); w.set(j, i, sim); }*/ /* String [] key = {data.instance(i).stringValue(0), data.instance(j).stringValue(0)}; System.out.println(key[0]); System.out.println(key[1]); System.out.println(simScoreMap.containsKey(key)); Double simValue = simScoreMap.get(key);*/ double sim = sim_matrix[i][j]; w.set(i, j, sim); w.set(j, i, sim); } // Partitions points int[][] p = partition(w, alpha_star); // Deploys results numOfClusters = p.length; cluster = new int[n]; for (int i = 0; i < p.length; i++) for (int j = 0; j < p[i].length; j++) cluster[p[i][j]] = i; // System.out.println("Final partition:"); // UtilsJS.printMatrix(p); // System.out.println("Cluster:\n"); // UtilsJS.printArray(cluster); this.numOfClusters = cluster[Utils.maxIndex(cluster)] + 1; // System.out.println("Num clusters:\t"+this.numOfClusters); }
protected void updateMinDistance( double[] minDistance, boolean[] selected, Instances data, Instance center) { for (int i = 0; i < selected.length; i++) if (!selected[i]) { double d = distance(center, data.instance(i)); if (d < minDistance[i]) minDistance[i] = d; } }