/**
 * Loads a dataset through a {@code DataSource} and makes sure it has a class attribute.
 *
 * <p>The class index reported by the loaded data is printed to standard output. If no class
 * index is set (-1), the last attribute becomes the class.
 *
 * @param file the location string handed to the {@code DataSource}
 * @return the loaded instances with a class index assigned
 * @throws Exception if the data cannot be loaded
 */
public static Instances getInstances(String file) throws Exception {
  Instances loaded = new DataSource(file).getDataSet();
  int reportedClassIndex = loaded.classIndex();
  System.out.println("Class index is : " + reportedClassIndex);
  if (reportedClassIndex == -1) {
    // no class declared by the source: fall back to the last attribute
    loaded.setClassIndex(loaded.numAttributes() - 1);
  }
  return loaded;
}
/** * Method for building this classifier. * * @param training the training instances * @param test the test instances * @throws Exception if something goes wrong */ public void buildClassifier(Instances training, Instances test) throws Exception { m_ClassifierBuilt = true; m_Random = new Random(m_Seed); m_Trainset = training; m_Testset = test; // set class index? if ((m_Trainset.classIndex() == -1) || (m_Testset.classIndex() == -1)) { m_Trainset.setClassIndex(m_Trainset.numAttributes() - 1); m_Testset.setClassIndex(m_Trainset.numAttributes() - 1); } // are datasets correct? checkData(); // any other data restrictions not met? checkRestrictions(); // generate sets generateSets(); // performs the restarts/iterations build(); m_Random = null; }
/** * Sets the format of the input instances. * * @param instanceInfo an Instances object containing the input instance structure (any instances * contained in the object are ignored - only the structure is required). * @return true if the outputFormat may be collected immediately * @throws UnsupportedAttributeTypeException if selected attributes are not numeric or nominal. */ public boolean setInputFormat(Instances instanceInfo) throws Exception { if ((instanceInfo.classIndex() > 0) && (!getFillWithMissing())) { throw new IllegalArgumentException( "TimeSeriesTranslate: Need to fill in missing values " + "using appropriate option when class index is set."); } super.setInputFormat(instanceInfo); // Create the output buffer Instances outputFormat = new Instances(instanceInfo, 0); for (int i = 0; i < instanceInfo.numAttributes(); i++) { if (i != instanceInfo.classIndex()) { if (m_SelectedCols.isInRange(i)) { if (outputFormat.attribute(i).isNominal() || outputFormat.attribute(i).isNumeric()) { outputFormat.renameAttribute( i, outputFormat.attribute(i).name() + (m_InstanceRange < 0 ? '-' : '+') + Math.abs(m_InstanceRange)); } else { throw new UnsupportedAttributeTypeException( "Only numeric and nominal attributes may be " + " manipulated in time series."); } } } } outputFormat.setClassIndex(instanceInfo.classIndex()); setOutputFormat(outputFormat); return true; }
/** * Builds a regression model for the given data. * * @param data the training data to be used for generating the linear regression function * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { if (!m_checksTurnedOff) { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); } // Preprocess instances if (!m_checksTurnedOff) { m_TransformFilter = new NominalToBinary(); m_TransformFilter.setInputFormat(data); data = Filter.useFilter(data, m_TransformFilter); m_MissingFilter = new ReplaceMissingValues(); m_MissingFilter.setInputFormat(data); data = Filter.useFilter(data, m_MissingFilter); data.deleteWithMissingClass(); } else { m_TransformFilter = null; m_MissingFilter = null; } m_ClassIndex = data.classIndex(); m_TransformedData = data; // Turn all attributes on for a start m_SelectedAttributes = new boolean[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { if (i != m_ClassIndex) { m_SelectedAttributes[i] = true; } } m_Coefficients = null; // Compute means and standard deviations m_Means = new double[data.numAttributes()]; m_StdDevs = new double[data.numAttributes()]; for (int j = 0; j < data.numAttributes(); j++) { if (j != data.classIndex()) { m_Means[j] = data.meanOrMode(j); m_StdDevs[j] = Math.sqrt(data.variance(j)); if (m_StdDevs[j] == 0) { m_SelectedAttributes[j] = false; } } } m_ClassStdDev = Math.sqrt(data.variance(m_TransformedData.classIndex())); m_ClassMean = data.meanOrMode(m_TransformedData.classIndex()); // Perform the regression findBestModel(); // Save memory m_TransformedData = new Instances(data, 0); }
/** Builds the clusters */ private void buildClusterer() throws Exception { if (m_trainingSet.classIndex() < 0) m_Clusterer.buildClusterer(m_trainingSet); else { // class based evaluation if class attribute is set Remove removeClass = new Remove(); removeClass.setAttributeIndices("" + (m_trainingSet.classIndex() + 1)); removeClass.setInvertSelection(false); removeClass.setInputFormat(m_trainingSet); Instances clusterTrain = Filter.useFilter(m_trainingSet, removeClass); m_Clusterer.buildClusterer(clusterTrain); } }
/** * Constructs an instance suitable for passing to the model for scoring * * @param incoming the incoming instance * @return an instance with values mapped to be consistent with what the model is expecting */ protected Instance mapIncomingFieldsToModelFields(Instance incoming) { Instances modelHeader = m_model.getHeader(); double[] vals = new double[modelHeader.numAttributes()]; for (int i = 0; i < modelHeader.numAttributes(); i++) { if (m_attributeMap[i] < 0) { // missing or type mismatch vals[i] = Utils.missingValue(); continue; } Attribute modelAtt = modelHeader.attribute(i); Attribute incomingAtt = incoming.dataset().attribute(m_attributeMap[i]); if (incoming.isMissing(incomingAtt.index())) { vals[i] = Utils.missingValue(); continue; } if (modelAtt.isNumeric()) { vals[i] = incoming.value(m_attributeMap[i]); } else if (modelAtt.isNominal()) { String incomingVal = incoming.stringValue(m_attributeMap[i]); int modelIndex = modelAtt.indexOfValue(incomingVal); if (modelIndex < 0) { vals[i] = Utils.missingValue(); } else { vals[i] = modelIndex; } } else if (modelAtt.isString()) { vals[i] = 0; modelAtt.setStringValue(incoming.stringValue(m_attributeMap[i])); } } if (modelHeader.classIndex() >= 0) { // set class to missing value vals[modelHeader.classIndex()] = Utils.missingValue(); } Instance newInst = null; if (incoming instanceof SparseInstance) { newInst = new SparseInstance(incoming.weight(), vals); } else { newInst = new DenseInstance(incoming.weight(), vals); } newInst.setDataset(modelHeader); return newInst; }
/** * Determines the output format based on the input format and returns this. In case the output * format cannot be returned immediately, i.e., hasImmediateOutputFormat() returns false, then * this method will called from batchFinished() after the call of preprocess(Instances), in which, * e.g., statistics for the actual processing step can be gathered. * * @param inputFormat the input format to base the output format on * @return the output format * @throws Exception in case the determination goes wrong */ protected Instances determineOutputFormat(Instances inputFormat) throws Exception { Instances result; FastVector atts; int i; int numAtts; Vector<Integer> indices; Vector<Integer> subset; Random rand; int index; // determine the number of attributes numAtts = inputFormat.numAttributes(); if (inputFormat.classIndex() > -1) numAtts--; if (m_NumAttributes < 1) { numAtts = (int) Math.round((double) numAtts * m_NumAttributes); } else { if (m_NumAttributes < numAtts) numAtts = (int) m_NumAttributes; } if (getDebug()) System.out.println("# of atts: " + numAtts); // determine random indices indices = new Vector<Integer>(); for (i = 0; i < inputFormat.numAttributes(); i++) { if (i == inputFormat.classIndex()) continue; indices.add(i); } subset = new Vector<Integer>(); rand = new Random(m_Seed); for (i = 0; i < numAtts; i++) { index = rand.nextInt(indices.size()); subset.add(indices.get(index)); indices.remove(index); } Collections.sort(subset); if (inputFormat.classIndex() > -1) subset.add(inputFormat.classIndex()); if (getDebug()) System.out.println("indices: " + subset); // generate output format atts = new FastVector(); m_Indices = new int[subset.size()]; for (i = 0; i < subset.size(); i++) { atts.addElement(inputFormat.attribute(subset.get(i))); m_Indices[i] = subset.get(i); } result = new Instances(inputFormat.relationName(), atts, 0); if (inputFormat.classIndex() > -1) result.setClassIndex(result.numAttributes() - 1); return result; }
public double ExpectedClassificationError(Instances pool, int attr_i) { // initialize alpha's to one int alpha[][][]; int NumberOfFeatures = pool.numAttributes() - 1; int NumberOfLabels = pool.numClasses(); alpha = new int[NumberOfFeatures][NumberOfLabels][]; for (int i = 0; i < NumberOfFeatures; i++) for (int j = 0; j < NumberOfLabels; j++) alpha[i][j] = new int[pool.attribute(i).numValues()]; for (int i = 0; i < NumberOfFeatures; i++) for (int j = 0; j < NumberOfLabels; j++) for (int k = 0; k < alpha[i][j].length; k++) alpha[i][j][k] = 1; // construct alpha's for (int i = 0; i < NumberOfFeatures; i++) // for each attribute { if (i == pool.classIndex()) // skip the class attribute i++; for (Enumeration<Instance> e = pool.enumerateInstances(); e.hasMoreElements(); ) // for each instance { Instance inst = e.nextElement(); if (!inst.isMissing(i)) // if attribute i is not missing (i.e. its been bought) { int j = (int) inst.classValue(); int k = (int) inst.value(i); alpha[i][j][k]++; } } } return ExpectedClassificationError(alpha, attr_i); }
/**
 * Returns a string representation of the classifier: first one line per class with its
 * probability, then a tab-separated table with one row per non-class attribute holding the
 * exponentiated per-class word values.
 *
 * @return a string representation of the classifier
 */
public String toString() {
  StringBuilder text = new StringBuilder();

  // section 1: class probabilities
  text.append("The independent probability of a class\n--------------------------------------\n");
  for (int cls = 0; cls < m_numClasses; cls++) {
    text.append(m_headerInfo.classAttribute().value(cls));
    text.append("\t");
    text.append(m_probOfClass[cls]);
    text.append("\n");
  }

  // section 2: header row with the class labels
  text.append(
      "\nThe probability of a word given the class\n-----------------------------------------\n\t");
  for (int cls = 0; cls < m_numClasses; cls++) {
    text.append(m_headerInfo.classAttribute().value(cls));
    text.append("\t");
  }
  text.append("\n");

  // section 2: one row per word (the class attribute itself is skipped)
  for (int word = 0; word < m_numAttributes; word++) {
    if (word == m_headerInfo.classIndex()) {
      continue;
    }
    text.append(m_headerInfo.attribute(word).name()).append("\t");
    for (int cls = 0; cls < m_numClasses; cls++) {
      // stored values are exponentiated for display
      text.append(Math.exp(m_probOfWordGivenClass[cls][word])).append("\t");
    }
    text.append("\n");
  }

  return text.toString();
}
/** * Returns the Capabilities of this filter, customized based on the data. I.e., if removes all * class capabilities, in case there's not class attribute present or removes the NO_CLASS * capability, in case that there's a class present. * * @param data the data to use for customization * @return the capabilities of this object, based on the data * @see #getCapabilities() */ public Capabilities getCapabilities(Instances data) { Capabilities result; Capabilities classes; Iterator iter; Capability cap; result = getCapabilities(); // no class? -> remove all class capabilites apart from NO_CLASS if (data.classIndex() == -1) { classes = result.getClassCapabilities(); iter = classes.capabilities(); while (iter.hasNext()) { cap = (Capability) iter.next(); if (cap != Capability.NO_CLASS) { result.disable(cap); result.disableDependency(cap); } } } // class? -> remove NO_CLASS else { result.disable(Capability.NO_CLASS); result.disableDependency(Capability.NO_CLASS); } return result; }
/** * initializes the algorithm * * @param data the data to work with * @throws Exception if m_SVM is null */ protected void init(Instances data) throws Exception { if (m_SVM == null) { throw new Exception("SVM not initialized in optimizer. Use RegOptimizer.setSVMReg()"); } m_C = m_SVM.getC(); m_data = data; m_classIndex = data.classIndex(); m_nInstances = data.numInstances(); // Initialize kernel m_kernel = Kernel.makeCopy(m_SVM.getKernel()); m_kernel.buildKernel(data); // init m_target m_target = new double[m_nInstances]; for (int i = 0; i < m_nInstances; i++) { m_target[i] = data.instance(i).classValue(); } m_random = new Random(m_nSeed); // initialize alpha and alpha* array to all zero m_alpha = new double[m_target.length]; m_alphaStar = new double[m_target.length]; m_supportVectors = new SMOset(m_nInstances); m_b = 0.0; m_nEvals = 0; m_nCacheHits = -1; }
/** test the batch saving/loading (via setFile(File)). */ public void testBatch() { Instances data; try { // save m_Saver.setInstances(m_Instances); m_Saver.setFile(new File(m_ExportFilename)); m_Saver.writeBatch(); // load ((AbstractFileLoader) m_Loader).setFile(new File(m_ExportFilename)); data = m_Loader.getDataSet(); // compare data try { if (m_Instances.classIndex() != data.classIndex()) { data.setClassIndex(m_Instances.classIndex()); } compareDatasets(m_Instances, data); } catch (Exception e) { fail("Incremental load failed (datasets differ): " + e.toString()); } } catch (Exception e) { e.printStackTrace(); fail("Batch save/load failed: " + e.toString()); } }
/** tests whether a URL can be loaded (via setURL(URL)). */ public void testURLSourcedLoader() { Instances data; if (!(getLoader() instanceof URLSourcedLoader)) { return; } try { // save m_Saver.setInstances(m_Instances); m_Saver.setFile(new File(m_ExportFilename)); m_Saver.writeBatch(); // load ((URLSourcedLoader) m_Loader).setURL(new File(m_ExportFilename).toURI().toURL().toString()); data = m_Loader.getDataSet(); // compare data try { if (m_Instances.classIndex() != data.classIndex()) { data.setClassIndex(m_Instances.classIndex()); } compareDatasets(m_Instances, data); } catch (Exception e) { fail("URL load failed (datasets differ): " + e.toString()); } } catch (Exception e) { e.printStackTrace(); fail("URL load failed: " + e.toString()); } }
/** * Signify that this batch of input to the filter is finished. If the filter requires all * instances prior to filtering, output() may now be called to retrieve the filtered instances. * * @return true if there are instances pending output * @throws IllegalStateException if no input structure has been defined */ public boolean batchFinished() throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_attStats == null) { Instances input = getInputFormat(); m_attStats = new AttributeStats[input.numAttributes()]; for (int i = 0; i < input.numAttributes(); i++) { if (input.attribute(i).isNumeric() && (input.classIndex() != i)) { m_attStats[i] = input.attributeStats(i); } } // Convert pending input instances for (int i = 0; i < input.numInstances(); i++) { convertInstance(input.instance(i)); } } // Free memory flushInput(); m_NewBatch = true; return (numPendingOutput() != 0); }
/** trains the classifier */ @Override public void train() throws Exception { if (_train.classIndex() == -1) _train.setClassIndex(_train.numAttributes() - 1); _cl.buildClassifier(_train); // evaluate classifier and print some statistics evaluate(); }
/** tests whether data can be loaded via setSource() with a file stream. */ public void testLoaderWithStream() { Instances data; try { // save m_Saver.setInstances(m_Instances); m_Saver.setFile(new File(m_ExportFilename)); m_Saver.writeBatch(); // load m_Loader.setSource(new FileInputStream(new File(m_ExportFilename))); data = m_Loader.getDataSet(); // compare data try { if (m_Instances.classIndex() != data.classIndex()) { data.setClassIndex(m_Instances.classIndex()); } compareDatasets(m_Instances, data); } catch (Exception e) { fail("File stream loading failed (datasets differ): " + e.toString()); } } catch (Exception e) { e.printStackTrace(); fail("File stream loading failed: " + e.toString()); } }
/**
 * GetKs - return [K_1,K_2,...,K_L] where K_j is the number of values of the j-th attribute.
 *
 * <p>L is taken from {@code D.classIndex()}, so the first L attributes are the ones counted
 * (presumably the label attributes; for binary labels every entry is 2).
 *
 * @param D a dataset
 * @return an array of the number of values that each of the first L attributes can take
 */
private static int[] getKs(Instances D) {
  final int labelCount = D.classIndex();
  int[] valueCounts = new int[labelCount];
  int pos = 0;
  while (pos < labelCount) {
    valueCounts[pos] = D.attribute(pos).numValues();
    pos++;
  }
  return valueCounts;
}
/** * Return the full data set. If the structure hasn't yet been determined by a call to getStructure * then method should do so before processing the rest of the data set. * * @return the structure of the data set as an empty set of Instances * @throws IOException if there is no source or parsing fails */ public Instances getDataSet() throws IOException { if (getDirectory() == null) throw new IOException("No directory/source has been specified"); String directoryPath = getDirectory().getAbsolutePath(); ArrayList<String> classes = new ArrayList<String>(); Enumeration enm = getStructure().classAttribute().enumerateValues(); while (enm.hasMoreElements()) classes.add((String) enm.nextElement()); Instances data = getStructure(); int fileCount = 0; for (int k = 0; k < classes.size(); k++) { String subdirPath = (String) classes.get(k); File subdir = new File(directoryPath + File.separator + subdirPath); String[] files = subdir.list(); for (int j = 0; j < files.length; j++) { try { fileCount++; if (getDebug()) System.err.println("processing " + fileCount + " : " + subdirPath + " : " + files[j]); double[] newInst = null; if (m_OutputFilename) newInst = new double[3]; else newInst = new double[2]; File txt = new File(directoryPath + File.separator + subdirPath + File.separator + files[j]); BufferedReader is; if (m_charSet == null || m_charSet.length() == 0) { is = new BufferedReader(new InputStreamReader(new FileInputStream(txt))); } else { is = new BufferedReader(new InputStreamReader(new FileInputStream(txt), m_charSet)); } StringBuffer txtStr = new StringBuffer(); int c; while ((c = is.read()) != -1) { txtStr.append((char) c); } newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString()); if (m_OutputFilename) newInst[1] = (double) data.attribute(1).addStringValue(subdirPath + File.separator + files[j]); newInst[data.classIndex()] = (double) k; data.add(new DenseInstance(1.0, newInst)); is.close(); } catch (Exception e) { System.err.println( "failed to 
convert file: " + directoryPath + File.separator + subdirPath + File.separator + files[j]); } } } return data; }
/** evaluates the classifier */ @Override public void evaluate() throws Exception { // evaluate classifier and print some statistics if (_test.classIndex() == -1) _test.setClassIndex(_test.numAttributes() - 1); Evaluation eval = new Evaluation(_train); eval.evaluateModel(_cl, _test); System.out.println(eval.toSummaryString("\nResults\n======\n", false)); System.out.println(eval.toMatrixString()); }
/**
 * Reads {@code PCAin.arff} from the working directory, defaults the class to the last attribute
 * when none is set, and hands the data to {@code pca}.
 *
 * @param args ignored
 * @throws Exception if the file cannot be read or the analysis fails
 */
public static void main(String[] args) throws Exception {
  BufferedReader reader = new BufferedReader(new FileReader("PCAin.arff"));
  Instances data;
  try {
    data = new Instances(reader);
  } finally {
    // close the file even when parsing the ARFF content fails
    reader.close();
  }

  if (data.classIndex() == -1) {
    data.setClassIndex(data.numAttributes() - 1);
  }

  pca(data);
}
/** * Searches the attribute subset space using a genetic algorithm. * * @param ASEval the attribute evaluator to guide the search * @param data the training instances. * @return an array (not necessarily ordered) of selected attribute indexes * @throws Exception if the search can't be completed */ @Override public int[] search(ASEvaluation ASEval, Instances data) throws Exception { m_best = null; m_generationReports = new StringBuffer(); if (!(ASEval instanceof SubsetEvaluator)) { throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator!"); } if (ASEval instanceof UnsupervisedSubsetEvaluator) { m_hasClass = false; } else { m_hasClass = true; m_classIndex = data.classIndex(); } SubsetEvaluator ASEvaluator = (SubsetEvaluator) ASEval; m_numAttribs = data.numAttributes(); m_startRange.setUpper(m_numAttribs - 1); if (!(getStartSet().equals(""))) { m_starting = m_startRange.getSelection(); } // initial random population m_lookupTable = new Hashtable<BitSet, GABitSet>(m_lookupTableSize); m_random = new Random(m_seed); m_population = new GABitSet[m_popSize]; // set up random initial population initPopulation(); evaluatePopulation(ASEvaluator); populationStatistics(); scalePopulation(); checkBest(); m_generationReports.append(populationReport(0)); boolean converged; for (int i = 1; i <= m_maxGenerations; i++) { generation(); evaluatePopulation(ASEvaluator); populationStatistics(); scalePopulation(); // find the best pop member and check for convergence converged = checkBest(); if ((i == m_maxGenerations) || ((i % m_reportFrequency) == 0) || (converged == true)) { m_generationReports.append(populationReport(i)); if (converged == true) { break; } } } return attributeList(m_best.getChromosome()); }
/** * Method for building this classifier. Since the collective classifiers also need the test set, * we only store here the training set. * * @param training the training set to use * @throws Exception derived classes may throw Exceptions */ public void buildClassifier(Instances training) throws Exception { m_ClassifierBuilt = false; m_Trainset = training; // set class index? if (m_Trainset.classIndex() == -1) m_Trainset.setClassIndex(m_Trainset.numAttributes() - 1); // necessary for JUnit tests checkRestrictions(); }
/** * Determines the output format based on the input format and returns this. In case the output * format cannot be returned immediately, i.e., immediateOutputFormat() returns false, then this * method will be called from batchFinished(). * * @param inputFormat the input format to base the output format on * @return the output format * @throws Exception in case the determination goes wrong * @see #hasImmediateOutputFormat() * @see #batchFinished() */ protected Instances determineOutputFormat(Instances inputFormat) throws Exception { Instances data; Instances result; FastVector atts; FastVector values; HashSet hash; int i; int n; boolean isDate; Instance inst; Vector sorted; m_Cols.setUpper(inputFormat.numAttributes() - 1); data = new Instances(inputFormat); atts = new FastVector(); for (i = 0; i < data.numAttributes(); i++) { if (!m_Cols.isInRange(i) || !data.attribute(i).isNumeric()) { atts.addElement(data.attribute(i)); continue; } // date attribute? isDate = (data.attribute(i).type() == Attribute.DATE); // determine all available attribtues in dataset hash = new HashSet(); for (n = 0; n < data.numInstances(); n++) { inst = data.instance(n); if (inst.isMissing(i)) continue; if (isDate) hash.add(inst.stringValue(i)); else hash.add(new Double(inst.value(i))); } // sort values sorted = new Vector(); for (Object o : hash) sorted.add(o); Collections.sort(sorted); // create attribute from sorted values values = new FastVector(); for (Object o : sorted) { if (isDate) values.addElement(o.toString()); else values.addElement(Utils.doubleToString(((Double) o).doubleValue(), MAX_DECIMALS)); } atts.addElement(new Attribute(data.attribute(i).name(), values)); } result = new Instances(inputFormat.relationName(), atts, 0); result.setClassIndex(inputFormat.classIndex()); return result; }
private static void evaluateClassifier(Classifier c, Instances trainData, Instances testData) throws Exception { System.err.println( "INFO: Starting split validation to predict '" + trainData.classAttribute().name() + "' using '" + c.getClass().getCanonicalName() + ":" + Arrays.toString(c.getOptions()) + "' (#train=" + trainData.numInstances() + ",#test=" + testData.numInstances() + ") ..."); if (trainData.classIndex() < 0) throw new IllegalStateException("class attribute not set"); c.buildClassifier(trainData); Evaluation eval = new Evaluation(testData); eval.useNoPriors(); double[] predictions = eval.evaluateModel(c, testData); System.out.println(eval.toClassDetailsString()); System.out.println(eval.toSummaryString("\nResults\n======\n", false)); // write predictions to file { System.err.println("INFO: Writing predictions to file ..."); Writer out = new FileWriter("prediction.trec"); writePredictionsTrecEval(predictions, testData, 0, trainData.classIndex(), out); out.close(); } // write predicted distributions to CSV { System.err.println("INFO: Writing predicted distributions to CSV ..."); Writer out = new FileWriter("predicted_distribution.csv"); writePredictedDistributions(c, testData, 0, out); out.close(); } }
/**
 * Calculates the distance between two instances by walking their sparse value lists in
 * parallel. An attribute stored in only one of the instances is compared against 0, and the
 * class attribute is ignored.
 *
 * @param first the first instance
 * @param second the second instance
 * @return the square root of the summed squared per-attribute differences divided by the number
 *     of attributes
 */
protected double distance(Instance first, Instance second) {
  final int attCount = m_instances.numAttributes();
  final int classIdx = m_instances.classIndex();

  double sumOfSquares = 0;
  int p1 = 0;
  int p2 = 0;
  while (p1 < first.numValues() || p2 < second.numValues()) {
    // an exhausted list reports an index past every real attribute
    int firstI = (p1 >= first.numValues()) ? attCount : first.index(p1);
    int secondI = (p2 >= second.numValues()) ? attCount : second.index(p2);

    // the class never contributes to the distance
    if (firstI == classIdx) {
      p1++;
      continue;
    }
    if (secondI == classIdx) {
      p2++;
      continue;
    }

    double diff;
    if (firstI == secondI) {
      // both instances store this attribute
      diff = difference(firstI, first.valueSparse(p1), second.valueSparse(p2));
      p1++;
      p2++;
    } else if (firstI > secondI) {
      // only the second instance stores it
      diff = difference(secondI, 0, second.valueSparse(p2));
      p2++;
    } else {
      // only the first instance stores it
      diff = difference(firstI, first.valueSparse(p1), 0);
      p1++;
    }
    sumOfSquares += diff * diff;
  }

  return Math.sqrt(sumOfSquares / attCount);
}
/** * processes the instances using the HAAR algorithm * * @param instances the data to process * @return the modified data * @throws Exception in case the processing goes wrong */ protected Instances processHAAR(Instances instances) throws Exception { Instances result; int i; int n; int j; int clsIdx; double[] oldVal; double[] newVal; int level; int length; double[] clsVal; Attribute clsAtt; clsIdx = instances.classIndex(); clsVal = null; clsAtt = null; if (clsIdx > -1) { clsVal = instances.attributeToDoubleArray(clsIdx); clsAtt = (Attribute) instances.classAttribute().copy(); instances.setClassIndex(-1); instances.deleteAttributeAt(clsIdx); } result = new Instances(instances, 0); level = (int) StrictMath.ceil(StrictMath.log(instances.numAttributes()) / StrictMath.log(2.0)); for (i = 0; i < instances.numInstances(); i++) { oldVal = instances.instance(i).toDoubleArray(); newVal = new double[oldVal.length]; for (n = level; n > 0; n--) { length = (int) StrictMath.pow(2, n - 1); for (j = 0; j < length; j++) { newVal[j] = (oldVal[j * 2] + oldVal[j * 2 + 1]) / StrictMath.sqrt(2); newVal[j + length] = (oldVal[j * 2] - oldVal[j * 2 + 1]) / StrictMath.sqrt(2); } System.arraycopy(newVal, 0, oldVal, 0, newVal.length); } // add new transformed instance result.add(new DenseInstance(1, newVal)); } // add class again if (clsIdx > -1) { result.insertAttributeAt(clsAtt, clsIdx); result.setClassIndex(clsIdx); for (i = 0; i < clsVal.length; i++) result.instance(i).setClassValue(clsVal[i]); } return result; }
// use the learned classifiers to get conditional probability protected double conMI(Instances D_j, Instances D_k, CNode[][] miNodes, int j, int k) throws Exception { int L = D_j.classIndex(); int N = D_j.numInstances(); double y[] = new double[L]; double I = 0.0; // conditional mutual information for y_j and y_k double p_1, p_2; // p( y_j = 1 | x ), p( y_j = 2 | x ) double p_12[] = { 0.0, 0.0 }; // p_12[0] = p( y_j = 1 | y_k = 0, x ) and p_12[1] = p( y_j = 1 | y_k = 1, x ) for (int i = 0; i < N; i++) { Arrays.fill(y, 0); p_1 = Math.max( miNodes[j][0].distribution((Instance) D_j.instance(i).copy(), y)[1], 0.000001); // p( y_j = 1 | x ) p_1 = Math.min(p_1, 0.999999); p_1 = Math.max(p_1, 0.000001); Arrays.fill(y, 0); p_2 = Math.max( miNodes[k][0].distribution((Instance) D_k.instance(i).copy(), y)[1], 0.000001); // p( y_k = 1 | x ) p_2 = Math.min(p_2, 0.999999); p_2 = Math.max(p_2, 0.000001); Arrays.fill(y, 0); p_12[0] = Math.max( miNodes[j][k - j].distribution((Instance) D_j.instance(i).copy(), y)[1], 0.000001); // p( y_j = 1 | y_k = 0, x ) p_12[0] = Math.min(p_12[0], 0.999999); p_12[0] = Math.max(p_12[0], 0.000001); Arrays.fill(y, 0); Arrays.fill(y, k, k + 1, 1.0); p_12[1] = Math.max( miNodes[j][k - j].distribution((Instance) D_j.instance(i).copy(), y)[1], 0.000001); // p( y_j = 1 | y_k = 1, x ) p_12[1] = Math.min(p_12[1], 0.999999); p_12[1] = Math.max(p_12[1], 0.000001); I += (1 - p_12[0]) * (1 - p_2) * Math.log((1 - p_12[0]) / (1 - p_1)); // I( y_j = 0 ; y_k = 0 ) I += (1 - p_12[1]) * (p_2) * Math.log((1 - p_12[1]) / (1 - p_1)); // I( y_j = 0 ; y_k = 1 ) I += (p_12[0]) * (1 - p_2) * Math.log((p_12[0]) / (p_1)); // I( y_j = 1 ; y_k = 0 ) I += (p_12[1]) * (p_2) * Math.log((p_12[1]) / (p_1)); // I( y_j = 1 ; y_k = 0 ) } I = I / N; return I; }
@Override public void train(Instances instance) { // find the best attribute int classIdx = instance.classIndex(); for (int i = 0; i < instance.numInstances(); i++) { if (classIdx == 0) { zeroIns.add(instance.instance(i)); } else { oneIns.add(instance.instance(i)); } } }
/** * Transform. * * @param D original Instances * @param c to be the class Attribute * @param pa_c the parent indices of c * @return new Instances T */ public static Instances transform(Instances D, int c, int pa_c[]) throws Exception { int L = D.classIndex(); int keep[] = A.append(pa_c, c); // keep all parents and self! Arrays.sort(keep); int remv[] = A.invert(keep, L); // i.e., remove the rest < L Arrays.sort(remv); Instances T = F.remove(new Instances(D), remv, false); int map[] = new int[L]; for (int j = 0; j < L; j++) { map[j] = Arrays.binarySearch(keep, j); } T.setClassIndex(map[c]); return T; }
/** * Sets the format of the input instances. * * @param instanceInfo an Instances object containing the input instance structure (any instances * contained in the object are ignored - only the structure is required). * @return true if the outputFormat may be collected immediately * @throws Exception if the input format can't be set successfully */ public boolean setInputFormat(Instances instanceInfo) throws Exception { super.setInputFormat(instanceInfo); m_AttIndex.setUpper(instanceInfo.numAttributes() - 1); m_FirstIndex.setUpper(instanceInfo.attribute(m_AttIndex.getIndex()).numValues() - 1); m_SecondIndex.setUpper(instanceInfo.attribute(m_AttIndex.getIndex()).numValues() - 1); if ((instanceInfo.classIndex() > -1) && (instanceInfo.classIndex() == m_AttIndex.getIndex())) { throw new Exception("Cannot process class attribute."); } if (!instanceInfo.attribute(m_AttIndex.getIndex()).isNominal()) { throw new UnsupportedAttributeTypeException("Chosen attribute not nominal."); } if (instanceInfo.attribute(m_AttIndex.getIndex()).numValues() < 2) { throw new UnsupportedAttributeTypeException( "Chosen attribute has less than " + "two values."); } if (m_SecondIndex.getIndex() <= m_FirstIndex.getIndex()) { // XXX Maybe we should just swap the values?? throw new Exception("The second index has to be greater " + "than the first."); } setOutputFormat(); return true; }