/**
 * Set the output format. Converts the selected string attributes to nominal attributes, based on
 * m_InputFormat, and calls setOutputFormat(Instances) appropriately.
 */
private void setOutputFormat() {

  Instances newData;
  FastVector newAtts, newVals;

  // Compute new attributes
  newAtts = new FastVector(getInputFormat().numAttributes());
  for (int j = 0; j < getInputFormat().numAttributes(); j++) {
    Attribute att = getInputFormat().attribute(j);
    if (!m_AttIndices.isInRange(j) || !att.isString()) {
      // We don't have to copy the attribute because the
      // attribute index remains unchanged.
      newAtts.addElement(att);
    } else {
      // Compute list of attribute values
      newVals = new FastVector(att.numValues());
      for (int i = 0; i < att.numValues(); i++) {
        newVals.addElement(att.value(i));
      }
      newAtts.addElement(new Attribute(att.name(), newVals));
    }
  }

  // Construct new header
  newData = new Instances(getInputFormat().relationName(), newAtts, 0);
  newData.setClassIndex(getInputFormat().classIndex());

  setOutputFormat(newData);
}
/**
 * Method that finds all large itemsets of the given size for the given set of instances.
 *
 * @param index the size (number of items) of the itemsets to be found
 * @exception Exception if an attribute is numeric
 */
private void findLargeItemSets(int index) throws Exception {

  FastVector kMinusOneSets, kSets = new FastVector();
  Hashtable hashtable;
  int i = 0;

  // Find large itemsets

  // of length 1
  if (index == 1) {
    kSets = ItemSet.singletons(m_instances);
    ItemSet.upDateCounters(kSets, m_instances);
    kSets = ItemSet.deleteItemSets(kSets, m_premiseCount, Integer.MAX_VALUE);
    if (kSets.size() == 0)
      return;
    m_Ls.addElement(kSets);
  }

  // of length > 1
  if (index > 1) {
    if (m_Ls.size() > 0)
      kSets = (FastVector) m_Ls.lastElement();
    m_Ls.removeAllElements();
    i = index - 2;
    kMinusOneSets = kSets;
    kSets = ItemSet.mergeAllItemSets(kMinusOneSets, i, m_instances.numInstances());
    hashtable = ItemSet.getHashtable(kMinusOneSets, kMinusOneSets.size());
    m_hashtables.addElement(hashtable);
    kSets = ItemSet.pruneItemSets(kSets, hashtable);
    ItemSet.upDateCounters(kSets, m_instances);
    kSets = ItemSet.deleteItemSets(kSets, m_premiseCount, Integer.MAX_VALUE);
    if (kSets.size() == 0)
      return;
    m_Ls.addElement(kSets);
  }
}
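// A hedged usage sketch: the itemset search above resembles the internals of Weka's association
// learners (the m_premiseCount field suggests PredictiveApriori). From user code such a search
// is normally driven through the associator's public API; the ARFF path below is illustrative.
import weka.associations.Apriori;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class AssociatorDemo {
  public static void main(String[] args) throws Exception {
    // Any dataset of nominal attributes works here; the path is just an example
    Instances data = DataSource.read("supermarket.arff");
    Apriori apriori = new Apriori();
    apriori.setNumRules(20);          // ask for the 20 best rules
    apriori.buildAssociations(data);  // runs the large-itemset search internally
    System.out.println(apriori);
  }
}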
/**
 * Set the labels for nominal attribute creation.
 *
 * @param labelList a comma separated list of labels
 * @throws IllegalArgumentException if the labelList was invalid
 */
public void setNominalLabels(String labelList) {
  FastVector labels = new FastVector(10);

  // Split the labelList up into the vector
  int commaLoc;
  while ((commaLoc = labelList.indexOf(',')) >= 0) {
    String label = labelList.substring(0, commaLoc).trim();
    if (!label.equals("")) {
      labels.addElement(label);
    } else {
      throw new IllegalArgumentException(
          "Invalid label list at " + labelList.substring(commaLoc));
    }
    labelList = labelList.substring(commaLoc + 1);
  }
  String label = labelList.trim();
  if (!label.equals("")) {
    labels.addElement(label);
  }

  // If everything is OK, make the type change
  m_Labels = labels;
  if (labels.size() == 0) {
    m_AttributeType = Attribute.NUMERIC;
  } else {
    m_AttributeType = Attribute.NOMINAL;
  }
}
/**
 * Sets up the structure for the plot instances. Sets m_PlotInstances to null if instances are not
 * saved for visualization.
 *
 * @see #getSaveForVisualization()
 */
protected void determineFormat() {
  FastVector hv;
  Attribute predictedClass;
  Attribute classAt;
  FastVector attVals;
  int i;

  if (!m_SaveForVisualization) {
    m_PlotInstances = null;
    return;
  }

  hv = new FastVector();

  classAt = m_Instances.attribute(m_ClassIndex);
  if (classAt.isNominal()) {
    attVals = new FastVector();
    for (i = 0; i < classAt.numValues(); i++)
      attVals.addElement(classAt.value(i));
    predictedClass = new Attribute("predicted" + classAt.name(), attVals);
  } else {
    predictedClass = new Attribute("predicted" + classAt.name());
  }

  for (i = 0; i < m_Instances.numAttributes(); i++) {
    if (i == m_Instances.classIndex())
      hv.addElement(predictedClass);
    hv.addElement(m_Instances.attribute(i).copy());
  }

  m_PlotInstances =
      new Instances(m_Instances.relationName() + "_predicted", hv, m_Instances.numInstances());
  m_PlotInstances.setClassIndex(m_ClassIndex + 1);
}
/**
 * Adds the prediction intervals as additional attributes at the end. Since classifiers can
 * return a varying number of intervals per instance, the dataset is filled with missing values
 * for non-existing intervals.
 */
protected void addPredictionIntervals() {
  int maxNum;
  int num;
  int i;
  int n;
  FastVector preds;
  FastVector atts;
  Instances data;
  Instance inst;
  Instance newInst;
  double[] values;
  double[][] predInt;

  // determine the maximum number of intervals
  maxNum = 0;
  preds = m_Evaluation.predictions();
  for (i = 0; i < preds.size(); i++) {
    num = ((NumericPrediction) preds.elementAt(i)).predictionIntervals().length;
    if (num > maxNum)
      maxNum = num;
  }

  // create new header
  atts = new FastVector();
  for (i = 0; i < m_PlotInstances.numAttributes(); i++)
    atts.addElement(m_PlotInstances.attribute(i));
  for (i = 0; i < maxNum; i++) {
    atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-lowerBoundary"));
    atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-upperBoundary"));
    atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-width"));
  }
  data = new Instances(m_PlotInstances.relationName(), atts, m_PlotInstances.numInstances());
  data.setClassIndex(m_PlotInstances.classIndex());

  // update data
  for (i = 0; i < m_PlotInstances.numInstances(); i++) {
    inst = m_PlotInstances.instance(i);
    // copy old values
    values = new double[data.numAttributes()];
    System.arraycopy(inst.toDoubleArray(), 0, values, 0, inst.numAttributes());
    // add interval data
    predInt = ((NumericPrediction) preds.elementAt(i)).predictionIntervals();
    for (n = 0; n < maxNum; n++) {
      if (n < predInt.length) {
        values[m_PlotInstances.numAttributes() + n * 3 + 0] = predInt[n][0];
        values[m_PlotInstances.numAttributes() + n * 3 + 1] = predInt[n][1];
        values[m_PlotInstances.numAttributes() + n * 3 + 2] = predInt[n][1] - predInt[n][0];
      } else {
        values[m_PlotInstances.numAttributes() + n * 3 + 0] = Utils.missingValue();
        values[m_PlotInstances.numAttributes() + n * 3 + 1] = Utils.missingValue();
        values[m_PlotInstances.numAttributes() + n * 3 + 2] = Utils.missingValue();
      }
    }
    // create new Instance
    newInst = new DenseInstance(inst.weight(), values);
    data.add(newInst);
  }

  m_PlotInstances = data;
}
/**
 * Read the sparse feature vector data from the data file and convert it into Weka's instance
 * format.
 */
public void readSparseFVsFromFile(
    File dataFile, int numDocs, boolean trainingMode, int numLabels, boolean surroundMode) {

  int numFeats = 0;
  int numClasses = 0;
  labelsFVDoc = new LabelsOfFeatureVectorDoc[numDocs];

  // Read the sparse FVs by using the method in MultiClassLearning class
  MultiClassLearning multiClassL = new MultiClassLearning();
  boolean isUsingDataFile = false;
  File tempFVDataFile = null;
  multiClassL.getDataFromFile(numDocs, dataFile, isUsingDataFile, tempFVDataFile);

  // Create the attributes.
  numFeats = multiClassL.dataFVinDoc.getTotalNumFeatures();
  FastVector attributes = new FastVector(numFeats + 1);
  for (int i = 0; i < numFeats; ++i)
    attributes.addElement(new Attribute(new Integer(i + 1).toString()));

  // Add the class attribute.
  if (surroundMode)
    numClasses = 2 * numLabels + 1; // count the null too, as value -1.
  else
    numClasses = numLabels + 1;
  FastVector classValues = new FastVector(numClasses);
  classValues.addElement("-1"); // the first class, for the null class
  for (int i = 1; i < numClasses; ++i)
    classValues.addElement(new Integer(i).toString());
  attributes.addElement(new Attribute("Class", classValues));

  // Create the dataset with the capacity of all FVs (the actual number of FVs may be larger
  // than pre-specified because of possible multi-labels) and set the index of the class.
  instancesData =
      new Instances("SparseFVsData", attributes, multiClassL.dataFVinDoc.getNumTraining());
  instancesData.setClassIndex(instancesData.numAttributes() - 1);

  // Copy the data into the instances.
  for (int iDoc = 0; iDoc < multiClassL.dataFVinDoc.getNumTrainingDocs(); ++iDoc) {
    SparseFeatureVector[] fvs = multiClassL.dataFVinDoc.trainingFVinDoc[iDoc].getFvs();
    labelsFVDoc[iDoc] = new LabelsOfFeatureVectorDoc();
    labelsFVDoc[iDoc].multiLabels = multiClassL.dataFVinDoc.labelsFVDoc[iDoc].multiLabels;
    for (int i = 0; i < fvs.length; ++i) {
      // Object valueO = fvs[i].getValues();
      double[] values = new double[fvs[i].getLen()];
      int[] indexes = new int[fvs[i].getLen()];
      for (int j = 0; j < fvs[i].getLen(); ++j) {
        // values[j] = (double)fvs[i].values[j];
        values[j] = fvs[i].nodes[j].value;
        indexes[j] = fvs[i].nodes[j].index;
      }
      SparseInstance inst = new SparseInstance(1.0, values, indexes, 50000);
      inst.setDataset(instancesData);
      if (trainingMode && labelsFVDoc[iDoc].multiLabels[i].num > 0) {
        // add one copy of the instance per label (labels > 0)
        for (int j1 = 0; j1 < labelsFVDoc[iDoc].multiLabels[i].num; ++j1) {
          inst.setClassValue(labelsFVDoc[iDoc].multiLabels[i].labels[j1]);
          instancesData.add(inst);
        }
      } else {
        inst.setClassValue("-1"); // set label as -1 for the null class
        instancesData.add(inst);
      }
    }
  }

  return;
}
private List<Instance> myExtractKeyphrases(String document, int numOfPhrases) throws Exception {

  // Check whether there is actually any data
  if (document.length() == 0 || document.equals("")) {
    throw new Exception("Couldn't find any data!");
  }

  FastVector atts = new FastVector(3);
  atts.addElement(new Attribute("doc", (FastVector) null));
  atts.addElement(new Attribute("keyphrases", (FastVector) null));
  Instances data = new Instances("keyphrase_training_data", atts, 0);

  List<Instance> myInstances = new ArrayList<Instance>();

  double[] newInst = new double[2];
  newInst[0] = (double) data.attribute(0).addStringValue(document);
  newInst[1] = Instance.missingValue();
  data.add(new Instance(1.0, newInst));

  m_KEAFilter.input(data.instance(0));
  data = data.stringFreeStructure();

  ke.setNumPhrases(numOfPhrases);
  int numPhrases = numOfPhrases; // ke.getNumPhrases();
  Instance[] topRankedInstances = new Instance[numPhrases];
  Instance inst;

  // Iterate over all extracted keyphrases (inst)
  while ((inst = m_KEAFilter.output()) != null) {
    int index = (int) inst.value(m_KEAFilter.getRankIndex()) - 1;
    if (index < numPhrases) {
      topRankedInstances[index] = inst;
    }
  }

  double numExtracted = 0, numCorrect = 0;
  for (int i = 0; i < numPhrases; i++) {
    if (topRankedInstances[i] != null) {
      if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) {
        numExtracted += 1.0;
      }
      if ((int) topRankedInstances[i].value(topRankedInstances[i].numAttributes() - 1) == 1) {
        numCorrect += 1.0;
      }
      myInstances.add(topRankedInstances[i]);
    }
  }

  return myInstances;
}
public FastVector buildCosineAttributes() {
  FastVector attributes = new FastVector(2);
  attributes.addElement(new Attribute("cosine"));
  FastVector classVal = new FastVector();
  classVal.addElement("1");
  classVal.addElement("0");
  Attribute label = new Attribute("label", classVal);
  attributes.addElement(label);
  return attributes;
}
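// A minimal, hedged sketch of turning such an attribute vector into a populated dataset. The
// class name and the 0.87 similarity value are made up for illustration, and the pre-3.7 Weka
// API (FastVector, concrete Instance) is assumed to match the snippet above.
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class CosineDatasetSketch {
  public static void main(String[] args) {
    // Same layout as buildCosineAttributes(): numeric "cosine" plus a binary "label" class
    FastVector attributes = new FastVector(2);
    attributes.addElement(new Attribute("cosine"));
    FastVector classVal = new FastVector();
    classVal.addElement("1");
    classVal.addElement("0");
    attributes.addElement(new Attribute("label", classVal));

    Instances data = new Instances("cosinePairs", attributes, 0);
    data.setClassIndex(1);

    // One illustrative row: cosine similarity 0.87, labelled as a match ("1")
    Instance row = new Instance(2);
    row.setDataset(data);
    row.setValue(0, 0.87);
    row.setValue(1, "1");
    data.add(row);

    System.out.println(data);
  }
}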
/**
 * Constructs the classifier; mainly declares the data format, the class attribute and the number
 * of classes.
 */
public MessageClassify() throws Exception {
  String nameOfDataset = "MessageClassification";

  FastVector attributes = new FastVector(2);
  attributes.addElement(new Attribute("Message", (FastVector) null));

  FastVector classValues = new FastVector(2); // class label vector; there are two classes
  classValues.addElement("0");
  classValues.addElement("1");

  attributes.addElement(new Attribute("Class", classValues));

  // An instance can be thought of as a row and an attribute as a column
  instances = new Instances(nameOfDataset, attributes, 100);
  // The column that holds the class label within each instance
  instances.setClassIndex(instances.numAttributes() - 1);
}
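// A hedged sketch of how a raw message might be appended to the dataset built above. The helper
// class and method names are hypothetical, and the pre-3.7 Weka API is assumed.
import weka.core.Instance;
import weka.core.Instances;

public class MessageClassifyHelper {
  /**
   * Hypothetical helper: append one message with its label ("0" or "1"). addStringValue()
   * registers the text with the string attribute and returns its internal index.
   */
  public static void addMessage(Instances instances, String message, String classValue) {
    double[] vals = new double[instances.numAttributes()];
    vals[0] = instances.attribute(0).addStringValue(message); // "Message" string attribute
    vals[1] = instances.attribute(1).indexOfValue(classValue); // "Class": "0" or "1"
    instances.add(new Instance(1.0, vals));
  }
}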
/**
 * Determines the output format based on the input format and returns this. In case the output
 * format cannot be returned immediately, i.e., hasImmediateOutputFormat() returns false, then
 * this method will be called from batchFinished().
 *
 * @param inputFormat the input format to base the output format on
 * @return the output format
 * @throws Exception in case the determination goes wrong
 * @see #hasImmediateOutputFormat()
 * @see #batchFinished()
 */
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
  Instances data;
  Instances result;
  FastVector atts;
  FastVector values;
  HashSet hash;
  int i;
  int n;
  boolean isDate;
  Instance inst;
  Vector sorted;

  m_Cols.setUpper(inputFormat.numAttributes() - 1);
  data = new Instances(inputFormat);
  atts = new FastVector();
  for (i = 0; i < data.numAttributes(); i++) {
    if (!m_Cols.isInRange(i) || !data.attribute(i).isNumeric()) {
      atts.addElement(data.attribute(i));
      continue;
    }

    // date attribute?
    isDate = (data.attribute(i).type() == Attribute.DATE);

    // determine all distinct values of the attribute in the dataset
    hash = new HashSet();
    for (n = 0; n < data.numInstances(); n++) {
      inst = data.instance(n);
      if (inst.isMissing(i))
        continue;
      if (isDate)
        hash.add(inst.stringValue(i));
      else
        hash.add(new Double(inst.value(i)));
    }

    // sort values
    sorted = new Vector();
    for (Object o : hash)
      sorted.add(o);
    Collections.sort(sorted);

    // create attribute from sorted values
    values = new FastVector();
    for (Object o : sorted) {
      if (isDate)
        values.addElement(o.toString());
      else
        values.addElement(Utils.doubleToString(((Double) o).doubleValue(), MAX_DECIMALS));
    }
    atts.addElement(new Attribute(data.attribute(i).name(), values));
  }

  result = new Instances(inputFormat.relationName(), atts, 0);
  result.setClassIndex(inputFormat.classIndex());

  return result;
}
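// This determineOutputFormat() follows the pattern of Weka's NumericToNominal simple batch
// filter. A hedged end-to-end usage sketch; the ARFF path is illustrative.
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.NumericToNominal;

public class NumericToNominalDemo {
  public static void main(String[] args) throws Exception {
    Instances data = DataSource.read("iris.arff"); // illustrative path
    NumericToNominal filter = new NumericToNominal();
    filter.setAttributeIndices("first-last"); // which columns to convert
    filter.setInputFormat(data);              // output format is determined here / at batchFinished()
    Instances converted = Filter.useFilter(data, filter);
    System.out.println(converted.toSummaryString());
  }
}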
private static Instances createSet(int l) {
  FastVector attributes = new FastVector(1 + l);

  FastVector vals = new FastVector(3);
  vals.addElement("S1");
  vals.addElement("S2");
  vals.addElement("S3");
  Attribute classAttribute = new Attribute("Sense", vals);
  attributes.addElement(classAttribute);

  for (int i = 0; i < l; i++) {
    attributes.addElement(new Attribute(i + 1 + ""));
  }

  Instances set = new Instances("Rel", attributes, 1 + l);
  set.setClassIndex(0);
  return set;
}
private static Instances convertToInstances(
    final IScope scope,
    final IList<String> attributes,
    final IAddressableContainer<Integer, IAgent, Integer, IAgent> agents)
    throws GamaRuntimeException {
  FastVector attribs = new FastVector();
  for (String att : attributes) {
    attribs.addElement(new Attribute(att));
  }
  Instances dataset =
      new Instances(scope.getAgentScope().getName(), attribs, agents.length(scope));
  for (IAgent ag : agents.iterable(scope)) {
    int nb = attributes.size();
    double[] vals = new double[nb];
    for (int i = 0; i < nb; i++) {
      String attrib = attributes.get(i);
      Double var = Cast.asFloat(scope, ag.getDirectVarValue(scope, attrib));
      vals[i] = var;
    }
    Instance instance = new Instance(1, vals);
    dataset.add(instance);
  }
  return dataset;
}
public weka.core.Instances toWekaInstances() {
  // attributes
  FastVector wattrs = new FastVector();
  Iterator itr = attributes.iterator();
  while (itr.hasNext()) {
    Attribute attr = (Attribute) itr.next();
    wattrs.addElement(attr.toWekaAttribute());
  }

  // data instances
  weka.core.Instances winsts = new weka.core.Instances(name, wattrs, instances.size());
  itr = instances.iterator();
  while (itr.hasNext()) {
    Instance inst = (Instance) itr.next();
    Iterator itrval = inst.getValues().iterator();
    Iterator itrmis = inst.getMissing().iterator();
    double[] vals = new double[wattrs.size()];
    for (int i = 0; i < wattrs.size(); i++) {
      double val = (Double) itrval.next();
      if ((Boolean) itrmis.next()) {
        vals[i] = weka.core.Instance.missingValue();
      } else {
        vals[i] = val;
      }
    }
    weka.core.Instance winst = new weka.core.Instance(1, vals);
    winst.setDataset(winsts);
    winsts.add(winst);
  }
  winsts.setClassIndex(this.class_index);
  return winsts;
}
/**
 * Returns an enumeration describing the available options.
 *
 * @return an enumeration of all the available options.
 */
public Enumeration listOptions() {
  String string1 = "\tThe required number of rules. (default = " + (m_numRules - 5) + ")";

  FastVector newVector = new FastVector(1);

  newVector.addElement(new Option(string1, "N", 1, "-N <required number of rules output>"));

  return newVector.elements();
}
/**
 * Determines the output format based on the input format and returns this.
 *
 * @param inputFormat the input format to base the output format on
 * @return the output format
 * @throws Exception in case the determination goes wrong
 */
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
  Instances result;
  Attribute att;
  Attribute attSorted;
  FastVector atts;
  FastVector values;
  Vector<String> sorted;
  int i;
  int n;

  m_AttributeIndices.setUpper(inputFormat.numAttributes() - 1);

  // determine sorted indices
  atts = new FastVector();
  m_NewOrder = new int[inputFormat.numAttributes()][];
  for (i = 0; i < inputFormat.numAttributes(); i++) {
    att = inputFormat.attribute(i);
    if (!att.isNominal() || !m_AttributeIndices.isInRange(i)) {
      m_NewOrder[i] = new int[0];
      atts.addElement(inputFormat.attribute(i).copy());
      continue;
    }

    // sort labels
    sorted = new Vector<String>();
    for (n = 0; n < att.numValues(); n++)
      sorted.add(att.value(n));
    Collections.sort(sorted, m_Comparator);

    // determine new indices
    m_NewOrder[i] = new int[att.numValues()];
    values = new FastVector();
    for (n = 0; n < att.numValues(); n++) {
      m_NewOrder[i][n] = sorted.indexOf(att.value(n));
      values.addElement(sorted.get(n));
    }
    attSorted = new Attribute(att.name(), values);
    attSorted.setWeight(att.weight());
    atts.addElement(attSorted);
  }

  // generate new header
  result = new Instances(inputFormat.relationName(), atts, 0);
  result.setClassIndex(inputFormat.classIndex());

  return result;
}
private FastVector attFromStream(StreamElement data) {
  FastVector fv = new FastVector();
  for (int i = 0; i < data.getFieldNames().length; i++) {
    Attribute a = new Attribute(data.getFieldNames()[i]);
    fv.addElement(a);
  }
  return fv;
}
/**
 * Process a classifier's prediction for an instance and update a set of plotting instances and
 * additional plotting info. For nominal class datasets, m_PlotShapes holds shape types (actual
 * data points have automatic shape type assignment; classifier error data points have box shape
 * type). For numeric class datasets, the actual data points are stored in m_PlotInstances and
 * m_PlotSizes stores the error (which is later converted to shape size values).
 *
 * @param toPredict the actual data point
 * @param classifier the classifier
 * @param eval the evaluation object to use for evaluating the classifier on the instance to
 *     predict
 * @see #m_PlotShapes
 * @see #m_PlotSizes
 * @see #m_PlotInstances
 */
public void process(Instance toPredict, Classifier classifier, Evaluation eval) {
  double pred;
  double[] values;
  int i;

  try {
    pred = eval.evaluateModelOnceAndRecordPrediction(classifier, toPredict);

    if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) {
      toPredict =
          ((weka.classifiers.misc.InputMappedClassifier) classifier)
              .constructMappedInstance(toPredict);
    }

    if (!m_SaveForVisualization)
      return;

    if (m_PlotInstances != null) {
      values = new double[m_PlotInstances.numAttributes()];
      for (i = 0; i < m_PlotInstances.numAttributes(); i++) {
        if (i < toPredict.classIndex()) {
          values[i] = toPredict.value(i);
        } else if (i == toPredict.classIndex()) {
          values[i] = pred;
          values[i + 1] = toPredict.value(i);
          i++;
        } else {
          values[i] = toPredict.value(i - 1);
        }
      }

      m_PlotInstances.add(new DenseInstance(1.0, values));

      if (toPredict.classAttribute().isNominal()) {
        if (toPredict.isMissing(toPredict.classIndex()) || Utils.isMissingValue(pred)) {
          m_PlotShapes.addElement(new Integer(Plot2D.MISSING_SHAPE));
        } else if (pred != toPredict.classValue()) {
          // set to default error point shape
          m_PlotShapes.addElement(new Integer(Plot2D.ERROR_SHAPE));
        } else {
          // otherwise set to constant (automatically assigned) point shape
          m_PlotShapes.addElement(new Integer(Plot2D.CONST_AUTOMATIC_SHAPE));
        }
        m_PlotSizes.addElement(new Integer(Plot2D.DEFAULT_SHAPE_SIZE));
      } else {
        // store the error (to be converted to a point size later)
        Double errd = null;
        if (!toPredict.isMissing(toPredict.classIndex()) && !Utils.isMissingValue(pred)) {
          errd = new Double(pred - toPredict.classValue());
          m_PlotShapes.addElement(new Integer(Plot2D.CONST_AUTOMATIC_SHAPE));
        } else {
          // missing shape if actual class not present or prediction is missing
          m_PlotShapes.addElement(new Integer(Plot2D.MISSING_SHAPE));
        }
        m_PlotSizes.addElement(errd);
      }
    }
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}
/**
 * Add a rule to the ruleset and update the stats
 *
 * @param lastRule the rule to be added
 */
public void addAndUpdate(Rule lastRule) {
  if (m_Ruleset == null)
    m_Ruleset = new FastVector();
  m_Ruleset.addElement(lastRule);

  Instances data = (m_Filtered == null) ? m_Data : ((Instances[]) m_Filtered.lastElement())[1];
  double[] stats = new double[6];
  double[] classCounts = new double[m_Data.classAttribute().numValues()];
  Instances[] filtered = computeSimpleStats(m_Ruleset.size() - 1, data, stats, classCounts);

  if (m_Filtered == null)
    m_Filtered = new FastVector();
  m_Filtered.addElement(filtered);

  if (m_SimpleStats == null)
    m_SimpleStats = new FastVector();
  m_SimpleStats.addElement(stats);

  if (m_Distributions == null)
    m_Distributions = new FastVector();
  m_Distributions.addElement(classCounts);
}
/**
 * Trains the classifier on the array of Signal objects. Implementations of this method should
 * also produce an ordered list of the class names which can be returned with the
 * <code>getClassNames</code> method.
 *
 * @param inputData the Signal array that the model should be trained on.
 * @throws RuntimeException if there is no class metadata to train the model with
 */
public void train(Signal[] inputData) {
  List classNamesList = new ArrayList();

  for (int i = 0; i < inputData.length; i++) {
    try {
      String className = inputData[i].getStringMetadata(Signal.PROP_CLASS);
      if ((className != null) && (!classNamesList.contains(className))) {
        classNamesList.add(className);
      }
    } catch (noMetadataException ex) {
      throw new RuntimeException("No class metadata found to train model on!", ex);
    }
  }
  Collections.sort(classNamesList);
  classnames = (String[]) classNamesList.toArray(new String[classNamesList.size()]);

  FastVector classValues = new FastVector(classnames.length);
  for (int i = 0; i < classnames.length; i++) {
    classValues.addElement(classnames[i]);
  }
  classAttribute = new Attribute(Signal.PROP_CLASS, classValues);

  Instances trainingDataSet =
      new Instances(Signal2Instance.convert(inputData[0], classAttribute));
  if (inputData.length > 1) {
    for (int i = 1; i < inputData.length; i++) {
      Instances aSignalInstance = Signal2Instance.convert(inputData[i], classAttribute);
      for (int j = 0; j < aSignalInstance.numInstances(); j++)
        trainingDataSet.add(aSignalInstance.instance(j));
    }
  }

  trainingDataSet.setClass(classAttribute);
  inputData = null;

  theRule = new MISMO();

  // parse options
  StringTokenizer stOption = new StringTokenizer(this.MISMOOptions, " ");
  String[] options = new String[stOption.countTokens()];
  for (int i = 0; i < options.length; i++) {
    options[i] = stOption.nextToken();
  }
  try {
    theRule.setOptions(options);
  } catch (Exception ex) {
    throw new RuntimeException("Failed to set MISMO classifier options!", ex);
  }

  try {
    theRule.buildClassifier(trainingDataSet);
    System.out.println("WEKA: outputting MISMO classifier; " + theRule.globalInfo());
  } catch (Exception ex) {
    throw new RuntimeException("Failed to train classifier!", ex);
  }
}
/**
 * Filter the data according to the ruleset and compute the basic stats: coverage/uncoverage,
 * true/false positive/negatives of each rule
 */
public void countData() {
  if ((m_Filtered != null) || (m_Ruleset == null) || (m_Data == null))
    return;

  int size = m_Ruleset.size();
  m_Filtered = new FastVector(size);
  m_SimpleStats = new FastVector(size);
  m_Distributions = new FastVector(size);
  Instances data = new Instances(m_Data);

  for (int i = 0; i < size; i++) {
    double[] stats = new double[6]; // 6 statistics parameters
    double[] classCounts = new double[m_Data.classAttribute().numValues()];
    Instances[] filtered = computeSimpleStats(i, data, stats, classCounts);
    m_Filtered.addElement(filtered);
    m_SimpleStats.addElement(stats);
    m_Distributions.addElement(classCounts);
    data = filtered[1]; // Data not covered
  }
}
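// Hedged context: RuleStats-style bookkeeping like the methods above backs Weka's RIPPER
// implementation (JRip). A minimal sketch of training the rule learner that relies on it;
// the ARFF path is illustrative.
import weka.classifiers.rules.JRip;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class RuleLearnerDemo {
  public static void main(String[] args) throws Exception {
    Instances data = DataSource.read("weather.nominal.arff"); // illustrative path
    data.setClassIndex(data.numAttributes() - 1);

    JRip ripper = new JRip();   // uses RuleStats internally for coverage/TP/FP counts
    ripper.buildClassifier(data);
    System.out.println(ripper); // prints the learned ruleset
  }
}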
/** Set the output format. Changes the format of the specified date attribute. */
private void setOutputFormat() {
  // Create new attributes
  FastVector newAtts = new FastVector(getInputFormat().numAttributes());
  for (int j = 0; j < getInputFormat().numAttributes(); j++) {
    Attribute att = getInputFormat().attribute(j);
    if (j == m_AttIndex.getIndex()) {
      newAtts.addElement(new Attribute(att.name(), getDateFormat().toPattern()));
    } else {
      newAtts.addElement(att.copy());
    }
  }

  // Create new header
  Instances newData = new Instances(getInputFormat().relationName(), newAtts, 0);
  newData.setClassIndex(getInputFormat().classIndex());
  m_OutputAttribute = newData.attribute(m_AttIndex.getIndex());
  setOutputFormat(newData);
}
/**
 * Set the output format. Converts the selected nominal attributes to string attributes, based on
 * m_InputFormat, and calls setOutputFormat(Instances) appropriately.
 */
private void setOutputFormat() {
  Instances newData;
  FastVector newAtts;

  // Compute new attributes
  newAtts = new FastVector(getInputFormat().numAttributes());
  for (int j = 0; j < getInputFormat().numAttributes(); j++) {
    Attribute att = getInputFormat().attribute(j);
    if (!att.isNominal() || !m_AttIndex.isInRange(j))
      newAtts.addElement(att);
    else
      newAtts.addElement(new Attribute(att.name(), (FastVector) null));
  }

  // Construct new header
  newData = new Instances(getInputFormat().relationName(), newAtts, 0);
  newData.setClassIndex(getInputFormat().classIndex());
  setOutputFormat(newData);
}
private static Instances initializeAttributes() {
  String nameOfDataset = "Badges";

  Instances instances;

  FastVector attributes = new FastVector(9);
  for (String featureName : features) {
    attributes.addElement(new Attribute(featureName, zeroOne));
  }
  Attribute classLabel = new Attribute("Class", labels); // labels is a FastVector of '+' and '-'
  attributes.addElement(classLabel);
  instances = new Instances(nameOfDataset, attributes, 0);
  instances.setClass(classLabel);

  return instances;
}
private void initialiseRelation() {
  minAttribute = new Attribute("min");
  maxAttribute = new Attribute("max");
  meanAttribute = new Attribute("mean");
  stdDevAttribute = new Attribute("stdDev");

  FastVector activities = new FastVector(2);
  activities.addElement("walking");
  activities.addElement("running");
  activityAttribute = new Attribute("activity", activities);

  FastVector features = new FastVector(NUMBER_OF_ATTRIBUTES);
  features.addElement(minAttribute);
  features.addElement(maxAttribute);
  features.addElement(meanAttribute);
  features.addElement(stdDevAttribute);
  features.addElement(activityAttribute);

  instances = new Instances("Activity", features, 0);
  loadExistingData();
}
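// A hedged sketch of appending one labelled feature row to the "Activity" relation built above.
// The helper class, method name, and attribute lookup by name are assumptions for illustration;
// the pre-3.7 Weka API (concrete Instance) is assumed to match the FastVector usage.
import weka.core.Instance;
import weka.core.Instances;

public class ActivitySampleHelper {
  /** Hypothetical helper: append one window of sensor statistics with its activity label. */
  public static void addSample(
      Instances instances, double min, double max, double mean, double stdDev, String activity) {
    Instance sample = new Instance(instances.numAttributes());
    sample.setDataset(instances); // needed so the nominal activity value can be resolved
    sample.setValue(instances.attribute("min"), min);
    sample.setValue(instances.attribute("max"), max);
    sample.setValue(instances.attribute("mean"), mean);
    sample.setValue(instances.attribute("stdDev"), stdDev);
    sample.setValue(instances.attribute("activity"), activity); // "walking" or "running"
    instances.add(sample);
  }
}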
/**
 * Determines the output format based on the input format and returns this. In case the output
 * format cannot be returned immediately, i.e., hasImmediateOutputFormat() returns false, then
 * this method will be called from batchFinished() after the call of preprocess(Instances), in
 * which, e.g., statistics for the actual processing step can be gathered.
 *
 * @param inputFormat the input format to base the output format on
 * @return the output format
 * @throws Exception in case the determination goes wrong
 */
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
  Instances result;
  FastVector atts;
  int i;
  int numAtts;
  Vector<Integer> indices;
  Vector<Integer> subset;
  Random rand;
  int index;

  // determine the number of attributes
  numAtts = inputFormat.numAttributes();
  if (inputFormat.classIndex() > -1)
    numAtts--;
  if (m_NumAttributes < 1) {
    numAtts = (int) Math.round((double) numAtts * m_NumAttributes);
  } else {
    if (m_NumAttributes < numAtts)
      numAtts = (int) m_NumAttributes;
  }
  if (getDebug())
    System.out.println("# of atts: " + numAtts);

  // determine random indices
  indices = new Vector<Integer>();
  for (i = 0; i < inputFormat.numAttributes(); i++) {
    if (i == inputFormat.classIndex())
      continue;
    indices.add(i);
  }

  subset = new Vector<Integer>();
  rand = new Random(m_Seed);
  for (i = 0; i < numAtts; i++) {
    index = rand.nextInt(indices.size());
    subset.add(indices.get(index));
    indices.remove(index);
  }
  Collections.sort(subset);
  if (inputFormat.classIndex() > -1)
    subset.add(inputFormat.classIndex());
  if (getDebug())
    System.out.println("indices: " + subset);

  // generate output format
  atts = new FastVector();
  m_Indices = new int[subset.size()];
  for (i = 0; i < subset.size(); i++) {
    atts.addElement(inputFormat.attribute(subset.get(i)));
    m_Indices[i] = subset.get(i);
  }
  result = new Instances(inputFormat.relationName(), atts, 0);
  if (inputFormat.classIndex() > -1)
    result.setClassIndex(result.numAttributes() - 1);

  return result;
}
static {
  features = new String[] {"firstName", "lastName"};
  List<String> ff = new ArrayList<String>();
  for (String f : features) {
    for (int i = 0; i < 9; i++) {
      for (char letter = 'a'; letter <= 'z'; letter++) {
        ff.add(f + i + "=" + letter);
      }
    }
  }
  features = ff.toArray(new String[ff.size()]);

  zeroOne = new FastVector(2);
  zeroOne.addElement("1");
  zeroOne.addElement("0");

  labels = new FastVector(2);
  labels.addElement("+");
  labels.addElement("-");
}
/**
 * Count data from the position index in the ruleset assuming that the given data are not covered
 * by the rules in positions 0...(index-1), and that the statistics of these rules are provided.
 * <br>
 * This procedure is typically useful when a temporary RuleStats object is constructed in order to
 * efficiently calculate the relative DL of the rule at position index; all other statistics are
 * not needed.
 *
 * @param index the given position
 * @param uncovered the data not covered by rules before index
 * @param prevRuleStats the provided stats of previous rules
 */
public void countData(int index, Instances uncovered, double[][] prevRuleStats) {
  if ((m_Filtered != null) || (m_Ruleset == null))
    return;

  int size = m_Ruleset.size();
  m_Filtered = new FastVector(size);
  m_SimpleStats = new FastVector(size);
  Instances[] data = new Instances[2];
  data[1] = uncovered;

  for (int i = 0; i < index; i++) {
    m_SimpleStats.addElement(prevRuleStats[i]);
    if (i + 1 == index)
      m_Filtered.addElement(data);
    else
      m_Filtered.addElement(new Object()); // placeholder to keep positions aligned
  }

  for (int j = index; j < size; j++) {
    double[] stats = new double[6]; // 6 statistics parameters
    Instances[] filtered = computeSimpleStats(j, data[1], stats, null);
    m_Filtered.addElement(filtered);
    m_SimpleStats.addElement(stats);
    data = filtered; // Data not covered
  }
}
public void init() {
  // 1. set up attributes
  attributes = new FastVector();
  // - numeric: 1
  attributes.addElement(new Attribute("IsMarket"));
  // - numeric: 2
  attributes.addElement(new Attribute("IsMonth"));
  // - numeric: 3
  attributes.addElement(new Attribute("IsCategory"));
  // - numeric: 4
  attributes.addElement(new Attribute("IsCompany"));
  // - numeric: 5
  attributes.addElement(new Attribute("IsBrand"));
  // - numeric: 6
  attributes.addElement(new Attribute("OfferValue"));
  // - nominal: 7
  attributeReturn = new FastVector();
  attributeReturn.addElement("f");
  attributeReturn.addElement("t");
  attributes.addElement(new Attribute("Return", attributeReturn));
}
/**
 * Parses a given list of options. Valid options are:
 *
 * <p>-D <br>
 * Turn on debugging output.
 *
 * <p>-S seed <br>
 * Random number seed (default 1).
 *
 * <p>-B classifierstring <br>
 * Classifierstring should contain the full class name of a scheme included for selection
 * followed by options to the classifier (required, option should be used once for each
 * classifier).
 *
 * <p>-X num_folds <br>
 * Use cross validation error as the basis for classifier selection. (default 0, i.e., use error
 * on the training data instead)
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  setDebug(Utils.getFlag('D', options));

  String numFoldsString = Utils.getOption('X', options);
  if (numFoldsString.length() != 0) {
    setNumFolds(Integer.parseInt(numFoldsString));
  } else {
    setNumFolds(0);
  }

  String randomString = Utils.getOption('S', options);
  if (randomString.length() != 0) {
    setSeed(Integer.parseInt(randomString));
  } else {
    setSeed(1);
  }

  // Iterate through the schemes
  FastVector classifiers = new FastVector();
  while (true) {
    String classifierString = Utils.getOption('B', options);
    if (classifierString.length() == 0) {
      break;
    }
    String[] classifierSpec = Utils.splitOptions(classifierString);
    if (classifierSpec.length == 0) {
      throw new Exception("Invalid classifier specification string");
    }
    String classifierName = classifierSpec[0];
    classifierSpec[0] = "";
    classifiers.addElement(Classifier.forName(classifierName, classifierSpec));
  }

  if (classifiers.size() <= 1) {
    throw new Exception("At least two classifiers must be specified" + " with the -B option.");
  } else {
    Classifier[] classifiersArray = new Classifier[classifiers.size()];
    for (int i = 0; i < classifiersArray.length; i++) {
      classifiersArray[i] = (Classifier) classifiers.elementAt(i);
    }
    setClassifiers(classifiersArray);
  }
}
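// This option layout (-X, -S, and repeated -B classifier specs) matches Weka's MultiScheme
// meta-classifier. A hedged sketch of configuring it programmatically; the two base classifiers
// chosen below are arbitrary examples.
import weka.classifiers.meta.MultiScheme;
import weka.core.Utils;

public class MultiSchemeOptionsDemo {
  public static void main(String[] args) throws Exception {
    MultiScheme ms = new MultiScheme();
    // Select between J48 and NaiveBayes using 5-fold cross-validation error, seed 1
    ms.setOptions(Utils.splitOptions(
        "-X 5 -S 1 "
            + "-B \"weka.classifiers.trees.J48 -C 0.25\" "
            + "-B \"weka.classifiers.bayes.NaiveBayes\""));
    System.out.println(Utils.joinOptions(ms.getOptions()));
  }
}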
public boolean batchFinished() throws Exception {
  Instances input = getInputFormat();
  String relation = input.relationName();
  int numAttributes = input.numAttributes();
  int numInstances = input.numInstances();

  // Collect the distinct values of every attribute as nominal labels
  FastVector attributes = new FastVector(numAttributes);
  for (int i = 0; i < numAttributes; i++) {
    FastVector vector = new FastVector();
    for (int j = 0; j < numInstances; j++) {
      double value = input.instance(j).value(i);
      String string = String.valueOf(value);
      if (vector.indexOf(string) == -1)
        vector.addElement(string);
    }
    attributes.addElement(new Attribute(input.attribute(i).name(), vector));
  }

  // Create the nominal output header
  Instances output = new Instances(relation, attributes, 0);
  setOutputFormat(output);

  // Map every value onto the index of its nominal label and push the converted instances
  for (int i = 0; i < numInstances; i++) {
    double[] vals = new double[numAttributes];
    for (int j = 0; j < numAttributes; j++) {
      vals[j] = output.attribute(j).indexOfValue(String.valueOf(input.instance(i).value(j)));
    }
    push(new Instance(input.instance(i).weight(), vals));
  }

  return super.batchFinished();
}