/** * Process a classifier's prediction for an instance and update a set of plotting instances and * additional plotting info. m_PlotShape for nominal class datasets holds shape types (actual data * points have automatic shape type assignment; classifier error data points have box shape type). * For numeric class datasets, the actual data points are stored in m_PlotInstances and m_PlotSize * stores the error (which is later converted to shape size values). * * @param toPredict the actual data point * @param classifier the classifier * @param eval the evaluation object to use for evaluating the classifier on the instance to * predict * @see #m_PlotShapes * @see #m_PlotSizes * @see #m_PlotInstances */ public void process(Instance toPredict, Classifier classifier, Evaluation eval) { double pred; double[] values; int i; try { pred = eval.evaluateModelOnceAndRecordPrediction(classifier, toPredict); if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) { toPredict = ((weka.classifiers.misc.InputMappedClassifier) classifier) .constructMappedInstance(toPredict); } if (!m_SaveForVisualization) return; if (m_PlotInstances != null) { values = new double[m_PlotInstances.numAttributes()]; for (i = 0; i < m_PlotInstances.numAttributes(); i++) { if (i < toPredict.classIndex()) { values[i] = toPredict.value(i); } else if (i == toPredict.classIndex()) { values[i] = pred; values[i + 1] = toPredict.value(i); i++; } else { values[i] = toPredict.value(i - 1); } } m_PlotInstances.add(new DenseInstance(1.0, values)); if (toPredict.classAttribute().isNominal()) { if (toPredict.isMissing(toPredict.classIndex()) || Utils.isMissingValue(pred)) { m_PlotShapes.addElement(new Integer(Plot2D.MISSING_SHAPE)); } else if (pred != toPredict.classValue()) { // set to default error point shape m_PlotShapes.addElement(new Integer(Plot2D.ERROR_SHAPE)); } else { // otherwise set to constant (automatically assigned) point shape m_PlotShapes.addElement(new 
Integer(Plot2D.CONST_AUTOMATIC_SHAPE)); } m_PlotSizes.addElement(new Integer(Plot2D.DEFAULT_SHAPE_SIZE)); } else { // store the error (to be converted to a point size later) Double errd = null; if (!toPredict.isMissing(toPredict.classIndex()) && !Utils.isMissingValue(pred)) { errd = new Double(pred - toPredict.classValue()); m_PlotShapes.addElement(new Integer(Plot2D.CONST_AUTOMATIC_SHAPE)); } else { // missing shape if actual class not present or prediction is missing m_PlotShapes.addElement(new Integer(Plot2D.MISSING_SHAPE)); } m_PlotSizes.addElement(errd); } } } catch (Exception ex) { ex.printStackTrace(); } }
private double calcNodeScorePlain(int nNode) { Instances instances = m_BayesNet.m_Instances; ParentSet oParentSet = m_BayesNet.getParentSet(nNode); // determine cardinality of parent set & reserve space for frequency counts int nCardinality = oParentSet.getCardinalityOfParents(); int numValues = instances.attribute(nNode).numValues(); int[] nCounts = new int[nCardinality * numValues]; // initialize (don't need this?) for (int iParent = 0; iParent < nCardinality * numValues; iParent++) { nCounts[iParent] = 0; } // estimate distributions Enumeration enumInsts = instances.enumerateInstances(); while (enumInsts.hasMoreElements()) { Instance instance = (Instance) enumInsts.nextElement(); // updateClassifier; double iCPT = 0; for (int iParent = 0; iParent < oParentSet.getNrOfParents(); iParent++) { int nParent = oParentSet.getParent(iParent); iCPT = iCPT * instances.attribute(nParent).numValues() + instance.value(nParent); } nCounts[numValues * ((int) iCPT) + (int) instance.value(nNode)]++; } return calcScoreOfCounts(nCounts, nCardinality, numValues, instances); } // CalcNodeScore
/**
 * Equality test on the left-hand attribute of an instance.
 *
 * <p>When the right-hand side is an attribute, two missing values count as equal, exactly one
 * missing value counts as not equal, and otherwise the two values are compared with
 * {@code Utils.eq}. When the right-hand side is a constant, a missing left-hand value never
 * matches; otherwise the value is compared with {@code numericOperand} via {@code Utils.eq}.
 * {@code rhsOperand} and {@code regexPattern} are not used by this test.
 */
@Override
boolean evaluate(
    Instance inst,
    int lhsAttIndex,
    String rhsOperand,
    double numericOperand,
    Pattern regexPattern,
    boolean rhsIsAttribute,
    int rhsAttIndex) {
  boolean lhsMissing = inst.isMissing(lhsAttIndex);

  if (!rhsIsAttribute) {
    return !lhsMissing && Utils.eq(inst.value(lhsAttIndex), numericOperand);
  }

  boolean rhsMissing = inst.isMissing(rhsAttIndex);
  if (lhsMissing || rhsMissing) {
    // equal only when both sides are missing
    return lhsMissing && rhsMissing;
  }
  return Utils.eq(inst.value(lhsAttIndex), inst.value(rhsAttIndex));
}
/**
 * Computes, for every nominal attribute of the input format, the weighted average class value
 * of each attribute value and stores in m_Indices the ordering returned by {@code Utils.sort}
 * for those averages. Attribute values without any weight receive the overall weighted average.
 * Instances with a missing class or a missing attribute value are skipped.
 */
private void computeAverageClassValues() {
  int numAtts = getInputFormat().numAttributes();
  m_Indices = new int[numAtts][0];

  for (int attIdx = 0; attIdx < numAtts; attIdx++) {
    Attribute att = getInputFormat().attribute(attIdx);
    if (!att.isNominal()) {
      continue;
    }

    double[] classAverages = new double[att.numValues()];
    double[] weights = new double[att.numValues()];

    // accumulate weighted class sums and weights per attribute value
    for (int row = 0; row < getInputFormat().numInstances(); row++) {
      Instance inst = getInputFormat().instance(row);
      if (inst.classIsMissing() || inst.isMissing(attIdx)) {
        continue;
      }
      int valueIdx = (int) inst.value(attIdx);
      weights[valueIdx] += inst.weight();
      classAverages[valueIdx] += inst.weight() * inst.classValue();
    }

    double overallSum = Utils.sum(classAverages);
    double overallWeight = Utils.sum(weights);
    if (Utils.gr(overallWeight, 0)) {
      for (int v = 0; v < att.numValues(); v++) {
        if (Utils.gr(weights[v], 0)) {
          classAverages[v] /= weights[v];
        } else {
          // unseen value: fall back to the overall average
          classAverages[v] = overallSum / overallWeight;
        }
      }
    }

    m_Indices[attIdx] = Utils.sort(classAverages);
  }
}
/**
 * Compares two datasets and fails with an exception as soon as a difference is found.
 *
 * <p>Checked in this order: the headers (only if m_CheckHeader is set), the number of instances,
 * and then, instance by instance and attribute by attribute, the values followed by the
 * instance weights. Values are compared as strings if m_CompareValuesAsString is set, otherwise
 * numerically with tolerance m_MaxDiffValues; weights use tolerance m_MaxDiffWeights.
 *
 * @param data1 one set of instances
 * @param data2 the other set of instances
 * @throws Exception if the datasets differ
 */
protected void compareDatasets(Instances data1, Instances data2) throws Exception {
  if (m_CheckHeader && !data2.equalHeaders(data1)) {
    throw new Exception("header has been modified\n" + data2.equalHeadersMsg(data1));
  }

  if (data2.numInstances() != data1.numInstances()) {
    throw new Exception("number of instances has changed");
  }

  for (int row = 0; row < data2.numInstances(); row++) {
    Instance orig = data1.instance(row);
    Instance copy = data2.instance(row);

    for (int col = 0; col < orig.numAttributes(); col++) {
      boolean valueChanged;
      if (orig.isMissing(col)) {
        valueChanged = !copy.isMissing(col);
      } else if (m_CompareValuesAsString) {
        valueChanged = !orig.toString(col).equals(copy.toString(col));
      } else {
        valueChanged = Math.abs(orig.value(col) - copy.value(col)) > m_MaxDiffValues;
      }
      if (valueChanged) {
        throw new Exception("instances have changed");
      }

      // the weight is re-checked after every attribute
      if (Math.abs(orig.weight() - copy.weight()) > m_MaxDiffWeights) {
        throw new Exception("instance weights have changed");
      }
    }
  }
}
/**
 * Computes the median of the non-missing values of every numeric attribute and stores it in
 * {@code medians}; {@code imputations} is reset to an all-zero array of the same length. Numeric
 * attributes without any present value keep a median of 0. Afterwards one log line per numeric
 * attribute is written through {@code Conversion.log}.
 *
 * @param instances the dataset to compute the medians for
 */
protected void searchMedian(Instances instances) {
  int numAtts = instances.numAttributes();
  medians = new double[numAtts];
  imputations = new int[numAtts];

  for (int att = 0; att < numAtts; ++att) {
    if (!instances.attribute(att).isNumeric()) {
      continue;
    }

    // collect the present values of this attribute at the front of the buffer
    double[] buffer = new double[instances.numInstances()];
    int present = 0;
    for (int row = 0; row < instances.numInstances(); ++row) {
      double value = instances.get(row).value(att);
      if (!Utils.isMissingValue(value)) {
        buffer[present++] = value;
      }
    }

    if (present > 0) {
      medians[att] = new Median().evaluate(Arrays.copyOf(buffer, present));
    }
  }

  for (int att = 0; att < numAtts; ++att) {
    if (!instances.attribute(att).isNumeric()) {
      continue;
    }
    Conversion.log(
        "OK",
        "Impute Numeric",
        "Attribute " + instances.attribute(att) + " - Median: " + medians[att]);
  }
}
/**
 * Converts an input instance to the output format.
 *
 * <p>Non-class attributes whose name is found in m_unchanged are copied as-is. Every other
 * non-class attribute is expanded into one value per class label: the probability returned by
 * the estimator registered for that attribute and class, or a missing value if the attribute
 * value is missing. The class value is written to the last output slot.
 *
 * @param current the input instance to convert
 * @return a transformed instance
 * @throws Exception if a problem occurs
 */
protected Instance convertInstance(Instance current) throws Exception {
  double[] vals = new double[getOutputFormat().numAttributes()];
  int next = 0;

  for (int att = 0; att < current.numAttributes(); att++) {
    if (att == current.classIndex()) {
      continue;
    }

    String attName = current.attribute(att).name();
    boolean keepAsIs = m_unchanged != null && m_unchanged.attribute(attName) != null;
    if (keepAsIs) {
      vals[next++] = current.value(att);
      continue;
    }

    Estimator[] estimators = m_estimatorLookup.get(attName);
    int numClassValues = current.classAttribute().numValues();
    boolean missing = current.isMissing(att);
    for (int cls = 0; cls < numClassValues; cls++) {
      vals[next++] =
          missing ? Utils.missingValue() : estimators[cls].getProbability(current.value(att));
    }
  }

  vals[vals.length - 1] = current.classValue();
  DenseInstance converted = new DenseInstance(current.weight(), vals);
  return converted;
}
/** * Calculates the class membership probabilities for the given test instance. * * @param instance the instance to be classified * @return predicted class probability distribution * @exception Exception if distribution can't be computed */ @Override public double[] distributionForInstance(Instance instance) throws Exception { double[] probs = new double[instance.numClasses()]; int attIndex; for (int j = 0; j < instance.numClasses(); j++) { probs[j] = 1; Enumeration<Attribute> enumAtts = instance.enumerateAttributes(); attIndex = 0; while (enumAtts.hasMoreElements()) { Attribute attribute = enumAtts.nextElement(); if (!instance.isMissing(attribute)) { if (attribute.isNominal()) { probs[j] *= m_Counts[j][attIndex][(int) instance.value(attribute)]; } else { probs[j] *= normalDens(instance.value(attribute), m_Means[j][attIndex], m_Devs[j][attIndex]); } } attIndex++; } probs[j] *= m_Priors[j]; } // Normalize probabilities Utils.normalize(probs); return probs; }
/** * Convert a single instance over. The converted instance is added to the end of the output queue. * * @param instance the instance to convert */ protected void convertInstance(Instance instance) { int index = 0; double[] vals = new double[outputFormatPeek().numAttributes()]; // Copy and convert the values for (int i = 0; i < getInputFormat().numAttributes(); i++) { if (m_DiscretizeCols.isInRange(i) && getInputFormat().attribute(i).isNumeric()) { int j; double currentVal = instance.value(i); if (m_CutPoints[i] == null) { if (instance.isMissing(i)) { vals[index] = Utils.missingValue(); } else { vals[index] = 0; } index++; } else { if (!m_MakeBinary) { if (instance.isMissing(i)) { vals[index] = Utils.missingValue(); } else { for (j = 0; j < m_CutPoints[i].length; j++) { if (currentVal <= m_CutPoints[i][j]) { break; } } vals[index] = j; } index++; } else { for (j = 0; j < m_CutPoints[i].length; j++) { if (instance.isMissing(i)) { vals[index] = Utils.missingValue(); } else if (currentVal <= m_CutPoints[i][j]) { vals[index] = 0; } else { vals[index] = 1; } index++; } } } } else { vals[index] = instance.value(i); index++; } } Instance inst = null; if (instance instanceof SparseInstance) { inst = new SparseInstance(instance.weight(), vals); } else { inst = new DenseInstance(instance.weight(), vals); } inst.setDataset(getOutputFormat()); copyValues(inst, false, instance.dataset(), getOutputFormat()); inst.setDataset(getOutputFormat()); push(inst); }
/** * Convert an <code>Instance</code> to an array of values that matches the format of the mining * schema. First maps raw attribute values and then applies rules for missing values, outliers * etc. * * @param inst the <code>Instance</code> to convert * @param miningSchema the mining schema incoming instance attributes * @return an array of doubles that are values from the incoming Instances, correspond to the * format of the mining schema and have had missing values, outliers etc. dealt with. * @throws Exception if something goes wrong */ public double[] instanceToSchema(Instance inst, MiningSchema miningSchema) throws Exception { Instances miningSchemaI = miningSchema.getMiningSchemaAsInstances(); // allocate enough space for both mining schema fields and any derived fields double[] result = new double[miningSchema.getFieldsAsInstances().numAttributes()]; // Copy over the values for (int i = 0; i < miningSchemaI.numAttributes(); i++) { // if (miningSchemaI.attribute(i).isNumeric()) { result[i] = inst.value(m_fieldsMap[i]); if (miningSchemaI.attribute(i).isNominal() || miningSchemaI.attribute(i).isString()) { // If not missing, look up the index of this incoming categorical value in // the mining schema if (!Utils.isMissingValue(inst.value(m_fieldsMap[i]))) { int[] valueMap = m_nominalValueMaps[i]; int index = valueMap[(int) inst.value(m_fieldsMap[i])]; String incomingAttValue = inst.attribute(m_fieldsMap[i]).value((int) inst.value(m_fieldsMap[i])); /*int index = miningSchemaI.attribute(i).indexOfValue(incomingAttValue); */ if (index >= 0) { result[i] = index; } else { // set this to "unknown" (-1) for nominal valued attributes result[i] = UNKNOWN_NOMINAL_VALUE; String warningString = "[MappingInfo] WARNING: Can't match nominal value " + incomingAttValue; if (m_log != null) { m_log.logMessage(warningString); } else { System.err.println(warningString); } } } } } // Now deal with missing values and outliers... 
miningSchema.applyMissingAndOutlierTreatments(result); // printInst(result); // now fill in any derived values ArrayList<DerivedFieldMetaInfo> derivedFields = miningSchema.getDerivedFields(); for (int i = 0; i < derivedFields.size(); i++) { DerivedFieldMetaInfo temp = derivedFields.get(i); // System.err.println("Applying : " + temp); double r = temp.getDerivedValue(result); result[i + miningSchemaI.numAttributes()] = r; } /*System.err.print("==> "); for (int i = 0; i < result.length; i++) { System.err.print(" " + result[i]); } System.err.println();*/ return result; }
/**
 * Adds an instance to the running statistics: increments N, adds the timestamp to LST and its
 * square to SST, and for every index below {@code instance.numValues()} adds the instance value
 * to LS and its square to SS.
 *
 * @param instance the instance to add
 * @param timestamp the timestamp associated with the instance
 */
public void insert(Instance instance, long timestamp) {
  N++;
  LST += timestamp;
  SST += timestamp * timestamp;

  int numValues = instance.numValues();
  for (int dim = 0; dim < numValues; dim++) {
    LS[dim] += instance.value(dim);
    SS[dim] += instance.value(dim) * instance.value(dim);
  }
}
private RunTrace traceToXML(int file_id, int task_id, int run_id) throws Exception { RunTrace trace = new RunTrace(run_id); URL traceURL = apiconnector.getOpenmlFileUrl(file_id, "Task_" + task_id + "_trace.arff"); Instances traceDataset = new Instances(new BufferedReader(Input.getURL(traceURL))); List<Integer> parameterIndexes = new ArrayList<Integer>(); if (traceDataset.attribute("repeat") == null || traceDataset.attribute("fold") == null || traceDataset.attribute("iteration") == null || traceDataset.attribute("evaluation") == null || traceDataset.attribute("selected") == null) { throw new Exception("trace file missing mandatory attributes. "); } for (int i = 0; i < traceDataset.numAttributes(); ++i) { if (traceDataset.attribute(i).name().startsWith("parameter_")) { parameterIndexes.add(i); } } if (parameterIndexes.size() == 0) { throw new Exception( "trace file contains no fields with prefix 'parameter_' (i.e., parameters are not registered). "); } if (traceDataset.numAttributes() > 6 + parameterIndexes.size()) { throw new Exception( "trace file contains illegal attributes (only allow for repeat, fold, iteration, evaluation, selected, setup_string and parameter_*). 
"); } for (int i = 0; i < traceDataset.numInstances(); ++i) { Instance current = traceDataset.get(i); Integer repeat = (int) current.value(traceDataset.attribute("repeat").index()); Integer fold = (int) current.value(traceDataset.attribute("fold").index()); Integer iteration = (int) current.value(traceDataset.attribute("iteration").index()); Double evaluation = current.value(traceDataset.attribute("evaluation").index()); Boolean selected = current.stringValue(traceDataset.attribute("selected").index()).equals("true"); Map<String, String> parameters = new HashMap<String, String>(); for (int j = 0; j < parameterIndexes.size(); ++j) { int attIdx = parameterIndexes.get(j); if (traceDataset.attribute(attIdx).isNumeric()) { parameters.put(traceDataset.attribute(attIdx).name(), current.value(attIdx) + ""); } else { parameters.put(traceDataset.attribute(attIdx).name(), current.stringValue(attIdx)); } } String setup_string = new JSONObject(parameters).toString(); trace.addIteration( new RunTrace.Trace_iteration( repeat, fold, iteration, setup_string, evaluation, selected)); } return trace; }
/**
 * Returns the index of the subset the instance is assigned to.
 *
 * <p>Returns -1 if the value of the split attribute is missing. For a nominal split attribute
 * the result is 0 if the value index equals the (truncated) split point and 1 otherwise; for
 * any other attribute it is 0 if the value is smaller than or equal to the split point
 * (according to {@code Utils.smOrEq}) and 1 otherwise.
 *
 * @param instance the instance to assign
 * @return 0, 1, or -1 for a missing split attribute value
 * @exception Exception if something goes wrong
 */
public final int whichSubset(Instance instance) throws Exception {
  if (instance.isMissing(m_attIndex)) {
    return -1;
  }

  boolean firstSubset;
  if (instance.attribute(m_attIndex).isNominal()) {
    firstSubset = (int) m_splitPoint == (int) instance.value(m_attIndex);
  } else {
    firstSubset = Utils.smOrEq(instance.value(m_attIndex), m_splitPoint);
  }
  return firstSubset ? 0 : 1;
}
/**
 * Converts an instance into a list of objects, one per entry of {@code isNumeric}: the numeric
 * value for numeric columns, otherwise the attribute's label for the stored value index.
 *
 * @param instance the instance to convert
 * @return the converted values in column order
 */
private List<Object> convert(Instance instance) {
  List<Object> row = new LinkedList<Object>();
  int col = 0;
  while (col < isNumeric.length) {
    Object cell;
    if (!isNumeric[col]) {
      cell = instance.attribute(col).value((int) instance.value(col));
    } else {
      cell = instance.value(col);
    }
    row.add(cell);
    col++;
  }
  return row;
}
public int calculateAllWrong() { if (run_ids.size() < 2) { throw new RuntimeException("Too few runs to compare. Should be at least 2. "); } ArrayList<Attribute> attributes = new ArrayList<Attribute>(); attributes.add(new Attribute("repeat")); attributes.add(new Attribute("fold")); attributes.add(new Attribute("rowid")); resultSet = new Instances("all-wrong", attributes, task_splits.numInstances()); for (int i = 0; i < task_splits.numInstances(); ++i) { Instance current = task_splits.get(i); boolean test = current.stringValue(task_splits.attribute("type")).equals("TEST"); if (!test) { continue; } Integer row_id = (int) current.value(task_splits.attribute("rowid")); Integer repeat = (int) current.value(task_splits.attribute("repeat")); Integer fold = (int) current.value(task_splits.attribute("fold")); Integer sample = 0; try { sample = (int) current.value(task_splits.attribute("sample")); } catch (Exception e) { } String correctLabel = correct.get(row_id); Integer correctPredictions = 0; for (Integer run_id : run_ids) { // System.out.println(predictions.get(run_id)); // System.out.println(repeat + "," + fold + "," + sample + "," + row_id); if (predictions .get(run_id) .get(repeat) .get(fold) .get(sample) .get(row_id) .equals(correctLabel)) { correctPredictions += 1; } } if (correctPredictions == 0) { double[] instance = {repeat, fold, row_id}; resultSet.add(new DenseInstance(1.0, instance)); } } return resultSet.size(); }
/**
 * Runs the filter configured with "6,3" and checks the result: it must have one attribute less
 * than the input, and for every instance attribute 4 of the result must be missing when input
 * attribute 5 or 2 is missing, and otherwise equal input attribute 5 minus input attribute 2
 * (within EXPR_DELTA).
 */
public void testTypical() {
  m_Filter = getFilter("6,3");
  Instances result = useFilter();
  assertEquals(m_Instances.numAttributes() - 1, result.numAttributes());

  for (int row = 0; row < result.numInstances(); row++) {
    Instance orig = m_Instances.instance(row);
    boolean operandMissing = orig.isMissing(5) || orig.isMissing(2);

    if (!operandMissing) {
      assertEquals(orig.value(5) - orig.value(2), result.instance(row).value(4), EXPR_DELTA);
      continue;
    }
    assertTrue(
        "Instance " + (row + 1) + " should have been ?", result.instance(row).isMissing(4));
  }
}
/**
 * Checks if an instance contains an item set. Entries of m_items that are -1 (or smaller) are
 * not part of the item set and place no constraint on the instance. For dense instances a value
 * index of 0 is treated like a missing value; for sparse instances an attribute that is not
 * stored in the instance fails the check for any item that is set.
 *
 * @param instance the instance to be tested
 * @return true if the given instance contains this item set
 */
public boolean containedByTreatZeroAsMissing(Instance instance) {
  if (instance instanceof weka.core.SparseInstance) {
    int numInstVals = instance.numValues();
    int numItemSetVals = m_items.length;

    // p1 walks the stored (sparse) values of the instance, p2 walks the item positions.
    // NOTE(review): m_items[p2] is read while p1 < numInstVals even if p2 has reached
    // m_items.length; this presumably relies on m_items having one entry per attribute of
    // the instance - confirm against the callers.
    for (int p1 = 0, p2 = 0; p1 < numInstVals || p2 < numItemSetVals; ) {
      // attribute index of the current stored value; MAX_VALUE once the instance is exhausted
      int instIndex = Integer.MAX_VALUE;
      if (p1 < numInstVals) {
        instIndex = instance.index(p1);
      }
      int itemIndex = p2;
      if (m_items[itemIndex] > -1) {
        // this item is set: the instance must store exactly this attribute here ...
        if (itemIndex != instIndex) {
          return false;
        } else {
          // ... with a non-missing value equal to the item's value
          if (instance.isMissingSparse(p1)) {
            return false;
          }
          if (m_items[itemIndex] != (int) instance.valueSparse(p1)) {
            return false;
          }
        }
        p1++;
        p2++;
      } else {
        // item not set: skip it, consuming the instance value too if it is the same attribute
        if (itemIndex < instIndex) {
          p2++;
        } else if (itemIndex == instIndex) {
          p2++;
          p1++;
        }
      }
    }
  } else {
    for (int i = 0; i < instance.numAttributes(); i++) {
      if (m_items[i] > -1) {
        // a missing value or a value index of 0 never matches a set item
        if (instance.isMissing(i) || (int) instance.value(i) == 0) {
          return false;
        }
        if (m_items[i] != (int) instance.value(i)) {
          return false;
        }
      }
    }
  }
  return true;
}
/**
 * Converts a single instance for the nominal-class case and pushes it onto the output queue.
 *
 * <p>If no transformation is needed the instance is pushed unchanged. Otherwise non-nominal
 * attributes, the class attribute, and (unless m_TransformAll is set) nominal attributes with at
 * most two values are copied as-is. Every other nominal attribute is expanded into one 0/1
 * indicator per value; if its value is missing, all of its indicators receive that missing
 * value.
 *
 * @param instance the instance to convert
 */
private void convertInstanceNominal(Instance instance) {
  if (!m_needToTransform) {
    push(instance);
    return;
  }

  double[] vals = new double[outputFormatPeek().numAttributes()];
  int out = 0;

  for (int j = 0; j < getInputFormat().numAttributes(); j++) {
    Attribute att = getInputFormat().attribute(j);

    boolean copyAsIs =
        !att.isNominal()
            || j == getInputFormat().classIndex()
            || (att.numValues() <= 2 && !m_TransformAll);
    if (copyAsIs) {
      vals[out++] = instance.value(j);
      continue;
    }

    if (instance.isMissing(j)) {
      // propagate the missing value to every indicator
      for (int k = 0; k < att.numValues(); k++) {
        vals[out + k] = instance.value(j);
      }
    } else {
      for (int k = 0; k < att.numValues(); k++) {
        vals[out + k] = (k == (int) instance.value(j)) ? 1 : 0;
      }
    }
    out += att.numValues();
  }

  Instance converted;
  if (instance instanceof SparseInstance) {
    converted = new SparseInstance(instance.weight(), vals);
  } else {
    converted = new DenseInstance(instance.weight(), vals);
  }

  converted.setDataset(getOutputFormat());
  copyValues(converted, false, instance.dataset(), getOutputFormat());
  converted.setDataset(getOutputFormat());
  push(converted);
}
/** * Computes class distribution of an instance using the FastRandomTree. * * <p>In Weka's RandomTree, the distributions were normalized so that all probabilities sum to 1; * this would abolish the effect of instance weights on voting. In FastRandomForest 0.97 onwards, * the distributions are normalized by dividing with the number of instances going into a leaf. * * <p> * * @param instance the instance to compute the distribution for * @return the computed class distribution * @throws Exception if computation fails */ @Override public double[] distributionForInstance(Instance instance) throws Exception { double[] returnedDist = null; if (m_Attribute > -1) { // ============================ node is not a leaf if (instance.isMissing(m_Attribute)) { // ---------------- missing value returnedDist = new double[m_MotherForest.getM_Info().numClasses()]; // split instance up for (int i = 0; i < m_Successors.length; i++) { double[] help = m_Successors[i].distributionForInstance(instance); if (help != null) { for (int j = 0; j < help.length; j++) { returnedDist[j] += m_Prop[i] * help[j]; } } } } else if (m_MotherForest.getM_Info().attribute(m_Attribute).isNominal()) { // ------ nominal // returnedDist = m_Successors[(int) instance.value(m_Attribute)] // .distributionForInstance(instance); // 0.99: new - binary splits (also) for nominal attributes if (instance.value(m_Attribute) == m_SplitPoint) { returnedDist = m_Successors[0].distributionForInstance(instance); } else { returnedDist = m_Successors[1].distributionForInstance(instance); } } else { // ------------------------------------------ numeric attributes if (instance.value(m_Attribute) < m_SplitPoint) { returnedDist = m_Successors[0].distributionForInstance(instance); } else { returnedDist = m_Successors[1].distributionForInstance(instance); } } return returnedDist; } else { // =============================================== node is a leaf return m_ClassProbs; } }
private Instance instanceFromStream(StreamElement data) { try { Instance i = new Instance(data.getFieldNames().length); for (int j = 0; j < data.getFieldNames().length; j++) { i.setValue(j, ((Double) data.getData()[j])); } // scaling specific to opensense data!! should be put in the parameters? i.setValue(0, i.value(0) / 1400.0); i.setValue(2, i.value(2) / 50); i.setValue(3, i.value(3) / 100.0); i.setValue(4, i.value(4) / 100.0 - 4); return i; } catch (Exception e) { return null; } }
public double[] normalizedInstance(Instance inst) { // Normalize Instance double[] normalizedInstance = new double[inst.numAttributes()]; for (int j = 0; j < inst.numAttributes() - 1; j++) { int instAttIndex = modelAttIndexToInstanceAttIndex(j, inst); double mean = perceptronattributeStatistics.getValue(j) / perceptronYSeen; double sd = computeSD( squaredperceptronattributeStatistics.getValue(j), perceptronattributeStatistics.getValue(j), perceptronYSeen); if (sd > SD_THRESHOLD) normalizedInstance[j] = (inst.value(instAttIndex) - mean) / sd; else normalizedInstance[j] = inst.value(instAttIndex) - mean; } return normalizedInstance; }
/**
 * Inputs an instance for filtering. The instance values are copied into an array with one
 * extra slot at the end, {@code evaluateExpression} is applied to that array, and the resulting
 * instance (sparse if the input was sparse) is pushed onto the output queue.
 *
 * @param instance the input instance
 * @return true if the filtered instance may now be collected with output().
 * @exception IllegalStateException if no input format has been defined.
 * @exception Exception if there was a problem during the filtering.
 */
public boolean input(Instance instance) throws Exception {
  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }
  if (m_NewBatch) {
    resetQueue();
    m_NewBatch = false;
  }

  int numAtts = instance.numAttributes();
  double[] vals = new double[numAtts + 1];
  for (int att = 0; att < numAtts; att++) {
    vals[att] = instance.isMissing(att) ? Instance.missingValue() : instance.value(att);
  }

  evaluateExpression(vals);

  Instance filtered;
  if (instance instanceof SparseInstance) {
    filtered = new SparseInstance(instance.weight(), vals);
  } else {
    filtered = new Instance(instance.weight(), vals);
  }

  copyStringValues(filtered, false, instance.dataset(), getOutputFormat());
  filtered.setDataset(getOutputFormat());
  push(filtered);
  return true;
}
/**
 * Calculates the average of every column except the last one.
 *
 * <p>NaN values are left out of a column's sum, but the sum is still divided by the total
 * number of instances.
 *
 * @param inst the dataset
 * @return one average per column, excluding the last column
 */
public Double[] calculateAverage(Instances inst) {
  int numColumns = inst.numAttributes() - 1;
  Double[] average = new Double[numColumns];
  for (int col = 0; col < numColumns; col++) {
    average[col] = 0.0;
  }

  for (int row = 0; row < inst.numInstances(); row++) {
    Instance current = inst.instance(row);
    int rowColumns = current.numAttributes() - 1;
    for (int col = 0; col < rowColumns; col++) {
      double value = current.value(col);
      if (Double.isNaN(value)) {
        continue;
      }
      average[col] += value;
    }
  }

  for (int col = 0; col < numColumns; col++) {
    average[col] /= inst.numInstances();
  }
  return average;
}
/** * Input an instance for filtering. * * @param instance the input instance * @return true if the filtered instance may now be collected with output(). * @throws Exception if the input format was not set or the date format cannot be parsed */ public boolean input(Instance instance) throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_NewBatch) { resetQueue(); m_NewBatch = false; } Instance newInstance = (Instance) instance.copy(); int index = m_AttIndex.getIndex(); if (!newInstance.isMissing(index)) { double value = instance.value(index); try { // Format and parse under the new format to force any required // loss in precision. value = m_OutputAttribute.parseDate(m_OutputAttribute.formatDate(value)); } catch (ParseException pe) { throw new RuntimeException("Output date format couldn't parse its own output!!"); } newInstance.setValue(index, value); } push(newInstance); return true; }
public double ExpectedClassificationError(Instances pool, int attr_i) { // initialize alpha's to one int alpha[][][]; int NumberOfFeatures = pool.numAttributes() - 1; int NumberOfLabels = pool.numClasses(); alpha = new int[NumberOfFeatures][NumberOfLabels][]; for (int i = 0; i < NumberOfFeatures; i++) for (int j = 0; j < NumberOfLabels; j++) alpha[i][j] = new int[pool.attribute(i).numValues()]; for (int i = 0; i < NumberOfFeatures; i++) for (int j = 0; j < NumberOfLabels; j++) for (int k = 0; k < alpha[i][j].length; k++) alpha[i][j][k] = 1; // construct alpha's for (int i = 0; i < NumberOfFeatures; i++) // for each attribute { if (i == pool.classIndex()) // skip the class attribute i++; for (Enumeration<Instance> e = pool.enumerateInstances(); e.hasMoreElements(); ) // for each instance { Instance inst = e.nextElement(); if (!inst.isMissing(i)) // if attribute i is not missing (i.e. its been bought) { int j = (int) inst.classValue(); int k = (int) inst.value(i); alpha[i][j][k]++; } } } return ExpectedClassificationError(alpha, attr_i); }
@Override public void buildClassifier(Instances data) throws Exception { trainingData = data; Attribute classAttribute = data.classAttribute(); prototypes = new ArrayList<>(); classedData = new HashMap<String, ArrayList<Sequence>>(); indexClassedDataInFullData = new HashMap<String, ArrayList<Integer>>(); for (int c = 0; c < data.numClasses(); c++) { classedData.put(data.classAttribute().value(c), new ArrayList<Sequence>()); indexClassedDataInFullData.put(data.classAttribute().value(c), new ArrayList<Integer>()); } sequences = new Sequence[data.numInstances()]; classMap = new String[sequences.length]; for (int i = 0; i < sequences.length; i++) { Instance sample = data.instance(i); MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1]; int shift = (sample.classIndex() == 0) ? 1 : 0; for (int t = 0; t < sequence.length; t++) { sequence[t] = new MonoDoubleItemSet(sample.value(t + shift)); } sequences[i] = new Sequence(sequence); String clas = sample.stringValue(classAttribute); classMap[i] = clas; classedData.get(clas).add(sequences[i]); indexClassedDataInFullData.get(clas).add(i); // System.out.println("Element "+i+" of train is classed "+clas+" and went to element // "+(indexClassedDataInFullData.get(clas).size()-1)); } buildSpecificClassifier(data); }
/** * turns the instance into a libsvm row * * @param inst the instance to transform * @return the generated libsvm row */ protected String instanceToLibsvm(Instance inst) { StringBuffer result; int i; // class result = new StringBuffer("" + inst.classValue()); // attributes for (i = 0; i < inst.numAttributes(); i++) { if (i == inst.classIndex()) continue; if (inst.value(i) == 0) continue; result.append(" " + (i + 1) + ":" + inst.value(i)); } return result.toString(); }
/**
 * Updates the minimum and maximum values for all the attributes based on a new instance. An
 * attribute whose minimum is still NaN takes the instance value as both minimum and maximum.
 *
 * @param instance the new instance
 */
private void updateMinMax(Instance instance) {
  int numAtts = instance.numAttributes();
  for (int att = 0; att < numAtts; att++) {
    double value = instance.value(att);

    if (Double.isNaN(m_Min[att])) {
      // first value seen for this attribute
      m_Min[att] = value;
      m_Max[att] = value;
      continue;
    }

    if (value < m_Min[att]) {
      m_Min[att] = value;
    } else if (value > m_Max[att]) {
      m_Max[att] = value;
    }
  }
}
/**
 * Collects all test rows of the task splits on which the two runs predicted different labels.
 *
 * <p>{@code resultSet} is replaced by a new dataset "difference" with the attributes repeat,
 * fold and rowid, holding one instance per TEST row whose predictions differ between the runs.
 * The sample number is read from the "sample" attribute of the splits when that attribute
 * exists and defaults to 0 otherwise.
 *
 * @throws RuntimeException if the number of runs is not exactly 2
 */
public void calculateDifference() {
  if (run_ids.size() > 2) {
    throw new RuntimeException("Too many runs to compare. Should be 2. ");
  }
  if (run_ids.size() < 2) {
    // previously reported as "too many runs" as well
    throw new RuntimeException("Too few runs to compare. Should be 2. ");
  }

  ArrayList<Attribute> attributes = new ArrayList<Attribute>();
  attributes.add(new Attribute("repeat"));
  attributes.add(new Attribute("fold"));
  attributes.add(new Attribute("rowid"));
  resultSet = new Instances("difference", attributes, task_splits.numInstances());

  // "sample" is optional: look it up explicitly instead of catching the failure of a
  // lookup through a null attribute
  Attribute sampleAtt = task_splits.attribute("sample");

  for (int i = 0; i < task_splits.numInstances(); ++i) {
    Instance current = task_splits.get(i);
    boolean test = current.stringValue(task_splits.attribute("type")).equals("TEST");
    if (!test) {
      continue;
    }

    Integer row_id = (int) current.value(task_splits.attribute("rowid"));
    Integer repeat = (int) current.value(task_splits.attribute("repeat"));
    Integer fold = (int) current.value(task_splits.attribute("fold"));
    Integer sample = 0;
    if (sampleAtt != null) {
      sample = (int) current.value(sampleAtt);
    }

    String label = null;
    boolean difference = false;
    for (Integer run_id : run_ids) {
      String currentLabel =
          predictions.get(run_id).get(repeat).get(fold).get(sample).get(row_id);
      if (label == null) {
        label = currentLabel;
      } else if (!label.equals(currentLabel)) {
        difference = true;
      }
    }

    if (difference) {
      double[] instance = {repeat, fold, row_id};
      resultSet.add(new DenseInstance(1.0, instance));
    }
  }
}
/** * @param outFilePath full path to the output file * @param data the instances object containing the data on which the quantizer is learner * @param numClusters the number of clusters in k-means * @param maxIterations the maximum number of k-means iterations * @param seed the seed given to k-means * @param numSlots the number of execution slots to use (>1 = parallel execution) * @param kMeansPlusPlus whether to use kmeans++ for the initialization of the centroids * (true/false) * @throws Exception */ public static void learnAndWriteQuantizer( String outFilePath, Instances data, int numClusters, int maxIterations, int seed, int numSlots, boolean kMeansPlusPlus) throws Exception { System.out.println("--" + data.numInstances() + " vectors loaded--"); System.out.println("Vector dimensionality: " + data.numAttributes()); System.out.println("Clustering settings:"); System.out.println("Num clusters: " + numClusters); System.out.println("Max iterations: " + maxIterations); System.out.println("Seed: " + seed); System.out.println("Clustering started"); long start = System.currentTimeMillis(); // create a new instance for the Clusterer SimpleKMeansWithOutput clusterer = new SimpleKMeansWithOutput(); clusterer.setInitializeUsingKMeansPlusPlusMethod(kMeansPlusPlus); clusterer.setSeed(seed); clusterer.setNumClusters(numClusters); clusterer.setMaxIterations(maxIterations); clusterer.setNumExecutionSlots(numSlots); clusterer.setFastDistanceCalc(false); // build the clusterer clusterer.buildClusterer(data); long end = System.currentTimeMillis(); System.out.println("Clustering completed in " + (end - start) + " ms"); System.out.println("Writing quantizer in file"); // create a new file to store the codebook BufferedWriter out = new BufferedWriter(new FileWriter(new File(outFilePath))); // write the results of the clustering to the new file (csv formated) Instances clusterCentroids = clusterer.getClusterCentroids(); for (int j = 0; j < clusterCentroids.numInstances(); j++) { 
Instance centroid = clusterCentroids.instance(j); for (int k = 0; k < centroid.numAttributes() - 1; k++) { out.write(centroid.value(k) + ","); } out.write(centroid.value(centroid.numAttributes() - 1) + "\n"); } out.close(); }