// Converts a piece of text into an Instance that Weka can recognise
private Instance makeInstance(String text, Instances data) {
  Instance instance = new Instance(2);
  Attribute messageAtt = data.attribute("Message");
  instance.setValue(messageAtt, messageAtt.addStringValue(text));
  instance.setDataset(data);
  return instance;
}
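// Minimal usage sketch for makeInstance, assuming the pre-3.7 Weka API used above
// (weka.core.Instance as a concrete class, FastVector for attribute definitions). The
// "spam"/"ham" class labels and the message text are illustrative assumptions only.
FastVector attributes = new FastVector(2);
attributes.addElement(new Attribute("Message", (FastVector) null)); // string attribute
FastVector classValues = new FastVector(2);
classValues.addElement("spam");
classValues.addElement("ham");
attributes.addElement(new Attribute("Class", classValues));
Instances data = new Instances("MessageData", attributes, 0);
data.setClassIndex(data.numAttributes() - 1);
data.add(makeInstance("Congratulations, you have won a prize", data));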
public void restart() {
  IMDb = 0L;
  Oracle = 0L;
  try {
    clearRecords();
    recordsSelect = "SELECT " + userColumn + ", " + objectColumn + ", ";
    for (Attribute attr : attributes) {
      if (attr.isRelationValued() || attr.name().isEmpty()) continue;
      recordsSelect += attr.name() + " ,";
    }
    recordsSelect = recordsSelect.substring(0, recordsSelect.length() - 1);
    recordsSelect += " FROM " + recordsTable;
    if (betweenCondition != null) {
      recordsSelect += " WHERE " + betweenCondition;
    }
    if (userID != null) {
      // Add a WHERE clause if one is not already present
      if (betweenCondition == null || !recordsSelect.endsWith(betweenCondition))
        recordsSelect += " WHERE ";
      // otherwise append AND, since a WHERE clause is already there
      else recordsSelect += " AND ";
      recordsSelect += userColumn + " = " + userID;
    }
    recordsStatement = provider.getConn().prepareStatement(recordsSelect);
    records = recordsStatement.executeQuery();
    records.next();
  } catch (Exception e) {
    e.printStackTrace();
  }
  getAttributes();
}
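// Example of the query built above (all names hypothetical): with userColumn = "user_id",
// objectColumn = "item_id", a single usable attribute "rating", recordsTable = "ratings",
// betweenCondition = null and userID = 42, recordsSelect ends up as (modulo whitespace):
//   SELECT user_id, item_id, rating FROM ratings WHERE user_id = 42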
/**
 * Initialises a rule with the values obtained from Apriori.
 *
 * @param b    body of the rule
 * @param h    head of the rule
 * @param conf confidence of the rule
 * @param e    enumeration of the rule's attributes
 * @param c    class attribute of the rule
 */
public Regra(ItemSet b, ItemSet h, double conf, Enumeration<Attribute> e, Attribute c)
    throws Exception {
  cabeca = h.itemAt(0);
  confianca = conf;
  int corpoTemp[] = b.items();
  atributosNaoVazios = 0;
  corpo = new Atributo[corpoTemp.length];
  int i = 0;
  while (e.hasMoreElements()) {
    Attribute att = (Attribute) e.nextElement();
    // attributes.add(att);
    if (att.isNominal()) {
      if (corpoTemp[i] == -1) {
        AtributoNominal vazio = new AtributoNominal(true, att, i);
        corpo[i++] = vazio;
      } else {
        AtributoNominal nominal = new AtributoNominal(corpoTemp[i], AtributoNominal.igual, att, i);
        corpo[i++] = nominal;
      }
    } else {
      throw new Exception("Attribute is not nominal!");
    }
  }
  classe = c;
  matrizContigencia = new MatrizContingencia();
  getNumAtributosNaoVazios();
}
@Override public String classify(User user, Sample sample) { Instances trainingSet = new TrainingSetBuilder() .setAttributes(user.getBssids()) .setClassAttribute( "Location", user.getLocations().stream().map(Location::getName).collect(Collectors.toList())) .build("TrainingSet", 1); // Create instance Map<String, Integer> BSSIDLevelMap = getBSSIDLevelMap(sample); Instance instance = new Instance(trainingSet.numAttributes()); for (Enumeration e = trainingSet.enumerateAttributes(); e.hasMoreElements(); ) { Attribute attribute = (Attribute) e.nextElement(); String bssid = attribute.name(); int level = (BSSIDLevelMap.containsKey(bssid)) ? BSSIDLevelMap.get(bssid) : 0; instance.setValue(attribute, level); } if (sample.getLocation() != null) instance.setValue(trainingSet.classAttribute(), sample.getLocation()); instance.setDataset(trainingSet); trainingSet.add(instance); int predictedClass = classify(fromBase64(user.getClassifiers()), instance); return trainingSet.classAttribute().value(predictedClass); }
/** * Input an instance for filtering. * * @param instance the input instance * @return true if the filtered instance may now be collected with output(). * @throws Exception if the input format was not set or the date format cannot be parsed */ public boolean input(Instance instance) throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_NewBatch) { resetQueue(); m_NewBatch = false; } Instance newInstance = (Instance) instance.copy(); int index = m_AttIndex.getIndex(); if (!newInstance.isMissing(index)) { double value = instance.value(index); try { // Format and parse under the new format to force any required // loss in precision. value = m_OutputAttribute.parseDate(m_OutputAttribute.formatDate(value)); } catch (ParseException pe) { throw new RuntimeException("Output date format couldn't parse its own output!!"); } newInstance.setValue(index, value); } push(newInstance); return true; }
@Override public List<Classifier> buildClassifiers(User user, List<Sample> validSamples) { Instances trainingSet = new TrainingSetBuilder() .setAttributes(user.getBssids()) .setClassAttribute( "Location", user.getLocations().stream().map(Location::getName).collect(Collectors.toList())) .build("TrainingSet", validSamples.size()); // Create instances validSamples.forEach( sample -> { Map<String, Integer> BSSIDLevelMap = getBSSIDLevelMap(sample); Instance instance = new Instance(trainingSet.numAttributes()); for (Enumeration e = trainingSet.enumerateAttributes(); e.hasMoreElements(); ) { Attribute attribute = (Attribute) e.nextElement(); String bssid = attribute.name(); int level = (BSSIDLevelMap.containsKey(bssid)) ? BSSIDLevelMap.get(bssid) : 0; instance.setValue(attribute, level); } instance.setValue(trainingSet.classAttribute(), sample.getLocation()); instance.setDataset(trainingSet); trainingSet.add(instance); }); // Build classifiers List<Classifier> classifiers = buildClassifiers(trainingSet); return classifiers; }
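// The buildClassifiers(Instances) overload called above is not shown in this snippet; the sketch
// below is only a plausible minimal implementation. It assumes Classifier is
// weka.classifiers.Classifier, and the concrete candidate types (J48, NaiveBayes, IBk) are
// illustrative choices, not taken from the source.
private List<Classifier> buildClassifiers(Instances trainingSet) {
  List<Classifier> classifiers = new ArrayList<>();
  for (Classifier candidate : Arrays.<Classifier>asList(new J48(), new NaiveBayes(), new IBk())) {
    try {
      candidate.buildClassifier(trainingSet); // train each candidate on the same training set
      classifiers.add(candidate);
    } catch (Exception e) {
      e.printStackTrace(); // skip candidates that fail to train
    }
  }
  return classifiers;
}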
/** * Calculates the class membership probabilities for the given test instance. * * @param instance the instance to be classified * @return predicted class probability distribution * @exception Exception if distribution can't be computed */ @Override public double[] distributionForInstance(Instance instance) throws Exception { double[] probs = new double[instance.numClasses()]; int attIndex; for (int j = 0; j < instance.numClasses(); j++) { probs[j] = 1; Enumeration<Attribute> enumAtts = instance.enumerateAttributes(); attIndex = 0; while (enumAtts.hasMoreElements()) { Attribute attribute = enumAtts.nextElement(); if (!instance.isMissing(attribute)) { if (attribute.isNominal()) { probs[j] *= m_Counts[j][attIndex][(int) instance.value(attribute)]; } else { probs[j] *= normalDens(instance.value(attribute), m_Means[j][attIndex], m_Devs[j][attIndex]); } } attIndex++; } probs[j] *= m_Priors[j]; } // Normalize probabilities Utils.normalize(probs); return probs; }
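// Sketch of the Gaussian density helper used above; this mirrors the textbook normal density
// (the actual normalDens member is not shown in this snippet, so treat it as an assumption).
private double normalDens(double x, double mean, double stdDev) {
  double diff = x - mean;
  // N(x; mean, stdDev) = exp(-(x - mean)^2 / (2 * stdDev^2)) / (sqrt(2 * pi) * stdDev)
  return (1.0 / (Math.sqrt(2 * Math.PI) * stdDev))
      * Math.exp(-(diff * diff) / (2 * stdDev * stdDev));
}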
/** Computes average class values for each attribute and value */ private void computeAverageClassValues() { double totalCounts, sum; Instance instance; double[] counts; double[][] avgClassValues = new double[getInputFormat().numAttributes()][0]; m_Indices = new int[getInputFormat().numAttributes()][0]; for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = getInputFormat().attribute(j); if (att.isNominal()) { avgClassValues[j] = new double[att.numValues()]; counts = new double[att.numValues()]; for (int i = 0; i < getInputFormat().numInstances(); i++) { instance = getInputFormat().instance(i); if (!instance.classIsMissing() && (!instance.isMissing(j))) { counts[(int) instance.value(j)] += instance.weight(); avgClassValues[j][(int) instance.value(j)] += instance.weight() * instance.classValue(); } } sum = Utils.sum(avgClassValues[j]); totalCounts = Utils.sum(counts); if (Utils.gr(totalCounts, 0)) { for (int k = 0; k < att.numValues(); k++) { if (Utils.gr(counts[k], 0)) { avgClassValues[j][k] /= counts[k]; } else { avgClassValues[j][k] = sum / totalCounts; } } } m_Indices[j] = Utils.sort(avgClassValues[j]); } } }
/** * Compute the number of all possible conditions that could appear in a rule of a given data. For * nominal attributes, it's the number of values that could appear; for numeric attributes, it's * the number of values * 2, i.e. <= and >= are counted as different possible conditions. * * @param data the given data * @return number of all conditions of the data */ public static double numAllConditions(Instances data) { double total = 0; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); if (att.isNominal()) total += (double) att.numValues(); else total += 2.0 * (double) data.numDistinctValues(att); } return total; }
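// Worked example (hypothetical data): for a dataset with one nominal attribute holding 3 values
// and one numeric attribute taking 10 distinct values, numAllConditions returns 3 + 2 * 10 = 23.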
/** Find the fold attribute within a dataset. */
private Attribute getAttribute(Instances data) {
  SingleIndex index = new SingleIndex(super.getAttributeIndex());
  index.setUpper(data.numAttributes() - 1);
  Attribute att = data.attribute(index.getIndex());
  if (att == null)
    throw new NoSuchElementException(
        "attribute #" + super.getAttributeIndex() + " does not exist");
  if (!att.isNominal() && !att.isString())
    throw new IllegalArgumentException("Attribute '" + att + "' is neither nominal nor string");
  return att;
}
/** Helper method that maps the configured values to their index positions within the given attribute. */
private int[] toValueIndexArray(Attribute attribute) {
  TIntArrayList out = new TIntArrayList();
  for (String value : values) {
    int valueIndex = attribute.indexOfValue(value);
    if (valueIndex < 0)
      throw new NoSuchElementException(
          "no such value: '" + value + "' in attribute '" + attribute.toString() + "'");
    out.add(valueIndex);
  }
  return out.toNativeArray();
}
public double classifyInstance(Instance inst) throws Exception { if (m_attribute == null) { return m_intercept; } else { if (inst.isMissing(m_attribute.index())) { throw new Exception("UnivariateLinearRegression: No missing values!"); } return m_intercept + m_slope * inst.value(m_attribute.index()); } }
/**
 * Adds this tree recursively to the buffer.
 *
 * @param id the unique id for the method
 * @param buffer the buffer to add the source code to
 * @return the last ID being used
 * @throws Exception if something goes wrong
 */
protected int toSource(int id, StringBuffer buffer) throws Exception {
  int result;
  int i;
  int newID;
  StringBuffer[] subBuffers;

  buffer.append("\n");
  buffer.append(" protected static double node" + id + "(Object[] i) {\n");

  // leaf?
  if (m_Attribute == null) {
    result = id;
    if (Double.isNaN(m_ClassValue)) buffer.append(" return Double.NaN;");
    else buffer.append(" return " + m_ClassValue + ";");
    if (m_ClassAttribute != null) buffer.append(" // " + m_ClassAttribute.value((int) m_ClassValue));
    buffer.append("\n");
    buffer.append(" }\n");
  } else {
    buffer.append(" // " + m_Attribute.name() + "\n");

    // subtree calls
    subBuffers = new StringBuffer[m_Attribute.numValues()];
    newID = id;
    for (i = 0; i < m_Attribute.numValues(); i++) {
      newID++;
      buffer.append(" ");
      if (i > 0) buffer.append("else ");
      buffer.append(
          "if (((String) i["
              + m_Attribute.index()
              + "]).equals(\""
              + m_Attribute.value(i)
              + "\"))\n");
      buffer.append(" return node" + newID + "(i);\n");
      subBuffers[i] = new StringBuffer();
      newID = m_Successors[i].toSource(newID, subBuffers[i]);
    }
    buffer.append(" else\n");
    buffer.append(
        " throw new IllegalArgumentException(\"Value '\" + i["
            + m_Attribute.index()
            + "] + \"' is not allowed!\");\n");
    buffer.append(" }\n");

    // output subtree code
    for (i = 0; i < m_Attribute.numValues(); i++) {
      buffer.append(subBuffers[i].toString());
    }
    subBuffers = null;

    result = newID;
  }

  return result;
}
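// For illustration: with a hypothetical nominal attribute "outlook" at index 0 (values "sunny"
// and "rainy") and two leaf children, a call to toSource(0, buffer) emits roughly the following
// generated method, followed by the node1/node2 leaf methods (indentation approximate, since it
// depends on the literal whitespace in the append calls above):
//
//   protected static double node0(Object[] i) {
//     // outlook
//     if (((String) i[0]).equals("sunny"))
//       return node1(i);
//     else if (((String) i[0]).equals("rainy"))
//       return node2(i);
//     else
//       throw new IllegalArgumentException("Value '" + i[0] + "' is not allowed!");
//   }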
/** * Splits a dataset according to the values of a nominal attribute. * * @param data the data which is to be split * @param att the attribute to be used for splitting * @return the sets of instances produced by the split */ private Instances[] splitData(Instances data, Attribute att) { Instances[] splitData = new Instances[att.numValues()]; for (int j = 0; j < att.numValues(); j++) { splitData[j] = new Instances(data, data.numInstances()); } Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); splitData[(int) inst.value(att)].add(inst); } for (int i = 0; i < splitData.length; i++) { splitData[i].compactify(); } return splitData; }
@Override public int compare(InstanceHolder o1, InstanceHolder o2) { // both missing is equal if (o1.m_instance.isMissing(m_attribute) && o2.m_instance.isMissing(m_attribute)) { return 0; } // one missing - missing instances should all be at the end // regardless of whether order is ascending or descending if (o1.m_instance.isMissing(m_attribute)) { return 1; } if (o2.m_instance.isMissing(m_attribute)) { return -1; } int cmp = 0; if (!m_attribute.isString() && !m_attribute.isRelationValued()) { double val1 = o1.m_instance.value(m_attribute); double val2 = o2.m_instance.value(m_attribute); cmp = Double.compare(val1, val2); } else if (m_attribute.isString()) { String val1 = o1.m_stringVals.get(m_attribute.name()); String val2 = o2.m_stringVals.get(m_attribute.name()); /* * String val1 = o1.stringValue(m_attribute); String val2 = * o2.stringValue(m_attribute); */ // TODO case insensitive? cmp = val1.compareTo(val2); } else { throw new IllegalArgumentException( "Can't sort according to " + "relation-valued attribute values!"); } if (m_descending) { return -cmp; } return cmp; }
/**
 * Computes the Laplace precision of the rule.
 *
 * @return Laplace precision
 */
public double getLaplace() {
  double temp = matrizContigencia.getB();
  int numClasses = classe.numValues();
  if (temp != 0) {
    laplace = (matrizContigencia.getH_B() + 1) / (temp + numClasses);
  } else {
    laplace = 0;
  }
  return laplace;
}
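// Worked example (hypothetical counts, assuming getB() is the number of instances the rule covers
// and getH_B() the covered instances of the predicted class): with B = 10, H_B = 8 and 3 classes,
// the Laplace estimate is (8 + 1) / (10 + 3) ≈ 0.692.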
/** * Convert a single instance over if the class is numeric. The converted instance is added to the * end of the output queue. * * @param instance the instance to convert */ private void convertInstanceNumeric(Instance instance) { if (!m_needToTransform) { push(instance); return; } double[] vals = new double[outputFormatPeek().numAttributes()]; int attSoFar = 0; for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = getInputFormat().attribute(j); if ((!att.isNominal()) || (j == getInputFormat().classIndex())) { vals[attSoFar] = instance.value(j); attSoFar++; } else { if (instance.isMissing(j)) { for (int k = 0; k < att.numValues() - 1; k++) { vals[attSoFar + k] = instance.value(j); } } else { int k = 0; while ((int) instance.value(j) != m_Indices[j][k]) { vals[attSoFar + k] = 1; k++; } while (k < att.numValues() - 1) { vals[attSoFar + k] = 0; k++; } } attSoFar += att.numValues() - 1; } } Instance inst = null; if (instance instanceof SparseInstance) { inst = new SparseInstance(instance.weight(), vals); } else { inst = new DenseInstance(instance.weight(), vals); } inst.setDataset(getOutputFormat()); copyValues(inst, false, instance.dataset(), getOutputFormat()); inst.setDataset(getOutputFormat()); push(inst); }
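// How the binary encoding above works (assuming m_Indices is produced by the
// computeAverageClassValues method shown earlier): m_Indices[j] lists the values of nominal
// attribute j ordered by their average class value, a k-valued attribute becomes k - 1 binary
// attributes, and binary attribute q is set to 1 exactly when the instance's value ranks above
// position q in that ordering (the ordering trick used for regression trees by Breiman et al.).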
/**
 * Set the output format. Takes the current m_InputFormat, replaces the selected nominal
 * attributes with string attributes, and calls setOutputFormat(Instances) appropriately.
 */
private void setOutputFormat() {
  Instances newData;
  FastVector newAtts;

  // Compute new attributes
  newAtts = new FastVector(getInputFormat().numAttributes());
  for (int j = 0; j < getInputFormat().numAttributes(); j++) {
    Attribute att = getInputFormat().attribute(j);
    if (!att.isNominal() || !m_AttIndex.isInRange(j)) newAtts.addElement(att);
    else newAtts.addElement(new Attribute(att.name(), (FastVector) null));
  }

  // Construct new header
  newData = new Instances(getInputFormat().relationName(), newAtts, 0);
  newData.setClassIndex(getInputFormat().classIndex());
  setOutputFormat(newData);
}
/**
 * Calculate the metric value.
 *
 * @param mlData Multi-label dataset for which to calculate the metric
 * @return Value of the metric
 */
public double calculate(MultiLabelInstances mlData) {
  Instances instances = mlData.getDataSet();
  int nInstances = mlData.getNumInstances();

  double avg;
  double var2;
  double var4;
  double val;
  int nNumeric = 0;
  double mean = 0;

  Set<Attribute> attributesSet = mlData.getFeatureAttributes();

  for (Attribute att : attributesSet) {
    if (att.isNumeric()) {
      nNumeric++;
      avg = instances.meanOrMode(att);
      var2 = 0;
      var4 = 0;

      for (Instance inst : instances) {
        val = inst.value(att);
        var2 += Math.pow(val - avg, 2);
        var4 += Math.pow(val - avg, 4);
      }

      double kurtosis = (nInstances * var4 / Math.pow(var2, 2)) - 3;
      double sampleKurtosis =
          (kurtosis * (nInstances + 1) + 6) * (nInstances - 1) / ((nInstances - 2) * (nInstances - 3));
      mean += sampleKurtosis;
    }
  }

  if (nNumeric > 0) {
    mean = mean / nNumeric;
  } else {
    mean = Double.NaN;
  }

  this.value = mean;

  return value;
}
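// The computation above, per numeric attribute, written out (var2 and var4 are the sums of
// squared and fourth-power deviations from the mean over the n = nInstances values):
//   g2 = n * var4 / var2^2 - 3                                  (excess kurtosis)
//   G2 = ((n + 1) * g2 + 6) * (n - 1) / ((n - 2) * (n - 3))     (sample-corrected kurtosis)
// The metric is the mean of G2 over all numeric features, or NaN if there are none.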
/** Set the output format. Changes the format of the specified date attribute. */ private void setOutputFormat() { // Create new attributes FastVector newAtts = new FastVector(getInputFormat().numAttributes()); for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = getInputFormat().attribute(j); if (j == m_AttIndex.getIndex()) { newAtts.addElement(new Attribute(att.name(), getDateFormat().toPattern())); } else { newAtts.addElement(att.copy()); } } // Create new header Instances newData = new Instances(getInputFormat().relationName(), newAtts, 0); newData.setClassIndex(getInputFormat().classIndex()); m_OutputAttribute = newData.attribute(m_AttIndex.getIndex()); setOutputFormat(newData); }
/**
 * Returns the test represented by a string in Prolog notation.
 *
 * @return a string representing the test in Prolog notation
 */
public String toPrologString() {
  Attribute att = m_Dataset.attribute(m_AttIndex);
  StringBuffer str = new StringBuffer();
  String attName = m_Dataset.attribute(m_AttIndex).name();
  if (att.isNumeric()) {
    str = str.append(attName + " ");
    if (m_Not) str = str.append(">= " + Utils.doubleToString(m_Split, 3));
    else str = str.append("< " + Utils.doubleToString(m_Split, 3));
  } else {
    String value = att.value((int) m_Split);
    if (value.equals("false")) {
      str = str.append("not(" + attName + ")");
    } else {
      str = str.append(attName);
    }
  }
  return str.toString();
}
/** * Constructs an instance suitable for passing to the model for scoring * * @param incoming the incoming instance * @return an instance with values mapped to be consistent with what the model is expecting */ protected Instance mapIncomingFieldsToModelFields(Instance incoming) { Instances modelHeader = m_model.getHeader(); double[] vals = new double[modelHeader.numAttributes()]; for (int i = 0; i < modelHeader.numAttributes(); i++) { if (m_attributeMap[i] < 0) { // missing or type mismatch vals[i] = Utils.missingValue(); continue; } Attribute modelAtt = modelHeader.attribute(i); Attribute incomingAtt = incoming.dataset().attribute(m_attributeMap[i]); if (incoming.isMissing(incomingAtt.index())) { vals[i] = Utils.missingValue(); continue; } if (modelAtt.isNumeric()) { vals[i] = incoming.value(m_attributeMap[i]); } else if (modelAtt.isNominal()) { String incomingVal = incoming.stringValue(m_attributeMap[i]); int modelIndex = modelAtt.indexOfValue(incomingVal); if (modelIndex < 0) { vals[i] = Utils.missingValue(); } else { vals[i] = modelIndex; } } else if (modelAtt.isString()) { vals[i] = 0; modelAtt.setStringValue(incoming.stringValue(m_attributeMap[i])); } } if (modelHeader.classIndex() >= 0) { // set class to missing value vals[modelHeader.classIndex()] = Utils.missingValue(); } Instance newInst = null; if (incoming instanceof SparseInstance) { newInst = new SparseInstance(incoming.weight(), vals); } else { newInst = new DenseInstance(incoming.weight(), vals); } newInst.setDataset(modelHeader); return newInst; }
/** * processes the given instance (may change the provided instance) and returns the modified * version. * * @param instance the instance to process * @return the modified data * @throws Exception in case the processing goes wrong */ protected Instance process(Instance instance) throws Exception { Instance result; Attribute att; double[] values; int i; // adjust indices values = new double[instance.numAttributes()]; for (i = 0; i < instance.numAttributes(); i++) { att = instance.attribute(i); if (!att.isNominal() || !m_AttributeIndices.isInRange(i) || instance.isMissing(i)) values[i] = instance.value(i); else values[i] = m_NewOrder[i][(int) instance.value(i)]; } // create new instance result = new DenseInstance(instance.weight(), values); return result; }
public String toString() {
  if (m_attribute == null) {
    return "No model built yet.";
  }
  StringBuffer text = new StringBuffer();
  text.append("Linear regression on " + m_attribute.name() + "\n\n");
  text.append(Utils.doubleToString(m_slope, 2) + " * " + m_attribute.name());
  if (m_intercept > 0) {
    text.append(" + " + Utils.doubleToString(m_intercept, 2));
  } else {
    text.append(" - " + Utils.doubleToString((-m_intercept), 2));
  }
  text.append("\n");
  return text.toString();
}
/** * Method for building an Id3 tree. * * @param data the training data * @exception Exception if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = Utils.missingValue(); m_Distribution = new double[data.numClasses()]; return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); // Make leaf if information gain is zero. // Otherwise create successors. if (Utils.eq(infoGains[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { Instances[] splitData = splitData(data, m_Attribute); m_Successors = new Id3[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new Id3(); m_Successors[j].makeTree(splitData[j]); } } }
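// computeInfoGain (and the entropy helper it relies on) is not part of this snippet. The sketch
// below follows the standard Id3 formulation: entropy of the parent node minus the weighted
// entropy of the subsets produced by splitData; the actual helpers may differ.
private double computeInfoGain(Instances data, Attribute att) throws Exception {
  double infoGain = computeEntropy(data);
  Instances[] splitData = splitData(data, att);
  for (int j = 0; j < att.numValues(); j++) {
    if (splitData[j].numInstances() > 0) {
      infoGain -=
          ((double) splitData[j].numInstances() / (double) data.numInstances())
              * computeEntropy(splitData[j]);
    }
  }
  return infoGain;
}

private double computeEntropy(Instances data) throws Exception {
  double[] classCounts = new double[data.numClasses()];
  Enumeration instEnum = data.enumerateInstances();
  while (instEnum.hasMoreElements()) {
    Instance inst = (Instance) instEnum.nextElement();
    classCounts[(int) inst.classValue()]++;
  }
  double entropy = 0;
  for (int j = 0; j < data.numClasses(); j++) {
    if (classCounts[j] > 0) {
      entropy -= classCounts[j] * Utils.log2(classCounts[j]);
    }
  }
  entropy /= (double) data.numInstances();
  return entropy + Utils.log2(data.numInstances());
}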
/**
 * Set the output format. Takes the current m_InputFormat, converts the selected string attributes
 * to nominal attributes over their current sets of values, and calls setOutputFormat(Instances)
 * appropriately.
 */
private void setOutputFormat() {
  Instances newData;
  FastVector newAtts, newVals;

  // Compute new attributes
  newAtts = new FastVector(getInputFormat().numAttributes());
  for (int j = 0; j < getInputFormat().numAttributes(); j++) {
    Attribute att = getInputFormat().attribute(j);
    if (!m_AttIndices.isInRange(j) || !att.isString()) {
      // We don't have to copy the attribute because the
      // attribute index remains unchanged.
      newAtts.addElement(att);
    } else {
      // Compute list of attribute values
      newVals = new FastVector(att.numValues());
      for (int i = 0; i < att.numValues(); i++) {
        newVals.addElement(att.value(i));
      }
      newAtts.addElement(new Attribute(att.name(), newVals));
    }
  }

  // Construct new header
  newData = new Instances(getInputFormat().relationName(), newAtts, 0);
  newData.setClassIndex(getInputFormat().classIndex());
  setOutputFormat(newData);
}
/** * Sets up the structure for the plot instances. Sets m_PlotInstances to null if instances are not * saved for visualization. * * @see #getSaveForVisualization() */ protected void determineFormat() { FastVector hv; Attribute predictedClass; Attribute classAt; FastVector attVals; int i; if (!m_SaveForVisualization) { m_PlotInstances = null; return; } hv = new FastVector(); classAt = m_Instances.attribute(m_ClassIndex); if (classAt.isNominal()) { attVals = new FastVector(); for (i = 0; i < classAt.numValues(); i++) attVals.addElement(classAt.value(i)); predictedClass = new Attribute("predicted" + classAt.name(), attVals); } else { predictedClass = new Attribute("predicted" + classAt.name()); } for (i = 0; i < m_Instances.numAttributes(); i++) { if (i == m_Instances.classIndex()) hv.addElement(predictedClass); hv.addElement(m_Instances.attribute(i).copy()); } m_PlotInstances = new Instances(m_Instances.relationName() + "_predicted", hv, m_Instances.numInstances()); m_PlotInstances.setClassIndex(m_ClassIndex + 1); }
/** * Outputs a tree at a certain level. * * @param level the level at which the tree is to be printed * @return the tree as string at the given level */ private String toString(int level) { StringBuffer text = new StringBuffer(); if (m_Attribute == null) { if (Utils.isMissingValue(m_ClassValue)) { text.append(": null"); } else { text.append(": " + m_ClassAttribute.value((int) m_ClassValue)); } } else { for (int j = 0; j < m_Attribute.numValues(); j++) { text.append("\n"); for (int i = 0; i < level; i++) { text.append("| "); } text.append(m_Attribute.name() + " = " + m_Attribute.value(j)); text.append(m_Successors[j].toString(level + 1)); } } return text.toString(); }
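// For illustration (hypothetical weather-style attributes), the recursive rendering above produces
// output of this shape, where each "| " marks one level of depth and leaves are printed inline
// after a colon:
//
//   outlook = sunny
//   | humidity = high: no
//   | humidity = normal: yes
//   outlook = overcast: yes
//   outlook = rainy
//   | windy = TRUE: no
//   | windy = FALSE: yes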
/** * Builds a mapping between the header for the incoming data to be scored and the header used to * train the model. Uses attribute names to match between the two. Also constructs a list of * missing attributes and a list of type mismatches. * * @param modelHeader the header of the data used to train the model * @param incomingHeader the header of the incoming data * @throws DistributedWekaException if more than 50% of the attributes expected by the model are * missing or have a type mismatch with the incoming data */ protected void buildAttributeMap(Instances modelHeader, Instances incomingHeader) throws DistributedWekaException { m_attributeMap = new int[modelHeader.numAttributes()]; int problemCount = 0; for (int i = 0; i < modelHeader.numAttributes(); i++) { Attribute modAtt = modelHeader.attribute(i); Attribute incomingAtt = incomingHeader.attribute(modAtt.name()); if (incomingAtt == null) { // missing model attribute m_attributeMap[i] = -1; m_missingMismatch.put(modAtt.name(), "missing from incoming data"); problemCount++; } else if (modAtt.type() != incomingAtt.type()) { // type mismatch m_attributeMap[i] = -1; m_missingMismatch.put( modAtt.name(), "type mismatch - " + "model: " + Attribute.typeToString(modAtt) + " != incoming: " + Attribute.typeToString(incomingAtt)); problemCount++; } else { m_attributeMap[i] = incomingAtt.index(); } } // -1 for the class (if set) int adjustForClass = modelHeader.classIndex() >= 0 ? 1 : 0; if (problemCount > (modelHeader.numAttributes() - adjustForClass) / 2) { throw new DistributedWekaException( "More than 50% of the attributes that the model " + "is expecting to see are either missing or have a type mismatch in the " + "incoming data."); } }
@Override public void updateNode(Instance inst) throws Exception { super.updateDistribution(inst); for (int i = 0; i < inst.numAttributes(); i++) { Attribute a = inst.attribute(i); if (i != inst.classIndex()) { ConditionalSufficientStats stats = m_nodeStats.get(a.name()); if (stats == null) { if (a.isNumeric()) { stats = new GaussianConditionalSufficientStats(); } else { stats = new NominalConditionalSufficientStats(); } m_nodeStats.put(a.name(), stats); } stats.update( inst.value(a), inst.classAttribute().value((int) inst.classValue()), inst.weight()); } } }