public void restart() { IMDb = 0L; Oracle = 0L; try { clearRecords(); recordsSelect = "SELECT " + userColumn + ", " + objectColumn + ", "; for (Attribute attr : attributes) { if (attr.isRelationValued() || attr.name().isEmpty()) continue; recordsSelect += attr.name() + " ,"; } recordsSelect = recordsSelect.substring(0, recordsSelect.length() - 1); recordsSelect += " FROM " + recordsTable; if (betweenCondition != null) { recordsSelect += " WHERE " + betweenCondition; } if (userID != null) { // We add where clause, if it is not present if (betweenCondition == null || !recordsSelect.endsWith(betweenCondition)) recordsSelect += " WHERE "; // AND if where is already there else recordsSelect += " AND "; recordsSelect += userColumn + " = " + userID; } recordsStatement = provider.getConn().prepareStatement(recordsSelect); records = recordsStatement.executeQuery(); records.next(); } catch (Exception e) { e.printStackTrace(); } getAttributes(); }
/**
 * Prepares the header of the plot instances: a copy of the attributes of {@code m_Instances}
 * with an extra "predicted" attribute inserted directly in front of the class attribute. When
 * instances are not saved for visualization, {@code m_PlotInstances} is set to null instead.
 *
 * @see #getSaveForVisualization()
 */
protected void determineFormat() {
  if (!m_SaveForVisualization) {
    m_PlotInstances = null;
    return;
  }

  // The predicted attribute mirrors the class: same labels if nominal, plain numeric otherwise.
  Attribute classAt = m_Instances.attribute(m_ClassIndex);
  Attribute predictedClass;
  if (classAt.isNominal()) {
    FastVector labels = new FastVector();
    for (int v = 0; v < classAt.numValues(); v++) {
      labels.addElement(classAt.value(v));
    }
    predictedClass = new Attribute("predicted" + classAt.name(), labels);
  } else {
    predictedClass = new Attribute("predicted" + classAt.name());
  }

  FastVector header = new FastVector();
  for (int a = 0; a < m_Instances.numAttributes(); a++) {
    if (a == m_Instances.classIndex()) {
      header.addElement(predictedClass);
    }
    header.addElement(m_Instances.attribute(a).copy());
  }

  m_PlotInstances =
      new Instances(
          m_Instances.relationName() + "_predicted", header, m_Instances.numInstances());
  // The class moved one position to the right because of the inserted attribute.
  m_PlotInstances.setClassIndex(m_ClassIndex + 1);
}
/** * Set the output format. Takes the current average class values and m_InputFormat and calls * setOutputFormat(Instances) appropriately. */ private void setOutputFormat() { Instances newData; FastVector newAtts, newVals; // Compute new attributes newAtts = new FastVector(getInputFormat().numAttributes()); for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = getInputFormat().attribute(j); if (!m_AttIndices.isInRange(j) || !att.isString()) { // We don't have to copy the attribute because the // attribute index remains unchanged. newAtts.addElement(att); } else { // Compute list of attribute values newVals = new FastVector(att.numValues()); for (int i = 0; i < att.numValues(); i++) { newVals.addElement(att.value(i)); } newAtts.addElement(new Attribute(att.name(), newVals)); } } // Construct new header newData = new Instances(getInputFormat().relationName(), newAtts, 0); newData.setClassIndex(getInputFormat().classIndex()); setOutputFormat(newData); }
@Override public String classify(User user, Sample sample) { Instances trainingSet = new TrainingSetBuilder() .setAttributes(user.getBssids()) .setClassAttribute( "Location", user.getLocations().stream().map(Location::getName).collect(Collectors.toList())) .build("TrainingSet", 1); // Create instance Map<String, Integer> BSSIDLevelMap = getBSSIDLevelMap(sample); Instance instance = new Instance(trainingSet.numAttributes()); for (Enumeration e = trainingSet.enumerateAttributes(); e.hasMoreElements(); ) { Attribute attribute = (Attribute) e.nextElement(); String bssid = attribute.name(); int level = (BSSIDLevelMap.containsKey(bssid)) ? BSSIDLevelMap.get(bssid) : 0; instance.setValue(attribute, level); } if (sample.getLocation() != null) instance.setValue(trainingSet.classAttribute(), sample.getLocation()); instance.setDataset(trainingSet); trainingSet.add(instance); int predictedClass = classify(fromBase64(user.getClassifiers()), instance); return trainingSet.classAttribute().value(predictedClass); }
@Override public List<Classifier> buildClassifiers(User user, List<Sample> validSamples) { Instances trainingSet = new TrainingSetBuilder() .setAttributes(user.getBssids()) .setClassAttribute( "Location", user.getLocations().stream().map(Location::getName).collect(Collectors.toList())) .build("TrainingSet", validSamples.size()); // Create instances validSamples.forEach( sample -> { Map<String, Integer> BSSIDLevelMap = getBSSIDLevelMap(sample); Instance instance = new Instance(trainingSet.numAttributes()); for (Enumeration e = trainingSet.enumerateAttributes(); e.hasMoreElements(); ) { Attribute attribute = (Attribute) e.nextElement(); String bssid = attribute.name(); int level = (BSSIDLevelMap.containsKey(bssid)) ? BSSIDLevelMap.get(bssid) : 0; instance.setValue(attribute, level); } instance.setValue(trainingSet.classAttribute(), sample.getLocation()); instance.setDataset(trainingSet); trainingSet.add(instance); }); // Build classifiers List<Classifier> classifiers = buildClassifiers(trainingSet); return classifiers; }
/** * Adds this tree recursively to the buffer. * * @param id the unqiue id for the method * @param buffer the buffer to add the source code to * @return the last ID being used * @throws Exception if something goes wrong */ protected int toSource(int id, StringBuffer buffer) throws Exception { int result; int i; int newID; StringBuffer[] subBuffers; buffer.append("\n"); buffer.append(" protected static double node" + id + "(Object[] i) {\n"); // leaf? if (m_Attribute == null) { result = id; if (Double.isNaN(m_ClassValue)) buffer.append(" return Double.NaN;"); else buffer.append(" return " + m_ClassValue + ";"); if (m_ClassAttribute != null) buffer.append(" // " + m_ClassAttribute.value((int) m_ClassValue)); buffer.append("\n"); buffer.append(" }\n"); } else { buffer.append(" // " + m_Attribute.name() + "\n"); // subtree calls subBuffers = new StringBuffer[m_Attribute.numValues()]; newID = id; for (i = 0; i < m_Attribute.numValues(); i++) { newID++; buffer.append(" "); if (i > 0) buffer.append("else "); buffer.append( "if (((String) i[" + m_Attribute.index() + "]).equals(\"" + m_Attribute.value(i) + "\"))\n"); buffer.append(" return node" + newID + "(i);\n"); subBuffers[i] = new StringBuffer(); newID = m_Successors[i].toSource(newID, subBuffers[i]); } buffer.append(" else\n"); buffer.append( " throw new IllegalArgumentException(\"Value '\" + i[" + m_Attribute.index() + "] + \"' is not allowed!\");\n"); buffer.append(" }\n"); // output subtree code for (i = 0; i < m_Attribute.numValues(); i++) { buffer.append(subBuffers[i].toString()); } subBuffers = null; result = newID; } return result; }
private void fieldsMappingString(Instances miningSchemaI, Instances incomingI) { StringBuffer result = new StringBuffer(); int maxLength = 0; for (int i = 0; i < miningSchemaI.numAttributes(); i++) { if (miningSchemaI.attribute(i).name().length() > maxLength) { maxLength = miningSchemaI.attribute(i).name().length(); } } maxLength += 12; // length of " (nominal)"/" (numeric)" int minLength = 13; // "Mining schema".length() String headerS = "Mining schema"; String sep = "-------------"; if (maxLength < minLength) { maxLength = minLength; } headerS = PMMLUtils.pad(headerS, " ", maxLength, false); sep = PMMLUtils.pad(sep, "-", maxLength, false); sep += "\t ----------------\n"; headerS += "\t Incoming fields\n"; result.append(headerS); result.append(sep); for (int i = 0; i < miningSchemaI.numAttributes(); i++) { Attribute temp = miningSchemaI.attribute(i); String attName = "(" + ((temp.isNumeric()) ? "numeric)" : "nominal)") + " " + temp.name(); attName = PMMLUtils.pad(attName, " ", maxLength, false); attName += "\t--> "; result.append(attName); Attribute incoming = incomingI.attribute(m_fieldsMap[i]); String fieldName = "" + (m_fieldsMap[i] + 1) + " (" + ((incoming.isNumeric()) ? "numeric)" : "nominal)"); fieldName += " " + incoming.name(); result.append(fieldName + "\n"); } m_fieldsMappingText = result.toString(); }
/**
 * Returns a textual description of the model.
 *
 * @return "No model built yet." while no attribute has been selected, otherwise the regression
 *     formula {@code slope * attribute +/- intercept}
 */
public String toString() {
  if (m_attribute == null) {
    return "No model built yet.";
  }

  // The former "Predicting constant" branch tested m_attribute == null a second time and could
  // never run after the guard above, so it has been removed; the output is unchanged.
  StringBuffer text = new StringBuffer();
  text.append("Linear regression on " + m_attribute.name() + "\n\n");
  text.append(Utils.doubleToString(m_slope, 2) + " * " + m_attribute.name());
  if (m_intercept > 0) {
    text.append(" + " + Utils.doubleToString(m_intercept, 2));
  } else {
    text.append(" - " + Utils.doubleToString((-m_intercept), 2));
  }
  text.append("\n");
  return text.toString();
}
/**
 * Updates this node with a training instance: first the class distribution, then the
 * per-attribute conditional statistics (created on first use) of every non-class attribute.
 *
 * @param inst the instance to update the node with
 * @throws Exception if the update fails
 */
@Override
public void updateNode(Instance inst) throws Exception {
  super.updateDistribution(inst);

  for (int idx = 0; idx < inst.numAttributes(); idx++) {
    Attribute att = inst.attribute(idx);
    if (idx == inst.classIndex()) {
      continue;
    }

    String key = att.name();
    ConditionalSufficientStats stats = m_nodeStats.get(key);
    if (stats == null) {
      // First time this attribute is seen at this node: pick the estimator matching its type.
      if (att.isNumeric()) {
        stats = new GaussianConditionalSufficientStats();
      } else {
        stats = new NominalConditionalSufficientStats();
      }
      m_nodeStats.put(key, stats);
    }

    String classLabel = inst.classAttribute().value((int) inst.classValue());
    stats.update(inst.value(att), classLabel, inst.weight());
  }
}
/** * Builds a mapping between the header for the incoming data to be scored and the header used to * train the model. Uses attribute names to match between the two. Also constructs a list of * missing attributes and a list of type mismatches. * * @param modelHeader the header of the data used to train the model * @param incomingHeader the header of the incoming data * @throws DistributedWekaException if more than 50% of the attributes expected by the model are * missing or have a type mismatch with the incoming data */ protected void buildAttributeMap(Instances modelHeader, Instances incomingHeader) throws DistributedWekaException { m_attributeMap = new int[modelHeader.numAttributes()]; int problemCount = 0; for (int i = 0; i < modelHeader.numAttributes(); i++) { Attribute modAtt = modelHeader.attribute(i); Attribute incomingAtt = incomingHeader.attribute(modAtt.name()); if (incomingAtt == null) { // missing model attribute m_attributeMap[i] = -1; m_missingMismatch.put(modAtt.name(), "missing from incoming data"); problemCount++; } else if (modAtt.type() != incomingAtt.type()) { // type mismatch m_attributeMap[i] = -1; m_missingMismatch.put( modAtt.name(), "type mismatch - " + "model: " + Attribute.typeToString(modAtt) + " != incoming: " + Attribute.typeToString(incomingAtt)); problemCount++; } else { m_attributeMap[i] = incomingAtt.index(); } } // -1 for the class (if set) int adjustForClass = modelHeader.classIndex() >= 0 ? 1 : 0; if (problemCount > (modelHeader.numAttributes() - adjustForClass) / 2) { throw new DistributedWekaException( "More than 50% of the attributes that the model " + "is expecting to see are either missing or have a type mismatch in the " + "incoming data."); } }
/** * Determines the output format based on the input format and returns this. In case the output * format cannot be returned immediately, i.e., hasImmediateOutputFormat() returns false, then * this method will called from batchFinished() after the call of preprocess(Instances), in which, * e.g., statistics for the actual processing step can be gathered. * * @param inputFormat the input format to base the output format on * @return the output format * @throws Exception in case the determination goes wrong */ protected Instances determineOutputFormat(Instances inputFormat) throws Exception { Instances result; Attribute att; ArrayList<Attribute> atts; int i; m_AttributeIndices.setUpper(inputFormat.numAttributes() - 1); // generate new header atts = new ArrayList<Attribute>(); for (i = 0; i < inputFormat.numAttributes(); i++) { att = inputFormat.attribute(i); if (m_AttributeIndices.isInRange(i)) { if (m_ReplaceAll) atts.add(att.copy(att.name().replaceAll(m_Find, m_Replace))); else atts.add(att.copy(att.name().replaceFirst(m_Find, m_Replace))); } else { atts.add((Attribute) att.copy()); } } result = new Instances(inputFormat.relationName(), atts, 0); result.setClassIndex(inputFormat.classIndex()); return result; }
@Override public int compare(InstanceHolder o1, InstanceHolder o2) { // both missing is equal if (o1.m_instance.isMissing(m_attribute) && o2.m_instance.isMissing(m_attribute)) { return 0; } // one missing - missing instances should all be at the end // regardless of whether order is ascending or descending if (o1.m_instance.isMissing(m_attribute)) { return 1; } if (o2.m_instance.isMissing(m_attribute)) { return -1; } int cmp = 0; if (!m_attribute.isString() && !m_attribute.isRelationValued()) { double val1 = o1.m_instance.value(m_attribute); double val2 = o2.m_instance.value(m_attribute); cmp = Double.compare(val1, val2); } else if (m_attribute.isString()) { String val1 = o1.m_stringVals.get(m_attribute.name()); String val2 = o2.m_stringVals.get(m_attribute.name()); /* * String val1 = o1.stringValue(m_attribute); String val2 = * o2.stringValue(m_attribute); */ // TODO case insensitive? cmp = val1.compareTo(val2); } else { throw new IllegalArgumentException( "Can't sort according to " + "relation-valued attribute values!"); } if (m_descending) { return -cmp; } return cmp; }
/** * Determines the output format based on the input format and returns this. * * @param inputFormat the input format to base the output format on * @return the output format * @throws Exception in case the determination goes wrong */ protected Instances determineOutputFormat(Instances inputFormat) throws Exception { Instances result; Attribute att; Attribute attSorted; FastVector atts; FastVector values; Vector<String> sorted; int i; int n; m_AttributeIndices.setUpper(inputFormat.numAttributes() - 1); // determine sorted indices atts = new FastVector(); m_NewOrder = new int[inputFormat.numAttributes()][]; for (i = 0; i < inputFormat.numAttributes(); i++) { att = inputFormat.attribute(i); if (!att.isNominal() || !m_AttributeIndices.isInRange(i)) { m_NewOrder[i] = new int[0]; atts.addElement(inputFormat.attribute(i).copy()); continue; } // sort labels sorted = new Vector<String>(); for (n = 0; n < att.numValues(); n++) sorted.add(att.value(n)); Collections.sort(sorted, m_Comparator); // determine new indices m_NewOrder[i] = new int[att.numValues()]; values = new FastVector(); for (n = 0; n < att.numValues(); n++) { m_NewOrder[i][n] = sorted.indexOf(att.value(n)); values.addElement(sorted.get(n)); } attSorted = new Attribute(att.name(), values); attSorted.setWeight(att.weight()); atts.addElement(attSorted); } // generate new header result = new Instances(inputFormat.relationName(), atts, 0); result.setClassIndex(inputFormat.classIndex()); return result; }
/**
 * Returns a description of the classifier: for every class its prior, followed by, for every
 * attribute, either the per-value estimates (nominal) or mean and standard deviation (other).
 *
 * @return a description of the classifier as a string.
 */
@Override
public String toString() {
  if (m_Instances == null) {
    return "Naive Bayes (simple): No model built yet.";
  }

  try {
    StringBuffer text = new StringBuffer("Naive Bayes (simple)");
    for (int cls = 0; cls < m_Instances.numClasses(); cls++) {
      text.append(
          "\n\nClass "
              + m_Instances.classAttribute().value(cls)
              + ": P(C) = "
              + Utils.doubleToString(m_Priors[cls], 10, 8)
              + "\n\n");

      int attPos = 0;
      for (Enumeration<Attribute> atts = m_Instances.enumerateAttributes();
          atts.hasMoreElements();
          attPos++) {
        Attribute att = atts.nextElement();
        text.append("Attribute " + att.name() + "\n");
        if (att.isNominal()) {
          // One row with the labels, one row with the matching estimates.
          for (int v = 0; v < att.numValues(); v++) {
            text.append(att.value(v) + "\t");
          }
          text.append("\n");
          for (int v = 0; v < att.numValues(); v++) {
            text.append(Utils.doubleToString(m_Counts[cls][attPos][v], 10, 8) + "\t");
          }
        } else {
          text.append("Mean: " + Utils.doubleToString(m_Means[cls][attPos], 10, 8) + "\t");
          text.append(
              "Standard Deviation: " + Utils.doubleToString(m_Devs[cls][attPos], 10, 8));
        }
        text.append("\n\n");
      }
    }
    return text.toString();
  } catch (Exception e) {
    return "Can't print Naive Bayes classifier!";
  }
}
/** * Set the output format. Takes the current average class values and m_InputFormat and calls * setOutputFormat(Instances) appropriately. */ private void setOutputFormat() { Instances newData; FastVector newAtts; // Compute new attributes newAtts = new FastVector(getInputFormat().numAttributes()); for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = getInputFormat().attribute(j); if (!att.isNominal() || !m_AttIndex.isInRange(j)) newAtts.addElement(att); else newAtts.addElement(new Attribute(att.name(), (FastVector) null)); } // Construct new header newData = new Instances(getInputFormat().relationName(), newAtts, 0); newData.setClassIndex(getInputFormat().classIndex()); setOutputFormat(newData); }
/** Set the output format. Changes the format of the specified date attribute. */ private void setOutputFormat() { // Create new attributes FastVector newAtts = new FastVector(getInputFormat().numAttributes()); for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = getInputFormat().attribute(j); if (j == m_AttIndex.getIndex()) { newAtts.addElement(new Attribute(att.name(), getDateFormat().toPattern())); } else { newAtts.addElement(att.copy()); } } // Create new header Instances newData = new Instances(getInputFormat().relationName(), newAtts, 0); newData.setClassIndex(getInputFormat().classIndex()); m_OutputAttribute = newData.attribute(m_AttIndex.getIndex()); setOutputFormat(newData); }
/** * Set the output format. Swapss the desired nominal attribute values in the header and calls * setOutputFormat(Instances) appropriately. */ private void setOutputFormat() { Instances newData; ArrayList<Attribute> newAtts; ArrayList<String> newVals; // Compute new attributes newAtts = new ArrayList<Attribute>(getInputFormat().numAttributes()); for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = getInputFormat().attribute(j); if (j != m_AttIndex.getIndex()) { newAtts.add((Attribute) att.copy()); } else { // Compute list of attribute values newVals = new ArrayList<String>(att.numValues()); for (int i = 0; i < att.numValues(); i++) { if (i == m_FirstIndex.getIndex()) { newVals.add(att.value(m_SecondIndex.getIndex())); } else if (i == m_SecondIndex.getIndex()) { newVals.add(att.value(m_FirstIndex.getIndex())); } else { newVals.add(att.value(i)); } } Attribute newAtt = new Attribute(att.name(), newVals); newAtt.setWeight(att.weight()); newAtts.add(newAtt); } } // Construct new header newData = new Instances(getInputFormat().relationName(), newAtts, 0); newData.setClassIndex(getInputFormat().classIndex()); setOutputFormat(newData); }
/**
 * Renders the (sub)tree rooted at this node. A leaf prints its class label (or "null" when the
 * class value is missing); an inner node prints one indented line per attribute value, each
 * followed by the rendering of the corresponding subtree.
 *
 * @param level the level at which the tree is to be printed
 * @return the tree as string at the given level
 */
private String toString(int level) {
  if (m_Attribute == null) {
    if (Utils.isMissingValue(m_ClassValue)) {
      return ": null";
    }
    return ": " + m_ClassAttribute.value((int) m_ClassValue);
  }

  StringBuffer text = new StringBuffer();
  int numBranches = m_Attribute.numValues();
  for (int branch = 0; branch < numBranches; branch++) {
    text.append("\n");
    for (int depth = 0; depth < level; depth++) {
      text.append("| ");
    }
    text.append(m_Attribute.name() + " = " + m_Attribute.value(branch));
    text.append(m_Successors[branch].toString(level + 1));
  }
  return text.toString();
}
public String toString1() { StringBuffer str = new StringBuffer(); String nome; str.append("< "); for (int i = 0; i < corpo.length; i++) { str.append(corpo[i] + ", "); } nome = classe.value(cabeca); str.append("[" + classe.name() + " = " + nome + "] " + "(" + confianca + ")"); str.append(">"); if (valoresObjetivos != null) { str.append("\t["); for (int i = 0; i < valoresObjetivos.length; i++) { str.append(valoresObjetivos[i] + ", "); } str.deleteCharAt(str.length() - 1); str.deleteCharAt(str.length() - 1); str.append(']'); } str.append("\t(" + getACC() + ")"); str.append("\t(" + getAcerto() + ")"); str.append("\t(" + getConfidence() + ")"); str.append("\t(" + getCov() + ")"); str.append("\t(" + getERR() + ")"); str.append("\t(" + getErro() + ")"); str.append("\t(" + getNegRel() + ")"); str.append("\t\t(" + getSens() + ")"); str.append("\t(" + getSpec() + ")"); // str.append("\n" +matrizContigencia.toString() + "\n"); return str.toString(); }
private static Instance makeInstance(Instances instances, String inputLine) { inputLine = inputLine.trim(); // We need to store the lastName as well... String[] parts = inputLine.split("\\s+"); String label = parts[0]; String firstName = parts[1].toLowerCase(); String lastName = parts[2].toLowerCase(); Instance instance = new Instance(features.length + 1); instance.setDataset(instances); Set<String> feats = new HashSet<String>(); /* feats.add("firstName0=" + firstName.charAt(0)); feats.add("firstNameN=" + firstName.charAt(firstName.length() - 1)); */ for (int f = 0; f < 9; f++) { if (firstName.length() > f) feats.add("firstName" + f + "=" + firstName.charAt(f)); } for (int l = 0; l < 9; l++) { if (lastName.length() > l) feats.add("lastName" + l + "=" + lastName.charAt(l)); } ///////////////////////////////////////////////////////////////// for (int featureId = 0; featureId < features.length; featureId++) { Attribute att = instances.attribute(features[featureId]); String name = att.name(); String featureLabel; if (feats.contains(name)) { featureLabel = "1"; } else featureLabel = "0"; instance.setValue(att, featureLabel); } instance.setClassValue(label); return instance; }
public String toString() { StringBuffer str = new StringBuffer(); String nome; for (int i = 0; i < corpo.length; i++) { str.append(corpo[i] + "\t"); } nome = classe.value(cabeca); str.append("[" + classe.name() + " = " + nome + "]"); if (valoresObjetivos != null) { str.append("\t"); for (int i = 0; i < valoresObjetivos.length; i++) { str.append(new Double(valoresObjetivos[i]).toString().replace('.', ',') + "\t"); } } str.append("\t" + new Double(getACC()).toString().replace('.', ',')); str.append("\t" + new Double(getERR()).toString().replace('.', ',')); str.append("\t" + new Double(getNegRel()).toString().replace('.', ',')); // str.append("\t" + new Double(getAcerto()).toString().replace('.',',')); str.append("\t" + new Double(getConfidence()).toString().replace('.', ',')); str.append("\t" + new Double(getSup()).toString().replace('.', ',')); str.append("\t" + new Double(getCov()).toString().replace('.', ',')); str.append("\t" + new Double(getNovelty()).toString().replace('.', ',')); // str.append("\t" + new Double(getErro()).toString().replace('.',',')); // str.append("\t" + new Double(getSens()).toString().replace('.',',')); // str.append("\t" + new Double(getSpec()).toString().replace('.',',')); return str.toString(); }
/** Set the output format if the class is numeric. */ private void setOutputFormatNumeric() { if (m_Indices == null) { setOutputFormat(null); return; } ArrayList<Attribute> newAtts; int newClassIndex; StringBuffer attributeName; Instances outputFormat; ArrayList<String> vals; // Compute new attributes m_needToTransform = false; for (int i = 0; i < getInputFormat().numAttributes(); i++) { Attribute att = getInputFormat().attribute(i); if (att.isNominal() && (att.numValues() > 2 || m_Numeric || m_TransformAll)) { m_needToTransform = true; break; } } if (!m_needToTransform) { setOutputFormat(getInputFormat()); return; } newClassIndex = getInputFormat().classIndex(); newAtts = new ArrayList<Attribute>(); for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = getInputFormat().attribute(j); if ((!att.isNominal()) || (j == getInputFormat().classIndex())) { newAtts.add((Attribute) att.copy()); } else { if (j < getInputFormat().classIndex()) { newClassIndex += att.numValues() - 2; } // Compute values for new attributes for (int k = 1; k < att.numValues(); k++) { attributeName = new StringBuffer(att.name() + "="); for (int l = k; l < att.numValues(); l++) { if (l > k) { attributeName.append(','); } attributeName.append(att.value(m_Indices[j][l])); } if (m_Numeric) { newAtts.add(new Attribute(attributeName.toString())); } else { vals = new ArrayList<String>(2); vals.add("f"); vals.add("t"); newAtts.add(new Attribute(attributeName.toString(), vals)); } } } } outputFormat = new Instances(getInputFormat().relationName(), newAtts, 0); outputFormat.setClassIndex(newClassIndex); setOutputFormat(outputFormat); }
/** * Generates the classifier. * * @param instances set of instances serving as training data * @exception Exception if the classifier has not been generated successfully */ @Override public void buildClassifier(Instances instances) throws Exception { int attIndex = 0; double sum; // can classifier handle the data? getCapabilities().testWithFail(instances); // remove instances with missing class instances = new Instances(instances); instances.deleteWithMissingClass(); m_Instances = new Instances(instances, 0); // Reserve space m_Counts = new double[instances.numClasses()][instances.numAttributes() - 1][0]; m_Means = new double[instances.numClasses()][instances.numAttributes() - 1]; m_Devs = new double[instances.numClasses()][instances.numAttributes() - 1]; m_Priors = new double[instances.numClasses()]; Enumeration<Attribute> enu = instances.enumerateAttributes(); while (enu.hasMoreElements()) { Attribute attribute = enu.nextElement(); if (attribute.isNominal()) { for (int j = 0; j < instances.numClasses(); j++) { m_Counts[j][attIndex] = new double[attribute.numValues()]; } } else { for (int j = 0; j < instances.numClasses(); j++) { m_Counts[j][attIndex] = new double[1]; } } attIndex++; } // Compute counts and sums Enumeration<Instance> enumInsts = instances.enumerateInstances(); while (enumInsts.hasMoreElements()) { Instance instance = enumInsts.nextElement(); if (!instance.classIsMissing()) { Enumeration<Attribute> enumAtts = instances.enumerateAttributes(); attIndex = 0; while (enumAtts.hasMoreElements()) { Attribute attribute = enumAtts.nextElement(); if (!instance.isMissing(attribute)) { if (attribute.isNominal()) { m_Counts[(int) instance.classValue()][attIndex][(int) instance.value(attribute)]++; } else { m_Means[(int) instance.classValue()][attIndex] += instance.value(attribute); m_Counts[(int) instance.classValue()][attIndex][0]++; } } attIndex++; } m_Priors[(int) instance.classValue()]++; } } // Compute means Enumeration<Attribute> enumAtts = 
instances.enumerateAttributes(); attIndex = 0; while (enumAtts.hasMoreElements()) { Attribute attribute = enumAtts.nextElement(); if (attribute.isNumeric()) { for (int j = 0; j < instances.numClasses(); j++) { if (m_Counts[j][attIndex][0] < 2) { throw new Exception( "attribute " + attribute.name() + ": less than two values for class " + instances.classAttribute().value(j)); } m_Means[j][attIndex] /= m_Counts[j][attIndex][0]; } } attIndex++; } // Compute standard deviations enumInsts = instances.enumerateInstances(); while (enumInsts.hasMoreElements()) { Instance instance = enumInsts.nextElement(); if (!instance.classIsMissing()) { enumAtts = instances.enumerateAttributes(); attIndex = 0; while (enumAtts.hasMoreElements()) { Attribute attribute = enumAtts.nextElement(); if (!instance.isMissing(attribute)) { if (attribute.isNumeric()) { m_Devs[(int) instance.classValue()][attIndex] += (m_Means[(int) instance.classValue()][attIndex] - instance.value(attribute)) * (m_Means[(int) instance.classValue()][attIndex] - instance.value(attribute)); } } attIndex++; } } } enumAtts = instances.enumerateAttributes(); attIndex = 0; while (enumAtts.hasMoreElements()) { Attribute attribute = enumAtts.nextElement(); if (attribute.isNumeric()) { for (int j = 0; j < instances.numClasses(); j++) { if (m_Devs[j][attIndex] <= 0) { throw new Exception( "attribute " + attribute.name() + ": standard deviation is 0 for class " + instances.classAttribute().value(j)); } else { m_Devs[j][attIndex] /= m_Counts[j][attIndex][0] - 1; m_Devs[j][attIndex] = Math.sqrt(m_Devs[j][attIndex]); } } } attIndex++; } // Normalize counts enumAtts = instances.enumerateAttributes(); attIndex = 0; while (enumAtts.hasMoreElements()) { Attribute attribute = enumAtts.nextElement(); if (attribute.isNominal()) { for (int j = 0; j < instances.numClasses(); j++) { sum = Utils.sum(m_Counts[j][attIndex]); for (int i = 0; i < attribute.numValues(); i++) { m_Counts[j][attIndex][i] = (m_Counts[j][attIndex][i] + 1) / (sum + 
attribute.numValues()); } } } attIndex++; } // Normalize priors sum = Utils.sum(m_Priors); for (int j = 0; j < instances.numClasses(); j++) { m_Priors[j] = (m_Priors[j] + 1) / (sum + instances.numClasses()); } }
/**
 * Sets the output format. In the selected nominal attribute the two selected values are merged
 * into one value named {@code <first>_<second>}, placed at the position of the first value; the
 * second value is dropped. If either label ends with a single quote, the surrounding characters
 * of that label are stripped and the merged label is wrapped in single quotes. All other
 * attributes are copied. The result is handed to setOutputFormat(Instances).
 */
private void setOutputFormat() {
  Instances input = getInputFormat();
  int numAtts = input.numAttributes();

  FastVector newAtts = new FastVector(numAtts);
  for (int pos = 0; pos < numAtts; pos++) {
    Attribute att = input.attribute(pos);
    if (pos != m_AttIndex.getIndex()) {
      newAtts.addElement(att.copy());
      continue;
    }

    // Compute the merged label.
    String firstLabel = att.value(m_FirstIndex.getIndex());
    boolean firstQuoted = firstLabel.endsWith("'");
    String secondLabel = att.value(m_SecondIndex.getIndex());
    boolean secondQuoted = secondLabel.endsWith("'");
    boolean quoteMerged = firstQuoted || secondQuoted;

    StringBuffer merged = new StringBuffer();
    if (quoteMerged) {
      merged.append("'");
    }
    merged.append(
        firstQuoted ? firstLabel.substring(1, firstLabel.length() - 1) : firstLabel);
    merged.append('_');
    merged.append(
        secondQuoted ? secondLabel.substring(1, secondLabel.length() - 1) : secondLabel);
    if (quoteMerged) {
      merged.append("'");
    }

    // Compute the new list of attribute values.
    FastVector newVals = new FastVector(att.numValues() - 1);
    for (int v = 0; v < att.numValues(); v++) {
      if (v == m_FirstIndex.getIndex()) {
        newVals.addElement(merged.toString());
      } else if (v != m_SecondIndex.getIndex()) {
        newVals.addElement(att.value(v));
      }
    }

    Attribute mergedAtt = new Attribute(att.name(), newVals);
    mergedAtt.setWeight(input.attribute(pos).weight());
    newAtts.addElement(mergedAtt);
  }

  // Build the new, empty header.
  Instances newData = new Instances(input.relationName(), newAtts, 0);
  newData.setClassIndex(input.classIndex());
  setOutputFormat(newData);
}
@Override public void init(Instances structure, Environment env) { super.init(structure, env); m_resolvedLhsName = m_lhsAttributeName; m_resolvedRhsOperand = m_rhsOperand; try { m_resolvedLhsName = m_env.substitute(m_resolvedLhsName); m_resolvedRhsOperand = m_env.substitute(m_resolvedRhsOperand); } catch (Exception ex) { } Attribute lhs = null; // try as an index or "special" label first if (m_resolvedLhsName.toLowerCase().startsWith("/first")) { lhs = structure.attribute(0); } else if (m_resolvedLhsName.toLowerCase().startsWith("/last")) { lhs = structure.attribute(structure.numAttributes() - 1); } else { // try as an index try { int indx = Integer.parseInt(m_resolvedLhsName); indx--; lhs = structure.attribute(indx); } catch (NumberFormatException ex) { } } if (lhs == null) { lhs = structure.attribute(m_resolvedLhsName); } if (lhs == null) { throw new IllegalArgumentException( "Data does not contain attribute " + "\"" + m_resolvedLhsName + "\""); } m_lhsAttIndex = lhs.index(); if (m_rhsIsAttribute) { Attribute rhs = null; // try as an index or "special" label first if (m_resolvedRhsOperand.toLowerCase().equals("/first")) { rhs = structure.attribute(0); } else if (m_resolvedRhsOperand.toLowerCase().equals("/last")) { rhs = structure.attribute(structure.numAttributes() - 1); } else { // try as an index try { int indx = Integer.parseInt(m_resolvedRhsOperand); indx--; rhs = structure.attribute(indx); } catch (NumberFormatException ex) { } } if (rhs == null) { rhs = structure.attribute(m_resolvedRhsOperand); } if (rhs == null) { throw new IllegalArgumentException( "Data does not contain attribute " + "\"" + m_resolvedRhsOperand + "\""); } m_rhsAttIndex = rhs.index(); } else if (m_operator != ExpressionType.CONTAINS && m_operator != ExpressionType.STARTSWITH && m_operator != ExpressionType.ENDSWITH && m_operator != ExpressionType.REGEX && m_operator != ExpressionType.ISMISSING) { // make sure the operand is parseable as a number (unless missing has // been specified - 
equals only) if (lhs.isNominal()) { m_numericOperand = lhs.indexOfValue(m_resolvedRhsOperand); if (m_numericOperand < 0) { throw new IllegalArgumentException( "Unknown nominal value '" + m_resolvedRhsOperand + "' for attribute '" + lhs.name() + "'"); } } else { try { m_numericOperand = Double.parseDouble(m_resolvedRhsOperand); } catch (NumberFormatException e) { throw new IllegalArgumentException( "\"" + m_resolvedRhsOperand + "\" is not parseable as a number!"); } } } if (m_operator == ExpressionType.REGEX) { m_regexPattern = Pattern.compile(m_resolvedRhsOperand); } }
/** * Métod que percorre todos os dados pertencentes à Instances dados. Imprimindo as informações da * base. */ public void percorrerDados() { if (dados != null) { /*Cada exemplo contido nos dados é identificado no Weka através da * classe Instance. Assim, o objeto dados, do tipo Instances, é uma coleçao de * Instance. Voce vai ter metodos que possibilitam acessar todos os exemplos * presentes na base. * */ // Percorre todos os exemples presentes na base for (int i = 0; i < dados.numInstances(); i++) { // Método para obter a instance de número 1. // Voce pode pegar a primeira e a ultima instance tb. // Além de poder deletar entre outras coisas. Instance exemplo = dados.instance(i); /*Uma Intance é formada por vários atributos, que são os atributos * da base. Voce pode percorrer todos os atributos Instace, ou pode * "setar" (set) ou pegar (get) um atributo especifico. * */ // É possível transforma todos os atributos em um array de double double[] arrayAtributos = exemplo.toDoubleArray(); System.out.println("Valores para o exemplo " + i); System.out.print("Array de atributos: "); for (int j = 0; j < arrayAtributos.length; j++) { System.out.print(arrayAtributos[j] + " "); } System.out.println(); // Percorrendo todos os atributos para se obter informacoes sobre eles for (int j = 0; j < exemplo.numAttributes(); j++) { Attribute att = exemplo.attribute(j); double valor = exemplo.value(att); System.out.println( "Valor do atributo " + att.name() + ":" + valor + " - " + att.value((int) valor)); } System.out.println(); // Mudando o valor do atributo 0, para um valor possível do atributos // Obtendo as informacoes do atributo 0; Attribute att = exemplo.attribute(0); // Obtendo o valor do atributo 0. 
double valorDoAtributo0 = exemplo.value(att); System.out.println("Valor antigo, em double: " + valorDoAtributo0); System.out.println("Valor antigo, em nome: " + att.value((int) valorDoAtributo0)); int novoValor = 1; exemplo.setValue(att, novoValor); valorDoAtributo0 = exemplo.value(att); System.out.println("Valor novo, em nome: " + att.value((int) valorDoAtributo0)); System.out.println(); System.out.println(); } } }
public MappingInfo(Instances dataSet, MiningSchema miningSchema, Logger log) throws Exception { m_log = log; // miningSchema.convertStringAttsToNominal(); Instances fieldsI = miningSchema.getMiningSchemaAsInstances(); m_fieldsMap = new int[fieldsI.numAttributes()]; m_nominalValueMaps = new int[fieldsI.numAttributes()][]; for (int i = 0; i < fieldsI.numAttributes(); i++) { String schemaAttName = fieldsI.attribute(i).name(); boolean found = false; for (int j = 0; j < dataSet.numAttributes(); j++) { if (dataSet.attribute(j).name().equals(schemaAttName)) { Attribute miningSchemaAtt = fieldsI.attribute(i); Attribute incomingAtt = dataSet.attribute(j); // check type match if (miningSchemaAtt.type() != incomingAtt.type()) { throw new Exception( "[MappingInfo] type mismatch for field " + schemaAttName + ". Mining schema type " + miningSchemaAtt.toString() + ". Incoming type " + incomingAtt.toString() + "."); } // check nominal values (number, names...) if (miningSchemaAtt.numValues() != incomingAtt.numValues()) { String warningString = "[MappingInfo] WARNING: incoming nominal attribute " + incomingAtt.name() + " does not have the same " + "number of values as the corresponding mining " + "schema attribute."; if (m_log != null) { m_log.logMessage(warningString); } else { System.err.println(warningString); } } if (miningSchemaAtt.isNominal() || miningSchemaAtt.isString()) { int[] valuesMap = new int[incomingAtt.numValues()]; for (int k = 0; k < incomingAtt.numValues(); k++) { String incomingNomVal = incomingAtt.value(k); int indexInSchema = miningSchemaAtt.indexOfValue(incomingNomVal); if (indexInSchema < 0) { String warningString = "[MappingInfo] WARNING: incoming nominal attribute " + incomingAtt.name() + " has value " + incomingNomVal + " that doesn't occur in the mining schema."; if (m_log != null) { m_log.logMessage(warningString); } else { System.err.println(warningString); } valuesMap[k] = UNKNOWN_NOMINAL_VALUE; } else { valuesMap[k] = indexInSchema; } } 
m_nominalValueMaps[i] = valuesMap; } /*if (miningSchemaAtt.isNominal()) { for (int k = 0; k < miningSchemaAtt.numValues(); k++) { if (!miningSchemaAtt.value(k).equals(incomingAtt.value(k))) { throw new Exception("[PMMLUtils] value " + k + " (" + miningSchemaAtt.value(k) + ") does not match " + "incoming value (" + incomingAtt.value(k) + ") for attribute " + miningSchemaAtt.name() + "."); } } }*/ found = true; m_fieldsMap[i] = j; } } if (!found) { throw new Exception( "[MappingInfo] Unable to find a match for mining schema " + "attribute " + schemaAttName + " in the " + "incoming instances!"); } } // check class attribute (if set) if (fieldsI.classIndex() >= 0) { if (dataSet.classIndex() < 0) { // first see if we can find a matching class String className = fieldsI.classAttribute().name(); Attribute classMatch = dataSet.attribute(className); if (classMatch == null) { throw new Exception( "[MappingInfo] Can't find match for target field " + className + "in incoming instances!"); } dataSet.setClass(classMatch); } else if (!fieldsI.classAttribute().name().equals(dataSet.classAttribute().name())) { throw new Exception( "[MappingInfo] class attribute in mining schema does not match " + "class attribute in incoming instances!"); } } // Set up the textual description of the mapping fieldsMappingString(fieldsI, dataSet); }