/**
 * Builds the output instance that corresponds to a single input instance.
 *
 * <p>Non-class attributes that are present in {@code m_unchanged} are copied across as they are.
 * Every other non-class attribute is expanded into one value per class label: the probability
 * returned by that attribute's estimator for the label, or a missing value when the attribute is
 * missing in the input. The class value is written into the last slot of the output.
 *
 * @param current the input instance to convert
 * @return a transformed instance
 * @throws Exception if a problem occurs
 */
protected Instance convertInstance(Instance current) throws Exception {
  double[] converted = new double[getOutputFormat().numAttributes()];
  int next = 0;

  for (int att = 0; att < current.numAttributes(); att++) {
    if (att == current.classIndex()) {
      continue;
    }

    boolean passThrough =
        m_unchanged != null && m_unchanged.attribute(current.attribute(att).name()) != null;
    if (passThrough) {
      converted[next++] = current.value(att);
      continue;
    }

    // One output value per class label for this attribute.
    Estimator[] perClass = m_estimatorLookup.get(current.attribute(att).name());
    for (int cls = 0; cls < current.classAttribute().numValues(); cls++) {
      if (current.isMissing(att)) {
        converted[next++] = Utils.missingValue();
      } else {
        converted[next++] = perClass[cls].getProbability(current.value(att));
      }
    }
  }

  converted[converted.length - 1] = current.classValue();
  return new DenseInstance(current.weight(), converted);
}
public double updateWeights(Instance inst, double learningRatio) { // Normalize Instance double[] normalizedInstance = normalizedInstance(inst); // Compute the Normalized Prediction of Perceptron double normalizedPredict = prediction(normalizedInstance); double normalizedY = normalizeActualClassValue(inst); double sumWeights = 0.0; double delta = normalizedY - normalizedPredict; for (int j = 0; j < inst.numAttributes() - 1; j++) { int instAttIndex = modelAttIndexToInstanceAttIndex(j, inst); if (inst.attribute(instAttIndex).isNumeric()) { this.weightAttribute[j] += learningRatio * delta * normalizedInstance[j]; sumWeights += Math.abs(this.weightAttribute[j]); } } this.weightAttribute[inst.numAttributes() - 1] += learningRatio * delta; sumWeights += Math.abs(this.weightAttribute[inst.numAttributes() - 1]); if (sumWeights > inst.numAttributes()) { // Lasso regression for (int j = 0; j < inst.numAttributes() - 1; j++) { int instAttIndex = modelAttIndexToInstanceAttIndex(j, inst); if (inst.attribute(instAttIndex).isNumeric()) { this.weightAttribute[j] = this.weightAttribute[j] / sumWeights; } } this.weightAttribute[inst.numAttributes() - 1] = this.weightAttribute[inst.numAttributes() - 1] / sumWeights; } return denormalizedPrediction(normalizedPredict); }
/**
 * Centers one instance and appends the result to the output queue.
 *
 * <p>For every numeric, non-missing, non-class attribute the stored mean {@code m_Means[j]} is
 * subtracted from the value; all other values are carried over unchanged. A sparse input
 * produces a sparse output that stores only the non-zero results. The output is attached to the
 * input instance's dataset before it is pushed.
 *
 * @param instance the instance to convert
 */
private void convertInstance(Instance instance) {
  Instance centered;
  double[] raw = instance.toDoubleArray();

  if (instance instanceof SparseInstance) {
    int attributeCount = instance.numAttributes();
    double[] keptValues = new double[attributeCount];
    int[] keptIndices = new int[attributeCount];
    int kept = 0;

    for (int j = 0; j < attributeCount; j++) {
      boolean shift =
          instance.attribute(j).isNumeric()
              && !Instance.isMissingValue(raw[j])
              && getInputFormat().classIndex() != j;
      double value = shift ? raw[j] - m_Means[j] : raw[j];
      if (value != 0.0) {
        keptValues[kept] = value;
        keptIndices[kept] = j;
        kept++;
      }
    }

    // Trim the scratch arrays down to the entries that were actually stored.
    double[] sparseValues = new double[kept];
    int[] sparseIndices = new int[kept];
    System.arraycopy(keptValues, 0, sparseValues, 0, kept);
    System.arraycopy(keptIndices, 0, sparseIndices, 0, kept);
    centered =
        new SparseInstance(
            instance.weight(), sparseValues, sparseIndices, instance.numAttributes());
  } else {
    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      boolean shift =
          instance.attribute(j).isNumeric()
              && !Instance.isMissingValue(raw[j])
              && getInputFormat().classIndex() != j;
      if (shift) {
        raw[j] = raw[j] - m_Means[j];
      }
    }
    centered = new Instance(instance.weight(), raw);
  }

  centered.setDataset(instance.dataset());
  push(centered);
}
/**
 * Returns the values an instance holds for two named attributes.
 *
 * <p>Each name is looked up in {@code getAttributeNames()} and the resulting position is used to
 * fetch the attribute from the instance.
 *
 * @param inst the instance being analysed
 * @param attrX name of the attribute for the X axis
 * @param attrY name of the attribute for the Y axis
 * @return a two-element list holding the value for the X axis followed by the value for the Y
 *     axis
 */
public List<Number> getValueForInstance(Instance inst, String attrX, String attrY) {
  List<Number> pair = new ArrayList<Number>();

  int xPosition = getAttributeNames().indexOf(attrX);
  Attribute xAttribute = inst.attribute(xPosition);
  int yPosition = getAttributeNames().indexOf(attrY);
  Attribute yAttribute = inst.attribute(yPosition);

  pair.add(inst.value(xAttribute));
  pair.add(inst.value(yAttribute));
  return pair;
}
/** * Convert an <code>Instance</code> to an array of values that matches the format of the mining * schema. First maps raw attribute values and then applies rules for missing values, outliers * etc. * * @param inst the <code>Instance</code> to convert * @param miningSchema the mining schema incoming instance attributes * @return an array of doubles that are values from the incoming Instances, correspond to the * format of the mining schema and have had missing values, outliers etc. dealt with. * @throws Exception if something goes wrong */ public double[] instanceToSchema(Instance inst, MiningSchema miningSchema) throws Exception { Instances miningSchemaI = miningSchema.getMiningSchemaAsInstances(); // allocate enough space for both mining schema fields and any derived fields double[] result = new double[miningSchema.getFieldsAsInstances().numAttributes()]; // Copy over the values for (int i = 0; i < miningSchemaI.numAttributes(); i++) { // if (miningSchemaI.attribute(i).isNumeric()) { result[i] = inst.value(m_fieldsMap[i]); if (miningSchemaI.attribute(i).isNominal() || miningSchemaI.attribute(i).isString()) { // If not missing, look up the index of this incoming categorical value in // the mining schema if (!Utils.isMissingValue(inst.value(m_fieldsMap[i]))) { int[] valueMap = m_nominalValueMaps[i]; int index = valueMap[(int) inst.value(m_fieldsMap[i])]; String incomingAttValue = inst.attribute(m_fieldsMap[i]).value((int) inst.value(m_fieldsMap[i])); /*int index = miningSchemaI.attribute(i).indexOfValue(incomingAttValue); */ if (index >= 0) { result[i] = index; } else { // set this to "unknown" (-1) for nominal valued attributes result[i] = UNKNOWN_NOMINAL_VALUE; String warningString = "[MappingInfo] WARNING: Can't match nominal value " + incomingAttValue; if (m_log != null) { m_log.logMessage(warningString); } else { System.err.println(warningString); } } } } } // Now deal with missing values and outliers... 
miningSchema.applyMissingAndOutlierTreatments(result); // printInst(result); // now fill in any derived values ArrayList<DerivedFieldMetaInfo> derivedFields = miningSchema.getDerivedFields(); for (int i = 0; i < derivedFields.size(); i++) { DerivedFieldMetaInfo temp = derivedFields.get(i); // System.err.println("Applying : " + temp); double r = temp.getDerivedValue(result); result[i + miningSchemaI.numAttributes()] = r; } /*System.err.print("==> "); for (int i = 0; i < result.length; i++) { System.err.print(" " + result[i]); } System.err.println();*/ return result; }
/**
 * Returns the index of the subset the instance is assigned to.
 *
 * <p>Returns -1 when the split attribute is missing in the instance. For a nominal split
 * attribute, subset 0 is chosen when the instance's label index equals the split point (both
 * truncated to {@code int}); for any other attribute, subset 0 is chosen when
 * {@code Utils.smOrEq(value, m_splitPoint)} holds. Subset 1 is returned otherwise.
 *
 * @param instance the instance to assign
 * @return 0 or 1 for the chosen subset, or -1 if the split attribute is missing
 * @exception Exception if something goes wrong
 */
public final int whichSubset(Instance instance) throws Exception {
  if (instance.isMissing(m_attIndex)) {
    return -1;
  }

  boolean firstSubset;
  if (instance.attribute(m_attIndex).isNominal()) {
    firstSubset = (int) m_splitPoint == (int) instance.value(m_attIndex);
  } else {
    firstSubset = Utils.smOrEq(instance.value(m_attIndex), m_splitPoint);
  }
  return firstSubset ? 0 : 1;
}
/**
 * Turns an instance into a list of plain Java objects, one per entry of {@code isNumeric}.
 *
 * <p>Positions flagged as numeric contribute the instance's double value; all other positions
 * contribute the attribute label found at the index given by the instance's value.
 *
 * @param instance the instance to convert
 * @return the converted values in attribute order
 */
private List<Object> convert(Instance instance) {
  List<Object> row = new LinkedList<Object>();
  int column = 0;
  for (boolean numeric : isNumeric) {
    if (numeric) {
      row.add(instance.value(column));
    } else {
      int labelIndex = (int) instance.value(column);
      row.add(instance.attribute(column).value(labelIndex));
    }
    column++;
  }
  return row;
}
/**
 * Input an instance for filtering. Ordinarily the instance is processed and made available for
 * output immediately. Some filters require all instances be read before producing output.
 *
 * <p>If no attributes are selected the instance is pushed unchanged. Otherwise each value in a
 * selected column is looked up in the rename map (lower-cased first when case is ignored) and,
 * if a replacement label exists, is replaced by that label's index in the output format. Values
 * outside the selected columns, missing values, and labels without a replacement are copied as
 * they are.
 *
 * @param instance the input instance
 * @return true if the filtered instance may now be collected with output(); false if the
 *     output format has no attributes
 * @throws IllegalStateException if no input structure has been defined.
 */
@Override
public boolean input(Instance instance) {
  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }
  if (m_NewBatch) {
    resetQueue();
    m_NewBatch = false;
  }
  if (getOutputFormat().numAttributes() == 0) {
    return false;
  }

  if (m_selectedAttributes.length == 0) {
    push(instance);
    return true;
  }

  double[] vals = new double[getOutputFormat().numAttributes()];
  for (int i = 0; i < instance.numAttributes(); i++) {
    double currentV = instance.value(i);
    vals[i] = currentV;

    // A missing value is NaN, and NaN never compares equal with ==, so it has to be detected
    // with isMissingValue. Otherwise the cast below would turn it into label index 0 and the
    // missing value could be renamed to a real label.
    if (!m_selectedCols.isInRange(i) || Utils.isMissingValue(currentV)) {
      continue;
    }

    String currentS = instance.attribute(i).value((int) currentV);
    String replace =
        m_ignoreCase ? m_renameMap.get(currentS.toLowerCase()) : m_renameMap.get(currentS);
    if (replace != null) {
      vals[i] = getOutputFormat().attribute(i).indexOfValue(replace);
    }
  }

  Instance inst;
  if (instance instanceof SparseInstance) {
    inst = new SparseInstance(instance.weight(), vals);
  } else {
    inst = new DenseInstance(instance.weight(), vals);
  }
  inst.setDataset(getOutputFormat());
  copyValues(inst, false, instance.dataset(), getOutputFormat());
  inst.setDataset(getOutputFormat());
  push(inst);
  return true;
}
/** * Input an instance for filtering. The instance is processed and made available for output * immediately. * * @param instance the input instance. * @return true if the filtered instance may now be collected with output(). * @throws IllegalStateException if no input structure has been defined. */ public boolean input(Instance instance) { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_NewBatch) { resetQueue(); m_NewBatch = false; } if (isOutputFormatDefined()) { Instance newInstance = (Instance) instance.copy(); // make sure that we get the right indexes set for the converted // string attributes when operating on a second batch of instances for (int i = 0; i < newInstance.numAttributes(); i++) { if (newInstance.attribute(i).isString() && !newInstance.isMissing(i) && m_AttIndices.isInRange(i)) { Attribute outAtt = getOutputFormat().attribute(newInstance.attribute(i).name()); String inVal = newInstance.stringValue(i); int outIndex = outAtt.indexOfValue(inVal); if (outIndex < 0) { newInstance.setMissing(i); } else { newInstance.setValue(i, outIndex); } } } push(newInstance); return true; } bufferInput(instance); return false; }
/** * Determines whether an instance passes the test. * * @param inst the instance * @return true if the instance satisfies the test, false otherwise * @exception Exception if something goes wrong */ public boolean passesTest(Instance inst) throws Exception { if (inst.isMissing(m_AttIndex)) return false; // missing values fail boolean isNominal = inst.attribute(m_AttIndex).isNominal(); double attribVal = inst.value(m_AttIndex); if (!m_Not) { if (isNominal) { if (((int) attribVal) != ((int) m_Split)) return false; } else if (attribVal >= m_Split) return false; } else { if (isNominal) { if (((int) attribVal) == ((int) m_Split)) return false; } else if (attribVal < m_Split) return false; } return true; }
/** * processes the given instance (may change the provided instance) and returns the modified * version. * * @param instance the instance to process * @return the modified data * @throws Exception in case the processing goes wrong */ protected Instance process(Instance instance) throws Exception { Instance result; Attribute att; double[] values; int i; // adjust indices values = new double[instance.numAttributes()]; for (i = 0; i < instance.numAttributes(); i++) { att = instance.attribute(i); if (!att.isNominal() || !m_AttributeIndices.isInRange(i) || instance.isMissing(i)) values[i] = instance.value(i); else values[i] = m_NewOrder[i][(int) instance.value(i)]; } // create new instance result = new DenseInstance(instance.weight(), values); return result; }
/**
 * Updates this node with a training instance.
 *
 * <p>After the class distribution is updated, the sufficient statistics of every non-class
 * attribute are updated with the attribute value, the class label and the instance weight.
 * Statistics objects are created on first use and stored in {@code m_nodeStats} under the
 * attribute name: a Gaussian one for numeric attributes, a nominal one otherwise.
 *
 * @param inst the training instance
 * @throws Exception if the update fails
 */
@Override
public void updateNode(Instance inst) throws Exception {
  super.updateDistribution(inst);

  for (int idx = 0; idx < inst.numAttributes(); idx++) {
    Attribute attribute = inst.attribute(idx);
    if (idx == inst.classIndex()) {
      continue;
    }

    String key = attribute.name();
    ConditionalSufficientStats attributeStats = m_nodeStats.get(key);
    if (attributeStats == null) {
      if (attribute.isNumeric()) {
        attributeStats = new GaussianConditionalSufficientStats();
      } else {
        attributeStats = new NominalConditionalSufficientStats();
      }
      m_nodeStats.put(attribute.name(), attributeStats);
    }

    attributeStats.update(
        inst.value(attribute),
        inst.classAttribute().value((int) inst.classValue()),
        inst.weight());
  }
}
/** * processes the given instance (may change the provided instance) and returns the modified * version. * * @param instance the instance to process * @return the modified data * @throws Exception in case the processing goes wrong */ @Override protected Instance process(Instance instance) throws Exception { Instance result; int i; double val; double factor; result = (Instance) instance.copy(); if (m_Decimals > -1) { factor = StrictMath.pow(10, m_Decimals); } else { factor = 1; } for (i = 0; i < result.numAttributes(); i++) { // only numeric attributes if (!result.attribute(i).isNumeric()) { continue; } // out of range? if (!m_Cols.isInRange(i)) { continue; } // skip class? if ((result.classIndex() == i) && (!m_IncludeClass)) { continue; } // too small? if (result.value(i) < m_MinThreshold) { if (getDebug()) { System.out.println("Too small: " + result.value(i) + " -> " + m_MinDefault); } result.setValue(i, m_MinDefault); } // too big? else if (result.value(i) > m_MaxThreshold) { if (getDebug()) { System.out.println("Too big: " + result.value(i) + " -> " + m_MaxDefault); } result.setValue(i, m_MaxDefault); } // too close? else if ((result.value(i) - m_CloseTo < m_CloseToTolerance) && (m_CloseTo - result.value(i) < m_CloseToTolerance) && (result.value(i) != m_CloseTo)) { if (getDebug()) { System.out.println("Too close: " + result.value(i) + " -> " + m_CloseToDefault); } result.setValue(i, m_CloseToDefault); } // decimals? if (m_Decimals > -1 && !result.isMissing(i)) { val = result.value(i); val = StrictMath.round(val * factor) / factor; result.setValue(i, val); } } return result; }
/**
 * Convert a single instance over. The converted instance is added to the end of the output queue.
 *
 * <p>Every attribute that lies in the selected column range, is numeric, is not missing and is
 * not the class is replaced by the result of evaluating the expression for it. A NaN or infinite
 * result is reported on {@code System.err} and stored as a missing value. All other values are
 * carried over unchanged, for dense and sparse instances alike. A sparse input produces a sparse
 * output that stores only the non-zero results.
 *
 * @param instance the instance to convert
 * @throws Exception if instance cannot be converted
 */
private void convertInstance(Instance instance) throws Exception {
  Instance inst;
  HashMap symbols = new HashMap(5);
  double[] vals = instance.toDoubleArray();

  if (instance instanceof SparseInstance) {
    double[] newVals = new double[instance.numAttributes()];
    int[] newIndices = new int[instance.numAttributes()];
    int ind = 0;

    for (int j = 0; j < instance.numAttributes(); j++) {
      // Default to the untouched value so that selected attributes which cannot be transformed
      // (non-numeric, missing, or the class) are passed through instead of being dropped.
      double value = vals[j];
      if (m_SelectCols.isInRange(j)
          && instance.attribute(j).isNumeric()
          && (!Utils.isMissingValue(vals[j]))
          && (getInputFormat().classIndex() != j)) {
        value = evaluateForAttribute(symbols, j, vals[j]);
        if (Double.isNaN(value) || Double.isInfinite(value)) {
          System.err.println("WARNING:Error in evaluating the expression: missing value set");
          value = Utils.missingValue();
        }
      }
      if (value != 0.0) {
        newVals[ind] = value;
        newIndices[ind] = j;
        ind++;
      }
    }

    // Trim the scratch arrays down to the entries that were actually stored.
    double[] tempVals = new double[ind];
    int[] tempInd = new int[ind];
    System.arraycopy(newVals, 0, tempVals, 0, ind);
    System.arraycopy(newIndices, 0, tempInd, 0, ind);
    inst = new SparseInstance(instance.weight(), tempVals, tempInd, instance.numAttributes());
  } else {
    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      if (m_SelectCols.isInRange(j)
          && instance.attribute(j).isNumeric()
          && (!Utils.isMissingValue(vals[j]))
          && (getInputFormat().classIndex() != j)) {
        vals[j] = evaluateForAttribute(symbols, j, vals[j]);
        if (Double.isNaN(vals[j]) || Double.isInfinite(vals[j])) {
          System.err.println("WARNING:Error in Evaluation the Expression: missing value set");
          vals[j] = Utils.missingValue();
        }
      }
    }
    inst = new DenseInstance(instance.weight(), vals);
  }

  inst.setDataset(instance.dataset());
  push(inst);
}

/**
 * Loads the expression variables for one attribute into the symbol table and evaluates the
 * expression.
 *
 * @param symbols the symbol table to fill; existing entries with the same keys are overwritten
 * @param attIndex index of the attribute whose statistics are used
 * @param attValue the attribute's current value, bound to the variable {@code A}
 * @return the value produced by {@code eval}
 * @throws Exception if the evaluation fails
 */
@SuppressWarnings({"rawtypes", "unchecked"})
private double evaluateForAttribute(HashMap symbols, int attIndex, double attValue)
    throws Exception {
  symbols.put("A", Double.valueOf(attValue));
  symbols.put("MAX", Double.valueOf(m_attStats[attIndex].numericStats.max));
  symbols.put("MIN", Double.valueOf(m_attStats[attIndex].numericStats.min));
  symbols.put("MEAN", Double.valueOf(m_attStats[attIndex].numericStats.mean));
  symbols.put("SD", Double.valueOf(m_attStats[attIndex].numericStats.stdDev));
  symbols.put("COUNT", Double.valueOf(m_attStats[attIndex].numericStats.count));
  symbols.put("SUM", Double.valueOf(m_attStats[attIndex].numericStats.sum));
  symbols.put("SUMSQUARED", Double.valueOf(m_attStats[attIndex].numericStats.sumSq));
  return eval(symbols);
}
/** * Classifies the given test instance. * * @param instance the instance to be classified * @return the predicted class for the instance * @throws Exception if the instance can't be classified */ public double[] distributionForInstance(Instance instance) throws Exception { double[] dist = new double[m_NumClasses]; double[] temp = new double[m_NumClasses]; double weight = 1.0; for (int i = 0; i < instance.numAttributes(); i++) { if (i != m_ClassIndex && !instance.isMissing(i)) { double val = instance.value(i); boolean ok = false; if (instance.attribute(i).isNumeric()) { int k; for (k = m_intervalBounds[i].length - 1; k >= 0; k--) { if (val > m_intervalBounds[i][k]) { for (int j = 0; j < m_NumClasses; j++) { if (m_globalCounts[j] > 0) { temp[j] = ((m_counts[i][k][j] + TINY) / (m_globalCounts[j] + TINY)); } } ok = true; break; } else if (val == m_intervalBounds[i][k]) { for (int j = 0; j < m_NumClasses; j++) { if (m_globalCounts[j] > 0) { temp[j] = ((m_counts[i][k][j] + m_counts[i][k - 1][j]) / 2.0) + TINY; temp[j] /= (m_globalCounts[j] + TINY); } } ok = true; break; } } if (!ok) { throw new Exception("This shouldn't happen"); } } else { // nominal attribute ok = true; for (int j = 0; j < m_NumClasses; j++) { if (m_globalCounts[j] > 0) { temp[j] = ((m_counts[i][(int) val][j] + TINY) / (m_globalCounts[j] + TINY)); } } } double sum = Utils.sum(temp); if (sum <= 0) { for (int j = 0; j < temp.length; j++) { temp[j] = 1.0 / (double) temp.length; } } else { Utils.normalize(temp, sum); } if (m_weightByConfidence) { weight = weka.core.ContingencyTables.entropy(temp); weight = Math.pow(weight, m_bias); if (weight < 1.0) { weight = 1.0; } } for (int j = 0; j < m_NumClasses; j++) { dist[j] += (temp[j] * weight); } } } double sum = Utils.sum(dist); if (sum <= 0) { for (int j = 0; j < dist.length; j++) { dist[j] = 1.0 / (double) dist.length; } return dist; } else { Utils.normalize(dist, sum); return dist; } }
/**
 * Accepts and processes a classifier encapsulated in an incremental classifier event.
 *
 * <p>On a {@code NEW_BATCH} event the throughput tracker, the evaluation object, the chart
 * legend and data point, the instance counter and (when {@code m_windowSize > 0}) the windowed
 * evaluation state are all re-created from the event's structure.
 *
 * <p>On any other event the current instance, if there is one, is scored with the event's
 * classifier. When its class value is present the prediction is added to the evaluation (and to
 * the windowed evaluation, if enabled); the chart data point is then refreshed and the chart
 * listeners are notified. When the event status is {@code BATCH_FINISHED}, or the current
 * instance is {@code null}, the end of processing is logged, the windowed state is released and
 * a textual summary is sent to the text listeners, if any are registered.
 *
 * <p>Any exception is logged (when a logger is set), its stack trace is printed, and
 * {@code stop()} is called.
 *
 * @param ce an <code>IncrementalClassifierEvent</code> value
 */
@Override
public void acceptClassifier(final IncrementalClassifierEvent ce) {
  try {
    if (ce.getStatus() == IncrementalClassifierEvent.NEW_BATCH) {
      // Start of a stream: rebuild all evaluation and charting state.
      m_throughput = new StreamThroughput(statusMessagePrefix());
      m_throughput.setSamplePeriod(m_statusFrequency);
      m_eval = new Evaluation(ce.getStructure());
      m_eval.useNoPriors();
      m_dataLegend = new Vector();
      m_reset = true;
      // An empty data point makes the first scored instance allocate it and fill the legend.
      m_dataPoint = new double[0];
      // NOTE(review): this local is never read in this branch.
      Instances inst = ce.getStructure();
      System.err.println("NEW BATCH");
      m_instanceCount = 0;
      if (m_windowSize > 0) {
        // Windowed charting keeps the recent instances and their predictions so that they can
        // be taken out of the windowed evaluation again later.
        m_window = new LinkedList<Instance>();
        m_windowEval = new Evaluation(ce.getStructure());
        m_windowEval.useNoPriors();
        m_windowedPreds = new LinkedList<double[]>();
        if (m_logger != null) {
          m_logger.logMessage(
              statusMessagePrefix()
                  + "[IncrementalClassifierEvaluator] Chart output using windowed "
                  + "evaluation over "
                  + m_windowSize
                  + " instances");
        }
      }
    } else {
      Instance inst = ce.getCurrentInstance();
      if (inst != null) {
        m_throughput.updateStart();
        m_instanceCount++;
        double[] dist = ce.getClassifier().distributionForInstance(inst);
        double pred = 0;
        // NOTE(review): isMissing is called with classIndex() here before the
        // classIndex() >= 0 check further down - confirm a class is always set at this point.
        if (!inst.isMissing(inst.classIndex())) {
          if (m_outputInfoRetrievalStats) {
            // store predictions so AUC etc can be output.
            m_eval.evaluateModelOnceAndRecordPrediction(dist, inst);
          } else {
            m_eval.evaluateModelOnce(dist, inst);
          }
          if (m_windowSize > 0) {
            m_windowEval.evaluateModelOnce(dist, inst);
            m_window.addFirst(inst);
            m_windowedPreds.addFirst(dist);
            // NOTE(review): m_instanceCount also counts instances with a missing class, which
            // are never added to the window - confirm the window cannot be shorter than
            // expected here.
            if (m_instanceCount > m_windowSize) {
              // "forget" the oldest prediction: evaluate it again with its weight negated,
              // then restore the weight.
              Instance oldest = m_window.removeLast();
              double[] oldDist = m_windowedPreds.removeLast();
              oldest.setWeight(-oldest.weight());
              m_windowEval.evaluateModelOnce(oldDist, oldest);
              oldest.setWeight(-oldest.weight());
            }
          }
        } else {
          // No class value to evaluate against: only obtain the prediction.
          pred = ce.getClassifier().classifyInstance(inst);
        }
        if (inst.classIndex() >= 0) {
          // need to check that the class is not missing
          if (inst.attribute(inst.classIndex()).isNominal()) {
            if (!inst.isMissing(inst.classIndex())) {
              // First labelled instance: allocate the three-series data point and its legend.
              if (m_dataPoint.length < 2) {
                m_dataPoint = new double[3];
                m_dataLegend.addElement("Accuracy");
                m_dataLegend.addElement("RMSE (prob)");
                m_dataLegend.addElement("Kappa");
              }
              if (m_windowSize > 0) {
                m_dataPoint[1] = m_windowEval.rootMeanSquaredError();
                m_dataPoint[2] = m_windowEval.kappa();
              } else {
                m_dataPoint[1] = m_eval.rootMeanSquaredError();
                m_dataPoint[2] = m_eval.kappa();
              }
            } else {
              // Unlabelled instance: a single series is enough.
              if (m_dataPoint.length < 1) {
                m_dataPoint = new double[1];
                m_dataLegend.addElement("Confidence");
              }
            }
            double primaryMeasure = 0;
            if (!inst.isMissing(inst.classIndex())) {
              // Accuracy, taken from the windowed evaluation when windowing is enabled.
              if (m_windowSize > 0) {
                primaryMeasure = 1.0 - m_windowEval.errorRate();
              } else {
                primaryMeasure = 1.0 - m_eval.errorRate();
              }
            } else {
              // record confidence as the primary measure
              // (another possibility would be entropy of
              // the distribution, or perhaps average
              // confidence)
              primaryMeasure = dist[Utils.maxIndex(dist)];
            }
            m_dataPoint[0] = primaryMeasure;
            m_ce.setLegendText(m_dataLegend);
            m_ce.setMin(0);
            m_ce.setMax(1);
            m_ce.setDataPoint(m_dataPoint);
            m_ce.setReset(m_reset);
            m_reset = false;
          } else {
            // numeric class
            if (m_dataPoint.length < 1) {
              m_dataPoint = new double[1];
              if (inst.isMissing(inst.classIndex())) {
                m_dataLegend.addElement("Prediction");
              } else {
                m_dataLegend.addElement("RMSE");
              }
            }
            if (!inst.isMissing(inst.classIndex())) {
              double update;
              // NOTE(review): this inner check repeats the enclosing condition, so the else
              // branch below (update = pred) cannot be reached and the data point is left
              // unchanged for instances with a missing class. Confirm whether the prediction
              // was meant to be charted in that case.
              if (!inst.isMissing(inst.classIndex())) {
                if (m_windowSize > 0) {
                  update = m_windowEval.rootMeanSquaredError();
                } else {
                  update = m_eval.rootMeanSquaredError();
                }
              } else {
                update = pred;
              }
              m_dataPoint[0] = update;
              // Track the range seen so far for the chart axis.
              if (update > m_max) {
                m_max = update;
              }
              if (update < m_min) {
                m_min = update;
              }
            }
            m_ce.setLegendText(m_dataLegend);
            m_ce.setMin((inst.isMissing(inst.classIndex()) ? m_min : 0));
            m_ce.setMax(m_max);
            m_ce.setDataPoint(m_dataPoint);
            m_ce.setReset(m_reset);
            m_reset = false;
          }
          notifyChartListeners(m_ce);
        }
        m_throughput.updateEnd(m_logger);
      }
      if (ce.getStatus() == IncrementalClassifierEvent.BATCH_FINISHED || inst == null) {
        if (m_logger != null) {
          m_logger.logMessage(
              "[IncrementalClassifierEvaluator]" + statusMessagePrefix() + " Finished processing.");
        }
        m_throughput.finished(m_logger);
        // save memory if using windowed evaluation for charting
        m_windowEval = null;
        m_window = null;
        m_windowedPreds = null;
        if (m_textListeners.size() > 0) {
          // Use the classifier's simple class name in the report.
          String textTitle = ce.getClassifier().getClass().getName();
          textTitle = textTitle.substring(textTitle.lastIndexOf('.') + 1, textTitle.length());
          String results =
              "=== Performance information ===\n\n"
                  + "Scheme: "
                  + textTitle
                  + "\n"
                  + "Relation: "
                  + m_eval.getHeader().relationName()
                  + "\n\n"
                  + m_eval.toSummaryString();
          // Per-class details are only added for a nominal class when the extra statistics
          // were recorded.
          if (m_eval.getHeader().classIndex() >= 0
              && m_eval.getHeader().classAttribute().isNominal()
              && (m_outputInfoRetrievalStats)) {
            results += "\n" + m_eval.toClassDetailsString();
          }
          // The confusion matrix is added for any nominal class.
          if (m_eval.getHeader().classIndex() >= 0
              && m_eval.getHeader().classAttribute().isNominal()) {
            results += "\n" + m_eval.toMatrixString();
          }
          textTitle = "Results: " + textTitle;
          TextEvent te = new TextEvent(this, results, textTitle);
          notifyTextListeners(te);
        }
      }
    }
  } catch (Exception ex) {
    if (m_logger != null) {
      m_logger.logMessage(
          "[IncrementalClassifierEvaluator]"
              + statusMessagePrefix()
              + " Error processing prediction "
              + ex.getMessage());
      m_logger.statusMessage(
          statusMessagePrefix() + "ERROR: problem processing prediction (see log for details)");
    }
    ex.printStackTrace();
    stop();
  }
}
/** * Métod que percorre todos os dados pertencentes à Instances dados. Imprimindo as informações da * base. */ public void percorrerDados() { if (dados != null) { /*Cada exemplo contido nos dados é identificado no Weka através da * classe Instance. Assim, o objeto dados, do tipo Instances, é uma coleçao de * Instance. Voce vai ter metodos que possibilitam acessar todos os exemplos * presentes na base. * */ // Percorre todos os exemples presentes na base for (int i = 0; i < dados.numInstances(); i++) { // Método para obter a instance de número 1. // Voce pode pegar a primeira e a ultima instance tb. // Além de poder deletar entre outras coisas. Instance exemplo = dados.instance(i); /*Uma Intance é formada por vários atributos, que são os atributos * da base. Voce pode percorrer todos os atributos Instace, ou pode * "setar" (set) ou pegar (get) um atributo especifico. * */ // É possível transforma todos os atributos em um array de double double[] arrayAtributos = exemplo.toDoubleArray(); System.out.println("Valores para o exemplo " + i); System.out.print("Array de atributos: "); for (int j = 0; j < arrayAtributos.length; j++) { System.out.print(arrayAtributos[j] + " "); } System.out.println(); // Percorrendo todos os atributos para se obter informacoes sobre eles for (int j = 0; j < exemplo.numAttributes(); j++) { Attribute att = exemplo.attribute(j); double valor = exemplo.value(att); System.out.println( "Valor do atributo " + att.name() + ":" + valor + " - " + att.value((int) valor)); } System.out.println(); // Mudando o valor do atributo 0, para um valor possível do atributos // Obtendo as informacoes do atributo 0; Attribute att = exemplo.attribute(0); // Obtendo o valor do atributo 0. 
double valorDoAtributo0 = exemplo.value(att); System.out.println("Valor antigo, em double: " + valorDoAtributo0); System.out.println("Valor antigo, em nome: " + att.value((int) valorDoAtributo0)); int novoValor = 1; exemplo.setValue(att, novoValor); valorDoAtributo0 = exemplo.value(att); System.out.println("Valor novo, em nome: " + att.value((int) valorDoAtributo0)); System.out.println(); System.out.println(); } } }
protected void tokenizeInstance(Instance instance, boolean updateDictionary) { if (m_inputVector == null) { m_inputVector = new LinkedHashMap<String, Count>(); } else { m_inputVector.clear(); } if (m_useStopList && m_stopwords == null) { m_stopwords = new Stopwords(); try { if (getStopwords().exists() && !getStopwords().isDirectory()) { m_stopwords.read(getStopwords()); } } catch (Exception ex) { ex.printStackTrace(); } } for (int i = 0; i < instance.numAttributes(); i++) { if (instance.attribute(i).isString() && !instance.isMissing(i)) { m_tokenizer.tokenize(instance.stringValue(i)); while (m_tokenizer.hasMoreElements()) { String word = m_tokenizer.nextElement(); if (m_lowercaseTokens) { word = word.toLowerCase(); } word = m_stemmer.stem(word); if (m_useStopList) { if (m_stopwords.is(word)) { continue; } } Count docCount = m_inputVector.get(word); if (docCount == null) { m_inputVector.put(word, new Count(instance.weight())); } else { docCount.m_count += instance.weight(); } } } } if (updateDictionary) { int classValue = (int) instance.classValue(); LinkedHashMap<String, Count> dictForClass = m_probOfWordGivenClass.get(classValue); // document normalization double iNorm = 0; double fv = 0; if (m_normalize) { for (Count c : m_inputVector.values()) { // word counts or bag-of-words? fv = (m_wordFrequencies) ? c.m_count : 1.0; iNorm += Math.pow(Math.abs(fv), m_lnorm); } iNorm = Math.pow(iNorm, 1.0 / m_lnorm); } for (Map.Entry<String, Count> feature : m_inputVector.entrySet()) { String word = feature.getKey(); double freq = (m_wordFrequencies) ? 
feature.getValue().m_count : 1.0; // double freq = (feature.getValue().m_count / iNorm * m_norm); if (m_normalize) { freq /= (iNorm * m_norm); } // check all classes for (int i = 0; i < m_data.numClasses(); i++) { LinkedHashMap<String, Count> dict = m_probOfWordGivenClass.get(i); if (dict.get(word) == null) { dict.put(word, new Count(m_leplace)); m_wordsPerClass[i] += m_leplace; } } Count dictCount = dictForClass.get(word); /* * if (dictCount == null) { dictForClass.put(word, new Count(m_leplace + * freq)); m_wordsPerClass[classValue] += (m_leplace + freq); } else { */ dictCount.m_count += freq; m_wordsPerClass[classValue] += freq; // } } pruneDictionary(); } }
/**
 * Rescales one instance and appends the result to the output queue.
 *
 * <p>Every numeric, non-missing, non-class attribute is mapped to
 * {@code (value - min) / (max - min) * m_Scale + m_Translation} using the stored minimum and
 * maximum of that attribute, or to 0 when the minimum is NaN or equals the maximum. All other
 * values are carried over unchanged. A sparse input produces a sparse output that stores only
 * the non-zero results. The output is attached to the input instance's dataset before it is
 * pushed.
 *
 * @param instance the instance to convert
 * @throws Exception if conversion fails
 */
protected void convertInstance(Instance instance) throws Exception {
  Instance rescaled;
  double[] raw = instance.toDoubleArray();

  if (instance instanceof SparseInstance) {
    double[] keptValues = new double[instance.numAttributes()];
    int[] keptIndices = new int[instance.numAttributes()];
    int kept = 0;

    for (int j = 0; j < instance.numAttributes(); j++) {
      boolean rescale =
          instance.attribute(j).isNumeric()
              && (!Utils.isMissingValue(raw[j]))
              && (getInputFormat().classIndex() != j);
      double value = rescale ? rescaledValue(instance, j, raw[j]) : raw[j];
      if (value != 0.0) {
        keptValues[kept] = value;
        keptIndices[kept] = j;
        kept++;
      }
    }

    // Trim the scratch arrays down to the entries that were actually stored.
    double[] sparseValues = new double[kept];
    int[] sparseIndices = new int[kept];
    System.arraycopy(keptValues, 0, sparseValues, 0, kept);
    System.arraycopy(keptIndices, 0, sparseIndices, 0, kept);
    rescaled =
        new SparseInstance(
            instance.weight(), sparseValues, sparseIndices, instance.numAttributes());
  } else {
    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      boolean rescale =
          instance.attribute(j).isNumeric()
              && (!Utils.isMissingValue(raw[j]))
              && (getInputFormat().classIndex() != j);
      if (rescale) {
        raw[j] = rescaledValue(instance, j, raw[j]);
      }
    }
    rescaled = new DenseInstance(instance.weight(), raw);
  }

  rescaled.setDataset(instance.dataset());
  push(rescaled);
}

/**
 * Rescales a single attribute value with the stored minimum and maximum of that attribute.
 *
 * @param instance the instance the value belongs to; used for the attribute name in the error
 * @param attIndex index of the attribute
 * @param value the value to rescale
 * @return 0 when the stored minimum is NaN or equals the stored maximum, the rescaled value
 *     otherwise
 * @throws Exception if the rescaled value is NaN
 */
private double rescaledValue(Instance instance, int attIndex, double value) throws Exception {
  if (Double.isNaN(m_MinArray[attIndex]) || (m_MaxArray[attIndex] == m_MinArray[attIndex])) {
    return 0;
  }
  double result =
      (value - m_MinArray[attIndex]) / (m_MaxArray[attIndex] - m_MinArray[attIndex]) * m_Scale
          + m_Translation;
  if (Double.isNaN(result)) {
    throw new Exception(
        "A NaN value was generated " + "while normalizing " + instance.attribute(attIndex).name());
  }
  return result;
}