/**
 * Calculates the weighted difference between the prototypes of this statistic and another.
 *
 * @param stat the statistic to compare against
 * @return the weighted prototype difference
 */
public double prototypeDifference(CombStat stat) {
    double sumdiff = 0;
    double weight;
    // Numeric atts: abs difference
    for (int i = 0; i < m_RegStat.getNbNumericAttributes(); i++) {
        weight = m_StatManager.getClusteringWeights().getWeight(m_RegStat.getAttribute(i));
        sumdiff += Math.abs(prototypeNum(i) - stat.prototypeNum(i)) * weight;
        // System.err.println("sumdiff: " + Math.abs(prototypeNum(i) - stat.prototypeNum(i)) * weight);
    }
    // Nominal atts: Manhattan distance
    for (int i = 0; i < m_ClassStat.getNbNominalAttributes(); i++) {
        weight = m_StatManager.getClusteringWeights().getWeight(m_ClassStat.getAttribute(i));
        double sum = 0;
        double[] proto1 = prototypeNom(i);
        double[] proto2 = stat.prototypeNom(i);
        for (int j = 0; j < proto1.length; j++) {
            sum += Math.abs(proto1[j] - proto2[j]);
        }
        sumdiff += sum * weight;
        // System.err.println("sumdiff: " + (sum * weight));
    }
    // System.err.println("sumdiff-total: " + sumdiff);
    return sumdiff != 0 ? sumdiff : 0.0;
}

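// Illustrative sketch (not from the original source): with one numeric attribute whose
// prototypes are 2.0 and 5.0 (clustering weight 1.0) and one nominal attribute whose
// prototype distributions are {0.8, 0.2} and {0.5, 0.5} (weight 1.0), prototypeDifference()
// would return |2.0 - 5.0| * 1.0 + (|0.8 - 0.5| + |0.2 - 0.5|) * 1.0 = 3.0 + 0.6 = 3.6.
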
public double getSVarSDiff(ClusAttributeWeights scale, ClusStatistic other) {
    int nbTargetNom = m_ClassStat.getNbNominalAttributes();
    int nbTargetNum = m_RegStat.getNbNumericAttributes();
    ClassificationStat ocls = ((CombStat) other).getClassificationStat();
    RegressionStat oreg = ((CombStat) other).getRegressionStat();
    return (m_ClassStat.getSVarSDiff(scale, ocls) * nbTargetNom
            + m_RegStat.getSVarSDiff(scale, oreg) * nbTargetNum)
            / (nbTargetNom + nbTargetNum);
}

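// Illustrative sketch (not from the original source): the combined value is an average
// weighted by the number of target attributes of each kind. If the nominal part returns
// 4.0 over 2 nominal targets and the numeric part returns 6.0 over 3 numeric targets,
// getSVarSDiff() would return (4.0 * 2 + 6.0 * 3) / (2 + 3) = 5.2.
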
// TODO: Move to ClassificationStat
public double meanDistNom(int use) {
    double sumdist = 0;
    double weight = 0;
    for (int i = 0; i < m_ClassStat.getNbNominalAttributes(); i++) {
        if (use == IN_HEURISTIC) {
            weight = m_StatManager.getClusteringWeights().getWeight(m_ClassStat.getAttribute(i));
        } else { // use == IN_OUTPUT
            weight = m_StatManager.getDispersionWeights().getWeight(m_ClassStat.getAttribute(i));
        }
        sumdist += meanDistNomOne(i) * weight;
    }
    return sumdist;
}

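// Illustrative sketch (not from the original source): assuming meanDistNomOne(i) returns
// 0.4 and 0.2 for two nominal attributes with weights 1.0 and 0.5, meanDistNom() would
// return 0.4 * 1.0 + 0.2 * 0.5 = 0.5; whether the clustering or the dispersion weights are
// used depends on passing IN_HEURISTIC or IN_OUTPUT.
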
// TODO: This error assessment should be changed, I guess.
public double getError(ClusAttributeWeights scale) {
    System.out.println("CombStat :getError");
    switch (m_StatManager.getMode()) {
        case ClusStatManager.MODE_CLASSIFY:
            return m_ClassStat.getError(scale);
        case ClusStatManager.MODE_REGRESSION:
            return m_RegStat.getError(scale);
        case ClusStatManager.MODE_CLASSIFY_AND_REGRESSION:
            return m_RegStat.getError(scale) + m_ClassStat.getError(scale);
    }
    System.err.println(getClass().getName() + ": getError(): Invalid mode!");
    return Double.POSITIVE_INFINITY;
}

/**
 * Checks whether the values of a target attribute are significantly different.
 *
 * @return true if at least one target attribute is significantly different
 */
public boolean targetSignDifferent() {
    boolean res = false;
    int att = -1;
    String att_name;
    String att_name2;
    ClusStatistic targetStat = m_StatManager.getStatistic(ClusAttrType.ATTR_USE_TARGET);
    if (targetStat instanceof ClassificationStat) {
        for (int i = 0; i < targetStat.getNbNominalAttributes(); i++) {
            att_name = ((ClassificationStat) targetStat).getAttribute(i).getName();
            for (int j = 0; j < m_ClassStat.getNbNominalAttributes(); j++) {
                att_name2 = m_ClassStat.getAttribute(j).getName();
                if (att_name.equals(att_name2)) {
                    att = j;
                    break;
                }
            }
            if (SignDifferentNom(att)) {
                res = true;
                break; // TODO: If one target att significant, the whole rule significant!?
            }
        }
        // System.out.println("Target sign. testing: " + res);
        return res;
    } else if (targetStat instanceof RegressionStat) {
        for (int i = 0; i < targetStat.getNbNumericAttributes(); i++) {
            att_name = ((RegressionStat) targetStat).getAttribute(i).getName();
            for (int j = 0; j < m_RegStat.getNbNumericAttributes(); j++) {
                att_name2 = m_RegStat.getAttribute(j).getName();
                if (att_name.equals(att_name2)) {
                    att = j;
                    break;
                }
            }
            try {
                if (SignDifferentNum(att)) {
                    res = true;
                    break; // TODO: If one target att significant, the whole rule significant!?
                }
            } catch (IllegalArgumentException e) {
                e.printStackTrace();
            } catch (MathException e) {
                e.printStackTrace();
            }
        }
        return res;
    } else {
        // TODO: Classification and regression
        return true;
    }
}

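// Illustrative note (not from the original source): target attributes are matched to this
// statistic's attributes by name, so a nominal target found at local index j is tested with
// SignDifferentNom(j) (numeric targets with SignDifferentNum(j)); the method returns true as
// soon as one target attribute tests as significantly different.
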
public void copy(ClusStatistic other) {
    CombStat or = (CombStat) other;
    m_SumWeight = or.m_SumWeight;
    m_StatManager = or.m_StatManager;
    m_RegStat.copy(or.m_RegStat);
    m_ClassStat.copy(or.m_ClassStat);
}

public String getPredictWriterString() {
    StringBuffer buf = new StringBuffer();
    buf.append(m_ClassStat.getPredictWriterString());
    if (buf.length() != 0)
        buf.append(",");
    buf.append(m_RegStat.getPredictWriterString());
    return buf.toString();
}

// TODO: Move to ClassificationStat
public double meanEntropy() {
    double sent = 0;
    int nbNominal = m_ClassStat.getNbNominalAttributes();
    for (int i = 0; i < nbNominal; i++) {
        sent += entropy(i);
    }
    return sent / nbNominal;
}

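// Illustrative sketch (not from the original source): for two nominal attributes with
// entropies 1.0 and 0.5, meanEntropy() would return (1.0 + 0.5) / 2 = 0.75.
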
public String getString(StatisticPrintInfo info) {
    StringBuffer buf = new StringBuffer();
    buf.append("[");
    buf.append(m_ClassStat.getString(info));
    buf.append(" | ");
    buf.append(m_RegStat.getString(info));
    buf.append("]");
    return buf.toString();
}

/**
 * Significance testing for a nominal attribute.
 *
 * @param att attribute index
 * @return true if this distribution is significantly different from the global distribution
 */
private boolean SignDifferentNom(int att) {
    /*
    double global_n = ((CombStat) m_StatManager.getGlobalStat()).getTotalWeight();
    double local_n = getTotalWeight();
    double ratio = local_n / global_n;
    double global_counts[] = new double[m_ClassStat.getClassCounts(att).length];
    long local_counts[] = new long[global_counts.length];
    for (int i = 0; i < local_counts.length; i++) {
        local_counts[i] = (long) (m_ClassStat.getClassCounts(att)[i]);
        global_counts[i] = ((CombStat) m_StatManager.getGlobalStat()).m_ClassStat.getClassCounts(att)[i] * ratio;
    }
    ChiSquareTestImpl testStatistic = new ChiSquareTestImpl();
    // alpha = significance level, confidence = 1 - alpha
    double alpha = getSettings().getRuleSignificanceLevel();
    System.err.println("Attr.nom.: " + att + ", p-valueX: "
        + testStatistic.chiSquareTest(global_counts, local_counts));
    System.err.println("Attr.nom.: " + att + ", p-valueG: "
        + m_ClassStat.getGTestPValue(att, m_StatManager));
    System.err.println("Attr.nom.: " + att + ", Gvalue/thresh: "
        + m_ClassStat.getGTest(att, m_StatManager) + " / "
        + m_StatManager.getChiSquareInvProb(global_counts.length - 1));
    boolean result = testStatistic.chiSquareTest(global_counts, local_counts, alpha);
    System.err.println("Attr.nom.: " + att + ", result: " + result);
    return result;
    */
    return m_ClassStat.getGTest(att, m_StatManager);
}

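// Illustrative note (not from the original source): the active implementation delegates to
// the G-test in ClassificationStat, which compares the local class counts of attribute att
// against the global distribution; the commented-out block above appears to be an earlier
// chi-square variant based on Apache Commons Math's ChiSquareTestImpl.
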
/** Returns the number of attributes with significantly different distributions */
public int signDifferent() {
    int sign_diff = 0;
    // Nominal attributes
    for (int i = 0; i < m_ClassStat.getNbAttributes(); i++) {
        if (SignDifferentNom(i)) {
            sign_diff++;
        }
    }
    // Numeric attributes
    for (int i = 0; i < m_RegStat.getNbAttributes(); i++) {
        try {
            if (SignDifferentNum(i)) {
                sign_diff++;
            }
        } catch (IllegalArgumentException e) {
            e.printStackTrace();
        } catch (MathException e) {
            e.printStackTrace();
        }
    }
    System.out.println("Nb.sig.atts: " + sign_diff);
    return sign_diff;
}

public void subtractFromOther(ClusStatistic other) {
    CombStat or = (CombStat) other;
    m_RegStat.subtractFromOther(or.m_RegStat);
    m_ClassStat.subtractFromOther(or.m_ClassStat);
    m_SumWeight = or.m_SumWeight - m_SumWeight;
}

public void add(ClusStatistic other) {
    CombStat or = (CombStat) other;
    m_RegStat.add(or.m_RegStat);
    m_ClassStat.add(or.m_ClassStat);
    m_SumWeight += or.m_SumWeight;
}

public void addPrediction(ClusStatistic other, double weight) {
    CombStat or = (CombStat) other;
    m_RegStat.addPrediction(or.m_RegStat, weight);
    m_ClassStat.addPrediction(or.m_ClassStat, weight);
}

public void setTrainingStat(ClusStatistic train) {
    CombStat ctrain = (CombStat) train;
    m_RegStat.setTrainingStat(ctrain.getRegressionStat());
    m_ClassStat.setTrainingStat(ctrain.getClassificationStat());
}

public void addPredictWriterSchema(String prefix, ClusSchema schema) {
    m_ClassStat.addPredictWriterSchema(prefix, schema);
    m_RegStat.addPredictWriterSchema(prefix, schema);
}

public ClusStatistic cloneSimple() {
    return new CombStat(m_StatManager,
                        (RegressionStat) m_RegStat.cloneSimple(),
                        (ClassificationStat) m_ClassStat.cloneSimple());
}

public int getNbNominalAttributes() {
    return m_ClassStat.getNbNominalAttributes();
}

public int[] getNominalPred() {
    return m_ClassStat.getNominalPred();
}

// TODO: Not sure this makes sense in CombStat - Check!
public double getSVarS(ClusAttributeWeights scale) {
    int nbTargetNom = m_ClassStat.getNbNominalAttributes();
    int nbTargetNum = m_RegStat.getNbNumericAttributes();
    return (m_ClassStat.getSVarS(scale) * nbTargetNom + m_RegStat.getSVarS(scale) * nbTargetNum)
            / (nbTargetNom + nbTargetNum);
}

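// Illustrative sketch (not from the original source): as in getSVarSDiff(), the result is an
// average weighted by target counts; with 1 nominal target contributing 2.0 and 1 numeric
// target contributing 4.0, getSVarS() would return (2.0 * 1 + 4.0 * 1) / 2 = 3.0.
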
public void printDistribution(PrintWriter wrt) throws IOException {
    m_ClassStat.printDistribution(wrt);
    m_RegStat.printDistribution(wrt);
}

public void updateWeighted(DataTuple tuple, int idx) { // idx?
    m_RegStat.updateWeighted(tuple, tuple.getWeight());
    m_ClassStat.updateWeighted(tuple, tuple.getWeight());
    m_SumWeight += tuple.getWeight();
}

public void updateWeighted(DataTuple tuple, double weight) {
    m_RegStat.updateWeighted(tuple, weight);
    m_ClassStat.updateWeighted(tuple, weight);
    m_SumWeight += weight;
}

// TODO: Move to ClassificationStat
public double entropy(int attr) {
    return m_ClassStat.entropy(attr);
}

public void calcMean() {
    m_RegStat.calcMean();
    m_ClassStat.calcMean();
}

public void reset() {
    m_RegStat.reset();
    m_ClassStat.reset();
    m_SumWeight = 0.0;
}