Esempio n. 1
0
  /**
   * Generates the classifier.
   *
   * @param instances set of instances serving as training data
   * @throws Exception if the classifier has not been generated successfully
   */
  public void buildClassifier(Instances instances) throws Exception {

    if (!(m_Classifier instanceof WeightedInstancesHandler)) {
      throw new IllegalArgumentException("Classifier must be a " + "WeightedInstancesHandler!");
    }

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (instances.numAttributes() == 1) {
      System.err.println(
          "Cannot build model (only class attribute present in data!), "
              + "using ZeroR model instead!");
      m_ZeroR = new weka.classifiers.rules.ZeroR();
      m_ZeroR.buildClassifier(instances);
      return;
    } else {
      m_ZeroR = null;
    }

    m_Train = new Instances(instances, 0, instances.numInstances());

    m_NNSearch.setInstances(m_Train);
  }
Esempio n. 2
0
  /**
   * Splits the given set of instances into subsets.
   *
   * @exception Exception if something goes wrong
   */
  public final Instances[] split(Instances data) throws Exception {

    Instances[] instances = new Instances[m_numSubsets];
    double[] weights;
    double newWeight;
    Instance instance;
    int subset, i, j;

    for (j = 0; j < m_numSubsets; j++)
      instances[j] = new Instances((Instances) data, data.numInstances());
    for (i = 0; i < data.numInstances(); i++) {
      instance = ((Instances) data).instance(i);
      weights = weights(instance);
      subset = whichSubset(instance);
      if (subset > -1) instances[subset].add(instance);
      else
        for (j = 0; j < m_numSubsets; j++)
          if (Utils.gr(weights[j], 0)) {
            newWeight = weights[j] * instance.weight();
            instances[j].add(instance);
            instances[j].lastInstance().setWeight(newWeight);
          }
    }
    for (j = 0; j < m_numSubsets; j++) instances[j].compactify();

    return instances;
  }
Esempio n. 3
0
  /**
   * Returns a vector with column names of the dataset, listed in "list". If a column cannot be
   * found or the list is empty the ones from the default list are returned.
   *
   * @param list comma-separated list of attribute names
   * @param defaultList the default list of attribute names
   * @param inst the instances to get the attribute names from
   * @return a vector containing attribute names
   */
  protected Vector determineColumnNames(String list, String defaultList, Instances inst) {
    Vector result;
    Vector atts;
    StringTokenizer tok;
    int i;
    String item;

    // get attribute names
    atts = new Vector();
    for (i = 0; i < inst.numAttributes(); i++) atts.add(inst.attribute(i).name().toLowerCase());

    // process list
    result = new Vector();
    tok = new StringTokenizer(list, ",");
    while (tok.hasMoreTokens()) {
      item = tok.nextToken().toLowerCase();
      if (atts.contains(item)) {
        result.add(item);
      } else {
        result.clear();
        break;
      }
    }

    // do we have to return defaults?
    if (result.size() == 0) {
      tok = new StringTokenizer(defaultList, ",");
      while (tok.hasMoreTokens()) result.add(tok.nextToken().toLowerCase());
    }

    return result;
  }
 public void testTypical() {
   Instances result = useFilter();
   // Number of attributes and instances shouldn't change
   assertEquals(m_Instances.numAttributes() + 5, result.numAttributes());
   assertEquals(m_Instances.numInstances(), result.numInstances());
   // Eibe can enhance this to check the binarizing is correct.
 }
Esempio n. 5
0
  /**
   * Generate artificial training examples.
   *
   * @param artSize size of examples set to create
   * @param data training data
   * @return the set of unlabeled artificial examples
   */
  protected Instances generateArtificialData(int artSize, Instances data) {
    int numAttributes = data.numAttributes();
    Instances artData = new Instances(data, artSize);
    double[] att;
    Instance artInstance;

    for (int i = 0; i < artSize; i++) {
      att = new double[numAttributes];
      for (int j = 0; j < numAttributes; j++) {
        if (data.attribute(j).isNominal()) {
          // Select nominal value based on the frequency of occurence in the training data
          double[] stats = (double[]) m_AttributeStats.get(j);
          att[j] = (double) selectIndexProbabilistically(stats);
        } else if (data.attribute(j).isNumeric()) {
          // Generate numeric value from the Guassian distribution
          // defined by the mean and std dev of the attribute
          double[] stats = (double[]) m_AttributeStats.get(j);
          att[j] = (m_Random.nextGaussian() * stats[1]) + stats[0];
        } else System.err.println("Decorate can only handle numeric and nominal values.");
      }
      artInstance = new Instance(1.0, att);
      artData.add(artInstance);
    }
    return artData;
  }
  public static void wekaAlgorithms(Instances data) throws Exception {
    classifier = new FilteredClassifier(); // new instance of tree
    classifier.setClassifier(new NaiveBayes());
    //  classifier.setClassifier(new J48());
    // classifier.setClassifier(new RandomForest());

    //	classifier.setClassifier(new ZeroR());
    //  classifier.setClassifier(new NaiveBayes());
    //     classifier.setClassifier(new IBk());

    data.setClassIndex(data.numAttributes() - 1);
    Evaluation eval = new Evaluation(data);

    int folds = 10;
    eval.crossValidateModel(classifier, data, folds, new Random(1));

    System.out.println("===== Evaluating on filtered (training) dataset =====");
    System.out.println(eval.toSummaryString());
    System.out.println(eval.toClassDetailsString());
    double[][] mat = eval.confusionMatrix();
    System.out.println("========= Confusion Matrix =========");
    for (int i = 0; i < mat.length; i++) {
      for (int j = 0; j < mat.length; j++) {

        System.out.print(mat[i][j] + "  ");
      }
      System.out.println(" ");
    }
  }
 /** trains the classifier */
 @Override
 public void train() throws Exception {
   if (_train.classIndex() == -1) _train.setClassIndex(_train.numAttributes() - 1);
   _cl.buildClassifier(_train);
   // evaluate classifier and print some statistics
   evaluate();
 }
Esempio n. 8
0
  /**
   * initializes the algorithm
   *
   * @param data the data to work with
   * @throws Exception if m_SVM is null
   */
  protected void init(Instances data) throws Exception {
    if (m_SVM == null) {
      throw new Exception("SVM not initialized in optimizer. Use RegOptimizer.setSVMReg()");
    }
    m_C = m_SVM.getC();
    m_data = data;
    m_classIndex = data.classIndex();
    m_nInstances = data.numInstances();

    // Initialize kernel
    m_kernel = Kernel.makeCopy(m_SVM.getKernel());
    m_kernel.buildKernel(data);

    // init m_target
    m_target = new double[m_nInstances];
    for (int i = 0; i < m_nInstances; i++) {
      m_target[i] = data.instance(i).classValue();
    }

    m_random = new Random(m_nSeed);

    //		initialize alpha and alpha* array to all zero
    m_alpha = new double[m_target.length];
    m_alphaStar = new double[m_target.length];

    m_supportVectors = new SMOset(m_nInstances);

    m_b = 0.0;
    m_nEvals = 0;
    m_nCacheHits = -1;
  }
Esempio n. 9
0
  /**
   * wrap up various variables to save memeory and do some housekeeping after optimization has
   * finished.
   *
   * @throws Exception if something goes wrong
   */
  protected void wrapUp() throws Exception {
    m_target = null;

    m_nEvals = m_kernel.numEvals();
    m_nCacheHits = m_kernel.numCacheHits();

    if ((m_SVM.getKernel() instanceof PolyKernel)
        && ((PolyKernel) m_SVM.getKernel()).getExponent() == 1.0) {
      // convert alpha's to weights
      double[] weights = new double[m_data.numAttributes()];
      for (int k = m_supportVectors.getNext(-1); k != -1; k = m_supportVectors.getNext(k)) {
        for (int j = 0; j < weights.length; j++) {
          if (j != m_classIndex) {
            weights[j] += (m_alpha[k] - m_alphaStar[k]) * m_data.instance(k).value(j);
          }
        }
      }
      m_weights = weights;

      // release memory
      m_alpha = null;
      m_alphaStar = null;
      m_kernel = null;
    }
    m_bModelBuilt = true;
  }
Esempio n. 10
0
  /**
   * @param args
   * @throws Exception
   */
  public static void main(String[] args) throws Exception {
    Instances isTrainingSet = createSet(4);
    Instance instance1 = createInstance(new double[] {1, 0.7, 0.1, 0.7}, "S1", isTrainingSet);
    Instance instance2 = createInstance(new double[] {0.1, 0.2, 1, 0.3}, "S2", isTrainingSet);
    Instance instance22 = createInstance(new double[] {0, 0, 0, 0}, "S3", isTrainingSet);
    isTrainingSet.add(instance1);
    isTrainingSet.add(instance2);
    isTrainingSet.add(instance22);
    Instances isTestingSet = createSet(4);
    Instance instance3 = createInstance(new double[] {1, 0.7, 0.1, 0.7}, "S1", isTrainingSet);
    Instance instance4 = createInstance(new double[] {0.1, 0.2, 1, 0.3}, "S2", isTrainingSet);
    isTestingSet.add(instance3);
    isTestingSet.add(instance4);

    // Create a naïve bayes classifier
    Classifier cModel = (Classifier) new BayesNet(); // M5P
    cModel.buildClassifier(isTrainingSet);

    // Test the model
    Evaluation eTest = new Evaluation(isTrainingSet);
    eTest.evaluateModel(cModel, isTestingSet);

    // Print the result à la Weka explorer:
    String strSummary = eTest.toSummaryString();
    System.out.println(strSummary);

    // Get the likelihood of each classes
    // fDistribution[0] is the probability of being “positive”
    // fDistribution[1] is the probability of being “negative”
    double[] fDistribution = cModel.distributionForInstance(instance4);
    for (int i = 0; i < fDistribution.length; i++) {
      System.out.println(fDistribution[i]);
    }
  }
Esempio n. 11
0
  /**
   * Build the associator on the filtered data.
   *
   * @param data the training data
   * @throws Exception if the Associator could not be built successfully
   */
  public void buildAssociations(Instances data) throws Exception {
    if (m_Associator == null) throw new Exception("No base associator has been set!");

    // create copy and set class-index
    data = new Instances(data);
    if (getClassIndex() == 0) {
      data.setClassIndex(data.numAttributes() - 1);
    } else {
      data.setClassIndex(getClassIndex() - 1);
    }

    if (getClassIndex() != -1) {
      // remove instances with missing class
      data.deleteWithMissingClass();
    }

    m_Filter.setInputFormat(data); // filter capabilities are checked here
    data = Filter.useFilter(data, m_Filter);

    // can associator handle the data?
    getAssociator().getCapabilities().testWithFail(data);

    m_FilteredInstances = data.stringFreeStructure();
    m_Associator.buildAssociations(data);
  }
Esempio n. 12
0
  /**
   * Determines and returns (if possible) the structure (internally the header) of the data set as
   * an empty set of instances.
   *
   * @return the structure of the data set as an empty set of Instances
   * @throws IOException if an error occurs
   */
  public Instances getStructure() throws IOException {
    if (getDirectory() == null) {
      throw new IOException("No directory/source has been specified");
    }

    // determine class labels, i.e., sub-dirs
    if (m_structure == null) {
      String directoryPath = getDirectory().getAbsolutePath();
      ArrayList<Attribute> atts = new ArrayList<Attribute>();
      ArrayList<String> classes = new ArrayList<String>();

      File dir = new File(directoryPath);
      String[] subdirs = dir.list();

      for (int i = 0; i < subdirs.length; i++) {
        File subdir = new File(directoryPath + File.separator + subdirs[i]);
        if (subdir.isDirectory()) classes.add(subdirs[i]);
      }

      atts.add(new Attribute("text", (ArrayList<String>) null));
      if (m_OutputFilename) atts.add(new Attribute("filename", (ArrayList<String>) null));
      // make sure that the name of the class attribute is unlikely to
      // clash with any attribute created via the StringToWordVector filter
      atts.add(new Attribute("@@class@@", classes));

      String relName = directoryPath.replaceAll("/", "_");
      relName = relName.replaceAll("\\\\", "_").replaceAll(":", "_");
      m_structure = new Instances(relName, atts, 0);
      m_structure.setClassIndex(m_structure.numAttributes() - 1);
    }

    return m_structure;
  }
Esempio n. 13
0
  /**
   * Sets instances that should be stored.
   *
   * @param instances the instances
   */
  @Override
  public void setInstances(Instances instances) {
    m_ClassIndex.setUpper(instances.numAttributes() - 1);
    instances.setClassIndex(m_ClassIndex.getIndex());

    super.setInstances(instances);
  }
Esempio n. 14
0
  private double calcNodeScorePlain(int nNode) {
    Instances instances = m_BayesNet.m_Instances;
    ParentSet oParentSet = m_BayesNet.getParentSet(nNode);

    // determine cardinality of parent set & reserve space for frequency counts
    int nCardinality = oParentSet.getCardinalityOfParents();
    int numValues = instances.attribute(nNode).numValues();
    int[] nCounts = new int[nCardinality * numValues];

    // initialize (don't need this?)
    for (int iParent = 0; iParent < nCardinality * numValues; iParent++) {
      nCounts[iParent] = 0;
    }

    // estimate distributions
    Enumeration enumInsts = instances.enumerateInstances();

    while (enumInsts.hasMoreElements()) {
      Instance instance = (Instance) enumInsts.nextElement();

      // updateClassifier;
      double iCPT = 0;

      for (int iParent = 0; iParent < oParentSet.getNrOfParents(); iParent++) {
        int nParent = oParentSet.getParent(iParent);

        iCPT = iCPT * instances.attribute(nParent).numValues() + instance.value(nParent);
      }

      nCounts[numValues * ((int) iCPT) + (int) instance.value(nNode)]++;
    }

    return calcScoreOfCounts(nCounts, nCardinality, numValues, instances);
  } // CalcNodeScore
  /** tests whether a URL can be loaded (via setURL(URL)). */
  public void testURLSourcedLoader() {
    Instances data;

    if (!(getLoader() instanceof URLSourcedLoader)) {
      return;
    }

    try {
      // save
      m_Saver.setInstances(m_Instances);
      m_Saver.setFile(new File(m_ExportFilename));
      m_Saver.writeBatch();

      // load
      ((URLSourcedLoader) m_Loader).setURL(new File(m_ExportFilename).toURI().toURL().toString());
      data = m_Loader.getDataSet();

      // compare data
      try {
        if (m_Instances.classIndex() != data.classIndex()) {
          data.setClassIndex(m_Instances.classIndex());
        }
        compareDatasets(m_Instances, data);
      } catch (Exception e) {
        fail("URL load failed (datasets differ): " + e.toString());
      }
    } catch (Exception e) {
      e.printStackTrace();
      fail("URL load failed: " + e.toString());
    }
  }
Esempio n. 16
0
  private static void writePredictedDistributions(
      Classifier c, Instances data, int idIndex, Writer out) throws Exception {
    // header
    out.write("id");
    for (int i = 0; i < data.numClasses(); i++) {
      out.write(",\"");
      out.write(data.classAttribute().value(i).replaceAll("[\"\\\\]", "_"));
      out.write("\"");
    }
    out.write("\n");

    // data
    for (int i = 0; i < data.numInstances(); i++) {
      final String id = data.instance(i).stringValue(idIndex);
      double[] distribution = c.distributionForInstance(data.instance(i));

      // final String label = data.attribute(classIndex).value();
      out.write(id);
      for (double probability : distribution) {
        out.write(",");
        out.write(String.valueOf(probability > 1e-5 ? (float) probability : 0f));
      }
      out.write("\n");
    }
  }
  /** tests whether data can be loaded via setSource() with a file stream. */
  public void testLoaderWithStream() {
    Instances data;

    try {
      // save
      m_Saver.setInstances(m_Instances);
      m_Saver.setFile(new File(m_ExportFilename));
      m_Saver.writeBatch();

      // load
      m_Loader.setSource(new FileInputStream(new File(m_ExportFilename)));
      data = m_Loader.getDataSet();

      // compare data
      try {
        if (m_Instances.classIndex() != data.classIndex()) {
          data.setClassIndex(m_Instances.classIndex());
        }
        compareDatasets(m_Instances, data);
      } catch (Exception e) {
        fail("File stream loading failed (datasets differ): " + e.toString());
      }
    } catch (Exception e) {
      e.printStackTrace();
      fail("File stream loading failed: " + e.toString());
    }
  }
Esempio n. 18
0
  /**
   * Sets the format of the input instances.
   *
   * @param instanceInfo an Instances object containing the input instance structure (any instances
   *     contained in the object are ignored - only the structure is required).
   * @return true if the outputFormat may be collected immediately
   * @throws UnsupportedAttributeTypeException if selected attributes are not numeric or nominal.
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {

    if ((instanceInfo.classIndex() > 0) && (!getFillWithMissing())) {
      throw new IllegalArgumentException(
          "TimeSeriesTranslate: Need to fill in missing values "
              + "using appropriate option when class index is set.");
    }
    super.setInputFormat(instanceInfo);
    // Create the output buffer
    Instances outputFormat = new Instances(instanceInfo, 0);
    for (int i = 0; i < instanceInfo.numAttributes(); i++) {
      if (i != instanceInfo.classIndex()) {
        if (m_SelectedCols.isInRange(i)) {
          if (outputFormat.attribute(i).isNominal() || outputFormat.attribute(i).isNumeric()) {
            outputFormat.renameAttribute(
                i,
                outputFormat.attribute(i).name()
                    + (m_InstanceRange < 0 ? '-' : '+')
                    + Math.abs(m_InstanceRange));
          } else {
            throw new UnsupportedAttributeTypeException(
                "Only numeric and nominal attributes may be " + " manipulated in time series.");
          }
        }
      }
    }
    outputFormat.setClassIndex(instanceInfo.classIndex());
    setOutputFormat(outputFormat);
    return true;
  }
Esempio n. 19
0
  /**
   * Analyses the given list of decision points according to the context specified. Furthermore, the
   * context is provided with some visualization of the analysis result.
   *
   * @param decisionPoints the list of decision points to be analysed
   * @param log the log to be analysed
   * @param highLevelPN the simulation model to export discovered data dependencies
   */
  public void analyse(ClusterDecisionAnalyzer cda) {
    clusterDecisionAnalyzer = cda;

    // create empty data set with attribute information
    Instances data = cda.getDataInfo();

    // in case no single learning instance can be provided (as decision
    // point is never
    // reached, or decision classes cannot specified properly) --> do not
    // call algorithm
    if (data.numInstances() == 0) {
      System.out.println("No learning instances available");
    }
    // actually solve the classification problem
    else {
      try {
        myClassifier.buildClassifier(data);
        // build up result visualization
        cda.setResultVisualization(createResultVisualization());
        cda.setEvaluationVisualization(createEvaluationVisualization(data));
      } catch (Exception ex) {
        ex.printStackTrace();
        cda.setResultVisualization(
            createMessagePanel("Error while solving the classification problem"));
      }
    }
  }
Esempio n. 20
0
  /**
   * Calculates the area under the precision-recall curve (AUPRC).
   *
   * @param tcurve a previously extracted threshold curve Instances.
   * @return the PRC area, or Double.NaN if you don't pass in a ThresholdCurve generated Instances.
   */
  public static double getPRCArea(Instances tcurve) {
    final int n = tcurve.numInstances();
    if (!RELATION_NAME.equals(tcurve.relationName()) || (n == 0)) {
      return Double.NaN;
    }

    final int pInd = tcurve.attribute(PRECISION_NAME).index();
    final int rInd = tcurve.attribute(RECALL_NAME).index();
    final double[] pVals = tcurve.attributeToDoubleArray(pInd);
    final double[] rVals = tcurve.attributeToDoubleArray(rInd);

    double area = 0;
    double xlast = rVals[n - 1];

    // start from the first real p/r pair (not the artificial zero point)
    for (int i = n - 2; i >= 0; i--) {
      double recallDelta = rVals[i] - xlast;
      area += (pVals[i] * recallDelta);

      xlast = rVals[i];
    }

    if (area == 0) {
      return Utils.missingValue();
    }
    return area;
  }
Esempio n. 21
0
  /**
   * Returns a string containing java source code equivalent to the test made at this node. The
   * instance being tested is called "i".
   *
   * @param index index of the nominal value tested
   * @param data the data containing instance structure info
   * @return a value of type 'String'
   */
  public final String sourceExpression(int index, Instances data) {

    StringBuffer expr = null;
    if (index < 0) {
      return "i[" + m_attIndex + "] == null";
    }
    if (data.attribute(m_attIndex).isNominal()) {
      if (index == 0) {
        expr = new StringBuffer("i[");
      } else {
        expr = new StringBuffer("!i[");
      }
      expr.append(m_attIndex).append("]");
      expr.append(".equals(\"")
          .append(data.attribute(m_attIndex).value((int) m_splitPoint))
          .append("\")");
    } else {
      expr = new StringBuffer("((Double) i[");
      expr.append(m_attIndex).append("])");
      if (index == 0) {
        expr.append(".doubleValue() <= ").append(m_splitPoint);
      } else {
        expr.append(".doubleValue() > ").append(m_splitPoint);
      }
    }
    return expr.toString();
  }
Esempio n. 22
0
  /**
   * Calculates the area under the ROC curve as the Wilcoxon-Mann-Whitney statistic.
   *
   * @param tcurve a previously extracted threshold curve Instances.
   * @return the ROC area, or Double.NaN if you don't pass in a ThresholdCurve generated Instances.
   */
  public static double getROCArea(Instances tcurve) {

    final int n = tcurve.numInstances();
    if (!RELATION_NAME.equals(tcurve.relationName()) || (n == 0)) {
      return Double.NaN;
    }
    final int tpInd = tcurve.attribute(TRUE_POS_NAME).index();
    final int fpInd = tcurve.attribute(FALSE_POS_NAME).index();
    final double[] tpVals = tcurve.attributeToDoubleArray(tpInd);
    final double[] fpVals = tcurve.attributeToDoubleArray(fpInd);

    double area = 0.0, cumNeg = 0.0;
    final double totalPos = tpVals[0];
    final double totalNeg = fpVals[0];
    for (int i = 0; i < n; i++) {
      double cip, cin;
      if (i < n - 1) {
        cip = tpVals[i] - tpVals[i + 1];
        cin = fpVals[i] - fpVals[i + 1];
      } else {
        cip = tpVals[n - 1];
        cin = fpVals[n - 1];
      }
      area += cip * (cumNeg + (0.5 * cin));
      cumNeg += cin;
    }
    area /= (totalNeg * totalPos);

    return area;
  }
Esempio n. 23
0
  /**
   * Find all the instances in the dataset covered/not covered by the rule in given index, and the
   * correponding simple statistics and predicted class distributions are stored in the given double
   * array, which can be obtained by getSimpleStats() and getDistributions().<br>
   *
   * @param index the given index, assuming correct
   * @param insts the dataset to be covered by the rule
   * @param stats the given double array to hold stats, side-effected
   * @param dist the given array to hold class distributions, side-effected if null, the
   *     distribution is not necessary
   * @return the instances covered and not covered by the rule
   */
  private Instances[] computeSimpleStats(
      int index, Instances insts, double[] stats, double[] dist) {
    Rule rule = (Rule) m_Ruleset.elementAt(index);

    Instances[] data = new Instances[2];
    data[0] = new Instances(insts, insts.numInstances());
    data[1] = new Instances(insts, insts.numInstances());

    for (int i = 0; i < insts.numInstances(); i++) {
      Instance datum = insts.instance(i);
      double weight = datum.weight();
      if (rule.covers(datum)) {
        data[0].add(datum); // Covered by this rule
        stats[0] += weight; // Coverage
        if ((int) datum.classValue() == (int) rule.getConsequent())
          stats[2] += weight; // True positives
        else stats[4] += weight; // False positives
        if (dist != null) dist[(int) datum.classValue()] += weight;
      } else {
        data[1].add(datum); // Not covered by this rule
        stats[1] += weight;
        if ((int) datum.classValue() != (int) rule.getConsequent())
          stats[3] += weight; // True negatives
        else stats[5] += weight; // False negatives
      }
    }

    return data;
  }
Esempio n. 24
0
  /**
   * Tests the ThresholdCurve generation from the command line. The classifier is currently
   * hardcoded. Pipe in an arff file.
   *
   * @param args currently ignored
   */
  public static void main(String[] args) {

    try {

      Instances inst = new Instances(new java.io.InputStreamReader(System.in));
      if (false) {
        System.out.println(ThresholdCurve.getNPointPrecision(inst, 11));
      } else {
        inst.setClassIndex(inst.numAttributes() - 1);
        ThresholdCurve tc = new ThresholdCurve();
        EvaluationUtils eu = new EvaluationUtils();
        Classifier classifier = new weka.classifiers.functions.Logistic();
        FastVector predictions = new FastVector();
        for (int i = 0; i < 2; i++) { // Do two runs.
          eu.setSeed(i);
          predictions.appendElements(eu.getCVPredictions(classifier, inst, 10));
          // System.out.println("\n\n\n");
        }
        Instances result = tc.getCurve(predictions);
        System.out.println(result);
      }
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }
Esempio n. 25
0
  public static double CA(Instances odata, int[] clusters) {
    double result = 0;
    double[] tmpdclass = odata.attributeToDoubleArray(odata.numAttributes() - 1);
    int[] oclass = new int[odata.numInstances()];
    for (int i = 0; i < tmpdclass.length; ++i) {
      oclass[i] = (int) tmpdclass[i];
    }
    int[] tmpclass = oclass.clone();
    int[] tmpclusters = clusters.clone();

    Arrays.sort(tmpclusters);
    Arrays.sort(tmpclass);
    int[][] M = new int[tmpclass[tmpclass.length - 1] + 1][tmpclusters[tmpclusters.length - 1] + 1];

    for (int i = 0; i < clusters.length; ++i) {
      M[oclass[i]][clusters[i]]++;
    }
    for (int i = 0; i < M.length; ++i) {
      System.out.println(Arrays.toString(M[i]));
    }
    for (int i = 0; i < M.length; ++i) {
      int maxindex = -1;
      for (int j = 0; j < M[0].length - 1; ++j) {
        if (M[i][j] < M[i][j + 1]) maxindex = j + 1;
      }
      M[i][0] = maxindex;
    }

    for (int i = 0; i < oclass.length; ++i) {
      if (M[oclass[i]][0] == clusters[i]) result++;
    }

    return (double) result / (double) odata.numInstances();
  }
Esempio n. 26
0
  /**
   * Returns a string representation of the classifier.
   *
   * @return a string representation of the classifier
   */
  public String toString() {
    StringBuffer result =
        new StringBuffer(
            "The independent probability of a class\n--------------------------------------\n");

    for (int c = 0; c < m_numClasses; c++)
      result
          .append(m_headerInfo.classAttribute().value(c))
          .append("\t")
          .append(Double.toString(m_probOfClass[c]))
          .append("\n");

    result.append(
        "\nThe probability of a word given the class\n-----------------------------------------\n\t");

    for (int c = 0; c < m_numClasses; c++)
      result.append(m_headerInfo.classAttribute().value(c)).append("\t");

    result.append("\n");

    for (int w = 0; w < m_numAttributes; w++) {
      result.append(m_headerInfo.attribute(w).name()).append("\t");
      for (int c = 0; c < m_numClasses; c++)
        result.append(Double.toString(Math.exp(m_probOfWordGivenClass[c][w]))).append("\t");
      result.append("\n");
    }

    return result.toString();
  }
  /**
   * Calculates the centroid pivot of a node based on the list of points that it contains (tbe two
   * lists of its children are provided).
   *
   * @param list1 The point index list of first child.
   * @param list2 The point index list of second child.
   * @param insts The insts object on which the tree is being built (for header information).
   * @return The centroid pivot of the node.
   */
  public Instance calcPivot(MyIdxList list1, MyIdxList list2, Instances insts) {
    int classIdx = m_Instances.classIndex();
    double[] attrVals = new double[insts.numAttributes()];

    Instance temp;
    for (int i = 0; i < list1.length(); i++) {
      temp = insts.instance(((ListNode) list1.get(i)).idx);
      for (int k = 0; k < temp.numValues(); k++) {
        if (temp.index(k) == classIdx) continue;
        attrVals[k] += temp.valueSparse(k);
      }
    }
    for (int j = 0; j < list2.length(); j++) {
      temp = insts.instance(((ListNode) list2.get(j)).idx);
      for (int k = 0; k < temp.numValues(); k++) {
        if (temp.index(k) == classIdx) continue;
        attrVals[k] += temp.valueSparse(k);
      }
    }
    for (int j = 0, numInsts = list1.length() + list2.length(); j < attrVals.length; j++) {
      attrVals[j] /= numInsts;
    }
    temp = new DenseInstance(1.0, attrVals);
    return temp;
  }
  /** test the batch saving/loading (via setFile(File)). */
  public void testBatch() {
    Instances data;

    try {
      // save
      m_Saver.setInstances(m_Instances);
      m_Saver.setFile(new File(m_ExportFilename));
      m_Saver.writeBatch();

      // load
      ((AbstractFileLoader) m_Loader).setFile(new File(m_ExportFilename));
      data = m_Loader.getDataSet();

      // compare data
      try {
        if (m_Instances.classIndex() != data.classIndex()) {
          data.setClassIndex(m_Instances.classIndex());
        }
        compareDatasets(m_Instances, data);
      } catch (Exception e) {
        fail("Incremental load failed (datasets differ): " + e.toString());
      }
    } catch (Exception e) {
      e.printStackTrace();
      fail("Batch save/load failed: " + e.toString());
    }
  }
Esempio n. 29
0
  /**
   * Signify that this batch of input to the filter is finished.
   *
   * @return true if there are instances pending output
   * @throws IllegalStateException if no input structure has been defined
   */
  @Override
  public boolean batchFinished() throws Exception {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    if (!m_firstBatchFinished) {

      Instances filtered;
      if (m_numOfCrossValidationFolds < 2) {
        filtered = cleanseTrain(getInputFormat());
      } else {
        filtered = cleanseCross(getInputFormat());
      }

      for (int i = 0; i < filtered.numInstances(); i++) {
        push(filtered.instance(i));
      }

      m_firstBatchFinished = true;
      flushInput();
    }
    m_NewBatch = true;
    return (numPendingOutput() != 0);
  }
Esempio n. 30
0
  /**
   * Aggregate an object with this one
   *
   * @param toAggregate the object to aggregate
   * @return the result of aggregation
   * @throws Exception if the supplied object can't be aggregated for some reason
   */
  @Override
  public Logistic aggregate(Logistic toAggregate) throws Exception {
    if (m_numModels == Integer.MIN_VALUE) {
      throw new Exception(
          "Can't aggregate further - model has already been " + "aggregated and finalized");
    }

    if (m_Par == null) {
      throw new Exception("No model built yet, can't aggregate");
    }

    if (!m_structure.equalHeaders(toAggregate.m_structure)) {
      throw new Exception(
          "Can't aggregate - data headers dont match: "
              + m_structure.equalHeadersMsg(toAggregate.m_structure));
    }

    for (int i = 0; i < m_Par.length; i++) {
      for (int j = 0; j < m_Par[i].length; j++) {
        m_Par[i][j] += toAggregate.m_Par[i][j];
      }
    }

    m_numModels++;

    return this;
  }