Example #1
  public static void main(String[] args) throws Exception {

    /*
     * First we load the test data from our ARFF file
     */
    ArffLoader testLoader = new ArffLoader();
    testLoader.setSource(new File("data/titanic/test.arff"));
    testLoader.setRetrieval(Loader.BATCH);
    Instances testDataSet = testLoader.getDataSet();

    /*
     * Now we tell the data set which attribute we want to classify, in our
     * case, we want to classify the first column: survived
     */
    Attribute testAttribute = testDataSet.attribute(0);
    testDataSet.setClass(testAttribute);
    testDataSet.deleteStringAttributes();

    /*
     * Now we read in the serialized model from disk
     */
    Classifier classifier = (Classifier) SerializationHelper.read("data/titanic/titanic.model");

    /*
     * This part may be a little confusing. We load up the test data again
     * so we have a prediction data set to populate. As we iterate over the
     * first data set we also iterate over the second data set. After an
     * instance is classified, we set the value of the prediction data set
     * to be the value of the classification
     */
    ArffLoader test1Loader = new ArffLoader();
    test1Loader.setSource(new File("data/titanic/test.arff"));
    Instances test1DataSet = test1Loader.getDataSet();
    Attribute test1Attribute = test1DataSet.attribute(0);
    test1DataSet.setClass(test1Attribute);

    /*
     * Now we iterate over the test data and classify each entry and set the
     * value of the 'survived' column to the result of the classification
     */
    Enumeration testInstances = testDataSet.enumerateInstances();
    Enumeration test1Instances = test1DataSet.enumerateInstances();
    while (testInstances.hasMoreElements()) {
      Instance instance = (Instance) testInstances.nextElement();
      Instance instance1 = (Instance) test1Instances.nextElement();
      double classification = classifier.classifyInstance(instance);
      instance1.setClassValue(classification);
    }

    /*
     * Now we want to write out our predictions. The resulting file is in a
     * format suitable to submit to Kaggle.
     */
    CSVSaver predictedCsvSaver = new CSVSaver();
    predictedCsvSaver.setFile(new File("data/titanic/predict.csv"));
    predictedCsvSaver.setInstances(test1DataSet);
    predictedCsvSaver.writeBatch();

    System.out.println("Prediciton saved to predict.csv");
  }
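
This example assumes data/titanic/titanic.model already exists on disk. A minimal sketch of a training step that could produce it (the train.arff path and the J48 learner are illustrative assumptions, not part of the original):

  public static void trainAndSave() throws Exception {
    // Load the training data (hypothetical path)
    ArffLoader trainLoader = new ArffLoader();
    trainLoader.setSource(new File("data/titanic/train.arff"));
    Instances trainSet = trainLoader.getDataSet();

    // Same preprocessing as the prediction code: class is the first column
    trainSet.setClass(trainSet.attribute(0));
    trainSet.deleteStringAttributes();

    // Any Weka classifier works here; J48 is just an example choice
    Classifier cls = new weka.classifiers.trees.J48();
    cls.buildClassifier(trainSet);

    // Serialize the model to the path the prediction code reads from
    SerializationHelper.write("data/titanic/titanic.model", cls);
  }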
Example #2
  // Computes the error rate of h1 and h2 on the instances where the two classifiers agree
  public double measureBothError(Classifier h1, Classifier h2, Instances test) {
    int m = test.numInstances();
    double value1, value2, value;
    int error = 0, total = 0;
    try {
      for (int i = 0; i < m; i++) {
        value = test.instance(i).classValue();
        value1 = h1.classifyInstance(test.instance(i));
        value2 = h2.classifyInstance(test.instance(i));

        // Both classifiers made the same decision
        if (value1 == value2) {
          // count of instances on which the two classifiers agree
          total++;

          // both classifiers made the same wrong decision
          if (value != value1) {
            // count of instances both classifiers misclassify identically
            error++;
          }
        }
      }
    } catch (Exception e) {
      System.out.println(e);
    }
    // System.out.println("m:=" + m);
    // System.out.println("error:=" + error +"; total:=" + total);

    // Joint error rate = (instances both misclassify identically)
    //                  / (instances on which the two classifiers agree)
    return (error * 1.0) / total;
  }
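
A hedged usage sketch for measureBothError (J48 and NaiveBayes are illustrative choices; any two trained Weka classifiers would do):

  public void exampleJointError(Instances train, Instances test) throws Exception {
    // Train two different base classifiers on the same labeled data
    Classifier h1 = new weka.classifiers.trees.J48();
    Classifier h2 = new weka.classifiers.bayes.NaiveBayes();
    h1.buildClassifier(train);
    h2.buildClassifier(train);

    // Error rate restricted to the instances where the two classifiers agree
    double jointError = measureBothError(h1, h2, test);
    System.out.println("joint error: " + jointError);
  }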
Example #3
File: WekaTest.java Project: fsteeg/tm2
  /**
   * @param args
   * @throws Exception
   */
  public static void main(String[] args) throws Exception {
    Instances isTrainingSet = createSet(4);
    Instance instance1 = createInstance(new double[] {1, 0.7, 0.1, 0.7}, "S1", isTrainingSet);
    Instance instance2 = createInstance(new double[] {0.1, 0.2, 1, 0.3}, "S2", isTrainingSet);
    Instance instance22 = createInstance(new double[] {0, 0, 0, 0}, "S3", isTrainingSet);
    isTrainingSet.add(instance1);
    isTrainingSet.add(instance2);
    isTrainingSet.add(instance22);
    Instances isTestingSet = createSet(4);
    Instance instance3 = createInstance(new double[] {1, 0.7, 0.1, 0.7}, "S1", isTrainingSet);
    Instance instance4 = createInstance(new double[] {0.1, 0.2, 1, 0.3}, "S2", isTrainingSet);
    isTestingSet.add(instance3);
    isTestingSet.add(instance4);

    // Create a Bayesian network classifier
    Classifier cModel = (Classifier) new BayesNet(); // M5P
    cModel.buildClassifier(isTrainingSet);

    // Test the model
    Evaluation eTest = new Evaluation(isTrainingSet);
    eTest.evaluateModel(cModel, isTestingSet);

    // Print the result à la Weka explorer:
    String strSummary = eTest.toSummaryString();
    System.out.println(strSummary);

    // Get the likelihood of each class:
    // fDistribution[i] is the predicted probability of the i-th class value
    // (here "S1", "S2", "S3")
    double[] fDistribution = cModel.distributionForInstance(instance4);
    for (int i = 0; i < fDistribution.length; i++) {
      System.out.println(fDistribution[i]);
    }
  }
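
The example relies on createSet and createInstance helpers that are not shown. Plausible sketches, assuming numeric attributes plus a nominal class with the values S1, S2, S3 used above:

  private static Instances createSet(int numAttrs) {
    // numAttrs numeric attributes followed by a nominal class attribute
    java.util.ArrayList<Attribute> attrs = new java.util.ArrayList<Attribute>();
    for (int i = 0; i < numAttrs; i++) attrs.add(new Attribute("a" + i));
    attrs.add(new Attribute("class", java.util.Arrays.asList("S1", "S2", "S3")));
    Instances set = new Instances("demo", attrs, 0);
    set.setClassIndex(set.numAttributes() - 1);
    return set;
  }

  private static Instance createInstance(double[] values, String cls, Instances dataset) {
    Instance inst = new DenseInstance(dataset.numAttributes());
    inst.setDataset(dataset);
    for (int i = 0; i < values.length; i++) inst.setValue(i, values[i]);
    inst.setClassValue(cls);
    return inst;
  }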
Example #4
  /**
   * Gets the classifier specification string, which contains the class name of the classifier and
   * any options to the classifier.
   *
   * @return the classifier string.
   */
  protected String getClassifierSpec() {

    Classifier c = getClassifier();
    if (c instanceof OptionHandler) {
      return c.getClass().getName() + " " + Utils.joinOptions(((OptionHandler) c).getOptions());
    }
    return c.getClass().getName();
  }
Example #5
  public int SelectRow_KLDivergenceMisclassified(
      Instances pool, Classifier myEstimator, int desiredAttr) {

    // For each instance whose desiredAttr is unbought and that the current
    // estimator misclassifies, measure the KL divergence (the relative
    // entropy between two probability distributions):
    //   KL(P||Q) = sum_i p_i log(p_i / q_i)
    // With respect to Q = Uniform over n classes this becomes
    //   KL(P||U) = sum_i p_i log(p_i) + log(n),
    // so it suffices to compare sum_i p_i log(p_i).
    // Choose the row whose value is minimum (i.e. closest to uniform).

    int numInstances = pool.numInstances();
    double[] KLDivs = new double[numInstances];
    boolean[] isValidInstance = new boolean[numInstances];
    boolean misclassified = false;
    double[] probs = null;
    Instance inst;

    for (int i = 0; i < numInstances; i++) {
      inst = pool.instance(i);
      try {
        if (inst.classValue() != myEstimator.classifyInstance(inst)) misclassified = true;
        else misclassified = false;
      } catch (Exception e1) {
        e1.printStackTrace();
      }
      if (inst.isMissing(desiredAttr) && misclassified) {
        try {
          probs = myEstimator.distributionForInstance(inst);
        } catch (Exception e) {
          e.printStackTrace();
        }
        for (int j = 0; j < probs.length; j++) KLDivs[i] += MyXLogX(probs[j]);
        isValidInstance[i] = true;
      } else {
        KLDivs[i] = Double.MAX_VALUE;
        isValidInstance[i] = false;
      }
    }

    double leastDivergence = KLDivs[Utils.minIndex(KLDivs)];
    int numLeastDivs = 0;
    for (int i = 0; i < numInstances; i++)
      if (isValidInstance[i] && KLDivs[i] == leastDivergence) numLeastDivs++;
    // no valid candidate at all: signal failure instead of calling r.nextInt(0)
    if (numLeastDivs == 0) return -1;
    int randomInstance = r.nextInt(numLeastDivs);
    int index = 0;
    for (int i = 0; i < numInstances; i++) {
      if (isValidInstance[i] && KLDivs[i] == leastDivergence) {
        if (index == randomInstance) return i;
        else index++;
      }
    }
    return -1;
  }
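
The selector above calls a MyXLogX helper and a Random field r that are not shown. A minimal sketch of the helper, assuming the usual 0 * log(0) = 0 convention:

  private static double MyXLogX(double x) {
    // x * log(x), with the limit value 0 taken at x = 0
    return (x > 0.0) ? x * Math.log(x) : 0.0;
  }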
Example #6
  /**
   * buildClassifier selects the classifier from the set of base classifiers that minimises error
   * on the training data.
   *
   * @param data the training data to be used for generating the boosted classifier.
   * @exception Exception if the classifier could not be built successfully
   */
  public void buildClassifier(Instances data) throws Exception {

    if (m_Classifiers.length == 0) {
      throw new Exception("No base classifiers have been set!");
    }
    Instances newData = new Instances(data);
    newData.deleteWithMissingClass();
    newData.randomize(new Random(m_Seed));
    if (newData.classAttribute().isNominal() && (m_NumXValFolds > 1))
      newData.stratify(m_NumXValFolds);
    Instances train = newData; // train on all data by default
    Instances test = newData; // test on training data by default
    Classifier bestClassifier = null;
    int bestIndex = -1;
    double bestPerformance = Double.NaN;
    int numClassifiers = m_Classifiers.length;
    for (int i = 0; i < numClassifiers; i++) {
      Classifier currentClassifier = getClassifier(i);
      Evaluation evaluation;
      if (m_NumXValFolds > 1) {
        evaluation = new Evaluation(newData);
        for (int j = 0; j < m_NumXValFolds; j++) {
          train = newData.trainCV(m_NumXValFolds, j);
          test = newData.testCV(m_NumXValFolds, j);
          currentClassifier.buildClassifier(train);
          evaluation.setPriors(train);
          evaluation.evaluateModel(currentClassifier, test);
        }
      } else {
        currentClassifier.buildClassifier(train);
        evaluation = new Evaluation(train);
        evaluation.evaluateModel(currentClassifier, test);
      }

      double error = evaluation.errorRate();
      if (m_Debug) {
        System.err.println(
            "Error rate: "
                + Utils.doubleToString(error, 6, 4)
                + " for classifier "
                + currentClassifier.getClass().getName());
      }

      if ((i == 0) || (error < bestPerformance)) {
        bestClassifier = currentClassifier;
        bestPerformance = error;
        bestIndex = i;
      }
    }
    m_ClassifierIndex = bestIndex;
    m_Classifier = bestClassifier;
    if (m_NumXValFolds > 1) {
      m_Classifier.buildClassifier(newData);
    }
  }
Example #7
  public void batchPredict() {
    // load all test set
    String modelFile =
        "data\\AcquireValueShopper\\decisionTable_bayes_trees.model".replace("\\", File.separator);
    String pathTest = "data/AcquireValueShopper/test_new.csv";
    String pathPredict = "data/AcquireValueShopper/submission.csv";

    Scanner scanner;
    String line = "";
    String[] partsOfLine = null;
    String id = "";
    PrintWriter output;
    Map<String, String> testSet = new HashMap<String, String>();
    try {
      scanner = new Scanner(new File(pathTest));
      while (scanner.hasNextLine()) {
        line = scanner.nextLine().trim();
        partsOfLine = line.split(",");
        id = partsOfLine[0];
        testSet.put(id, line);
      }
      scanner.close();
    } catch (FileNotFoundException e1) {
      e1.printStackTrace();
    }
    double[] returnProb;
    double prob = 0.0;
    // predict
    try {
      // load model
      Classifier classifier = (Classifier) SerializationHelper.read(modelFile);

      output = new PrintWriter(pathPredict);
      output.append("id,repeatProbability" + "\n");
      Iterator<String> idIterator = testSet.keySet().iterator();
      while (idIterator.hasNext()) {
        id = idIterator.next();
        line = testSet.get(id);
        Instances instances = buildInstance(line);
        Instance instance = instances.instance(0);
        returnProb = classifier.distributionForInstance(instance);
        prob = returnProb[1];
        // prob = classifier.classifyInstance(instance);
        output.append(id + "," + prob + "\n");
      }
      output.close();
    } catch (FileNotFoundException e) {
      e.printStackTrace();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
Example #8
  /**
   * Gets the classifier specification string, which contains the class name of the classifier and
   * any options to the classifier
   *
   * @param index the index of the classifier string to retrieve, starting from 0.
   * @return the classifier string, or the empty string if no classifier has been assigned (or the
   *     index given is out of range).
   */
  protected String getClassifierSpec(int index) {

    if (index >= m_Classifiers.length) {
      return "";
    }
    Classifier c = getClassifier(index);
    if (c instanceof OptionHandler) {
      return c.getClass().getName() + " " + Utils.joinOptions(((OptionHandler) c).getOptions());
    }
    return c.getClass().getName();
  }
Example #9
  @Override
  public String toString() {
    StringBuffer buffer = new StringBuffer();
    buffer.append("MultiClassifier\n");
    buffer.append("-----------------\n");
    for (Classifier classifier : classifiers) {
      buffer.append(classifier.toString());
      buffer.append("\n-----------------\n");
    }

    return buffer.toString();
  }
Example #10
 @Override
 public double classifyInstance(Instance instance) throws Exception {
   double sum = 0.0;
   for (Classifier classifier : classifiers) {
     double classification = classifier.classifyInstance(instance);
     sum += classification;
   }
   // return 1.0 only when a strict majority of the binary (0/1) votes are positive
   if (sum > classifiers.size() / 2.0) {
     return 1.0;
   }
   return 0.0;
 }
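
A companion distributionForInstance consistent with this vote might look as follows (a sketch assuming the same binary 0/1 ensemble; it is not part of the original class):

   @Override
   public double[] distributionForInstance(Instance instance) throws Exception {
     // Use the fraction of positive votes as the probability of class 1
     double sum = 0.0;
     for (Classifier classifier : classifiers) {
       sum += classifier.classifyInstance(instance);
     }
     double p1 = sum / classifiers.size();
     return new double[] {1.0 - p1, p1};
   }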
Example #11
  /**
   * Boosting method. Boosts any classifier that can handle weighted instances.
   *
   * @param data the training data to be used for generating the boosted classifier.
   * @throws Exception if the classifier could not be built successfully
   */
  protected void buildClassifierWithWeights(Instances data) throws Exception {

    Instances trainData, training, trainingWeightsNotNormalized;
    int numInstances = data.numInstances();
    Random randomInstance = new Random(m_Seed);
    double minLoss = Double.MAX_VALUE;

    // Create a copy of the data so that when the weights are diddled
    // with it doesn't mess up the weights for anyone else
    trainingWeightsNotNormalized = new Instances(data, 0, numInstances);

    // Do boosting iterations
    for (m_NumIterationsPerformed = -1;
        m_NumIterationsPerformed < m_Classifiers.length;
        m_NumIterationsPerformed++) {
      if (m_Debug) {
        System.err.println("Training classifier " + (m_NumIterationsPerformed + 1));
      }

      training = new Instances(trainingWeightsNotNormalized);
      normalizeWeights(training, m_SumOfWeights);

      // Select instances to train the classifier on
      if (m_WeightThreshold < 100) {
        trainData = selectWeightQuantile(training, (double) m_WeightThreshold / 100);
      } else {
        trainData = new Instances(training, 0, numInstances);
      }

      // Build classifier
      if (m_NumIterationsPerformed == -1) {
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
      } else {
        if (m_Classifiers[m_NumIterationsPerformed] instanceof Randomizable)
          ((Randomizable) m_Classifiers[m_NumIterationsPerformed])
              .setSeed(randomInstance.nextInt());
        m_Classifiers[m_NumIterationsPerformed].buildClassifier(trainData);
      }

      // Update instance weights
      setWeights(trainingWeightsNotNormalized, m_NumIterationsPerformed);

      // Has progress been made?
      double loss = 0;
      for (Instance inst : trainingWeightsNotNormalized) {
        loss += Math.log(inst.weight());
      }
      if (m_Debug) {
        System.err.println("Current loss on log scale: " + loss);
      }
      if ((m_NumIterationsPerformed > -1) && (loss > minLoss)) {
        if (m_Debug) {
          System.err.println("Loss has increased: bailing out.");
        }
        break;
      }
      minLoss = loss;
    }
  }
Example #12
  private void jButton3ActionPerformed(
      java.awt.event.ActionEvent evt) { // GEN-FIRST:event_jButton3ActionPerformed
    switch (jComboBox1.getSelectedIndex()) {
      case 0:
        model = new NaiveBayes();
        jTextArea1.append("Building NaiveBayes model from training data ...\n");
        break;
      case 1:
        model = new Id3();
        jTextArea1.append("Building ID3 model from training data ...\n");
        break;
      case 2:
        model = new J48();
        jTextArea1.append("Building J48 model from training data ...\n");
        break;
    }

    try {
      model.buildClassifier(training);
      jTextArea1.append("Model building is complete ...\n");
      jButton4.setEnabled(true);
      jButton6.setEnabled(true);
    } catch (Exception ex) {
      jTextArea1.append("Model building failed ...\n");
      jTextArea1.append(ex.getMessage());
      jTextArea1.append("\n");
      jButton4.setEnabled(true);
      jButton6.setEnabled(false);
      model = null;
    }
  } // GEN-LAST:event_jButton3ActionPerformed
Example #13
  public static Instances getKnowledgeBase() {
    if (knowledgeBase == null) {
      try {
        // load knowledgebase from file
        CreateAppInsertIntoVm.knowledgeBase =
            Action.loadKnowledge(Configuration.getInstance().getKBCreateAppInsertIntoVm());

        // prediction is also performed, so the classifier and the evaluator must be
        // instantiated
        if (!isOnlyLearning()) {
          System.out.println("Classify data CreateAppInsertInto");
          if (knowledgeBase.numInstances() > 0) {
            classifier = new MultilayerPerceptron();
            classifier.buildClassifier(knowledgeBase);
            evaluation = new Evaluation(knowledgeBase);
            evaluation.crossValidateModel(
                classifier,
                knowledgeBase,
                10,
                knowledgeBase.getRandomNumberGenerator(randomData.nextLong(1, 1000)));
            System.out.println("Classified data CreateAppInsertInto");
          } else {
            System.out.println("No Instancedata for classifier CreateAppInsertIntoVm");
          }
        }
      } catch (Exception e) {
        e.printStackTrace();
      }
    }
    return knowledgeBase;
  }
Example #14
  /**
   * Gets the raw output from the classifier
   *
   * @return the raw output from the classifier
   */
  public String getRawResultOutput() {
    StringBuffer result = new StringBuffer();

    if (m_Classifier == null) {
      return "<null> classifier";
    }
    result.append(toString());
    result.append("Classifier model: \n" + m_Classifier.toString() + '\n');

    // append the performance statistics
    if (m_result != null) {
      result.append(m_result);

      if (m_doesProduce != null) {
        for (int i = 0; i < m_doesProduce.length; i++) {
          if (m_doesProduce[i]) {
            try {
              double dv =
                  ((AdditionalMeasureProducer) m_Classifier).getMeasure(m_AdditionalMeasures[i]);
              if (!Utils.isMissingValue(dv)) {
                Double value = Double.valueOf(dv);
                result.append(m_AdditionalMeasures[i] + " : " + value + '\n');
              } else {
                result.append(m_AdditionalMeasures[i] + " : " + '?' + '\n');
              }
            } catch (Exception ex) {
              System.err.println(ex);
            }
          }
        }
      }
    }
    return result.toString();
  }
Example #15
  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {

    Vector newVector = new Vector(1);

    newVector.addElement(
        new Option(
            "\tSkips the determination of sizes (train/test/classifier)\n"
                + "\t(default: sizes are determined)",
            "no-size",
            0,
            "-no-size"));
    newVector.addElement(
        new Option(
            "\tThe full class name of the classifier.\n"
                + "\teg: weka.classifiers.bayes.NaiveBayes",
            "W",
            1,
            "-W <class name>"));

    if ((m_Template != null) && (m_Template instanceof OptionHandler)) {
      newVector.addElement(
          new Option(
              "",
              "",
              0,
              "\nOptions specific to classifier " + m_Template.getClass().getName() + ":"));
      Enumeration enu = ((OptionHandler) m_Template).listOptions();
      while (enu.hasMoreElements()) {
        newVector.addElement(enu.nextElement());
      }
    }
    return newVector.elements();
  }
Example #16
File: LWL.java Project: alishakiba/jDenetX
  /**
   * Generates the classifier.
   *
   * @param instances set of instances serving as training data
   * @throws Exception if the classifier has not been generated successfully
   */
  public void buildClassifier(Instances instances) throws Exception {

    if (!(m_Classifier instanceof WeightedInstancesHandler)) {
      throw new IllegalArgumentException("Classifier must be a " + "WeightedInstancesHandler!");
    }

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (instances.numAttributes() == 1) {
      System.err.println(
          "Cannot build model (only class attribute present in data!), "
              + "using ZeroR model instead!");
      m_ZeroR = new weka.classifiers.rules.ZeroR();
      m_ZeroR.buildClassifier(instances);
      return;
    } else {
      m_ZeroR = null;
    }

    m_Train = new Instances(instances, 0, instances.numInstances());

    m_NNSearch.setInstances(m_Train);
  }
Example #17
  /**
   * Trains a classifier on the training corpus and evaluates it on the test corpus.
   *
   * @param trainFileName the training ARFF file
   * @param testFileName the test ARFF file
   */
  public static void classify(String trainFileName, String testFileName) {
    try {
      File inputFile = new File(fileName + trainFileName); // training corpus file
      ArffLoader atf = new ArffLoader();
      atf.setFile(inputFile);
      Instances instancesTrain = atf.getDataSet(); // load the training set

      // set the class attribute
      inputFile = new File(fileName + testFileName); // test corpus file
      atf.setFile(inputFile);
      Instances instancesTest = atf.getDataSet(); // load the test set

      instancesTest.setClassIndex(instancesTest.numAttributes() - 1);
      instancesTrain.setClassIndex(instancesTrain.numAttributes() - 1);

      classifier = (Classifier) Class.forName(CLASSIFIERNAME).newInstance();
      classifier.buildClassifier(instancesTrain);

      Evaluation eval = new Evaluation(instancesTrain);
      // first argument: a trained classifier; second argument: the data set on which to evaluate it
      eval.evaluateModel(classifier, instancesTest);

      System.out.println(eval.toClassDetailsString());
      System.out.println(eval.toSummaryString());
      System.out.println(eval.toMatrixString());
      System.out.println("precision is :" + (1 - eval.errorRate()));

    } catch (Exception e) {
      e.printStackTrace();
    }
  }
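
The method assumes fileName, CLASSIFIERNAME, and classifier fields declared elsewhere; plausible declarations (the concrete values are assumptions):

  private static String fileName = "data/"; // corpus directory
  private static final String CLASSIFIERNAME = "weka.classifiers.trees.J48"; // any Weka classifier
  private static Classifier classifier;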
Example #18
  /**
   * Analyses the decision points according to the specified context. Furthermore, the context is
   * provided with a visualization of the analysis result.
   *
   * @param cda the cluster decision analyzer holding the data and visualizing the result
   */
  public void analyse(ClusterDecisionAnalyzer cda) {
    clusterDecisionAnalyzer = cda;

    // create empty data set with attribute information
    Instances data = cda.getDataInfo();

    // If not a single learning instance can be provided (because the decision
    // point is never reached, or the decision classes cannot be specified
    // properly), do not call the algorithm
    if (data.numInstances() == 0) {
      System.out.println("No learning instances available");
    }
    // actually solve the classification problem
    else {
      try {
        myClassifier.buildClassifier(data);
        // build up result visualization
        cda.setResultVisualization(createResultVisualization());
        cda.setEvaluationVisualization(createEvaluationVisualization(data));
      } catch (Exception ex) {
        ex.printStackTrace();
        cda.setResultVisualization(
            createMessagePanel("Error while solving the classification problem"));
      }
    }
  }
Example #19
 /** trains the classifier */
 @Override
 public void train() throws Exception {
   if (_train.classIndex() == -1) _train.setClassIndex(_train.numAttributes() - 1);
   _cl.buildClassifier(_train);
   // evaluate classifier and print some statistics
   evaluate();
 }
Example #20
  private static void writePredictedDistributions(
      Classifier c, Instances data, int idIndex, Writer out) throws Exception {
    // header
    out.write("id");
    for (int i = 0; i < data.numClasses(); i++) {
      out.write(",\"");
      out.write(data.classAttribute().value(i).replaceAll("[\"\\\\]", "_"));
      out.write("\"");
    }
    out.write("\n");

    // data
    for (int i = 0; i < data.numInstances(); i++) {
      final String id = data.instance(i).stringValue(idIndex);
      double[] distribution = c.distributionForInstance(data.instance(i));

      // final String label = data.attribute(classIndex).value();
      out.write(id);
      for (double probability : distribution) {
        out.write(",");
        out.write(String.valueOf(probability > 1e-5 ? (float) probability : 0f));
      }
      out.write("\n");
    }
  }
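
A hedged invocation sketch (testData, the id attribute index 0, and the output path are assumptions):

    // Write one row of class probabilities per test instance
    try (Writer out = new BufferedWriter(new FileWriter("predictions.csv"))) {
      writePredictedDistributions(classifier, testData, 0, out);
    }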
Example #21
  public void run() throws Exception {
    BufferedReader datafileclassificationpickup =
        readDataFile(Config.outputPath() + "DaysPickUpClassification.txt");
    BufferedReader datafileclassificationdropoff =
        readDataFile(Config.outputPath() + "DaysDropOffClassification.txt");
    BufferedReader datafileregresssionpickup =
        readDataFile(Config.outputPath() + "DaysPickUpRegression.txt");
    BufferedReader datafileregresssiondropoff =
        readDataFile(Config.outputPath() + "DaysDropOffRegression.txt");

    dataclassificationpickup = new Instances(datafileclassificationpickup);
    dataclassificationpickup.setClassIndex(dataclassificationpickup.numAttributes() - 1);

    dataclassificationdropoff = new Instances(datafileclassificationdropoff);
    dataclassificationdropoff.setClassIndex(dataclassificationdropoff.numAttributes() - 1);

    dataregressionpickup = new Instances(datafileregresssionpickup);
    dataregressionpickup.setClassIndex(dataregressionpickup.numAttributes() - 1);

    dataregressiondropoff = new Instances(datafileregresssiondropoff);
    dataregressiondropoff.setClassIndex(dataregressiondropoff.numAttributes() - 1);

    System.out.println("KNN classification model");
    ibkclassificationpickup = new IBk(10);
    ibkclassificationpickup.buildClassifier(dataclassificationpickup);
    ibkclassificationdropoff = new IBk(10);
    ibkclassificationdropoff.buildClassifier(dataclassificationdropoff);
    System.out.println("Classification Model Ready");

    System.out.println("KNN regression model");
    ibkregressionpickup = new IBk(10);
    ibkregressionpickup.buildClassifier(dataregressionpickup);
    ibkregressiondropoff = new IBk(10);
    ibkregressiondropoff.buildClassifier(dataregressiondropoff);
    System.out.println("Regression Model Ready");

    instclassificationpickup = new DenseInstance(9);
    instclassificationpickup.setDataset(dataclassificationpickup);
    instclassificationdropoff = new DenseInstance(9);
    instclassificationdropoff.setDataset(dataclassificationdropoff);
    instregressionpickup = new DenseInstance(9);
    instregressionpickup.setDataset(dataregressionpickup);
    instregressiondropoff = new DenseInstance(9);
    instregressiondropoff.setDataset(dataregressiondropoff);
    System.out.println("Models ready");
  }
Example #22
  /**
   * Gets the key describing the current SplitEvaluator. For example This may contain the name of
   * the classifier used for classifier predictive evaluation. The number of key fields must be
   * constant for a given SplitEvaluator.
   *
   * @return an array of objects containing the key.
   */
  public Object[] getKey() {

    Object[] key = new Object[KEY_SIZE];
    key[0] = m_Template.getClass().getName();
    key[1] = m_ClassifierOptions;
    key[2] = m_ClassifierVersion;
    return key;
  }
Example #23
 private static int calculatenpeople(Classifier ibkregression, Instance instregression) {
   Double predictValue = null;
   try {
     predictValue = ibkregression.classifyInstance(instregression);
   } catch (Exception e) {
     e.printStackTrace();
   }
   // guard against a failed prediction instead of dereferencing null
   return predictValue == null ? 0 : predictValue.intValue();
 }
Example #24
 // Runs h1 and h2 over the sample set; instances on which h1 and h2 make the same decision are
 // added to L, yielding the labeled set
 public void updateL(Classifier h1, Classifier h2, Instances L, Instances test) {
   int length = test.numInstances(); // bound by the set actually iterated below
   double value1 = 0.0, value2 = 0.0;
   try {
     for (int i = 0; i < length; i++) {
       value1 = h1.classifyInstance(test.instance(i));
       value2 = h2.classifyInstance(test.instance(i));
       if (value1 == value2) {
          // when both classifiers make the same decision, relabel the instance with that class
         test.instance(i).setClassValue(value1);
         L.add(test.instance(i));
       }
     }
   } catch (Exception e) {
     System.out.println(e);
   }
   // return false;
 }
Example #25
 @Override
 public final void run() {
   try {
     Classifier copiedClassifier = AbstractClassifier.makeCopy(classifier);
     copiedClassifier.buildClassifier(train);
     log.print("The " + threadId + "th classifier is built!!!");
     accuracy = getAccuracy(copiedClassifier, test);
   } catch (Exception e) {
      log.print(java.util.Arrays.toString(e.getStackTrace()));
     log.print(e.toString());
   }
   multiThreadEval.finishOneThreads();
   log.print("The " + threadId + "th thread is finshed! accuracy = " + accuracy);
 }
Example #26
  /**
   * Calculates the class membership probabilities for the given test instance.
   *
   * @param instance the instance to be classified
   * @return predicted class probability distribution
   * @exception Exception if distribution can't be computed successfully
   */
  public double[] distributionForInstance(Instance instance) throws Exception {
    if (instance.classAttribute().isNumeric()) {
      throw new UnsupportedClassTypeException("Decorate can't handle a numeric class!");
    }
    double[] sums = new double[instance.numClasses()], newProbs;
    Classifier curr;

    for (int i = 0; i < m_Committee.size(); i++) {
      curr = (Classifier) m_Committee.get(i);
      newProbs = curr.distributionForInstance(instance);
      for (int j = 0; j < newProbs.length; j++) sums[j] += newProbs[j];
    }
    if (Utils.eq(Utils.sum(sums), 0)) {
      return sums;
    } else {
      Utils.normalize(sums);
      return sums;
    }
  }
Example #27
  /** Classification procedure. */
  public double classifyMessage(String message) throws Exception {

    filter.input(makeInstance(message, instances.stringFreeStructure()));
    Instance filteredInstance = filter.output(); // must reuse the filter fitted on the training data

    double predicted = classifier.classifyInstance(filteredInstance); // (int) predicted is the class-label index
    //        System.out.println("Message classified as : "
    //                + instances.classAttribute().value((int) predicted));
    return predicted;
  }
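
The method depends on filter, classifier, and instances fields plus a makeInstance helper that are not shown. A plausible sketch of the helper, assuming a single string attribute named "message" alongside the class attribute:

  private Instance makeInstance(String text, Instances structure) {
    Instance inst = new DenseInstance(structure.numAttributes());
    inst.setDataset(structure);
    inst.setValue(structure.attribute("message"), text); // "message" is an assumed attribute name
    return inst;
  }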
Example #28
 // Relies on corrCnt and totCnt fields maintained outside this method
 public double getAccuracy(Classifier classifier, Instances test) throws Exception {
   for (Instance instance : test) {
     int predClass = (int) classifier.classifyInstance(instance);
     int realClass = (int) instance.classValue();
     if (predClass == realClass) {
       corrCnt++;
     }
   }
   return (double) corrCnt / totCnt;
 }
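
A self-contained variant without the corrCnt and totCnt fields (a sketch; the original evidently maintains those counters across calls):

 public static double accuracyOf(Classifier classifier, Instances test) throws Exception {
   int correct = 0;
   for (Instance instance : test) {
     // compare predicted and actual class indices
     if ((int) classifier.classifyInstance(instance) == (int) instance.classValue()) {
       correct++;
     }
   }
   return test.numInstances() == 0 ? 0.0 : (double) correct / test.numInstances();
 }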
Example #29
  /**
   * Generates the classifier.
   *
   * @param instances set of instances serving as training data
   * @throws Exception if the classifier has not been generated successfully
   */
  public void buildClassifier(Instances instances) throws Exception {
    if (!Groovy.isPresent()) throw new Exception("Groovy classes not in CLASSPATH!");

    // try loading the module
    initGroovyObject();

    // build the model
    if (m_GroovyObject != null) m_GroovyObject.buildClassifier(instances);
    else System.err.println("buildClassifier: No Groovy object present!");
  }
Example #30
 private static boolean calcultateifhotspot(
     Classifier ibkclassification, Instance instclassification) {
   Double predictValue = null;
   try {
     predictValue = ibkclassification.classifyInstance(instclassification);
   } catch (Exception e) {
     e.printStackTrace();
   }
   // a prediction of class 0.0 marks a hotspot; a failed prediction does not
   return predictValue != null && predictValue == 0.0;
 }