/** Demonstrates polymorphic classification: each Wine subtype in the array is classified in turn. */
public static void main(String[] args) {
    final Wine[] cellar = {new RedWine(), new Champagne()};
    final Classifier classifier = new Classifier();
    for (final Wine wine : cellar) {
      classifier.classify(wine);
    }
  }
Example #2
0
  /**
   * Trains this stacking ensemble. First an out-of-fold "meta" data set is built: each base
   * classifier is trained on all-but-one fold and its predictions on the held-out fold become
   * numeric features of the meta set. The aggregating classifier is then trained on that meta
   * set, and finally the base classifiers are (re)trained on the full data set.
   *
   * @param dataSet the labeled training data
   * @param threadPool executor used to parallelize training, or {@code null} to train serially
   */
  @Override
  public void trainC(ClassificationDataSet dataSet, ExecutorService threadPool) {
    final int models = baseClassifiers.size();
    final int C = dataSet.getClassSize();
    // Binary problems get a single score per model; multiclass gets one weight per class.
    weightsPerModel = C == 2 ? 1 : C;
    ClassificationDataSet metaSet =
        new ClassificationDataSet(
            weightsPerModel * models, new CategoricalData[0], dataSet.getPredicting());

    List<ClassificationDataSet> dataFolds = dataSet.cvSet(folds);
    // iterate in the order of the folds so we get the right datum weights; the fill loop
    // below walks the folds in the same order and indexes meta points by running position
    for (ClassificationDataSet cds : dataFolds)
      for (int i = 0; i < cds.getSampleSize(); i++)
        metaSet.addDataPoint(
            new DenseVector(weightsPerModel * models),
            cds.getDataPointCategory(i),
            cds.getDataPoint(i).getWeight());

    // create the meta training set: fill each meta point's feature vector with the
    // cross-validated predictions of every base classifier
    for (int c = 0; c < baseClassifiers.size(); c++) {
      Classifier cl = baseClassifiers.get(c);
      int pos = 0; // running index into metaSet, matching the fold order used above
      for (int f = 0; f < dataFolds.size(); f++) {
        ClassificationDataSet train = ClassificationDataSet.comineAllBut(dataFolds, f);
        ClassificationDataSet test = dataFolds.get(f);
        if (threadPool == null) cl.trainC(train);
        else cl.trainC(train, threadPool);
        for (int i = 0;
            i < test.getSampleSize();
            i++) // evaluate and mark each point in the held out fold.
        {
          CategoricalResults pred = cl.classify(test.getDataPoint(i));
          if (C == 2)
            // binary: encode P(class 0) as a single score in [-1, 1] at column c
            metaSet.getDataPoint(pos).getNumericalValues().set(c, pred.getProb(0) * 2 - 1);
          else {
            // multiclass: copy the full probability vector into this model's column span
            Vec toSet = metaSet.getDataPoint(pos).getNumericalValues();
            for (int j = weightsPerModel * c; j < weightsPerModel * (c + 1); j++)
              toSet.set(j, pred.getProb(j - weightsPerModel * c));
          }

          pos++;
        }
      }
    }

    // train the meta model
    if (threadPool == null) aggregatingClassifier.trainC(metaSet);
    else aggregatingClassifier.trainC(metaSet, threadPool);

    // train the final classifiers, unless folds=1. In that case they are already trained
    if (folds != 1) {
      for (Classifier cl : baseClassifiers)
        if (threadPool == null) cl.trainC(dataSet);
        else cl.trainC(dataSet, threadPool);
    }
  }
Example #3
0
  /**
   * Interactive driver: trains a naive Bayes classifier from a whitespace-delimited training
   * file (whose first line names the attribute columns) and then classifies every line of a
   * testing file, printing each predicted category.
   *
   * <p>Fix: testing lines are now tokenized with the same {@code split("\\s")} used during
   * training before being handed to {@code classify}. Previously the whole raw line was passed
   * as a single feature, so it could never match any individually learned token.
   */
  public static void main(String[] args) {

    Scanner input = new Scanner(System.in);
    final Classifier<String, String> bayes = new BayesClassifier<String, String>();

    System.out.print("Please enter a training file: ");
    String trainingFile = input.nextLine();
    System.out.print("Please enter a testing file: ");
    String testingFile = input.nextLine();

    boolean firstline = true;
    int intAttribute = 0;
    try (BufferedReader br = new BufferedReader(new FileReader(trainingFile))) {
      for (String line; (line = br.readLine()) != null; ) {
        if (firstline) {
          // Header row: list the attribute names and let the user pick the label column.
          String[] r = line.split("\\s");
          System.out.println("Please choose an attribute (by number):");
          System.out.println("Attribute:");
          for (int i = 0; i < r.length; i++) {
            System.out.println((i + 1) + ". " + r[i]);
          }
          System.out.println("Attribute: ");
          intAttribute = Integer.parseInt(input.nextLine()) - 1;
          firstline = false;
          continue;
        }

        if (line.equals("")) continue; // skip blank lines
        String[] r = line.split("\\s");
        // The chosen column is the category; all tokens of the row are the features.
        bayes.learn(r[intAttribute], Arrays.asList(r));
      }
    } catch (IOException e) {
      // FileNotFoundException is an IOException, so one handler covers both cases.
      e.printStackTrace();
    }

    try (BufferedReader br = new BufferedReader(new FileReader(testingFile))) {
      for (String line; (line = br.readLine()) != null; ) {
        if (line.equals("")) continue;
        // Tokenize exactly as during training so the features can match learned ones.
        String temp = bayes.classify(Arrays.asList(line.split("\\s"))).getCategory();
        System.out.println(temp);
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
    System.out.println("Done");
  }
Example #4
0
  /**
   * Classifies a data point by collecting every base classifier's prediction into one stacked
   * feature vector and delegating the final decision to the aggregating classifier.
   *
   * @param data the point to classify
   * @return the aggregating classifier's result on the stacked prediction vector
   */
  @Override
  public CategoricalResults classify(DataPoint data) {
    final int models = baseClassifiers.size();
    final Vec stacked = new DenseVector(weightsPerModel * models);
    for (int i = 0; i < models; i++) {
      final CategoricalResults pred = baseClassifiers.get(i).classify(data);
      if (weightsPerModel == 1) {
        // Binary case: a single score in [-1, 1] derived from P(class 0).
        stacked.set(i, pred.getProb(0) * 2 - 1);
      } else {
        // Multiclass case: the full probability vector fills this model's column span.
        for (int j = 0; j < weightsPerModel; j++) {
          stacked.set(i * weightsPerModel + j, pred.getProb(j));
        }
      }
    }
    return aggregatingClassifier.classify(new DataPoint(stacked));
  }
  /**
   * Trains a one-vs-rest logistic regression classifier for each genre on {@code songs} and
   * prints a tab-separated table of positives, false positives, false negatives, and a
   * combined score per genre.
   *
   * @param songs training songs grouped by genre label
   * @param testSongs held-out songs grouped by genre label (currently unused here)
   * @throws Exception if training fails
   */
  public static void trainAndValidate(
      Map<String, List<FeatureVector>> songs, Map<String, List<FeatureVector>> testSongs)
      throws Exception {
    final List<String> genres =
        Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
            .stream()
            .map(Classify::intToGenre)
            .collect(Collectors.toList());
    System.out.println("Binary logistics classifier on every genre:");
    System.out.println("\t\tPositiv\tFalsePos\tFalseNeg\tAccuracy");
    for (final String genre : genres) {
      final Classifier model = new LogisticsRegressionClassifier(genre);
      model.train(songs);

      int positive = 0;
      int falsePositive = 0;
      int falseNegative = 0;
      int total = 0;
      for (Map.Entry<String, List<FeatureVector>> entry : songs.entrySet()) {
        final boolean isTargetGenre = genre.equals(entry.getKey());
        for (final FeatureVector features : entry.getValue()) {
          final String predicted = model.classify(features);
          if (predicted != null) {
            // Classifier claimed the genre: a hit if the label matches, else a false alarm.
            if (isTargetGenre) {
              positive++;
            } else {
              falsePositive++;
            }
          } else if (isTargetGenre) {
            // Classifier rejected a song that actually belongs to the genre.
            falseNegative++;
          }
          total++;
        }
      }
      System.out.println(
          Classify.shortenGenre(genre)
              + "\t"
              + positive
              + "\t\t"
              + falsePositive
              + "\t\t\t"
              + falseNegative
              + "\t\t\t"
              + (positive * 1.0) / (positive + falseNegative + falsePositive));
    }
  }
  /**
   * Walkthrough of the Bayes classifier API: learn two labeled token lists, classify two
   * unseen sentences, request the detailed per-category results, and cap how many learned
   * classifications the classifier retains.
   */
  public static void main(String[] args) {

    /*
     * The classifier maps String features to a String category: a context's tokenized text
     * supplies the features, and the classification assigns the category.
     */
    final Classifier<String, String> bayes = new BayesClassifier<String, String>();

    /*
     * Teach it one positive and one negative sentence. The whitespace-split tokens are the
     * features; the first argument is the category to associate with them.
     */
    final String[] positiveText = "I love sunny days".split("\\s");
    final String[] negativeText = "I hate rain".split("\\s");
    bayes.learn("positive", Arrays.asList(positiveText));
    bayes.learn("negative", Arrays.asList(negativeText));

    /*
     * Having learned two classifications, it can now classify similar sentences. classify
     * returns a Classification object carrying the featureset, the classification
     * probability, and the resulting category.
     */
    final String[] unknownText1 = "today is a sunny day".split("\\s");
    final String[] unknownText2 = "there will be rain".split("\\s");
    System.out.println( // will output "positive"
        bayes.classify(Arrays.asList(unknownText1)).getCategory());
    System.out.println( // will output "negative"
        bayes.classify(Arrays.asList(unknownText2)).getCategory());

    /*
     * BayesClassifier (the concrete subtype) additionally exposes classifyDetailed, which
     * returns one Classification per candidate category; the entry with the highest
     * probability is the final classification.
     */
    ((BayesClassifier<String, String>) bayes).classifyDetailed(Arrays.asList(unknownText1));

    /*
     * This implementation "forgets" old lessons after a number of learning sessions; bound
     * how many it records.
     */
    bayes.setMemoryCapacity(500); // remember the last 500 learned classifications
  }