Example #1
  public static void main(String[] argv) throws IOException {
    int i;

    // parse options
    for (i = 0; i < argv.length; i++) {
      if (argv[i].charAt(0) != '-') break;
      ++i; // look ahead to the option's value (undone below for options that take none)
      switch (argv[i - 1].charAt(1)) {
        case 'b':
          try {
            flag_predict_probability = (atoi(argv[i]) != 0);
          } catch (NumberFormatException e) {
            exit_with_help();
          }
          break;

        case 'q':
          i--; // -q takes no value, so undo the lookahead increment
          Linear.disableDebugOutput();
          break;

        default:
          System.err.printf("unknown option: -%c%n", argv[i - 1].charAt(1));
          exit_with_help();
          break;
      }
    }
    // after the options there must be a test file, a model file and an output file
    if (i >= argv.length || argv.length <= i + 2) {
      exit_with_help();
    }

    BufferedReader reader = null;
    Writer writer = null;
    try {
      reader =
          new BufferedReader(
              new InputStreamReader(new FileInputStream(argv[i]), Linear.FILE_CHARSET));
      writer =
          new BufferedWriter(
              new OutputStreamWriter(new FileOutputStream(argv[i + 2]), Linear.FILE_CHARSET));

      Model model = Linear.loadModel(new File(argv[i + 1]));
      doPredict(reader, writer, model);
    } finally {
      closeQuietly(reader);
      closeQuietly(writer);
    }
  }
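
For orientation, a hedged invocation sketch of this entry point; the enclosing class name Predict and the file names are assumptions, and the three positional arguments are the test file, the model file, and the output file, in that order.

  // Illustrative only: "-q" silences debug output; the positional arguments are
  // <test_file> <model_file> <output_file>. The class name Predict and the file
  // names are placeholders, not taken from the excerpt above.
  public static void runPredictionDemo() throws IOException {
    Predict.main(new String[] {"-q", "test.txt", "model.txt", "out.txt"});
  }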
Example #2
  /** <b>Note: The streams are NOT closed</b> */
  static void doPredict(BufferedReader reader, Writer writer, Model model) throws IOException {
    int correct = 0;
    int total = 0;
    double error = 0;
    double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

    int nr_class = model.getNrClass();
    double[] prob_estimates = null;
    int n;
    int nr_feature = model.getNrFeature();
    if (model.bias >= 0) n = nr_feature + 1;
    else n = nr_feature;

    if (flag_predict_probability && !model.isProbabilityModel()) {
      throw new IllegalArgumentException(
          "probability output is only supported for logistic regression");
    }

    Formatter out = new Formatter(writer);

    if (flag_predict_probability) {
      int[] labels = model.getLabels();
      prob_estimates = new double[nr_class];

      printf(out, "labels");
      for (int j = 0; j < nr_class; j++) printf(out, " %d", labels[j]);
      printf(out, "\n");
    }

    // each line of the test file holds one instance: "<label> <index>:<value> <index>:<value> ..."
    String line = null;
    while ((line = reader.readLine()) != null) {
      List<Feature> x = new ArrayList<Feature>();
      StringTokenizer st = new StringTokenizer(line, " \t\n");
      double target_label;
      try {
        String label = st.nextToken();
        target_label = atof(label);
      } catch (NoSuchElementException e) {
        throw new RuntimeException("Wrong input format at line " + (total + 1), e);
      }

      while (st.hasMoreTokens()) {
        String[] split = COLON.split(st.nextToken(), 2);
        if (split == null || split.length < 2) {
          throw new RuntimeException("Wrong input format at line " + (total + 1));
        }

        try {
          int idx = atoi(split[0]);
          double val = atof(split[1]);

          // feature indices larger than those in training are not used
          if (idx <= nr_feature) {
            Feature node = new FeatureNode(idx, val);
            x.add(node);
          }
        } catch (NumberFormatException e) {
          throw new RuntimeException("Wrong input format at line " + (total + 1), e);
        }
      }

      // append the bias term as an additional feature node if the model was trained with one
      if (model.bias >= 0) {
        Feature node = new FeatureNode(n, model.bias);
        x.add(node);
      }

      Feature[] nodes = new Feature[x.size()];
      nodes = x.toArray(nodes);

      double predict_label;

      if (flag_predict_probability) {
        assert prob_estimates != null;
        predict_label = Linear.predictProbability(model, nodes, prob_estimates);
        printf(out, "%g", predict_label);
        for (int j = 0; j < model.nr_class; j++) printf(out, " %g", prob_estimates[j]);
        printf(out, "\n");
      } else {
        predict_label = Linear.predict(model, nodes);
        printf(out, "%g\n", predict_label);
      }

      if (predict_label == target_label) {
        ++correct;
      }

      // accumulate sums for the regression metrics (MSE and squared correlation)
      error += (predict_label - target_label) * (predict_label - target_label);
      sump += predict_label;
      sumt += target_label;
      sumpp += predict_label * predict_label;
      sumtt += target_label * target_label;
      sumpt += predict_label * target_label;
      ++total;
    }

    if (model.solverType.isSupportVectorRegression()) {
      info("Mean squared error = %g (regression)%n", error / total);
      info(
          "Squared correlation coefficient = %g (regression)%n", //
          ((total * sumpt - sump * sumt) * (total * sumpt - sump * sumt))
              / ((total * sumpp - sump * sump) * (total * sumtt - sumt * sumt)));
    } else {
      info("Accuracy = %g%% (%d/%d)%n", (double) correct / total * 100, correct, total);
    }
  }
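
As the javadoc note above says, doPredict leaves its streams open. A minimal calling sketch (the file names and the helper name predictFile are placeholders) that closes them via try-with-resources might look like this.

  // Minimal usage sketch; file names are placeholders. Unlike main(), the caller
  // closes (and thereby flushes) the streams itself, since doPredict does not.
  static void predictFile() throws IOException {
    Model model = Linear.loadModel(new File("model.txt"));
    try (BufferedReader reader =
            new BufferedReader(
                new InputStreamReader(new FileInputStream("test.txt"), Linear.FILE_CHARSET));
        Writer writer =
            new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream("out.txt"), Linear.FILE_CHARSET))) {
      doPredict(reader, writer, model);
    }
  }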