Пример #1
0
  public static void trainLibLinear() throws Exception {
    System.out.println("train SVMs.");

    Indexer<String> featureIndexer = IOUtils.readIndexer(WORD_INDEXER_FILE);

    List<SparseVector> trainData = SparseVector.readList(TRAIN_DATA_FILE);
    List<SparseVector> testData = SparseVector.readList(TEST_DATA_FILE);

    Collections.shuffle(trainData);
    Collections.shuffle(testData);

    // List[] lists = new List[] { trainData, testData };
    //
    // for (int i = 0; i < lists.length; i++) {
    // List<SparseVector> list = lists[i];
    // for (int j = 0; j < list.size(); j++) {
    // SparseVector sv = list.get(j);
    // if (sv.label() > 0) {
    // sv.setLabel(1);
    // }
    // }
    // }

    Problem prob = new Problem();
    prob.l = trainData.size();
    prob.n = featureIndexer.size() + 1;
    prob.y = new double[prob.l];
    prob.x = new Feature[prob.l][];
    prob.bias = -1;

    if (prob.bias >= 0) {
      prob.n++;
    }

    for (int i = 0; i < trainData.size(); i++) {
      SparseVector x = trainData.get(i);

      Feature[] input = new Feature[prob.bias > 0 ? x.size() + 1 : x.size()];

      for (int j = 0; j < x.size(); j++) {
        int index = x.indexAtLoc(j) + 1;
        double value = x.valueAtLoc(j);

        assert index >= 0;

        input[j] = new FeatureNode(index + 1, value);
      }

      if (prob.bias >= 0) {
        input[input.length - 1] = new FeatureNode(prob.n, prob.bias);
      }

      prob.x[i] = input;
      prob.y[i] = x.label();
    }

    Model model = Linear.train(prob, getSVMParamter());

    CounterMap<Integer, Integer> cm = new CounterMap<Integer, Integer>();

    for (int i = 0; i < testData.size(); i++) {
      SparseVector sv = testData.get(i);
      Feature[] input = new Feature[sv.size()];
      for (int j = 0; j < sv.size(); j++) {
        int index = sv.indexAtLoc(j) + 1;
        double value = sv.valueAtLoc(j);
        input[j] = new FeatureNode(index + 1, value);
      }

      double[] dec_values = new double[model.getNrClass()];
      Linear.predictValues(model, input, dec_values);
      int max_id = ArrayMath.argmax(dec_values);
      int pred = model.getLabels()[max_id];
      int answer = sv.label();

      cm.incrementCount(answer, pred, 1);
    }

    System.out.println(cm);

    model.save(new File(MODEL_FILE));
  }