Пример #1
0
  public svm_problem do_sample(svm_problem svmProblem) {
    logger.debug("Creating " + sample + " sample");
    Map<Double, List<Integer>> label2index = new HashMap<Double, List<Integer>>();
    for (int i = 0; i < svmProblem.l; ++i) {
      double label = svmProblem.y[i];
      if (label2index.containsKey(label)) {
        label2index.get(label).add(i);
      } else {
        List<Integer> indeces = new LinkedList<Integer>();
        indeces.add(i);
        label2index.put(label, indeces);
      }
    }

    for (List<Integer> indeces : label2index.values()) {
      Collections.shuffle(indeces);
    }

    int newSize = (int) (svmProblem.l * sample);
    logger.debug("Original size: " + svmProblem.l);
    logger.debug("Sample size: " + newSize);
    double[] newlabels = new double[newSize];
    svm_node[][] newdata = new svm_node[newSize][];

    int i = 0;
    for (List<Integer> indeces : label2index.values()) {
      int catSize = (int) (indeces.size() * sample);
      for (int j = 0; j < catSize; ++j) {
        int index = indeces.remove(0);
        newlabels[i] = svmProblem.y[index];
        newdata[i] = svmProblem.x[index];
        if (++i >= newSize) {
          break;
        }
      }
      if (i >= newSize) {
        break;
      }
    }

    // fill any remaining empty items caused due to rounding
    if (i < newSize) {
      for (List<Integer> indeces : label2index.values()) {
        if (indeces.isEmpty()) {
          continue;
        }
        int index = indeces.remove(0);
        newlabels[i] = svmProblem.y[index];
        newdata[i] = svmProblem.x[index];
        if (++i >= newSize) {
          break;
        }
      }
    }

    svm_problem newProblem = new svm_problem();
    newProblem.l = newSize;
    newProblem.x = newdata;
    newProblem.y = newlabels;

    return newProblem;
  }