public svm_problem do_sample(svm_problem svmProblem) { logger.debug("Creating " + sample + " sample"); Map<Double, List<Integer>> label2index = new HashMap<Double, List<Integer>>(); for (int i = 0; i < svmProblem.l; ++i) { double label = svmProblem.y[i]; if (label2index.containsKey(label)) { label2index.get(label).add(i); } else { List<Integer> indeces = new LinkedList<Integer>(); indeces.add(i); label2index.put(label, indeces); } } for (List<Integer> indeces : label2index.values()) { Collections.shuffle(indeces); } int newSize = (int) (svmProblem.l * sample); logger.debug("Original size: " + svmProblem.l); logger.debug("Sample size: " + newSize); double[] newlabels = new double[newSize]; svm_node[][] newdata = new svm_node[newSize][]; int i = 0; for (List<Integer> indeces : label2index.values()) { int catSize = (int) (indeces.size() * sample); for (int j = 0; j < catSize; ++j) { int index = indeces.remove(0); newlabels[i] = svmProblem.y[index]; newdata[i] = svmProblem.x[index]; if (++i >= newSize) { break; } } if (i >= newSize) { break; } } // fill any remaining empty items caused due to rounding if (i < newSize) { for (List<Integer> indeces : label2index.values()) { if (indeces.isEmpty()) { continue; } int index = indeces.remove(0); newlabels[i] = svmProblem.y[index]; newdata[i] = svmProblem.x[index]; if (++i >= newSize) { break; } } } svm_problem newProblem = new svm_problem(); newProblem.l = newSize; newProblem.x = newdata; newProblem.y = newlabels; return newProblem; }