/** * @param lr is the learning rate. * @param ts is the training set. */ public LogisticRegression(double lr, double rp, TrainingExample[] ts) { learningRate = lr; regularizationParam = rp; trainingSet = ts; // Construct the correct number of hypothesis given // the number of features in the trainingSet // and the number of different classifications ArrayList<Integer> classifications = new ArrayList<Integer>(); for (TrainingExample t : trainingSet) { Integer c = t.getAnswer(); if (!classifications.contains(c)) classifications.add(c); } hypothesis = new Hypothesis[classifications.size() - 0]; int s = trainingSet[0].getInput().length + 1; for (int i = 0; i < classifications.size() - 0; ++i) { hypothesis[i] = new Hypothesis(s, classifications.get(i)); } try { costFunction = this.getClass().getMethod("defaultCostFunction", Hypothesis.class); } catch (Exception e) { e.printStackTrace(); } }
/**
 * Runs one iteration of batch gradient descent on this hypothesis,
 * updating every parameter in place.
 *
 * @param tSet the training set to be used
 */
private void gradientDecent(TrainingExample[] tSet) {
    double h, val, newVal;
    int answer;
    // Learning rate scaled by the batch size (averages the gradient).
    double lm = LogisticRegression.this.learningRate / tSet.length;
    for (int i = 0; i < numFeatures; ++i) {
        val = 0;
        for (TrainingExample t : tSet) {
            answer = t.getAnswer();
            h = predict(t.getInput());
            // Index 0 is the bias term, whose implicit input is 1;
            // parameter i > 0 pairs with feature input[i - 1].
            if (i == 0)
                val += (h - answer);
            else
                val += (h - answer) * t.getInput()[i - 1];
        }
        // L2 weight decay shrinks every parameter EXCEPT the bias term:
        // by convention the intercept is not regularized. (The original
        // code applied the shrink factor to i == 0 as well.)
        if (i == 0)
            newVal = parameter.get(i, 0);
        else
            newVal = parameter.get(i, 0) * (1 - lm * LogisticRegression.this.getRegularizationParam());
        newVal -= lm * val;
        parameter.set(i, 0, newVal);
    }
}
/** * Runs gradient decent to tune the parameters of each hypothesis. * * @param iterations the number of times to run gradient decent */ public void tune(int iterations) { for (Hypothesis h : hypothesis) { // construct a new training set using One vs. Rest // if the training example has the same value as the // hypothesis then set the answer to 1 // otherwise set the answer to 0. TrainingExample[] tSet = new TrainingExample[trainingSet.length]; int answer; int i = 0; for (TrainingExample t : trainingSet) { if (t.getAnswer() == h.getClassification()) answer = 1; else answer = 0; tSet[i] = new TrainingExample(t.getInput(), answer); ++i; } for (i = 0; i < iterations; ++i) { h.gradientDecent(tSet); } } }
/**
 * Calculates the regularized cross-entropy cost of the
 * <code>trainingSet</code> under the given hypothesis.
 *
 * @param hyp the hypothesis to use in calculating the cost.
 * @return the cost associated with the hypothesis.
 */
public double defaultCostFunction(Hypothesis hyp) {
    double error = 0;
    double h;
    int answer;
    for (TrainingExample t : trainingSet) {
        try {
            h = (Double) hyp.predict(t.getInput());
        } catch (Exception e) {
            // Best-effort: skip examples the hypothesis cannot score.
            e.printStackTrace();
            continue;
        }
        answer = t.getAnswer();
        // Cross-entropy term: -[y*log(h) + (1 - y)*log(1 - h)].
        error -= answer * log(h) + (1 - answer) * log(1 - h);
    }
    // L2 regularization. The bias parameter (index 0) is excluded by
    // convention, and the penalty is scaled BY lambda — the original
    // code divided by regularizationParam, which inverts the effect of
    // lambda and divides by zero when lambda == 0.
    double regError = 0;
    for (int i = 1; i < hyp.getNumFeatures(); ++i) {
        regError += pow(hyp.getParameter(i), 2);
    }
    error += regularizationParam * regError;
    return error / (2 * trainingSet.length);
}
public void labelGrids(int k) throws FileNotFoundException { // Initializing the global arraylist trainingData = new ArrayList<>(); int count = 0; Scanner input = new Scanner(new File("data.txt")); int noOfRows = input.nextInt(); int noOfColumns = input.nextInt(); input.nextLine(); String[] row; for (int i = 0; i < noOfRows; i++) { row = input.nextLine().split(" "); for (int j = 0; j < row.length; j++) { if (row[j].equals("+")) { TrainingExample newExample = new TrainingExample(); newExample.x2 = i; newExample.x1 = j; newExample.y = '+'; newExample.ex_no = count; count++; trainingData.add(newExample); } else if (row[j].equals("-")) { TrainingExample newExample = new TrainingExample(); newExample.x2 = i; newExample.x1 = j; newExample.y = '-'; newExample.ex_no = count; count++; trainingData.add(newExample); } else { TrainingExample newExample = new TrainingExample(); newExample.x2 = i; newExample.x1 = j; newExample.y = '.'; newExample.ex_no = count; count++; trainingData.add(newExample); } } } for (int i = 0; i < trainingData.size(); i++) { kNN(trainingData, i, 1, k, false, true); } labelPrint(trainingData, noOfRows, noOfColumns); }