/** * * Least-square solution y = X * b where: y_i = b_0 + b_1*x_1i + b_2*x_2i + ... + b_k*x_ki * including intercep term y_i = b_1*x_1i + b_2*x_2i + ... + b_k*x_ki without intercep term * * @param datay * @param dataX */ private void multipleLinearRegression(Matrix datay, Matrix dataX) { Matrix X, y; try { X = dataX; y = datay; b = X.solve(y); coeffs = new double[b.getRowDimension()]; for (int j = 0; j < b.getRowDimension(); j++) { coeffs[j] = b.get(j, 0); // System.out.println("coeff[" + j + "]=" + coeffs[j]); } // Residuals: Matrix r = X.times(b).minus(y); residuals = r.getColumnPackedCopy(); // root mean square error (RMSE) rmse = Math.sqrt(MathUtils.sumSquared(residuals) / residuals.length); // Predicted values Matrix p = X.times(b); predictedValues = p.getColumnPackedCopy(); // Correlation between original values and predicted ones correlation = MathUtils.correlation(predictedValues, y.getColumnPackedCopy()); } catch (RuntimeException re) { throw new Error("Error solving Least-square solution: y = X * b"); } }
// Given a set of coefficients and data predict values applying linear equation // This function can be used to test with data that was not used in training // c[] is the number of the columns in the file not the indexFeatures public void predictValues( String fileName, int indVariable, int[] c, boolean interceptTerm, int rowIni, int rowEnd) { try { BufferedReader reader = new BufferedReader(new FileReader(fileName)); Matrix data = Matrix.read(reader); reader.close(); int rows = data.getRowDimension() - 1; int cols = data.getColumnDimension() - 1; if (rowIni < 0 || rowIni > rows) throw new RuntimeException( "Problem reading file, rowIni=" + rowIni + " and number of rows in file=" + rows); if (rowEnd < 0 || rowEnd > rows) throw new RuntimeException( "Problem reading file, rowIni=" + rowIni + " and number of rows in file=" + rows); if (rowIni > rowEnd) throw new RuntimeException( "Problem reading file, rowIni < rowend" + rowIni + " < " + rowEnd); Matrix indVar = data.getMatrix( rowIni, rowEnd, indVariable, indVariable); // dataVowels(:,0) -> last col is the independent variable data = data.getMatrix( rowIni, rowEnd, c); // the dependent variables correspond to the column indices in c int numCoeff; if (interceptTerm) numCoeff = c.length + 1; else numCoeff = c.length; if (b != null) { if (b.getRowDimension() == numCoeff) { if (interceptTerm) { // first column of X is filled with 1s if b_0 != 0 int row = data.getRowDimension(); int col = data.getColumnDimension(); Matrix B = new Matrix(row, col + 1); Matrix ones = new Matrix(row, 1); for (int i = 0; i < row; i++) ones.set(i, 0, 1.0); B.setMatrix(0, row - 1, 0, 0, ones); B.setMatrix(0, row - 1, 1, col, data); data = B; } // Residuals: Matrix r = data.times(b).minus(indVar); residuals = r.getColumnPackedCopy(); // root mean square error (RMSE) rmse = Math.sqrt(MathUtils.sumSquared(residuals) / residuals.length); // Predicted values Matrix p = data.times(b); predictedValues = p.getColumnPackedCopy(); for (int i = 0; i < predictedValues.length; i++) if (predictedValues[i] < 0.0) System.out.println( "*** WARNING predictedValue < 0.0 : predictedValues[" + i + "]=" + predictedValues[i]); // Correlation between original values and predicted ones correlation = MathUtils.correlation(predictedValues, indVar.getColumnPackedCopy()); System.out.println("Correlation predicted values and real: " + correlation); System.out.println("RMSE (root mean square error): " + rmse); } else { throw new RuntimeException( "Number of columns of data is not the same as number of coeficients"); } } else { throw new RuntimeException("Regression coefficients are not loaded"); } } catch (Exception e) { throw new RuntimeException("Problem reading file " + fileName, e); } }