/**
 * Runs the cost/gradient consistency check on {@code minable} at two points: first with every
 * parameter set to 0.0, then at a point a small step (factor -0.0001) along the cost gradient
 * away from that all-zero point.
 *
 * <p>This method overwrites the parameters held by {@code minable}.
 *
 * @param minable the function whose cost and gradient implementations are compared
 * @return always {@code true}; a failed check is reported by the exception thrown from
 *     {@link #testCostAndGradientCurrentParameters}
 * @throws IllegalStateException if either check finds that cost and gradient disagree
 */
public static boolean testCostAndGradient(Minimizable.ByGradient minable) {
  // First check: all parameters at 0.0.
  Matrix parameters = minable.getNewMatrix();
  parameters.setAll(0.0);
  minable.setParameters(parameters);
  testCostAndGradientCurrentParameters(minable);

  // The check pushes perturbed parameters into minable, so re-install the all-zero point
  // explicitly rather than relying on whatever state the check leaves behind. Without this,
  // the gradient below could be evaluated at a different point than the one the step is
  // added to.
  parameters.setAll(0.0);
  minable.setParameters(parameters);

  // Second check: a small step against the cost gradient, away from the all-zero point.
  Matrix delta = minable.getNewMatrix();
  minable.getCostGradient(delta);
  delta.timesEquals(-0.0001);
  parameters.plusEquals(delta);
  minable.setParameters(parameters);
  testCostAndGradientCurrentParameters(minable);

  return true;
}
/**
 * Compares the gradient reported by {@code minable} at its current parameters with a forward
 * finite-difference estimate obtained by perturbing one parameter at a time and re-measuring
 * the cost.
 *
 * <p>Both gradients are normalized to unit L2 length and the angle between them is computed.
 * The check fails when that angle exceeds {@code 5 * epsilon}, where
 * {@code epsilon = 0.1 / analyticGradient.twoNorm()}.
 *
 * <p>The parameters that were current on entry are re-installed into {@code minable} before
 * the result is evaluated.
 *
 * <p>NOTE(review): when the analytic gradient has zero norm, epsilon is infinite and the check
 * presumably cannot succeed -- confirm against the {@code Matrix} implementation.
 *
 * @param minable the function whose cost and gradient implementations are compared
 * @return the angle, in radians, between the normalized analytic and empirical gradients
 * @throws IllegalStateException if the angle is NaN or larger than the tolerance
 */
public static double testCostAndGradientCurrentParameters(Minimizable.ByGradient minable) {
  Matrix parameters = minable.getParameters(minable.getNewMatrix());
  double cost = minable.getCost();

  // The gradient reported by the minimizable function.
  Matrix analyticGradient = minable.getCostGradient(minable.getNewMatrix());
  // The gradient estimated from the slope of the cost. The clone is only used as a container;
  // every entry is overwritten in the loop below.
  Matrix empiricalGradient = (Matrix) analyticGradient.cloneMatrix();

  // This setting of epsilon should make the individual elements of the analytical gradient
  // and the empirical gradient equal. This simplifies the comparison of the individual
  // dimensions of the gradient and thus makes debugging easier.
  double epsilon = 0.1 / analyticGradient.twoNorm();
  double tolerance = epsilon * 5;
  System.out.println("epsilon = " + epsilon + " tolerance=" + tolerance);

  // Check each direction: perturb it, measure the new cost, and record the resulting slope
  // so it can be compared with the gradient from minable.getCostGradient().
  for (int i = 0; i < parameters.singleSize(); i++) {
    double param = parameters.singleValue(i);
    parameters.setSingleValue(i, param + epsilon);
    minable.setParameters(parameters);
    double epsCost = minable.getCost();
    double slope = (epsCost - cost) / epsilon;
    System.out.println(
        "cost=" + cost
            + " epsCost=" + epsCost
            + " slope[" + i + "] = " + slope
            + " gradient[]=" + analyticGradient.singleValue(i));
    assert (!Double.isNaN(slope));
    logger.fine(
        "TestMinimizable checking singleIndex " + i
            + ": gradient slope = " + analyticGradient.singleValue(i)
            + ", cost+epsilon slope = " + slope
            + ": slope difference = " + Math.abs(slope - analyticGradient.singleValue(i)));
    // No negative below because the gradient points in the direction
    // of maximizing the function.
    empiricalGradient.setSingleValue(i, slope);
    // Undo the perturbation in the local copy before moving to the next coordinate.
    parameters.setSingleValue(i, param);
  }

  // The loop last installed a perturbed parameter vector; hand the original one back so the
  // caller does not silently continue from a shifted point.
  minable.setParameters(parameters);

  // Normalize the matrices to have the same L2 length.
  System.out.println("empiricalGradient.twoNorm = " + empiricalGradient.twoNorm());
  analyticGradient.timesEquals(1.0 / analyticGradient.twoNorm());
  empiricalGradient.timesEquals(1.0 / empiricalGradient.twoNorm());

  // Angle between the two unit vectors, in radians. Rounding can push the dot product of two
  // (nearly) identical unit vectors marginally outside [-1, 1], where Math.acos yields NaN;
  // clamp so that a matching gradient is not rejected. A NaN dot product still stays NaN.
  double cosine = analyticGradient.dotProduct(empiricalGradient);
  double angle = Math.acos(Math.max(-1.0, Math.min(1.0, cosine)));
  logger.info("TestMinimizable angle = " + angle);

  if (Double.isNaN(angle)) {
    throw new IllegalStateException("Gradient/Cost error: angle is NaN!");
  }
  if (Math.abs(angle) > tolerance) {
    throw new IllegalStateException("Gradient/Cost mismatch: angle=" + angle);
  }
  return angle;
}