/**
 * Scores a factorization model against held-out test data.
 *
 * <p>The raw test lines are parsed into ratings and passed through
 * {@code aggregateScores}; the PMML description is turned into a
 * {@link MatrixFactorizationModel}. Which metric is computed depends on the
 * {@code implicit} flag: when it is set the AUC is returned as-is, otherwise
 * the reciprocal of the RMSE is returned, so a smaller error produces a
 * larger value.
 *
 * @param sparkContext Spark context used to load the model and compute AUC
 * @param model PMML description of the model to evaluate
 * @param modelParentPath path passed through to {@code pmmlToMFModel}
 * @param testData unparsed test input lines
 * @return the AUC, or {@code 1.0 / rmse}, depending on {@code implicit}
 */
@Override
public double evaluate(
    JavaSparkContext sparkContext,
    PMML model,
    Path modelParentPath,
    JavaRDD<String> testData) {
  log.info("Evaluating model");

  JavaRDD<Rating> scoredRatings =
      aggregateScores(parsedToRatingRDD(toParsedRDD(testData)));
  MatrixFactorizationModel factorModel =
      pmmlToMFModel(sparkContext, model, modelParentPath);

  if (!implicit) {
    double rmse = RMSE.rmse(factorModel, scoredRatings);
    log.info("RMSE: {}", rmse);
    // Inverted so that a lower error yields a higher evaluation value.
    return 1.0 / rmse;
  }

  double auc = AUC.areaUnderCurve(sparkContext, factorModel, scoredRatings);
  log.info("AUC: {}", auc);
  return auc;
}
@Override public OutputOracle forecast(InputOracle io) throws OracleException { double AVGrmse = Double.MAX_VALUE; Oracle best = null; try { SelectInstancesRP(io); KNN.setInstances(m_Training); m_TestSet = DataConverter.FromInputOracleToInstance(io); logger.info("PREDICT TARGET VALUE FOR : " + m_TestSet.toStringNoWeight()); System.out.println(m_TestSet); Neighbourshood = KNN.kNearestNeighbours( m_TestSet, NumNeighbours); // >k neighbours are returned if there are more than one neighbours at // the kth boundary. distances = KNN.getDistances(); RMSE = RMSE(this.ConsideredOutOracle); double actual; for (Map.Entry<Oracle, Double[]> entry : RMSE.entrySet()) { // actual=(entry.getValue()[0]*VarianceRMSE(entry.getValue())[0]+entry.getValue()[1]*VarianceRMSE(entry.getValue())[1])/2; actual = (entry.getValue()[0] + entry.getValue()[1]) / 2; if (actual <= AVGrmse) { AVGrmse = actual; best = entry.getKey(); } } logger.info("ORACLE SELECTED FOR PREDICTION : " + best.toString().split("@")[0]); return best.forecast(io); } catch (Exception ex) { logger.error("forecast error " + ex); throw new OracleException(ex); } }