public static void createDataFrame() { // Convert batch to JSON and inject into R native dataframe // This will be required for JSON processing System.out.println(engine.eval("library(jsonlite)")); for (int i = 0; i < hdfsPaths.size(); i++) { System.out.println( engine.eval( "testdata_" + i + " <- read.df(sqlContext, '" + hdfsPaths.get(i) + "', 'json')")); } }
/**
 * Evaluates an R script one line at a time and prints the result of each evaluation.
 *
 * <p>The file is decoded with the platform default charset. Each line is passed to
 * {@code engine.eval} on its own, in file order. An {@link IOException} raised while opening the
 * file is printed via {@code printStackTrace()} and otherwise ignored.
 *
 * @param pathToRScript file system path of the R script to run
 */
public static void readRScript(String pathToRScript) {
  Path scriptPath = Paths.get(pathToRScript);
  try (Stream<String> scriptLines = Files.lines(scriptPath, Charset.defaultCharset())) {
    // Walk the stream sequentially so statements run in file order.
    for (String statement : (Iterable<String>) scriptLines::iterator) {
      System.out.println(engine.eval(statement));
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
}
/**
 * Evaluates a single R command while holding the engine's {@code Rsync} lock.
 *
 * <p>The lock is requested with {@code safeLock()} and released in a {@code finally} block, but
 * only when {@code safeLock()} returned {@code true}. When {@code convert} is {@code false},
 * {@code engine.waitForR()} is called after the evaluation and before returning.
 *
 * @param command R source to evaluate
 * @param convert forwarded unchanged to {@code engine.eval(String, boolean)}
 * @return whatever {@code engine.eval} returned
 */
private static REXP runCommand(String command, boolean convert) {
  final boolean lockedHere = engine.getRsync().safeLock();
  final REXP result;
  try {
    result = engine.eval(command, convert);
  } finally {
    if (lockedHere) {
      engine.getRsync().unlock();
    }
  }

  if (convert) {
    return result;
  }
  engine.waitForR();
  return result;
}
/**
 * Runs a fixed sequence of SparkR statements and prints the result of each one.
 *
 * <p>The statements fetch a Java RDD through {@code SparkR:::callJStatic}, wrap it as an RDD,
 * create a DataFrame from it, show it, collect it into {@code localDf} and finally evaluate
 * {@code localDf}. They rely on the R variables {@code sc} and {@code sqlContext} already
 * existing in the R session.
 */
public static void runTestScript() {
  final String[] statements = {
    "jrdd <- SparkR:::callJStatic(\"com.ikanow.aleph2.analytics.r.utils.RScriptUtils\",\"getJavaRDD\", sc)",
    "SparkR:::show(jrdd)",
    "newRdd <- SparkR:::RDD(jrdd, \"string\")",
    "df <- SparkR:::createDataFrame(sqlContext, newRdd)",
    "showDF(df)",
    "localDf <- collect(df)",
    "localDf",
  };

  // Order matters: every statement uses a variable assigned by an earlier one.
  for (String statement : statements) {
    System.out.println(engine.eval(statement));
  }
}
public static void start(String... args) { if (!Rengine.versionCheck()) { System.err.println("** Version mismatch " + "- Java files don't match library version. **"); System.exit(1); } System.out.println("Creating R Engine (with arguments)"); // 1) we pass the arguments from the command line // 2) we won't use the main loop at first, we'll start it later // (that's the "false" as second argument) // 3) the callbacks are implemented by the TextConsole class above engine = new Rengine(args, false, null); // the engine creates R is a new thread, so we should wait until it's ready if (!engine.waitForR()) { System.out.println("Cannot load R."); return; } engine.eval("{library(rJava);.jinit()}", false); }
public static void main(String[] args) throws IOException { start("--vanilla"); initializeSparkR(); System.out.println( engine.eval( "jrdd <- SparkR:::callJStatic(\"com.ikanow.aleph2.analytics.r.utils.RScriptUtils\",\"getJavaRDD\", sc)")); System.out.println(engine.eval("SparkR:::show(jrdd)")); System.out.println(engine.eval("newRdd <- SparkR:::RDD(jrdd, \"string\")")); System.out.println(engine.eval("df <- SparkR:::createDataFrame(sqlContext, newRdd)")); System.out.println(engine.eval("showDF(df)")); System.out.println(engine.eval("localDf <- collect(df)")); System.out.println(engine.eval("localDf")); // processAnalytics(); // readRScript(); shutdown(); }
public static void initializeSparkR() { start("--vanilla"); // To do move these into an array or separate file or something and check for null // if null we need to exit our gracefully as all statements are required for the jobs to run System.out.println(engine.eval(".libPaths(c(.libPaths(), '/root/spark-1.5.2/R/lib'))")); System.out.println(engine.eval("Sys.setenv(SPARK_HOME = '/root/spark-1.5.2')")); System.out.println( engine.eval( "Sys.setenv(PATH = paste(Sys.getenv(c('PATH')), '/root/spark-1.5.2/bin', sep=':'))")); System.out.println(engine.eval("library(SparkR)")); // We should probably point to the server here Im guessing this is pointing to localhost System.out.println( engine.eval( "sc <- sparkR.init(sparkJars = \"/root/aleph2_analytic_services_R-0.0.1-SNAPSHOT.jar\")")); System.out.println(engine.eval("sqlContext <- sparkRSQL.init(sc)")); }
// Run prediction method on data based on file name public StockInfo[] runPrediction(Map<String, StockInfo[]> dataToProcess) { double[] resultArrayOpen = null; double[] resultArrayHigh = null; double[] resultArrayLow = null; double[] resultArrayClose = null; double[] resultArrayVolume = null; StockInfo[] predictedDataArray = null; try { logger.log("prediction will start"); // Flat the data to perform prediction on it StockInfo[] flatStockInfo = this.flatMapOfData(dataToProcess); Collections.reverse(Arrays.asList(flatStockInfo)); REXP x; RVector v; // Check for installed packages x = re.eval("installed.packages()"); v = x.asVector(); String[] packages = x.asStringArray(); boolean isForecastInstalled = false; logger.log("<R> getting installed packages"); for (int index = 0; index < packages.length && isForecastInstalled == false; index++) { logger.log("<R> has installed " + packages[index]); if (packages[index] != null && packages[index].compareTo("forecast") == 0) { isForecastInstalled = true; } } // If forecast needs to be installed if (isForecastInstalled == false) { logger.log("<R> will set repos"); // Set CRAN re.eval("r <- getOption(\"repos\")"); re.eval("r[\"CRAN\"] <- \"http://cran.us.r-project.org\""); re.eval("options(repos = r)"); re.eval("rm(r)"); // Install forecast re.eval("install.packages(\"forecast\")"); logger.log("<R> will install forecast package"); } // Load forecast library re.eval("library(\"forecast\")"); logger.log("<R> loaded forecast"); // Make prediction for Open value ----------------------- // Load data into R logger.log("<R> loading data into R"); StringBuilder builder = new StringBuilder("inputData <- c("); for (int index = 0; index < flatStockInfo.length; index++) { builder.append(flatStockInfo[index].open); if (index != flatStockInfo.length - 1) { builder.append(","); } else { builder.append(")"); } } String stringFromBuilder = builder.toString(); re.eval(stringFromBuilder); // Create time series from data logger.log("<R> forecasting 
open values BestFit"); re.eval("temporalData <- ts(inputData, frequency=365)"); // Forecast data re.eval("forecastData <- forecast(temporalData, h=30)"); // re.eval("arimaModel <- auto.arima(temporalData, max.p=5, max.q=5, max.P=5, max.Q=5)"); // re.eval("forecastData <- forecast(arimaModel, h=30)"); x = re.eval("forecastData"); v = x.asVector(); x = (REXP) v.elementAt(1); // instead of 3 resultArrayOpen = x.asDoubleArray(); // Make prediction for High value ------------------------ builder = new StringBuilder("inputData <- c("); for (int index = 0; index < flatStockInfo.length; index++) { builder.append(flatStockInfo[index].high); if (index != flatStockInfo.length - 1) { builder.append(","); } else { builder.append(")"); } } stringFromBuilder = builder.toString(); re.eval(stringFromBuilder); // Create time series from data logger.log("<R> forecasting high values BestFit"); re.eval("temporalData <- ts(inputData, frequency=365)"); // Forecast data re.eval("forecastData <- forecast(temporalData, h=30)"); // re.eval("arimaModel <- auto.arima(temporalData, max.p=5, max.q=5, max.P=5, max.Q=5)"); // re.eval("forecastData <- forecast(arimaModel, h=30)"); x = re.eval("forecastData"); v = x.asVector(); x = (REXP) v.elementAt(1); resultArrayHigh = x.asDoubleArray(); // Make prediction for Low value ------------------------ builder = new StringBuilder("inputData <- c("); for (int index = 0; index < flatStockInfo.length; index++) { builder.append(flatStockInfo[index].low); if (index != flatStockInfo.length - 1) { builder.append(","); } else { builder.append(")"); } } stringFromBuilder = builder.toString(); re.eval(stringFromBuilder); // Create time series from data logger.log("<R> forecasting low values BestFit"); re.eval("temporalData <- ts(inputData, frequency=365)"); // Forecast data re.eval("forecastData <- forecast(temporalData, h=30)"); // re.eval("arimaModel <- auto.arima(temporalData, max.p=5, max.q=5, max.P=5, max.Q=5)"); // re.eval("forecastData <- 
forecast(arimaModel, h=30)"); x = re.eval("forecastData"); v = x.asVector(); x = (REXP) v.elementAt(1); resultArrayLow = x.asDoubleArray(); // Make prediction for Close value ------------------------ builder = new StringBuilder("inputData <- c("); for (int index = 0; index < flatStockInfo.length; index++) { builder.append(flatStockInfo[index].close); if (index != flatStockInfo.length - 1) { builder.append(","); } else { builder.append(")"); } } stringFromBuilder = builder.toString(); re.eval(stringFromBuilder); // Create time series from data logger.log("<R> forecasting close values BestFit"); re.eval("temporalData <- ts(inputData, frequency=365)"); // Forecast data re.eval("forecastData <- forecast(temporalData, h=30)"); // re.eval("arimaModel <- auto.arima(temporalData, max.p=5, max.q=5, max.P=5, max.Q=5)"); // re.eval("forecastData <- forecast(arimaModel, h=30)"); x = re.eval("forecastData"); v = x.asVector(); x = (REXP) v.elementAt(1); resultArrayClose = x.asDoubleArray(); // Make prediction for Close value ------------------------ builder = new StringBuilder("inputData <- c("); for (int index = 0; index < flatStockInfo.length; index++) { builder.append(flatStockInfo[index].volume); if (index != flatStockInfo.length - 1) { builder.append(","); } else { builder.append(")"); } } stringFromBuilder = builder.toString(); re.eval(stringFromBuilder); // Create time series from data re.eval("temporalData <- ts(inputData, frequency=365)"); // Forecast data re.eval("forecastData <- forecast(temporalData, h=30)"); // re.eval("arimaModel <- auto.arima(temporalData, max.p=5, max.q=5, max.P=5, max.Q=5)"); // re.eval("forecastData <- forecast(arimaModel, h=30)"); x = re.eval("forecastData"); v = x.asVector(); x = (REXP) v.elementAt(1); resultArrayVolume = x.asDoubleArray(); // Create a single StockInfo[] for all data StockInfo predictedData; predictedDataArray = new StockInfo[30]; Date lastDate = flatStockInfo[flatStockInfo.length - 1].date; Calendar c = 
Calendar.getInstance(); c.setTime(lastDate); c.add(Calendar.DATE, 1); logger.log("<R> values for forecasted data"); SimpleDateFormat dateFormat = new SimpleDateFormat(); dateFormat.applyPattern("dd/MM/YYYY"); // For all days that were predicted for (int index = 0; index < 30; index++) { predictedData = new StockInfo(); predictedData.open = (float) resultArrayOpen[index]; float maxHigh = (float) StrictMath.max( resultArrayClose[index], StrictMath.max(resultArrayHigh[index], resultArrayOpen[index])); predictedData.high = maxHigh; float minLow = (float) StrictMath.min( resultArrayClose[index], StrictMath.min(resultArrayLow[index], resultArrayOpen[index])); predictedData.low = minLow; predictedData.close = (float) resultArrayClose[index]; predictedData.volume = (int) resultArrayVolume[index]; while (c.get(Calendar.DAY_OF_WEEK) == Calendar.SUNDAY || c.get(Calendar.DAY_OF_WEEK) == Calendar.SATURDAY) { c.add(Calendar.DATE, 1); } predictedData.date = c.getTime(); predictedDataArray[index] = predictedData; logger.log( "<R> stock prediction " + dateFormat.format(predictedData.date.getTime()) + " open: " + predictedData.open + " high: " + predictedData.high + " low: " + predictedData.low + " close: " + predictedData.close); c.add(Calendar.DATE, 1); } } catch (Exception e) { logger.logException(e); } return predictedDataArray; }