public static void createDataFrame() { // Convert batch to JSON and inject into R native dataframe // This will be required for JSON processing System.out.println(engine.eval("library(jsonlite)")); for (int i = 0; i < hdfsPaths.size(); i++) { System.out.println( engine.eval( "testdata_" + i + " <- read.df(sqlContext, '" + hdfsPaths.get(i) + "', 'json')")); } }
/**
 * Evaluates an R command while holding the engine's sync lock (when it could be obtained).
 *
 * @param command the R command to evaluate
 * @param convert passed straight through to {@code engine.eval}; when {@code false}, this
 *     method additionally calls {@code engine.waitForR()} before returning
 * @return whatever {@code engine.eval(command, convert)} returned
 */
private static REXP runCommand(String command, boolean convert) {
  final boolean lockAcquired = engine.getRsync().safeLock();
  final REXP result;
  try {
    result = engine.eval(command, convert);
  } finally {
    // Only release the lock if this call was the one that took it.
    if (lockAcquired) {
      engine.getRsync().unlock();
    }
  }

  if (convert) {
    return result;
  }
  engine.waitForR();
  return result;
}
/** * Inits the R engine. * * @param args The command-line argument list from main. */ private void initREngine(String[] args) { out("Creating R engine"); // start the R engine re = new Rengine(args, false, null); out("R engine created, waiting for R..."); // Waits until R is ready for a new thread. If it returns false, R has died. if (!re.waitForR()) { out("Cannot load R"); return; } // loading rJava String[] szLib = sendCommand2R("library(rJava)"); if (szLib != null) { for (int cnt = 0; cnt < szLib.length; ++cnt) { out(szLib[cnt]); } } // init rJava String[] szJinit = sendCommand2R(".jinit()"); if (szJinit != null) { for (int cnt = 0; cnt < szJinit.length; ++cnt) { out(szJinit[cnt]); } } // ready for input out("READY."); }
/** Ends the application. */ private void quit() { // end R engine re.end(); // close window this.dispose(); System.exit(0); }
// Constructor for new RController private RController() { logger = Logger.getInstance(); if (!Rengine.versionCheck()) { logger.logError("** Version mismatch - Java files don't match library version."); } else { logger.log("Creating Rengine"); this.re = new Rengine(new String[] {}, false, new TextConsole()); logger.log("Rengine created, waiting for R"); // Wait for REngine new thread to finish loading if (!re.waitForR()) { logger.logError("Cannot load R"); } else { this.hasLoaded = true; } } }
/**
 * Reads the file at {@code pathToRScript} line by line (platform default charset), evaluates
 * every line in R in file order and prints each evaluation result to standard output.
 *
 * <p>An {@code IOException} raised while opening the file is caught and its stack trace
 * printed.
 *
 * @param pathToRScript path of the R script to evaluate
 */
public static void readRScript(String pathToRScript) {
  final Path script = Paths.get(pathToRScript);
  // The stream is closed by try-with-resources once all lines were evaluated.
  try (Stream<String> statements = Files.lines(script, Charset.defaultCharset())) {
    statements.forEachOrdered(
        statement -> {
          System.out.println(engine.eval(statement));
        });
  } catch (IOException ioFailure) {
    ioFailure.printStackTrace();
  }
}
public static void start(String... args) { if (!Rengine.versionCheck()) { System.err.println("** Version mismatch " + "- Java files don't match library version. **"); System.exit(1); } System.out.println("Creating R Engine (with arguments)"); // 1) we pass the arguments from the command line // 2) we won't use the main loop at first, we'll start it later // (that's the "false" as second argument) // 3) the callbacks are implemented by the TextConsole class above engine = new Rengine(args, false, null); // the engine creates R is a new thread, so we should wait until it's ready if (!engine.waitForR()) { System.out.println("Cannot load R."); return; } engine.eval("{library(rJava);.jinit()}", false); }
/** * Sends a single command to R. * * @param sz The R command * @param szDescription The Description showing up before the output */ private String[] sendCommand2R(String sz) { // set command if (sz.length() == 0) { return null; } // parse long lParsed = re.rniParse(sz, 1); // eval long lEvaluated = re.rniEval(lParsed, 0); // generate REXP if (lEvaluated > 0) { REXP exp = new REXP(re, lEvaluated); String[] szArr = exp.asStringArray(); if (szArr != null) { return szArr; } double[] dArr = exp.asDoubleArray(); if (dArr != null) { String[] szDArr = new String[dArr.length]; for (int cnt = 0; cnt < dArr.length; ++cnt) { szDArr[cnt] = String.valueOf(dArr[cnt]); } return szDArr; } int[] iArr = exp.asIntArray(); if (iArr != null) { String[] szIArr = new String[iArr.length]; for (int cnt = 0; cnt < iArr.length; ++cnt) { szIArr[cnt] = String.valueOf(iArr[cnt]); } return szIArr; } } return null; }
/**
 * Evaluates a fixed sequence of SparkR commands and prints the result of each evaluation.
 *
 * <p>The commands obtain a Java RDD through {@code RScriptUtils.getJavaRDD}, wrap it as a
 * SparkR RDD, build a data frame from it, show it, collect it into {@code localDf} and finally
 * evaluate {@code localDf}. They rely on the R variables {@code sc} and {@code sqlContext}
 * already being defined in the engine.
 */
public static void runTestScript() {
  final String[] script = {
    "jrdd <- SparkR:::callJStatic(\"com.ikanow.aleph2.analytics.r.utils.RScriptUtils\",\"getJavaRDD\", sc)",
    "SparkR:::show(jrdd)",
    "newRdd <- SparkR:::RDD(jrdd, \"string\")",
    "df <- SparkR:::createDataFrame(sqlContext, newRdd)",
    "showDF(df)",
    "localDf <- collect(df)",
    "localDf"
  };

  // Order matters: every command uses a variable bound by an earlier one.
  for (String statement : script) {
    System.out.println(engine.eval(statement));
  }
}
public static void main(String[] args) throws IOException { start("--vanilla"); initializeSparkR(); System.out.println( engine.eval( "jrdd <- SparkR:::callJStatic(\"com.ikanow.aleph2.analytics.r.utils.RScriptUtils\",\"getJavaRDD\", sc)")); System.out.println(engine.eval("SparkR:::show(jrdd)")); System.out.println(engine.eval("newRdd <- SparkR:::RDD(jrdd, \"string\")")); System.out.println(engine.eval("df <- SparkR:::createDataFrame(sqlContext, newRdd)")); System.out.println(engine.eval("showDF(df)")); System.out.println(engine.eval("localDf <- collect(df)")); System.out.println(engine.eval("localDf")); // processAnalytics(); // readRScript(); shutdown(); }
public static void initializeSparkR() { start("--vanilla"); // To do move these into an array or separate file or something and check for null // if null we need to exit our gracefully as all statements are required for the jobs to run System.out.println(engine.eval(".libPaths(c(.libPaths(), '/root/spark-1.5.2/R/lib'))")); System.out.println(engine.eval("Sys.setenv(SPARK_HOME = '/root/spark-1.5.2')")); System.out.println( engine.eval( "Sys.setenv(PATH = paste(Sys.getenv(c('PATH')), '/root/spark-1.5.2/bin', sep=':'))")); System.out.println(engine.eval("library(SparkR)")); // We should probably point to the server here Im guessing this is pointing to localhost System.out.println( engine.eval( "sc <- sparkR.init(sparkJars = \"/root/aleph2_analytic_services_R-0.0.1-SNAPSHOT.jar\")")); System.out.println(engine.eval("sqlContext <- sparkRSQL.init(sc)")); }
public static Rengine getEngine() { if (engine == null) { engine = new Rengine(new String[] {"--no-save"}, false, null) { @Override public REXP eval(String s, boolean convert) { return super.eval(s, convert); } }; engine.addMainLoopCallbacks( new RMainLoopCallbacks() { @Override public void rBusy(Rengine arg0, int arg1) { // TODO Auto-generated method stub } @Override public String rChooseFile(Rengine arg0, int arg1) { // TODO Auto-generated method stub return null; } @Override public void rFlushConsole(Rengine arg0) { // TODO Auto-generated method stub } @Override public void rLoadHistory(Rengine arg0, String arg1) { // TODO Auto-generated method stub } @Override public String rReadConsole(Rengine arg0, String arg1, int arg2) { logger.info("read console " + arg1 + " arg2 " + arg2); return null; } @Override public void rSaveHistory(Rengine arg0, String arg1) { // TODO Auto-generated method stub } @Override public void rShowMessage(Rengine arg0, String arg1) { // System.out.println("[Rengine message]: "+arg1); } @Override public void rWriteConsole(Rengine arg0, String arg1, int arg2) { logger.info("write console 1"); // if (arg2==0){ // System.out.println("[Rengine write type "+arg2+"]: "+arg1); // } } }); engine.waitForR(); } return engine; }
// Run prediction method on data based on file name public StockInfo[] runPrediction(Map<String, StockInfo[]> dataToProcess) { double[] resultArrayOpen = null; double[] resultArrayHigh = null; double[] resultArrayLow = null; double[] resultArrayClose = null; double[] resultArrayVolume = null; StockInfo[] predictedDataArray = null; try { logger.log("prediction will start"); // Flat the data to perform prediction on it StockInfo[] flatStockInfo = this.flatMapOfData(dataToProcess); Collections.reverse(Arrays.asList(flatStockInfo)); REXP x; RVector v; // Check for installed packages x = re.eval("installed.packages()"); v = x.asVector(); String[] packages = x.asStringArray(); boolean isForecastInstalled = false; logger.log("<R> getting installed packages"); for (int index = 0; index < packages.length && isForecastInstalled == false; index++) { logger.log("<R> has installed " + packages[index]); if (packages[index] != null && packages[index].compareTo("forecast") == 0) { isForecastInstalled = true; } } // If forecast needs to be installed if (isForecastInstalled == false) { logger.log("<R> will set repos"); // Set CRAN re.eval("r <- getOption(\"repos\")"); re.eval("r[\"CRAN\"] <- \"http://cran.us.r-project.org\""); re.eval("options(repos = r)"); re.eval("rm(r)"); // Install forecast re.eval("install.packages(\"forecast\")"); logger.log("<R> will install forecast package"); } // Load forecast library re.eval("library(\"forecast\")"); logger.log("<R> loaded forecast"); // Make prediction for Open value ----------------------- // Load data into R logger.log("<R> loading data into R"); StringBuilder builder = new StringBuilder("inputData <- c("); for (int index = 0; index < flatStockInfo.length; index++) { builder.append(flatStockInfo[index].open); if (index != flatStockInfo.length - 1) { builder.append(","); } else { builder.append(")"); } } String stringFromBuilder = builder.toString(); re.eval(stringFromBuilder); // Create time series from data logger.log("<R> forecasting 
open values BestFit"); re.eval("temporalData <- ts(inputData, frequency=365)"); // Forecast data re.eval("forecastData <- forecast(temporalData, h=30)"); // re.eval("arimaModel <- auto.arima(temporalData, max.p=5, max.q=5, max.P=5, max.Q=5)"); // re.eval("forecastData <- forecast(arimaModel, h=30)"); x = re.eval("forecastData"); v = x.asVector(); x = (REXP) v.elementAt(1); // instead of 3 resultArrayOpen = x.asDoubleArray(); // Make prediction for High value ------------------------ builder = new StringBuilder("inputData <- c("); for (int index = 0; index < flatStockInfo.length; index++) { builder.append(flatStockInfo[index].high); if (index != flatStockInfo.length - 1) { builder.append(","); } else { builder.append(")"); } } stringFromBuilder = builder.toString(); re.eval(stringFromBuilder); // Create time series from data logger.log("<R> forecasting high values BestFit"); re.eval("temporalData <- ts(inputData, frequency=365)"); // Forecast data re.eval("forecastData <- forecast(temporalData, h=30)"); // re.eval("arimaModel <- auto.arima(temporalData, max.p=5, max.q=5, max.P=5, max.Q=5)"); // re.eval("forecastData <- forecast(arimaModel, h=30)"); x = re.eval("forecastData"); v = x.asVector(); x = (REXP) v.elementAt(1); resultArrayHigh = x.asDoubleArray(); // Make prediction for Low value ------------------------ builder = new StringBuilder("inputData <- c("); for (int index = 0; index < flatStockInfo.length; index++) { builder.append(flatStockInfo[index].low); if (index != flatStockInfo.length - 1) { builder.append(","); } else { builder.append(")"); } } stringFromBuilder = builder.toString(); re.eval(stringFromBuilder); // Create time series from data logger.log("<R> forecasting low values BestFit"); re.eval("temporalData <- ts(inputData, frequency=365)"); // Forecast data re.eval("forecastData <- forecast(temporalData, h=30)"); // re.eval("arimaModel <- auto.arima(temporalData, max.p=5, max.q=5, max.P=5, max.Q=5)"); // re.eval("forecastData <- 
forecast(arimaModel, h=30)"); x = re.eval("forecastData"); v = x.asVector(); x = (REXP) v.elementAt(1); resultArrayLow = x.asDoubleArray(); // Make prediction for Close value ------------------------ builder = new StringBuilder("inputData <- c("); for (int index = 0; index < flatStockInfo.length; index++) { builder.append(flatStockInfo[index].close); if (index != flatStockInfo.length - 1) { builder.append(","); } else { builder.append(")"); } } stringFromBuilder = builder.toString(); re.eval(stringFromBuilder); // Create time series from data logger.log("<R> forecasting close values BestFit"); re.eval("temporalData <- ts(inputData, frequency=365)"); // Forecast data re.eval("forecastData <- forecast(temporalData, h=30)"); // re.eval("arimaModel <- auto.arima(temporalData, max.p=5, max.q=5, max.P=5, max.Q=5)"); // re.eval("forecastData <- forecast(arimaModel, h=30)"); x = re.eval("forecastData"); v = x.asVector(); x = (REXP) v.elementAt(1); resultArrayClose = x.asDoubleArray(); // Make prediction for Close value ------------------------ builder = new StringBuilder("inputData <- c("); for (int index = 0; index < flatStockInfo.length; index++) { builder.append(flatStockInfo[index].volume); if (index != flatStockInfo.length - 1) { builder.append(","); } else { builder.append(")"); } } stringFromBuilder = builder.toString(); re.eval(stringFromBuilder); // Create time series from data re.eval("temporalData <- ts(inputData, frequency=365)"); // Forecast data re.eval("forecastData <- forecast(temporalData, h=30)"); // re.eval("arimaModel <- auto.arima(temporalData, max.p=5, max.q=5, max.P=5, max.Q=5)"); // re.eval("forecastData <- forecast(arimaModel, h=30)"); x = re.eval("forecastData"); v = x.asVector(); x = (REXP) v.elementAt(1); resultArrayVolume = x.asDoubleArray(); // Create a single StockInfo[] for all data StockInfo predictedData; predictedDataArray = new StockInfo[30]; Date lastDate = flatStockInfo[flatStockInfo.length - 1].date; Calendar c = 
Calendar.getInstance(); c.setTime(lastDate); c.add(Calendar.DATE, 1); logger.log("<R> values for forecasted data"); SimpleDateFormat dateFormat = new SimpleDateFormat(); dateFormat.applyPattern("dd/MM/YYYY"); // For all days that were predicted for (int index = 0; index < 30; index++) { predictedData = new StockInfo(); predictedData.open = (float) resultArrayOpen[index]; float maxHigh = (float) StrictMath.max( resultArrayClose[index], StrictMath.max(resultArrayHigh[index], resultArrayOpen[index])); predictedData.high = maxHigh; float minLow = (float) StrictMath.min( resultArrayClose[index], StrictMath.min(resultArrayLow[index], resultArrayOpen[index])); predictedData.low = minLow; predictedData.close = (float) resultArrayClose[index]; predictedData.volume = (int) resultArrayVolume[index]; while (c.get(Calendar.DAY_OF_WEEK) == Calendar.SUNDAY || c.get(Calendar.DAY_OF_WEEK) == Calendar.SATURDAY) { c.add(Calendar.DATE, 1); } predictedData.date = c.getTime(); predictedDataArray[index] = predictedData; logger.log( "<R> stock prediction " + dateFormat.format(predictedData.date.getTime()) + " open: " + predictedData.open + " high: " + predictedData.high + " low: " + predictedData.low + " close: " + predictedData.close); c.add(Calendar.DATE, 1); } } catch (Exception e) { logger.logException(e); } return predictedDataArray; }
/**
 * Ends the shared R engine.
 *
 * <p>Does nothing when no engine has been created, so it is safe to call even if the engine
 * was never started.
 */
public static void shutdown() {
  if (engine == null) {
    return;
  }
  engine.end();
}