/**
 * Loads the {@code jsonlite} R library and then reads every entry of {@code hdfsPaths} as a JSON
 * data frame inside the R session.
 *
 * <p>The data frame for the entry at position {@code i} is bound to the R variable
 * {@code testdata_<i>}. The value returned by each R evaluation is printed to standard output.
 */
public static void createDataFrame() {
  // Load jsonlite before any of the JSON sources are read.
  System.out.println(engine.eval("library(jsonlite)"));

  int position = 0;
  while (position < hdfsPaths.size()) {
    // One R variable per input path: testdata_0, testdata_1, ...
    String variableName = "testdata_" + position;
    String readCommand =
        variableName + " <- read.df(sqlContext, '" + hdfsPaths.get(position) + "', 'json')";
    System.out.println(engine.eval(readCommand));
    position++;
  }
}
 /**
  * Reads the file at {@code pathToRScript} using the platform default charset and evaluates it in
  * the R engine one line at a time, in file order, printing the result of each evaluation.
  *
  * <p>An {@link IOException} raised while opening the file is reported via
  * {@code printStackTrace()} and otherwise ignored.
  *
  * <p>NOTE(review): every line is handed to {@code engine.eval} on its own, so an R statement that
  * spans several lines is presumably not evaluated as a single unit - confirm against real scripts.
  *
  * @param pathToRScript file system path of the R script to run
  */
 public static void readRScript(String pathToRScript) {
   Path scriptPath = Paths.get(pathToRScript);
   try (Stream<String> scriptLines = Files.lines(scriptPath, Charset.defaultCharset())) {
     // Walk the stream through its iterator so the lines are consumed strictly in order.
     for (String statement : (Iterable<String>) scriptLines::iterator) {
       System.out.println(engine.eval(statement));
     }
   } catch (IOException e) {
     e.printStackTrace();
   }
 }
Exemple #3
0
  /**
   * Evaluates a single R command while holding the engine's synchronization lock.
   *
   * <p>The lock is requested with {@code safeLock()} and released in a {@code finally} block, but
   * only when that call reported that the lock was obtained. When {@code convert} is
   * {@code false}, the method additionally calls {@code engine.waitForR()} after the lock has been
   * released and before returning.
   *
   * @param command R source text to evaluate
   * @param convert forwarded unchanged as the second argument of {@code engine.eval}
   * @return whatever {@code engine.eval(command, convert)} returned
   */
  private static REXP runCommand(String command, boolean convert) {
    final boolean lockAcquired = engine.getRsync().safeLock();
    final REXP result;

    try {
      result = engine.eval(command, convert);
    } finally {
      // Only release a lock that this particular call obtained.
      if (lockAcquired) {
        engine.getRsync().unlock();
      }
    }

    if (convert) {
      return result;
    }
    engine.waitForR();
    return result;
  }
 /**
  * Runs a fixed sequence of SparkR commands in the R engine and prints the result of each one.
  *
  * <p>The sequence obtains a Java RDD through the static {@code getJavaRDD} method of
  * {@code RScriptUtils}, wraps it as a SparkR RDD of strings, builds a data frame from it, shows
  * the data frame, collects it into {@code localDf} and finally evaluates {@code localDf}.
  */
 public static void runTestScript() {
   // Order matters: each command uses R variables defined by the ones before it.
   final String[] rCommands = {
     "jrdd <- SparkR:::callJStatic(\"com.ikanow.aleph2.analytics.r.utils.RScriptUtils\",\"getJavaRDD\", sc)",
     "SparkR:::show(jrdd)",
     "newRdd <- SparkR:::RDD(jrdd, \"string\")",
     "df <- SparkR:::createDataFrame(sqlContext, newRdd)",
     "showDF(df)",
     "localDf <- collect(df)",
     "localDf"
   };

   for (String rCommand : rCommands) {
     System.out.println(engine.eval(rCommand));
   }
 }
 /**
  * Starts the embedded R engine and initialises rJava inside it.
  *
  * <p>The call is idempotent: if an engine has already been created and reports that R is ready,
  * it is reused and {@code args} are ignored. JRI supports a single R instance per JVM, so a
  * second {@code Rengine} must not be constructed when callers invoke this method more than once.
  *
  * <p>If the JRI Java classes do not match the native library version the JVM is terminated with
  * exit status 1. If R cannot be loaded a message is printed and the method returns without
  * initialising rJava.
  *
  * @param args command line arguments handed to R (for example {@code "--vanilla"})
  */
 public static void start(String... args) {
   // Reuse a live engine instead of constructing a second R instance in this JVM.
   if (engine != null && engine.waitForR()) {
     System.out.println("R Engine already running; reusing it");
     return;
   }

   if (!Rengine.versionCheck()) {
     System.err.println("** Version mismatch " + "- Java files don't match library version. **");
     System.exit(1);
   }

   System.out.println("Creating R Engine (with arguments)");
   // 1) the caller's arguments are passed straight through to R
   // 2) the R main loop is not started here (that is the "false" second argument)
   // 3) no console callbacks are installed (that is the null third argument)
   engine = new Rengine(args, false, null);

   // The engine brings R up on its own thread, so wait until it reports ready.
   if (!engine.waitForR()) {
     System.out.println("Cannot load R.");
     return;
   }

   // Second argument false: the result of this evaluation is not converted or used.
   engine.eval("{library(rJava);.jinit()}", false);
 }
  /**
   * Command line entry point: starts R with {@code --vanilla}, initialises SparkR, runs a fixed
   * sequence of SparkR test commands (printing the result of each) and then shuts down.
   *
   * <p>NOTE(review): {@code start} is called here and {@code initializeSparkR} is called right
   * after it - check whether the engine ends up being started more than once.
   *
   * @param args command line arguments; not used
   * @throws IOException declared by the signature; nothing in this body throws it directly
   */
  public static void main(String[] args) throws IOException {
    start("--vanilla");
    initializeSparkR();

    // Order matters: each command uses R variables defined by the ones before it.
    final String[] rCommands = {
      "jrdd <- SparkR:::callJStatic(\"com.ikanow.aleph2.analytics.r.utils.RScriptUtils\",\"getJavaRDD\", sc)",
      "SparkR:::show(jrdd)",
      "newRdd <- SparkR:::RDD(jrdd, \"string\")",
      "df <- SparkR:::createDataFrame(sqlContext, newRdd)",
      "showDF(df)",
      "localDf <- collect(df)",
      "localDf"
    };
    for (String rCommand : rCommands) {
      System.out.println(engine.eval(rCommand));
    }

    // Currently disabled steps: processAnalytics(); readRScript();
    shutdown();
  }
 /**
  * Starts R with {@code --vanilla} and prepares a SparkR session inside it.
  *
  * <p>The R commands add the Spark 1.5.2 R library directory to {@code .libPaths()}, set
  * {@code SPARK_HOME}, append the Spark {@code bin} directory to {@code PATH}, load SparkR, and
  * create the Spark context {@code sc} (with the analytics jar attached) and the SQL context
  * {@code sqlContext}. The result of every evaluation is printed to standard output.
  */
 public static void initializeSparkR() {
   start("--vanilla");

   // TODO: move these commands to a separate file and check each result for null; every
   // statement is required for the jobs to run, so a null result should lead to a graceful exit.
   final String[] setupCommands = {
     ".libPaths(c(.libPaths(), '/root/spark-1.5.2/R/lib'))",
     "Sys.setenv(SPARK_HOME = '/root/spark-1.5.2')",
     "Sys.setenv(PATH = paste(Sys.getenv(c('PATH')), '/root/spark-1.5.2/bin', sep=':'))",
     "library(SparkR)",
     // NOTE(review): no master is given to sparkR.init, so this presumably targets a local
     // Spark instance - it should probably point at the server.
     "sc <- sparkR.init(sparkJars = \"/root/aleph2_analytic_services_R-0.0.1-SNAPSHOT.jar\")",
     "sqlContext <- sparkRSQL.init(sc)"
   };

   for (String setupCommand : setupCommands) {
     System.out.println(engine.eval(setupCommand));
   }
 }
  /**
   * Forecasts the next 30 weekday quotes from historical stock data using R's {@code forecast}
   * package.
   *
   * <p>The input is flattened with {@code flatMapOfData} and reversed in place. The open, high,
   * low, close and volume series are then forecast independently in R. For each predicted day the
   * high is the maximum of the predicted close, high and open, and the low is the minimum of the
   * predicted close, low and open. Dates start on the day after the last flattened sample and
   * skip Saturdays and Sundays.
   *
   * <p>Any exception is passed to {@code logger.logException} and not rethrown.
   *
   * @param dataToProcess historical quotes, flattened by {@code flatMapOfData}
   * @return the 30 predicted quotes; {@code null} if a failure occurred before the result array
   *     was allocated, or an array with trailing {@code null} entries if a failure occurred while
   *     it was being filled
   */
  public StockInfo[] runPrediction(Map<String, StockInfo[]> dataToProcess) {
    final int forecastDays = 30;
    StockInfo[] predictedDataArray = null;

    try {
      logger.log("prediction will start");
      // Flatten the data, then reverse it in place: Arrays.asList is a view over the array, so
      // reversing the list reorders flatStockInfo itself.
      StockInfo[] flatStockInfo = this.flatMapOfData(dataToProcess);
      Collections.reverse(Arrays.asList(flatStockInfo));
      if (flatStockInfo.length == 0) {
        // An empty series would produce the malformed R command "inputData <- c(".
        throw new IllegalArgumentException("No stock data available to forecast");
      }

      ensureForecastPackageLoaded();

      // Render every series as text once, using the same formatting that string concatenation
      // would apply to each field.
      logger.log("<R> loading data into R");
      final int sampleCount = flatStockInfo.length;
      String[] openValues = new String[sampleCount];
      String[] highValues = new String[sampleCount];
      String[] lowValues = new String[sampleCount];
      String[] closeValues = new String[sampleCount];
      String[] volumeValues = new String[sampleCount];
      for (int index = 0; index < sampleCount; index++) {
        StockInfo sample = flatStockInfo[index];
        openValues[index] = String.valueOf(sample.open);
        highValues[index] = String.valueOf(sample.high);
        lowValues[index] = String.valueOf(sample.low);
        closeValues[index] = String.valueOf(sample.close);
        volumeValues[index] = String.valueOf(sample.volume);
      }

      double[] resultArrayOpen = forecastSeriesWithR(openValues, "open", forecastDays);
      double[] resultArrayHigh = forecastSeriesWithR(highValues, "high", forecastDays);
      double[] resultArrayLow = forecastSeriesWithR(lowValues, "low", forecastDays);
      double[] resultArrayClose = forecastSeriesWithR(closeValues, "close", forecastDays);
      double[] resultArrayVolume = forecastSeriesWithR(volumeValues, "volume", forecastDays);

      // Create a single StockInfo[] for all data
      predictedDataArray = new StockInfo[forecastDays];

      // Predictions start on the day after the last sample of the (reversed) input.
      Date lastDate = flatStockInfo[flatStockInfo.length - 1].date;
      Calendar c = Calendar.getInstance();
      c.setTime(lastDate);
      c.add(Calendar.DATE, 1);

      logger.log("<R> values for forecasted data");

      // Lower-case yyyy is the calendar year; upper-case YYYY is the week year, which prints the
      // wrong year for dates around New Year.
      SimpleDateFormat dateFormat = new SimpleDateFormat("dd/MM/yyyy");

      // For all days that were predicted
      for (int index = 0; index < forecastDays; index++) {
        StockInfo predictedData = new StockInfo();
        predictedData.open = (float) resultArrayOpen[index];

        // The series are forecast independently, so widen high/low to keep each quote
        // self-consistent (high >= open, close and low <= open, close).
        predictedData.high =
            (float)
                StrictMath.max(
                    resultArrayClose[index],
                    StrictMath.max(resultArrayHigh[index], resultArrayOpen[index]));
        predictedData.low =
            (float)
                StrictMath.min(
                    resultArrayClose[index],
                    StrictMath.min(resultArrayLow[index], resultArrayOpen[index]));

        predictedData.close = (float) resultArrayClose[index];
        predictedData.volume = (int) resultArrayVolume[index];

        // Skip weekends
        while (c.get(Calendar.DAY_OF_WEEK) == Calendar.SUNDAY
            || c.get(Calendar.DAY_OF_WEEK) == Calendar.SATURDAY) {
          c.add(Calendar.DATE, 1);
        }
        predictedData.date = c.getTime();

        predictedDataArray[index] = predictedData;

        logger.log(
            "<R> stock prediction "
                + dateFormat.format(predictedData.date)
                + " open: "
                + predictedData.open
                + " high: "
                + predictedData.high
                + " low: "
                + predictedData.low
                + " close: "
                + predictedData.close);
        c.add(Calendar.DATE, 1);
      }

    } catch (Exception e) {
      logger.logException(e);
    }

    return predictedDataArray;
  }

  /** Installs the R {@code forecast} package from CRAN if it is missing, then loads it. */
  private void ensureForecastPackageLoaded() {
    REXP installed = re.eval("installed.packages()");
    String[] packages = installed.asStringArray();
    boolean isForecastInstalled = false;
    logger.log("<R> getting installed packages");
    for (int index = 0; index < packages.length && !isForecastInstalled; index++) {
      logger.log("<R> has installed " + packages[index]);
      if ("forecast".equals(packages[index])) {
        isForecastInstalled = true;
      }
    }

    if (!isForecastInstalled) {
      logger.log("<R> will set repos");

      // Set CRAN
      re.eval("r <- getOption(\"repos\")");
      re.eval("r[\"CRAN\"] <- \"http://cran.us.r-project.org\"");
      re.eval("options(repos = r)");
      re.eval("rm(r)");

      logger.log("<R> will install forecast package");
      re.eval("install.packages(\"forecast\")");
    }

    re.eval("library(\"forecast\")");
    logger.log("<R> loaded forecast");
  }

  /**
   * Loads one numeric series into R, forecasts {@code horizon} steps ahead and returns the
   * values found at position 1 of the R forecast object.
   *
   * @param values the series, already rendered as R numeric literals, in time order
   * @param seriesName label used in log and error messages (for example {@code "open"})
   * @param horizon number of steps to forecast
   * @return the forecast values, at least {@code horizon} long
   * @throws IllegalStateException if R does not return a usable forecast
   */
  private double[] forecastSeriesWithR(String[] values, String seriesName, int horizon) {
    StringBuilder builder = new StringBuilder("inputData <- c(");
    for (int index = 0; index < values.length; index++) {
      if (index > 0) {
        builder.append(",");
      }
      builder.append(values[index]);
    }
    builder.append(")");
    re.eval(builder.toString());

    logger.log("<R> forecasting " + seriesName + " values BestFit");
    // Create time series from data
    re.eval("temporalData <- ts(inputData, frequency=365)");
    // Forecast data
    re.eval("forecastData <- forecast(temporalData, h=" + horizon + ")");

    REXP forecast = re.eval("forecastData");
    if (forecast == null) {
      throw new IllegalStateException("R returned no forecast for " + seriesName + " values");
    }
    RVector components = forecast.asVector();
    if (components == null || components.size() < 2) {
      throw new IllegalStateException(
          "Unexpected R forecast structure for " + seriesName + " values");
    }
    // Position 1 is used for this forecast() call; the original code noted "instead of 3",
    // presumably the position that applied to an earlier auto.arima-based variant.
    double[] predicted = ((REXP) components.elementAt(1)).asDoubleArray();
    if (predicted == null || predicted.length < horizon) {
      throw new IllegalStateException(
          "R forecast for " + seriesName + " values has fewer than " + horizon + " points");
    }
    return predicted;
  }