Example #1
  @Override
  public DDF loadFromJDBC(JDBCDataSourceDescriptor dataSource) throws DDFException {
    SparkDDFManager sparkDDFManager = (SparkDDFManager) mDDFManager;
    HiveContext sqlContext = sparkDDFManager.getHiveContext();

    JDBCDataSourceCredentials cred =
        (JDBCDataSourceCredentials) dataSource.getDataSourceCredentials();
    String fullURL = dataSource.getDataSourceUri().getUri().toString();
    // Append credentials to the connection URL as query parameters when present.
    if (cred.getUsername() != null && !cred.getUsername().isEmpty()) {
      fullURL += String.format("?user=%s&password=%s", cred.getUsername(), cred.getPassword());
    }

    // Spark 1.x generic-load API: point the "jdbc" source at the table to read.
    Map<String, String> options = new HashMap<>();
    options.put("url", fullURL);
    options.put("dbtable", dataSource.getDbTable());
    DataFrame df = sqlContext.load("jdbc", options);

    DDF ddf =
        sparkDDFManager.newDDF(
            sparkDDFManager,
            df,
            new Class<?>[] {DataFrame.class},
            null,
            SparkUtils.schemaFromDataFrame(df));
    // Materialize an RDD<Row> representation eagerly; the returned handle is unused.
    ddf.getRepresentationHandler().get(RDD.class, Row.class);
    ddf.getMetaDataHandler().setDataSourceDescriptor(dataSource);
    return ddf;
  }
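
For context, the method above boils down to Spark 1.x's generic JDBC load. A minimal standalone sketch follows; the connection URL, credentials, and the people table are placeholders, not values from the original code:

import java.util.HashMap;
import java.util.Map;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.hive.HiveContext;

public class JdbcLoadSketch {
  public static void main(String[] args) {
    JavaSparkContext jsc =
        new JavaSparkContext(new SparkConf().setAppName("JdbcLoadSketch").setMaster("local[*]"));
    HiveContext sqlContext = new HiveContext(jsc.sc());

    // Credentials travel as URL query parameters, exactly as loadFromJDBC appends them.
    Map<String, String> options = new HashMap<>();
    options.put("url", "jdbc:mysql://localhost:3306/testdb?user=demo&password=secret");
    options.put("dbtable", "people"); // hypothetical table

    // Spark 1.3-era generic load; later versions use read().format("jdbc").load() instead.
    DataFrame df = sqlContext.load("jdbc", options);
    df.printSchema();

    jsc.stop();
  }
}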
Example #2
  @Override
  public DDF loadSpecialFormat(DataFormat format, URI fileURI, Boolean flatten)
      throws DDFException {
    SparkDDFManager sparkDDFManager = (SparkDDFManager) mDDFManager;
    HiveContext sqlContext = sparkDDFManager.getHiveContext();
    DataFrame jdf = null;
    switch (format) {
      case JSON:
        jdf = sqlContext.jsonFile(fileURI.toString());
        break;
      case PQT: // Parquet
        jdf = sqlContext.parquetFile(fileURI.toString());
        break;
      default:
        throw new DDFException(String.format("Unsupported data format: %s", format.toString()));
    }

    // Rewrite any column names that are not valid DDF identifiers before deriving the schema.
    DataFrame df = SparkUtils.getDataFrameWithValidColnames(jdf);
    DDF ddf =
        sparkDDFManager.newDDF(
            sparkDDFManager,
            df,
            new Class<?>[] {DataFrame.class},
            null,
            SparkUtils.schemaFromDataFrame(df));

    // Boolean.TRUE.equals guards against a null flag; "flatten == true" would NPE on null.
    return Boolean.TRUE.equals(flatten) ? ddf.getFlattenedDDF() : ddf;
  }
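
Both Spark 1.x entry points used in the switch can be exercised directly. A minimal sketch with placeholder file paths (everything below is illustrative, not taken from the original code):

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.hive.HiveContext;

public class SpecialFormatSketch {
  public static void main(String[] args) {
    JavaSparkContext jsc =
        new JavaSparkContext(new SparkConf().setAppName("SpecialFormatSketch").setMaster("local[*]"));
    HiveContext sqlContext = new HiveContext(jsc.sc());

    // jsonFile expects one JSON object per line and infers the schema by scanning the data.
    DataFrame json = sqlContext.jsonFile("hdfs:///tmp/events.json");
    json.printSchema();

    // parquetFile reads the schema directly from the Parquet file footer.
    DataFrame parquet = sqlContext.parquetFile("hdfs:///tmp/events.parquet");
    parquet.printSchema();

    jsc.stop();
  }
}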
Example #3
  public double rmse(DDF predictedDDF, boolean implicitPrefs) throws DDFException {
    RDD<Rating> predictions =
        (RDD<Rating>) predictedDDF.getRepresentationHandler().get(RDD.class, Rating.class);
    RDD<Rating> ratings =
        (RDD<Rating>) this.getDDF().getRepresentationHandler().get(RDD.class, Rating.class);
    // Forward implicitPrefs instead of hard-coding false, so implicit-feedback
    // models are evaluated with the intended semantics.
    return new ROCComputer().computeRmse(ratings, predictions, implicitPrefs);
  }
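
ROCComputer.computeRmse is internal to the DDF project. As a reference point, the standard MLlib recipe it corresponds to joins the two Rating sets on (user, product) and averages the squared error. A minimal sketch with stock Spark types (the class and method names are illustrative):

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.mllib.recommendation.Rating;
import scala.Tuple2;

public class RmseSketch {
  // Join predictions and ratings on (user, product), then take the square root
  // of the mean squared error over the matched pairs.
  static double rmse(JavaRDD<Rating> ratings, JavaRDD<Rating> predictions) {
    JavaPairRDD<Tuple2<Integer, Integer>, Double> actual =
        ratings.mapToPair(r -> new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating()));
    JavaPairRDD<Tuple2<Integer, Integer>, Double> predicted =
        predictions.mapToPair(r -> new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating()));

    double mse = actual.join(predicted)
        .values()
        .mapToDouble(pair -> {
          double diff = pair._1() - pair._2();
          return diff * diff;
        })
        .mean();
    return Math.sqrt(mse);
  }
}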
Example #4
  @Override
  public DDF loadTextFile(DataSourceDescriptor dataSource) throws DDFException {
    // Hive table identifiers cannot contain '-', so rewrite the random UUID with underscores.
    String hiveTableName = UUID.randomUUID().toString().replace("-", "_");
    StringBuilder stringBuilder = new StringBuilder();
    List<String> columnNames = new ArrayList<>();
    List<Schema.Column> columnList = dataSource.getDataSourceSchema().getColumns();
    // Build a comma-separated "name type" column list for the CREATE TABLE statement.
    for (int i = 0; i < columnList.size(); ++i) {
      Schema.Column column = columnList.get(i);
      columnNames.add(column.getName());
      if (i > 0) {
        stringBuilder.append(", ");
      }
      stringBuilder.append(column.getName()).append(' ').append(column.getType());
    }
    String schemaStr = stringBuilder.toString();

    TextFileFormat textFileFormat = (TextFileFormat) (dataSource.getFileFormat());
    String quote = textFileFormat.getQuote();
    String delimiter = textFileFormat.getDelimiter();

    // NOTE: delimiter and quote are spliced into the DDL verbatim; they are assumed
    // to be single benign characters (no escaping is performed here).
    String serdesString =
        "ROW FORMAT SERDE 'com.bizo.hive.serde.csv.CSVSerde' "
            + "WITH serdeproperties ('separatorChar' = '"
            + delimiter
            + "', 'quoteChar' = '"
            + quote
            + "')";

    URI uri = dataSource.getDataSourceUri().getUri();
    String sqlCmd =
        "create external table "
            + hiveTableName
            + " ("
            + schemaStr
            + ") "
            + serdesString
            + " STORED AS TEXTFILE LOCATION '"
            + uri.toString()
            + "'";

    // Register the external table, then wrap a SELECT over it as the resulting DDF.
    this.mDDFManager.sql(sqlCmd, false);
    DDF ddf = this.mDDFManager.sql2ddf(String.format("select * from %s", hiveTableName), false);

    ddf.setColumnNames(columnNames);
    return ddf;
  }
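
To make the assembled DDL concrete: for a hypothetical two-column schema (name string, age int), a comma delimiter, a double-quote quote character, and a placeholder HDFS location, sqlCmd would read roughly as follows (wrapped here for readability; the code builds it as a single line):

create external table <uuid_with_underscores> (name string, age int)
  ROW FORMAT SERDE 'com.bizo.hive.serde.csv.CSVSerde'
  WITH serdeproperties ('separatorChar' = ',', 'quoteChar' = '"')
  STORED AS TEXTFILE LOCATION 'hdfs:///tmp/people'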
Example #5
  /*
   * Input: predictions retrievable from the DDF as RDD<LabeledPoint>.
   * (non-Javadoc)
   * @see io.ddf.ml.AMLMetricsSupporter#roc(io.ddf.DDF, int)
   */
  @Override
  public RocMetric roc(DDF predictionDDF, int alpha_length) throws DDFException {
    RDD<LabeledPoint> rddLabeledPoint =
        (RDD<LabeledPoint>)
            predictionDDF.getRepresentationHandler().get(RDD.class, LabeledPoint.class);
    ROCComputer rc = new ROCComputer();

    return rc.ROC(rddLabeledPoint, alpha_length);
  }
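
ROCComputer.ROC is likewise DDF-internal. With stock MLlib, the same curve comes out of BinaryClassificationMetrics. A minimal sketch, under the assumption (not stated by the snippet above) that each LabeledPoint stores the predicted score as its single feature and the ground-truth label in label():

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics;
import org.apache.spark.mllib.regression.LabeledPoint;
import scala.Tuple2;

public class RocSketch {
  static void printRoc(JavaRDD<LabeledPoint> predictions) {
    // Assumed encoding: features()[0] = predicted score, label() = ground truth.
    JavaRDD<Tuple2<Object, Object>> scoreAndLabels =
        predictions.map(p -> new Tuple2<Object, Object>(p.features().apply(0), p.label()));

    BinaryClassificationMetrics metrics =
        new BinaryClassificationMetrics(scoreAndLabels.rdd());

    // Each point on the curve is (false positive rate, true positive rate).
    for (Tuple2<Object, Object> point : metrics.roc().toJavaRDD().collect()) {
      System.out.println(point._1() + "\t" + point._2());
    }
  }
}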