/**
 * Saves the artists DataFrame to Elasticsearch with {@code ES_MAPPING_EXCLUDE} set to
 * {@code "url"}, then checks that the target exists and that the raw search response
 * for it does not contain the text {@code "url"}.
 *
 * @throws Exception if building the DataFrame or talking to Elasticsearch fails
 */
@Test
public void testEsSchemaRDD1WriteWithMappingExclude() throws Exception {
    final String resource = "sparksql-test/scala-basic-write-exclude-mapping";
    final DataFrame artists = artistsAsDataFrame();

    JavaEsSparkSQL.saveToEs(artists, resource, ImmutableMap.of(ES_MAPPING_EXCLUDE, "url"));

    // The target must exist before its contents are inspected.
    assertTrue(RestUtils.exists(resource));

    final String searchResponse = RestUtils.get(resource + "/_search?");
    assertThat(searchResponse, not(containsString("url")));
}
/**
 * Saves the artists DataFrame to Elasticsearch with no extra settings, then checks that
 * the target exists and that the raw search response for it contains {@code "345"}.
 *
 * @throws Exception if building the DataFrame or talking to Elasticsearch fails
 */
@Test
public void testEsdataFrame1Write() throws Exception {
    final String resource = "sparksql-test/scala-basic-write";
    final DataFrame artists = artistsAsDataFrame();

    JavaEsSparkSQL.saveToEs(artists, resource);

    // The target must exist before its contents are inspected.
    assertTrue(RestUtils.exists(resource));

    final String searchResponse = RestUtils.get(resource + "/_search?");
    assertThat(searchResponse, containsString("345"));
}
/**
 * Saves the artists DataFrame to Elasticsearch with {@code ES_MAPPING_ID} set to
 * {@code "id"}, then checks that the target exists, that the raw search response
 * contains {@code "345"}, and that the resource {@code <target>/1} exists.
 *
 * @throws Exception if building the DataFrame or talking to Elasticsearch fails
 */
@Test
public void testEsdataFrame1WriteWithId() throws Exception {
    final String resource = "sparksql-test/scala-basic-write-id-mapping";
    final DataFrame artists = artistsAsDataFrame();

    JavaEsSparkSQL.saveToEs(artists, resource, ImmutableMap.of(ES_MAPPING_ID, "id"));

    // The target must exist before its contents are inspected.
    assertTrue(RestUtils.exists(resource));

    final String searchResponse = RestUtils.get(resource + "/_search?");
    assertThat(searchResponse, containsString("345"));

    // With the id mapping in place, an entry addressed as "<target>/1" is expected.
    final boolean firstDocumentExists = RestUtils.exists(resource + "/1");
    assertThat(firstDocumentExists, is(true));
}
/**
 * Reads the crimes CSV file from HDFS, converts every non-blank line into a {@code Crime}
 * bean, builds a DataFrame from those beans and saves it to Elasticsearch under
 * {@code esh_sparksql/crimes_reflection}.
 *
 * <p>Column layout read from each CSV record (by index): 0 id, 1 case number, 2 event date,
 * 3 block, 4 IUCR, 5 primary type, 6 description, 7 location, 8 arrest, 9 domestic,
 * 10 latitude, 11 longitude.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("esh-spark").setMaster("local[4]");
    conf.set("es.index.auto.create", "true");

    // The context is closed (and thereby stopped) even when the job throws, so a failed
    // run does not leave a live Spark context behind.
    try (JavaSparkContext context = new JavaSparkContext(conf)) {
        JavaRDD<String> textFile =
                context.textFile("hdfs://localhost:9000/ch07/crimes_dataset.csv");

        JavaRDD<Crime> dataSplits = textFile
                // A blank line produces no usable CSV record; skip it instead of letting
                // the record lookup below abort the whole job.
                .filter(line -> !line.trim().isEmpty())
                .map(line -> {
                    // Each line is parsed on its own; close the parser once the single
                    // record has been read.
                    try (CSVParser parser = CSVParser.parse(line, CSVFormat.RFC4180)) {
                        CSVRecord record = parser.getRecords().get(0);

                        Crime c = new Crime();
                        c.setId(record.get(0));
                        c.setCaseNumber(record.get(1));
                        c.setEventDate(record.get(2));
                        c.setBlock(record.get(3));
                        c.setIucr(record.get(4));
                        c.setPrimaryType(record.get(5));
                        c.setDescription(record.get(6));
                        c.setLocation(record.get(7));
                        c.setArrest(Boolean.parseBoolean(record.get(8)));
                        c.setDomestic(Boolean.parseBoolean(record.get(9)));

                        // Empty coordinates are stored as null rather than parsed.
                        String lat = record.get(10);
                        String lon = record.get(11);
                        Map<String, Double> geoLocation = new HashMap<>();
                        geoLocation.put("lat", StringUtils.isEmpty(lat) ? null : Double.parseDouble(lat));
                        geoLocation.put("lon", StringUtils.isEmpty(lon) ? null : Double.parseDouble(lon));
                        c.setGeoLocation(geoLocation);

                        return c;
                    }
                });

        SQLContext sqlContext = new SQLContext(context);
        DataFrame df = sqlContext.createDataFrame(dataSplits, Crime.class);
        JavaEsSparkSQL.saveToEs(df, "esh_sparksql/crimes_reflection");
    }
}