@Override public void write(E entity) { Preconditions.checkState( state.equals(ReaderWriterState.OPEN), "Attempt to write to a writer in state:%s", state); reusedKey.reuseFor(entity); DatasetWriter<E> writer = cachedWriters.getIfPresent(reusedKey); if (writer == null) { // avoid checking in every whether the entity belongs in the view by only // checking when a new writer is created Preconditions.checkArgument( view.includes(entity), "View %s does not include entity %s", view, entity); // get a new key because it is stored in the cache StorageKey key = StorageKey.copy(reusedKey); try { writer = cachedWriters.getUnchecked(key); } catch (UncheckedExecutionException ex) { throw new IllegalArgumentException( "Problem creating view for entity: " + entity, ex.getCause()); } } writer.write(entity); }
@Test public void testUseReaderSchema() throws IOException { // Create a schema with only a username, so we can test reading it // with an enhanced record structure. Schema oldRecordSchema = SchemaBuilder.record("org.kitesdk.data.user.OldUserRecord") .fields() .requiredString("username") .endRecord(); // create the dataset Dataset<Record> in = repo.create("ns", "in", new DatasetDescriptor.Builder().schema(oldRecordSchema).build()); Dataset<Record> out = repo.create("ns", "out", new DatasetDescriptor.Builder().schema(oldRecordSchema).build()); Record oldUser = new Record(oldRecordSchema); oldUser.put("username", "user"); DatasetWriter<Record> writer = in.newWriter(); try { writer.write(oldUser); } finally { writer.close(); } Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class); // read data from updated dataset that has the new schema. // At this point, User class has the old schema PCollection<NewUserRecord> data = pipeline.read(CrunchDatasets.asSource(in.getUri(), NewUserRecord.class)); PCollection<NewUserRecord> processed = data.parallelDo(new UserRecordIdentityFn(), Avros.records(NewUserRecord.class)); pipeline.write(processed, CrunchDatasets.asTarget(out)); DatasetReader reader = out.newReader(); Assert.assertTrue("Pipeline failed.", pipeline.run().succeeded()); try { // there should be one record that is equal to our old user generic record. Assert.assertEquals(oldUser, reader.next()); Assert.assertFalse(reader.hasNext()); } finally { reader.close(); } }
/**
 * Writes ten {@code TestRecord}s to the given view, with ids 0 through 9 and
 * data values {@code "test/-0"} through {@code "test/-9"}.
 *
 * @param view the view to open a writer on
 */
private static void writeTestRecords(View<TestRecord> view) {
  // Acquire the writer before the try block: if newWriter() throws there is
  // nothing to close, which matches guarding close() with a null check.
  DatasetWriter<TestRecord> out = view.newWriter();
  try {
    int id = 0;
    while (id < 10) {
      TestRecord rec = new TestRecord();
      rec.id = id;
      rec.data = "test/-" + id;
      out.write(rec);
      id++;
    }
  } finally {
    out.close();
  }
}