@Override
  public void write(E entity) {
    Preconditions.checkState(
        state.equals(ReaderWriterState.OPEN), "Attempt to write to a writer in state: %s", state);

    reusedKey.reuseFor(entity);

    DatasetWriter<E> writer = cachedWriters.getIfPresent(reusedKey);
    if (writer == null) {
      // avoid checking on every write whether the entity belongs in the view
      // by only checking when a new writer is created
      Preconditions.checkArgument(
          view.includes(entity), "View %s does not include entity %s", view, entity);
      // copy the key because the cache holds a reference to it; reusedKey is mutated on each write
      StorageKey key = StorageKey.copy(reusedKey);
      try {
        writer = cachedWriters.getUnchecked(key);
      } catch (UncheckedExecutionException ex) {
        throw new IllegalArgumentException(
            "Problem creating view for entity: " + entity, ex.getCause());
      }
    }

    writer.write(entity);
  }
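The cachedWriters field is not shown in this example, but its use of getIfPresent, getUnchecked, and UncheckedExecutionException indicates a Guava LoadingCache keyed by StorageKey: a lookup that does not load on a miss, and a loading call that wraps loader failures. Below is a minimal sketch of how such a cache could be wired up; the maxWriters parameter and the newPartitionWriter helper are illustrative assumptions, not Kite's actual internals.

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.cache.RemovalListener;
import com.google.common.cache.RemovalNotification;

  private LoadingCache<StorageKey, DatasetWriter<E>> buildWriterCache(
      final View<E> view, long maxWriters) {
    return CacheBuilder.newBuilder()
        // bound the number of simultaneously open writers
        .maximumSize(maxWriters)
        // close writers as they are evicted so their buffered data is flushed
        .removalListener(new RemovalListener<StorageKey, DatasetWriter<E>>() {
          @Override
          public void onRemoval(RemovalNotification<StorageKey, DatasetWriter<E>> n) {
            n.getValue().close();
          }
        })
        // open a writer for a partition's StorageKey on a cache miss
        .build(new CacheLoader<StorageKey, DatasetWriter<E>>() {
          @Override
          public DatasetWriter<E> load(StorageKey key) {
            return newPartitionWriter(view, key); // hypothetical helper
          }
        });
  }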
Example #2
  @Test
  public void testUseReaderSchema() throws IOException {

    // Create a schema with only a username, so we can test reading it
    // with an enhanced record structure.
    Schema oldRecordSchema =
        SchemaBuilder.record("org.kitesdk.data.user.OldUserRecord")
            .fields()
            .requiredString("username")
            .endRecord();

    // create the dataset
    Dataset<Record> in =
        repo.create("ns", "in", new DatasetDescriptor.Builder().schema(oldRecordSchema).build());
    Dataset<Record> out =
        repo.create("ns", "out", new DatasetDescriptor.Builder().schema(oldRecordSchema).build());
    Record oldUser = new Record(oldRecordSchema);
    oldUser.put("username", "user");

    DatasetWriter<Record> writer = in.newWriter();

    try {
      writer.write(oldUser);
    } finally {
      writer.close();
    }

    Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);

    // Read the data back using the enhanced NewUserRecord class as the reader
    // schema; the records in the dataset were written with the old schema.
    PCollection<NewUserRecord> data =
        pipeline.read(CrunchDatasets.asSource(in.getUri(), NewUserRecord.class));

    PCollection<NewUserRecord> processed =
        data.parallelDo(new UserRecordIdentityFn(), Avros.records(NewUserRecord.class));

    pipeline.write(processed, CrunchDatasets.asTarget(out));

    DatasetReader<Record> reader = out.newReader();

    Assert.assertTrue("Pipeline failed.", pipeline.run().succeeded());

    try {
      // there should be one record that is equal to our old user generic record.
      Assert.assertEquals(oldUser, reader.next());
      Assert.assertFalse(reader.hasNext());
    } finally {
      reader.close();
    }
  }
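UserRecordIdentityFn is referenced above but not defined in this example. A minimal sketch, assuming it is a plain Crunch identity DoFn that passes each NewUserRecord through unchanged:

import org.apache.crunch.DoFn;
import org.apache.crunch.Emitter;

  private static class UserRecordIdentityFn extends DoFn<NewUserRecord, NewUserRecord> {
    @Override
    public void process(NewUserRecord input, Emitter<NewUserRecord> emitter) {
      // emit the record unchanged; the test only exercises schema resolution
      emitter.emit(input);
    }
  }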
Example #3
  private static void writeTestRecords(View<TestRecord> view) {
    DatasetWriter<TestRecord> writer = null;
    try {
      writer = view.newWriter();
      for (int i = 0; i < 10; i += 1) {
        TestRecord record = new TestRecord();
        record.id = i;
        record.data = "test/-" + i;
        writer.write(record);
      }

    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  }
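The TestRecord type is also not shown. Below is a minimal POJO compatible with the assignments in the loop above (the field types are inferred from that loop, not taken from Kite's test sources), followed by a hypothetical usage that loads a file-based dataset by URI and writes the ten records into it.

  public static class TestRecord {
    public int id;
    public String data;
  }

  // Hypothetical usage: the dataset URI is illustrative.
  private static void example() {
    Dataset<TestRecord> dataset =
        Datasets.load("dataset:file:/tmp/data/ns/test", TestRecord.class);
    writeTestRecords(dataset);
  }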