/**
 * output without streaming or staging.
 *
 * @throws Exception if failed
 */
@Test
public void output_nomove() throws Exception {
    profile.setOutputStaging(false);
    profile.setOutputStreaming(false);
    profile.getLocalFileSystem().getConf().set(
            HadoopDataSourceUtil.KEY_LOCAL_TEMPDIR, localtemp.getPath());
    HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
    setup(core);
    ModelOutput<StringBuilder> output = core.openOutput(
            context, StringBuilder.class, format, "output", "file.txt", counter);
    try {
        output.write(new StringBuilder("Hello, world!"));
    } finally {
        output.close();
    }
    assertThat(counter.get(), is(greaterThan(0L)));

    File target = new File(mapping, "output/file.txt");
    assertThat(target.exists(), is(false));

    // with staging disabled, the attempt commit already moves the file to its
    // final location; the transaction commit then leaves it in place
    commitAttempt(core);
    assertThat(target.exists(), is(true));
    commitTransaction(core);
    assertThat(target.exists(), is(true));
    assertThat(get(target), is(Arrays.asList("Hello, world!")));
}
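// For contrast: when output staging is left enabled (the default profile, as
// exercised by output_multirecord below), the file stays invisible after
// commitAttempt(core) and only appears after commitTransaction(core).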
/**
 * Output multiple file sets.
 *
 * @throws Exception if failed
 */
@Test
public void output_multiple() throws Exception {
    tester.options().putExtraAttribute(
            HadoopFileIoProcessor.OPTION_EXPORTER_ENABLED,
            GenericOptionValue.ENABLED.getSymbol());
    JobflowInfo info = tester.compileJobflow(MultipleOutputJob.class);
    ModelOutput<Ex1> source = tester.openOutput(Ex1.class, tester.getImporter(info, "input"));
    writeTestData(source);
    source.close();

    assertThat(tester.run(info), is(true));

    List<Ex1> out1 = getList(Out1ExporterDesc.class);
    checkSids(out1);
    checkValues(out1, 100);
    List<Ex1> out2 = getList(Out2ExporterDesc.class);
    checkSids(out2);
    checkValues(out2, 200);
    List<Ex1> out3 = getList(Out3ExporterDesc.class);
    checkSids(out3);
    checkValues(out3, 300);
    List<Ex1> out4 = getList(Out4ExporterDesc.class);
    checkSids(out4);
    checkValues(out4, 400);
}
/**
 * input/output simple records.
 *
 * @throws Exception if failed
 */
@SuppressWarnings("unchecked")
@Test
public void simple_record() throws Exception {
    ModelLoader loader = generate();
    Class<?> type = loader.modelType("Simple");
    assertThat(type.isAnnotationPresent(ModelInputLocation.class), is(true));
    assertThat(type.isAnnotationPresent(ModelOutputLocation.class), is(true));

    ModelWrapper object = loader.newModel("Simple");
    DataOutputBuffer output = new DataOutputBuffer();
    ModelOutput<Object> modelOut = (ModelOutput<Object>) type
            .getAnnotation(ModelOutputLocation.class)
            .value()
            .getDeclaredConstructor(RecordEmitter.class)
            .newInstance(new TsvEmitter(new OutputStreamWriter(output, "UTF-8")));

    object.set("sid", 1L);
    object.set("value", new Text("hello"));
    modelOut.write(object.unwrap());
    object.set("sid", 2L);
    object.set("value", new Text("world"));
    modelOut.write(object.unwrap());
    object.set("sid", 3L);
    object.set("value", null);
    modelOut.write(object.unwrap());
    modelOut.close();

    DataInputBuffer input = new DataInputBuffer();
    input.reset(output.getData(), output.getLength());
    ModelInput<Object> modelIn = (ModelInput<Object>) type
            .getAnnotation(ModelInputLocation.class)
            .value()
            .getDeclaredConstructor(RecordParser.class)
            .newInstance(new TsvParser(new InputStreamReader(input, "UTF-8")));

    ModelWrapper copy = loader.newModel("Simple");
    modelIn.readTo(copy.unwrap());
    assertThat(copy.get("sid"), is((Object) 1L));
    assertThat(copy.get("value"), is((Object) new Text("hello")));
    modelIn.readTo(copy.unwrap());
    assertThat(copy.get("sid"), is((Object) 2L));
    assertThat(copy.get("value"), is((Object) new Text("world")));
    modelIn.readTo(copy.unwrap());
    assertThat(copy.get("sid"), is((Object) 3L));
    assertThat(copy.getOption("value").isNull(), is(true));
    assertThat(input.read(), is(-1));
    modelIn.close();
}
private void writeEx2(ModelOutput<Ex2> output, int... sids) throws IOException {
    try {
        Ex2 value = new Ex2();
        for (int sid : sids) {
            value.setSid(sid);
            value.setValue(sid);
            value.setStringAsString(String.valueOf(sid));
            output.write(value);
        }
    } finally {
        output.close();
    }
}
/**
 * rollback output.
 *
 * @throws Exception if failed
 */
@Test
public void output_rollback() throws Exception {
    HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
    setup(core);
    ModelOutput<StringBuilder> output = core.openOutput(
            context, StringBuilder.class, format, "output", "file.txt", counter);
    try {
        output.write(new StringBuilder("Hello, world!"));
    } finally {
        output.close();
    }

    // rolling back (cleanup without commit) must discard the attempt output
    cleanup(core);
    assertThat(new File(mapping, "output/file.txt").exists(), is(false));
}
/**
 * all primitive types.
 *
 * @throws Exception if failed
 */
@SuppressWarnings("unchecked")
@Test
public void primitives() throws Exception {
    ModelLoader loader = generate();
    Class<?> type = loader.modelType("Primitives");
    assertThat(type.isAnnotationPresent(ModelInputLocation.class), is(true));
    assertThat(type.isAnnotationPresent(ModelOutputLocation.class), is(true));

    ModelWrapper object = loader.newModel("Primitives");
    object.set("type_boolean", true);
    object.set("type_byte", (byte) 64);
    object.set("type_short", (short) 256);
    object.set("type_int", 100);
    object.set("type_long", 200L);
    object.set("type_float", 300.f);
    object.set("type_double", 400.d);
    object.set("type_decimal", new BigDecimal("1234.567"));
    object.set("type_text", new Text("Hello, world!"));
    object.set("type_date", new Date(2011, 3, 31));
    object.set("type_datetime", new DateTime(2011, 3, 31, 23, 30, 1));

    DataOutputBuffer output = new DataOutputBuffer();
    ModelOutput<Object> modelOut = (ModelOutput<Object>) type
            .getAnnotation(ModelOutputLocation.class)
            .value()
            .getDeclaredConstructor(RecordEmitter.class)
            .newInstance(new TsvEmitter(new OutputStreamWriter(output, "UTF-8")));
    modelOut.write(object.unwrap());
    modelOut.write(object.unwrap());
    modelOut.write(object.unwrap());
    modelOut.close();

    DataInputBuffer input = new DataInputBuffer();
    input.reset(output.getData(), output.getLength());
    ModelInput<Object> modelIn = (ModelInput<Object>) type
            .getAnnotation(ModelInputLocation.class)
            .value()
            .getDeclaredConstructor(RecordParser.class)
            .newInstance(new TsvParser(new InputStreamReader(input, "UTF-8")));
    ModelWrapper copy = loader.newModel("Primitives");
    modelIn.readTo(copy.unwrap());
    assertThat(object.unwrap(), equalTo(copy.unwrap()));
    assertThat(input.read(), is(-1));
    modelIn.close();
}
/**
 * Reads a TSV file from the stream and writes it out as input data for the job.
 *
 * @param <T> the type of the model class corresponding to the import target table
 * @param targetTableModel the model class corresponding to the import target table
 * @param dfsFilePath the target file path on HDFS
 * @param inputStream the input stream (FileList)
 * @return the number of records written
 * @throws BulkLoaderSystemException if reading or writing failed
 */
protected <T> long write(Class<T> targetTableModel, URI dfsFilePath, InputStream inputStream)
        throws BulkLoaderSystemException {
    Configuration conf = new Configuration();
    TsvIoFactory<T> factory = new TsvIoFactory<>(targetTableModel);
    try (ModelInput<T> input = factory.createModelInput(inputStream)) {
        long count = 0;
        T buffer = factory.createModelObject();
        try (ModelOutput<T> output =
                TemporaryStorage.openOutput(conf, targetTableModel, new Path(dfsFilePath))) {
            while (input.readTo(buffer)) {
                count++;
                output.write(buffer);
            }
        }
        return count;
    } catch (IOException e) {
        throw new BulkLoaderSystemException(
                e, getClass(), "TG-EXTRACTOR-02001",
                "Failed to write the file to the DFS. URI: " + dfsFilePath);
    }
}
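/**
 * A minimal usage sketch (illustrative only, not part of the original class):
 * drives {@link #write(Class, URI, InputStream)} from a local TSV file. The
 * method name and parameters here are hypothetical, and java.io.File and
 * java.io.FileInputStream are assumed to be imported.
 */
protected <T> long writeFromLocalFile(Class<T> targetTableModel, File tsvFile, URI dfsFilePath)
        throws BulkLoaderSystemException, IOException {
    try (InputStream in = new FileInputStream(tsvFile)) {
        // delegates to write(...), which copies every record onto temporary storage
        return write(targetTableModel, dfsFilePath, in);
    }
}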
private void writeTestData(ModelOutput<Ex1> source) throws IOException {
    // writes one record with its initial (unset) sid, then records with sid 1..9
    Ex1 value = new Ex1();
    source.write(value);
    for (int sid = 1; sid <= 9; sid++) {
        value.setSid(sid);
        source.write(value);
    }
}
/**
 * Creates a new cache with some deleted values.
 *
 * @throws Exception if failed
 */
@Test
public void create_deleted() throws Exception {
    CacheInfo info = new CacheInfo(
            "a",
            "id",
            calendar("2011-12-13 14:15:16"),
            "EXAMPLE",
            Collections.singleton("COL"),
            "com.example.Model",
            123L);
    framework.deployLibrary(TestDataModel.class, "batchapps/tbatch/lib/jobflow-tflow.jar");
    CacheStorage storage = new CacheStorage(getConfiguration(), getTargetUri());
    try {
        storage.putPatchCacheInfo(info);
        ModelOutput<TestDataModel> output = create(storage, storage.getPatchContents("0"));
        try {
            // only every 10th record (systemId 0, 10, ..., 90) stays live;
            // the other 90 records are flagged as deleted
            TestDataModel model = new TestDataModel();
            for (int i = 0; i < 100; i++) {
                model.systemId.set(i);
                model.deleted.set(i % 10 != 0);
                output.write(model);
            }
        } finally {
            output.close();
        }
        execute(CacheBuildClient.SUBCOMMAND_CREATE);

        assertThat(storage.getHeadCacheInfo(), is(info));
        List<TestDataModel> results = collect(storage, storage.getHeadContents("*"));
        assertThat(results.size(), is(10));
        for (int i = 0; i < 10; i++) {
            assertThat(results.get(i).systemId.get(), is(i * 10L));
        }
    } finally {
        storage.close();
    }
}
/**
 * output multiple files.
 *
 * @throws Exception if failed
 */
@Test
public void output_multifile() throws Exception {
    HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
    setup(core);
    for (int i = 0; i < 3; i++) {
        ModelOutput<StringBuilder> output = core.openOutput(
                context, StringBuilder.class, format, "output", "file" + i + ".txt", counter);
        try {
            for (int j = 0; j < i + 1; j++) {
                output.write(new StringBuilder("Hello" + j));
            }
        } finally {
            output.close();
        }
    }
    commit(core);
    assertThat(get(new File(mapping, "output/file0.txt")), is(Arrays.asList("Hello0")));
    assertThat(get(new File(mapping, "output/file1.txt")), is(Arrays.asList("Hello0", "Hello1")));
    assertThat(get(new File(mapping, "output/file2.txt")),
            is(Arrays.asList("Hello0", "Hello1", "Hello2")));
}
/**
 * output multiple records.
 *
 * @throws Exception if failed
 */
@Test
public void output_multirecord() throws Exception {
    HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
    setup(core);
    ModelOutput<StringBuilder> output = core.openOutput(
            context, StringBuilder.class, format, "output", "file.txt", counter);
    try {
        output.write(new StringBuilder("Hello, world!"));
    } finally {
        output.close();
    }

    // with the default profile, output is staged: the file must stay invisible
    // through the attempt commit and only appear after the transaction commit
    // (contrast with output_nomove above)
    File target = new File(mapping, "output/file.txt");
    assertThat(target.exists(), is(false));
    commitAttempt(core);
    assertThat(target.exists(), is(false));
    commitTransaction(core);
    assertThat(target.exists(), is(true));
    assertThat(get(target), is(Arrays.asList("Hello, world!")));
}
/**
 * Updates a cache, deleting some entries.
 *
 * @throws Exception if failed
 */
@Test
public void update_delete() throws Exception {
    CacheInfo info = new CacheInfo(
            "a",
            "id",
            calendar("2011-12-13 14:15:16"),
            "EXAMPLE",
            Collections.singleton("COL"),
            "com.example.Model",
            123L);
    framework.deployLibrary(TestDataModel.class, "batchapps/tbatch/lib/jobflow-tflow.jar");
    CacheStorage storage = new CacheStorage(getConfiguration(), getTargetUri());
    try {
        storage.putPatchCacheInfo(info);
        ModelOutput<TestDataModel> head = create(storage, storage.getHeadContents("0"));
        try {
            TestDataModel model = new TestDataModel();
            for (int i = 0; i < 10; i++) {
                model.systemId.set(i);
                model.value.set("HEAD");
                model.deleted.set(false);
                head.write(model);
            }
        } finally {
            head.close();
        }
        ModelOutput<TestDataModel> patch = create(storage, storage.getPatchContents("0"));
        try {
            // the patch touches the even IDs only: 0, 4, 8 become deletions,
            // while 2 and 6 are updated to "NEXT"
            TestDataModel model = new TestDataModel();
            for (int i = 0; i < 10; i += 2) {
                model.systemId.set(i);
                model.value.set("NEXT");
                model.deleted.set(i % 4 == 0);
                patch.write(model);
            }
        } finally {
            patch.close();
        }
        execute(CacheBuildClient.SUBCOMMAND_UPDATE);

        assertThat(storage.getHeadCacheInfo(), is(info));
        // expect 7 survivors: odd IDs keep "HEAD", 2 and 6 carry "NEXT",
        // and 0, 4, 8 have been deleted
        List<TestDataModel> results = collect(storage, storage.getHeadContents("*"));
        assertThat(results.size(), is(7));
        assertThat(results.get(0).systemId.get(), is(1L));
        assertThat(results.get(0).value.toString(), is("HEAD"));
        assertThat(results.get(1).systemId.get(), is(2L));
        assertThat(results.get(1).value.toString(), is("NEXT"));
        assertThat(results.get(2).systemId.get(), is(3L));
        assertThat(results.get(2).value.toString(), is("HEAD"));
        assertThat(results.get(3).systemId.get(), is(5L));
        assertThat(results.get(3).value.toString(), is("HEAD"));
        assertThat(results.get(4).systemId.get(), is(6L));
        assertThat(results.get(4).value.toString(), is("NEXT"));
        assertThat(results.get(5).systemId.get(), is(7L));
        assertThat(results.get(5).value.toString(), is("HEAD"));
        assertThat(results.get(6).systemId.get(), is(9L));
        assertThat(results.get(6).value.toString(), is("HEAD"));
    } finally {
        storage.close();
    }
}
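/**
 * A hedged sketch of the merge rule the two cache tests above exercise; this
 * is an assumption for illustration, not the actual CacheBuildClient
 * implementation. A patch record replaces the head record with the same system
 * ID, and records flagged as deleted are dropped from the resulting head.
 * Assumes java.util.Map, java.util.Set, and java.util.TreeMap are imported.
 */
private static Map<Long, String> mergeSketch(
        Map<Long, String> head, Map<Long, String> patch, Set<Long> deleted) {
    Map<Long, String> merged = new TreeMap<>(head);
    merged.putAll(patch);               // patch entries win over head entries
    merged.keySet().removeAll(deleted); // deleted IDs vanish from the head
    return merged;
}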