/** * 指定のジョブにリソースの情報を追加する。 * * @param job 対象の情報 * @param resourcePath リソースへのパス (for temporary storage) * @param resourceName リソースの名前 * @throws IOException リソースの情報が不明であった場合 * @throws IllegalArgumentException 引数に{@code null}が含まれる場合 */ public static void add(Job job, String resourcePath, String resourceName) throws IOException { if (job == null) { throw new IllegalArgumentException("job must not be null"); // $NON-NLS-1$ } if (resourcePath == null) { throw new IllegalArgumentException("resourcePath must not be null"); // $NON-NLS-1$ } if (resourceName == null) { throw new IllegalArgumentException("resourceName must not be null"); // $NON-NLS-1$ } Configuration conf = job.getConfiguration(); List<FileStatus> list = TemporaryStorage.listStatus(conf, new Path(resourcePath)); if (list.isEmpty()) { throw new IOException(MessageFormat.format("Resource not found: {0}", resourcePath)); } List<String> localNames = restoreStrings(conf, getLocalCacheNameKey(resourceName)); List<String> remotePaths = restoreStrings(conf, getRemotePathKey(resourceName)); long size = conf.getLong(KEY_SIZE, 0L); int index = localNames.size(); for (FileStatus status : list) { String name = String.format("%s-%04d", resourceName, index++); // $NON-NLS-1$ StringBuilder buf = new StringBuilder(); buf.append(status.getPath().toString()); buf.append('#'); buf.append(name); String cachePath = buf.toString(); remotePaths.add(status.getPath().toString()); localNames.add(name); try { URI uri = new URI(cachePath); DistributedCache.addCacheFile(uri, conf); } catch (URISyntaxException e) { throw new IllegalStateException(e); } size += status.getLen(); } conf.setStrings( getLocalCacheNameKey(resourceName), localNames.toArray(new String[localNames.size()])); conf.setStrings( getRemotePathKey(resourceName), remotePaths.toArray(new String[remotePaths.size()])); conf.setLong(KEY_SIZE, size); if (JobCompatibility.isLocalMode(job)) { if (LOG.isDebugEnabled()) { LOG.debug( "symlinks for distributed cache will not be 
created in standalone mode"); //$NON-NLS-1$ } } else { DistributedCache.createSymlink(conf); } }
/**
 * Reads every record stored in the given temporary-storage file into memory.
 *
 * @param fs the file system whose configuration is used to open the file
 * @param status the status of the file to read
 * @return copies of all records read from the file, in the order they were read
 * @throws IOException if opening, reading, or closing the input fails
 */
private Collection<TestDataModel> collectContent(FileSystem fs, FileStatus status) throws IOException {
    Collection<TestDataModel> results = new ArrayList<>();
    // try-with-resources keeps a read failure as the primary exception; with a manual
    // finally block, an exception thrown by close() would replace it.
    try (ModelInput<TestDataModel> input =
            TemporaryStorage.openInput(fs.getConf(), TestDataModel.class, status.getPath())) {
        // one buffer object is reused for every read, so each record is copied before it is stored
        TestDataModel model = new TestDataModel();
        while (input.readTo(model)) {
            results.add(model.copy());
        }
    }
    return results;
}
/**
 * Returns the next record as a reflection, moving on to the next remaining input
 * whenever the current one has no more records.
 *
 * @return the reflection of the next record, or {@code null} if no inputs remain
 * @throws IOException if opening, reading, or closing an input fails
 */
@Override
public DataModelReflection next() throws IOException {
    for (;;) {
        if (current == null) {
            if (!rest.hasNext()) {
                // nothing is open and nothing is left to open
                return null;
            }
            current = TemporaryStorage.openInput(conf, definition.getModelClass(), rest.next());
        }
        if (current.readTo(object)) {
            return definition.toReflection(object);
        }
        // the current input is exhausted: release it and try the next one
        current.close();
        current = null;
    }
}
/**
 * Reads a TSV file from the stream and writes it out as input data for a job.
 *
 * @param <T> the model class type corresponding to the import target table
 * @param targetTableModel the model class corresponding to the import target table
 * @param dfsFilePath the location of the file to write
 * @param inputStream the stream from which the TSV content is read
 * @return the number of records written
 * @throws BulkLoaderSystemException if reading or writing fails
 */
protected <T> long write(Class<T> targetTableModel, URI dfsFilePath, InputStream inputStream)
        throws BulkLoaderSystemException {
    Configuration conf = new Configuration();
    TsvIoFactory<T> factory = new TsvIoFactory<>(targetTableModel);
    try (ModelInput<T> source = factory.createModelInput(inputStream)) {
        T record = factory.createModelObject();
        long written = 0L;
        Path destination = new Path(dfsFilePath);
        try (ModelOutput<T> sink = TemporaryStorage.openOutput(conf, targetTableModel, destination)) {
            while (source.readTo(record)) {
                sink.write(record);
                written++;
            }
        }
        // returned only after the output has been closed
        return written;
    } catch (IOException e) {
        String detail = "DFSにファイルを書き出す処理に失敗。URI:" + dfsFilePath;
        throw new BulkLoaderSystemException(e, getClass(), "TG-EXTRACTOR-02001", detail);
    }
}
/**
 * Opens a temporary-storage output for {@code TestDataModel} records at the given path,
 * using the configuration of the storage's file system.
 *
 * @param storage the cache storage whose file system configuration is used
 * @param path the path to open for output
 * @return the opened model output
 * @throws IOException if opening the output fails
 */
private ModelOutput<TestDataModel> create(CacheStorage storage, Path path) throws IOException {
    return TemporaryStorage.openOutput(
            storage.getFileSystem().getConf(), TestDataModel.class, path);
}