/**
 * Builds the table for {@code key} by resolving it to a file selection under the configured
 * location and asking the format matchers whether they can read it.
 *
 * <p>When the selection contains directories, the directory matchers are tried first. If none of
 * them accepts the selection, directories are removed from it and the file matchers are tried on
 * what remains.
 *
 * @param key name resolved against {@code config.getLocation()} to build the selection
 * @return a {@link DynamicDrillTable} for the first matcher that yields a non-null selection, or
 *     {@code null} when no selection can be built, nothing remains after removing directories,
 *     no matcher accepts the selection, or an {@link IOException} occurs
 */
@Override
public DrillTable create(String key) {
  try {
    FileSelection fileSelection = FileSelection.create(fs, config.getLocation(), key);
    if (fileSelection == null) {
      return null;
    }

    if (fileSelection.containsDirectories(fs)) {
      for (FormatMatcher m : dirMatchers) {
        try {
          Object selection = m.isReadable(fileSelection);
          if (selection != null) {
            return new DynamicDrillTable(plugin, storageEngineName, selection);
          }
        } catch (IOException e) {
          // A failure in one directory matcher must not prevent the others from being tried.
          logger.debug("File read failed.", e);
        }
      }

      fileSelection = fileSelection.minusDirectories(fs);
      // minusDirectories() yields null when nothing but directories was selected (for example
      // an empty folder); there is nothing for the file matchers to inspect in that case.
      if (fileSelection == null) {
        return null;
      }
    }

    for (FormatMatcher m : fileMatchers) {
      Object selection = m.isReadable(fileSelection);
      if (selection != null) {
        return new DynamicDrillTable(plugin, storageEngineName, selection);
      }
    }
  } catch (IOException e) {
    logger.debug(
        "Failed to create DrillTable with root {} and name {}", config.getLocation(), key, e);
  }

  return null;
}
@Override public FormatSelection isReadable(DrillFileSystem fs, FileSelection selection) throws IOException { // TODO: we only check the first file for directory reading. This is because if (selection.containsDirectories(fs)) { if (isDirReadable(fs, selection.getFirstPath(fs))) { return new FormatSelection(plugin.getConfig(), expandSelection(fs, selection)); } } return super.isReadable(fs, selection); }
private FileSelection expandSelection(DrillFileSystem fs, FileSelection selection) throws IOException { if (metaDataFileExists(fs, selection.getFirstPath(fs))) { FileStatus metaRootDir = selection.getFirstPath(fs); Path metaFilePath = getMetadataPath(metaRootDir); // get the metadata for the directory by reading the metadata file ParquetTableMetadata_v1 metadata = Metadata.readBlockMeta(fs, metaFilePath.toString()); List<String> fileNames = Lists.newArrayList(); for (ParquetFileMetadata file : metadata.files) { fileNames.add(file.path); } // when creating the file selection, set the selection root in the form /a/b instead of // file:/a/b. The reason is that the file names above have been created in the form // /a/b/c.parquet and the format of the selection root must match that of the file names // otherwise downstream operations such as partition pruning can break. Path metaRootPath = Path.getPathWithoutSchemeAndAuthority(metaRootDir.getPath()); return new FileSelection( fileNames, metaRootPath.toString(), metadata /* save metadata for future use */); } else { // don't expand yet; ParquetGroupScan's metadata gathering operation // does that. return selection; } }
/**
 * Checks that {@code FileSelection.create} returns {@code null} for every combination of
 * {@code null} or empty statuses, files and root.
 */
@Test
public void testCreateReturnsNullWhenArgumentsAreIllegal() {
  final Object[] statusCandidates = {null, EMPTY_STATUSES};
  final Object[] fileCandidates = {null, EMPTY_FILES};
  final Object[] rootCandidates = {null, EMPTY_ROOT};

  for (final Object statuses : statusCandidates) {
    for (final Object files : fileCandidates) {
      for (final Object root : rootCandidates) {
        assertNull(
            FileSelection.create(
                (List<FileStatus>) statuses, (List<String>) files, (String) root));
      }
    }
  }
}
public FileSelection minusDirectories(DrillFileSystem fs) throws IOException { if (isExpanded()) { return this; } Stopwatch timer = Stopwatch.createStarted(); final List<FileStatus> statuses = getStatuses(fs); final int total = statuses.size(); final Path[] paths = new Path[total]; for (int i = 0; i < total; i++) { paths[i] = statuses.get(i).getPath(); } final List<FileStatus> allStats = fs.list(true, paths); final List<FileStatus> nonDirectories = Lists.newArrayList( Iterables.filter( allStats, new Predicate<FileStatus>() { @Override public boolean apply(@Nullable FileStatus status) { return !status.isDirectory(); } })); final FileSelection fileSel = create(nonDirectories, null, selectionRoot); logger.debug( "FileSelection.minusDirectories() took {} ms, numFiles: {}", timer.elapsed(TimeUnit.MILLISECONDS), total); // fileSel will be null if we query an empty folder if (fileSel != null) { fileSel.setExpanded(); } return fileSel; }
/**
 * Creates a group scan over {@code newFiles} by cloning the current scan with a new selection.
 *
 * @param newFiles files the new scan should cover
 * @return a clone of the group scan held by {@code scanRel}, built on a selection of
 *     {@code newFiles} rooted at {@code getBaseTableLocation()}
 * @throws IOException if the selection or the cloned scan cannot be created
 */
@Override
public GroupScan createNewGroupScan(List<String> newFiles) throws IOException {
  final FileSelection selection =
      FileSelection.create(null, newFiles, getBaseTableLocation());
  final GroupScan currentScan = scanRel.getGroupScan();
  return ((FileGroupScan) currentScan).clone(selection);
}