/** * Adds a new HdfsPartition to internal partition list, populating with file format information * and file locations. If a partition contains no files, it's not added. For unchanged files * (indicated by unchanged mtime), reuses the FileDescriptor from the oldFileDescMap. Otherwise, * creates a new FileDescriptor for each modified or new file and adds it to newFileDescs. Returns * new partition or null, if none was added. * * @throws InvalidStorageDescriptorException if the supplied storage descriptor contains metadata * that Impala can't understand. */ private HdfsPartition addPartition( StorageDescriptor storageDescriptor, org.apache.hadoop.hive.metastore.api.Partition msPartition, List<LiteralExpr> partitionKeyExprs, Map<String, FileDescriptor> oldFileDescMap, List<FileDescriptor> newFileDescs) throws IOException, InvalidStorageDescriptorException { HdfsStorageDescriptor fileFormatDescriptor = HdfsStorageDescriptor.fromStorageDescriptor(this.name_, storageDescriptor); Path partDirPath = new Path(storageDescriptor.getLocation()); List<FileDescriptor> fileDescriptors = Lists.newArrayList(); if (DFS.exists(partDirPath)) { // DistributedFilesystem does not have an API that takes in a timestamp and return // a list of files that has been added/changed since. Therefore, we are calling // DFS.listStatus() to list all the files. for (FileStatus fileStatus : DFS.listStatus(partDirPath)) { String fileName = fileStatus.getPath().getName().toString(); if (fileStatus.isDirectory() || FileSystemUtil.isHiddenFile(fileName) || HdfsCompression.fromFileName(fileName) == HdfsCompression.LZO_INDEX) { // Ignore directory, hidden file starting with . or _, and LZO index files // If a directory is erroneously created as a subdirectory of a partition dir // we should ignore it and move on. Hive will not recurse into directories. // Skip index files, these are read by the LZO scanner directly. continue; } String fullPath = fileStatus.getPath().toString(); FileDescriptor fd = (oldFileDescMap != null) ? oldFileDescMap.get(fullPath) : null; if (fd != null && fd.getFileLength() == fileStatus.getLen() && fd.getModificationTime() == fileStatus.getModificationTime()) { // Reuse the old file descriptor along with its block metadata if the file // length and mtime has not been changed. } else { // Create a new file descriptor. The block metadata will be populated by // loadFileDescriptorsBlockMd. fd = new FileDescriptor(fullPath, fileStatus.getLen(), fileStatus.getModificationTime()); newFileDescs.add(fd); } fileDescriptors.add(fd); fileDescMap_.put(fullPath, fd); } HdfsPartition partition = new HdfsPartition( this, msPartition, partitionKeyExprs, fileFormatDescriptor, fileDescriptors, getAvailableAccessLevel(partDirPath)); partitions_.add(partition); numHdfsFiles_ += fileDescriptors.size(); totalHdfsBytes_ += partition.getSize(); return partition; } else { LOG.warn("Path " + partDirPath + " does not exist for partition. Ignoring."); return null; } }