예제 #1
0
  /**
   * Creates an HdfsPartition for the directory named by the storage descriptor's location and
   * registers it in this table's partition list, updating the table's file and byte counters.
   *
   * <p>Every entry directly under the partition directory is listed. Subdirectories, hidden
   * files and LZO index files are skipped. For each remaining file, the FileDescriptor found in
   * oldFileDescMap under the file's full path is reused when both its length and its
   * modification time match the listing; otherwise a new FileDescriptor is created and also
   * appended to newFileDescs. Each descriptor is recorded in fileDescMap_ under the full path.
   *
   * <p>The partition is added whenever the directory exists, even if no file in it qualifies.
   * If the directory does not exist, a warning is logged and nothing is added.
   *
   * @param storageDescriptor supplies the file format metadata and the partition location
   * @param msPartition metastore partition object handed to the new HdfsPartition
   * @param partitionKeyExprs partition key expressions handed to the new HdfsPartition
   * @param oldFileDescMap previously known file descriptors keyed by full path; may be null,
   *     in which case every file gets a new descriptor
   * @param newFileDescs receives each descriptor created by this call
   * @return the partition that was added, or null if the partition directory does not exist
   * @throws IOException declared by this method; presumably raised by the filesystem calls
   * @throws InvalidStorageDescriptorException if the supplied storage descriptor contains
   *     metadata that Impala can't understand.
   */
  private HdfsPartition addPartition(
      StorageDescriptor storageDescriptor,
      org.apache.hadoop.hive.metastore.api.Partition msPartition,
      List<LiteralExpr> partitionKeyExprs,
      Map<String, FileDescriptor> oldFileDescMap,
      List<FileDescriptor> newFileDescs)
      throws IOException, InvalidStorageDescriptorException {
    HdfsStorageDescriptor formatDesc =
        HdfsStorageDescriptor.fromStorageDescriptor(this.name_, storageDescriptor);
    Path partDirPath = new Path(storageDescriptor.getLocation());

    // Nothing to load when the partition directory is missing.
    if (!DFS.exists(partDirPath)) {
      LOG.warn("Path " + partDirPath + " does not exist for partition. Ignoring.");
      return null;
    }

    List<FileDescriptor> partitionFiles = Lists.newArrayList();
    // The filesystem API offers no "changed since timestamp" query, so the whole directory is
    // listed and every entry is compared against the previously known descriptors.
    for (FileStatus status : DFS.listStatus(partDirPath)) {
      Path filePath = status.getPath();
      String baseName = filePath.getName();

      // Subdirectories are never recursed into (Hive does not either), hidden files
      // (leading '.' or '_') are not data, and LZO index files are read directly by the
      // LZO scanner.
      boolean skipEntry =
          status.isDirectory()
              || FileSystemUtil.isHiddenFile(baseName)
              || HdfsCompression.fromFileName(baseName) == HdfsCompression.LZO_INDEX;
      if (skipEntry) {
        continue;
      }

      String fullPath = filePath.toString();
      long length = status.getLen();
      long mtime = status.getModificationTime();

      FileDescriptor descriptor = null;
      if (oldFileDescMap != null) {
        descriptor = oldFileDescMap.get(fullPath);
      }

      // A known descriptor (with its block metadata) is kept only when neither the length
      // nor the mtime has changed; anything else gets a new descriptor whose block metadata
      // will be populated by loadFileDescriptorsBlockMd.
      if (descriptor == null
          || descriptor.getFileLength() != length
          || descriptor.getModificationTime() != mtime) {
        descriptor = new FileDescriptor(fullPath, length, mtime);
        newFileDescs.add(descriptor);
      }

      partitionFiles.add(descriptor);
      fileDescMap_.put(fullPath, descriptor);
    }

    HdfsPartition added =
        new HdfsPartition(
            this,
            msPartition,
            partitionKeyExprs,
            formatDesc,
            partitionFiles,
            getAvailableAccessLevel(partDirPath));
    partitions_.add(added);
    numHdfsFiles_ += partitionFiles.size();
    totalHdfsBytes_ += added.getSize();
    return added;
  }