Code example #1
File: HdfsTable.java  Project: nongli/Impala
 /**
  * Counts the number of unique data node network addresses across all file blocks
  * in the given partitions.
  */
 private static int countUniqueDataNetworkLocations(List<HdfsPartition> partitions) {
   Set<TNetworkAddress> uniqueHostPorts = Sets.newHashSet();
   for (HdfsPartition partition : partitions) {
     for (FileDescriptor fileDesc : partition.getFileDescriptors()) {
       for (THdfsFileBlock blockMd : fileDesc.getFileBlocks()) {
         for (TNetworkAddress networkAddress : blockMd.getNetwork_addresses()) {
           uniqueHostPorts.add(networkAddress);
         }
       }
     }
   }
   return uniqueHostPorts.size();
 }
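
The counting relies on TNetworkAddress (a generated Thrift struct) implementing
equals()/hashCode() over host and port, so the HashSet deduplicates replica
addresses across blocks. Below is a minimal, self-contained sketch of the same
technique using plain JDK types; InetSocketAddress standing in for TNetworkAddress
is an assumption for illustration, not part of the Impala code:

import java.net.InetSocketAddress;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class UniqueLocationCount {
  public static void main(String[] args) {
    // Two blocks whose replicas live on overlapping data nodes.
    List<List<InetSocketAddress>> blockReplicas = Arrays.asList(
        Arrays.asList(InetSocketAddress.createUnresolved("dn1", 50010),
                      InetSocketAddress.createUnresolved("dn2", 50010)),
        Arrays.asList(InetSocketAddress.createUnresolved("dn2", 50010),
                      InetSocketAddress.createUnresolved("dn3", 50010)));
    // Same pattern as countUniqueDataNetworkLocations: add every replica
    // address to a set and report the set's size.
    Set<InetSocketAddress> uniqueHostPorts = new HashSet<>();
    for (List<InetSocketAddress> replicas : blockReplicas) {
      uniqueHostPorts.addAll(replicas);
    }
    System.out.println("Unique data node addresses: " + uniqueHostPorts.size()); // prints 3
  }
}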
Code example #2
File: HdfsTable.java  Project: nongli/Impala
  /** Populates the file block metadata inside each file descriptor. */
  private void loadBlockMd(List<FileDescriptor> fileDescriptors) throws RuntimeException {
    LOG.debug("load block md for " + name_);
    // Block locations for all the files
    List<BlockLocation> blockLocations = Lists.newArrayList();

    // loop over all files and record their block metadata, minus volume ids
    for (FileDescriptor fileDescriptor : fileDescriptors) {
      Path p = new Path(fileDescriptor.getFilePath());
      BlockLocation[] locations = null;
      try {
        FileStatus fileStatus = DFS.getFileStatus(p);
        // fileDescriptors should not contain directories.
        Preconditions.checkArgument(!fileStatus.isDirectory());
        locations = DFS.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
        if (locations != null) {
          blockLocations.addAll(Arrays.asList(locations));
          for (BlockLocation location : locations) {
            FileBlock blockMd = new FileBlock(
                fileDescriptor.getFilePath(), fileDescriptor.getFileLength(), location);
            fileDescriptor.addFileBlock(blockMd);
          }
        }
      } catch (IOException e) {
        throw new RuntimeException(
            "Couldn't determine block locations for path '" + p + "':\n" + e.getMessage(), e);
      }
    }

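    // Without volume-id support there is nothing more to attach: the blocks keep
    // their network locations, but per-block disk ids are left unset.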
    if (!SUPPORTS_VOLUME_ID) {
      return;
    }

    // BlockStorageLocations for all the blocks
    // block described by blockMetadataList[i] is located at locations[i]
    BlockStorageLocation[] locations = null;
    try {
      // Get the BlockStorageLocations for all the blocks
      locations = DFS.getFileBlockStorageLocations(blockLocations);
    } catch (IOException e) {
      LOG.error("Couldn't determine block storage locations:\n" + e.getMessage());
      return;
    }

    if (locations == null || locations.length == 0) {
      LOG.warn("Attempted to get block locations but the call returned nulls");
      return;
    }

    if (locations.length != blockLocations.size()) {
      // blocks and locations don't match up
      LOG.error(
          "Number of block locations not equal to number of blocks: "
              + "#locations=" + locations.length
              + " #blocks=" + blockLocations.size());
      return;
    }

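    // Walk the blocks in the same file/block order used to build blockLocations
    // above, so locations[locationsIdx] lines up with the current block.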
    int locationsIdx = 0;
    int unknownDiskIdCount = 0;
    for (FileDescriptor fileDescriptor : fileDescriptors) {
      for (THdfsFileBlock blockMd : fileDescriptor.getFileBlocks()) {
        VolumeId[] volumeIds = locations[locationsIdx++].getVolumeIds();
        // Convert opaque VolumeId to 0 based ids.
        // TODO: the diskId should be eventually retrievable from Hdfs when
        // the community agrees this API is useful.
        int[] diskIds = new int[volumeIds.length];
        for (int i = 0; i < volumeIds.length; ++i) {
          diskIds[i] = getDiskId(volumeIds[i]);
          if (diskIds[i] < 0) ++unknownDiskIdCount;
        }
        FileBlock.setDiskIds(diskIds, blockMd);
      }
    }
    LOG.debug("loaded disk ids for table " + getFullName() + ". nodes: " + getNumNodes());
    if (unknownDiskIdCount > 0) {
      LOG.warn("unknown disk id count " + unknownDiskIdCount);
    }
  }
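
The getDiskId() helper called above is not shown in this excerpt; it maps Hadoop's
opaque VolumeId onto a 0-based disk index and returns a negative value for ids it
cannot interpret (tallied as unknownDiskIdCount). Below is a minimal sketch of one
plausible implementation, assuming equality on VolumeId identifies a physical disk;
the DiskIdMapper class and its field are illustrative, not Impala's actual code:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.VolumeId;

/** Hypothetical illustration of the VolumeId-to-disk-id mapping. */
class DiskIdMapper {
  // Distinct volume ids seen so far; a volume's position in this list is its disk id.
  private final List<VolumeId> knownVolumeIds = new ArrayList<VolumeId>();

  /** Returns a 0-based disk id for the volume, or -1 if the id is unusable. */
  int getDiskId(VolumeId volumeId) {
    if (volumeId == null) return -1; // counted as unknownDiskIdCount by the caller
    int idx = knownVolumeIds.indexOf(volumeId); // relies on VolumeId.equals()
    if (idx < 0) {
      knownVolumeIds.add(volumeId);
      idx = knownVolumeIds.size() - 1;
    }
    return idx;
  }
}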