示例#1
0
  /**
   * Rewrites the offset and length of each block location so that they describe only the part of
   * the block that overlaps the requested range, expressed relative to the contained file rather
   * than to the har part file. The array elements are modified in place and the same array is
   * returned.
   *
   * @param locations block locations of har part file
   * @param start the start of the desired range in the contained file
   * @param len the length of the desired range
   * @param fileOffsetInHar the offset of the desired file in the har part file
   * @return the {@code locations} array that was passed in, with offsets and lengths adjusted
   */
  static BlockLocation[] fixBlockLocations(
      BlockLocation[] locations, long start, long len, long fileOffsetInHar) {
    // first offset beyond the requested range
    final long rangeEnd = start + len;

    for (int i = 0; i < locations.length; i++) {
      final BlockLocation block = locations[i];

      // Block boundaries translated into the coordinate system of the contained file.
      // The start may be negative when the file begins inside this part block.
      final long blockStartInFile = block.getOffset() - fileOffsetInHar;
      final long blockEndInFile = blockStartInFile + block.getLength();

      if (blockStartInFile < start) {
        // The block begins before the requested range: clamp the offset to the range start
        // and drop the leading bytes that fall outside the range.
        block.setOffset(start);
        block.setLength(block.getLength() - (start - blockStartInFile));
      } else {
        // The block begins inside the requested range: only re-base the offset.
        block.setOffset(blockStartInFile);
      }

      if (rangeEnd < blockEndInFile) {
        // The block runs past the requested range: drop the trailing bytes.
        block.setLength(block.getLength() - (blockEndInFile - rangeEnd));
      }
    }

    return locations;
  }
示例#2
0
  /**
   * Returns scan ranges (hdfs splits) plus their storage locations, including volume ids.
   *
   * <p>Blocks that report no host/port names are skipped. Every other block is cut into
   * consecutive scan ranges starting at the block's offset; all ranges produced from one block
   * share the same list of locations.
   *
   * @param maxScanRangeLength upper bound on the length of a single scan range; when it is not
   *     positive, each block is emitted as one scan range
   * @return the scan ranges of all blocks returned by {@code HdfsTable.getBlockMetadata}
   * @throws IllegalStateException if the host/port names of a block cannot be read
   */
  @Override
  public List<TScanRangeLocations> getScanRangeLocations(long maxScanRangeLength) {
    List<TScanRangeLocations> result = Lists.newArrayList();
    List<HdfsTable.BlockMetadata> blockMetadata = HdfsTable.getBlockMetadata(partitions);
    for (HdfsTable.BlockMetadata block : blockMetadata) {
      // collect all locations for block
      String[] blockHostPorts = null;
      try {
        // Use getNames() to get port number as well
        blockHostPorts = block.getLocation().getNames();
        // uncomment if you need to see detailed block locations
        // LOG.info(Arrays.toString(blockHostPorts));
      } catch (IOException e) {
        // not expected in practice; keep the original exception as the cause so the
        // stack trace of the underlying failure is not lost
        String errorMsg = "BlockLocation.getNames() failed:\n" + e.getMessage();
        LOG.error(errorMsg, e);
        throw new IllegalStateException(errorMsg, e);
      }

      if (blockHostPorts.length == 0) {
        // we didn't get locations for this block; for now, just ignore the block
        // TODO: do something meaningful with that
        continue;
      }

      // record host/ports and volume ids (at least one entry is present at this point)
      List<TScanRangeLocation> locations = Lists.newArrayList();
      for (int i = 0; i < blockHostPorts.length; ++i) {
        TScanRangeLocation location = new TScanRangeLocation();
        String hostPort = blockHostPorts[i];
        location.setServer(addressToTHostPort(hostPort));
        location.setVolume_id(block.getVolumeId(i));
        locations.add(location);
      }

      // create scan ranges, taking into account maxScanRangeLength
      BlockLocation blockLocation = block.getLocation();
      long currentOffset = blockLocation.getOffset();
      long remainingLength = blockLocation.getLength();
      while (remainingLength > 0) {
        // cap the range at maxScanRangeLength only when a positive limit was given
        long currentLength = remainingLength;
        if (maxScanRangeLength > 0 && remainingLength > maxScanRangeLength) {
          currentLength = maxScanRangeLength;
        }
        TScanRange scanRange = new TScanRange();
        scanRange.setHdfs_file_split(
            new THdfsFileSplit(
                block.getFileName(), currentOffset, currentLength, block.getPartition().getId()));
        TScanRangeLocations scanRangeLocations = new TScanRangeLocations();
        scanRangeLocations.scan_range = scanRange;
        // the same list instance is attached to every range cut from this block
        scanRangeLocations.locations = locations;
        result.add(scanRangeLocations);
        remainingLength -= currentLength;
        currentOffset += currentLength;
      }
    }
    return result;
  }
示例#3
0
 /**
  * Creates a fragment from a block location.
  *
  * <p>Delegates to another constructor of this class, passing the block's offset, length and
  * hosts, followed by {@code null} as the last argument.
  * NOTE(review): the meaning of the trailing {@code null} is defined by the delegated
  * constructor, which is not visible here; confirm before relying on it.
  *
  * @param tableName forwarded unchanged as the first argument of the delegated constructor
  * @param uri forwarded unchanged as the second argument of the delegated constructor
  * @param blockLocation source of the offset, length and hosts of the fragment
  * @throws IOException declared by this constructor; presumably raised by
  *     {@code blockLocation.getHosts()} or by the delegated constructor (TODO confirm)
  */
 public FileFragment(String tableName, Path uri, BlockLocation blockLocation) throws IOException {
   this(
       tableName,
       uri,
       blockLocation.getOffset(),
       blockLocation.getLength(),
       blockLocation.getHosts(),
       null);
 }
示例#4
0
 /**
  * Looks up the block locations of {@code status} via {@code getLocations} and indexes them by
  * their starting offset.
  *
  * @param fs file system handed to {@code getLocations}
  * @param status file status handed to {@code getLocations}
  * @return a sorted map from block offset to block location; when two locations report the same
  *     offset, the later one in the array replaces the earlier one
  * @throws IOException if {@code getLocations} fails
  */
 @Override
 public TreeMap<Long, BlockLocation> getLocationsWithOffset(FileSystem fs, FileStatus status)
     throws IOException {
   final TreeMap<Long, BlockLocation> blocksByOffset = new TreeMap<Long, BlockLocation>();
   final BlockLocation[] blocks = getLocations(fs, status);
   for (int i = 0; i < blocks.length; i++) {
     final BlockLocation block = blocks[i];
     blocksByOffset.put(block.getOffset(), block);
   }
   return blocksByOffset;
 }
 /**
  * Returns the index of the first block location whose byte range contains {@code offset}.
  *
  * <p>A block contains the offset when {@code blockOffset <= offset < blockOffset + blockLength}.
  *
  * @param blkLocations block locations to search, in array order
  * @param offset file offset to look up
  * @return index into {@code blkLocations} of the first block containing {@code offset}
  * @throws IllegalArgumentException if no block contains {@code offset}, including the case
  *     where {@code blkLocations} is empty
  */
 protected int getBlockIndex(BlockLocation[] blkLocations, long offset) {
   for (int i = 0; i < blkLocations.length; i++) {
     // is the offset inside this block?
     long blockStart = blkLocations[i].getOffset();
     long blockEnd = blockStart + blkLocations[i].getLength();
     if (blockStart <= offset && offset < blockEnd) {
       return i;
     }
   }
   if (blkLocations.length == 0) {
     // without any block there is no "last block" to describe the valid range; fail with
     // the same exception type as the general out-of-range case instead of an
     // ArrayIndexOutOfBoundsException
     throw new IllegalArgumentException(
         "Offset " + offset + " is outside of file (no block locations)");
   }
   BlockLocation last = blkLocations[blkLocations.length - 1];
   // last valid offset according to the final block in the array
   long fileLength = last.getOffset() + last.getLength() - 1;
   throw new IllegalArgumentException(
       "Offset " + offset + " is outside of file (0.." + fileLength + ")");
 }
  /**
   * Helper function to add an ArraySpec to a HashMap that stores BlockLocation -> ArraySpec list
   * mappings.
   *
   * <p>The ArraySpec is appended to the list of every key whose offset equals {@code offset}.
   * If no key has that offset, the map is left unchanged and the ArraySpec is not stored.
   *
   * @param blockToAS HashMap that stores the mappings being added to
   * @param offset The offset, in bytes, in the file that this ArraySpec starts at
   * @param as The ArraySpec to add to the Map
   */
  public static void insertNewAs(
      HashMap<BlockLocation, ArrayList<ArraySpec>> blockToAS, long offset, ArraySpec as) {

    // search for the correct BlockLocation
    // (TODO this is inefficient, fix it)
    // Iterating over entries gives typed access to key and value at once, so no cast and no
    // second map lookup are needed.
    for (java.util.Map.Entry<BlockLocation, ArrayList<ArraySpec>> entry : blockToAS.entrySet()) {
      if (entry.getKey().getOffset() == offset) {
        entry.getValue().add(as);
      }
    }
  }