Example 1
  /** Test conversion of LocatedBlock to BlockLocation */
  @Test
  public void testLocatedBlocks2Locations() {
    DatanodeInfo d = DFSTestUtil.getLocalDatanodeInfo();
    DatanodeInfo[] ds = new DatanodeInfo[1];
    ds[0] = d;

    // ok
    ExtendedBlock b1 = new ExtendedBlock("bpid", 1, 1, 1);
    LocatedBlock l1 = new LocatedBlock(b1, ds, 0, false);

    // corrupt
    ExtendedBlock b2 = new ExtendedBlock("bpid", 2, 1, 1);
    LocatedBlock l2 = new LocatedBlock(b2, ds, 0, true);

    List<LocatedBlock> ls = Arrays.asList(l1, l2);
    LocatedBlocks lbs = new LocatedBlocks(10, false, ls, l2, true, null);

    BlockLocation[] bs = DFSUtil.locatedBlocks2Locations(lbs);

    assertTrue("expected 2 blocks but got " + bs.length, bs.length == 2);

    int corruptCount = 0;
    for (BlockLocation b : bs) {
      if (b.isCorrupt()) {
        corruptCount++;
      }
    }

    assertTrue("expected 1 corrupt files but got " + corruptCount, corruptCount == 1);

    // test an empty location
    bs = DFSUtil.locatedBlocks2Locations(new LocatedBlocks());
    assertEquals(0, bs.length);
  }
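A minimal helper sketch (not part of the original test) for dumping the converted array; the printLocations name is illustrative. BlockLocation.getHosts() declares IOException, hence the throws clause.

  private static void printLocations(BlockLocation[] bs) throws IOException {
    for (BlockLocation b : bs) {
      // offset/length are relative to the file; hosts are the datanodes holding the block
      System.out.println("offset=" + b.getOffset()
          + " length=" + b.getLength()
          + " corrupt=" + b.isCorrupt()
          + " hosts=" + Arrays.toString(b.getHosts()));
    }
  }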
Example 2
 private void addBlocks(
     VolumeManager fs,
     String host,
     ArrayList<String> files,
     Map<String, Long> totalBlocks,
     Map<String, Long> localBlocks)
     throws Exception {
   long allBlocks = 0;
   long matchingBlocks = 0;
   if (!totalBlocks.containsKey(host)) {
     totalBlocks.put(host, 0L);
     localBlocks.put(host, 0L);
   }
   for (String file : files) {
     Path filePath = new Path(file);
     FileSystem ns = fs.getFileSystemByPath(filePath);
     FileStatus fileStatus = ns.getFileStatus(filePath);
     BlockLocation[] fileBlockLocations =
         ns.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
     for (BlockLocation blockLocation : fileBlockLocations) {
       allBlocks++;
       for (String location : blockLocation.getHosts()) {
         HostAndPort hap = HostAndPort.fromParts(location, 0);
         if (hap.getHostText().equals(host)) {
           matchingBlocks++;
           break;
         }
       }
     }
   }
   totalBlocks.put(host, allBlocks + totalBlocks.get(host));
   localBlocks.put(host, matchingBlocks + localBlocks.get(host));
 }
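A small follow-up sketch, assuming the two maps were populated by addBlocks above; the localityRatio helper name is illustrative.

 // Illustrative helper (not in the original class): derive a per-host block
 // locality ratio from the maps that addBlocks fills in.
 private static double localityRatio(
     String host, Map<String, Long> totalBlocks, Map<String, Long> localBlocks) {
   long total = totalBlocks.getOrDefault(host, 0L);
   long local = localBlocks.getOrDefault(host, 0L);
   return total == 0 ? 0.0 : (double) local / total;
 }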
Example 3
  /** Return scan ranges (hdfs splits) plus their storage locations, including volume ids. */
  @Override
  public List<TScanRangeLocations> getScanRangeLocations(long maxScanRangeLength) {
    List<TScanRangeLocations> result = Lists.newArrayList();
    List<HdfsTable.BlockMetadata> blockMetadata = HdfsTable.getBlockMetadata(partitions);
    for (HdfsTable.BlockMetadata block : blockMetadata) {
      // collect all locations for block
      String[] blockHostPorts = null;
      try {
        // Use getNames() to get port number as well
        blockHostPorts = block.getLocation().getNames();
        // uncomment if you need to see detailed block locations
        // LOG.info(Arrays.toString(blockHostPorts));
      } catch (IOException e) {
        // this shouldn't happen; getNames() doesn't actually throw in practice
        String errorMsg = "BlockLocation.getNames() failed:\n" + e.getMessage();
        LOG.error(errorMsg);
        throw new IllegalStateException(errorMsg);
      }

      if (blockHostPorts.length == 0) {
        // we didn't get locations for this block; for now, just ignore the block
        // TODO: do something meaningful with that
        continue;
      }

      // record host/ports and volume ids
      Preconditions.checkState(blockHostPorts.length > 0);
      List<TScanRangeLocation> locations = Lists.newArrayList();
      for (int i = 0; i < blockHostPorts.length; ++i) {
        TScanRangeLocation location = new TScanRangeLocation();
        String hostPort = blockHostPorts[i];
        location.setServer(addressToTHostPort(hostPort));
        location.setVolume_id(block.getVolumeId(i));
        locations.add(location);
      }

      // create scan ranges, taking into account maxScanRangeLength
      BlockLocation blockLocation = block.getLocation();
      long currentOffset = blockLocation.getOffset();
      long remainingLength = blockLocation.getLength();
      while (remainingLength > 0) {
        long currentLength = remainingLength;
        if (maxScanRangeLength > 0 && remainingLength > maxScanRangeLength) {
          currentLength = maxScanRangeLength;
        }
        TScanRange scanRange = new TScanRange();
        scanRange.setHdfs_file_split(
            new THdfsFileSplit(
                block.getFileName(), currentOffset, currentLength, block.getPartition().getId()));
        TScanRangeLocations scanRangeLocations = new TScanRangeLocations();
        scanRangeLocations.scan_range = scanRange;
        scanRangeLocations.locations = locations;
        result.add(scanRangeLocations);
        remainingLength -= currentLength;
        currentOffset += currentLength;
      }
    }
    return result;
  }
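The splitting loop above can be read in isolation; the sketch below restates it without the Thrift types, as an illustration of how a block's byte range is chopped into scan ranges of at most maxScanRangeLength bytes.

  // Illustrative sketch only (no Thrift types): split [offset, offset + length)
  // into chunks of at most maxScanRangeLength bytes; a non-positive limit means
  // a single chunk. Each long[] holds {chunkOffset, chunkLength}.
  static List<long[]> splitRange(long offset, long length, long maxScanRangeLength) {
    List<long[]> ranges = Lists.newArrayList();
    long currentOffset = offset;
    long remaining = length;
    while (remaining > 0) {
      long current =
          (maxScanRangeLength > 0 && remaining > maxScanRangeLength) ? maxScanRangeLength : remaining;
      ranges.add(new long[] {currentOffset, current});
      currentOffset += current;
      remaining -= current;
    }
    return ranges;
  }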
Example 4
 public FileFragment(String tableName, Path uri, BlockLocation blockLocation) throws IOException {
   this(
       tableName,
       uri,
       blockLocation.getOffset(),
       blockLocation.getLength(),
       blockLocation.getHosts(),
       null);
 }
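A hedged usage sketch: given an already-configured FileSystem, build one FileFragment per HDFS block of a file with the constructor above. The fragmentsForFile name is illustrative.

 // Illustrative only: one FileFragment per block of the file, via the
 // BlockLocation-based constructor above. Assumes java.util imports.
 public static List<FileFragment> fragmentsForFile(FileSystem fs, String tableName, Path path)
     throws IOException {
   FileStatus status = fs.getFileStatus(path);
   BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
   List<FileFragment> fragments = new ArrayList<>();
   for (BlockLocation block : blocks) {
     fragments.add(new FileFragment(tableName, path, block));
   }
   return fragments;
 }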
Example 5
 @Override
 public TreeMap<Long, BlockLocation> getLocationsWithOffset(FileSystem fs, FileStatus status)
     throws IOException {
   TreeMap<Long, BlockLocation> offsetBlockMap = new TreeMap<Long, BlockLocation>();
   BlockLocation[] locations = getLocations(fs, status);
   for (BlockLocation location : locations) {
     offsetBlockMap.put(location.getOffset(), location);
   }
   return offsetBlockMap;
 }
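The TreeMap keyed by start offset makes point lookups cheap; a hedged sketch of the typical floorEntry lookup follows (the findBlockAt name is illustrative).

 // Illustrative only: find the block containing a byte offset using the map
 // built above; floorEntry returns the block starting at or before the offset.
 public static BlockLocation findBlockAt(TreeMap<Long, BlockLocation> offsetBlockMap, long offset) {
   Map.Entry<Long, BlockLocation> entry = offsetBlockMap.floorEntry(offset);
   if (entry == null) {
     return null;
   }
   BlockLocation block = entry.getValue();
   // the floor block may still end before the requested offset
   return offset < block.getOffset() + block.getLength() ? block : null;
 }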
Example 6
  /**
   * Fix offset and length of block locations. Note that this method modifies the original array.
   *
   * @param locations block locations of har part file
   * @param start the start of the desired range in the contained file
   * @param len the length of the desired range
   * @param fileOffsetInHar the offset of the desired file in the har part file
   * @return block locations with fixed offset and length
   */
  static BlockLocation[] fixBlockLocations(
      BlockLocation[] locations, long start, long len, long fileOffsetInHar) {
    // offset 1 past last byte of desired range
    long end = start + len;

    for (BlockLocation location : locations) {
      // offset of part block relative to beginning of desired file
      // (may be negative if file starts in this part block)
      long harBlockStart = location.getOffset() - fileOffsetInHar;
      // offset 1 past last byte of har block relative to beginning of
      // desired file
      long harBlockEnd = harBlockStart + location.getLength();

      if (start > harBlockStart) {
        // desired range starts after beginning of this har block
        // fix offset to beginning of relevant range (relative to desired file)
        location.setOffset(start);
        // fix length to relevant portion of har block
        location.setLength(location.getLength() - (start - harBlockStart));
      } else {
        // desired range includes beginning of this har block
        location.setOffset(harBlockStart);
      }

      if (harBlockEnd > end) {
        // range ends before end of this har block
        // fix length to remove irrelevant portion at the end
        location.setLength(location.getLength() - (harBlockEnd - end));
      }
    }

    return locations;
  }
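A small worked example (illustrative values only) of the offset and length fix-up above.

  // Illustrative values only: one part-file block of 128 bytes at offset 0,
  // with the desired file starting 10 bytes into it (fileOffsetInHar = 10).
  // Asking for the first 5 bytes of the file yields offset 0 and length 5.
  static void fixBlockLocationsExample() {
    BlockLocation[] locs = {
      new BlockLocation(new String[] {"dn1:50010"}, new String[] {"dn1"}, 0, 128)
    };
    BlockLocation[] fixed = fixBlockLocations(locs, 0, 5, 10);
    // fixed[0].getOffset() == 0, fixed[0].getLength() == 5
  }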
 protected int getBlockIndex(BlockLocation[] blkLocations, long offset) {
   for (int i = 0; i < blkLocations.length; i++) {
     // is the offset inside this block?
     if ((blkLocations[i].getOffset() <= offset)
         && (offset < blkLocations[i].getOffset() + blkLocations[i].getLength())) {
       return i;
     }
   }
   BlockLocation last = blkLocations[blkLocations.length - 1];
   long fileLength = last.getOffset() + last.getLength() - 1;
   throw new IllegalArgumentException(
       "Offset " + offset + " is outside of file (0.." + fileLength + ")");
 }
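A sketch of the usual caller pattern for getBlockIndex: pick the hosts of the block containing a split's start offset (the hostsForSplit name is illustrative).

 // Illustrative only: hosts of the block that contains a split's start offset.
 protected String[] hostsForSplit(BlockLocation[] blkLocations, long splitStart)
     throws IOException {
   int blkIndex = getBlockIndex(blkLocations, splitStart);
   return blkLocations[blkIndex].getHosts();
 }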
  /**
   * Helper function to add an ArraySpec to a HashMap that stores ArraySpec -> BlockLocation
   * mappings
   *
   * @param blockToAS HashMap that stores the mappings being added to
   * @param offset The offset, in bytes, in the file that this ArraySpec starts at
   * @param as The ArraySpec to add to the Map
   */
  public static void insertNewAs(
      HashMap<BlockLocation, ArrayList<ArraySpec>> blockToAS, long offset, ArraySpec as) {

    // search for the BlockLocation whose offset matches
    // (TODO this linear scan is inefficient, fix it)
    for (BlockLocation key : blockToAS.keySet()) {
      if (key.getOffset() == offset) {
        blockToAS.get(key).add(as);
      }
    }
  }
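One way to address the TODO above, sketched under the assumption that callers also maintain an offset-keyed index (the offsetIndex parameter is hypothetical):

  // Hypothetical variant: with a secondary HashMap<Long, BlockLocation> keyed
  // by block offset, the linear key scan becomes a constant-time lookup.
  public static void insertNewAsIndexed(
      HashMap<BlockLocation, ArrayList<ArraySpec>> blockToAS,
      HashMap<Long, BlockLocation> offsetIndex,
      long offset,
      ArraySpec as) {
    BlockLocation key = offsetIndex.get(offset);
    if (key != null) {
      blockToAS.get(key).add(as);
    }
  }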
  /**
   * Splits the input collection into sets of files where each Map task gets about the same number
   * of files.
   */
  @SuppressWarnings("unchecked")
  @Override
  public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {

    Path[] paths = FileInputFormat.getInputPaths(job);
    // HADOOP-1818: Manage splits only if there are paths
    if (paths.length == 0) {
      return new InputSplit[0];
    }

    if (numSplits > paths.length) {
      numSplits = paths.length;
    } else if (numSplits < 1) {
      numSplits = 1;
    }
    logger.info("Allocating " + paths.length + " files across " + numSplits + " map tasks");
    List<PositionAwareSplit<CombineFileSplit>> splits =
        new ArrayList<PositionAwareSplit<CombineFileSplit>>(numSplits);
    final int numPaths = paths.length;
    long[] lengths = new long[numPaths];
    TObjectLongHashMap<String>[] locations =
        (TObjectLongHashMap<String>[]) Array.newInstance(TObjectLongHashMap.class, numPaths);
    final FileSystem fs = FileSystem.get(job);
    for (int i = 0; i < paths.length; i++) {
      final FileStatus fss = fs.getFileStatus(paths[i]);
      lengths[i] = fss.getLen();
      final TObjectLongHashMap<String> location2size =
          locations[i] = new TObjectLongHashMap<String>();
      final long normalblocksize = fss.getBlockSize();
      for (long offset = 0; offset < lengths[i]; offset += normalblocksize) {
        // bytes of this file covered by this block (the remainder for the last block)
        final long blocksize = Math.min(normalblocksize, lengths[i] - offset);
        final BlockLocation[] blockLocations = fs.getFileBlockLocations(fss, offset, blocksize);
        for (BlockLocation bl : blockLocations) {
          for (String host : bl.getHosts()) {
            location2size.adjustOrPutValue(host, blocksize, blocksize);
          }
        }
      }
    }

    // we need to over-estimate using ceil, to ensure that the last split is not /too/ big
    final int numberOfFilesPerSplit = (int) Math.ceil((double) paths.length / (double) numSplits);

    int pathsUsed = 0;
    int splitnum = 0;
    CombineFileSplit mfs;
    // for each split except the last one (which may be smaller than numberOfFilesPerSplit)
    while (pathsUsed < numPaths) {
      /* calculate split size for this task - usually numberOfFilesPerSplit, but
       * less than this for the last split */
      final int splitSizeForThisSplit =
          numberOfFilesPerSplit + pathsUsed > numPaths
              ? numPaths - pathsUsed
              : numberOfFilesPerSplit;
      // arrays of information for split
      Path[] splitPaths = new Path[splitSizeForThisSplit];
      long[] splitLengths = new long[splitSizeForThisSplit];
      long[] splitStarts = new long[splitSizeForThisSplit];
      final TObjectLongHashMap<String> allLocationsForSplit = new TObjectLongHashMap<String>();
      String[] splitLocations = null; // final recommended locations for this split.
      for (int i = 0; i < splitSizeForThisSplit; i++) {
        locations[pathsUsed + i].forEachEntry(
            new TObjectLongProcedure<String>() {
              public boolean execute(String a, long b) {
                allLocationsForSplit.adjustOrPutValue(a, b, b);
                return true;
              }
            });
        if (allLocationsForSplit.size() <= 3) {
          splitLocations = allLocationsForSplit.keys(new String[allLocationsForSplit.size()]);
        } else {
          String[] hosts = allLocationsForSplit.keys(new String[allLocationsForSplit.size()]);
          Arrays.sort(
              hosts,
              new Comparator<String>() {
                public int compare(String o1, String o2) {
                  long diffamount = allLocationsForSplit.get(o1) - allLocationsForSplit.get(o2);
                  if (diffamount > 0) {
                    return -1;
                  } else if (diffamount < 0) {
                    return 1;
                  }
                  return 0;
                }
              });
          splitLocations = new String[3];
          System.arraycopy(hosts, 0, splitLocations, 0, 3);
        }
      }

      // copy information for this split
      System.arraycopy(lengths, pathsUsed, splitLengths, 0, splitSizeForThisSplit);
      System.arraycopy(paths, pathsUsed, splitPaths, 0, splitSizeForThisSplit);
      // count the number of paths consumed
      pathsUsed += splitSizeForThisSplit;

      // make the actual split object
      // logger.info("New split of size " + splitSizeForThisSplit);
      mfs = new CombineFileSplit(job, splitPaths, splitStarts, splitLengths, splitLocations);
      splits.add(new PositionAwareSplit<CombineFileSplit>(mfs, splitnum));
      splitnum++;
    }

    if (pathsUsed != paths.length) {
      throw new IOException("Number of used paths does not equal total available paths!");
    }
    return splits.toArray(new PositionAwareSplit[splits.size()]);
  }
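The ceil-based sizing above can be checked with a quick arithmetic sketch (values are illustrative):

  // Illustrative arithmetic: ceil-based files-per-split sizing.
  static int filesPerSplit(int numPaths, int numSplits) {
    return (int) Math.ceil((double) numPaths / (double) numSplits);
  }
  // e.g. filesPerSplit(10, 4) == 3, so the four splits get 3, 3, 3 and 1 files.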