private File addBlock(Block b, File src, boolean createOk, boolean resetIdx)
        throws IOException {
      if (numBlocks < maxBlocksPerDir) {
        File dest = new File(dir, b.getBlockName());
        File metaData = getMetaFile(src, b);
        File newmeta = getMetaFile(dest, b);
        if (!metaData.renameTo(newmeta) || !src.renameTo(dest)) {
          throw new IOException(
              "could not move files for " + b + " from tmp to " + dest.getAbsolutePath());
        }
        if (DataNode.LOG.isDebugEnabled()) {
          DataNode.LOG.debug("addBlock: Moved " + metaData + " to " + newmeta);
          DataNode.LOG.debug("addBlock: Moved " + src + " to " + dest);
        }

        numBlocks += 1;
        return dest;
      }

      if (lastChildIdx < 0 && resetIdx) {
        // reset so that all children will be checked
        lastChildIdx = random.nextInt(children.length);
      }

      if (lastChildIdx >= 0 && children != null) {
        // Check if any child-tree has room for a block.
        for (int i = 0; i < children.length; i++) {
          int idx = (lastChildIdx + i) % children.length;
          File file = children[idx].addBlock(b, src, false, resetIdx);
          if (file != null) {
            lastChildIdx = idx;
            return file;
          }
        }
        lastChildIdx = -1;
      }

      if (!createOk) {
        return null;
      }

      if (children == null || children.length == 0) {
        children = new FSDir[maxBlocksPerDir];
        for (int idx = 0; idx < maxBlocksPerDir; idx++) {
          children[idx] = new FSDir(new File(dir, DataStorage.BLOCK_SUBDIR_PREFIX + idx));
        }
      }

      // now pick a child randomly for creating a new set of subdirs.
      lastChildIdx = random.nextInt(children.length);
      return children[lastChildIdx].addBlock(b, src, true, false);
    }
 /** Return the block file for the given ID */
 public File findBlockFile(long blockId) {
   final Block b = new Block(blockId);
   File blockfile = null;
   ActiveFile activefile = ongoingCreates.get(b);
   if (activefile != null) {
     blockfile = activefile.file;
   }
   if (blockfile == null) {
     blockfile = getFile(b);
   }
   if (blockfile == null) {
     if (DataNode.LOG.isDebugEnabled()) {
       DataNode.LOG.debug("ongoingCreates=" + ongoingCreates);
       DataNode.LOG.debug("volumeMap=" + volumeMap);
     }
   }
   return blockfile;
 }
  /**
   * Start writing to a block file If isRecovery is true and the block pre-exists, then we kill all
   * volumeMap.put(b, v); volumeMap.put(b, v); other threads that might be writing to this block,
   * and then reopen the file.
   */
  public BlockWriteStreams writeToBlock(Block b, boolean isRecovery) throws IOException {
    //
    // Make sure the block isn't a valid one - we're still creating it!
    //
    if (isValidBlock(b)) {
      if (!isRecovery) {
        throw new BlockAlreadyExistsException(
            "Block " + b + " is valid, and cannot be written to.");
      }
      // If the block was successfully finalized because all packets
      // were successfully processed at the Datanode but the ack for
      // some of the packets were not received by the client. The client
      // re-opens the connection and retries sending those packets.
      // The other reason is that an "append" is occurring to this block.
      detachBlock(b, 1);
    }
    long blockSize = b.getNumBytes();

    //
    // Serialize access to /tmp, and check if file already there.
    //
    File f = null;
    List<Thread> threads = null;
    synchronized (this) {
      //
      // Is it already in the create process?
      //
      ActiveFile activeFile = ongoingCreates.get(b);
      if (activeFile != null) {
        f = activeFile.file;
        threads = activeFile.threads;

        if (!isRecovery) {
          throw new BlockAlreadyExistsException(
              "Block "
                  + b
                  + " has already been started (though not completed), and thus cannot be created.");
        } else {
          for (Thread thread : threads) {
            thread.interrupt();
          }
        }
        ongoingCreates.remove(b);
      }
      FSVolume v = null;
      if (!isRecovery) {
        v = volumes.getNextVolume(blockSize);
        // create temporary file to hold block in the designated volume
        f = createTmpFile(v, b);
        volumeMap.put(b, new DatanodeBlockInfo(v));
      } else if (f != null) {
        DataNode.LOG.info("Reopen already-open Block for append " + b);
        // create or reuse temporary file to hold block in the
        // designated volume
        v = volumeMap.get(b).getVolume();
        volumeMap.put(b, new DatanodeBlockInfo(v));
      } else {
        // reopening block for appending to it.
        DataNode.LOG.info("Reopen Block for append " + b);
        v = volumeMap.get(b).getVolume();
        f = createTmpFile(v, b);
        File blkfile = getBlockFile(b);
        File oldmeta = getMetaFile(b);
        File newmeta = getMetaFile(f, b);

        // rename meta file to tmp directory
        DataNode.LOG.debug("Renaming " + oldmeta + " to " + newmeta);
        if (!oldmeta.renameTo(newmeta)) {
          throw new IOException(
              "Block "
                  + b
                  + " reopen failed. "
                  + " Unable to move meta file  "
                  + oldmeta
                  + " to tmp dir "
                  + newmeta);
        }

        // rename block file to tmp directory
        DataNode.LOG.debug("Renaming " + blkfile + " to " + f);
        if (!blkfile.renameTo(f)) {
          if (!f.delete()) {
            throw new IOException(
                "Block " + b + " reopen failed. " + " Unable to remove file " + f);
          }
          if (!blkfile.renameTo(f)) {
            throw new IOException(
                "Block "
                    + b
                    + " reopen failed. "
                    + " Unable to move block file "
                    + blkfile
                    + " to tmp dir "
                    + f);
          }
        }
        volumeMap.put(b, new DatanodeBlockInfo(v));
      }
      if (f == null) {
        DataNode.LOG.warn("Block " + b + " reopen failed " + " Unable to locate tmp file.");
        throw new IOException("Block " + b + " reopen failed " + " Unable to locate tmp file.");
      }
      ongoingCreates.put(b, new ActiveFile(f, threads));
    }

    try {
      if (threads != null) {
        for (Thread thread : threads) {
          thread.join();
        }
      }
    } catch (InterruptedException e) {
      throw new IOException("Recovery waiting for thread interrupted.");
    }

    //
    // Finally, allow a writer to the block file
    // REMIND - mjc - make this a filter stream that enforces a max
    // block size, so clients can't go crazy
    //
    File metafile = getMetaFile(f, b);
    DataNode.LOG.debug("writeTo blockfile is " + f + " of size " + f.length());
    DataNode.LOG.debug("writeTo metafile is " + metafile + " of size " + metafile.length());
    return createBlockWriteStreams(f, metafile);
  }
Exemplo n.º 4
0
  BlockSender(
      Block block,
      long startOffset,
      long length,
      boolean corruptChecksumOk,
      boolean chunkOffsetOK,
      boolean verifyChecksum,
      DataNode datanode,
      String clientTraceFmt)
      throws IOException {
    try {
      this.block = block;
      synchronized (datanode.data) {
        this.replica = datanode.data.getReplica(block.getBlockId());
        if (replica == null) {
          throw new ReplicaNotFoundException(block);
        }
        this.replicaVisibleLength = replica.getVisibleLength();
      }
      long minEndOffset = startOffset + length;
      // if this is a write in progress
      ChunkChecksum chunkChecksum = null;
      if (replica instanceof ReplicaBeingWritten) {
        for (int i = 0; i < 30 && replica.getBytesOnDisk() < minEndOffset; i++) {
          try {
            Thread.sleep(100);
          } catch (InterruptedException ie) {
            throw new IOException(ie);
          }
        }

        long currentBytesOnDisk = replica.getBytesOnDisk();

        if (currentBytesOnDisk < minEndOffset) {
          throw new IOException(
              String.format(
                  "need %d bytes, but only %d bytes available", minEndOffset, currentBytesOnDisk));
        }

        ReplicaInPipeline rip = (ReplicaInPipeline) replica;
        chunkChecksum = rip.getLastChecksumAndDataLen();
      }

      if (replica.getGenerationStamp() < block.getGenerationStamp()) {
        throw new IOException(
            "replica.getGenerationStamp() < block.getGenerationStamp(), block="
                + block
                + ", replica="
                + replica);
      }
      if (replicaVisibleLength < 0) {
        throw new IOException(
            "The replica is not readable, block=" + block + ", replica=" + replica);
      }
      if (DataNode.LOG.isDebugEnabled()) {
        DataNode.LOG.debug("block=" + block + ", replica=" + replica);
      }

      this.chunkOffsetOK = chunkOffsetOK;
      this.corruptChecksumOk = corruptChecksumOk;
      this.verifyChecksum = verifyChecksum;

      // transferToFully() fails on 32 bit platforms for block sizes >= 2GB,
      // use normal transfer in those cases
      this.transferToAllowed =
          datanode.transferToAllowed && (!is32Bit || length < (long) Integer.MAX_VALUE);
      this.clientTraceFmt = clientTraceFmt;

      if (!corruptChecksumOk || datanode.data.metaFileExists(block)) {
        checksumIn =
            new DataInputStream(
                new BufferedInputStream(datanode.data.getMetaDataInputStream(block), BUFFER_SIZE));

        // read and handle the common header here. For now just a version
        BlockMetadataHeader header = BlockMetadataHeader.readHeader(checksumIn);
        short version = header.getVersion();

        if (version != FSDataset.METADATA_VERSION) {
          LOG.warn(
              "Wrong version (" + version + ") for metadata file for " + block + " ignoring ...");
        }
        checksum = header.getChecksum();
      } else {
        LOG.warn("Could not find metadata file for " + block);
        // This only decides the buffer size. Use BUFFER_SIZE?
        checksum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_NULL, 16 * 1024);
      }

      /* If bytesPerChecksum is very large, then the metadata file
       * is mostly corrupted. For now just truncate bytesPerchecksum to
       * blockLength.
       */
      bytesPerChecksum = checksum.getBytesPerChecksum();
      if (bytesPerChecksum > 10 * 1024 * 1024 && bytesPerChecksum > replicaVisibleLength) {
        checksum =
            DataChecksum.newDataChecksum(
                checksum.getChecksumType(), Math.max((int) replicaVisibleLength, 10 * 1024 * 1024));
        bytesPerChecksum = checksum.getBytesPerChecksum();
      }
      checksumSize = checksum.getChecksumSize();

      if (length < 0) {
        length = replicaVisibleLength;
      }

      // end is either last byte on disk or the length for which we have a
      // checksum
      if (chunkChecksum != null) {
        endOffset = chunkChecksum.getDataLength();
      } else {
        endOffset = replica.getBytesOnDisk();
      }

      if (startOffset < 0 || startOffset > endOffset || (length + startOffset) > endOffset) {
        String msg =
            " Offset "
                + startOffset
                + " and length "
                + length
                + " don't match block "
                + block
                + " ( blockLen "
                + endOffset
                + " )";
        LOG.warn(datanode.dnRegistration + ":sendBlock() : " + msg);
        throw new IOException(msg);
      }

      offset = (startOffset - (startOffset % bytesPerChecksum));
      if (length >= 0) {
        // Make sure endOffset points to end of a checksumed chunk.
        long tmpLen = startOffset + length;
        if (tmpLen % bytesPerChecksum != 0) {
          tmpLen += (bytesPerChecksum - tmpLen % bytesPerChecksum);
        }
        if (tmpLen < endOffset) {
          // will use on-disk checksum here since the end is a stable chunk
          endOffset = tmpLen;
        } else if (chunkChecksum != null) {
          // in last chunk which is changing. flag that we need to use in-memory
          // checksum
          this.lastChunkChecksum = chunkChecksum;
        }
      }

      // seek to the right offsets
      if (offset > 0) {
        long checksumSkip = (offset / bytesPerChecksum) * checksumSize;
        // note blockInStream is seeked when created below
        if (checksumSkip > 0) {
          // Should we use seek() for checksum file as well?
          IOUtils.skipFully(checksumIn, checksumSkip);
        }
      }
      seqno = 0;

      if (DataNode.LOG.isDebugEnabled()) {
        DataNode.LOG.debug("replica=" + replica);
      }

      blockIn = datanode.data.getBlockInputStream(block, offset); // seek to offset
    } catch (IOException ioe) {
      IOUtils.closeStream(this);
      IOUtils.closeStream(blockIn);
      throw ioe;
    }
  }