public FSDir(File dir) throws IOException {
   this.dir = dir;
   this.children = null;
   if (!dir.exists()) {
     if (!dir.mkdirs()) {
       throw new IOException("Mkdirs failed to create " + dir.toString());
     }
   } else {
     File[] files = dir.listFiles();
     int numChildren = 0;
     for (int idx = 0; idx < files.length; idx++) {
       if (files[idx].isDirectory()) {
         numChildren++;
       } else if (Block.isBlockFilename(files[idx])) {
         numBlocks++;
       }
     }
     if (numChildren > 0) {
       children = new FSDir[numChildren];
       int curdir = 0;
       for (int idx = 0; idx < files.length; idx++) {
         if (files[idx].isDirectory()) {
           children[curdir] = new FSDir(files[idx]);
           curdir++;
         }
       }
     }
   }
 }
 private File createTmpFile(Block b, File f) throws IOException {
   if (f.exists()) {
     throw new IOException(
         "Unexpected problem in creating temporary file for "
             + b
             + ".  File "
             + f
             + " should not be present, but is.");
   }
   // Create the zero-length temp file
   //
   boolean fileCreated = false;
   try {
     fileCreated = f.createNewFile();
   } catch (IOException ioe) {
     throw (IOException) new IOException(DISK_ERROR + f).initCause(ioe);
   }
   if (!fileCreated) {
     throw new IOException(
         "Unexpected problem in creating temporary file for "
             + b
             + ".  File "
             + f
             + " should be creatable, but is already present.");
   }
   return f;
 }
  public void testAbort() throws IOException {
    JobConf job = new JobConf();
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext));

    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    // write output
    FileSystem localFs = FileSystem.getLocal(job);
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do abort
    committer.abortTask(tContext);
    File expectedFile =
        new File(new Path(committer.getTempTaskOutputPath(tContext), file).toString());
    assertFalse("task temp dir still exists", expectedFile.exists());

    committer.abortJob(jContext, JobStatus.State.FAILED);
    expectedFile = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString());
    assertFalse("job temp dir still exists", expectedFile.exists());
    assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);
    FileUtil.fullyDelete(new File(outDir.toString()));
  }
    /**
     * Recover detached files on datanode restart. If a detached block does not exist in the
     * original directory, then it is moved to the original directory.
     */
    private void recoverDetachedBlocks(File dataDir, File dir) throws IOException {
      File contents[] = dir.listFiles();
      if (contents == null) {
        return;
      }
      for (int i = 0; i < contents.length; i++) {
        if (!contents[i].isFile()) {
          throw new IOException("Found " + contents[i] + " in " + dir + " but it is not a file.");
        }

        //
        // If the original block file still exists, then no recovery
        // is needed.
        //
        File blk = new File(dataDir, contents[i].getName());
        if (!blk.exists()) {
          if (!contents[i].renameTo(blk)) {
            throw new IOException("Unable to recover detached file " + contents[i]);
          }
          continue;
        }
        if (!contents[i].delete()) {
          throw new IOException("Unable to cleanup detached file " + contents[i]);
        }
      }
    }
 /** {@inheritDoc} */
 public synchronized Block getStoredBlock(long blkid) throws IOException {
   File blockfile = findBlockFile(blkid);
   if (blockfile == null) {
     return null;
   }
   File metafile = findMetaFile(blockfile);
   return new Block(blkid, blockfile.length(), parseGenerationStamp(blockfile, metafile));
 }
 /** Find the file corresponding to the block and return it if it exists. */
 File validateBlockFile(Block b) {
   // Should we check for metadata file too?
   File f = getFile(b);
   if (f != null && f.exists()) return f;
   if (InterDatanodeProtocol.LOG.isDebugEnabled()) {
     InterDatanodeProtocol.LOG.debug("b=" + b + ", f=" + f);
   }
   return null;
 }
 void clearPath(File f) {
   String root = dir.getAbsolutePath();
   String dir = f.getAbsolutePath();
   if (dir.startsWith(root)) {
     String[] dirNames = dir.substring(root.length()).split(File.separator + "subdir");
     if (clearPath(f, dirNames, 1)) return;
   }
   clearPath(f, null, -1);
 }
 /** Find the corresponding meta data file from a given block file */
 private static long parseGenerationStamp(File blockFile, File metaFile) throws IOException {
   String metaname = metaFile.getName();
   String gs =
       metaname.substring(
           blockFile.getName().length() + 1, metaname.length() - METADATA_EXTENSION.length());
   try {
     return Long.parseLong(gs);
   } catch (NumberFormatException nfe) {
     throw (IOException)
         new IOException("blockFile=" + blockFile + ", metaFile=" + metaFile).initCause(nfe);
   }
 }
    private File addBlock(Block b, File src, boolean createOk, boolean resetIdx)
        throws IOException {
      if (numBlocks < maxBlocksPerDir) {
        File dest = new File(dir, b.getBlockName());
        File metaData = getMetaFile(src, b);
        File newmeta = getMetaFile(dest, b);
        if (!metaData.renameTo(newmeta) || !src.renameTo(dest)) {
          throw new IOException(
              "could not move files for " + b + " from tmp to " + dest.getAbsolutePath());
        }
        if (DataNode.LOG.isDebugEnabled()) {
          DataNode.LOG.debug("addBlock: Moved " + metaData + " to " + newmeta);
          DataNode.LOG.debug("addBlock: Moved " + src + " to " + dest);
        }

        numBlocks += 1;
        return dest;
      }

      if (lastChildIdx < 0 && resetIdx) {
        // reset so that all children will be checked
        lastChildIdx = random.nextInt(children.length);
      }

      if (lastChildIdx >= 0 && children != null) {
        // Check if any child-tree has room for a block.
        for (int i = 0; i < children.length; i++) {
          int idx = (lastChildIdx + i) % children.length;
          File file = children[idx].addBlock(b, src, false, resetIdx);
          if (file != null) {
            lastChildIdx = idx;
            return file;
          }
        }
        lastChildIdx = -1;
      }

      if (!createOk) {
        return null;
      }

      if (children == null || children.length == 0) {
        children = new FSDir[maxBlocksPerDir];
        for (int idx = 0; idx < maxBlocksPerDir; idx++) {
          children[idx] = new FSDir(new File(dir, DataStorage.BLOCK_SUBDIR_PREFIX + idx));
        }
      }

      // now pick a child randomly for creating a new set of subdirs.
      lastChildIdx = random.nextInt(children.length);
      return children[lastChildIdx].addBlock(b, src, true, false);
    }
Exemple #10
0
  // Mostly for setting up the symlinks. Note that when we setup the distributed
  // cache, we didn't create the symlinks. This is done on a per task basis
  // by the currently executing task.
  public static void setupWorkDir(JobConf conf) throws IOException {
    File workDir = new File(".").getAbsoluteFile();
    FileUtil.fullyDelete(workDir);
    if (DistributedCache.getSymlink(conf)) {
      URI[] archives = DistributedCache.getCacheArchives(conf);
      URI[] files = DistributedCache.getCacheFiles(conf);
      Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);
      Path[] localFiles = DistributedCache.getLocalCacheFiles(conf);
      if (archives != null) {
        for (int i = 0; i < archives.length; i++) {
          String link = archives[i].getFragment();
          if (link != null) {
            link = workDir.toString() + Path.SEPARATOR + link;
            File flink = new File(link);
            if (!flink.exists()) {
              FileUtil.symLink(localArchives[i].toString(), link);
            }
          }
        }
      }
      if (files != null) {
        for (int i = 0; i < files.length; i++) {
          String link = files[i].getFragment();
          if (link != null) {
            link = workDir.toString() + Path.SEPARATOR + link;
            File flink = new File(link);
            if (!flink.exists()) {
              FileUtil.symLink(localFiles[i].toString(), link);
            }
          }
        }
      }
    }
    File jobCacheDir = null;
    if (conf.getJar() != null) {
      jobCacheDir = new File(new Path(conf.getJar()).getParent().toString());
    }

    // create symlinks for all the files in job cache dir in current
    // workingdir for streaming
    try {
      DistributedCache.createAllSymlink(conf, jobCacheDir, workDir);
    } catch (IOException ie) {
      // Do not exit even if symlinks have not been created.
      LOG.warn(StringUtils.stringifyException(ie));
    }
    // add java.io.tmpdir given by mapred.child.tmp
    String tmp = conf.get("mapred.child.tmp", "./tmp");
    Path tmpDir = new Path(tmp);

    // if temp directory path is not absolute
    // prepend it with workDir.
    if (!tmpDir.isAbsolute()) {
      tmpDir = new Path(workDir.toString(), tmp);
      FileSystem localFs = FileSystem.getLocal(conf);
      if (!localFs.mkdirs(tmpDir) && !localFs.getFileStatus(tmpDir).isDir()) {
        throw new IOException("Mkdirs failed to create " + tmpDir.toString());
      }
    }
  }
  public void testFailAbort() throws IOException {
    JobConf job = new JobConf();
    job.set(FileSystem.FS_DEFAULT_NAME_KEY, "faildel:///");
    job.setClass("fs.faildel.impl", FakeFileSystem.class, FileSystem.class);
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext));

    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    // write output
    FileSystem localFs = new FakeFileSystem();
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do abort
    Throwable th = null;
    try {
      committer.abortTask(tContext);
    } catch (IOException ie) {
      th = ie;
    }
    assertNotNull(th);
    assertTrue(th instanceof IOException);
    assertTrue(th.getMessage().contains("fake delete failed"));
    File jobTmpDir = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString());
    File taskTmpDir = new File(jobTmpDir, "_" + taskID);
    File expectedFile = new File(taskTmpDir, file);
    assertTrue(expectedFile + " does not exists", expectedFile.exists());

    th = null;
    try {
      committer.abortJob(jContext, JobStatus.State.FAILED);
    } catch (IOException ie) {
      th = ie;
    }
    assertNotNull(th);
    assertTrue(th instanceof IOException);
    assertTrue(th.getMessage().contains("fake delete failed"));
    assertTrue("job temp dir does not exists", jobTmpDir.exists());
  }
 /**
  * This method create symlinks for all files in a given dir in another directory
  *
  * @param conf the configuration
  * @param jobCacheDir the target directory for creating symlinks
  * @param workDir the directory in which the symlinks are created
  * @throws IOException
  */
 public static void createAllSymlink(Configuration conf, File jobCacheDir, File workDir)
     throws IOException {
   if ((jobCacheDir == null || !jobCacheDir.isDirectory())
       || workDir == null
       || (!workDir.isDirectory())) {
     return;
   }
   boolean createSymlink = getSymlink(conf);
   if (createSymlink) {
     File[] list = jobCacheDir.listFiles();
     for (int i = 0; i < list.length; i++) {
       FileUtil.symLink(
           list[i].getAbsolutePath(), new File(workDir, list[i].getName()).toString());
     }
   }
 }
Exemple #13
0
 /*
  * Fetch a file that is in a local file system. Return a local File.
  */
 private File fileFetch(File file, Reporter reporter) throws IOException, InterruptedException {
   UUID uniqueId = UUID.randomUUID();
   File toDir = new File(tempDir, uniqueId.toString() + "/" + file.getName());
   if (toDir.exists()) {
     FileUtils.deleteDirectory(toDir);
   }
   toDir.mkdirs();
   log.info("Copying " + file + " to " + toDir);
   try {
     copyFile(file, new File(toDir, file.getName()), reporter);
   } catch (InterruptedException e) {
     cleanDirNoExceptions(toDir);
     throw e;
   }
   return toDir;
 }
Exemple #14
0
 /** Use this method to know the total size of a deployment URI. */
 public long sizeOf(String uriStr) throws IOException, URISyntaxException {
   URI uri = new URI(uriStr);
   if (uriStr.startsWith("file:")) {
     File f = new File(uri);
     return f.isDirectory() ? FileUtils.sizeOfDirectory(f) : f.length();
   } else if (uriStr.startsWith("s3")) {
     return -1; // NotYetImplemented
     // Be flexible as to what we can expect here (e.g. maprfs, etc)
   } else {
     //		} else if(uriStr.startsWith("hdfs")) {
     return FileSystem.get(hadoopConf).getContentSummary(new Path(uriStr)).getLength();
     //		} else {
     //			throw new IllegalArgumentException("Scheme not recognized or non-absolute URI provided: "
     // + uri);
   }
 }
    /*
     * dirNames is an array of string integers derived from usual directory
     * structure data/subdirN/subdirXY/subdirM ... If dirName array is
     * non-null, we only check the child at the children[dirNames[idx]].
     * This avoids iterating over children in common case. If directory
     * structure changes in later versions, we need to revisit this.
     */
    private boolean clearPath(File f, String[] dirNames, int idx) {
      if ((dirNames == null || idx == dirNames.length) && dir.compareTo(f) == 0) {
        numBlocks--;
        return true;
      }

      if (dirNames != null) {
        // guess the child index from the directory name
        if (idx > (dirNames.length - 1) || children == null) {
          return false;
        }
        int childIdx;
        try {
          childIdx = Integer.parseInt(dirNames[idx]);
        } catch (NumberFormatException ignored) {
          // layout changed? we could print a warning.
          return false;
        }
        return (childIdx >= 0 && childIdx < children.length)
            ? children[childIdx].clearPath(f, dirNames, idx + 1)
            : false;
      }

      // guesses failed. back to blind iteration.
      if (children != null) {
        for (int i = 0; i < children.length; i++) {
          if (children[i].clearPath(f, null, -1)) {
            return true;
          }
        }
      }
      return false;
    }
  /**
   * Remove a block from disk
   *
   * @param blockFile block file
   * @param metaFile block meta file
   * @param b a block
   * @return true if on-disk files are deleted; false otherwise
   */
  private boolean delBlockFromDisk(File blockFile, File metaFile, Block b) {
    if (blockFile == null) {
      DataNode.LOG.warn("No file exists for block: " + b);
      return true;
    }

    if (!blockFile.delete()) {
      DataNode.LOG.warn("Not able to delete the block file: " + blockFile);
      return false;
    } else { // remove the meta file
      if (metaFile != null && !metaFile.delete()) {
        DataNode.LOG.warn("Not able to delete the meta block file: " + metaFile);
        return false;
      }
    }
    return true;
  }
  /** Complete the block write! */
  public synchronized void finalizeBlock(Block b) throws IOException {
    ActiveFile activeFile = ongoingCreates.get(b);
    if (activeFile == null) {
      throw new IOException("Block " + b + " is already finalized.");
    }
    File f = activeFile.file;
    if (f == null || !f.exists()) {
      throw new IOException("No temporary file " + f + " for block " + b);
    }
    FSVolume v = volumeMap.get(b).getVolume();
    if (v == null) {
      throw new IOException("No volume for temporary file " + f + " for block " + b);
    }

    File dest = null;
    dest = v.addBlock(b, f);
    volumeMap.put(b, new DatanodeBlockInfo(v, dest));
    ongoingCreates.remove(b);
  }
 private static void createSymlink(
     Configuration conf,
     URI cache,
     CacheStatus cacheStatus,
     boolean isArchive,
     Path currentWorkDir,
     boolean honorSymLinkConf)
     throws IOException {
   boolean doSymlink = honorSymLinkConf && DistributedCache.getSymlink(conf);
   if (cache.getFragment() == null) {
     doSymlink = false;
   }
   String link = currentWorkDir.toString() + Path.SEPARATOR + cache.getFragment();
   File flink = new File(link);
   if (doSymlink) {
     if (!flink.exists()) {
       FileUtil.symLink(cacheStatus.localizedLoadPath.toString(), link);
     }
   }
 }
  /** Find the corresponding meta data file from a given block file */
  private static File findMetaFile(final File blockFile) throws IOException {
    final String prefix = blockFile.getName() + "_";
    final File parent = blockFile.getParentFile();
    File[] matches =
        parent.listFiles(
            new FilenameFilter() {
              public boolean accept(File dir, String name) {
                return dir.equals(parent)
                    && name.startsWith(prefix)
                    && name.endsWith(METADATA_EXTENSION);
              }
            });

    if (matches == null || matches.length == 0) {
      throw new IOException("Meta file not found, blockFile=" + blockFile);
    } else if (matches.length > 1) {
      throw new IOException("Found more than one meta files: " + Arrays.asList(matches));
    }
    return matches[0];
  }
Exemple #20
0
 static {
   Configuration conf = null;
   if (H2O.OPT_ARGS.hdfs_config != null) {
     conf = new Configuration();
     File p = new File(H2O.OPT_ARGS.hdfs_config);
     if (!p.exists()) Log.die("Unable to open hdfs configuration file " + p.getAbsolutePath());
     conf.addResource(new Path(p.getAbsolutePath()));
     Log.debug(Sys.HDFS_, "resource ", p.getAbsolutePath(), " added to the hadoop configuration");
   } else {
     conf = new Configuration();
     if (!Strings.isNullOrEmpty(H2O.OPT_ARGS.hdfs)) {
       // setup default remote Filesystem - for version 0.21 and higher
       conf.set("fs.defaultFS", H2O.OPT_ARGS.hdfs);
       // To provide compatibility with version 0.20.0 it is necessary to setup the property
       // fs.default.name which was in newer version renamed to 'fs.defaultFS'
       conf.set("fs.default.name", H2O.OPT_ARGS.hdfs);
     }
   }
   CONF = conf;
 }
Exemple #21
0
  /*
   * Fetch a file that is in a Hadoop file system. Return a local File.
   * Interruptible.
   */
  private File hdfsFetch(Path fromPath, Reporter reporter)
      throws IOException, InterruptedException {
    UUID uniqueId = UUID.randomUUID();
    File toFile = new File(tempDir, uniqueId.toString() + "/" + fromPath.getName());
    File toDir = new File(toFile.getParent());
    if (toDir.exists()) {
      FileUtils.deleteDirectory(toDir);
    }
    toDir.mkdirs();
    Path toPath = new Path(toFile.getCanonicalPath());

    FileSystem fS = fromPath.getFileSystem(hadoopConf);
    FileSystem tofS = FileSystem.getLocal(hadoopConf);

    Throttler throttler = new Throttler((double) bytesPerSecThrottle);
    try {
      for (FileStatus fStatus : fS.globStatus(fromPath)) {
        log.info("Copying " + fStatus.getPath() + " to " + toPath);
        long bytesSoFar = 0;

        FSDataInputStream iS = fS.open(fStatus.getPath());
        FSDataOutputStream oS = tofS.create(toPath);

        byte[] buffer = new byte[downloadBufferSize];

        int nRead;
        while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
          // Needed to being able to be interrupted at any moment.
          if (Thread.interrupted()) {
            iS.close();
            oS.close();
            cleanDirNoExceptions(toDir);
            throw new InterruptedException();
          }
          bytesSoFar += nRead;
          oS.write(buffer, 0, nRead);
          throttler.incrementAndThrottle(nRead);
          if (bytesSoFar >= bytesToReportProgress) {
            reporter.progress(bytesSoFar);
            bytesSoFar = 0l;
          }
        }

        if (reporter != null) {
          reporter.progress(bytesSoFar);
        }

        oS.close();
        iS.close();
      }

      return toDir;
    } catch (ClosedByInterruptException e) {
      // This can be thrown by the method read.
      cleanDirNoExceptions(toDir);
      throw new InterruptedIOException();
    }
  }
    /** Populate the given blockSet with any child blocks found at this node. */
    public void getBlockInfo(TreeSet<Block> blockSet) {
      if (children != null) {
        for (int i = 0; i < children.length; i++) {
          children[i].getBlockInfo(blockSet);
        }
      }

      File blockFiles[] = dir.listFiles();
      for (int i = 0; i < blockFiles.length; i++) {
        if (Block.isBlockFilename(blockFiles[i])) {
          long genStamp = getGenerationStampFromFile(blockFiles, blockFiles[i]);
          blockSet.add(new Block(blockFiles[i], blockFiles[i].length(), genStamp));
        }
      }
    }
    void getVolumeMap(HashMap<Block, DatanodeBlockInfo> volumeMap, FSVolume volume) {
      if (children != null) {
        for (int i = 0; i < children.length; i++) {
          children[i].getVolumeMap(volumeMap, volume);
        }
      }

      File blockFiles[] = dir.listFiles();
      for (int i = 0; i < blockFiles.length; i++) {
        if (Block.isBlockFilename(blockFiles[i])) {
          long genStamp = getGenerationStampFromFile(blockFiles, blockFiles[i]);
          volumeMap.put(
              new Block(blockFiles[i], blockFiles[i].length(), genStamp),
              new DatanodeBlockInfo(volume, blockFiles[i]));
        }
      }
    }
    FSVolume(File currentDir, Configuration conf) throws IOException {
      this.reserved = conf.getLong("dfs.datanode.du.reserved", 0);
      boolean supportAppends = conf.getBoolean("dfs.support.append", false);
      File parent = currentDir.getParentFile();

      this.detachDir = new File(parent, "detach");
      if (detachDir.exists()) {
        recoverDetachedBlocks(currentDir, detachDir);
      }

      // Files that were being written when the datanode was last shutdown
      // are now moved back to the data directory. It is possible that
      // in the future, we might want to do some sort of datanode-local
      // recovery for these blocks. For example, crc validation.
      //
      this.tmpDir = new File(parent, "tmp");
      if (tmpDir.exists()) {
        if (supportAppends) {
          recoverDetachedBlocks(currentDir, tmpDir);
        } else {
          FileUtil.fullyDelete(tmpDir);
        }
      }
      this.dataDir = new FSDir(currentDir);
      if (!tmpDir.mkdirs()) {
        if (!tmpDir.isDirectory()) {
          throw new IOException("Mkdirs failed to create " + tmpDir.toString());
        }
      }
      if (!detachDir.mkdirs()) {
        if (!detachDir.isDirectory()) {
          throw new IOException("Mkdirs failed to create " + detachDir.toString());
        }
      }
      this.usage = new DF(parent, conf);
      this.dfsUsage = new DU(parent, conf);
      this.dfsUsage.start();
    }
 /** {@inheritDoc} */
 public void validateBlockMetadata(Block b) throws IOException {
   DatanodeBlockInfo info = volumeMap.get(b);
   if (info == null) {
     throw new IOException("Block " + b + " does not exist in volumeMap.");
   }
   FSVolume v = info.getVolume();
   File tmp = v.getTmpFile(b);
   File f = getFile(b);
   if (f == null) {
     f = tmp;
   }
   if (f == null) {
     throw new IOException("Block " + b + " does not exist on disk.");
   }
   if (!f.exists()) {
     throw new IOException("Block " + b + " block file " + f + " does not exist on disk.");
   }
   if (b.getNumBytes() != f.length()) {
     throw new IOException(
         "Block "
             + b
             + " length is "
             + b.getNumBytes()
             + " does not match block file length "
             + f.length());
   }
   File meta = getMetaFile(f, b);
   if (meta == null) {
     throw new IOException("Block " + b + " metafile does not exist.");
   }
   if (!meta.exists()) {
     throw new IOException("Block " + b + " metafile " + meta + " does not exist on disk.");
   }
   if (meta.length() == 0) {
     throw new IOException("Block " + b + " metafile " + meta + " is empty.");
   }
   long stamp = parseGenerationStamp(f, meta);
   if (stamp != b.getGenerationStamp()) {
     throw new IOException(
         "Block "
             + b
             + " genstamp is "
             + b.getGenerationStamp()
             + " does not match meta file stamp "
             + stamp);
   }
 }
 /**
  * Find the metadata file for the specified block file. Return the generation stamp from the
  * name of the metafile.
  */
 long getGenerationStampFromFile(File[] listdir, File blockFile) {
   String blockName = blockFile.getName();
   for (int j = 0; j < listdir.length; j++) {
     String path = listdir[j].getName();
     if (!path.startsWith(blockName)) {
       continue;
     }
     String[] vals = path.split("_");
     if (vals.length != 3) { // blk, blkid, genstamp.meta
       continue;
     }
     String[] str = vals[2].split("\\.");
     if (str.length != 2) {
       continue;
     }
     return Long.parseLong(str[0]);
   }
   DataNode.LOG.warn("Block " + blockFile + " does not have a metafile!");
   return Block.GRANDFATHER_GENERATION_STAMP;
 }
 public MetaDataInputStream getMetaDataInputStream(Block b) throws IOException {
   File checksumFile = getMetaFile(b);
   return new MetaDataInputStream(new FileInputStream(checksumFile), checksumFile.length());
 }
  public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

    if (args.length != 2) {
      System.out.println("Usage: FeatureMatching ID <inputName.jpeg/inputName.jpg>");
      System.exit(1);
    }

    SimpleDateFormat sdf = new SimpleDateFormat("", Locale.US);
    sdf.applyPattern("yyyy-MM-dd_HH-mm-ss");
    String time = sdf.format(new Date());

    Job job = Job.getInstance();

    ID = "/" + args[0];
    String filename = args[1];
    filename = filename.toLowerCase();
    System.out.println("current filename:" + filename);

    // Detect illegal username (if the username dir doesn't exist)
    File userPath = new File(LOCAL_USER_DIR + ID);
    if (!userPath.exists()) {
      System.out.println("Unauthorized username!!!\nExiting......");
      System.exit(1);
    }
    // Preprocess the input image.jpg from local dir: /local.../user/ID/input/image.jpg
    // Save the features to local dir: hdfs://.../user/ID/input/image.jpg
    extractQueryFeatures2HDFS(filename, job);

    // Add the feature file to the hdfs cache
    String featureFileName = filename.substring(0, filename.lastIndexOf(".")) + ".json";
    //        job.addCacheFile(new Path(HDFS_HOME + USER + ID + INPUT + "/" +
    // featureFileName).toUri());
    job.getConfiguration()
        .set("featureFilePath", HDFS_HOME + USER + ID + INPUT + "/" + featureFileName);

    // Check the file type. Only support jpeg/jpg type images
    String type = filename.substring(args[1].lastIndexOf("."));
    if (!(type.equals(".jpg") || type.equals(".jpeg"))) {
      System.out.println("Image type not supported!!!\nExiting");
      System.exit(1);
    }

    // Input: hdfs://.../features/
    // The feature dir is a location of all features extracted from the database
    String inputPathStr = HDFS_HOME + FEATURES;
    // Output: hdfs://.../user/ID/output/
    String outputPathStr = HDFS_HOME + USER + ID + OUTPUT + "/" + time;

    job.setInputFormatClass(KeyValueTextInputFormat.class);
    //        job.setOutputFormatClass(TextOutputFormat.class);

    // Get the lists of all feature files: /.../features/data/part-*
    FileSystem fs = FileSystem.get(job.getConfiguration());
    FileStatus[] statuses = fs.listStatus(new Path(inputPathStr));
    StringBuffer sb = new StringBuffer();
    for (FileStatus fileStatus : statuses) {
      sb.append(fileStatus.getPath() + ",");
    }
    sb.deleteCharAt(sb.lastIndexOf(","));

    job.setJarByClass(FeatureMatching.class);
    job.setMapperClass(FeatureMatchMapper.class);
    job.setReducerClass(FeatureMatchReducer.class);

    // only need one reducer to collect the result
    job.setNumReduceTasks(1);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    // Input a directory, so need the recursive input
    FileInputFormat.setInputDirRecursive(job, true);
    // Set the PathFilter, to omit _SUCCESS files
    // (This is not working correctly, as the PathFilter class is an interface rather than a class.
    // But the 2nd arg asks me to extend the PathFilter)
    //        FileInputFormat.setInputPathFilter(job, MyPathFilter.class);
    //
    //        FileInputFormat.setInputPaths(job, new Path(inputPathStr));
    FileInputFormat.setInputPaths(job, sb.toString());
    FileOutputFormat.setOutputPath(job, new Path(outputPathStr));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
  }
  /**
   * Try to update an old block to a new block. If there are ongoing create threads running for the
   * old block, the threads will be returned without updating the block.
   *
   * @return ongoing create threads if there is any. Otherwise, return null.
   */
  private synchronized List<Thread> tryUpdateBlock(Block oldblock, Block newblock)
      throws IOException {
    // check ongoing create threads
    final ActiveFile activefile = ongoingCreates.get(oldblock);
    if (activefile != null && !activefile.threads.isEmpty()) {
      // remove dead threads
      for (Iterator<Thread> i = activefile.threads.iterator(); i.hasNext(); ) {
        final Thread t = i.next();
        if (!t.isAlive()) {
          i.remove();
        }
      }

      // return living threads
      if (!activefile.threads.isEmpty()) {
        return new ArrayList<Thread>(activefile.threads);
      }
    }

    // No ongoing create threads is alive. Update block.
    File blockFile = findBlockFile(oldblock.getBlockId());
    if (blockFile == null) {
      throw new IOException("Block " + oldblock + " does not exist.");
    }

    File oldMetaFile = findMetaFile(blockFile);
    long oldgs = parseGenerationStamp(blockFile, oldMetaFile);

    // rename meta file to a tmp file
    File tmpMetaFile =
        new File(
            oldMetaFile.getParent(),
            oldMetaFile.getName() + "_tmp" + newblock.getGenerationStamp());
    if (!oldMetaFile.renameTo(tmpMetaFile)) {
      throw new IOException("Cannot rename block meta file to " + tmpMetaFile);
    }

    // update generation stamp
    if (oldgs > newblock.getGenerationStamp()) {
      throw new IOException(
          "Cannot update block (id="
              + newblock.getBlockId()
              + ") generation stamp from "
              + oldgs
              + " to "
              + newblock.getGenerationStamp());
    }

    // update length
    if (newblock.getNumBytes() > oldblock.getNumBytes()) {
      throw new IOException(
          "Cannot update block file (="
              + blockFile
              + ") length from "
              + oldblock.getNumBytes()
              + " to "
              + newblock.getNumBytes());
    }
    if (newblock.getNumBytes() < oldblock.getNumBytes()) {
      truncateBlock(blockFile, tmpMetaFile, oldblock.getNumBytes(), newblock.getNumBytes());
    }

    // rename the tmp file to the new meta file (with new generation stamp)
    File newMetaFile = getMetaFile(blockFile, newblock);
    if (!tmpMetaFile.renameTo(newMetaFile)) {
      throw new IOException("Cannot rename tmp meta file to " + newMetaFile);
    }

    updateBlockMap(ongoingCreates, oldblock, newblock);
    updateBlockMap(volumeMap, oldblock, newblock);

    // paranoia! verify that the contents of the stored block
    // matches the block file on disk.
    validateBlockMetadata(newblock);
    return null;
  }
  /**
   * Start writing to a block file If isRecovery is true and the block pre-exists, then we kill all
   * volumeMap.put(b, v); volumeMap.put(b, v); other threads that might be writing to this block,
   * and then reopen the file.
   */
  public BlockWriteStreams writeToBlock(Block b, boolean isRecovery) throws IOException {
    //
    // Make sure the block isn't a valid one - we're still creating it!
    //
    if (isValidBlock(b)) {
      if (!isRecovery) {
        throw new BlockAlreadyExistsException(
            "Block " + b + " is valid, and cannot be written to.");
      }
      // If the block was successfully finalized because all packets
      // were successfully processed at the Datanode but the ack for
      // some of the packets were not received by the client. The client
      // re-opens the connection and retries sending those packets.
      // The other reason is that an "append" is occurring to this block.
      detachBlock(b, 1);
    }
    long blockSize = b.getNumBytes();

    //
    // Serialize access to /tmp, and check if file already there.
    //
    File f = null;
    List<Thread> threads = null;
    synchronized (this) {
      //
      // Is it already in the create process?
      //
      ActiveFile activeFile = ongoingCreates.get(b);
      if (activeFile != null) {
        f = activeFile.file;
        threads = activeFile.threads;

        if (!isRecovery) {
          throw new BlockAlreadyExistsException(
              "Block "
                  + b
                  + " has already been started (though not completed), and thus cannot be created.");
        } else {
          for (Thread thread : threads) {
            thread.interrupt();
          }
        }
        ongoingCreates.remove(b);
      }
      FSVolume v = null;
      if (!isRecovery) {
        v = volumes.getNextVolume(blockSize);
        // create temporary file to hold block in the designated volume
        f = createTmpFile(v, b);
        volumeMap.put(b, new DatanodeBlockInfo(v));
      } else if (f != null) {
        DataNode.LOG.info("Reopen already-open Block for append " + b);
        // create or reuse temporary file to hold block in the
        // designated volume
        v = volumeMap.get(b).getVolume();
        volumeMap.put(b, new DatanodeBlockInfo(v));
      } else {
        // reopening block for appending to it.
        DataNode.LOG.info("Reopen Block for append " + b);
        v = volumeMap.get(b).getVolume();
        f = createTmpFile(v, b);
        File blkfile = getBlockFile(b);
        File oldmeta = getMetaFile(b);
        File newmeta = getMetaFile(f, b);

        // rename meta file to tmp directory
        DataNode.LOG.debug("Renaming " + oldmeta + " to " + newmeta);
        if (!oldmeta.renameTo(newmeta)) {
          throw new IOException(
              "Block "
                  + b
                  + " reopen failed. "
                  + " Unable to move meta file  "
                  + oldmeta
                  + " to tmp dir "
                  + newmeta);
        }

        // rename block file to tmp directory
        DataNode.LOG.debug("Renaming " + blkfile + " to " + f);
        if (!blkfile.renameTo(f)) {
          if (!f.delete()) {
            throw new IOException(
                "Block " + b + " reopen failed. " + " Unable to remove file " + f);
          }
          if (!blkfile.renameTo(f)) {
            throw new IOException(
                "Block "
                    + b
                    + " reopen failed. "
                    + " Unable to move block file "
                    + blkfile
                    + " to tmp dir "
                    + f);
          }
        }
        volumeMap.put(b, new DatanodeBlockInfo(v));
      }
      if (f == null) {
        DataNode.LOG.warn("Block " + b + " reopen failed " + " Unable to locate tmp file.");
        throw new IOException("Block " + b + " reopen failed " + " Unable to locate tmp file.");
      }
      ongoingCreates.put(b, new ActiveFile(f, threads));
    }

    try {
      if (threads != null) {
        for (Thread thread : threads) {
          thread.join();
        }
      }
    } catch (InterruptedException e) {
      throw new IOException("Recovery waiting for thread interrupted.");
    }

    //
    // Finally, allow a writer to the block file
    // REMIND - mjc - make this a filter stream that enforces a max
    // block size, so clients can't go crazy
    //
    File metafile = getMetaFile(f, b);
    DataNode.LOG.debug("writeTo blockfile is " + f + " of size " + f.length());
    DataNode.LOG.debug("writeTo metafile is " + metafile + " of size " + metafile.length());
    return createBlockWriteStreams(f, metafile);
  }