Example #1
  public void testAbort() throws IOException {
    JobConf job = new JobConf();
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext));

    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    // write output
    FileSystem localFs = FileSystem.getLocal(job);
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do abort
    committer.abortTask(tContext);
    File expectedFile =
        new File(new Path(committer.getTempTaskOutputPath(tContext), file).toString());
    assertFalse("task temp dir still exists", expectedFile.exists());

    committer.abortJob(jContext, JobStatus.State.FAILED);
    expectedFile = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString());
    assertFalse("job temp dir still exists", expectedFile.exists());
    assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);
    FileUtil.fullyDelete(new File(outDir.toString()));
  }
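The test above calls a writeOutput(...) helper that is not included in this snippet. A minimal sketch of what such a helper could look like with the old org.apache.hadoop.mapred API (hypothetical; the keys/values and the org.apache.hadoop.io.Text type are placeholders, not part of the example):

  // Hypothetical helper: emit a couple of records and close the writer so the output
  // file is materialized in the task's temporary output directory.
  private void writeOutput(RecordWriter theRecordWriter, Reporter reporter) throws IOException {
    try {
      theRecordWriter.write(new Text("key1"), new Text("value1"));
      theRecordWriter.write(new Text("key2"), new Text("value2"));
    } finally {
      theRecordWriter.close(reporter);
    }
  }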
Example #2
  // Mostly for setting up the symlinks. Note that when we set up the distributed
  // cache, we didn't create the symlinks. This is done on a per-task basis
  // by the currently executing task.
  public static void setupWorkDir(JobConf conf) throws IOException {
    File workDir = new File(".").getAbsoluteFile();
    FileUtil.fullyDelete(workDir);
    if (DistributedCache.getSymlink(conf)) {
      URI[] archives = DistributedCache.getCacheArchives(conf);
      URI[] files = DistributedCache.getCacheFiles(conf);
      Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);
      Path[] localFiles = DistributedCache.getLocalCacheFiles(conf);
      if (archives != null) {
        for (int i = 0; i < archives.length; i++) {
          String link = archives[i].getFragment();
          if (link != null) {
            link = workDir.toString() + Path.SEPARATOR + link;
            File flink = new File(link);
            if (!flink.exists()) {
              FileUtil.symLink(localArchives[i].toString(), link);
            }
          }
        }
      }
      if (files != null) {
        for (int i = 0; i < files.length; i++) {
          String link = files[i].getFragment();
          if (link != null) {
            link = workDir.toString() + Path.SEPARATOR + link;
            File flink = new File(link);
            if (!flink.exists()) {
              FileUtil.symLink(localFiles[i].toString(), link);
            }
          }
        }
      }
    }
    File jobCacheDir = null;
    if (conf.getJar() != null) {
      jobCacheDir = new File(new Path(conf.getJar()).getParent().toString());
    }

    // create symlinks for all the files in the job cache dir in the current
    // working dir for streaming
    try {
      DistributedCache.createAllSymlink(conf, jobCacheDir, workDir);
    } catch (IOException ie) {
      // Do not exit even if symlinks have not been created.
      LOG.warn(StringUtils.stringifyException(ie));
    }
    // add java.io.tmpdir given by mapred.child.tmp
    String tmp = conf.get("mapred.child.tmp", "./tmp");
    Path tmpDir = new Path(tmp);

    // if temp directory path is not absolute
    // prepend it with workDir.
    if (!tmpDir.isAbsolute()) {
      tmpDir = new Path(workDir.toString(), tmp);
      FileSystem localFs = FileSystem.getLocal(conf);
      if (!localFs.mkdirs(tmpDir) && !localFs.getFileStatus(tmpDir).isDir()) {
        throw new IOException("Mkdirs failed to create " + tmpDir.toString());
      }
    }
  }
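The symlink branch above only runs when the job registered cache entries with URI fragments and enabled symlink creation. A minimal, hypothetical job-side setup using the same old DistributedCache API (the hdfs:// path is a placeholder):

  // Hypothetical configuration: the fragment after '#' becomes the link name that
  // setupWorkDir later creates in the task's working directory.
  static JobConf configureCacheSymlinks() {
    JobConf conf = new JobConf();
    DistributedCache.addCacheFile(URI.create("hdfs:///cache/dict.txt#dict.txt"), conf);
    DistributedCache.createSymlink(conf); // DistributedCache.getSymlink(conf) now returns true
    return conf;
  }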
Example #3
  public void testFailAbort() throws IOException {
    JobConf job = new JobConf();
    job.set(FileSystem.FS_DEFAULT_NAME_KEY, "faildel:///");
    job.setClass("fs.faildel.impl", FakeFileSystem.class, FileSystem.class);
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext));

    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    // write output
    FileSystem localFs = new FakeFileSystem();
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do abort
    Throwable th = null;
    try {
      committer.abortTask(tContext);
    } catch (IOException ie) {
      th = ie;
    }
    assertNotNull(th);
    assertTrue(th instanceof IOException);
    assertTrue(th.getMessage().contains("fake delete failed"));
    File jobTmpDir = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString());
    File taskTmpDir = new File(jobTmpDir, "_" + taskID);
    File expectedFile = new File(taskTmpDir, file);
    assertTrue(expectedFile + " does not exist", expectedFile.exists());

    th = null;
    try {
      committer.abortJob(jContext, JobStatus.State.FAILED);
    } catch (IOException ie) {
      th = ie;
    }
    assertNotNull(th);
    assertTrue(th instanceof IOException);
    assertTrue(th.getMessage().contains("fake delete failed"));
    assertTrue("job temp dir does not exists", jobTmpDir.exists());
  }
 public FSDir(File dir) throws IOException {
   this.dir = dir;
   this.children = null;
   if (!dir.exists()) {
     if (!dir.mkdirs()) {
       throw new IOException("Mkdirs failed to create " + dir.toString());
     }
   } else {
     File[] files = dir.listFiles();
     int numChildren = 0;
     for (int idx = 0; idx < files.length; idx++) {
       if (files[idx].isDirectory()) {
         numChildren++;
       } else if (Block.isBlockFilename(files[idx])) {
         numBlocks++;
       }
     }
     if (numChildren > 0) {
       children = new FSDir[numChildren];
       int curdir = 0;
       for (int idx = 0; idx < files.length; idx++) {
         if (files[idx].isDirectory()) {
           children[curdir] = new FSDir(files[idx]);
           curdir++;
         }
       }
     }
   }
 }
 private File createTmpFile(Block b, File f) throws IOException {
   if (f.exists()) {
     throw new IOException(
         "Unexpected problem in creating temporary file for "
             + b
             + ".  File "
             + f
             + " should not be present, but is.");
   }
   // Create the zero-length temp file
   //
   boolean fileCreated = false;
   try {
     fileCreated = f.createNewFile();
   } catch (IOException ioe) {
     throw (IOException) new IOException(DISK_ERROR + f).initCause(ioe);
   }
   if (!fileCreated) {
     throw new IOException(
         "Unexpected problem in creating temporary file for "
             + b
             + ".  File "
             + f
             + " should be creatable, but is already present.");
   }
   return f;
 }
    /**
     * Recover detached files on datanode restart. If the original block file no longer exists in
     * the data directory, the detached copy is moved back there; otherwise the detached copy is
     * deleted.
     */
    private void recoverDetachedBlocks(File dataDir, File dir) throws IOException {
      File contents[] = dir.listFiles();
      if (contents == null) {
        return;
      }
      for (int i = 0; i < contents.length; i++) {
        if (!contents[i].isFile()) {
          throw new IOException("Found " + contents[i] + " in " + dir + " but it is not a file.");
        }

        //
        // If the original block file still exists, then no recovery
        // is needed.
        //
        File blk = new File(dataDir, contents[i].getName());
        if (!blk.exists()) {
          if (!contents[i].renameTo(blk)) {
            throw new IOException("Unable to recover detached file " + contents[i]);
          }
          continue;
        }
        if (!contents[i].delete()) {
          throw new IOException("Unable to cleanup detached file " + contents[i]);
        }
      }
    }
 /** {@inheritDoc} */
 public void validateBlockMetadata(Block b) throws IOException {
   DatanodeBlockInfo info = volumeMap.get(b);
   if (info == null) {
     throw new IOException("Block " + b + " does not exist in volumeMap.");
   }
   FSVolume v = info.getVolume();
   File tmp = v.getTmpFile(b);
   File f = getFile(b);
   if (f == null) {
     f = tmp;
   }
   if (f == null) {
     throw new IOException("Block " + b + " does not exist on disk.");
   }
   if (!f.exists()) {
     throw new IOException("Block " + b + " block file " + f + " does not exist on disk.");
   }
   if (b.getNumBytes() != f.length()) {
     throw new IOException(
         "Block "
             + b
             + " length is "
             + b.getNumBytes()
             + " does not match block file length "
             + f.length());
   }
   File meta = getMetaFile(f, b);
   if (meta == null) {
     throw new IOException("Block " + b + " metafile does not exist.");
   }
   if (!meta.exists()) {
     throw new IOException("Block " + b + " metafile " + meta + " does not exist on disk.");
   }
   if (meta.length() == 0) {
     throw new IOException("Block " + b + " metafile " + meta + " is empty.");
   }
   long stamp = parseGenerationStamp(f, meta);
   if (stamp != b.getGenerationStamp()) {
     throw new IOException(
         "Block "
             + b
             + " genstamp is "
             + b.getGenerationStamp()
             + " does not match meta file stamp "
             + stamp);
   }
 }
Пример #8
0
  /*
   * Fetch a file that is in a Hadoop file system. Return a local File.
   * Interruptible.
   */
  private File hdfsFetch(Path fromPath, Reporter reporter)
      throws IOException, InterruptedException {
    UUID uniqueId = UUID.randomUUID();
    File toFile = new File(tempDir, uniqueId.toString() + "/" + fromPath.getName());
    File toDir = new File(toFile.getParent());
    if (toDir.exists()) {
      FileUtils.deleteDirectory(toDir);
    }
    toDir.mkdirs();
    Path toPath = new Path(toFile.getCanonicalPath());

    FileSystem fS = fromPath.getFileSystem(hadoopConf);
    FileSystem tofS = FileSystem.getLocal(hadoopConf);

    Throttler throttler = new Throttler((double) bytesPerSecThrottle);
    try {
      for (FileStatus fStatus : fS.globStatus(fromPath)) {
        log.info("Copying " + fStatus.getPath() + " to " + toPath);
        long bytesSoFar = 0;

        FSDataInputStream iS = fS.open(fStatus.getPath());
        FSDataOutputStream oS = tofS.create(toPath);

        byte[] buffer = new byte[downloadBufferSize];

        int nRead;
        while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
          // Needed to be able to be interrupted at any moment.
          if (Thread.interrupted()) {
            iS.close();
            oS.close();
            cleanDirNoExceptions(toDir);
            throw new InterruptedException();
          }
          bytesSoFar += nRead;
          oS.write(buffer, 0, nRead);
          throttler.incrementAndThrottle(nRead);
          if (bytesSoFar >= bytesToReportProgress) {
            reporter.progress(bytesSoFar);
            bytesSoFar = 0L;
          }
        }

        if (reporter != null) {
          reporter.progress(bytesSoFar);
        }

        oS.close();
        iS.close();
      }

      return toDir;
    } catch (ClosedByInterruptException e) {
      // This can be thrown by the method read.
      cleanDirNoExceptions(toDir);
      throw new InterruptedIOException();
    }
  }
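For comparison only, and not what the example above does: if the throttling, progress reporting and interruption checks were not needed, the manual read/write loop could be replaced with Hadoop's IOUtils.copyBytes. A sketch under that assumption (all names are illustrative):

  // Simplified copy helper; deliberately drops the throttling/interruption logic of hdfsFetch.
  static void plainCopy(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, int bufferSize)
      throws IOException {
    try (FSDataInputStream in = srcFs.open(src);
         FSDataOutputStream out = dstFs.create(dst)) {
      // org.apache.hadoop.io.IOUtils; 'false' because try-with-resources closes both streams.
      IOUtils.copyBytes(in, out, bufferSize, false);
    }
  }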
 /** Find the file corresponding to the block and return it if it exists. */
 File validateBlockFile(Block b) {
   // Should we check for metadata file too?
   File f = getFile(b);
   if (f != null && f.exists()) return f;
   if (InterDatanodeProtocol.LOG.isDebugEnabled()) {
     InterDatanodeProtocol.LOG.debug("b=" + b + ", f=" + f);
   }
   return null;
 }
Example #10
    FSVolume(File currentDir, Configuration conf) throws IOException {
      this.reserved = conf.getLong("dfs.datanode.du.reserved", 0);
      boolean supportAppends = conf.getBoolean("dfs.support.append", false);
      File parent = currentDir.getParentFile();

      this.detachDir = new File(parent, "detach");
      if (detachDir.exists()) {
        recoverDetachedBlocks(currentDir, detachDir);
      }

      // Files that were being written when the datanode was last shutdown
      // are now moved back to the data directory. It is possible that
      // in the future, we might want to do some sort of datanode-local
      // recovery for these blocks. For example, crc validation.
      //
      this.tmpDir = new File(parent, "tmp");
      if (tmpDir.exists()) {
        if (supportAppends) {
          recoverDetachedBlocks(currentDir, tmpDir);
        } else {
          FileUtil.fullyDelete(tmpDir);
        }
      }
      this.dataDir = new FSDir(currentDir);
      if (!tmpDir.mkdirs()) {
        if (!tmpDir.isDirectory()) {
          throw new IOException("Mkdirs failed to create " + tmpDir.toString());
        }
      }
      if (!detachDir.mkdirs()) {
        if (!detachDir.isDirectory()) {
          throw new IOException("Mkdirs failed to create " + detachDir.toString());
        }
      }
      this.usage = new DF(parent, conf);
      this.dfsUsage = new DU(parent, conf);
      this.dfsUsage.start();
    }
Example #11
 /*
  * Fetch a file that is in a local file system. Return a local File.
  */
 private File fileFetch(File file, Reporter reporter) throws IOException, InterruptedException {
   UUID uniqueId = UUID.randomUUID();
   File toDir = new File(tempDir, uniqueId.toString() + "/" + file.getName());
   if (toDir.exists()) {
     FileUtils.deleteDirectory(toDir);
   }
   toDir.mkdirs();
   log.info("Copying " + file + " to " + toDir);
   try {
     copyFile(file, new File(toDir, file.getName()), reporter);
   } catch (InterruptedException e) {
     cleanDirNoExceptions(toDir);
     throw e;
   }
   return toDir;
 }
Example #12
  /** Complete the block write! */
  public synchronized void finalizeBlock(Block b) throws IOException {
    ActiveFile activeFile = ongoingCreates.get(b);
    if (activeFile == null) {
      throw new IOException("Block " + b + " is already finalized.");
    }
    File f = activeFile.file;
    if (f == null || !f.exists()) {
      throw new IOException("No temporary file " + f + " for block " + b);
    }
    FSVolume v = volumeMap.get(b).getVolume();
    if (v == null) {
      throw new IOException("No volume for temporary file " + f + " for block " + b);
    }

    File dest = v.addBlock(b, f);
    volumeMap.put(b, new DatanodeBlockInfo(v, dest));
    ongoingCreates.remove(b);
  }
Example #13
 static {
   Configuration conf = null;
   if (H2O.OPT_ARGS.hdfs_config != null) {
     conf = new Configuration();
     File p = new File(H2O.OPT_ARGS.hdfs_config);
     if (!p.exists()) Log.die("Unable to open hdfs configuration file " + p.getAbsolutePath());
     conf.addResource(new Path(p.getAbsolutePath()));
     Log.debug(Sys.HDFS_, "resource ", p.getAbsolutePath(), " added to the hadoop configuration");
   } else {
     conf = new Configuration();
     if (!Strings.isNullOrEmpty(H2O.OPT_ARGS.hdfs)) {
        // Set up the default remote filesystem - for version 0.21 and higher.
        conf.set("fs.defaultFS", H2O.OPT_ARGS.hdfs);
        // For compatibility with version 0.20.0, also set the property fs.default.name,
        // which newer versions renamed to 'fs.defaultFS'.
        conf.set("fs.default.name", H2O.OPT_ARGS.hdfs);
     }
   }
   CONF = conf;
 }
Example #14
 private static void createSymlink(
     Configuration conf,
     URI cache,
     CacheStatus cacheStatus,
     boolean isArchive,
     Path currentWorkDir,
     boolean honorSymLinkConf)
     throws IOException {
   boolean doSymlink = honorSymLinkConf && DistributedCache.getSymlink(conf);
   if (cache.getFragment() == null) {
     doSymlink = false;
   }
   String link = currentWorkDir.toString() + Path.SEPARATOR + cache.getFragment();
   File flink = new File(link);
   if (doSymlink) {
     if (!flink.exists()) {
       FileUtil.symLink(cacheStatus.localizedLoadPath.toString(), link);
     }
   }
 }
Example #15
  public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

    if (args.length != 2) {
      System.out.println("Usage: FeatureMatching ID <inputName.jpeg/inputName.jpg>");
      System.exit(1);
    }

    SimpleDateFormat sdf = new SimpleDateFormat("", Locale.US);
    sdf.applyPattern("yyyy-MM-dd_HH-mm-ss");
    String time = sdf.format(new Date());

    Job job = Job.getInstance();

    ID = "/" + args[0];
    String filename = args[1];
    filename = filename.toLowerCase();
    System.out.println("current filename:" + filename);

    // Detect illegal username (if the username dir doesn't exist)
    File userPath = new File(LOCAL_USER_DIR + ID);
    if (!userPath.exists()) {
      System.out.println("Unauthorized username!!!\nExiting......");
      System.exit(1);
    }
    // Preprocess the input image.jpg from the local dir: /local.../user/ID/input/image.jpg
    // Save the extracted features to HDFS: hdfs://.../user/ID/input/image.jpg
    extractQueryFeatures2HDFS(filename, job);

    // Add the feature file to the hdfs cache
    String featureFileName = filename.substring(0, filename.lastIndexOf(".")) + ".json";
    //        job.addCacheFile(new Path(HDFS_HOME + USER + ID + INPUT + "/" + featureFileName).toUri());
    job.getConfiguration()
        .set("featureFilePath", HDFS_HOME + USER + ID + INPUT + "/" + featureFileName);

    // Check the file type. Only support jpeg/jpg type images
    String type = filename.substring(args[1].lastIndexOf("."));
    if (!(type.equals(".jpg") || type.equals(".jpeg"))) {
      System.out.println("Image type not supported!!!\nExiting");
      System.exit(1);
    }

    // Input: hdfs://.../features/
    // The feature dir is a location of all features extracted from the database
    String inputPathStr = HDFS_HOME + FEATURES;
    // Output: hdfs://.../user/ID/output/
    String outputPathStr = HDFS_HOME + USER + ID + OUTPUT + "/" + time;

    job.setInputFormatClass(KeyValueTextInputFormat.class);
    //        job.setOutputFormatClass(TextOutputFormat.class);

    // Get the lists of all feature files: /.../features/data/part-*
    FileSystem fs = FileSystem.get(job.getConfiguration());
    FileStatus[] statuses = fs.listStatus(new Path(inputPathStr));
    StringBuffer sb = new StringBuffer();
    for (FileStatus fileStatus : statuses) {
      sb.append(fileStatus.getPath() + ",");
    }
    sb.deleteCharAt(sb.lastIndexOf(","));

    job.setJarByClass(FeatureMatching.class);
    job.setMapperClass(FeatureMatchMapper.class);
    job.setReducerClass(FeatureMatchReducer.class);

    // only need one reducer to collect the result
    job.setNumReduceTasks(1);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    // Input a directory, so need the recursive input
    FileInputFormat.setInputDirRecursive(job, true);
    // Set the PathFilter to omit _SUCCESS files
    // (This is not working correctly: PathFilter is an interface rather than a class,
    // but the 2nd arg asks me to extend PathFilter)
    //        FileInputFormat.setInputPathFilter(job, MyPathFilter.class);
    //
    //        FileInputFormat.setInputPaths(job, new Path(inputPathStr));
    FileInputFormat.setInputPaths(job, sb.toString());
    FileOutputFormat.setOutputPath(job, new Path(outputPathStr));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
  }
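The run(String[]) signature above suggests the class implements org.apache.hadoop.util.Tool, although the class declaration is not shown. Under that assumption, a typical launcher would be (hypothetical):

  // Hypothetical main(); assumes FeatureMatching implements Tool.
  public static void main(String[] args) throws Exception {
    int exitCode = ToolRunner.run(new Configuration(), new FeatureMatching(), args);
    System.exit(exitCode);
  }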
Example #16
  /*
   * Fetch a file that is in an S3 file system. Return a local File. It accepts "s3://" and "s3n://" prefixes.
   * Interruptible.
   */
  private File s3Fetch(URI uri, Reporter reporter) throws IOException, InterruptedException {
    String bucketName = uri.getHost();
    String path = uri.getPath();
    UUID uniqueId = UUID.randomUUID();
    File destFolder = new File(tempDir, uniqueId.toString() + "/" + path);
    if (destFolder.exists()) {
      FileUtils.deleteDirectory(destFolder);
    }
    destFolder.mkdirs();

    Throttler throttler = new Throttler((double) bytesPerSecThrottle);

    boolean done = false;
    try {
      s3Service = new RestS3Service(getCredentials());
      if (s3Service.checkBucketStatus(bucketName) != RestS3Service.BUCKET_STATUS__MY_BUCKET) {
        throw new IOException("Bucket doesn't exist or is already claimed: " + bucketName);
      }

      if (path.startsWith("/")) {
        path = path.substring(1, path.length());
      }

      for (S3Object object : s3Service.listObjects(new S3Bucket(bucketName), path, "")) {
        long bytesSoFar = 0;

        String fileName = path;
        if (path.contains("/")) {
          fileName = path.substring(path.lastIndexOf("/") + 1, path.length());
        }
        File fileDest = new File(destFolder, fileName);
        log.info("Downloading " + object.getKey() + " to " + fileDest + " ...");

        if (fileDest.exists()) {
          fileDest.delete();
        }

        object = s3Service.getObject(new S3Bucket(bucketName), object.getKey());
        InputStream iS = object.getDataInputStream();
        FileOutputStream writer = new FileOutputStream(fileDest);
        byte[] buffer = new byte[downloadBufferSize];

        int nRead;
        while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
          // Needed to be able to be interrupted at any moment.
          if (Thread.interrupted()) {
            iS.close();
            writer.close();
            cleanDirNoExceptions(destFolder);
            throw new InterruptedException();
          }

          bytesSoFar += nRead;
          writer.write(buffer, 0, nRead);
          throttler.incrementAndThrottle(nRead);
          if (bytesSoFar >= bytesToReportProgress) {
            reporter.progress(bytesSoFar);
            bytesSoFar = 0L;
          }
        }

        if (reporter != null) {
          reporter.progress(bytesSoFar);
        }

        writer.close();
        iS.close();
        done = true;
      }

      if (!done) {
        throw new IOException("Bucket is empty! " + bucketName + " path: " + path);
      }
    } catch (S3ServiceException e) {
      throw new IOException(e);
    }

    return destFolder;
  }
  /**
   * Compare the checksums of the hdfs file as well as the local copied file.
   *
   * @author [email protected]
   * @date Fri Jan 27 06:06:00 2012
   */
  boolean compareChecksums(FileSystem fs, Path p, String sFsPath) {
    try {
      // get hdfs file info
      FileStatus stat = fs.getFileStatus(p);

      // get HDFS checksum
      FileChecksum ck = fs.getFileChecksum(p);
      String sCk, sCkShort;
      if (ck == null) {
        sCk = sCkShort = "<null>";
      } else {
        sCk = ck.toString();
        sCkShort = sCk.replaceAll("^.*:", "");
      }

      // System.out.println(p.toUri().getPath() + " len=" + stat.getLen()
      // + " " + stat.getOwner() + "/" + stat.getGroup()
      // + " checksum=" + sCk);

      // find the local file
      File fLocal = new File(sFsPath);
      if (!fLocal.exists()) {
        System.out.println("CHECKSUM-ERROR: file does not exist: " + sFsPath);
        return false;
      }
      if (!fLocal.isFile()) {
        System.out.println("CHECKSUM-ERROR: path is not a file: " + sFsPath);
        return false;
      }
      if (stat.getLen() != fLocal.length()) {
        System.out.println(
            "CHECKSUM-ERROR: length mismatch: "
                + sFsPath
                + " hdfslen="
                + stat.getLen()
                + " fslen="
                + fLocal.length());
        return false;
      }

      // get local fs checksum
      FileChecksum ckLocal = getLocalFileChecksum(sFsPath);
      if (ckLocal == null) {
        System.out.println("ERROR Failed to get checksum for local file " + sFsPath);
        return false;
      }

      // compare checksums as a string, after stripping the
      // algorithm name from the beginning
      String sCkLocal = ckLocal.toString();
      String sCkLocalShort = sCkLocal.replaceAll("^.*:", "");

      if (false == sCkShort.equals(sCkLocalShort)) {
        System.out.println(
            "CHECKSUM-ERROR: checksum mismatch: "
                + sFsPath
                + "\nhdfs = "
                + sCk
                + "\nlocal= "
                + sCkLocal);
        return false;
      }

      return true;
    } catch (IOException e) {
      System.out.println("CHECKSUM-ERROR: " + sFsPath + " exception " + e.toString());
    }

    return false;
  }
  /**
   * Method to copy files from HDFS to the local filesystem.
   *
   * <p>localPath: path on the machine's filesystem; preservePath: where an existing local copy is
   * moved before being overwritten; fs: FileSystem object for HDFS; pathList: list of paths for
   * files that might need to be backed up; size: max size in bytes to be backed up.
   *
   * <p>Returns the date of the last file backed up if the size limit was reached, else zero.
   */
  public long backupFiles(
      String localPath, String preservePath, FileSystem fs, ArrayList<Path> pathList, long size) {
    Path fsPath;
    long tmpSize = 0;
    long tmpDate = 0;

    // Start iterating over all paths
    for (Path hdfsPath : pathList) {
      try {
        long nFileSize = fs.getContentSummary(hdfsPath).getLength();
        tmpSize = tmpSize + nFileSize;

        if ((tmpSize <= size) || (size == 0)) {
          FileStatus stat = fs.getFileStatus(hdfsPath);

          System.err.println(
              "File "
                  + hdfsPath.toUri().getPath()
                  + " "
                  + nFileSize
                  + " bytes, "
                  + "perms: "
                  + stat.getOwner()
                  + "/"
                  + stat.getGroup()
                  + ", "
                  + stat.getPermission().toString());

          tmpDate = stat.getModificationTime() / 1000;

          String sFsPath = localPath + hdfsPath.toUri().getPath();
          fsPath = new Path(sFsPath);

          File f = new File(sFsPath);

          // COMMENTED OUT: until a few backup cycles run
          // and the mtime gets in fact set on all copied
          // files.
          //
          // ignore it if the file exists and has the same mtime
          // if (f.exists() && f.isFile() && f.lastModified() == stat.getModificationTime())
          // {
          // System.out.println("no need to backup " + f.toString() + ", mtime matches hdfs");
          // continue;
          // }

          if (false == m_bDryRun) {
            // check if we need to back up the local file
            // (not directory), if it already exists.
            if (f.exists() && f.isFile()) {
              // ignore files with substrings in the
              // no-preserve file
              if (true == doPreserveFile(sFsPath)) {
                // move it to the backup path
                String sNewPath = preservePath + hdfsPath.toUri().getPath();
                File newFile = new File(sNewPath);

                // create directory structure for new file?
                if (false == newFile.getParentFile().exists()) {
                  if (false == newFile.getParentFile().mkdirs()) {
                    System.err.println("Failed to mkdirs " + newFile.getParentFile().toString());
                    System.exit(1);
                  }
                }

                // rename existing file to new location
                if (false == f.renameTo(newFile)) {
                  System.err.println(
                      "Failed to renameTo " + f.toString() + " to " + newFile.toString());
                  System.exit(1);
                }

                System.out.println("preserved " + f.toString() + " into " + newFile.toString());
              } else {
                System.out.println("skipped preservation of " + f.toString());
              }
            }

            // copy from hdfs to local filesystem
            fs.copyToLocalFile(hdfsPath, fsPath);

            // set the mtime to match hdfs file
            f.setLastModified(stat.getModificationTime());

            // compare checksums on both files
            compareChecksums(fs, hdfsPath, sFsPath);
          }

          // don't print the progress after every file -- go
          // by at least 1% increments
          long nPercentDone = (long) (100 * tmpSize / m_nTotalBytes);
          if (nPercentDone > m_nLastPercentBytesDone) {
            System.out.println(
                "progress: copied "
                    + prettyPrintBytes(tmpSize)
                    + ", "
                    + nPercentDone
                    + "% done"
                    + ", tstamp="
                    + tmpDate);

            m_nLastPercentBytesDone = nPercentDone;
          }

          if (m_nSleepSeconds > 0) {
            try {
              Thread.sleep(1000 * m_nSleepSeconds);
            } catch (Exception e2) {
              // ignore
            }
          }
        } else {
          return tmpDate;
        }
      } catch (IOException e) {
        System.err.println("FATAL ERROR: Something wrong with the file");
        System.err.println(e);
        System.out.println(tmpDate);
        System.exit(1);

        return 0;
      }
    }

    return 0;
  }
Example #19
 /**
  * We're informed that a block is no longer valid. We could lazily garbage-collect the block, but
  * why bother? Just get rid of it.
  */
 public void invalidate(Block invalidBlks[]) throws IOException {
   boolean error = false;
   for (int i = 0; i < invalidBlks.length; i++) {
     File f = null;
     FSVolume v;
     synchronized (this) {
       f = getFile(invalidBlks[i]);
       DatanodeBlockInfo dinfo = volumeMap.get(invalidBlks[i]);
       if (dinfo == null) {
         DataNode.LOG.warn(
             "Unexpected error trying to delete block "
                 + invalidBlks[i]
                 + ". BlockInfo not found in volumeMap.");
         error = true;
         continue;
       }
       v = dinfo.getVolume();
       if (f == null) {
         DataNode.LOG.warn(
             "Unexpected error trying to delete block "
                 + invalidBlks[i]
                 + ". Block not found in blockMap."
                 + ((v == null) ? " " : " Block found in volumeMap."));
         error = true;
         continue;
       }
       if (v == null) {
         DataNode.LOG.warn(
             "Unexpected error trying to delete block "
                 + invalidBlks[i]
                 + ". No volume for this block."
                 + " Block found in blockMap. "
                 + f
                 + ".");
         error = true;
         continue;
       }
       File parent = f.getParentFile();
       if (parent == null) {
         DataNode.LOG.warn(
             "Unexpected error trying to delete block "
                 + invalidBlks[i]
                 + ". Parent not found for file "
                 + f
                 + ".");
         error = true;
         continue;
       }
       v.clearPath(parent);
       volumeMap.remove(invalidBlks[i]);
     }
     File metaFile = getMetaFile(f, invalidBlks[i]);
     long blockSize = f.length() + metaFile.length();
     if (!f.delete() || (!metaFile.delete() && metaFile.exists())) {
       DataNode.LOG.warn(
           "Unexpected error trying to delete block " + invalidBlks[i] + " at file " + f);
       error = true;
       continue;
     }
     v.decDfsUsed(blockSize);
     DataNode.LOG.info("Deleting block " + invalidBlks[i] + " file " + f);
     if (f.exists()) {
       //
       // This is a temporary check especially for hadoop-1220.
       // This will go away in the future.
       //
       DataNode.LOG.info("File " + f + " was deleted but still exists!");
     }
   }
   if (error) {
     throw new IOException("Error in deleting blocks.");
   }
 }
Example #20
  /** If interrupted, the partially written file is not deleted. */
  private void copyFile(File sourceFile, File destFile, Reporter reporter)
      throws IOException, InterruptedException {
    if (!destFile.exists()) {
      destFile.createNewFile();
    }
    FileChannel source = null;
    FileChannel destination = null;

    Throttler throttler = new Throttler((double) bytesPerSecThrottle);

    FileInputStream iS = null;
    FileOutputStream oS = null;

    try {
      iS = new FileInputStream(sourceFile);
      oS = new FileOutputStream(destFile);
      source = iS.getChannel();
      destination = oS.getChannel();
      long bytesSoFar = 0;
      long reportingBytesSoFar = 0;
      long size = source.size();

      int transferred = 0;

      while (bytesSoFar < size) {
        // Needed to be able to be interrupted at any moment.
        if (Thread.interrupted()) {
          throw new InterruptedException();
        }

        // Casting to int here is safe since we will transfer at most "downloadBufferSize" bytes.
        // This is done on purpose to be able to implement throttling.
        transferred = (int) destination.transferFrom(source, bytesSoFar, downloadBufferSize);
        bytesSoFar += transferred;
        reportingBytesSoFar += transferred;
        throttler.incrementAndThrottle(transferred);
        if (reportingBytesSoFar >= bytesToReportProgress) {
          reporter.progress(reportingBytesSoFar);
          reportingBytesSoFar = 0L;
        }
      }

      if (reporter != null) {
        reporter.progress(reportingBytesSoFar);
      }

    } catch (InterruptedException e) {
      // Propagate the interruption instead of swallowing it; the caller (fileFetch)
      // cleans up the destination directory and rethrows.
      throw e;
    } finally {
      if (iS != null) {
        iS.close();
      }
      if (oS != null) {
        oS.close();
      }
      if (source != null) {
        source.close();
      }
      if (destination != null) {
        destination.close();
      }
    }
  }