Esempio n. 1
1
  private void unzipPlugin(Path zip, Path target) throws IOException {
    Files.createDirectories(target);

    try (ZipInputStream zipInput = new ZipInputStream(Files.newInputStream(zip))) {
      ZipEntry entry;
      byte[] buffer = new byte[8192];
      while ((entry = zipInput.getNextEntry()) != null) {
        Path targetFile = target.resolve(entry.getName());

        // be on the safe side: do not rely on that directories are always extracted
        // before their children (although this makes sense, but is it guaranteed?)
        Files.createDirectories(targetFile.getParent());
        if (entry.isDirectory() == false) {
          try (OutputStream out = Files.newOutputStream(targetFile)) {
            int len;
            while ((len = zipInput.read(buffer)) >= 0) {
              out.write(buffer, 0, len);
            }
          }
        }
        zipInput.closeEntry();
      }
    }
  }
Esempio n. 2
0
  // the method which actually copies the caches locally and unjars/unzips them
  // and does chmod for the files
  private static Path localizeCache(
      Configuration conf, URI cache, long confFileStamp, CacheStatus cacheStatus, boolean isArchive)
      throws IOException {
    FileSystem fs = getFileSystem(cache, conf);
    FileSystem localFs = FileSystem.getLocal(conf);
    Path parchive = null;

    if (isArchive) {
      parchive =
          new Path(
              cacheStatus.localizedLoadPath, new Path(cacheStatus.localizedLoadPath.getName()));
    } else {
      parchive = cacheStatus.localizedLoadPath;
    }
    if (!localFs.mkdirs(parchive.getParent())) {
      throw new IOException(
          "Mkdirs failed to create directory " + cacheStatus.localizedLoadPath.toString());
    }
    String cacheId = cache.getPath();

    fs.copyToLocalFile(new Path(cacheId), parchive);
    if (isArchive) {
      String tmpArchive = parchive.toString().toLowerCase();
      File srcFile = new File(parchive.toString());
      File destDir = new File(parchive.getParent().toString());
      if (tmpArchive.endsWith(".jar")) {
        RunJar.unJar(srcFile, destDir);
      } else if (tmpArchive.endsWith(".zip")) {
        FileUtil.unZip(srcFile, destDir);
      } else if (isTarFile(tmpArchive)) {
        FileUtil.unTar(srcFile, destDir);
      }
      // else will not do anyhting
      // and copy the file into the dir as it is
    }
    long cacheSize = FileUtil.getDU(new File(parchive.getParent().toString()));
    cacheStatus.size = cacheSize;
    addCacheInfoUpdate(cacheStatus);

    // do chmod here
    try {
      // Setting recursive permission to grant everyone read and execute
      Path localDir = new Path(cacheStatus.localizedBaseDir, cacheStatus.uniqueParentDir);
      LOG.info("Doing chmod on localdir :" + localDir);
      FileUtil.chmod(localDir.toString(), "ugo+rx", true);
    } catch (InterruptedException e) {
      LOG.warn("Exception in chmod" + e.toString());
    }

    // update cacheStatus to reflect the newly cached file
    cacheStatus.mtime = getTimestamp(conf, cache);
    return cacheStatus.localizedLoadPath;
  }
Esempio n. 3
0
 // the relative path of p. basically
 // getting rid of /. Parsing and doing
 // string manipulation is not good - so
 // just use the path api to do it.
 private Path makeRelative(String initial, Path p) {
   String scheme = this.uri.getScheme();
   String authority = this.uri.getAuthority();
   Path root = new Path(Path.SEPARATOR);
   if (root.compareTo(p) == 0) return new Path(scheme, authority, initial);
   Path retPath = new Path(p.getName());
   Path parent = p.getParent();
   for (int i = 0; i < p.depth() - 1; i++) {
     retPath = new Path(parent.getName(), retPath);
     parent = parent.getParent();
   }
   return new Path(new Path(scheme, authority, initial), retPath.toString());
 }
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    conf.setLong("mapreduce.task.timeout", 10000 * 60 * 60);

    Path train_file = new Path(args[0]);
    Path test_file = new Path(args[1]);
    conf.set("train_file", train_file.getParent().getName());

    Job job = new Job(conf, "MapTestID");
    job.setJarByClass(MapTestID.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MapTestID.MapTestIDMap.class);
    job.setReducerClass(MapTestID.MapTestIDReduce.class);

    job.setNumReduceTasks(300);

    FileInputFormat.addInputPath(job, train_file);
    FileInputFormat.addInputPath(job, test_file);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));

    return job.waitForCompletion(true) ? 0 : 1;
  }
 private static boolean exists(Path p) {
   Path base = p.getParent();
   String fileName = p.getFileName().toString();
   return Arrays.asList(extensions)
       .stream()
       .anyMatch(it -> Files.exists(base.resolve(fileName + it)));
 }
Esempio n. 6
0
 /**
  * this method returns the path inside the har filesystem. this is relative path inside the har
  * filesystem.
  *
  * @param path the fully qualified path in the har filesystem.
  * @return relative path in the filesystem.
  */
 private Path getPathInHar(Path path) {
   Path harPath = new Path(path.toUri().getPath());
   if (archivePath.compareTo(harPath) == 0) return new Path(Path.SEPARATOR);
   Path tmp = new Path(harPath.getName());
   Path parent = harPath.getParent();
   while (!(parent.compareTo(archivePath) == 0)) {
     if (parent.toString().equals(Path.SEPARATOR)) {
       tmp = null;
       break;
     }
     tmp = new Path(parent.getName(), tmp);
     parent = parent.getParent();
   }
   if (tmp != null) tmp = new Path(Path.SEPARATOR, tmp);
   return tmp;
 }
Esempio n. 7
0
  private static IntWritable deduceInputFile(JobConf job) {
    Path[] inputPaths = FileInputFormat.getInputPaths(job);
    Path inputFile = new Path(job.get("map.input.file"));

    // value == one for sort-input; value == two for sort-output
    return (inputFile.getParent().equals(inputPaths[0])) ? sortInput : sortOutput;
  }
Esempio n. 8
0
 // indicates if the siblings of this path should be skipped
 static boolean skip(Path path) {
   Path parent = path.getParent();
   if (parent != null && rand.nextBoolean()) {
     skipped.add(parent);
     return true;
   }
   return false;
 }
  /** Returns the {@code NodePath.path} for this shard. */
  public static Path shardStatePathToDataPath(Path shardPath) {
    int count = shardPath.getNameCount();

    // Sanity check:
    assert Integer.parseInt(shardPath.getName(count - 1).toString()) >= 0;
    assert "indices".equals(shardPath.getName(count - 3).toString());

    return shardPath.getParent().getParent().getParent();
  }
 public static String absolutePath(Path p) {
   if (p == null) return "";
   StringBuilder sb = new StringBuilder();
   Path parentPath = p.getParent();
   if (parentPath == null) return "/";
   sb.append(absolutePath(parentPath));
   if (sb.length() > 1) sb.append("/");
   sb.append(p.getName());
   return sb.toString();
 }
Esempio n. 11
0
 /*
  * find the parent path that is the
  * archive path in the path. The last
  * path segment that ends with .har is
  * the path that will be returned.
  */
 private Path archivePath(Path p) {
   Path retPath = null;
   Path tmp = p;
   for (int i = 0; i < p.depth(); i++) {
     if (tmp.toString().endsWith(".har")) {
       retPath = tmp;
       break;
     }
     tmp = tmp.getParent();
   }
   return retPath;
 }
 /**
  * open a file for writing
  *
  * @param hdfsPath !null path -
  * @return !null stream
  */
 @Override
 public OutputStream openFileForWrite(final Path src) {
   if (isRunningAsUser()) {
     return super.openFileForWrite(src);
   }
   if (true) throw new UnsupportedOperationException("Fix This"); // ToDo
   final FileSystem fs = getDFS();
   try {
     Path parent = src.getParent();
     guaranteeDirectory(parent);
     return FileSystem.create(fs, src, FULL_FILE_ACCESS);
   } catch (IOException e) {
     throw new RuntimeException(e);
   }
 }
Esempio n. 13
0
  public void downloadFile(String relPath) {
    VOSync.debug("Downloading file from storage: " + relPath);
    Path filePath = FileSystems.getDefault().getPath(startDir.toString(), relPath.substring(1));
    try {
      WatchKey key =
          filePath.getParent().register(watcher, ENTRY_CREATE, ENTRY_DELETE, ENTRY_MODIFY);
      keys.remove(key);
      key.cancel();

      FileOutputStream outp = new FileOutputStream(filePath.toFile());
      DropboxFileInfo info = api.getFile(relPath, null, outp, null);
      outp.close();

      key = filePath.getParent().register(watcher, ENTRY_CREATE, ENTRY_DELETE, ENTRY_MODIFY);
      keys.put(key, filePath.getParent());

      MetaHandler.setFile(relPath, filePath.toFile(), info.getMetadata().rev);
    } catch (IOException ex) {
      logger.error("Error downloading file " + relPath + ": " + ex.getMessage());
    } catch (DropboxException ex) {
      ex.printStackTrace();
      logger.error("Error downloading file " + relPath + ": " + ex.getMessage());
    }
  }
Esempio n. 14
0
 // check if this path's directory has been skipped
 static void check(Path path) {
   if (skipped.contains(path.getParent()))
     throw new RuntimeException(path + " should not have been visited");
 }
Esempio n. 15
0
  private void copyBinDirectory(
      Path sourcePluginBinDirectory,
      Path destPluginBinDirectory,
      String pluginName,
      Terminal terminal)
      throws IOException {
    boolean canCopyFromSource =
        Files.exists(sourcePluginBinDirectory)
            && Files.isReadable(sourcePluginBinDirectory)
            && Files.isDirectory(sourcePluginBinDirectory);
    if (canCopyFromSource) {
      terminal.println(VERBOSE, "Found bin, moving to %s", destPluginBinDirectory.toAbsolutePath());
      if (Files.exists(destPluginBinDirectory)) {
        IOUtils.rm(destPluginBinDirectory);
      }
      try {
        Files.createDirectories(destPluginBinDirectory.getParent());
        FileSystemUtils.move(sourcePluginBinDirectory, destPluginBinDirectory);
      } catch (IOException e) {
        throw new IOException(
            "Could not move [" + sourcePluginBinDirectory + "] to [" + destPluginBinDirectory + "]",
            e);
      }
      if (Environment.getFileStore(destPluginBinDirectory)
          .supportsFileAttributeView(PosixFileAttributeView.class)) {
        final PosixFileAttributes parentDirAttributes =
            Files.getFileAttributeView(
                    destPluginBinDirectory.getParent(), PosixFileAttributeView.class)
                .readAttributes();
        // copy permissions from parent bin directory
        final Set<PosixFilePermission> filePermissions = new HashSet<>();
        for (PosixFilePermission posixFilePermission : parentDirAttributes.permissions()) {
          switch (posixFilePermission) {
            case OWNER_EXECUTE:
            case GROUP_EXECUTE:
            case OTHERS_EXECUTE:
              break;
            default:
              filePermissions.add(posixFilePermission);
          }
        }
        // add file execute permissions to existing perms, so execution will work.
        filePermissions.add(PosixFilePermission.OWNER_EXECUTE);
        filePermissions.add(PosixFilePermission.GROUP_EXECUTE);
        filePermissions.add(PosixFilePermission.OTHERS_EXECUTE);
        Files.walkFileTree(
            destPluginBinDirectory,
            new SimpleFileVisitor<Path>() {
              @Override
              public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
                  throws IOException {
                if (attrs.isRegularFile()) {
                  setPosixFileAttributes(
                      file,
                      parentDirAttributes.owner(),
                      parentDirAttributes.group(),
                      filePermissions);
                }
                return FileVisitResult.CONTINUE;
              }

              @Override
              public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
                  throws IOException {
                setPosixFileAttributes(
                    dir,
                    parentDirAttributes.owner(),
                    parentDirAttributes.group(),
                    parentDirAttributes.permissions());
                return FileVisitResult.CONTINUE;
              }
            });
      } else {
        terminal.println(
            VERBOSE, "Skipping posix permissions - filestore doesn't support posix permission");
      }
      terminal.println(
          VERBOSE, "Installed %s into %s", pluginName, destPluginBinDirectory.toAbsolutePath());
    }
  }
Esempio n. 16
0
  private void extract(PluginHandle pluginHandle, Terminal terminal, Path pluginFile)
      throws IOException {
    // unzip plugin to a staging temp dir, named for the plugin
    Path tmp = Files.createTempDirectory(environment.tmpFile(), null);
    Path root = tmp.resolve(pluginHandle.name);
    unzipPlugin(pluginFile, root);

    // find the actual root (in case its unzipped with extra directory wrapping)
    root = findPluginRoot(root);

    // read and validate the plugin descriptor
    PluginInfo info = PluginInfo.readFromProperties(root);
    terminal.println(VERBOSE, "%s", info);

    // update name in handle based on 'name' property found in descriptor file
    pluginHandle = new PluginHandle(info.getName(), pluginHandle.version, pluginHandle.user);
    final Path extractLocation = pluginHandle.extractedDir(environment);
    if (Files.exists(extractLocation)) {
      throw new IOException(
          "plugin directory "
              + extractLocation.toAbsolutePath()
              + " already exists. To update the plugin, uninstall it first using 'remove "
              + pluginHandle.name
              + "' command");
    }

    // check for jar hell before any copying
    if (info.isJvm()) {
      jarHellCheck(root, info.isIsolated());
    }

    // install plugin
    FileSystemUtils.copyDirectoryRecursively(root, extractLocation);
    terminal.println("Installed %s into %s", pluginHandle.name, extractLocation.toAbsolutePath());

    // cleanup
    tryToDeletePath(terminal, tmp, pluginFile);

    // take care of bin/ by moving and applying permissions if needed
    Path sourcePluginBinDirectory = extractLocation.resolve("bin");
    Path destPluginBinDirectory = pluginHandle.binDir(environment);
    boolean needToCopyBinDirectory = Files.exists(sourcePluginBinDirectory);
    if (needToCopyBinDirectory) {
      if (Files.exists(destPluginBinDirectory) && !Files.isDirectory(destPluginBinDirectory)) {
        tryToDeletePath(terminal, extractLocation);
        throw new IOException(
            "plugin bin directory " + destPluginBinDirectory + " is not a directory");
      }

      try {
        copyBinDirectory(
            sourcePluginBinDirectory, destPluginBinDirectory, pluginHandle.name, terminal);
      } catch (IOException e) {
        // rollback and remove potentially before installed leftovers
        terminal.printError(
            "Error copying bin directory [%s] to [%s], cleaning up, reason: %s",
            sourcePluginBinDirectory, destPluginBinDirectory, ExceptionsHelper.detailedMessage(e));
        tryToDeletePath(terminal, extractLocation, pluginHandle.binDir(environment));
        throw e;
      }
    }

    Path sourceConfigDirectory = extractLocation.resolve("config");
    Path destConfigDirectory = pluginHandle.configDir(environment);
    boolean needToCopyConfigDirectory = Files.exists(sourceConfigDirectory);
    if (needToCopyConfigDirectory) {
      if (Files.exists(destConfigDirectory) && !Files.isDirectory(destConfigDirectory)) {
        tryToDeletePath(terminal, extractLocation, destPluginBinDirectory);
        throw new IOException(
            "plugin config directory " + destConfigDirectory + " is not a directory");
      }

      try {
        terminal.println(
            VERBOSE, "Found config, moving to %s", destConfigDirectory.toAbsolutePath());
        moveFilesWithoutOverwriting(sourceConfigDirectory, destConfigDirectory, ".new");

        if (Environment.getFileStore(destConfigDirectory)
            .supportsFileAttributeView(PosixFileAttributeView.class)) {
          // We copy owner, group and permissions from the parent ES_CONFIG directory, assuming they
          // were properly set depending
          // on how es was installed in the first place: can be root:elasticsearch (750) if es was
          // installed from rpm/deb packages
          // or most likely elasticsearch:elasticsearch if installed from tar/zip. As for
          // permissions we don't rely on umask.
          final PosixFileAttributes parentDirAttributes =
              Files.getFileAttributeView(
                      destConfigDirectory.getParent(), PosixFileAttributeView.class)
                  .readAttributes();
          // for files though, we make sure not to copy execute permissions from the parent dir and
          // leave them untouched
          final Set<PosixFilePermission> baseFilePermissions = new HashSet<>();
          for (PosixFilePermission posixFilePermission : parentDirAttributes.permissions()) {
            switch (posixFilePermission) {
              case OWNER_EXECUTE:
              case GROUP_EXECUTE:
              case OTHERS_EXECUTE:
                break;
              default:
                baseFilePermissions.add(posixFilePermission);
            }
          }
          Files.walkFileTree(
              destConfigDirectory,
              new SimpleFileVisitor<Path>() {
                @Override
                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
                    throws IOException {
                  if (attrs.isRegularFile()) {
                    Set<PosixFilePermission> newFilePermissions =
                        new HashSet<>(baseFilePermissions);
                    Set<PosixFilePermission> currentFilePermissions =
                        Files.getPosixFilePermissions(file);
                    for (PosixFilePermission posixFilePermission : currentFilePermissions) {
                      switch (posixFilePermission) {
                        case OWNER_EXECUTE:
                        case GROUP_EXECUTE:
                        case OTHERS_EXECUTE:
                          newFilePermissions.add(posixFilePermission);
                      }
                    }
                    setPosixFileAttributes(
                        file,
                        parentDirAttributes.owner(),
                        parentDirAttributes.group(),
                        newFilePermissions);
                  }
                  return FileVisitResult.CONTINUE;
                }

                @Override
                public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
                    throws IOException {
                  setPosixFileAttributes(
                      dir,
                      parentDirAttributes.owner(),
                      parentDirAttributes.group(),
                      parentDirAttributes.permissions());
                  return FileVisitResult.CONTINUE;
                }
              });
        } else {
          terminal.println(
              VERBOSE, "Skipping posix permissions - filestore doesn't support posix permission");
        }

        terminal.println(
            VERBOSE,
            "Installed %s into %s",
            pluginHandle.name,
            destConfigDirectory.toAbsolutePath());
      } catch (IOException e) {
        terminal.printError(
            "Error copying config directory [%s] to [%s], cleaning up, reason: %s",
            sourceConfigDirectory, destConfigDirectory, ExceptionsHelper.detailedMessage(e));
        tryToDeletePath(terminal, extractLocation, destPluginBinDirectory, destConfigDirectory);
        throw e;
      }
    }
  }
Esempio n. 17
0
  /**
   * Initialize DFSCopyFileMapper specific job-configuration.
   *
   * @param conf : The dfs/mapred configuration.
   * @param jobConf : The handle to the jobConf object to be initialized.
   * @param args Arguments
   */
  private static void setup(Configuration conf, JobConf jobConf, final Arguments args)
      throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    // set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(
        Options.IGNORE_READ_FAILURES.propertyname,
        args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(
        Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
      dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
      String filename = "_distcp_logs_" + randomId;
      if (!dstExists || !dstIsDir) {
        Path parent = args.dst.getParent();
        if (!dstfs.exists(parent)) {
          dstfs.mkdirs(parent);
        }
        logPath = new Path(parent, filename);
      } else {
        logPath = new Path(args.dst, filename);
      }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer =
        SequenceFile.createWriter(
            jobfs,
            jobConf,
            srcfilelist,
            LongWritable.class,
            FilePair.class,
            SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer =
        SequenceFile.createWriter(
            jobfs, jobConf, dstfilelist, Text.class, Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer =
        SequenceFile.createWriter(
            jobfs,
            jobConf,
            dstdirlist,
            Text.class,
            FilePair.class,
            SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
      for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext(); ) {
        final Path src = srcItr.next();
        FileSystem srcfs = src.getFileSystem(conf);
        FileStatus srcfilestat = srcfs.getFileStatus(src);
        Path root = special && srcfilestat.isDir() ? src : src.getParent();
        if (srcfilestat.isDir()) {
          ++srcCount;
        }

        Stack<FileStatus> pathstack = new Stack<FileStatus>();
        for (pathstack.push(srcfilestat); !pathstack.empty(); ) {
          FileStatus cur = pathstack.pop();
          FileStatus[] children = srcfs.listStatus(cur.getPath());
          for (int i = 0; i < children.length; i++) {
            boolean skipfile = false;
            final FileStatus child = children[i];
            final String dst = makeRelative(root, child.getPath());
            ++srcCount;

            if (child.isDir()) {
              pathstack.push(child);
            } else {
              // skip file if the src and the dst files are the same.
              skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
              // skip file if it exceed file limit or size limit
              skipfile |=
                  fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

              if (!skipfile) {
                ++fileCount;
                byteCount += child.getLen();

                if (LOG.isTraceEnabled()) {
                  LOG.trace("adding file " + child.getPath());
                }

                ++cnsyncf;
                cbsyncs += child.getLen();
                if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                  src_writer.sync();
                  dst_writer.sync();
                  cnsyncf = 0;
                  cbsyncs = 0L;
                }
              }
            }

            if (!skipfile) {
              src_writer.append(
                  new LongWritable(child.isDir() ? 0 : child.getLen()), new FilePair(child, dst));
            }

            dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
          }

          if (cur.isDir()) {
            String dst = makeRelative(root, cur.getPath());
            dir_writer.append(new Text(dst), new FilePair(cur, dst));
            if (++dirsyn > SYNC_FILE_MAX) {
              dirsyn = 0;
              dir_writer.sync();
            }
          }
        }
      }
    } finally {
      checkAndClose(src_writer);
      checkAndClose(dst_writer);
      checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
      dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
      LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
      if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
        throw new IOException("Failed to create" + args.dst);
      }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
      deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir =
        new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1)
                ? args.dst.getParent()
                : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
  }
Esempio n. 18
0
    /**
     * Copy a file to a destination.
     *
     * @param srcstat src path and metadata
     * @param dstpath dst path
     * @param reporter
     */
    private void copy(
        FileStatus srcstat,
        Path relativedst,
        OutputCollector<WritableComparable<?>, Text> outc,
        Reporter reporter)
        throws IOException {
      Path absdst = new Path(destPath, relativedst);
      int totfiles = job.getInt(SRC_COUNT_LABEL, -1);
      assert totfiles >= 0 : "Invalid file count " + totfiles;

      // if a directory, ensure created even if empty
      if (srcstat.isDir()) {
        if (destFileSys.exists(absdst)) {
          if (!destFileSys.getFileStatus(absdst).isDir()) {
            throw new IOException("Failed to mkdirs: " + absdst + " is a file.");
          }
        } else if (!destFileSys.mkdirs(absdst)) {
          throw new IOException("Failed to mkdirs " + absdst);
        }
        // TODO: when modification times can be set, directories should be
        // emitted to reducers so they might be preserved. Also, mkdirs does
        // not currently return an error when the directory already exists;
        // if this changes, all directory work might as well be done in reduce
        return;
      }

      if (destFileSys.exists(absdst) && !overwrite && !needsUpdate(srcstat, destFileSys, absdst)) {
        outc.collect(null, new Text("SKIP: " + srcstat.getPath()));
        ++skipcount;
        reporter.incrCounter(Counter.SKIP, 1);
        updateStatus(reporter);
        return;
      }

      Path tmpfile = new Path(job.get(TMP_DIR_LABEL), relativedst);
      long cbcopied = 0L;
      FSDataInputStream in = null;
      FSDataOutputStream out = null;
      try {
        // open src file
        try {
          in = srcstat.getPath().getFileSystem(job).open(srcstat.getPath());
        } catch (IOException e) {
          LOG.error("Failed to open src file " + srcstat.getPath() + ", ignore and return");
          in = null;
          return;
        }
        reporter.incrCounter(Counter.BYTESEXPECTED, srcstat.getLen());
        // open tmp file
        out = create(tmpfile, reporter, srcstat);
        // copy file
        for (int cbread; (cbread = in.read(buffer)) >= 0; ) {
          out.write(buffer, 0, cbread);
          cbcopied += cbread;
          reporter.setStatus(
              String.format("%.2f ", cbcopied * 100.0 / srcstat.getLen())
                  + absdst
                  + " [ "
                  + StringUtils.humanReadableInt(cbcopied)
                  + " / "
                  + StringUtils.humanReadableInt(srcstat.getLen())
                  + " ]");
        }
      } finally {
        checkAndClose(in);
        checkAndClose(out);
      }

      if (cbcopied != srcstat.getLen()) {
        if (srcstat.getLen() == 0 && cbcopied > 0) {
          LOG.info("most likely see a WAL file corruption: " + srcstat.getPath());
        } else {
          throw new IOException(
              "File size not matched: copied "
                  + bytesString(cbcopied)
                  + " to tmpfile (="
                  + tmpfile
                  + ") but expected "
                  + bytesString(srcstat.getLen())
                  + " from "
                  + srcstat.getPath());
        }
      } else {
        if (totfiles == 1) {
          // Copying a single file; use dst path provided by user as destination
          // rather than destination directory, if a file
          Path dstparent = absdst.getParent();
          if (!(destFileSys.exists(dstparent) && destFileSys.getFileStatus(dstparent).isDir())) {
            absdst = dstparent;
          }
        }
        if (destFileSys.exists(absdst) && destFileSys.getFileStatus(absdst).isDir()) {
          throw new IOException(absdst + " is a directory");
        }
        if (!destFileSys.mkdirs(absdst.getParent())) {
          throw new IOException("Failed to craete parent dir: " + absdst.getParent());
        }
        rename(tmpfile, absdst);

        FileStatus dststat = destFileSys.getFileStatus(absdst);
        if (dststat.getLen() != srcstat.getLen()) {
          destFileSys.delete(absdst, false);
          throw new IOException(
              "File size not matched: copied "
                  + bytesString(dststat.getLen())
                  + " to dst (="
                  + absdst
                  + ") but expected "
                  + bytesString(srcstat.getLen())
                  + " from "
                  + srcstat.getPath());
        }
        updatePermissions(srcstat, dststat);
      }

      // report at least once for each file
      ++copycount;
      reporter.incrCounter(Counter.BYTESCOPIED, cbcopied);
      reporter.incrCounter(Counter.COPY, 1);
      updateStatus(reporter);
    }