Beispiel #1
0
  /**
   * Check whether the contents of src and dst are the same.
   *
   * <p>Return false if dstpath does not exist
   *
   * <p>If the files have different sizes, return false.
   *
   * <p>If the files have the same sizes, the file checksums will be compared.
   *
   * <p>When file checksum is not supported in any of file systems, two files are considered as the
   * same if they have the same size.
   */
  private static boolean sameFile(
      FileSystem srcfs, FileStatus srcstatus, FileSystem dstfs, Path dstpath) throws IOException {
    FileStatus dststatus;
    try {
      dststatus = dstfs.getFileStatus(dstpath);
    } catch (FileNotFoundException fnfe) {
      return false;
    }

    // same length?
    if (srcstatus.getLen() != dststatus.getLen()) {
      return false;
    }

    // compare checksums
    try {
      final FileChecksum srccs = srcfs.getFileChecksum(srcstatus.getPath());
      final FileChecksum dstcs = dstfs.getFileChecksum(dststatus.getPath());
      // return true if checksum is not supported
      // (i.e. some of the checksums is null)
      return srccs == null || dstcs == null || srccs.equals(dstcs);
    } catch (FileNotFoundException fnfe) {
      return false;
    }
  }
Beispiel #2
0
  public void copyInitialState(Path origAppDir) throws IOException {
    // locate previous snapshot
    String newAppDir = this.dag.assertAppPath();

    FSRecoveryHandler recoveryHandler = new FSRecoveryHandler(origAppDir.toString(), conf);
    // read snapshot against new dependencies
    Object snapshot = recoveryHandler.restore();
    if (snapshot == null) {
      throw new IllegalArgumentException("No previous application state found in " + origAppDir);
    }
    InputStream logIs = recoveryHandler.getLog();

    // modify snapshot state to switch app id
    ((StreamingContainerManager.CheckpointState) snapshot).setApplicationId(this.dag, conf);
    Path checkpointPath = new Path(newAppDir, LogicalPlan.SUBDIR_CHECKPOINTS);

    FileSystem fs = FileSystem.newInstance(origAppDir.toUri(), conf);
    // remove the path that was created by the storage agent during deserialization and replacement
    fs.delete(checkpointPath, true);

    // write snapshot to new location
    recoveryHandler = new FSRecoveryHandler(newAppDir, conf);
    recoveryHandler.save(snapshot);
    OutputStream logOs = recoveryHandler.rotateLog();
    IOUtils.copy(logIs, logOs);
    logOs.flush();
    logOs.close();
    logIs.close();

    // copy sub directories that are not present in target
    FileStatus[] lFiles = fs.listStatus(origAppDir);
    for (FileStatus f : lFiles) {
      if (f.isDirectory()) {
        String targetPath = f.getPath().toString().replace(origAppDir.toString(), newAppDir);
        if (!fs.exists(new Path(targetPath))) {
          LOG.debug("Copying {} to {}", f.getPath(), targetPath);
          FileUtil.copy(fs, f.getPath(), fs, new Path(targetPath), false, conf);
          // FSUtil.copy(fs, f, fs, new Path(targetPath), false, false, conf);
        } else {
          LOG.debug("Ignoring {} as it already exists under {}", f.getPath(), targetPath);
          // FSUtil.setPermission(fs, new Path(targetPath), new FsPermission((short)0777));
        }
      }
    }
  }
Beispiel #3
0
 private static void updatePermissions(
     FileStatus src, FileStatus dst, EnumSet<FileAttribute> preseved, FileSystem destFileSys)
     throws IOException {
   String owner = null;
   String group = null;
   if (preseved.contains(FileAttribute.USER) && !src.getOwner().equals(dst.getOwner())) {
     owner = src.getOwner();
   }
   if (preseved.contains(FileAttribute.GROUP) && !src.getGroup().equals(dst.getGroup())) {
     group = src.getGroup();
   }
   if (owner != null || group != null) {
     destFileSys.setOwner(dst.getPath(), owner, group);
   }
   if (preseved.contains(FileAttribute.PERMISSION)
       && !src.getPermission().equals(dst.getPermission())) {
     destFileSys.setPermission(dst.getPath(), src.getPermission());
   }
 }
  /**
   * Generate the list of files and make them into FileSplits. This needs to be copied to insert a
   * filter on acceptable data
   */
  @Override
  public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);
    long desiredMappers =
        job.getConfiguration().getLong("org.systemsbiology.jxtandem.DesiredXMLInputMappers", 0);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> fileStatuses = listStatus(job);
    boolean forceNumberMappers = fileStatuses.size() == 1;
    for (FileStatus file : fileStatuses) {
      Path path = file.getPath();
      if (!isPathAcceptable(path)) // filter acceptable data
      continue;
      FileSystem fs = path.getFileSystem(job.getConfiguration());
      long length = file.getLen();
      BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
      if ((length != 0) && isSplitable(job, path)) {
        long blockSize = file.getBlockSize();
        // use desired mappers to force more splits
        if (forceNumberMappers && desiredMappers > 0)
          maxSize = Math.min(maxSize, (length / desiredMappers));

        long splitSize = computeSplitSize(blockSize, minSize, maxSize);

        long bytesRemaining = length;
        while (withinSlop(splitSize, bytesRemaining)) {
          int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
          splits.add(
              new FileSplit(
                  path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts()));
          bytesRemaining -= splitSize;
        }

        if (bytesRemaining != 0) {
          splits.add(
              new FileSplit(
                  path,
                  length - bytesRemaining,
                  bytesRemaining,
                  blkLocations[blkLocations.length - 1].getHosts()));
        }
      } else if (length != 0) {
        splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
      } else {
        // Create empty hosts array for zero length files
        splits.add(new FileSplit(path, 0, length, new String[0]));
      }
    }
    System.out.println("Total # of splits: " + splits.size());
    //     LOG.debug("Total # of splits: " + splits.size());
    return splits;
  }
Beispiel #5
0
  /**
   * Get block locations from the underlying fs and fix their offsets and lengths.
   *
   * @param file the input file status to get block locations
   * @param start the start of the desired range in the contained file
   * @param len the length of the desired range
   * @return block locations for this segment of file
   * @throws IOException
   */
  @Override
  public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len)
      throws IOException {
    HarStatus hstatus = getFileHarStatus(file.getPath());
    Path partPath = new Path(archivePath, hstatus.getPartName());
    FileStatus partStatus = metadata.getPartFileStatus(partPath);

    // get all part blocks that overlap with the desired file blocks
    BlockLocation[] locations =
        fs.getFileBlockLocations(partStatus, hstatus.getStartIndex() + start, len);

    return fixBlockLocations(locations, start, len, hstatus.getStartIndex());
  }
 /**
  * list subfiles
  *
  * @param hdfsPath !null path probably exists
  * @return !null list of enclused file names - emptu if file of not exists
  */
 @Override
 public String[] ls(final String hdfsPath) {
   if (isRunningAsUser()) {
     return super.ls(hdfsPath);
   }
   final FileSystem fs = getDFS();
   try {
     FileStatus[] statuses = fs.listStatus(new Path(hdfsPath));
     if (statuses == null) return new String[0];
     List<String> holder = new ArrayList<String>();
     for (int i = 0; i < statuses.length; i++) {
       FileStatus statuse = statuses[i];
       String s = statuse.getPath().getName();
       holder.add(s);
     }
     String[] ret = new String[holder.size()];
     holder.toArray(ret);
     return ret;
   } catch (IOException e) {
     throw new RuntimeException(e);
   }
 }
 public static void recursePath(Configuration conf, Path path, Job job) {
   try {
     FileSystem fs = path.getFileSystem(conf);
     FileStatus[] fstats = fs.listStatus(path);
     if (fstats != null) {
       for (FileStatus f : fstats) {
         Path p = f.getPath();
         ;
         if (fs.isFile(p)) {
           // connection times out otherwise
           System.err.println("file:" + p.toString());
           FileInputFormat.addInputPath(job, p);
         } else {
           System.err.println("dir:" + p.toString());
           recursePath(conf, p, job);
         }
       }
     }
   } catch (IOException e) {
     // shouldn't be here
     throw new RuntimeException(e);
   }
 }
  public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

    if (args.length != 2) {
      System.out.println("Usage: FeatureMatching ID <inputName.jpeg/inputName.jpg>");
      System.exit(1);
    }

    SimpleDateFormat sdf = new SimpleDateFormat("", Locale.US);
    sdf.applyPattern("yyyy-MM-dd_HH-mm-ss");
    String time = sdf.format(new Date());

    Job job = Job.getInstance();

    ID = "/" + args[0];
    String filename = args[1];
    filename = filename.toLowerCase();
    System.out.println("current filename:" + filename);

    // Detect illegal username (if the username dir doesn't exist)
    File userPath = new File(LOCAL_USER_DIR + ID);
    if (!userPath.exists()) {
      System.out.println("Unauthorized username!!!\nExiting......");
      System.exit(1);
    }
    // Preprocess the input image.jpg from local dir: /local.../user/ID/input/image.jpg
    // Save the features to local dir: hdfs://.../user/ID/input/image.jpg
    extractQueryFeatures2HDFS(filename, job);

    // Add the feature file to the hdfs cache
    String featureFileName = filename.substring(0, filename.lastIndexOf(".")) + ".json";
    //        job.addCacheFile(new Path(HDFS_HOME + USER + ID + INPUT + "/" +
    // featureFileName).toUri());
    job.getConfiguration()
        .set("featureFilePath", HDFS_HOME + USER + ID + INPUT + "/" + featureFileName);

    // Check the file type. Only support jpeg/jpg type images
    String type = filename.substring(args[1].lastIndexOf("."));
    if (!(type.equals(".jpg") || type.equals(".jpeg"))) {
      System.out.println("Image type not supported!!!\nExiting");
      System.exit(1);
    }

    // Input: hdfs://.../features/
    // The feature dir is a location of all features extracted from the database
    String inputPathStr = HDFS_HOME + FEATURES;
    // Output: hdfs://.../user/ID/output/
    String outputPathStr = HDFS_HOME + USER + ID + OUTPUT + "/" + time;

    job.setInputFormatClass(KeyValueTextInputFormat.class);
    //        job.setOutputFormatClass(TextOutputFormat.class);

    // Get the lists of all feature files: /.../features/data/part-*
    FileSystem fs = FileSystem.get(job.getConfiguration());
    FileStatus[] statuses = fs.listStatus(new Path(inputPathStr));
    StringBuffer sb = new StringBuffer();
    for (FileStatus fileStatus : statuses) {
      sb.append(fileStatus.getPath() + ",");
    }
    sb.deleteCharAt(sb.lastIndexOf(","));

    job.setJarByClass(FeatureMatching.class);
    job.setMapperClass(FeatureMatchMapper.class);
    job.setReducerClass(FeatureMatchReducer.class);

    // only need one reducer to collect the result
    job.setNumReduceTasks(1);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    // Input a directory, so need the recursive input
    FileInputFormat.setInputDirRecursive(job, true);
    // Set the PathFilter, to omit _SUCCESS files
    // (This is not working correctly, as the PathFilter class is an interface rather than a class.
    // But the 2nd arg asks me to extend the PathFilter)
    //        FileInputFormat.setInputPathFilter(job, MyPathFilter.class);
    //
    //        FileInputFormat.setInputPaths(job, new Path(inputPathStr));
    FileInputFormat.setInputPaths(job, sb.toString());
    FileOutputFormat.setOutputPath(job, new Path(outputPathStr));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
  }
Beispiel #9
0
  /**
   * Initialize DFSCopyFileMapper specific job-configuration.
   *
   * @param conf : The dfs/mapred configuration.
   * @param jobConf : The handle to the jobConf object to be initialized.
   * @param args Arguments
   */
  private static void setup(Configuration conf, JobConf jobConf, final Arguments args)
      throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    // set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(
        Options.IGNORE_READ_FAILURES.propertyname,
        args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(
        Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
      dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
      String filename = "_distcp_logs_" + randomId;
      if (!dstExists || !dstIsDir) {
        Path parent = args.dst.getParent();
        if (!dstfs.exists(parent)) {
          dstfs.mkdirs(parent);
        }
        logPath = new Path(parent, filename);
      } else {
        logPath = new Path(args.dst, filename);
      }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer =
        SequenceFile.createWriter(
            jobfs,
            jobConf,
            srcfilelist,
            LongWritable.class,
            FilePair.class,
            SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer =
        SequenceFile.createWriter(
            jobfs, jobConf, dstfilelist, Text.class, Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer =
        SequenceFile.createWriter(
            jobfs,
            jobConf,
            dstdirlist,
            Text.class,
            FilePair.class,
            SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
      for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext(); ) {
        final Path src = srcItr.next();
        FileSystem srcfs = src.getFileSystem(conf);
        FileStatus srcfilestat = srcfs.getFileStatus(src);
        Path root = special && srcfilestat.isDir() ? src : src.getParent();
        if (srcfilestat.isDir()) {
          ++srcCount;
        }

        Stack<FileStatus> pathstack = new Stack<FileStatus>();
        for (pathstack.push(srcfilestat); !pathstack.empty(); ) {
          FileStatus cur = pathstack.pop();
          FileStatus[] children = srcfs.listStatus(cur.getPath());
          for (int i = 0; i < children.length; i++) {
            boolean skipfile = false;
            final FileStatus child = children[i];
            final String dst = makeRelative(root, child.getPath());
            ++srcCount;

            if (child.isDir()) {
              pathstack.push(child);
            } else {
              // skip file if the src and the dst files are the same.
              skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
              // skip file if it exceed file limit or size limit
              skipfile |=
                  fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

              if (!skipfile) {
                ++fileCount;
                byteCount += child.getLen();

                if (LOG.isTraceEnabled()) {
                  LOG.trace("adding file " + child.getPath());
                }

                ++cnsyncf;
                cbsyncs += child.getLen();
                if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                  src_writer.sync();
                  dst_writer.sync();
                  cnsyncf = 0;
                  cbsyncs = 0L;
                }
              }
            }

            if (!skipfile) {
              src_writer.append(
                  new LongWritable(child.isDir() ? 0 : child.getLen()), new FilePair(child, dst));
            }

            dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
          }

          if (cur.isDir()) {
            String dst = makeRelative(root, cur.getPath());
            dir_writer.append(new Text(dst), new FilePair(cur, dst));
            if (++dirsyn > SYNC_FILE_MAX) {
              dirsyn = 0;
              dir_writer.sync();
            }
          }
        }
      }
    } finally {
      checkAndClose(src_writer);
      checkAndClose(dst_writer);
      checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
      dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
      LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
      if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
        throw new IOException("Failed to create" + args.dst);
      }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
      deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir =
        new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1)
                ? args.dst.getParent()
                : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
  }
Beispiel #10
0
    /**
     * Copy a file to a destination.
     *
     * @param srcstat src path and metadata
     * @param dstpath dst path
     * @param reporter
     */
    private void copy(
        FileStatus srcstat,
        Path relativedst,
        OutputCollector<WritableComparable<?>, Text> outc,
        Reporter reporter)
        throws IOException {
      Path absdst = new Path(destPath, relativedst);
      int totfiles = job.getInt(SRC_COUNT_LABEL, -1);
      assert totfiles >= 0 : "Invalid file count " + totfiles;

      // if a directory, ensure created even if empty
      if (srcstat.isDir()) {
        if (destFileSys.exists(absdst)) {
          if (!destFileSys.getFileStatus(absdst).isDir()) {
            throw new IOException("Failed to mkdirs: " + absdst + " is a file.");
          }
        } else if (!destFileSys.mkdirs(absdst)) {
          throw new IOException("Failed to mkdirs " + absdst);
        }
        // TODO: when modification times can be set, directories should be
        // emitted to reducers so they might be preserved. Also, mkdirs does
        // not currently return an error when the directory already exists;
        // if this changes, all directory work might as well be done in reduce
        return;
      }

      if (destFileSys.exists(absdst) && !overwrite && !needsUpdate(srcstat, destFileSys, absdst)) {
        outc.collect(null, new Text("SKIP: " + srcstat.getPath()));
        ++skipcount;
        reporter.incrCounter(Counter.SKIP, 1);
        updateStatus(reporter);
        return;
      }

      Path tmpfile = new Path(job.get(TMP_DIR_LABEL), relativedst);
      long cbcopied = 0L;
      FSDataInputStream in = null;
      FSDataOutputStream out = null;
      try {
        // open src file
        try {
          in = srcstat.getPath().getFileSystem(job).open(srcstat.getPath());
        } catch (IOException e) {
          LOG.error("Failed to open src file " + srcstat.getPath() + ", ignore and return");
          in = null;
          return;
        }
        reporter.incrCounter(Counter.BYTESEXPECTED, srcstat.getLen());
        // open tmp file
        out = create(tmpfile, reporter, srcstat);
        // copy file
        for (int cbread; (cbread = in.read(buffer)) >= 0; ) {
          out.write(buffer, 0, cbread);
          cbcopied += cbread;
          reporter.setStatus(
              String.format("%.2f ", cbcopied * 100.0 / srcstat.getLen())
                  + absdst
                  + " [ "
                  + StringUtils.humanReadableInt(cbcopied)
                  + " / "
                  + StringUtils.humanReadableInt(srcstat.getLen())
                  + " ]");
        }
      } finally {
        checkAndClose(in);
        checkAndClose(out);
      }

      if (cbcopied != srcstat.getLen()) {
        if (srcstat.getLen() == 0 && cbcopied > 0) {
          LOG.info("most likely see a WAL file corruption: " + srcstat.getPath());
        } else {
          throw new IOException(
              "File size not matched: copied "
                  + bytesString(cbcopied)
                  + " to tmpfile (="
                  + tmpfile
                  + ") but expected "
                  + bytesString(srcstat.getLen())
                  + " from "
                  + srcstat.getPath());
        }
      } else {
        if (totfiles == 1) {
          // Copying a single file; use dst path provided by user as destination
          // rather than destination directory, if a file
          Path dstparent = absdst.getParent();
          if (!(destFileSys.exists(dstparent) && destFileSys.getFileStatus(dstparent).isDir())) {
            absdst = dstparent;
          }
        }
        if (destFileSys.exists(absdst) && destFileSys.getFileStatus(absdst).isDir()) {
          throw new IOException(absdst + " is a directory");
        }
        if (!destFileSys.mkdirs(absdst.getParent())) {
          throw new IOException("Failed to craete parent dir: " + absdst.getParent());
        }
        rename(tmpfile, absdst);

        FileStatus dststat = destFileSys.getFileStatus(absdst);
        if (dststat.getLen() != srcstat.getLen()) {
          destFileSys.delete(absdst, false);
          throw new IOException(
              "File size not matched: copied "
                  + bytesString(dststat.getLen())
                  + " to dst (="
                  + absdst
                  + ") but expected "
                  + bytesString(srcstat.getLen())
                  + " from "
                  + srcstat.getPath());
        }
        updatePermissions(srcstat, dststat);
      }

      // report at least once for each file
      ++copycount;
      reporter.incrCounter(Counter.BYTESCOPIED, cbcopied);
      reporter.incrCounter(Counter.COPY, 1);
      updateStatus(reporter);
    }
Beispiel #11
0
 /**
  * Return true if dst should be replaced by src and the update flag is set. Right now, this
  * merely checks that the src and dst len are not equal. This should be improved on once
  * modification times, CRCs, etc. can be meaningful in this context.
  *
  * @throws IOException
  */
 private boolean needsUpdate(FileStatus srcstatus, FileSystem dstfs, Path dstpath)
     throws IOException {
   return update && !sameFile(srcstatus.getPath().getFileSystem(job), srcstatus, dstfs, dstpath);
 }
Beispiel #12
0
  /** Delete the dst files/dirs which do not exist in src */
  private static void deleteNonexisting(
      FileSystem dstfs,
      FileStatus dstroot,
      Path dstsorted,
      FileSystem jobfs,
      Path jobdir,
      JobConf jobconf,
      Configuration conf)
      throws IOException {
    if (!dstroot.isDir()) {
      throw new IOException(
          "dst must be a directory when option "
              + Options.DELETE.cmd
              + " is set, but dst (= "
              + dstroot.getPath()
              + ") is not a directory.");
    }

    // write dst lsr results
    final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
    final SequenceFile.Writer writer =
        SequenceFile.createWriter(
            jobfs,
            jobconf,
            dstlsr,
            Text.class,
            FileStatus.class,
            SequenceFile.CompressionType.NONE);
    try {
      // do lsr to get all file statuses in dstroot
      final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
      for (lsrstack.push(dstroot); !lsrstack.isEmpty(); ) {
        final FileStatus status = lsrstack.pop();
        if (status.isDir()) {
          for (FileStatus child : dstfs.listStatus(status.getPath())) {
            String relative = makeRelative(dstroot.getPath(), child.getPath());
            writer.append(new Text(relative), child);
            lsrstack.push(child);
          }
        }
      }
    } finally {
      checkAndClose(writer);
    }

    // sort lsr results
    final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
    SequenceFile.Sorter sorter =
        new SequenceFile.Sorter(
            jobfs, new Text.Comparator(), Text.class, FileStatus.class, jobconf);
    sorter.sort(dstlsr, sortedlsr);

    // compare lsr list and dst list
    SequenceFile.Reader lsrin = null;
    SequenceFile.Reader dstin = null;
    try {
      lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
      dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

      // compare sorted lsr list and sorted dst list
      final Text lsrpath = new Text();
      final FileStatus lsrstatus = new FileStatus();
      final Text dstpath = new Text();
      final Text dstfrom = new Text();
      final FsShell shell = new FsShell(conf);
      final String[] shellargs = {"-rmr", null};

      boolean hasnext = dstin.next(dstpath, dstfrom);
      for (; lsrin.next(lsrpath, lsrstatus); ) {
        int dst_cmp_lsr = dstpath.compareTo(lsrpath);
        for (; hasnext && dst_cmp_lsr < 0; ) {
          hasnext = dstin.next(dstpath, dstfrom);
          dst_cmp_lsr = dstpath.compareTo(lsrpath);
        }

        if (dst_cmp_lsr == 0) {
          // lsrpath exists in dst, skip it
          hasnext = dstin.next(dstpath, dstfrom);
        } else {
          // lsrpath does not exist, delete it
          String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
          if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
            shellargs[1] = s;
            int r = 0;
            try {
              r = shell.run(shellargs);
            } catch (Exception e) {
              throw new IOException("Exception from shell.", e);
            }
            if (r != 0) {
              throw new IOException(
                  "\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + r);
            }
          }
        }
      }
    } finally {
      checkAndClose(lsrin);
      checkAndClose(dstin);
    }
  }
Beispiel #13
0
  public int run(String[] args) throws Exception {
    // printUsage();
    /*
     * SETUP
     */
    Configuration argConf = getConf();
    Hashtable<String, String> confArg = new Hashtable<String, String>();
    setup(confArg, argConf);
    Date currentTime = new Date();
    Date endDate = new Date(new Long(confArg.get("timestamp_stop")));
    Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*");
    Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*");
    logger.info("Running GeStore");

    // ZooKeeper setup
    Configuration config = HBaseConfiguration.create();
    zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config));
    zkInstance =
        new ZooKeeper(
            ZKConfig.getZKQuorumServersString(config),
            config.getInt("zookeeper.session.timeout", -1),
            zkWatcher);

    if (!confArg.get("task_id").isEmpty()) {
      confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id"));
    }

    String lockRequest = confArg.get("file_id");
    if (!confArg.get("run_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("run_id") + "_";
    if (!confArg.get("task_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("task_id") + "_";

    // Get type of movement
    toFrom type_move = checkArgs(confArg);
    if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) {
      List<String> arguments = new ArrayList<String>();
      arguments.add("-Dinput=" + confArg.get("local_path"));
      arguments.add("-Dtable=" + confArg.get("file_id"));
      arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop"));
      arguments.add("-Dtype=" + confArg.get("format"));
      arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id"));
      arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path"));
      arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id"));
      if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add"));
      String lockName = lock(lockRequest);
      String[] argumentString = arguments.toArray(new String[arguments.size()]);
      adddb.main(argumentString);
      unlock(lockName);
      System.exit(0);
    }

    // Database registration

    dbutil db_util = new dbutil(config);
    db_util.register_database(confArg.get("db_name_files"), true);
    db_util.register_database(confArg.get("db_name_runs"), true);
    db_util.register_database(confArg.get("db_name_updates"), true);
    FileSystem hdfs = FileSystem.get(config);
    FileSystem localFS = FileSystem.getLocal(config);

    // Get source type
    confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    confArg.put(
        "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    if (!confArg.get("source").equals("local")
        && type_move == toFrom.REMOTE2LOCAL
        && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) {
      confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util)));
    }

    /*
     * Get previous timestamp
     */
    Get run_id_get = new Get(confArg.get("run_id").getBytes());
    Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get);
    KeyValue run_file_prev =
        run_get.getColumnLatest(
            "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes());
    String last_timestamp = new String("0");
    if (null != run_file_prev && !confArg.get("source").equals("local")) {
      long last_timestamp_real = run_file_prev.getTimestamp();
      Long current_timestamp = new Long(confArg.get("timestamp_real"));
      if ((current_timestamp - last_timestamp_real) > 36000) {
        last_timestamp = new String(run_file_prev.getValue());
        Integer lastTimestamp = new Integer(last_timestamp);
        lastTimestamp += 1;
        last_timestamp = lastTimestamp.toString();
        logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate);
        Date last_run = new Date(run_file_prev.getTimestamp());
        if (last_run.before(endDate) && !full_run) {
          confArg.put("timestamp_start", last_timestamp);
        }
      }
    }

    Integer tse = new Integer(confArg.get("timestamp_stop"));
    Integer tss = new Integer(confArg.get("timestamp_start"));
    if (tss > tse) {
      logger.info("No new version of requested file.");
      return 0;
    }

    /*
     * Generate file
     */

    String lockName = lock(lockRequest);

    Get file_id_get = new Get(confArg.get("file_id").getBytes());
    Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get);
    if (!file_get.isEmpty()) {
      boolean found =
          hasFile(
              db_util,
              hdfs,
              confArg.get("db_name_files"),
              confArg.get("file_id"),
              getFullPath(confArg));
      if (confArg.get("source").equals("fullfile")) {
        found = false;
      }
      String filenames_put =
          getFileNames(
              db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg));
      // Filename not found in file database
      if (!found && type_move == toFrom.REMOTE2LOCAL) {
        if (!confArg.get("source").equals("local")) {
          // Generate intermediate file
          if (getFile(hdfs, confArg, db_util) == null) {
            unlock(lockName);
            return 1;
          }
          // Put generated file into file database
          if (!confArg.get("format").equals("fullfile")) {
            putFileEntry(
                db_util,
                hdfs,
                confArg.get("db_name_files"),
                confArg.get("file_id"),
                confArg.get("full_file_name"),
                confArg.get("source"));
          }
        } else {
          logger.warn("Remote file not found, and cannot be generated! File: " + confArg);
          unlock(lockName);
          return 1;
        }
      }
    } else {
      if (type_move == toFrom.REMOTE2LOCAL) {
        logger.warn("Remote file not found, and cannot be generated.");
        unlock(lockName);
        return 1;
      }
    }

    /*
     * Copy file
     * Update tables
     */

    if (type_move == toFrom.LOCAL2REMOTE) {
      if (!confArg.get("format").equals("fullfile")) {
        putFileEntry(
            db_util,
            hdfs,
            confArg.get("db_name_files"),
            confArg.get("file_id"),
            getFullPath(confArg),
            confArg.get("source"));
      }
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg)));
    } else if (type_move == toFrom.REMOTE2LOCAL) {
      FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*"));
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      unlock(lockName);
      for (FileStatus file : files) {
        Path cur_file = file.getPath();
        Path cur_local_path =
            new Path(new String(confArg.get("local_path") + confArg.get("file_id")));
        String suffix = getSuffix(getFileName(confArg), cur_file.getName());
        if (suffix.length() > 0) {
          cur_local_path = cur_local_path.suffix(new String("." + suffix));
        }
        if (confArg.get("copy").equals("true")) {
          String crc = hdfs.getFileChecksum(cur_file).toString();
          if (checksumLocalTest(cur_local_path, crc)) {
            continue;
          } else {
            hdfs.copyToLocalFile(cur_file, cur_local_path);
            writeChecksum(cur_local_path, crc);
          }
        } else {
          System.out.println(cur_local_path + "\t" + cur_file);
        }
      }
    }
    unlock(lockName);
    return 0;
  }
Beispiel #14
0
  // Information needed to get a single file:
  // BASE_PATH, FILE_ID, TIMESTAMP_START, TIMESTAMP_STOP, SOURCE, FILESYSTEM
  private static Vector<Path> getFile(
      FileSystem fs, Hashtable<String, String> config, dbutil db_util) throws Exception {
    Long latestVersion = latestVersion(config, db_util);

    try {
      config.put("timestamp_start", config.get("timestamp_start"));
      config.put("timestamp_real", latestVersion.toString());
      config.put("timestamp_stop", latestVersion.toString());
    } catch (Exception E) {
      logger.error("Tryign to get file that is impossible to generate: " + getFullPath(config));
      return null;
    }
    if (Integer.parseInt(config.get("timestamp_start"))
        > Integer.parseInt(config.get("timestamp_stop"))) {
      return null;
    }
    logger.debug(
        "Getting DB for timestamp "
            + config.get("timestamp_start")
            + " to "
            + config.get("timestamp_stop"));

    String final_result = getFullPath(config);

    String temp_path_base =
        config.get("local_temp_path")
            + "_"
            + config.get("task_id")
            + "_"
            + config.get("run_id")
            + "/";
    Path newPath = new Path(final_result + "*");
    Vector<Path> ret_path = new Vector<Path>();
    String lockName = lock(final_result.replaceAll("/", "_"));
    if (fs.globStatus(newPath).length != 0) {
      ret_path.add(newPath);
      unlock(lockName);
      config.put("full_file_name", final_result);
      return ret_path;
    } else {
      if (!config.get("source").equals("local")) {
        config.put("temp_path_base", temp_path_base);

        config.put("timestamp_start", config.get("timestamp_start"));
        config.put("timestamp_real", latestVersion.toString());
        config.put("timestamp_stop", latestVersion.toString());

        Class<?> sourceClass =
            Class.forName("org.gestore.plugin.source." + config.get("source") + "Source");
        Method process_data = sourceClass.getMethod("process", Hashtable.class, FileSystem.class);
        Object processor = sourceClass.newInstance();
        Object retVal;
        try {
          retVal = process_data.invoke(processor, config, fs);
        } catch (InvocationTargetException E) {
          Throwable exception = E.getTargetException();
          logger.error("Unable to call method in child class: " + exception.toString());
          exception.printStackTrace(System.out);
          unlock(lockName);
          return null;
        }
        FileStatus[] files = (FileStatus[]) retVal;
        if (files == null) {
          logger.error("Error getting files, no files returned");
          return null;
        }

        for (FileStatus file : files) {
          Path cur_file = file.getPath();
          Path cur_local_path = new Path(temp_path_base + config.get("file_id"));
          String suffix = getSuffix(config.get("file_id"), cur_file.getName());
          cur_local_path = cur_local_path.suffix(suffix);
          Path res_path = new Path(new String(final_result + suffix));
          logger.debug("Moving file" + cur_file.toString() + " to " + res_path.toString());
          if (config.get("copy").equals("true")) {
            fs.moveFromLocalFile(cur_file, res_path);
          } else {
            fs.rename(cur_file, res_path);
          }
        }

        config.put("full_file_name", final_result);
      }
    }
    unlock(lockName);
    return ret_path;
  }