Пример #1
0
  private InputStream OpenMultiplePartsWithOffset(FileSystem fs, Path pt, long offset)
      throws IOException {
    RemoteIterator<LocatedFileStatus> rit = fs.listFiles(pt, false);
    Vector<FSDataInputStream> fileHandleList = new Vector<FSDataInputStream>();
    while (rit.hasNext()) {
      Path path = rit.next().getPath();
      String filename =
          path.toString().substring(path.getParent().toString().length(), path.toString().length());

      if (filename.startsWith("/part-")) {
        long filesize = fs.getFileStatus(path).getLen();
        if (offset < filesize) {
          FSDataInputStream handle = fs.open(path);
          if (offset > 0) {
            handle.seek(offset);
          }
          fileHandleList.add(handle);
        }
        offset -= filesize;
      }
    }
    if (fileHandleList.size() == 1) return fileHandleList.get(0);
    else if (fileHandleList.size() > 1) {
      Enumeration<FSDataInputStream> enu = fileHandleList.elements();
      return new SequenceInputStream(enu);
    } else {
      System.err.println("Error, no source file loaded. run genSeedDataset.sh fisrt!");
      return null;
    }
  }
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    conf.setLong("mapreduce.task.timeout", 10000 * 60 * 60);

    Path train_file = new Path(args[0]);
    Path test_file = new Path(args[1]);
    conf.set("train_file", train_file.getParent().getName());

    Job job = new Job(conf, "MapTestID");
    job.setJarByClass(MapTestID.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MapTestID.MapTestIDMap.class);
    job.setReducerClass(MapTestID.MapTestIDReduce.class);

    job.setNumReduceTasks(300);

    FileInputFormat.addInputPath(job, train_file);
    FileInputFormat.addInputPath(job, test_file);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));

    return job.waitForCompletion(true) ? 0 : 1;
  }
Пример #3
0
    public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
      FileSplit split = (FileSplit) genericSplit;
      Configuration job = context.getConfiguration();
      m_Sb.setLength(0);
      m_Start = split.getStart();
      m_End = m_Start + split.getLength();
      final Path file = split.getPath();
      compressionCodecs = new CompressionCodecFactory(job);
      final CompressionCodec codec = compressionCodecs.getCodec(file);

      // open the file and seek to the m_Start of the split
      FileSystem fs = file.getFileSystem(job);
      //  getFileStatus fileStatus = fs.getFileStatus(split.getPath());
      //noinspection deprecation
      @SuppressWarnings(value = "deprecated")
      long length = fs.getLength(file);
      FSDataInputStream fileIn = fs.open(split.getPath());
      if (m_Start > 0) fileIn.seek(m_Start);
      if (codec != null) {
        CompressionInputStream inputStream = codec.createInputStream(fileIn);
        m_Input = new BufferedReader(new InputStreamReader(inputStream));
        m_End = length;
      } else {
        m_Input = new BufferedReader(new InputStreamReader(fileIn));
      }
      m_Current = m_Start;
      m_Key = split.getPath().getName();
    }
 public static String absolutePath(Path p) {
   if (p == null) return "";
   StringBuilder sb = new StringBuilder();
   Path parentPath = p.getParent();
   if (parentPath == null) return "/";
   sb.append(absolutePath(parentPath));
   if (sb.length() > 1) sb.append("/");
   sb.append(p.getName());
   return sb.toString();
 }
Пример #5
0
  /**
   * Generate the list of files and make them into FileSplits. This needs to be copied to insert a
   * filter on acceptable data
   */
  @Override
  public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);
    long desiredMappers =
        job.getConfiguration().getLong("org.systemsbiology.jxtandem.DesiredXMLInputMappers", 0);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> fileStatuses = listStatus(job);
    boolean forceNumberMappers = fileStatuses.size() == 1;
    for (FileStatus file : fileStatuses) {
      Path path = file.getPath();
      if (!isPathAcceptable(path)) // filter acceptable data
      continue;
      FileSystem fs = path.getFileSystem(job.getConfiguration());
      long length = file.getLen();
      BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
      if ((length != 0) && isSplitable(job, path)) {
        long blockSize = file.getBlockSize();
        // use desired mappers to force more splits
        if (forceNumberMappers && desiredMappers > 0)
          maxSize = Math.min(maxSize, (length / desiredMappers));

        long splitSize = computeSplitSize(blockSize, minSize, maxSize);

        long bytesRemaining = length;
        while (withinSlop(splitSize, bytesRemaining)) {
          int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
          splits.add(
              new FileSplit(
                  path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts()));
          bytesRemaining -= splitSize;
        }

        if (bytesRemaining != 0) {
          splits.add(
              new FileSplit(
                  path,
                  length - bytesRemaining,
                  bytesRemaining,
                  blkLocations[blkLocations.length - 1].getHosts()));
        }
      } else if (length != 0) {
        splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
      } else {
        // Create empty hosts array for zero length files
        splits.add(new FileSplit(path, 0, length, new String[0]));
      }
    }
    System.out.println("Total # of splits: " + splits.size());
    //     LOG.debug("Total # of splits: " + splits.size());
    return splits;
  }
Пример #6
0
  /**
   * Add an file path to the current set of classpath entries It adds the file to cache as well.
   *
   * @param file Path of the file to be added
   * @param conf Configuration that contains the classpath setting
   */
  public static void addFileToClassPath(Path file, Configuration conf) throws IOException {
    String classpath = conf.get("mapred.job.classpath.files");
    conf.set(
        "mapred.job.classpath.files",
        classpath == null
            ? file.toString()
            : classpath + System.getProperty("path.separator") + file.toString());
    URI uri = file.makeQualified(file.getFileSystem(conf)).toUri();

    addCacheFile(uri, conf);
  }
Пример #7
0
  private static URI addArchiveToClassPathHelper(Path archive, Configuration conf)
      throws IOException {

    String classpath = conf.get("mapred.job.classpath.archives");

    // the scheme/authority use ':' as separator. put the unqualified path in classpath
    String archivePath = archive.toUri().getPath();

    conf.set(
        "mapred.job.classpath.archives",
        classpath == null
            ? archivePath
            : classpath + System.getProperty("path.separator") + archivePath);
    return archive.makeQualified(archive.getFileSystem(conf)).toUri();
  }
Пример #8
0
 @Override
 protected boolean isSplitable(JobContext context, Path file) {
   String fname = file.getName().toLowerCase();
   //noinspection SimplifiableIfStatementf,RedundantIfStatement
   if (fname.endsWith(".gz")) return false;
   return true;
 }
 private void setupProcedureStore() throws IOException {
   Path testDir = UTIL.getDataTestDir();
   FileSystem fs = testDir.getFileSystem(conf);
   Path logDir = new Path(testDir, "proc-logs");
   System.out.println("Logs directory : " + logDir.toString());
   fs.delete(logDir, true);
   if ("nosync".equals(syncType)) {
     store = new NoSyncWalProcedureStore(conf, fs, logDir);
   } else {
     store = ProcedureTestingUtility.createWalStore(conf, fs, logDir);
   }
   store.start(numThreads);
   store.recoverLease();
   store.load(new ProcedureTestingUtility.LoadCounter());
   System.out.println(
       "Starting new log : " + store.getActiveLogs().get(store.getActiveLogs().size() - 1));
 }
Пример #10
0
 @Override
 public void run() {
   for (CacheStatus lcacheStatus : toBeDeletedCache) {
     synchronized (lcacheStatus) {
       Path fullUniqueParentDir =
           new Path(lcacheStatus.localizedBaseDir, lcacheStatus.uniqueParentDir);
       try {
         LOG.info("Deleting local cached path: " + fullUniqueParentDir.toString());
         deleteLocalPath(asyncDiskService, fs, fullUniqueParentDir);
         // decrement the size of the cache from baseDirSize
         deleteCacheInfoUpdate(lcacheStatus);
         LOG.info("Removed cache " + lcacheStatus.localizedLoadPath);
       } catch (IOException e) {
         LOG.warn("Error when deleting " + fullUniqueParentDir, e);
       }
     }
   }
 }
Пример #11
0
 protected boolean isPathAcceptable(final Path pPath1) {
   String path = pPath1.toString().toLowerCase();
   if (path.startsWith("part-r-")) return true;
   String extension = getExtension();
   if (extension != null && path.endsWith(extension.toLowerCase())) return true;
   if (extension != null && path.endsWith(extension.toLowerCase() + ".gz")) return true;
   //noinspection SimplifiableIfStatement,RedundantIfStatement
   if (extension == null) return true;
   return false;
 }
Пример #12
0
  // the method which actually copies the caches locally and unjars/unzips them
  // and does chmod for the files
  private static Path localizeCache(
      Configuration conf, URI cache, long confFileStamp, CacheStatus cacheStatus, boolean isArchive)
      throws IOException {
    FileSystem fs = getFileSystem(cache, conf);
    FileSystem localFs = FileSystem.getLocal(conf);
    Path parchive = null;

    if (isArchive) {
      parchive =
          new Path(
              cacheStatus.localizedLoadPath, new Path(cacheStatus.localizedLoadPath.getName()));
    } else {
      parchive = cacheStatus.localizedLoadPath;
    }
    if (!localFs.mkdirs(parchive.getParent())) {
      throw new IOException(
          "Mkdirs failed to create directory " + cacheStatus.localizedLoadPath.toString());
    }
    String cacheId = cache.getPath();

    fs.copyToLocalFile(new Path(cacheId), parchive);
    if (isArchive) {
      String tmpArchive = parchive.toString().toLowerCase();
      File srcFile = new File(parchive.toString());
      File destDir = new File(parchive.getParent().toString());
      if (tmpArchive.endsWith(".jar")) {
        RunJar.unJar(srcFile, destDir);
      } else if (tmpArchive.endsWith(".zip")) {
        FileUtil.unZip(srcFile, destDir);
      } else if (isTarFile(tmpArchive)) {
        FileUtil.unTar(srcFile, destDir);
      }
      // else will not do anyhting
      // and copy the file into the dir as it is
    }
    long cacheSize = FileUtil.getDU(new File(parchive.getParent().toString()));
    cacheStatus.size = cacheSize;
    addCacheInfoUpdate(cacheStatus);

    // do chmod here
    try {
      // Setting recursive permission to grant everyone read and execute
      Path localDir = new Path(cacheStatus.localizedBaseDir, cacheStatus.uniqueParentDir);
      LOG.info("Doing chmod on localdir :" + localDir);
      FileUtil.chmod(localDir.toString(), "ugo+rx", true);
    } catch (InterruptedException e) {
      LOG.warn("Exception in chmod" + e.toString());
    }

    // update cacheStatus to reflect the newly cached file
    cacheStatus.mtime = getTimestamp(conf, cache);
    return cacheStatus.localizedLoadPath;
  }
Пример #13
0
  /**
   * Add an archive path to the current set of classpath entries. It adds the archive to cache as
   * well. Intended to be used by user code.
   *
   * @param archive Path of the archive to be added
   * @param conf Configuration that contains the classpath setting
   * @param fs FileSystem with respect to which {@code archive} should be interpreted.
   */
  public static void addArchiveToClassPath(Path archive, Configuration conf, FileSystem fs)
      throws IOException {
    String archivepath = archive.toUri().getPath();
    String classpath = conf.get("mapred.job.classpath.archives");
    conf.set(
        "mapred.job.classpath.archives",
        classpath == null
            ? archivepath
            : classpath + System.getProperty("path.separator") + archivepath);
    URI uri = fs.makeQualified(archive).toUri();

    addCacheArchive(uri, conf);
  }
 public static void recursePath(Configuration conf, Path path, Job job) {
   try {
     FileSystem fs = path.getFileSystem(conf);
     FileStatus[] fstats = fs.listStatus(path);
     if (fstats != null) {
       for (FileStatus f : fstats) {
         Path p = f.getPath();
         ;
         if (fs.isFile(p)) {
           // connection times out otherwise
           System.err.println("file:" + p.toString());
           FileInputFormat.addInputPath(job, p);
         } else {
           System.err.println("dir:" + p.toString());
           recursePath(conf, p, job);
         }
       }
     }
   } catch (IOException e) {
     // shouldn't be here
     throw new RuntimeException(e);
   }
 }
Пример #15
0
 private static boolean writeChecksum(Path localPath, String checksum) {
   try {
     java.io.BufferedWriter writer =
         Files.newBufferedWriter(
             java.nio.file.Paths.get(localPath.toString() + ".crc"), Charset.forName("UTF-8"));
     writer.write(checksum);
     writer.flush();
     writer.close();
     return true;
   } catch (Exception e) {
     logger.warn("Unable to write CRC file!");
     return false;
   }
 }
 /**
  * open a file for writing
  *
  * @param hdfsPath !null path -
  * @return !null stream
  */
 @Override
 public OutputStream openFileForWrite(final Path src) {
   if (isRunningAsUser()) {
     return super.openFileForWrite(src);
   }
   if (true) throw new UnsupportedOperationException("Fix This"); // ToDo
   final FileSystem fs = getDFS();
   try {
     Path parent = src.getParent();
     guaranteeDirectory(parent);
     return FileSystem.create(fs, src, FULL_FILE_ACCESS);
   } catch (IOException e) {
     throw new RuntimeException(e);
   }
 }
Пример #17
0
 private static boolean checksumLocalTest(Path localPath, String checksum) {
   String crcPath = localPath.toString() + ".crc";
   if (Files.isReadable(java.nio.file.Paths.get(crcPath))) {
     try {
       List<String> lines =
           Files.readAllLines(java.nio.file.Paths.get(crcPath), Charset.forName("UTF-8"));
       for (String line : lines) {
         if (line.equals(checksum)) {
           return true;
         }
       }
     } catch (Exception e) {
       return false;
     }
   }
   return false;
 }
Пример #18
0
 private static void createSymlink(
     Configuration conf,
     URI cache,
     CacheStatus cacheStatus,
     boolean isArchive,
     Path currentWorkDir,
     boolean honorSymLinkConf)
     throws IOException {
   boolean doSymlink = honorSymLinkConf && DistributedCache.getSymlink(conf);
   if (cache.getFragment() == null) {
     doSymlink = false;
   }
   String link = currentWorkDir.toString() + Path.SEPARATOR + cache.getFragment();
   File flink = new File(link);
   if (doSymlink) {
     if (!flink.exists()) {
       FileUtil.symLink(cacheStatus.localizedLoadPath.toString(), link);
     }
   }
 }
Пример #19
0
 /**
  * Delete a local path with asyncDiskService if available, or otherwise synchronously with local
  * file system.
  */
 private static void deleteLocalPath(
     MRAsyncDiskService asyncDiskService, LocalFileSystem fs, Path path) throws IOException {
   boolean deleted = false;
   if (asyncDiskService != null) {
     // Try to delete using asyncDiskService
     String localPathToDelete = path.toUri().getPath();
     deleted = asyncDiskService.moveAndDeleteAbsolutePath(localPathToDelete);
     if (!deleted) {
       LOG.warn(
           "Cannot find DistributedCache path "
               + localPathToDelete
               + " on any of the asyncDiskService volumes!");
     }
   }
   if (!deleted) {
     // If no asyncDiskService, we will delete the files synchronously
     fs.delete(path, true);
   }
   LOG.info("Deleted path " + path);
 }
 /**
  * open a file for reading
  *
  * @param hdfsPath !null path - probably of an existing file
  * @return !null stream
  */
 @Override
 public InputStream openFileForRead(Path src) {
   if (isRunningAsUser()) {
     return super.openFileForRead(src);
   }
   String hdfsPath = src.toString();
   if (isFileNameLocal(hdfsPath)) {
     try {
       return new FileInputStream(hdfsPath); // better be local
     } catch (FileNotFoundException e) {
       throw new RuntimeException(e);
     }
   }
   if (true) throw new UnsupportedOperationException("Fix This"); // ToDo
   final FileSystem fs = getDFS();
   try {
     return fs.open(src);
   } catch (IOException e) {
     throw new RuntimeException(e);
   }
 }
Пример #21
0
  /**
   * Get the locally cached file or archive; it could either be previously cached (and valid) or
   * copy it from the {@link FileSystem} now.
   *
   * @param cache the cache to be localized, this should be specified as new
   *     URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no schema or hostname:port is
   *     provided the file is assumed to be in the filesystem being used in the Configuration
   * @param conf The Confguration file which contains the filesystem
   * @param subDir The sub cache Dir where you want to localize the files/archives
   * @param fileStatus The file status on the dfs.
   * @param isArchive if the cache is an archive or a file. In case it is an archive with a .zip or
   *     .jar or .tar or .tgz or .tar.gz extension it will be unzipped/unjarred/untarred
   *     automatically and the directory where the archive is unzipped/unjarred/untarred is returned
   *     as the Path. In case of a file, the path to the file is returned
   * @param confFileStamp this is the hdfs file modification timestamp to verify that the file to be
   *     cached hasn't changed since the job started
   * @param fileLength this is the length of the cache file
   * @param currentWorkDir this is the directory where you would want to create symlinks for the
   *     locally cached files/archives
   * @param honorSymLinkConf if this is false, then the symlinks are not created even if conf says
   *     so (this is required for an optimization in task launches
   * @param lDirAllocator LocalDirAllocator of the tracker
   * @return the path to directory where the archives are unjarred in case of archives, the path to
   *     the file where the file is copied locally
   * @throws IOException
   */
  private static Path getLocalCache(
      URI cache,
      Configuration conf,
      Path subDir,
      FileStatus fileStatus,
      boolean isArchive,
      long confFileStamp,
      long fileLength,
      Path currentWorkDir,
      boolean honorSymLinkConf,
      MRAsyncDiskService asyncDiskService,
      LocalDirAllocator lDirAllocator)
      throws IOException {
    String key = getKey(cache, conf, confFileStamp);

    CacheStatus lcacheStatus;
    Path localizedPath;
    synchronized (cachedArchives) {
      lcacheStatus = cachedArchives.get(key);
      if (lcacheStatus == null) {
        // was never localized
        Path uniqueParentDir = new Path(subDir, String.valueOf(random.nextLong()));
        String cachePath = new Path(uniqueParentDir, makeRelative(cache, conf)).toString();
        Path localPath = lDirAllocator.getLocalPathForWrite(cachePath, fileLength, conf);
        lcacheStatus =
            new CacheStatus(
                new Path(localPath.toString().replace(cachePath, "")), localPath, uniqueParentDir);
        cachedArchives.put(key, lcacheStatus);
      }
      lcacheStatus.refcount++;
    }
    boolean initSuccessful = false;
    try {
      synchronized (lcacheStatus) {
        if (!lcacheStatus.isInited()) {
          localizedPath = localizeCache(conf, cache, confFileStamp, lcacheStatus, isArchive);
          lcacheStatus.initComplete();
        } else {
          if (fileStatus != null) {
            localizedPath =
                checkCacheStatusValidity(
                    conf, cache, confFileStamp, lcacheStatus, fileStatus, isArchive);
          } else {
            // if fileStatus is null, then the md5 must be correct
            // so there is no need to check for cache validity
            localizedPath = lcacheStatus.localizedLoadPath;
          }
        }
        createSymlink(conf, cache, lcacheStatus, isArchive, currentWorkDir, honorSymLinkConf);
      }

      // try deleting stuff if you can
      long size = 0;
      int numberSubDir = 0;
      synchronized (lcacheStatus) {
        synchronized (baseDirSize) {
          Long get = baseDirSize.get(lcacheStatus.getBaseDir());
          if (get != null) {
            size = get.longValue();
          } else {
            LOG.warn("Cannot find size of baseDir: " + lcacheStatus.getBaseDir());
          }
        }
        synchronized (baseDirNumberSubDir) {
          Integer get = baseDirNumberSubDir.get(lcacheStatus.getBaseDir());
          if (get != null) {
            numberSubDir = get.intValue();
          } else {
            LOG.warn("Cannot find subdirectories limit of baseDir: " + lcacheStatus.getBaseDir());
          }
        }
      }
      // setting the cache size to a default of 10GB
      long allowedSize = conf.getLong("local.cache.size", DEFAULT_CACHE_SIZE);
      long allowedNumberSubDir =
          conf.getLong("local.cache.numbersubdir", DEFAULT_CACHE_SUBDIR_LIMIT);
      if (allowedSize < size || allowedNumberSubDir < numberSubDir) {
        // try some cache deletions
        LOG.debug(
            "Start deleting released cache because"
                + " [size, allowedSize, numberSubDir, allowedNumberSubDir] ="
                + " ["
                + size
                + ", "
                + allowedSize
                + ", "
                + numberSubDir
                + ", "
                + allowedNumberSubDir
                + "]");
        deleteCache(conf, asyncDiskService);
      }
      initSuccessful = true;
      return localizedPath;
    } finally {
      if (!initSuccessful) {
        synchronized (cachedArchives) {
          lcacheStatus.refcount--;
        }
      }
    }
  }
Пример #22
0
 /**
  * Add a file path to the current set of classpath entries. It adds the file to cache as well.
  * Intended to be used by user code.
  *
  * @deprecated Please use {@link #addFileToClassPath(Path, Configuration, FileSystem)} instead.
  *     The {@code FileSystem} should be obtained within an appropriate {@code doAs}.
  * @param file Path of the file to be added
  * @param conf Configuration that contains the classpath setting
  */
 @Deprecated
 public static void addFileToClassPath(Path file, Configuration conf) throws IOException {
   addFileToClassPath(file, conf, file.getFileSystem(conf));
 }
Пример #23
0
  public int run(String[] args) throws Exception {
    // printUsage();
    /*
     * SETUP
     */
    Configuration argConf = getConf();
    Hashtable<String, String> confArg = new Hashtable<String, String>();
    setup(confArg, argConf);
    Date currentTime = new Date();
    Date endDate = new Date(new Long(confArg.get("timestamp_stop")));
    Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*");
    Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*");
    logger.info("Running GeStore");

    // ZooKeeper setup
    Configuration config = HBaseConfiguration.create();
    zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config));
    zkInstance =
        new ZooKeeper(
            ZKConfig.getZKQuorumServersString(config),
            config.getInt("zookeeper.session.timeout", -1),
            zkWatcher);

    if (!confArg.get("task_id").isEmpty()) {
      confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id"));
    }

    String lockRequest = confArg.get("file_id");
    if (!confArg.get("run_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("run_id") + "_";
    if (!confArg.get("task_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("task_id") + "_";

    // Get type of movement
    toFrom type_move = checkArgs(confArg);
    if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) {
      List<String> arguments = new ArrayList<String>();
      arguments.add("-Dinput=" + confArg.get("local_path"));
      arguments.add("-Dtable=" + confArg.get("file_id"));
      arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop"));
      arguments.add("-Dtype=" + confArg.get("format"));
      arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id"));
      arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path"));
      arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id"));
      if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add"));
      String lockName = lock(lockRequest);
      String[] argumentString = arguments.toArray(new String[arguments.size()]);
      adddb.main(argumentString);
      unlock(lockName);
      System.exit(0);
    }

    // Database registration

    dbutil db_util = new dbutil(config);
    db_util.register_database(confArg.get("db_name_files"), true);
    db_util.register_database(confArg.get("db_name_runs"), true);
    db_util.register_database(confArg.get("db_name_updates"), true);
    FileSystem hdfs = FileSystem.get(config);
    FileSystem localFS = FileSystem.getLocal(config);

    // Get source type
    confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    confArg.put(
        "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    if (!confArg.get("source").equals("local")
        && type_move == toFrom.REMOTE2LOCAL
        && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) {
      confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util)));
    }

    /*
     * Get previous timestamp
     */
    Get run_id_get = new Get(confArg.get("run_id").getBytes());
    Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get);
    KeyValue run_file_prev =
        run_get.getColumnLatest(
            "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes());
    String last_timestamp = new String("0");
    if (null != run_file_prev && !confArg.get("source").equals("local")) {
      long last_timestamp_real = run_file_prev.getTimestamp();
      Long current_timestamp = new Long(confArg.get("timestamp_real"));
      if ((current_timestamp - last_timestamp_real) > 36000) {
        last_timestamp = new String(run_file_prev.getValue());
        Integer lastTimestamp = new Integer(last_timestamp);
        lastTimestamp += 1;
        last_timestamp = lastTimestamp.toString();
        logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate);
        Date last_run = new Date(run_file_prev.getTimestamp());
        if (last_run.before(endDate) && !full_run) {
          confArg.put("timestamp_start", last_timestamp);
        }
      }
    }

    Integer tse = new Integer(confArg.get("timestamp_stop"));
    Integer tss = new Integer(confArg.get("timestamp_start"));
    if (tss > tse) {
      logger.info("No new version of requested file.");
      return 0;
    }

    /*
     * Generate file
     */

    String lockName = lock(lockRequest);

    Get file_id_get = new Get(confArg.get("file_id").getBytes());
    Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get);
    if (!file_get.isEmpty()) {
      boolean found =
          hasFile(
              db_util,
              hdfs,
              confArg.get("db_name_files"),
              confArg.get("file_id"),
              getFullPath(confArg));
      if (confArg.get("source").equals("fullfile")) {
        found = false;
      }
      String filenames_put =
          getFileNames(
              db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg));
      // Filename not found in file database
      if (!found && type_move == toFrom.REMOTE2LOCAL) {
        if (!confArg.get("source").equals("local")) {
          // Generate intermediate file
          if (getFile(hdfs, confArg, db_util) == null) {
            unlock(lockName);
            return 1;
          }
          // Put generated file into file database
          if (!confArg.get("format").equals("fullfile")) {
            putFileEntry(
                db_util,
                hdfs,
                confArg.get("db_name_files"),
                confArg.get("file_id"),
                confArg.get("full_file_name"),
                confArg.get("source"));
          }
        } else {
          logger.warn("Remote file not found, and cannot be generated! File: " + confArg);
          unlock(lockName);
          return 1;
        }
      }
    } else {
      if (type_move == toFrom.REMOTE2LOCAL) {
        logger.warn("Remote file not found, and cannot be generated.");
        unlock(lockName);
        return 1;
      }
    }

    /*
     * Copy file
     * Update tables
     */

    if (type_move == toFrom.LOCAL2REMOTE) {
      if (!confArg.get("format").equals("fullfile")) {
        putFileEntry(
            db_util,
            hdfs,
            confArg.get("db_name_files"),
            confArg.get("file_id"),
            getFullPath(confArg),
            confArg.get("source"));
      }
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg)));
    } else if (type_move == toFrom.REMOTE2LOCAL) {
      FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*"));
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      unlock(lockName);
      for (FileStatus file : files) {
        Path cur_file = file.getPath();
        Path cur_local_path =
            new Path(new String(confArg.get("local_path") + confArg.get("file_id")));
        String suffix = getSuffix(getFileName(confArg), cur_file.getName());
        if (suffix.length() > 0) {
          cur_local_path = cur_local_path.suffix(new String("." + suffix));
        }
        if (confArg.get("copy").equals("true")) {
          String crc = hdfs.getFileChecksum(cur_file).toString();
          if (checksumLocalTest(cur_local_path, crc)) {
            continue;
          } else {
            hdfs.copyToLocalFile(cur_file, cur_local_path);
            writeChecksum(cur_local_path, crc);
          }
        } else {
          System.out.println(cur_local_path + "\t" + cur_file);
        }
      }
    }
    unlock(lockName);
    return 0;
  }
Пример #24
0
  // Information needed to get a single file:
  // BASE_PATH, FILE_ID, TIMESTAMP_START, TIMESTAMP_STOP, SOURCE, FILESYSTEM
  private static Vector<Path> getFile(
      FileSystem fs, Hashtable<String, String> config, dbutil db_util) throws Exception {
    Long latestVersion = latestVersion(config, db_util);

    try {
      config.put("timestamp_start", config.get("timestamp_start"));
      config.put("timestamp_real", latestVersion.toString());
      config.put("timestamp_stop", latestVersion.toString());
    } catch (Exception E) {
      logger.error("Tryign to get file that is impossible to generate: " + getFullPath(config));
      return null;
    }
    if (Integer.parseInt(config.get("timestamp_start"))
        > Integer.parseInt(config.get("timestamp_stop"))) {
      return null;
    }
    logger.debug(
        "Getting DB for timestamp "
            + config.get("timestamp_start")
            + " to "
            + config.get("timestamp_stop"));

    String final_result = getFullPath(config);

    String temp_path_base =
        config.get("local_temp_path")
            + "_"
            + config.get("task_id")
            + "_"
            + config.get("run_id")
            + "/";
    Path newPath = new Path(final_result + "*");
    Vector<Path> ret_path = new Vector<Path>();
    String lockName = lock(final_result.replaceAll("/", "_"));
    if (fs.globStatus(newPath).length != 0) {
      ret_path.add(newPath);
      unlock(lockName);
      config.put("full_file_name", final_result);
      return ret_path;
    } else {
      if (!config.get("source").equals("local")) {
        config.put("temp_path_base", temp_path_base);

        config.put("timestamp_start", config.get("timestamp_start"));
        config.put("timestamp_real", latestVersion.toString());
        config.put("timestamp_stop", latestVersion.toString());

        Class<?> sourceClass =
            Class.forName("org.gestore.plugin.source." + config.get("source") + "Source");
        Method process_data = sourceClass.getMethod("process", Hashtable.class, FileSystem.class);
        Object processor = sourceClass.newInstance();
        Object retVal;
        try {
          retVal = process_data.invoke(processor, config, fs);
        } catch (InvocationTargetException E) {
          Throwable exception = E.getTargetException();
          logger.error("Unable to call method in child class: " + exception.toString());
          exception.printStackTrace(System.out);
          unlock(lockName);
          return null;
        }
        FileStatus[] files = (FileStatus[]) retVal;
        if (files == null) {
          logger.error("Error getting files, no files returned");
          return null;
        }

        for (FileStatus file : files) {
          Path cur_file = file.getPath();
          Path cur_local_path = new Path(temp_path_base + config.get("file_id"));
          String suffix = getSuffix(config.get("file_id"), cur_file.getName());
          cur_local_path = cur_local_path.suffix(suffix);
          Path res_path = new Path(new String(final_result + suffix));
          logger.debug("Moving file" + cur_file.toString() + " to " + res_path.toString());
          if (config.get("copy").equals("true")) {
            fs.moveFromLocalFile(cur_file, res_path);
          } else {
            fs.rename(cur_file, res_path);
          }
        }

        config.put("full_file_name", final_result);
      }
    }
    unlock(lockName);
    return ret_path;
  }
Пример #25
0
 /**
  * Add an archive path to the current set of classpath entries. It adds the archive to cache as
  * well. Intended to be used by user code.
  *
  * @deprecated Please use {@link #addArchiveToClassPath(Path, Configuration, FileSystem)} instead.
  *     The {@code FileSystem} should be obtained within an appropriate {@code doAs}.
  * @param archive Path of the archive to be added
  * @param conf Configuration that contains the classpath setting
  */
 @Deprecated
 public static void addArchiveToClassPath(Path archive, Configuration conf) throws IOException {
   addArchiveToClassPath(archive, conf, archive.getFileSystem(conf));
 }