private void testAbortInternal(int version) throws IOException, InterruptedException {
    JobConf conf = new JobConf();
    FileOutputFormat.setOutputPath(conf, outDir);
    conf.set(JobContext.TASK_ATTEMPT_ID, attempt);
    conf.setInt(
        org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
            .FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
        version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();

    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);

    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(null, conf, partFile, null);
    writeOutput(theRecordWriter, tContext);

    // do abort
    committer.abortTask(tContext);
    File out = new File(outDir.toUri().getPath());
    Path workPath = committer.getWorkPath(tContext, outDir);
    File wp = new File(workPath.toUri().getPath());
    File expectedFile = new File(wp, partFile);
    assertFalse("task temp dir still exists", expectedFile.exists());

    committer.abortJob(jContext, JobStatus.State.FAILED);
    expectedFile = new File(out, FileOutputCommitter.TEMP_DIR_NAME);
    assertFalse("job temp dir still exists", expectedFile.exists());
    assertEquals("Output directory not empty", 0, out.listFiles().length);
    FileUtil.fullyDelete(out);
  }
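A plausible pair of JUnit entry points for the helper above, assuming the standard v1/v2 values of the mapreduce file output committer algorithm; the method names are illustrative rather than taken from the original test class.

  // Hypothetical callers exercising both committer algorithm versions.
  public void testAbortV1() throws IOException, InterruptedException {
    testAbortInternal(1);
  }

  public void testAbortV2() throws IOException, InterruptedException {
    testAbortInternal(2);
  }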
 /**
  * Create dirs & session paths for this session:
  * 1. HDFS scratch dir
  * 2. Local scratch dir
  * 3. Local downloaded resource dir
  * 4. HDFS session path
  * 5. Local session path
  * 6. HDFS temp table space
  *
  * @param userName
  * @throws IOException
  */
 private void createSessionDirs(String userName) throws IOException {
   HiveConf conf = getConf();
   Path rootHDFSDirPath = createRootHDFSDir(conf);
   // Now create session specific dirs
   String scratchDirPermission = HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIRPERMISSION);
   Path path;
   // 1. HDFS scratch dir
   path = new Path(rootHDFSDirPath, userName);
   hdfsScratchDirURIString = path.toUri().toString();
   createPath(conf, path, scratchDirPermission, false, false);
   // 2. Local scratch dir
   path = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.LOCALSCRATCHDIR));
   createPath(conf, path, scratchDirPermission, true, false);
   // 3. Download resources dir
   path = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.DOWNLOADED_RESOURCES_DIR));
   createPath(conf, path, scratchDirPermission, true, false);
   // Finally, create session paths for this session
    // Local & non-local tmp locations are configurable; however, they are the same across
    // all external file systems.
   String sessionId = getSessionId();
   // 4. HDFS session path
   hdfsSessionPath = new Path(hdfsScratchDirURIString, sessionId);
   createPath(conf, hdfsSessionPath, scratchDirPermission, false, true);
   conf.set(HDFS_SESSION_PATH_KEY, hdfsSessionPath.toUri().toString());
   // 5. Local session path
   localSessionPath =
       new Path(HiveConf.getVar(conf, HiveConf.ConfVars.LOCALSCRATCHDIR), sessionId);
   createPath(conf, localSessionPath, scratchDirPermission, true, true);
   conf.set(LOCAL_SESSION_PATH_KEY, localSessionPath.toUri().toString());
   // 6. HDFS temp table space
   hdfsTmpTableSpace = new Path(hdfsSessionPath, TMP_PREFIX);
   createPath(conf, hdfsTmpTableSpace, scratchDirPermission, false, true);
   conf.set(TMP_TABLE_SPACE_KEY, hdfsTmpTableSpace.toUri().toString());
 }
Example #3
  /**
   * Validate that the input history log paths are as expected.
   *
   * @param inputs the resultant input paths to be validated
   * @param expectedHistoryFileNames the expected input history logs
   * @throws IOException
   */
  private void validateHistoryLogPaths(List<Path> inputs, List<String> expectedHistoryFileNames)
      throws IOException {

    System.out.println("\nExpected history files are:");
    for (String historyFile : expectedHistoryFileNames) {
      System.out.println(historyFile);
    }
    System.out.println("\nResultant history files are:");
    List<String> historyLogs = new ArrayList<String>();
    for (Path p : inputs) {
      historyLogs.add(p.toUri().getPath());
      System.out.println(p.toUri().getPath());
    }

    assertEquals(
        "Number of history logs found is different from the expected.",
        expectedHistoryFileNames.size(),
        inputs.size());

    // Verify if all the history logs are expected ones and they are in the
    // expected order
    assertTrue(
        "Some of the history log files do not match the expected.",
        historyLogs.equals(expectedHistoryFileNames));
  }
  public HdfsDirectory(Path hdfsDirPath, LockFactory lockFactory, Configuration configuration)
      throws IOException {
    super(lockFactory);
    this.hdfsDirPath = hdfsDirPath;
    this.configuration = configuration;
    fileSystem = FileSystem.get(hdfsDirPath.toUri(), configuration);
    fileContext = FileContext.getFileContext(hdfsDirPath.toUri(), configuration);

    if (fileSystem instanceof DistributedFileSystem) {
      // Make sure dfs is not in safe mode
      while (((DistributedFileSystem) fileSystem).setSafeMode(SafeModeAction.SAFEMODE_GET, true)) {
        LOG.warn("The NameNode is in SafeMode - Solr will wait 5 seconds and try again.");
        try {
          Thread.sleep(5000);
        } catch (InterruptedException e) {
          Thread.interrupted();
          // continue
        }
      }
    }

    try {
      if (!fileSystem.exists(hdfsDirPath)) {
        boolean success = fileSystem.mkdirs(hdfsDirPath);
        if (!success) {
          throw new RuntimeException("Could not create directory: " + hdfsDirPath);
        }
      }
    } catch (Exception e) {
      org.apache.solr.common.util.IOUtils.closeQuietly(fileSystem);
      throw new RuntimeException("Problem creating directory: " + hdfsDirPath, e);
    }
  }
Example #5
    @Override
    public void execute() throws IOException {
      if (offset < 0) throw new IllegalArgumentException("Offset cannot be less than 0.");

      System.out.println("Getting file paths...");

      final Path[] sequenceFiles = SequenceFileUtility.getFilePaths(inputPathOrUri, "part");
      final ExtractionState nps = new ExtractionState();
      nps.setMaxFileExtract(max);

      if (random >= 0) {
        System.out.println("Counting records");

        int totalRecords = 0;
        for (final Path path : sequenceFiles) {
          System.out.println("... Counting from file: " + path);
          final SequenceFileUtility<Text, BytesWritable> utility =
              new TextBytesSequenceFileUtility(path.toUri(), true);
          totalRecords += utility.getNumberRecords();
        }

        System.out.println("Selecting random subset of " + random + " from " + totalRecords);

        nps.setRandomSelection(random, totalRecords);
      }

      ZipOutputStream zos = null;
      if (zipMode) {
        zos = SequenceFileUtility.openZipOutputStream(outputPathOrUri);
      }

      for (final Path path : sequenceFiles) {
        System.out.println("Extracting from " + path.getName());

        final SequenceFileUtility<Text, BytesWritable> utility =
            new TextBytesSequenceFileUtility(path.toUri(), true);
        if (queryKey == null) {
          if (zipMode) {
            utility.exportDataToZip(zos, np, nps, autoExtension, offset);
          } else {
            utility.exportData(outputPathOrUri, np, nps, autoExtension, offset);
          }
        } else {
          if (zipMode) {
            throw new UnsupportedOperationException("Not implemented yet");
          } else {
            if (!utility.findAndExport(new Text(queryKey), outputPathOrUri, offset)) {
              if (offset == 0) System.err.format("Key '%s' was not found in the file.\n", queryKey);
              else
                System.err.format(
                    "Key '%s' was not found in the file after offset %d.\n", queryKey, offset);
            }
          }
        }

        if (nps.isFinished()) break;
      }

      if (zos != null) zos.close();
    }
Example #6
  private void checkLocalizedPath(boolean visibility)
      throws IOException, LoginException, InterruptedException {
    TrackerDistributedCacheManager manager =
        new TrackerDistributedCacheManager(conf, taskController);
    String userName = getJobOwnerName();
    File workDir = new File(TEST_ROOT_DIR, "workdir");
    Path cacheFile = new Path(TEST_ROOT_DIR, "fourthcachefile");
    if (visibility) {
      createPublicTempFile(cacheFile);
    } else {
      createPrivateTempFile(cacheFile);
    }

    Configuration conf1 = new Configuration(conf);
    conf1.set("user.name", userName);
    DistributedCache.addCacheFile(cacheFile.toUri(), conf1);
    TrackerDistributedCacheManager.determineTimestamps(conf1);
    TrackerDistributedCacheManager.determineCacheVisibilities(conf1);
    dumpState(conf1);

    // Task localizing for job
    TaskDistributedCacheManager handle =
        manager.newTaskDistributedCacheManager(new JobID("jt", 1), conf1);
    handle.setupCache(
        conf1,
        TaskTracker.getPublicDistributedCacheDir(),
        TaskTracker.getPrivateDistributedCacheDir(userName));
    JobLocalizer.downloadPrivateCache(conf1);
    TaskDistributedCacheManager.CacheFile c = handle.getCacheFiles().get(0);
    String distCacheDir;
    if (visibility) {
      distCacheDir = TaskTracker.getPublicDistributedCacheDir();
    } else {
      distCacheDir = TaskTracker.getPrivateDistributedCacheDir(userName);
    }
    Path localizedPath =
        manager.getLocalCache(
            cacheFile.toUri(),
            conf1,
            distCacheDir,
            fs.getFileStatus(cacheFile),
            false,
            c.timestamp,
            visibility,
            c);
    assertTrue(
        "Cache file didn't get localized in the expected directory. "
            + "Expected localization to happen within "
            + ROOT_MAPRED_LOCAL_DIR
            + "/"
            + distCacheDir
            + ", but was localized at "
            + localizedPath,
        localizedPath.toString().contains(distCacheDir));
    if (visibility) {
      checkPublicFilePermissions(new Path[] {localizedPath});
    } else {
      checkFilePermissions(new Path[] {localizedPath});
    }
  }
Example #7
 /**
  * Copy a directory to a new FS; both paths must be qualified.
  *
  * @param conf conf file
  * @param srcDirPath src dir
  * @param destDirPath dest dir
  * @return # of files copied
  */
 public static int copyDirectory(Configuration conf, Path srcDirPath, Path destDirPath)
     throws IOException {
   FileSystem srcFS = FileSystem.get(srcDirPath.toUri(), conf);
   FileSystem destFS = FileSystem.get(destDirPath.toUri(), conf);
   // list all paths in the src.
   if (!srcFS.exists(srcDirPath)) {
     throw new FileNotFoundException("Source dir not found " + srcDirPath);
   }
   if (!srcFS.isDirectory(srcDirPath)) {
     throw new FileNotFoundException("Source dir not a directory " + srcDirPath);
   }
   FileStatus[] entries = srcFS.listStatus(srcDirPath);
   int srcFileCount = entries.length;
   if (srcFileCount == 0) {
     return 0;
   }
   if (!destFS.exists(destDirPath)) {
     destFS.mkdirs(destDirPath);
   }
   Path[] sourcePaths = new Path[srcFileCount];
   for (int i = 0; i < srcFileCount; i++) {
     FileStatus e = entries[i];
     Path srcFile = e.getPath();
     if (srcFS.isDirectory(srcFile)) {
       throw new IOException(
           "Configuration dir " + srcDirPath + " contains a directory " + srcFile);
     }
     log.debug("copying src conf file {}", srcFile);
     sourcePaths[i] = srcFile;
   }
   log.debug("Copying {} files from to {} to dest {}", srcFileCount, srcDirPath, destDirPath);
   FileUtil.copy(srcFS, sourcePaths, destFS, destDirPath, false, true, conf);
   return srcFileCount;
 }
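A brief usage sketch for the copy helper above, assuming it is invoked from inside the same utility class; the namenode URI and directory names are placeholders.

   // Hypothetical invocation; both paths are already qualified with scheme and authority.
   Configuration conf = new Configuration();
   Path srcDir = new Path("hdfs://namenode:8020/apps/conf-src");
   Path destDir = new Path("hdfs://namenode:8020/apps/conf-dest");
   int copied = copyDirectory(conf, srcDir, destDir);
   log.info("Copied {} configuration files", copied);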
  /**
   * Convert Hadoop path into IGFS path.
   *
   * @param path Hadoop path.
   * @return IGFS path.
   */
  @Nullable
  private IgfsPath convert(Path path) {
    if (path == null) return null;

    return path.isAbsolute()
        ? new IgfsPath(path.toUri().getPath())
        : new IgfsPath(workingDir, path.toUri().getPath());
  }
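A minimal sketch (hypothetical paths, assuming org.apache.hadoop.fs.Path is imported) of the distinction the converter above relies on: Path.isAbsolute() decides the branch, and toUri().getPath() supplies the bare path component.

    Path absolute = new Path("/user/ignite/data.txt");
    Path relative = new Path("data.txt");
    System.out.println(absolute.isAbsolute());       // true  -> wrapped directly as an IgfsPath
    System.out.println(relative.isAbsolute());       // false -> resolved against workingDir first
    System.out.println(absolute.toUri().getPath());  // /user/ignite/data.txt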
  public boolean rename(Path src, Path dst) throws IOException {
    Path absoluteS = makeAbsolute(src);
    String srepS = absoluteS.toUri().getPath();
    Path absoluteD = makeAbsolute(dst);
    String srepD = absoluteD.toUri().getPath();

    // System.out.println("Calling rename on: " + srepS + " -> " + srepD);

    return kfsImpl.rename(srepS, srepD) == 0;
  }
Example #10
  public static Job createTimesSquaredJob(
      Configuration initialConf,
      Vector v,
      int outputVectorDim,
      Path matrixInputPath,
      Path outputVectorPathBase,
      Class<? extends TimesSquaredMapper> mapClass,
      Class<? extends VectorSummingReducer> redClass)
      throws IOException {

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), initialConf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    outputVectorPathBase = fs.makeQualified(outputVectorPathBase);

    long now = System.nanoTime();
    Path inputVectorPath = new Path(outputVectorPathBase, INPUT_VECTOR + '/' + now);

    SequenceFile.Writer inputVectorPathWriter = null;

    try {
      inputVectorPathWriter =
          new SequenceFile.Writer(
              fs, initialConf, inputVectorPath, NullWritable.class, VectorWritable.class);
      inputVectorPathWriter.append(NullWritable.get(), new VectorWritable(v));
    } finally {
      Closeables.close(inputVectorPathWriter, false);
    }

    URI ivpURI = inputVectorPath.toUri();
    DistributedCache.setCacheFiles(new URI[] {ivpURI}, initialConf);

    Job job =
        HadoopUtil.prepareJob(
            matrixInputPath,
            new Path(outputVectorPathBase, OUTPUT_VECTOR_FILENAME),
            SequenceFileInputFormat.class,
            mapClass,
            NullWritable.class,
            VectorWritable.class,
            redClass,
            NullWritable.class,
            VectorWritable.class,
            SequenceFileOutputFormat.class,
            initialConf);
    job.setCombinerClass(redClass);
    job.setJobName("TimesSquaredJob: " + matrixInputPath);

    Configuration conf = job.getConfiguration();
    conf.set(INPUT_VECTOR, ivpURI.toString());
    conf.setBoolean(IS_SPARSE_OUTPUT, !v.isDense());
    conf.setInt(OUTPUT_VECTOR_DIMENSION, outputVectorDim);

    return job;
  }
Example #11
  /**
   * Given a URI for mapreduce intermediate output, swizzle it to point to the local file
   * system. This can be called in case the caller decides to run in local mode (in which case all
   * intermediate data can be stored locally)
   *
   * @param originalURI uri to localize
   * @return localized path for map-red intermediate data
   */
  public String localizeMRTmpFileURI(String originalURI) {
    Path o = new Path(originalURI);
    Path mrbase = new Path(getMRScratchDir());

    URI relURI = mrbase.toUri().relativize(o.toUri());
    if (relURI.equals(o.toUri())) {
      throw new RuntimeException(
          "Invalid URI: " + originalURI + ", cannot relativize against" + mrbase.toString());
    }

    return getLocalScratchDir(!explain) + Path.SEPARATOR + relURI.getPath();
  }
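A small sketch (hypothetical scratch paths, assuming java.net.URI and org.apache.hadoop.fs.Path are imported) of the relativize step above: a URI under the MR scratch base is reduced to its relative tail, while anything else comes back unchanged and triggers the RuntimeException.

    Path mrbase = new Path("hdfs://namenode:8020/tmp/hive/scratch");
    Path original = new Path("hdfs://namenode:8020/tmp/hive/scratch/-mr-10003/000000_0");
    URI relURI = mrbase.toUri().relativize(original.toUri());
    System.out.println(relURI.getPath());   // -mr-10003/000000_0

    Path outside = new Path("file:///tmp/other");
    // relativize() returns its argument unchanged when it is not under the base.
    System.out.println(mrbase.toUri().relativize(outside.toUri()).equals(outside.toUri()));  // true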
  /**
   * If <code>f</code> is a file, this method will make a single call to S3. If <code>f</code> is a
   * directory, this method will make a maximum of (<i>n</i> / 1000) + 2 calls to S3, where <i>n</i>
   * is the total number of files and directories contained directly in <code>f</code>.
   */
  @Override
  public FileStatus[] listStatus(Path f) throws IOException {

    Path absolutePath = makeAbsolute(f);
    String key = pathToKey(absolutePath);

    if (key.length() > 0) {
      FileMetadata meta = store.retrieveMetadata(key);
      if (meta != null) {
        return new FileStatus[] {newFile(meta, absolutePath)};
      }
    }

    URI pathUri = absolutePath.toUri();
    Set<FileStatus> status = new TreeSet<FileStatus>();
    String priorLastKey = null;
    do {
      PartialListing listing = store.list(key, S3_MAX_LISTING_LENGTH, priorLastKey, false);
      for (FileMetadata fileMetadata : listing.getFiles()) {
        Path subpath = keyToPath(fileMetadata.getKey());
        String relativePath = pathUri.relativize(subpath.toUri()).getPath();

        if (fileMetadata.getKey().equals(key + "/")) {
          // this is just the directory we have been asked to list
        } else if (relativePath.endsWith(FOLDER_SUFFIX)) {
          status.add(
              newDirectory(
                  new Path(
                      absolutePath,
                      relativePath.substring(0, relativePath.indexOf(FOLDER_SUFFIX)))));
        } else {
          status.add(newFile(fileMetadata, subpath));
        }
      }
      for (String commonPrefix : listing.getCommonPrefixes()) {
        Path subpath = keyToPath(commonPrefix);
        String relativePath = pathUri.relativize(subpath.toUri()).getPath();
        status.add(newDirectory(new Path(absolutePath, relativePath)));
      }
      priorLastKey = listing.getPriorLastKey();
    } while (priorLastKey != null);

    if (status.isEmpty()
        && key.length() > 0
        && store.retrieveMetadata(key + FOLDER_SUFFIX) == null) {
      throw new FileNotFoundException("File " + f + " does not exist.");
    }

    return status.toArray(new FileStatus[status.size()]);
  }
 @Override
 public void initializeJob(
     String user,
     String jobid,
     Path credentials,
     Path jobConf,
     TaskUmbilicalProtocol taskTracker,
     InetSocketAddress ttAddr)
     throws IOException {
   List<String> command =
       new ArrayList<String>(
           Arrays.asList(
               taskControllerExe,
               user,
               localStorage.getDirsString(),
               Integer.toString(Commands.INITIALIZE_JOB.getValue()),
               jobid,
               credentials.toUri().getPath().toString(),
               jobConf.toUri().getPath().toString()));
   File jvm = // use same jvm as parent
       new File(new File(System.getProperty("java.home"), "bin"), "java");
   command.add(jvm.toString());
   command.add("-classpath");
   command.add(System.getProperty("java.class.path"));
   command.add("-Dhadoop.log.dir=" + TaskLog.getBaseLogDir());
   command.add("-Dhadoop.root.logger=INFO,console");
   command.add(JobLocalizer.class.getName()); // main of JobLocalizer
   command.add(user);
   command.add(jobid);
   // add the task tracker's reporting address
   command.add(ttAddr.getHostName());
   command.add(Integer.toString(ttAddr.getPort()));
   String[] commandArray = command.toArray(new String[0]);
   ShellCommandExecutor shExec = new ShellCommandExecutor(commandArray);
   if (LOG.isDebugEnabled()) {
     LOG.debug("initializeJob: " + Arrays.toString(commandArray));
   }
   try {
     shExec.execute();
     if (LOG.isDebugEnabled()) {
       logOutput(shExec.getOutput());
     }
   } catch (ExitCodeException e) {
     int exitCode = shExec.getExitCode();
     logOutput(shExec.getOutput());
     throw new IOException(
         "Job initialization failed (" + exitCode + ") with output: " + shExec.getOutput(), e);
   }
 }
 private static String pathToKey(Path path) {
   if (path.toUri().getScheme() != null && path.toUri().getPath().isEmpty()) {
     // allow uris without trailing slash after bucket to refer to root,
     // like s3n://mybucket
     return "";
   }
   if (!path.isAbsolute()) {
     throw new IllegalArgumentException("Path must be absolute: " + path);
   }
   String ret = path.toUri().getPath().substring(1); // remove initial slash
   if (ret.endsWith("/") && (ret.indexOf("/") != ret.length() - 1)) {
     ret = ret.substring(0, ret.length() - 1);
   }
   return ret;
 }
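A minimal sketch (hypothetical bucket and key names) of the derivation above: a bare bucket URI has an empty path component and maps to the root key "", otherwise the leading slash is stripped.

   Path bucketRoot = new Path("s3n://mybucket");
   Path object = new Path("s3n://mybucket/logs/2020/part-00000");
   System.out.println(bucketRoot.toUri().getPath().isEmpty());   // true  -> key ""
   System.out.println(object.toUri().getPath());                 // /logs/2020/part-00000
   System.out.println(object.toUri().getPath().substring(1));    // logs/2020/part-00000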
Example #15
  /** Returns a qualified path object. */
  @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
  public Path makeQualified(URI defaultUri, Path workingDir) {
    Path path = this;
    if (!isAbsolute()) {
      path = new Path(workingDir, this);
    }

    URI pathUri = path.toUri();

    String scheme = pathUri.getScheme();
    String authority = pathUri.getAuthority();
    String fragment = pathUri.getFragment();

    if (scheme != null && (authority != null || defaultUri.getAuthority() == null)) return path;

    if (scheme == null) {
      scheme = defaultUri.getScheme();
    }

    if (authority == null) {
      authority = defaultUri.getAuthority();
      if (authority == null) {
        authority = "";
      }
    }

    URI newUri = null;
    try {
      newUri = new URI(scheme, authority, normalizePath(pathUri.getPath()), null, fragment);
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }
    return new Path(newUri);
  }
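A short sketch (hypothetical cluster URI and directories, assuming java.net.URI and org.apache.hadoop.fs.Path are imported) of the qualification rule above: a scheme-less relative path is first resolved against the working directory and then picks up the default URI's scheme and authority.

    URI defaultUri = URI.create("hdfs://namenode:8020");
    Path workingDir = new Path("/user/alice");
    Path relative = new Path("data/input.txt");
    // -> hdfs://namenode:8020/user/alice/data/input.txt
    System.out.println(relative.makeQualified(defaultUri, workingDir));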
Example #16
 /**
  * Appends the child to the current {@link Location} on HDFS.
  *
  * <p>Returns a new instance of Location.
  *
  * @param child to be appended to this location.
  * @return A new instance of {@link Location}
  * @throws IOException
  */
 @Override
 public Location append(String child) throws IOException {
   if (child.startsWith("/")) {
     child = child.substring(1);
   }
   return new HDFSLocation(fs, new Path(URI.create(path.toUri() + "/" + child)));
 }
  @Override
  protected synchronized void startInternal() throws Exception {
    // create filesystem only now, as part of service-start. By this time, RM is
    // authenticated with kerberos so we are good to create a file-system
    // handle.
    fsConf = new Configuration(getConfig());
    fsConf.setBoolean("dfs.client.retry.policy.enabled", true);
    String retryPolicy =
        fsConf.get(
            YarnConfiguration.FS_RM_STATE_STORE_RETRY_POLICY_SPEC,
            YarnConfiguration.DEFAULT_FS_RM_STATE_STORE_RETRY_POLICY_SPEC);
    fsConf.set("dfs.client.retry.policy.spec", retryPolicy);

    String scheme = fsWorkingPath.toUri().getScheme();
    if (scheme == null) {
      scheme = FileSystem.getDefaultUri(fsConf).getScheme();
    }
    if (scheme != null) {
      String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme);
      fsConf.setBoolean(disableCacheName, true);
    }

    fs = fsWorkingPath.getFileSystem(fsConf);
    mkdirsWithRetries(rmDTSecretManagerRoot);
    mkdirsWithRetries(rmAppRoot);
    mkdirsWithRetries(amrmTokenSecretManagerRoot);
    mkdirsWithRetries(reservationRoot);
  }
  @Override
  public List<LuceneSegmentInputSplit> getSplits(JobContext context)
      throws IOException, InterruptedException {
    Configuration configuration = context.getConfiguration();

    LuceneStorageConfiguration lucene2SeqConfiguration =
        new LuceneStorageConfiguration(configuration);

    List<LuceneSegmentInputSplit> inputSplits = new ArrayList<>();

    List<Path> indexPaths = lucene2SeqConfiguration.getIndexPaths();
    for (Path indexPath : indexPaths) {
      ReadOnlyFileSystemDirectory directory =
          new ReadOnlyFileSystemDirectory(
              FileSystem.get(configuration), indexPath, false, configuration);
      SegmentInfos segmentInfos = new SegmentInfos();
      segmentInfos.read(directory);

      for (SegmentCommitInfo segmentInfo : segmentInfos) {
        LuceneSegmentInputSplit inputSplit =
            new LuceneSegmentInputSplit(
                indexPath, segmentInfo.info.name, segmentInfo.sizeInBytes());
        inputSplits.add(inputSplit);
        LOG.info(
            "Created {} byte input split for index '{}' segment {}",
            segmentInfo.sizeInBytes(),
            indexPath.toUri(),
            segmentInfo.info.name);
      }
    }

    return inputSplits;
  }
  private double[] getSparkModelInfoFromHDFS(Path location, Configuration conf) throws Exception {

    FileSystem fileSystem = FileSystem.get(location.toUri(), conf);
    FileStatus[] files = fileSystem.listStatus(location);

    if (files == null) throw new Exception("Couldn't find Spark Truck ML weights at: " + location);

    ArrayList<Double> modelInfo = new ArrayList<Double>();
    for (FileStatus file : files) {

      if (file.getPath().getName().startsWith("_")) {
        continue;
      }

      InputStream stream = fileSystem.open(file.getPath());

      StringWriter writer = new StringWriter();
      IOUtils.copy(stream, writer, "UTF-8");
      String raw = writer.toString();
      for (String str : raw.split("\n")) {
        modelInfo.add(Double.valueOf(str));
      }
    }

    return Doubles.toArray(modelInfo);
  }
Example #20
  /**
   * Return the set of all URLs matching this input. If autocomplete is false, the set contains
   * only one element (same as getUrl()). Otherwise, it will try to return all the files beginning
   * with what is returned by getUrl().
   *
   * @param jobConf A Configuration object
   * @return the set of input URLs
   */
  public HashSet<URI> getAllUrls(Configuration jobConf) {

    HashSet<URI> urls = new HashSet<URI>();

    if (!isAutoComplete()) {
      urls.add(url);
    } else {
      Path basePath = new Path(url);
      String filePrefix = basePath.getName();

      try {
        FileSystem fs = basePath.getFileSystem(jobConf);

        if (!fs.exists(basePath.getParent())) {
          throw new IOException("Input directory not found: " + url);
        }

        FileStatus[] stats = fs.listStatus(basePath.getParent());

        for (int i = 0; i < stats.length; i++) {
          Path path = stats[i].getPath();
          if (fs.isFile(path) && path.getName().startsWith(filePrefix)) urls.add(path.toUri());
        }
      } catch (IOException e) {
        System.err.println("Unable to autocomplete input file");
        e.printStackTrace();
        System.exit(1);
      }
    }

    return urls;
  }
 /** Convert a path to a File. */
 public File pathToFile(Path path) {
   checkPath(path);
   if (!path.isAbsolute()) {
     path = new Path(getWorkingDirectory(), path);
   }
   return new File(path.toUri().getPath());
 }
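A tiny sketch (hypothetical path, assuming java.io.File and org.apache.hadoop.fs.Path are imported) of the conversion above: toUri().getPath() drops the scheme so the result can back a java.io.File.

   Path p = new Path("file:///tmp/example/output.txt");
   File f = new File(p.toUri().getPath());
   System.out.println(f);   // /tmp/example/output.txt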
Example #22
 public H2OHdfsInputStream(Path p, long offset, ProgressMonitor pmon) throws IOException {
   super(offset, pmon);
   _path = p;
   _fs = FileSystem.get(p.toUri(), CONF);
   setExpectedSz(_fs.getFileStatus(p).getLen());
   open();
 }
  public FSDataOutputStream create(
      Path file,
      FsPermission permission,
      boolean overwrite,
      int bufferSize,
      short replication,
      long blockSize,
      Progressable progress)
      throws IOException {

    if (exists(file)) {
      if (overwrite) {
        delete(file);
      } else {
        throw new IOException("File already exists: " + file);
      }
    }

    Path parent = file.getParent();
    if (parent != null && !mkdirs(parent)) {
      throw new IOException("Mkdirs failed to create " + parent);
    }

    Path absolute = makeAbsolute(file);
    String srep = absolute.toUri().getPath();

    return kfsImpl.create(srep, replication, bufferSize);
  }
Example #24
 /**
  * Convenience method, so that we don't open a new connection when using this method from within
  * another method. Otherwise every API invocation incurs the overhead of opening/closing a TCP
  * connection.
  */
 private FileStatus getFileStatus(FTPClient client, Path file) throws IOException {
   FileStatus fileStat = null;
   Path workDir = new Path(client.printWorkingDirectory());
   Path absolute = makeAbsolute(workDir, file);
   Path parentPath = absolute.getParent();
   if (parentPath == null) { // root dir
     long length = -1; // Length of root dir on server not known
     boolean isDir = true;
     int blockReplication = 1;
     long blockSize = DEFAULT_BLOCK_SIZE; // Block Size not known.
     long modTime = -1; // Modification time of root dir not known.
     Path root = new Path("/");
     return new FileStatus(
         length, isDir, blockReplication, blockSize, modTime, root.makeQualified(this));
   }
   String pathName = parentPath.toUri().getPath();
   FTPFile[] ftpFiles = client.listFiles(pathName);
   if (ftpFiles != null) {
     for (FTPFile ftpFile : ftpFiles) {
       if (ftpFile.getName().equals(file.getName())) { // file found in dir
         fileStat = getFileStatus(ftpFile, parentPath);
         break;
       }
     }
     if (fileStat == null) {
       throw new FileNotFoundException("File " + file + " does not exist.");
     }
   } else {
     throw new FileNotFoundException("File " + file + " does not exist.");
   }
   return fileStat;
 }
  protected void setupCluster(boolean simulated, long minFileSize, String[] racks, String[] hosts)
      throws IOException {
    conf = new Configuration();
    localFileSys = FileSystem.getLocal(conf);
    conf.setLong("dfs.blockreport.intervalMsec", 1000L);
    conf.set("dfs.replication.pending.timeout.sec", "2");
    conf.setLong("dfs.block.size", 1L);
    conf.set(
        "dfs.block.replicator.classname",
        "org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyRaid");
    conf.setLong("hdfs.raid.min.filesize", minFileSize);
    Utils.loadTestCodecs(conf, 5, 5, 1, 3, "/raid", "/raidrs", false, true);
    conf.setInt("io.bytes.per.checksum", 1);
    excludeFile = new Path(TEST_DIR, "exclude" + System.currentTimeMillis());
    cleanFile(excludeFile);
    conf.set("dfs.hosts.exclude", excludeFile.toUri().getPath());
    writeConfigFile(excludeFile, null);

    if (!simulated) {
      cluster = new MiniDFSCluster(conf, hosts.length, true, racks, hosts);
    } else {
      long[] capacities = new long[] {CAPACITY, CAPACITY, CAPACITY};
      cluster = new MiniDFSCluster(0, conf, hosts.length, true, true, null, racks, capacities);
    }
    cluster.waitActive();
    namesystem = cluster.getNameNode().getNamesystem();
    Assert.assertTrue(
        "BlockPlacementPolicy type is not correct.",
        namesystem.replicator instanceof BlockPlacementPolicyRaid);
    policy = (BlockPlacementPolicyRaid) namesystem.replicator;
    fs = cluster.getFileSystem();
    dfs = (DistributedFileSystem) fs;
    TestDirectoryRaidDfs.setupStripeStore(conf, fs);
  }
  protected Path marshal(
      Cluster cluster, JAXBElement<?> jaxbElement, JAXBContext jaxbContext, Path outPath)
      throws FalconException {
    try {
      Marshaller marshaller = jaxbContext.createMarshaller();
      marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);

      if (LOG.isDebugEnabled()) {
        StringWriter writer = new StringWriter();
        marshaller.marshal(jaxbElement, writer);
        LOG.debug("Writing definition to {} on cluster {}", outPath, cluster.getName());
        LOG.debug(writer.getBuffer().toString());
      }

      FileSystem fs =
          HadoopClientFactory.get()
              .createProxiedFileSystem(outPath.toUri(), ClusterHelper.getConfiguration(cluster));
      OutputStream out = fs.create(outPath);
      try {
        marshaller.marshal(jaxbElement, out);
      } finally {
        out.close();
      }

      LOG.info("Marshalled {} to {}", jaxbElement.getDeclaredType(), outPath);
      return outPath;
    } catch (Exception e) {
      throw new FalconException("Unable to marshall app object", e);
    }
  }
  /**
   * Run the job
   *
   * @param params The Job parameters containing the gramSize, input/output folders, defaultCat,
   *     and encoding
   */
  public static void runJob(Parameters params) throws IOException {
    Configurable client = new JobClient();
    JobConf conf = new JobConf(BayesClassifierDriver.class);
    conf.setJobName("Bayes Classifier Driver running over input: " + params.get("testDirPath"));
    conf.setOutputKeyClass(StringTuple.class);
    conf.setOutputValueClass(DoubleWritable.class);

    FileInputFormat.setInputPaths(conf, new Path(params.get("testDirPath")));
    Path outPath = new Path(params.get("testDirPath") + "-output");
    FileOutputFormat.setOutputPath(conf, outPath);

    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setMapperClass(BayesClassifierMapper.class);
    conf.setCombinerClass(BayesClassifierReducer.class);
    conf.setReducerClass(BayesClassifierReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    conf.set(
        "io.serializations",
        "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");

    HadoopUtil.overwriteOutput(outPath);
    conf.set("bayes.parameters", params.toString());

    client.setConf(conf);
    JobClient.runJob(conf);

    Path outputFiles = new Path(outPath, "part*");
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    ConfusionMatrix matrix = readResult(dfs, outputFiles, conf, params);
    log.info("{}", matrix.summarize());
  }
  @Test
  public void testGetTokensForNamenodes() throws IOException {

    Credentials credentials = new Credentials();
    TokenCache.obtainTokensForNamenodesInternal(credentials, new Path[] {p1, p2}, jConf);

    // this token is keyed by hostname:port key.
    String fs_addr = SecurityUtil.buildDTServiceName(p1.toUri(), NameNode.DEFAULT_PORT);
    Token<DelegationTokenIdentifier> nnt = TokenCache.getDelegationToken(credentials, fs_addr);
    System.out.println("dt for " + p1 + "(" + fs_addr + ")" + " = " + nnt);
    assertNotNull("Token for nn is null", nnt);

    // verify the size
    Collection<Token<? extends TokenIdentifier>> tns = credentials.getAllTokens();
    assertEquals("number of tokens is not 1", 1, tns.size());

    boolean found = false;
    for (Token<? extends TokenIdentifier> t : tns) {
      if (t.getKind().equals(DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
          && t.getService().equals(new Text(fs_addr))) {
        found = true;
      }
      assertTrue("didn't find token for " + p1, found);
    }
  }
Example #29
  /** Asserts equality between an original table desc and a restored table desc. */
  private static void assertSchemaEquality(String tableName, Schema schema)
      throws IOException, TajoException {
    Path path = new Path(CommonTestingUtil.getTestDir(), tableName);
    TableDesc tableDesc =
        new TableDesc(
            IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, tableName),
            schema,
            "TEXT",
            new KeyValueSet(),
            path.toUri());

    // schema creation
    assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));
    catalog.createTable(tableDesc);
    assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));

    // change it for the equals test.
    schema.setQualifier(IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, tableName));
    TableDesc restored = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
    assertEquals(schema, restored.getSchema());

    // drop test
    catalog.dropTable(IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, tableName));
    assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));
  }
Example #30
 public FSDataInputStream open(Path file, int bufferSize) throws IOException {
   FTPClient client = connect();
   Path workDir = new Path(client.printWorkingDirectory());
   Path absolute = makeAbsolute(workDir, file);
   FileStatus fileStat = getFileStatus(client, absolute);
   if (fileStat.isDirectory()) {
     disconnect(client);
     throw new IOException("Path " + file + " is a directory.");
   }
   client.allocate(bufferSize);
   Path parent = absolute.getParent();
    // Change to parent directory on the server. Only then can we read the file
    // on the server by opening up an InputStream. As a side effect the working
   // directory on the server is changed to the parent directory of the file.
   // The FTP client connection is closed when close() is called on the
   // FSDataInputStream.
   client.changeWorkingDirectory(parent.toUri().getPath());
   InputStream is = client.retrieveFileStream(file.getName());
   FSDataInputStream fis = new FSDataInputStream(new FTPInputStream(is, client, statistics));
   if (!FTPReply.isPositivePreliminary(client.getReplyCode())) {
     // The ftpClient is an inconsistent state. Must close the stream
     // which in turn will logout and disconnect from FTP server
     fis.close();
     throw new IOException("Unable to open file: " + file + ", Aborting");
   }
   return fis;
 }