Java Path Examples

Programming Language: Java

Namespace/Package Name: org.apache.hadoop.fs

Class/Type: Path

Examples at hotexamples.com: 30

Java org.apache.hadoop.fs Path is a class in the Hadoop file system API that names a file or directory in a Hadoop-supported file system such as HDFS (the Hadoop Distributed File System). It provides methods for creating, inspecting, and manipulating file paths within a Hadoop cluster. The package is org.apache.hadoop.fs.

Example 1: Creating a new Path object

// Build a Path from an absolute path string; no scheme or authority is given here.
Path path = new Path("/user/hadoop/input/file.txt");

This code creates a new Path object that represents the file "/user/hadoop/input/file.txt" in the HDFS.

Example 2: Accessing file or directory information

// Print the name and kind (directory or not) of every entry listed under the directory.
Path path = new Path("/user/hadoop/input");
FileSystem fs = FileSystem.get(new Configuration());
for (FileStatus entry : fs.listStatus(path)) {
  Path entryPath = entry.getPath();
  System.out.println("File Name : " + entryPath.getName());
  System.out.println("Is Directory? " + entry.isDirectory());
}

This code retrieves information about the files and directories within the "/user/hadoop/input" directory. It lists the name of each file or directory and specifies whether it is a directory or not.

Example 3: Resolving file paths

// org.apache.hadoop.fs.Path has no resolve() method; the (parent, child) constructor
// resolves a relative child path against the parent directory instead.
Path path1 = new Path("/user/hadoop/input");
Path path2 = new Path("../../file.txt");
Path fullPath = new Path(path1, path2);
System.out.println("Full Path: " + fullPath.toString());

This code resolves the relative path "../../file.txt" against the "/user/hadoop/input" directory: navigating up two levels from that directory leads to "/user", so the resulting full path is "/user/file.txt". Overall, Java org.apache.hadoop.fs Path is a useful class for working with file paths in a Hadoop cluster. It provides a range of methods for creating, inspecting, and manipulating file paths, making it an essential class for developers working with Hadoop.

Java Path - 30 examples found. These are the top rated real world Java examples of org.apache.hadoop.fs.Path extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

toUri(30)

getFileSystem(30)

getName(30)

getParent(30)

toString(30)

makeQualified(30)

isAbsolute(19)

suffix(15)

equals(14)

depth(5)

hashCode(3)

initialize(3)

compareTo(3)

getPathWithoutSchemeAndAuthority(3)

isRoot(2)

checkPathArg(2)

isUriPathAbsolute(1)

Example #1

Show file

File: StramClient.java Project: andyperlitch/Apex

 /**
  * Copies every entry of {@code files} into {@code basePath} on {@code fs} and returns the
  * destination paths joined with {@code LIB_JARS_SEP}.
  *
  * <p>An entry whose URI has no scheme, or a scheme starting with "file", is copied from the local
  * file system; any other entry is copied with {@code fs} as both source and target file system.
  *
  * @param fs target file system
  * @param basePath directory that receives the copies
  * @param files source locations, each parseable as a URI
  * @return the separator-joined destination paths
  * @throws IOException if an entry is not a valid URI or a copy fails
  */
 private String copyFromLocal(FileSystem fs, Path basePath, String[] files) throws IOException {
   StringBuilder joined = new StringBuilder(files.length * (basePath.toString().length() + 16));
   for (String entry : files) {
     Path source = new Path(entry);
     Path target = new Path(basePath, source.getName());

     final String scheme;
     try {
       scheme = new URI(entry).getScheme();
     } catch (URISyntaxException e) {
       throw new IOException(e);
     }

     boolean fromLocal = scheme == null || scheme.startsWith("file");
     if (fromLocal) {
       LOG.info("Copy {} from local filesystem to {}", entry, target);
       fs.copyFromLocalFile(false, true, source, target);
     } else {
       LOG.info("Copy {} from DFS to {}", entry, target);
       FileUtil.copy(fs, source, fs, target, false, true, conf);
     }

     // Separator goes between entries only, never in front of the first one.
     if (joined.length() > 0) {
       joined.append(LIB_JARS_SEP);
     }
     joined.append(target.toString());
   }
   return joined.toString();
 }

Example #2

Show file

File: CascadingHCatUtil.java Project: ruseel/cascading.hive

  /**
   * Lists the non-directory entries directly under the storage location of {@code part}, skipping
   * entries whose name fully matches the regex configured under
   * {@code HCatTap.IGNORE_FILE_IN_PARTITION_REGEX} (empty string when unset).
   *
   * @param part partition whose storage location is listed
   * @param jobConf configuration used to read the ignore regex and to obtain the file system
   * @return the URI path component of each accepted file; on an {@link IOException} the failure is
   *     passed to {@code logError} and whatever was collected so far is returned
   */
  protected static List<String> getFilesInHivePartition(Partition part, JobConf jobConf) {
    List<String> files = newArrayList();

    String ignoreFileRegex = jobConf.get(HCatTap.IGNORE_FILE_IN_PARTITION_REGEX, "");
    Pattern ignoreFilePattern = Pattern.compile(ignoreFileRegex);

    try {
      Path partitionDir = new Path(part.getSd().getLocation());
      for (FileStatus entry : partitionDir.getFileSystem(jobConf).listStatus(partitionDir)) {
        if (entry.isDir()) {
          continue;
        }
        String entryPath = entry.getPath().toUri().getPath();
        if (ignoreFilePattern.matcher(entry.getPath().getName()).matches()) {
          LOG.debug("Ignoring path {} since matches ignore regex {}", entryPath, ignoreFileRegex);
        } else {
          files.add(entryPath);
        }
      }
    } catch (IOException e) {
      logError("Unable to read the content of partition '" + part.getSd().getLocation() + "'", e);
    }

    return files;
  }

Example #3

Show file

File: HdfsDirectory.java Project: fullstorydev/lucene-solr

  /**
   * Opens the location {@code hdfsDirPath}, waits while a {@code DistributedFileSystem} reports
   * safe mode, and creates the directory if it does not exist yet.
   *
   * @param hdfsDirPath directory backing this instance
   * @param lockFactory passed to the superclass constructor
   * @param configuration configuration used to obtain the file system and file context
   * @throws IOException declared by the file-system and file-context lookups
   * @throws RuntimeException if the directory cannot be checked or created; the file system is
   *     closed quietly before this is thrown
   */
  public HdfsDirectory(Path hdfsDirPath, LockFactory lockFactory, Configuration configuration)
      throws IOException {
    super(lockFactory);
    this.hdfsDirPath = hdfsDirPath;
    this.configuration = configuration;
    fileSystem = FileSystem.get(hdfsDirPath.toUri(), configuration);
    fileContext = FileContext.getFileContext(hdfsDirPath.toUri(), configuration);

    if (fileSystem instanceof DistributedFileSystem) {
      DistributedFileSystem dfs = (DistributedFileSystem) fileSystem;
      // Poll every five seconds until the safe-mode query returns false.
      while (dfs.setSafeMode(SafeModeAction.SAFEMODE_GET, true)) {
        LOG.warn("The NameNode is in SafeMode - Solr will wait 5 seconds and try again.");
        try {
          Thread.sleep(5000);
        } catch (InterruptedException e) {
          // Thread.interrupted() clears the interrupt flag; the loop then keeps polling.
          Thread.interrupted();
        }
      }
    }

    try {
      boolean missing = !fileSystem.exists(hdfsDirPath);
      if (missing && !fileSystem.mkdirs(hdfsDirPath)) {
        throw new RuntimeException("Could not create directory: " + hdfsDirPath);
      }
    } catch (Exception e) {
      org.apache.solr.common.util.IOUtils.closeQuietly(fileSystem);
      throw new RuntimeException("Problem creating directory: " + hdfsDirPath, e);
    }
  }

Example #4

Show file

File: ProxyFilter.java Project: JichengSong/hadoop-20

  /**
   * Checks that the requested path, or one of its ancestors, is listed for the user in the
   * permissions map.
   *
   * @return {@code true} only if {@code checkUser} accepts the user, the user has an entry in
   *     {@code permsMap}, {@code pathInfo} is non-empty, and the path or an ancestor is in the
   *     user's path set
   */
  private boolean checkPath(String userID, X509Certificate cert, String pathInfo) {
    if (!checkUser(userID, cert)) {
      return false;
    }

    Set<Path> allowed = permsMap.get(userID);
    if (allowed == null) {
      LOG.info("User " + userID + " is not listed in the user permissions file");
      return false;
    }
    if (pathInfo == null || pathInfo.length() == 0) {
      LOG.info("Can't get file path from HTTPS request; user is " + userID);
      return false;
    }

    // Walk from the requested path up through its parents until getParent() returns null.
    for (Path candidate = new Path(pathInfo);
        candidate != null;
        candidate = candidate.getParent()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("\n Checking file path " + candidate);
      }
      if (allowed.contains(candidate)) {
        return true;
      }
    }

    LOG.info("User " + userID + " is not authorized to access " + pathInfo);
    return false;
  }

Example #5

Show file

File: AbstractFileInputOperator.java Project: jbelke/incubator-apex-malhar

    /**
     * Lists {@code filePath} on {@code fs} and collects, in listing order, the entries that are
     * neither in {@code consumedFiles} nor in {@code ignoredFiles} and that {@code acceptFile}
     * accepts. Rejected entries are added to {@code ignoredFiles}.
     *
     * @return the accepted paths; empty if the directory listing throws
     *     {@link FileNotFoundException}
     * @throws RuntimeException wrapping any other {@link IOException}
     */
    public LinkedHashSet<Path> scan(FileSystem fs, Path filePath, Set<String> consumedFiles) {
      LinkedHashSet<Path> accepted = Sets.newLinkedHashSet();
      try {
        LOG.debug("Scanning {} with pattern {}", filePath, this.filePatternRegexp);
        for (FileStatus entry : fs.listStatus(filePath)) {
          Path candidate = entry.getPath();
          String candidateStr = candidate.toString();

          boolean alreadyHandled =
              consumedFiles.contains(candidateStr) || ignoredFiles.contains(candidateStr);
          if (alreadyHandled) {
            continue;
          }

          if (!acceptFile(candidateStr)) {
            // Remember the rejection so the entry is skipped on later scans.
            ignoredFiles.add(candidateStr);
            continue;
          }

          LOG.debug("Found {}", candidateStr);
          accepted.add(candidate);
        }
      } catch (FileNotFoundException e) {
        LOG.warn("Failed to list directory {}", filePath, e);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
      return accepted;
    }

Example #6

Show file

File: GenericUDFGeoIPTest.java Project: huoqi/hive-geoip

  /**
   * Writes a two-line input file, loads it into a table, and checks the output of the
   * {@code geoip} function for both rows.
   */
  public void testCollect() throws Exception {
    Path rankFile = new Path(this.ROOT_DIR, "rankfile");

    BufferedWriter writer =
        new BufferedWriter(new OutputStreamWriter(this.getFileSystem().create(rankFile)));
    writer.write("209.191.139.200\n");
    writer.write("twelve\n");
    writer.close();

    // Register the jar containing the UDF class and the jar containing LookupService.
    String udfJar =
        GenericUDFGeoIP.class.getProtectionDomain().getCodeSource().getLocation().getFile();
    client.execute("add jar " + udfJar);
    String lookupJar =
        com.maxmind.geoip.LookupService.class
            .getProtectionDomain()
            .getCodeSource()
            .getLocation()
            .getFile();
    client.execute("add jar " + lookupJar);
    // The data file must be downloaded beforehand or shipped as a resource.
    client.execute(" add file /tmp/GeoIP.dat");

    client.execute(
        "create temporary function geoip as 'com.jointhegrid.udf.geoip.GenericUDFGeoIP'");
    client.execute(
        "create table  ips  ( ip string) row format delimited fields terminated by '09' lines terminated by '10'");
    client.execute("load data local inpath '" + rankFile.toString() + "' into table ips");

    client.execute("select geoip(ip, 'COUNTRY_NAME', './GeoIP.dat') FROM ips");
    List<String> expectedRows = Arrays.asList("United States", "N/A");
    assertEquals(expectedRows, client.fetchAll());

    client.execute("drop table ips");
  }

Example #7

Show file

File: FileStatusEntry.java Project: jinhyukchang/gobblin

  /**
   * Re-reads the status of {@code path} and reports whether it differs from the cached status.
   *
   * <p>The cached status is replaced by the freshly read one, so each call compares against the
   * state seen by the previous call rather than against the first status ever recorded.
   *
   * @param path the file or directory to re-examine
   * @return {@code true} if the entry appeared, disappeared, or changed its modification time,
   *     kind (file or directory) or length since the cached status was taken; {@code false}
   *     otherwise
   * @throws IOException if the file system cannot be obtained or queried
   */
  public boolean refresh(final Path path) throws IOException {
    // NOTE(review): instances returned by getFileSystem may be shared through Hadoop's
    // FileSystem cache; closing one here could affect other users - confirm this is intended.
    try (FileSystem fs = path.getFileSystem(new Configuration())) {
      if (!_fileStatus.isPresent()) {
        // Nothing cached yet: the only possible change is that the entry now exists.
        if (!fs.exists(path)) {
          return false;
        }
        _fileStatus = Optional.of(fs.getFileStatus(path));
        return true;
      }

      FileStatus previous = _fileStatus.get();
      try {
        FileStatus current = fs.getFileStatus(path);
        // Store the new status so the next refresh compares against it.
        this._fileStatus = Optional.of(current);
        this.exists = true;

        return previous.getModificationTime() != current.getModificationTime()
            || previous.isDirectory() != current.isDirectory()
            || previous.getLen() != current.getLen();
      } catch (FileNotFoundException e) {
        // The entry vanished since the last refresh.
        _fileStatus = Optional.absent();
        this.exists = false;
        return true;
      }
    }
  }

Example #8

Show file

File: TestTokenCache.java Project: pwendell/mr2-fairscheduler

  /**
   * Mounts two qualified DFS paths under a viewfs mount table and verifies that obtaining tokens
   * for both viewfs paths yields exactly one HDFS delegation token for the DFS service.
   */
  @Test
  public void testGetTokensForViewFS() throws IOException, URISyntaxException {
    Configuration conf = new Configuration(jConf);
    FileSystem dfs = dfsCluster.getFileSystem();
    String serviceName = dfs.getCanonicalServiceName();

    Path p1 = new Path("/mount1");
    Path p2 = new Path("/mount2");
    p1 = dfs.makeQualified(p1);
    p2 = dfs.makeQualified(p2);

    conf.set("fs.viewfs.mounttable.default.link./dir1", p1.toString());
    conf.set("fs.viewfs.mounttable.default.link./dir2", p2.toString());
    Credentials credentials = new Credentials();
    Path lp1 = new Path("viewfs:///dir1");
    Path lp2 = new Path("viewfs:///dir2");
    Path[] paths = new Path[2];
    paths[0] = lp1;
    paths[1] = lp2;
    TokenCache.obtainTokensForNamenodesInternal(credentials, paths, conf);

    Collection<Token<? extends TokenIdentifier>> tns = credentials.getAllTokens();
    assertEquals("number of tokens is not 1", 1, tns.size());

    boolean found = false;
    for (Token<? extends TokenIdentifier> tt : tns) {
      System.out.println("token=" + tt);
      if (tt.getKind().equals(DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
          && tt.getService().equals(new Text(serviceName))) {
        found = true;
      }
    }
    // Assert once after scanning every token, so the check neither fails early on a
    // non-matching token nor gets skipped when the collection is empty.
    assertTrue("didn't find token for [" + lp1 + ", " + lp2 + "]", found);
  }

Example #9

Show file

File: RawLocalFileSystem.java Project: daisy8867/hadoopp

 /**
  * Converts a path to a {@link File}, resolving a non-absolute path against the working
  * directory first.
  */
 public File pathToFile(Path path) {
   checkPath(path);
   Path absolute = path.isAbsolute() ? path : new Path(getWorkingDirectory(), path);
   return new File(absolute.toUri().getPath());
 }

Example #10

Show file

File: TimePartitionedDataPublisher.java Project: jacksu/gobblin

  /**
   * Moves every file found recursively under {@code writerOutput} to the same relative location
   * under {@code publisherOutput}, creating the target parent directories first.
   *
   * <p>Overridden because the output folder structure contains timestamp directories. For
   * example, {writerOutput}/2015/04/08/15/output.avro is moved to
   * {publisherOutput}/2015/04/08/15/output.avro.
   */
  @Override
  protected void addWriterOutputToExistingDir(
      Path writerOutput,
      Path publisherOutput,
      WorkUnitState workUnitState,
      int branchId,
      ParallelRunner parallelRunner)
      throws IOException {

    for (FileStatus status :
        FileListUtils.listFilesRecursively(
            this.writerFileSystemByBranches.get(branchId), writerOutput)) {
      Path source = status.getPath();
      String sourceStr = source.toString();
      String writerRoot = writerOutput.toString();

      // Keep what follows the writer root plus one separator character.
      int suffixStart = sourceStr.indexOf(writerRoot) + writerRoot.length() + 1;
      Path outputPath = new Path(publisherOutput, sourceStr.substring(suffixStart));

      WriterUtils.mkdirsWithRecursivePermission(
          this.publisherFileSystemByBranches.get(branchId),
          outputPath.getParent(),
          this.permissions.get(branchId));

      LOG.info(String.format("Moving %s to %s", source, outputPath));
      parallelRunner.movePath(
          source,
          this.publisherFileSystemByBranches.get(branchId),
          outputPath,
          Optional.<String>absent());
    }
  }

Example #11

Show file

File: SequenceFileTool.java Project: MAQ11/openimaj

    /**
     * Extracts records from the sequence files returned by
     * {@code SequenceFileUtility.getFilePaths(inputPathOrUri, "part")}.
     *
     * <p>When {@code random >= 0} the records of all files are counted first so that a random
     * subset can be selected. Without a {@code queryKey} each file is exported either into a zip
     * stream ({@code zipMode}) or to {@code outputPathOrUri}; with a {@code queryKey} only that
     * key is searched and exported, which is not supported in zip mode.
     *
     * @throws IllegalArgumentException if {@code offset} is negative
     * @throws UnsupportedOperationException if a query key is combined with zip mode
     * @throws IOException if reading or writing fails
     */
    @Override
    public void execute() throws IOException {
      if (offset < 0) throw new IllegalArgumentException("Offset cannot be less than 0.");

      System.out.println("Getting file paths...");

      final Path[] sequenceFiles = SequenceFileUtility.getFilePaths(inputPathOrUri, "part");
      final ExtractionState nps = new ExtractionState();
      nps.setMaxFileExtract(max);

      if (random >= 0) {
        System.out.println("Counting records");

        int totalRecords = 0;
        for (final Path path : sequenceFiles) {
          System.out.println("... Counting from file: " + path);
          final SequenceFileUtility<Text, BytesWritable> utility =
              new TextBytesSequenceFileUtility(path.toUri(), true);
          totalRecords += utility.getNumberRecords();
        }

        System.out.println("Selecting random subset of " + random + " from " + totalRecords);

        nps.setRandomSelection(random, totalRecords);
      }

      ZipOutputStream zos = null;
      try {
        if (zipMode) {
          zos = SequenceFileUtility.openZipOutputStream(outputPathOrUri);
        }

        for (final Path path : sequenceFiles) {
          System.out.println("Extracting from " + path.getName());

          final SequenceFileUtility<Text, BytesWritable> utility =
              new TextBytesSequenceFileUtility(path.toUri(), true);
          if (queryKey == null) {
            if (zipMode) {
              utility.exportDataToZip(zos, np, nps, autoExtension, offset);
            } else {
              utility.exportData(outputPathOrUri, np, nps, autoExtension, offset);
            }
          } else {
            if (zipMode) {
              throw new UnsupportedOperationException("Not implemented yet");
            } else {
              if (!utility.findAndExport(new Text(queryKey), outputPathOrUri, offset)) {
                if (offset == 0)
                  System.err.format("Key '%s' was not found in the file.\n", queryKey);
                else
                  System.err.format(
                      "Key '%s' was not found in the file after offset %d.\n", queryKey, offset);
              }
            }
          }

          if (nps.isFinished()) break;
        }
      } finally {
        // Close the zip stream even when extraction fails, so the handle is not leaked.
        if (zos != null) zos.close();
      }
    }

Example #12

Show file

File: DistRaid.java Project: fire9/hadoop-20

  /**
   * Sets up the job directory and writes the operation list: a sequence file holding one
   * (file path, policy) record for every source file in {@code raidPolicyPathPairList}. The job
   * directory, the operation-list path, the record count and the number of map tasks are stored
   * in {@code jobconf}, and {@code raidPolicyPathPairList} is cleared afterwards.
   *
   * @return {@code true} if at least one record was written to the operation list
   * @throws IOException if the job directory, the file system or the operation list cannot be
   *     accessed
   */
  private boolean setup() throws IOException {
    estimateSavings();

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobconf);
    // Job directory: <system dir>/<NAME>_<randomId>.
    Path jobdir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);

    LOG.info(JOB_DIR_LABEL + "=" + jobdir);
    jobconf.set(JOB_DIR_LABEL, jobdir.toString());
    Path log = new Path(jobdir, "_logs");

    // The control file should have small size blocks. This helps
    // in spreading out the load from mappers that will be spawned.
    // NOTE(review): the key is spelled "dfs.blocks.size"; the usual HDFS block-size keys are
    // "dfs.block.size" / "dfs.blocksize" - confirm that this key is actually read anywhere.
    jobconf.setInt("dfs.blocks.size", OP_LIST_BLOCK_SIZE);

    // The job's output path is the "_logs" directory inside the job directory.
    FileOutputFormat.setOutputPath(jobconf, log);
    LOG.info("log=" + log);

    // create operation list
    FileSystem fs = jobdir.getFileSystem(jobconf);
    Path opList = new Path(jobdir, "_" + OP_LIST_LABEL);
    jobconf.set(OP_LIST_LABEL, opList.toString());
    // opCount: total records written; synCount: records written since the last sync().
    int opCount = 0, synCount = 0;
    SequenceFile.Writer opWriter = null;

    try {
      opWriter =
          SequenceFile.createWriter(
              fs, jobconf, opList, Text.class, PolicyInfo.class, SequenceFile.CompressionType.NONE);
      for (RaidPolicyPathPair p : raidPolicyPathPairList) {
        // If a large set of files are Raided for the first time, files
        // in the same directory that tend to have the same size will end up
        // with the same map. This shuffle mixes things up, allowing a better
        // mix of files.
        java.util.Collections.shuffle(p.srcPaths);
        for (FileStatus st : p.srcPaths) {
          opWriter.append(new Text(st.getPath().toString()), p.policy);
          opCount++;
          // Call sync() on the writer once more than SYNC_FILE_MAX records have accumulated.
          if (++synCount > SYNC_FILE_MAX) {
            opWriter.sync();
            synCount = 0;
          }
        }
      }

    } finally {
      if (opWriter != null) {
        opWriter.close();
      }
      // Runs on the failure path too, because it sits in the finally block.
      fs.setReplication(opList, OP_LIST_REPLICATION); // increase replication for control file
    }
    raidPolicyPathPairList.clear();

    jobconf.setInt(OP_COUNT_LABEL, opCount);
    LOG.info("Number of files=" + opCount);
    // The map count is derived from the record count and the number of task trackers.
    jobconf.setNumMapTasks(
        getMapCount(opCount, new JobClient(jobconf).getClusterStatus().getTaskTrackers()));
    LOG.info("jobName= " + jobName + " numMapTasks=" + jobconf.getNumMapTasks());
    return opCount != 0;
  }

Example #13

Show file

File: DistRaid.java Project: fire9/hadoop-20

    /**
     * Raids the file named by {@code key} under {@code policy} and updates the counters.
     *
     * <p>An {@link IOException} during raiding is not rethrown: the failure is counted, logged and
     * emitted through {@code out} with a {@code null} key. The reporter status is refreshed in
     * either case.
     */
    public void map(
        Text key,
        PolicyInfo policy,
        OutputCollector<WritableComparable, Text> out,
        Reporter reporter)
        throws IOException {
      this.reporter = reporter;
      try {
        LOG.info("Raiding file=" + key.toString() + " policy=" + policy);
        Path target = new Path(key.toString());
        FileStatus targetStatus = target.getFileSystem(jobconf).getFileStatus(target);
        st.clear();
        RaidNode.doRaid(jobconf, policy, targetStatus, st, reporter);

        ++succeedcount;

        // Publish the statistics that doRaid accumulated in st.
        reporter.incrCounter(Counter.PROCESSED_BLOCKS, st.numProcessedBlocks);
        reporter.incrCounter(Counter.PROCESSED_SIZE, st.processedSize);
        reporter.incrCounter(Counter.META_BLOCKS, st.numMetaBlocks);
        reporter.incrCounter(Counter.META_SIZE, st.metaSize);

        reporter.incrCounter(Counter.FILES_SUCCEEDED, 1);
      } catch (IOException e) {
        ++failcount;
        reporter.incrCounter(Counter.FILES_FAILED, 1);

        String failure = "FAIL: " + policy + ", " + key + " " + StringUtils.stringifyException(e);
        out.collect(null, new Text(failure));
        LOG.info(failure);
      } finally {
        reporter.setStatus(getCountString());
      }
    }

Example #14

Show file

File: JobHelper.java Project: okrische/druid

  /**
   * Runs {@code jobs} in order until one fails, then optionally deletes the intermediate path.
   *
   * <p>The intermediate path is deleted unless the tuning config asks to leave it; after a failed
   * job it is deleted only when cleanup-on-failure is enabled. A failure to delete is logged and
   * otherwise ignored.
   *
   * @return {@code true} when every job succeeded
   * @throws ISE if a job reported failure
   */
  public static boolean runJobs(List<Jobby> jobs, HadoopDruidIndexerConfig config) {
    String failedMessage = null;
    for (Jobby job : jobs) {
      if (!job.run()) {
        failedMessage = String.format("Job[%s] failed!", job.getClass());
        // Jobs after the first failure are not run.
        break;
      }
    }

    boolean leaveIntermediate = config.getSchema().getTuningConfig().isLeaveIntermediate();
    if (!leaveIntermediate
        && (failedMessage == null
            || config.getSchema().getTuningConfig().isCleanupOnFailure())) {
      Path workingPath = config.makeIntermediatePath();
      log.info("Deleting path[%s]", workingPath);
      try {
        workingPath
            .getFileSystem(injectSystemProperties(new Configuration()))
            .delete(workingPath, true);
      } catch (IOException e) {
        log.error(e, "Failed to cleanup path[%s]", workingPath);
      }
    }

    if (failedMessage != null) {
      throw new ISE(failedMessage);
    }

    return true;
  }

Example #15

Show file

File: TestFileOutputCommitter.java Project: ukulililixl/core

  /**
   * Writes task output, then checks that aborting the task removes the task's temporary output
   * and that aborting the job removes the job's temporary directory and leaves the output
   * directory empty.
   */
  public void testAbort() throws IOException {
    JobConf job = new JobConf();
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext));

    // Set up the job and the task.
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    String file = "test.txt";

    // Write output through a record writer, using the no-op reporter.
    Reporter reporter = Reporter.NULL;
    FileSystem localFs = FileSystem.getLocal(job);
    TextOutputFormat outputFormat = new TextOutputFormat();
    RecordWriter recordWriter = outputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(recordWriter, reporter);

    // Abort the task: its temporary output file must be gone.
    committer.abortTask(tContext);
    Path taskTempFile = new Path(committer.getTempTaskOutputPath(tContext), file);
    assertFalse("task temp dir still exists", new File(taskTempFile.toString()).exists());

    // Abort the job: its temporary directory must be gone and the output directory empty.
    committer.abortJob(jContext, JobStatus.State.FAILED);
    Path jobTempDir = new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME);
    assertFalse("job temp dir still exists", new File(jobTempDir.toString()).exists());
    assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);
    FileUtil.fullyDelete(new File(outDir.toString()));
  }

Example #16

Show file

File: WriteUsingMR.java Project: ajay0221/parquet-mr

  /**
   * Runs a map-only job, configured with {@code WritingMapper} and
   * {@code ProtoParquetOutputFormat}, for the given messages and returns the job's output path.
   *
   * <p>The whole method is synchronized on {@code WriteUsingMR.class}. The messages are published
   * through {@code inputMessages} for the duration of the job and the field is reset to
   * {@code null} once the job has completed.
   *
   * @param messages the messages to hand to the job
   * @return the temporary path the job wrote to
   * @throws Exception if the job cannot be set up or run
   */
  public Path write(Message... messages) throws Exception {

    synchronized (WriteUsingMR.class) {
      outputPath = TestUtils.someTemporaryFilePath();

      Path inputPath = TestUtils.someTemporaryFilePath();
      FileSystem fileSystem = inputPath.getFileSystem(conf);
      // create() returns an open output stream; close it right away so the empty placeholder
      // input file does not leak a handle.
      fileSystem.create(inputPath).close();

      inputMessages = Collections.unmodifiableList(Arrays.asList(messages));

      final Job job = new Job(conf, "write");

      // input not really used
      TextInputFormat.addInputPath(job, inputPath);
      job.setInputFormatClass(TextInputFormat.class);

      job.setMapperClass(WritingMapper.class);
      job.setNumReduceTasks(0);

      job.setOutputFormatClass(ProtoParquetOutputFormat.class);
      ProtoParquetOutputFormat.setOutputPath(job, outputPath);
      ProtoParquetOutputFormat.setProtobufClass(job, TestUtils.inferRecordsClass(messages));

      waitForJob(job);

      inputMessages = null;
      return outputPath;
    }
  }

Example #17

Show file

File: Path.java Project: Jude7/bc-hadoop2.0

  /**
   * Returns a qualified path: a relative path is first resolved against {@code workingDir}, and a
   * missing scheme or authority is taken from {@code defaultUri}.
   *
   * @param defaultUri supplies the scheme and authority when the path lacks them
   * @param workingDir base directory for a relative path
   * @return this path (or its absolute form) when it already has a scheme and either has an
   *     authority or {@code defaultUri} has none; otherwise a new path with the gaps filled in
   * @throws IllegalArgumentException if the qualified URI cannot be constructed
   */
  @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
  public Path makeQualified(URI defaultUri, Path workingDir) {
    Path path = isAbsolute() ? this : new Path(workingDir, this);

    URI pathUri = path.toUri();
    String scheme = pathUri.getScheme();
    String authority = pathUri.getAuthority();

    boolean alreadyQualified =
        scheme != null && (authority != null || defaultUri.getAuthority() == null);
    if (alreadyQualified) {
      return path;
    }

    String qualifiedScheme = scheme != null ? scheme : defaultUri.getScheme();

    String qualifiedAuthority = authority;
    if (qualifiedAuthority == null) {
      qualifiedAuthority = defaultUri.getAuthority();
      if (qualifiedAuthority == null) {
        // The URI is built with an empty authority rather than a null one.
        qualifiedAuthority = "";
      }
    }

    URI qualifiedUri;
    try {
      qualifiedUri =
          new URI(
              qualifiedScheme,
              qualifiedAuthority,
              normalizePath(pathUri.getPath()),
              null,
              pathUri.getFragment());
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }
    return new Path(qualifiedUri);
  }

Example #18

Show file

File: SqoopDelegator.java Project: WangTaoTheTonic/hive

 /**
  * Builds the argument list for a Sqoop launch.
  *
  * <p>The options file, the other files and the children of {@code libdir} are collected into the
  * list of files handed to {@code makeLauncherArgs}. The names of the {@code libdir} children are
  * additionally passed as a comma-separated definition, together with the Sqoop home path. When a
  * Sqoop archive is configured, it is appended as an {@code -archives} argument.
  *
  * @return the assembled argument list
  */
 private List<String> makeBasicArgs(
     String optionsFile,
     String otherFiles,
     String statusdir,
     String completedUrl,
     boolean enablelog,
     Boolean enableJobReconnect,
     String libdir)
     throws URISyntaxException, FileNotFoundException, IOException, InterruptedException {
   ArrayList<String> launchArgs = new ArrayList<String>();
   ArrayList<String> filesToShip = new ArrayList<String>();

   if (TempletonUtils.isset(optionsFile)) {
     filesToShip.add(TempletonUtils.hadoopFsFilename(optionsFile, appConf, runAs));
   }
   if (TempletonUtils.isset(otherFiles)) {
     String[] others = TempletonUtils.hadoopFsListAsArray(otherFiles, appConf, runAs);
     filesToShip.addAll(Arrays.asList(others));
   }

   if (TempletonUtils.isset(libdir) && TempletonUtils.isset(appConf.sqoopArchive())) {
     // Sqoop reaches databases through JDBC and therefore needs the matching driver jars, which
     // a user would normally drop into SQOOP_HOME/lib. When WebHCat auto-ships the Sqoop tar
     // file, lib/ of the exploded tar cannot be modified because the Dist Cache intentionally
     // prevents it. The user instead places the JDBC jars in an HDFS directory and names it in
     // the "libdir" parameter; WebHCat then makes sure those jars are localized for the launcher
     // task and made available to Sqoop.
     LOG.debug("libdir=" + libdir);
     List<Path> jars = TempletonUtils.hadoopFsListChildren(libdir, appConf, runAs);
     if (TempletonUtils.isset(jars)) {
       StringBuilder jarNames = new StringBuilder();
       for (Path jar : jars) {
         filesToShip.add(jar.toString());
         jarNames.append(jar.getName()).append(',');
       }
       // Drop the trailing comma.
       jarNames.setLength(jarNames.length() - 1);
       // "files"/"otherFiles" and "libdir" are copied by the same mechanism, but only the
       // contents of "libdir" belong in Sqoop/lib, hence the separate list of names.
       addDef(launchArgs, JobSubmissionConstants.Sqoop.LIB_JARS, jarNames.toString());
       addDef(launchArgs, AppConfig.SQOOP_HOME_PATH, appConf.get(AppConfig.SQOOP_HOME_PATH));
     }
   }

   launchArgs.addAll(
       makeLauncherArgs(
           appConf,
           statusdir,
           completedUrl,
           filesToShip,
           enablelog,
           enableJobReconnect,
           JobType.SQOOP));

   if (TempletonUtils.isset(appConf.sqoopArchive())) {
     launchArgs.add("-archives");
     launchArgs.add(appConf.sqoopArchive());
   }
   return launchArgs;
 }

Example #19

Show file

File: ExplainSQRewriteTask.java Project: Leolh/hive

  /**
   * Writes the rewrites and the rewritten query text to the result file named by the work object.
   *
   * @return 0 on success; 1 if any exception occurred (the error is printed to the console)
   */
  @Override
  public int execute(DriverContext driverContext) {

    PrintStream out = null;
    try {
      Path resFile = new Path(work.getResFile());
      out = new PrintStream(resFile.getFileSystem(conf).create(resFile));

      QB qb = work.getQb();
      TokenRewriteStream rewriteStream = work.getCtx().getTokenRewriteStream();
      String program = "sq rewrite";
      ASTNode ast = work.getAst();

      try {
        addRewrites(rewriteStream, qb, program, out);
        String rewritten =
            rewriteStream.toString(program, ast.getTokenStartIndex(), ast.getTokenStopIndex());
        out.println("\nRewritten Query:\n" + rewritten);
      } finally {
        // Always remove the rewrite program from the stream again.
        rewriteStream.deleteProgram(program);
      }

      out.close();
      // Already closed; null it so the outer finally has nothing left to close.
      out = null;
      return 0;
    } catch (Exception e) {
      console.printError(
          "Failed with exception " + e.getMessage(), "\n" + StringUtils.stringifyException(e));
      return 1;
    } finally {
      IOUtils.closeStream(out);
    }
  }

Example #20

Show file

File: MaxTemperatureWithCounters.java Project: sihanwang/hadoopforvessel

    /**
     * Prints diagnostic information about this mapper's input split: its start offset, length,
     * locations and path.
     *
     * @throws ClassCastException if the input split is not a {@code FileSplit}
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);

      InputSplit split = context.getInputSplit();

      System.out.println("***************Mapper's setup is being executed***************");
      FileSplit fileSplit = (FileSplit) split;

      long dataStart = fileSplit.getStart();
      System.out.println("***************GetStart() returns " + dataStart + " ***************");

      long dataLength = fileSplit.getLength();
      System.out.println("***************getLength() returns " + dataLength + " ***************");

      String[] dataLocations = fileSplit.getLocations();
      System.out.println(
          "***************getLocations() returns "
              + dataLocations.length
              + " locations***************");

      for (int i = 0; i < dataLocations.length; i++) {
        System.out.println(
            "***************No." + i + " location is : " + dataLocations[i] + " ***************");
      }

      Path path = fileSplit.getPath();
      // The value printed here comes from getPath(), so the label names that method.
      System.out.println(
          "***************getPath() returns " + path.toString() + " ***************");
    }

Example #21

Show file

File: KosmosFileSystem.java Project: NikkitaSh30/i-mapreduce

  /**
   * Creates {@code file}, replacing an existing one only when {@code overwrite} is set, and
   * creates its parent directory if needed.
   *
   * <p>Only {@code replication} and {@code bufferSize} are forwarded to {@code kfsImpl.create};
   * {@code permission}, {@code blockSize} and {@code progress} are not used by this method.
   *
   * @throws IOException if the file exists and {@code overwrite} is false, or if the parent
   *     directory cannot be created
   */
  public FSDataOutputStream create(
      Path file,
      FsPermission permission,
      boolean overwrite,
      int bufferSize,
      short replication,
      long blockSize,
      Progressable progress)
      throws IOException {

    if (exists(file)) {
      if (!overwrite) {
        throw new IOException("File already exists: " + file);
      }
      delete(file);
    }

    Path parent = file.getParent();
    boolean parentMissing = parent != null && !mkdirs(parent);
    if (parentMissing) {
      throw new IOException("Mkdirs failed to create " + parent);
    }

    String absolutePath = makeAbsolute(file).toUri().getPath();
    return kfsImpl.create(absolutePath, replication, bufferSize);
  }

Example #22

Show file

File: BrowerLogFormatMR.java Project: wisgood/mobile-core

  /**
   * Configures and runs the log-format job. The job name, input directories and output directory
   * are read from the configuration keys "job_name", "input_dir" and "output_dir"; the output
   * directory is deleted recursively before the job starts.
   *
   * @return 0 if the job completed successfully, 1 otherwise
   */
  public int run(String[] args) throws Exception {
    // Let the generic options parser process args on top of the base configuration.
    Configuration conf = new GenericOptionsParser(getConf(), args).getConfiguration();

    Job job = new Job(conf, conf.get("job_name"));
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    Path outputDir = new Path(conf.get("output_dir"));
    FileOutputFormat.setOutputPath(job, outputDir);
    outputDir.getFileSystem(conf).delete(outputDir, true);

    job.setJarByClass(BrowerLogFormatMR.class);
    job.setMapperClass(BrowerLogFormatMapper.class);
    job.setReducerClass(BrowerLogFormatReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);

    return job.waitForCompletion(true) ? 0 : 1;
  }
Example #23

Show file

File: SegmentReader.java Project: vuquangtin/news-nutch-plugin

 /**
  * Writes a tab-separated table with one row per directory in {@code dirs}: name, generated
  * count, fetcher start and end times, fetched count and parsed count. A statistic equal to -1 is
  * printed as "?". The writer is flushed after every row.
  */
 public void list(List<Path> dirs, Writer writer) throws Exception {
   writer.write("NAME\t\tGENERATED\tFETCHER START\t\tFETCHER END\t\tFETCHED\tPARSED\n");
   for (Path dir : dirs) {
     SegmentReaderStats stats = new SegmentReaderStats();
     getStats(dir, stats);

     writer.write(dir.getName() + "\t");
     writer.write(stats.generated == -1 ? "?" : stats.generated + "");
     writer.write("\t\t");
     // The unknown start time carries its own extra tab.
     writer.write(stats.start == -1 ? "?\t" : sdf.format(new Date(stats.start)));
     writer.write("\t");
     writer.write(stats.end == -1 ? "?" : sdf.format(new Date(stats.end)));
     writer.write("\t");
     writer.write(stats.fetched == -1 ? "?" : stats.fetched + "");
     writer.write("\t");
     writer.write(stats.parsed == -1 ? "?" : stats.parsed + "");
     writer.write("\n");
     writer.flush();
   }
 }

Example #24

Show file

File: LuceneSegmentInputFormat.java Project: SY141109/EvaluatorOnMahout

  /**
   * Builds one input split for every Lucene segment found under the configured index paths.
   *
   * <p>Each index path from the {@link LuceneStorageConfiguration} is opened as a read-only
   * directory, its segment infos are read, and a split is created and logged per segment.
   *
   * @param context job context supplying the configuration
   * @return the splits, in the order the index paths and their segments were visited
   * @throws IOException declared by the signature; presumably raised when an index cannot be read
   * @throws InterruptedException declared by the signature
   */
  @Override
  public List<LuceneSegmentInputSplit> getSplits(JobContext context)
      throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    LuceneStorageConfiguration storageConf = new LuceneStorageConfiguration(conf);

    List<LuceneSegmentInputSplit> splits = new ArrayList<>();

    for (Path indexPath : storageConf.getIndexPaths()) {
      // Open the index read-only and load its segment metadata.
      ReadOnlyFileSystemDirectory indexDir =
          new ReadOnlyFileSystemDirectory(FileSystem.get(conf), indexPath, false, conf);
      SegmentInfos segments = new SegmentInfos();
      segments.read(indexDir);

      for (SegmentCommitInfo segment : segments) {
        splits.add(
            new LuceneSegmentInputSplit(indexPath, segment.info.name, segment.sizeInBytes()));
        LOG.info(
            "Created {} byte input split for index '{}' segment {}",
            segment.sizeInBytes(),
            indexPath.toUri(),
            segment.info.name);
      }
    }

    return splits;
  }

Example #25

Show file

File: FileInputFormat.java Project: lasaris/hadoop-common-ngmon

 /**
  * Appends a {@link Path} to the job's list of map-reduce inputs.
  *
  * <p>The path is qualified against its own file system, escaped, and stored in the
  * {@code INPUT_DIR} configuration entry; entries already present are kept and the new one is
  * appended after a comma.
  *
  * @param job The {@link Job} to modify
  * @param path {@link Path} to be added to the list of inputs for the map-reduce job.
  * @throws IOException declared by the signature; presumably raised when the path's file system
  *     cannot be obtained
  */
 public static void addInputPath(Job job, Path path) throws IOException {
   Configuration conf = job.getConfiguration();
   Path qualified = path.getFileSystem(conf).makeQualified(path);
   String escaped = StringUtils.escapeString(qualified.toString());
   String existing = conf.get(INPUT_DIR);
   if (existing == null) {
     // First input path for this job.
     conf.set(INPUT_DIR, escaped);
   } else {
     conf.set(INPUT_DIR, existing + "," + escaped);
   }
 }

Example #26

Show file

File: FileSystemRMStateStore.java Project: aliyun-beta/aliyun-oss-hadoop-fs

  /**
   * Prepares the file-system handle used by this state store and creates its root directories.
   *
   * <p>A private copy of the service configuration is built with the DFS client retry policy
   * enabled, and the file-system cache is disabled for the working path's scheme before the
   * handle is obtained.
   *
   * @throws Exception if obtaining the file system or creating a root directory fails
   */
  @Override
  protected synchronized void startInternal() throws Exception {
    // The file-system handle is deliberately created only at service start: by this
    // time the RM is authenticated with kerberos, so creating the handle is safe.
    fsConf = new Configuration(getConfig());
    fsConf.setBoolean("dfs.client.retry.policy.enabled", true);
    fsConf.set(
        "dfs.client.retry.policy.spec",
        fsConf.get(
            YarnConfiguration.FS_RM_STATE_STORE_RETRY_POLICY_SPEC,
            YarnConfiguration.DEFAULT_FS_RM_STATE_STORE_RETRY_POLICY_SPEC));

    // Use the working path's scheme, falling back to the default file system's scheme.
    String fsScheme = fsWorkingPath.toUri().getScheme();
    if (fsScheme == null) {
      fsScheme = FileSystem.getDefaultUri(fsConf).getScheme();
    }
    if (fsScheme != null) {
      fsConf.setBoolean("fs." + fsScheme + ".impl.disable.cache", true);
    }

    fs = fsWorkingPath.getFileSystem(fsConf);

    // Root directories of the individual stores.
    mkdirsWithRetries(rmDTSecretManagerRoot);
    mkdirsWithRetries(rmAppRoot);
    mkdirsWithRetries(amrmTokenSecretManagerRoot);
    mkdirsWithRetries(reservationRoot);
  }

Example #27

Show file

File: ParquetFormatPlugin.java Project: huge040430512/drill

    /**
     * Expands a selection into the individual files listed in its metadata file, when one exists.
     *
     * @param fs file system used to look up the selection and read the metadata
     * @param selection the selection to expand
     * @return a new selection built from the metadata's file list, or {@code selection} itself
     *     when no metadata file exists
     * @throws IOException declared by the signature; presumably raised when the metadata cannot
     *     be read
     */
    private FileSelection expandSelection(DrillFileSystem fs, FileSelection selection)
        throws IOException {
      if (!metaDataFileExists(fs, selection.getFirstPath(fs))) {
        // don't expand yet; ParquetGroupScan's metadata gathering operation
        // does that.
        return selection;
      }

      FileStatus metaRootDir = selection.getFirstPath(fs);

      // get the metadata for the directory by reading the metadata file
      ParquetTableMetadata_v1 metadata =
          Metadata.readBlockMeta(fs, getMetadataPath(metaRootDir).toString());

      List<String> fileNames = Lists.newArrayList();
      for (ParquetFileMetadata fileMeta : metadata.files) {
        fileNames.add(fileMeta.path);
      }

      // The selection root is given as /a/b rather than file:/a/b. The file names collected
      // above have the form /a/b/c.parquet, and the root must use the same form; otherwise
      // downstream operations such as partition pruning can break.
      String selectionRoot =
          Path.getPathWithoutSchemeAndAuthority(metaRootDir.getPath()).toString();
      return new FileSelection(
          fileNames, selectionRoot, metadata /* save metadata for future use */);
    }

Example #28

Show file

File: FileSystemRMStateStore.java Project: aliyun-beta/aliyun-oss-hadoop-fs

 /*
  * Replaces the contents of outputPath in two steps: the data is first written to a
  * sibling file with a ".new" suffix, which is then moved over the target. This makes
  * the update atomic on the assumption that rename is atomic for the underlying file
  * system.
  */
 protected void updateFile(Path outputPath, byte[] data, boolean makeUnradableByAdmin)
     throws Exception {
   String tempName = outputPath.getName() + ".new";
   Path tempPath = new Path(outputPath.getParent(), tempName);
   // writeFileWithRetries is used so that the .new file is created atomically
   writeFileWithRetries(tempPath, data, makeUnradableByAdmin);
   replaceFile(tempPath, outputPath);
 }

Example #29

Show file

File: TestCatalog.java Project: dongjinleekr/tajo

  /**
   * Round-trips a table through the catalog and asserts that the restored schema equals the
   * original one.
   *
   * <p>The table must not exist beforehand; it is created, read back, compared and dropped again.
   * Note that {@code schema} is modified: its qualifier is set to the fully qualified table name
   * before the comparison.
   *
   * @param tableName simple name of the table, created in {@code DEFAULT_DATABASE_NAME}
   * @param schema schema to store and compare against
   */
  private static void assertSchemaEquality(String tableName, Schema schema)
      throws IOException, TajoException {
    Path tablePath = new Path(CommonTestingUtil.getTestDir(), tableName);
    TableDesc original =
        new TableDesc(
            IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, tableName),
            schema,
            "TEXT",
            new KeyValueSet(),
            tablePath.toUri());

    // Create the table; it must be absent before and present afterwards.
    assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));
    catalog.createTable(original);
    assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));

    // Qualify the expected schema so that equals() can match the restored one.
    schema.setQualifier(IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, tableName));
    TableDesc fromCatalog = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
    assertEquals(schema, fromCatalog.getSchema());

    // Drop the table again and verify it is gone.
    catalog.dropTable(IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, tableName));
    assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));
  }

Example #30

Show file

File: ReconstructionErrJob.java Project: caomw/sPCA

 /**
  * Loads the matrices and vectors named in the job configuration into memory and allocates the
  * accumulator vectors used by this task.
  *
  * <p>The two distributed matrices are read from the paths stored under
  * {@code RECONSTRUCTIONMATRIX} and {@code MATRIXY2X} and converted to dense form; the vectors
  * {@code zm} and {@code ym} are read from the paths stored under {@code ZMPATH} and
  * {@code YMPATH}. The parent directory of the first matrix path is passed to both matrices as
  * their temporary path.
  *
  * @param context task context supplying the job configuration
  * @throws IOException if reading the {@code zm} or {@code ym} vector fails (propagated from
  *     {@code PCACommon.toDenseVector})
  */
 @Override
 public void setup(Context context) throws IOException {
   Configuration conf = context.getConfiguration();
   Path cMemMatrixPath = new Path(conf.get(RECONSTRUCTIONMATRIX));
   Path dMemMatrixPath = new Path(conf.get(MATRIXY2X));
   Path zmPath = new Path(conf.get(ZMPATH));
   Path meanPath = new Path(conf.get(YMPATH));
   int inMemMatrixNumRows = conf.getInt(YCOLS, 0);
   int inMemMatrixNumCols = conf.getInt(XCOLS, 0);
   ERR_SAMPLE_RATE = conf.getFloat(ERRSAMPLERATE, 1);
   Path tmpPath = cMemMatrixPath.getParent();

   DistributedRowMatrix distMatrix =
       new DistributedRowMatrix(cMemMatrixPath, tmpPath, inMemMatrixNumRows, inMemMatrixNumCols);
   distMatrix.setConf(conf);
   matrixC = PCACommon.toDenseMatrix(distMatrix);

   distMatrix =
       new DistributedRowMatrix(dMemMatrixPath, tmpPath, inMemMatrixNumRows, inMemMatrixNumCols);
   distMatrix.setConf(conf);
   matrixY2X = PCACommon.toDenseMatrix(distMatrix);

   // Read failures are allowed to propagate. Catching and merely printing them would
   // leave zm/ym unassigned while setup appears to have succeeded, hiding the real
   // I/O error from the framework.
   zm = PCACommon.toDenseVector(zmPath, conf);
   ym = PCACommon.toDenseVector(meanPath, conf);

   xiCt = new DenseVector(matrixC.numRows());
   sumOfErr = new DenseVector(matrixC.numRows());
   sumOfyi = new DenseVector(matrixC.numRows());
   sumOfyc = new DenseVector(matrixC.numRows());
 }