Example #1
 private String copyFromLocal(FileSystem fs, Path basePath, String[] files) throws IOException {
   StringBuilder csv = new StringBuilder(files.length * (basePath.toString().length() + 16));
   for (String localFile : files) {
     Path src = new Path(localFile);
     String filename = src.getName();
     Path dst = new Path(basePath, filename);
     URI localFileURI = null;
     try {
       localFileURI = new URI(localFile);
     } catch (URISyntaxException e) {
       throw new IOException(e);
     }
     if (localFileURI.getScheme() == null || localFileURI.getScheme().startsWith("file")) {
       LOG.info("Copy {} from local filesystem to {}", localFile, dst);
       fs.copyFromLocalFile(false, true, src, dst);
     } else {
       LOG.info("Copy {} from DFS to {}", localFile, dst);
       FileUtil.copy(fs, src, fs, dst, false, true, conf);
     }
     if (csv.length() > 0) {
       csv.append(LIB_JARS_SEP);
     }
     csv.append(dst.toString());
   }
   return csv.toString();
 }
  protected static List<String> getFilesInHivePartition(Partition part, JobConf jobConf) {
    List<String> result = newArrayList();

    String ignoreFileRegex = jobConf.get(HCatTap.IGNORE_FILE_IN_PARTITION_REGEX, "");
    Pattern ignoreFilePattern = Pattern.compile(ignoreFileRegex);

    try {
      Path partitionDirPath = new Path(part.getSd().getLocation());
      FileStatus[] partitionContent =
          partitionDirPath.getFileSystem(jobConf).listStatus(partitionDirPath);
      for (FileStatus currStatus : partitionContent) {
        if (!currStatus.isDir()) {
          if (!ignoreFilePattern.matcher(currStatus.getPath().getName()).matches()) {
            result.add(currStatus.getPath().toUri().getPath());
          } else {
            LOG.debug(
                "Ignoring path {} since matches ignore regex {}",
                currStatus.getPath().toUri().getPath(),
                ignoreFileRegex);
          }
        }
      }

    } catch (IOException e) {
      logError("Unable to read the content of partition '" + part.getSd().getLocation() + "'", e);
    }

    return result;
  }
  public HdfsDirectory(Path hdfsDirPath, LockFactory lockFactory, Configuration configuration)
      throws IOException {
    super(lockFactory);
    this.hdfsDirPath = hdfsDirPath;
    this.configuration = configuration;
    fileSystem = FileSystem.get(hdfsDirPath.toUri(), configuration);
    fileContext = FileContext.getFileContext(hdfsDirPath.toUri(), configuration);

    if (fileSystem instanceof DistributedFileSystem) {
      // Make sure dfs is not in safe mode
      while (((DistributedFileSystem) fileSystem).setSafeMode(SafeModeAction.SAFEMODE_GET, true)) {
        LOG.warn("The NameNode is in SafeMode - Solr will wait 5 seconds and try again.");
        try {
          Thread.sleep(5000);
        } catch (InterruptedException e) {
          Thread.interrupted();
          // continue
        }
      }
    }

    try {
      if (!fileSystem.exists(hdfsDirPath)) {
        boolean success = fileSystem.mkdirs(hdfsDirPath);
        if (!success) {
          throw new RuntimeException("Could not create directory: " + hdfsDirPath);
        }
      }
    } catch (Exception e) {
      org.apache.solr.common.util.IOUtils.closeQuietly(fileSystem);
      throw new RuntimeException("Problem creating directory: " + hdfsDirPath, e);
    }
  }
Example #4
  /** check that the requested path is listed in the user permissions file */
  private boolean checkPath(String userID, X509Certificate cert, String pathInfo) {
    if (!checkUser(userID, cert)) {
      return false;
    }

    Set<Path> pathSet = permsMap.get(userID);
    if (pathSet == null) {
      LOG.info("User " + userID + " is not listed in the user permissions file");
      return false;
    }
    if (pathInfo == null || pathInfo.length() == 0) {
      LOG.info("Can't get file path from HTTPS request; user is " + userID);
      return false;
    }

    Path userPath = new Path(pathInfo);
    while (userPath != null) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("\n Checking file path " + userPath);
      }
      if (pathSet.contains(userPath)) return true;
      userPath = userPath.getParent();
    }
    LOG.info("User " + userID + " is not authorized to access " + pathInfo);
    return false;
  }
    public LinkedHashSet<Path> scan(FileSystem fs, Path filePath, Set<String> consumedFiles) {
      LinkedHashSet<Path> pathSet = Sets.newLinkedHashSet();
      try {
        LOG.debug("Scanning {} with pattern {}", filePath, this.filePatternRegexp);
        FileStatus[] files = fs.listStatus(filePath);
        for (FileStatus status : files) {
          Path path = status.getPath();
          String filePathStr = path.toString();

          if (consumedFiles.contains(filePathStr)) {
            continue;
          }

          if (ignoredFiles.contains(filePathStr)) {
            continue;
          }

          if (acceptFile(filePathStr)) {
            LOG.debug("Found {}", filePathStr);
            pathSet.add(path);
          } else {
            // don't look at it again
            ignoredFiles.add(filePathStr);
          }
        }
      } catch (FileNotFoundException e) {
        LOG.warn("Failed to list directory {}", filePath, e);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
      return pathSet;
    }
  public void testCollect() throws Exception {
    Path p = new Path(this.ROOT_DIR, "rankfile");

    FSDataOutputStream o = this.getFileSystem().create(p);
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(o));
    bw.write("209.191.139.200\n");
    bw.write("twelve\n");
    bw.close();

    String jarFile;
    jarFile = GenericUDFGeoIP.class.getProtectionDomain().getCodeSource().getLocation().getFile();
    client.execute("add jar " + jarFile);
    jarFile =
        com.maxmind.geoip.LookupService.class
            .getProtectionDomain()
            .getCodeSource()
            .getLocation()
            .getFile();
    client.execute("add jar " + jarFile);
    // download this or put it in resources
    client.execute(" add file /tmp/GeoIP.dat");

    client.execute(
        "create temporary function geoip as 'com.jointhegrid.udf.geoip.GenericUDFGeoIP'");
    client.execute(
        "create table  ips  ( ip string) row format delimited fields terminated by '09' lines terminated by '10'");
    client.execute("load data local inpath '" + p.toString() + "' into table ips");

    client.execute("select geoip(ip, 'COUNTRY_NAME', './GeoIP.dat') FROM ips");
    List<String> expected = Arrays.asList("United States", "N/A");
    assertEquals(expected, client.fetchAll());

    client.execute("drop table ips");
  }
  public boolean refresh(final Path path) throws IOException {
    try (FileSystem fs = path.getFileSystem(new Configuration())) {
      if (_fileStatus.isPresent()) {
        Optional<FileStatus> oldStatus = this._fileStatus;
        try {
          Optional<FileStatus> newStatus = Optional.of(fs.getFileStatus(path));
          this._fileStatus = newStatus;
          this.exists = newStatus.isPresent();

          return (oldStatus.isPresent() != newStatus.isPresent()
              || oldStatus.get().getModificationTime() != newStatus.get().getModificationTime()
              || oldStatus.get().isDirectory() != newStatus.get().isDirectory()
              || oldStatus.get().getLen() != newStatus.get().getLen());
        } catch (FileNotFoundException e) {
          _fileStatus = Optional.absent();
          this.exists = false;
          return true;
        }
      } else {
        // reuse the filesystem handle opened above instead of creating a second one
        if (fs.exists(path)) {
          _fileStatus = Optional.of(fs.getFileStatus(path));
          return true;
        } else {
          return false;
        }
      }
    }
  }
  @Test
  public void testGetTokensForViewFS() throws IOException, URISyntaxException {
    Configuration conf = new Configuration(jConf);
    FileSystem dfs = dfsCluster.getFileSystem();
    String serviceName = dfs.getCanonicalServiceName();

    Path p1 = new Path("/mount1");
    Path p2 = new Path("/mount2");
    p1 = dfs.makeQualified(p1);
    p2 = dfs.makeQualified(p2);

    conf.set("fs.viewfs.mounttable.default.link./dir1", p1.toString());
    conf.set("fs.viewfs.mounttable.default.link./dir2", p2.toString());
    Credentials credentials = new Credentials();
    Path lp1 = new Path("viewfs:///dir1");
    Path lp2 = new Path("viewfs:///dir2");
    Path[] paths = new Path[2];
    paths[0] = lp1;
    paths[1] = lp2;
    TokenCache.obtainTokensForNamenodesInternal(credentials, paths, conf);

    Collection<Token<? extends TokenIdentifier>> tns = credentials.getAllTokens();
    assertEquals("number of tokens is not 1", 1, tns.size());

    boolean found = false;
    for (Token<? extends TokenIdentifier> tt : tns) {
      System.out.println("token=" + tt);
      if (tt.getKind().equals(DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
          && tt.getService().equals(new Text(serviceName))) {
        found = true;
      }
    }
    assertTrue("didn't find token for [" + lp1 + ", " + lp2 + "]", found);
  }
 /** Convert a path to a File. */
 public File pathToFile(Path path) {
   checkPath(path);
   if (!path.isAbsolute()) {
     path = new Path(getWorkingDirectory(), path);
   }
   return new File(path.toUri().getPath());
 }
  /**
   * This method needs to be overridden for TimePartitionedDataPublisher: since the output folder
   * structure contains timestamps, the files have to be moved recursively.
   *
   * <p>For example, move {writerOutput}/2015/04/08/15/output.avro to
   * {publisherOutput}/2015/04/08/15/output.avro
   */
  @Override
  protected void addWriterOutputToExistingDir(
      Path writerOutput,
      Path publisherOutput,
      WorkUnitState workUnitState,
      int branchId,
      ParallelRunner parallelRunner)
      throws IOException {

    for (FileStatus status :
        FileListUtils.listFilesRecursively(
            this.writerFileSystemByBranches.get(branchId), writerOutput)) {
      String filePathStr = status.getPath().toString();
      String pathSuffix =
          filePathStr.substring(
              filePathStr.indexOf(writerOutput.toString()) + writerOutput.toString().length() + 1);
      Path outputPath = new Path(publisherOutput, pathSuffix);

      WriterUtils.mkdirsWithRecursivePermission(
          this.publisherFileSystemByBranches.get(branchId),
          outputPath.getParent(),
          this.permissions.get(branchId));

      LOG.info(String.format("Moving %s to %s", status.getPath(), outputPath));
      parallelRunner.movePath(
          status.getPath(),
          this.publisherFileSystemByBranches.get(branchId),
          outputPath,
          Optional.<String>absent());
    }
  }
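A minimal sketch, assuming hypothetical writer and publisher roots, of the suffix computation the javadoc above describes; it only illustrates how a timestamped sub-path is carried over from the writer output to the publisher output.

import org.apache.hadoop.fs.Path;

public class PathSuffixSketch {
  public static void main(String[] args) {
    // Assumed roots, for illustration only.
    Path writerOutput = new Path("/data/writer-output");
    Path publisherOutput = new Path("/data/publisher-output");
    Path written = new Path(writerOutput, "2015/04/08/15/output.avro");

    // Same suffix extraction as in addWriterOutputToExistingDir above.
    String filePathStr = written.toString();
    String pathSuffix =
        filePathStr.substring(
            filePathStr.indexOf(writerOutput.toString()) + writerOutput.toString().length() + 1);

    // Prints /data/publisher-output/2015/04/08/15/output.avro
    System.out.println(new Path(publisherOutput, pathSuffix));
  }
}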
Example #11
    @Override
    public void execute() throws IOException {
      if (offset < 0) throw new IllegalArgumentException("Offset cannot be less than 0.");

      System.out.println("Getting file paths...");

      final Path[] sequenceFiles = SequenceFileUtility.getFilePaths(inputPathOrUri, "part");
      final ExtractionState nps = new ExtractionState();
      nps.setMaxFileExtract(max);

      if (random >= 0) {
        System.out.println("Counting records");

        int totalRecords = 0;
        for (final Path path : sequenceFiles) {
          System.out.println("... Counting from file: " + path);
          final SequenceFileUtility<Text, BytesWritable> utility =
              new TextBytesSequenceFileUtility(path.toUri(), true);
          totalRecords += utility.getNumberRecords();
        }

        System.out.println("Selecting random subset of " + random + " from " + totalRecords);

        nps.setRandomSelection(random, totalRecords);
      }

      ZipOutputStream zos = null;
      if (zipMode) {
        zos = SequenceFileUtility.openZipOutputStream(outputPathOrUri);
      }

      for (final Path path : sequenceFiles) {
        System.out.println("Extracting from " + path.getName());

        final SequenceFileUtility<Text, BytesWritable> utility =
            new TextBytesSequenceFileUtility(path.toUri(), true);
        if (queryKey == null) {
          if (zipMode) {
            utility.exportDataToZip(zos, np, nps, autoExtension, offset);
          } else {
            utility.exportData(outputPathOrUri, np, nps, autoExtension, offset);
          }
        } else {
          if (zipMode) {
            throw new UnsupportedOperationException("Not implemented yet");
          } else {
            if (!utility.findAndExport(new Text(queryKey), outputPathOrUri, offset)) {
              if (offset == 0) System.err.format("Key '%s' was not found in the file.\n", queryKey);
              else
                System.err.format(
                    "Key '%s' was not found in the file after offset %d.\n", queryKey, offset);
            }
          }
        }

        if (nps.isFinished()) break;
      }

      if (zos != null) zos.close();
    }
Example #12
  /**
   * Set up the input file which has the list of input files.
   *
   * @return boolean
   * @throws IOException
   */
  private boolean setup() throws IOException {
    estimateSavings();

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobconf);
    Path jobdir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);

    LOG.info(JOB_DIR_LABEL + "=" + jobdir);
    jobconf.set(JOB_DIR_LABEL, jobdir.toString());
    Path log = new Path(jobdir, "_logs");

    // The control file should have small size blocks. This helps
    // in spreading out the load from mappers that will be spawned.
    jobconf.setInt("dfs.blocks.size", OP_LIST_BLOCK_SIZE);

    FileOutputFormat.setOutputPath(jobconf, log);
    LOG.info("log=" + log);

    // create operation list
    FileSystem fs = jobdir.getFileSystem(jobconf);
    Path opList = new Path(jobdir, "_" + OP_LIST_LABEL);
    jobconf.set(OP_LIST_LABEL, opList.toString());
    int opCount = 0, synCount = 0;
    SequenceFile.Writer opWriter = null;

    try {
      opWriter =
          SequenceFile.createWriter(
              fs, jobconf, opList, Text.class, PolicyInfo.class, SequenceFile.CompressionType.NONE);
      for (RaidPolicyPathPair p : raidPolicyPathPairList) {
        // If a large set of files are Raided for the first time, files
        // in the same directory that tend to have the same size will end up
        // with the same map. This shuffle mixes things up, allowing a better
        // mix of files.
        java.util.Collections.shuffle(p.srcPaths);
        for (FileStatus st : p.srcPaths) {
          opWriter.append(new Text(st.getPath().toString()), p.policy);
          opCount++;
          if (++synCount > SYNC_FILE_MAX) {
            opWriter.sync();
            synCount = 0;
          }
        }
      }

    } finally {
      if (opWriter != null) {
        opWriter.close();
      }
      fs.setReplication(opList, OP_LIST_REPLICATION); // increase replication for control file
    }
    raidPolicyPathPairList.clear();

    jobconf.setInt(OP_COUNT_LABEL, opCount);
    LOG.info("Number of files=" + opCount);
    jobconf.setNumMapTasks(
        getMapCount(opCount, new JobClient(jobconf).getClusterStatus().getTaskTrackers()));
    LOG.info("jobName= " + jobName + " numMapTasks=" + jobconf.getNumMapTasks());
    return opCount != 0;
  }
Example #13
    /** Run a FileOperation */
    public void map(
        Text key,
        PolicyInfo policy,
        OutputCollector<WritableComparable, Text> out,
        Reporter reporter)
        throws IOException {
      this.reporter = reporter;
      try {
        LOG.info("Raiding file=" + key.toString() + " policy=" + policy);
        Path p = new Path(key.toString());
        FileStatus fs = p.getFileSystem(jobconf).getFileStatus(p);
        st.clear();
        RaidNode.doRaid(jobconf, policy, fs, st, reporter);

        ++succeedcount;

        reporter.incrCounter(Counter.PROCESSED_BLOCKS, st.numProcessedBlocks);
        reporter.incrCounter(Counter.PROCESSED_SIZE, st.processedSize);
        reporter.incrCounter(Counter.META_BLOCKS, st.numMetaBlocks);
        reporter.incrCounter(Counter.META_SIZE, st.metaSize);

        reporter.incrCounter(Counter.FILES_SUCCEEDED, 1);
      } catch (IOException e) {
        ++failcount;
        reporter.incrCounter(Counter.FILES_FAILED, 1);

        String s = "FAIL: " + policy + ", " + key + " " + StringUtils.stringifyException(e);
        out.collect(null, new Text(s));
        LOG.info(s);
      } finally {
        reporter.setStatus(getCountString());
      }
    }
Example #14
  public static boolean runJobs(List<Jobby> jobs, HadoopDruidIndexerConfig config) {
    String failedMessage = null;
    for (Jobby job : jobs) {
      if (failedMessage == null) {
        if (!job.run()) {
          failedMessage = String.format("Job[%s] failed!", job.getClass());
        }
      }
    }

    if (!config.getSchema().getTuningConfig().isLeaveIntermediate()) {
      if (failedMessage == null || config.getSchema().getTuningConfig().isCleanupOnFailure()) {
        Path workingPath = config.makeIntermediatePath();
        log.info("Deleting path[%s]", workingPath);
        try {
          workingPath
              .getFileSystem(injectSystemProperties(new Configuration()))
              .delete(workingPath, true);
        } catch (IOException e) {
          log.error(e, "Failed to cleanup path[%s]", workingPath);
        }
      }
    }

    if (failedMessage != null) {
      throw new ISE(failedMessage);
    }

    return true;
  }
  public void testAbort() throws IOException {
    JobConf job = new JobConf();
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext));

    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    // write output
    FileSystem localFs = FileSystem.getLocal(job);
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do abort
    committer.abortTask(tContext);
    File expectedFile =
        new File(new Path(committer.getTempTaskOutputPath(tContext), file).toString());
    assertFalse("task temp dir still exists", expectedFile.exists());

    committer.abortJob(jContext, JobStatus.State.FAILED);
    expectedFile = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString());
    assertFalse("job temp dir still exists", expectedFile.exists());
    assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);
    FileUtil.fullyDelete(new File(outDir.toString()));
  }
Example #16
  public Path write(Message... messages) throws Exception {

    synchronized (WriteUsingMR.class) {
      outputPath = TestUtils.someTemporaryFilePath();

      Path inputPath = TestUtils.someTemporaryFilePath();
      FileSystem fileSystem = inputPath.getFileSystem(conf);
      fileSystem.create(inputPath).close(); // materialize and close the (empty) dummy input file

      inputMessages = Collections.unmodifiableList(Arrays.asList(messages));

      final Job job = new Job(conf, "write");

      // input not really used
      TextInputFormat.addInputPath(job, inputPath);
      job.setInputFormatClass(TextInputFormat.class);

      job.setMapperClass(WritingMapper.class);
      job.setNumReduceTasks(0);

      job.setOutputFormatClass(ProtoParquetOutputFormat.class);
      ProtoParquetOutputFormat.setOutputPath(job, outputPath);
      ProtoParquetOutputFormat.setProtobufClass(job, TestUtils.inferRecordsClass(messages));

      waitForJob(job);

      inputMessages = null;
      return outputPath;
    }
  }
Example #17
  /** Returns a qualified path object. */
  @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
  public Path makeQualified(URI defaultUri, Path workingDir) {
    Path path = this;
    if (!isAbsolute()) {
      path = new Path(workingDir, this);
    }

    URI pathUri = path.toUri();

    String scheme = pathUri.getScheme();
    String authority = pathUri.getAuthority();
    String fragment = pathUri.getFragment();

    if (scheme != null && (authority != null || defaultUri.getAuthority() == null)) return path;

    if (scheme == null) {
      scheme = defaultUri.getScheme();
    }

    if (authority == null) {
      authority = defaultUri.getAuthority();
      if (authority == null) {
        authority = "";
      }
    }

    URI newUri = null;
    try {
      newUri = new URI(scheme, authority, normalizePath(pathUri.getPath()), null, fragment);
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }
    return new Path(newUri);
  }
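A small usage sketch of the qualification rules above, with an assumed default URI and working directory: a relative path picks up the working directory plus the default scheme and authority, while an already qualified path is returned unchanged.

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedSketch {
  public static void main(String[] args) {
    // Assumed default filesystem URI and working directory.
    URI defaultUri = URI.create("hdfs://namenode:8020");
    Path workingDir = new Path("/user/alice");

    // Relative path: the working directory is prepended, and the missing scheme
    // and authority are taken from defaultUri.
    // Prints hdfs://namenode:8020/user/alice/data/part-00000
    System.out.println(new Path("data/part-00000").makeQualified(defaultUri, workingDir));

    // Already has a scheme and an authority, so it is returned unchanged.
    System.out.println(new Path("s3a://bucket/data").makeQualified(defaultUri, workingDir));
  }
}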
Example #18
 private List<String> makeBasicArgs(
     String optionsFile,
     String otherFiles,
     String statusdir,
     String completedUrl,
     boolean enablelog,
     Boolean enableJobReconnect,
     String libdir)
     throws URISyntaxException, FileNotFoundException, IOException, InterruptedException {
   ArrayList<String> args = new ArrayList<String>();
   ArrayList<String> allFiles = new ArrayList<String>();
   if (TempletonUtils.isset(optionsFile))
     allFiles.add(TempletonUtils.hadoopFsFilename(optionsFile, appConf, runAs));
   if (TempletonUtils.isset(otherFiles)) {
     String[] ofs = TempletonUtils.hadoopFsListAsArray(otherFiles, appConf, runAs);
     allFiles.addAll(Arrays.asList(ofs));
   }
   if (TempletonUtils.isset(libdir) && TempletonUtils.isset(appConf.sqoopArchive())) {
     /**
      * Sqoop accesses databases via JDBC. This means it needs to have the appropriate JDBC drivers
      * available. Normally, the user would install Sqoop and place these jars into SQOOP_HOME/lib.
      * When WebHCat is configured to auto-ship the Sqoop tar file, we need to make sure that the
      * relevant JDBC jars are available on the target node, but we cannot modify lib/ of the
      * exploded tar because the Dist Cache intentionally prevents this. The user is expected to
      * place any JDBC jars into an HDFS directory and specify this dir in the "libdir" parameter.
      * WebHCat then ensures that these jars are localized for the launcher task and made available
      * to Sqoop. {@link
      * org.apache.hive.hcatalog.templeton.tool.LaunchMapper#handleSqoop(org.apache.hadoop.conf.Configuration,
      * java.util.Map)} {@link #makeArgs(String, String, String, String, String, boolean, String)}
      */
     LOG.debug("libdir=" + libdir);
     List<Path> jarList = TempletonUtils.hadoopFsListChildren(libdir, appConf, runAs);
     if (TempletonUtils.isset(jarList)) {
       StringBuilder sb = new StringBuilder();
       for (Path jar : jarList) {
         allFiles.add(jar.toString());
         sb.append(jar.getName()).append(',');
       }
       sb.setLength(sb.length() - 1);
        // We use the same mechanism to copy "files"/"otherFiles" and "libdir", but we only want
        // to put the contents of "libdir" in Sqoop/lib, thus we pass the list of names here.
       addDef(args, JobSubmissionConstants.Sqoop.LIB_JARS, sb.toString());
       addDef(args, AppConfig.SQOOP_HOME_PATH, appConf.get(AppConfig.SQOOP_HOME_PATH));
     }
   }
   args.addAll(
       makeLauncherArgs(
           appConf,
           statusdir,
           completedUrl,
           allFiles,
           enablelog,
           enableJobReconnect,
           JobType.SQOOP));
   if (TempletonUtils.isset(appConf.sqoopArchive())) {
     args.add("-archives");
     args.add(appConf.sqoopArchive());
   }
   return args;
 }
Example #19
  @Override
  public int execute(DriverContext driverContext) {

    PrintStream out = null;
    try {
      Path resFile = new Path(work.getResFile());
      OutputStream outS = resFile.getFileSystem(conf).create(resFile);
      out = new PrintStream(outS);

      QB qb = work.getQb();
      TokenRewriteStream stream = work.getCtx().getTokenRewriteStream();
      String program = "sq rewrite";
      ASTNode ast = work.getAst();

      try {
        addRewrites(stream, qb, program, out);
        out.println(
            "\nRewritten Query:\n"
                + stream.toString(program, ast.getTokenStartIndex(), ast.getTokenStopIndex()));
      } finally {
        stream.deleteProgram(program);
      }

      out.close();
      out = null;
      return (0);
    } catch (Exception e) {
      console.printError(
          "Failed with exception " + e.getMessage(), "\n" + StringUtils.stringifyException(e));
      return (1);
    } finally {
      IOUtils.closeStream(out);
    }
  }
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);

      InputSplit split = context.getInputSplit();

      System.out.println("***************Mapper's setup is being executed***************");
      FileSplit fileSplit = (FileSplit) split;

      long dataStart = fileSplit.getStart();
      System.out.println("***************getStart() returns " + dataStart + " ***************");

      long dataLength = fileSplit.getLength();
      System.out.println("***************getLength() returns " + dataLength + " ***************");

      String[] dataLocations = fileSplit.getLocations();
      System.out.println(
          "***************getLocations() returns "
              + dataLocations.length
              + " locations***************");

      for (int i = 0; i < dataLocations.length; i++) {
        System.out.println(
            "***************No." + i + " location is : " + dataLocations[i] + " ***************");
      }

      Path path = fileSplit.getPath();
      System.out.println(
          "***************getPath() returns " + path.toString() + " ***************");
    }
  public FSDataOutputStream create(
      Path file,
      FsPermission permission,
      boolean overwrite,
      int bufferSize,
      short replication,
      long blockSize,
      Progressable progress)
      throws IOException {

    if (exists(file)) {
      if (overwrite) {
        delete(file);
      } else {
        throw new IOException("File already exists: " + file);
      }
    }

    Path parent = file.getParent();
    if (parent != null && !mkdirs(parent)) {
      throw new IOException("Mkdirs failed to create " + parent);
    }

    Path absolute = makeAbsolute(file);
    String srep = absolute.toUri().getPath();

    return kfsImpl.create(srep, replication, bufferSize);
  }
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();

    Job job = new Job(conf, conf.get("job_name"));
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    Path output = new Path(conf.get("output_dir"));
    FileOutputFormat.setOutputPath(job, output);
    output.getFileSystem(conf).delete(output, true);

    job.setJarByClass(BrowerLogFormatMR.class);
    job.setMapperClass(BrowerLogFormatMapper.class);
    job.setReducerClass(BrowerLogFormatReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);

    int code = job.waitForCompletion(true) ? 0 : 1;
    return code;
  }
 public void list(List<Path> dirs, Writer writer) throws Exception {
   writer.write("NAME\t\tGENERATED\tFETCHER START\t\tFETCHER END\t\tFETCHED\tPARSED\n");
   for (int i = 0; i < dirs.size(); i++) {
     Path dir = dirs.get(i);
     SegmentReaderStats stats = new SegmentReaderStats();
     getStats(dir, stats);
     writer.write(dir.getName() + "\t");
     if (stats.generated == -1) writer.write("?");
     else writer.write(stats.generated + "");
     writer.write("\t\t");
     if (stats.start == -1) writer.write("?\t");
     else writer.write(sdf.format(new Date(stats.start)));
     writer.write("\t");
     if (stats.end == -1) writer.write("?");
     else writer.write(sdf.format(new Date(stats.end)));
     writer.write("\t");
     if (stats.fetched == -1) writer.write("?");
     else writer.write(stats.fetched + "");
     writer.write("\t");
     if (stats.parsed == -1) writer.write("?");
     else writer.write(stats.parsed + "");
     writer.write("\n");
     writer.flush();
   }
 }
  @Override
  public List<LuceneSegmentInputSplit> getSplits(JobContext context)
      throws IOException, InterruptedException {
    Configuration configuration = context.getConfiguration();

    LuceneStorageConfiguration lucene2SeqConfiguration =
        new LuceneStorageConfiguration(configuration);

    List<LuceneSegmentInputSplit> inputSplits = new ArrayList<>();

    List<Path> indexPaths = lucene2SeqConfiguration.getIndexPaths();
    for (Path indexPath : indexPaths) {
      ReadOnlyFileSystemDirectory directory =
          new ReadOnlyFileSystemDirectory(
              FileSystem.get(configuration), indexPath, false, configuration);
      SegmentInfos segmentInfos = new SegmentInfos();
      segmentInfos.read(directory);

      for (SegmentCommitInfo segmentInfo : segmentInfos) {
        LuceneSegmentInputSplit inputSplit =
            new LuceneSegmentInputSplit(
                indexPath, segmentInfo.info.name, segmentInfo.sizeInBytes());
        inputSplits.add(inputSplit);
        LOG.info(
            "Created {} byte input split for index '{}' segment {}",
            segmentInfo.sizeInBytes(),
            indexPath.toUri(),
            segmentInfo.info.name);
      }
    }

    return inputSplits;
  }
 /**
  * Add a {@link Path} to the list of inputs for the map-reduce job.
  *
  * @param job The {@link Job} to modify
  * @param path {@link Path} to be added to the list of inputs for the map-reduce job.
  */
 public static void addInputPath(Job job, Path path) throws IOException {
   Configuration conf = job.getConfiguration();
   path = path.getFileSystem(conf).makeQualified(path);
   String dirStr = StringUtils.escapeString(path.toString());
   String dirs = conf.get(INPUT_DIR);
   conf.set(INPUT_DIR, dirs == null ? dirStr : dirs + "," + dirStr);
 }
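A short usage sketch with assumed input directories, illustrating that repeated calls accumulate a comma-separated, escaped list of qualified paths in the job configuration under the INPUT_DIR property.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class AddInputPathSketch {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "add-input-path-sketch");

    // Assumed input directories; each call qualifies the path against its
    // filesystem and appends the escaped string to the INPUT_DIR property.
    FileInputFormat.addInputPath(job, new Path("/logs/2015/04/08"));
    FileInputFormat.addInputPath(job, new Path("/logs/2015/04/09"));

    // Prints the two qualified paths, comma-separated.
    System.out.println(job.getConfiguration().get(FileInputFormat.INPUT_DIR));
  }
}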
  @Override
  protected synchronized void startInternal() throws Exception {
    // create filesystem only now, as part of service-start. By this time, RM is
    // authenticated with kerberos so we are good to create a file-system
    // handle.
    fsConf = new Configuration(getConfig());
    fsConf.setBoolean("dfs.client.retry.policy.enabled", true);
    String retryPolicy =
        fsConf.get(
            YarnConfiguration.FS_RM_STATE_STORE_RETRY_POLICY_SPEC,
            YarnConfiguration.DEFAULT_FS_RM_STATE_STORE_RETRY_POLICY_SPEC);
    fsConf.set("dfs.client.retry.policy.spec", retryPolicy);

    String scheme = fsWorkingPath.toUri().getScheme();
    if (scheme == null) {
      scheme = FileSystem.getDefaultUri(fsConf).getScheme();
    }
    if (scheme != null) {
      String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme);
      fsConf.setBoolean(disableCacheName, true);
    }

    fs = fsWorkingPath.getFileSystem(fsConf);
    mkdirsWithRetries(rmDTSecretManagerRoot);
    mkdirsWithRetries(rmAppRoot);
    mkdirsWithRetries(amrmTokenSecretManagerRoot);
    mkdirsWithRetries(reservationRoot);
  }
    private FileSelection expandSelection(DrillFileSystem fs, FileSelection selection)
        throws IOException {
      if (metaDataFileExists(fs, selection.getFirstPath(fs))) {
        FileStatus metaRootDir = selection.getFirstPath(fs);
        Path metaFilePath = getMetadataPath(metaRootDir);

        // get the metadata for the directory by reading the metadata file
        ParquetTableMetadata_v1 metadata = Metadata.readBlockMeta(fs, metaFilePath.toString());
        List<String> fileNames = Lists.newArrayList();
        for (ParquetFileMetadata file : metadata.files) {
          fileNames.add(file.path);
        }
        // When creating the file selection, set the selection root in the form /a/b instead of
        // file:/a/b. The reason is that the file names above have been created in the form
        // /a/b/c.parquet, and the format of the selection root must match that of the file names;
        // otherwise downstream operations such as partition pruning can break.
        Path metaRootPath = Path.getPathWithoutSchemeAndAuthority(metaRootDir.getPath());
        return new FileSelection(
            fileNames, metaRootPath.toString(), metadata /* save metadata for future use */);
      } else {
        // don't expand yet; ParquetGroupScan's metadata gathering operation
        // does that.
        return selection;
      }
    }
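A tiny sketch of the stripping described in the comment above, with an assumed file: URI; Path.getPathWithoutSchemeAndAuthority drops the scheme and authority so the selection root matches file names of the /a/b/c.parquet form.

import org.apache.hadoop.fs.Path;

public class SelectionRootSketch {
  public static void main(String[] args) {
    // Assumed metadata root; stripping the scheme (and authority) gives the
    // plain /a/b form that matches file names such as /a/b/c.parquet.
    Path metaRoot = new Path("file:/data/tables/nation");
    System.out.println(Path.getPathWithoutSchemeAndAuthority(metaRoot)); // /data/tables/nation
  }
}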
 /*
  * In order to make this update atomic as part of a write, we first write the
  * data to a .new file and then rename it. Here we assume that rename is
  * atomic for the underlying file system.
  */
 protected void updateFile(Path outputPath, byte[] data, boolean makeUnreadableByAdmin)
     throws Exception {
   Path newPath = new Path(outputPath.getParent(), outputPath.getName() + ".new");
   // use writeFileWithRetries to make sure the .new file is created atomically
   writeFileWithRetries(newPath, data, makeUnreadableByAdmin);
   replaceFile(newPath, outputPath);
 }
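A minimal stand-alone sketch of the write-then-rename pattern the comment describes (the helper below is illustrative, not the store's own writeFileWithRetries/replaceFile); it uses FileContext so the final overwrite is a single rename, whose atomicity is assumed of the underlying filesystem.

import java.util.EnumSet;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Path;

public class AtomicUpdateSketch {
  // Assumed helper: write the bytes to "<file>.new" first, then rename over the
  // target so readers never observe a partially written file.
  static void atomicUpdate(FileContext fc, Path outputPath, byte[] data) throws Exception {
    Path newPath = new Path(outputPath.getParent(), outputPath.getName() + ".new");
    try (FSDataOutputStream out =
        fc.create(newPath, EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE))) {
      out.write(data);
    }
    // Single rename with OVERWRITE; atomicity is assumed of the underlying filesystem.
    fc.rename(newPath, outputPath, Options.Rename.OVERWRITE);
  }
}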
Example #29
  /** Asserts equality between an original table desc and the restored table desc. */
  private static void assertSchemaEquality(String tableName, Schema schema)
      throws IOException, TajoException {
    Path path = new Path(CommonTestingUtil.getTestDir(), tableName);
    TableDesc tableDesc =
        new TableDesc(
            IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, tableName),
            schema,
            "TEXT",
            new KeyValueSet(),
            path.toUri());

    // schema creation
    assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));
    catalog.createTable(tableDesc);
    assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));

    // change it for the equals test.
    schema.setQualifier(IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, tableName));
    TableDesc restored = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
    assertEquals(schema, restored.getSchema());

    // drop test
    catalog.dropTable(IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, tableName));
    assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));
  }
Example #30
 @Override
 public void setup(Context context) throws IOException {
   Configuration conf = context.getConfiguration();
   Path cMemMatrixPath = new Path(conf.get(RECONSTRUCTIONMATRIX));
   Path dMemMatrixPath = new Path(conf.get(MATRIXY2X));
   Path zmPath = new Path(conf.get(ZMPATH));
   Path meanPath = new Path(conf.get(YMPATH));
   int inMemMatrixNumRows = conf.getInt(YCOLS, 0);
   int inMemMatrixNumCols = conf.getInt(XCOLS, 0);
   ERR_SAMPLE_RATE = conf.getFloat(ERRSAMPLERATE, 1);
   Path tmpPath = cMemMatrixPath.getParent();
   DistributedRowMatrix distMatrix =
       new DistributedRowMatrix(cMemMatrixPath, tmpPath, inMemMatrixNumRows, inMemMatrixNumCols);
   distMatrix.setConf(conf);
   matrixC = PCACommon.toDenseMatrix(distMatrix);
   distMatrix =
       new DistributedRowMatrix(dMemMatrixPath, tmpPath, inMemMatrixNumRows, inMemMatrixNumCols);
   distMatrix.setConf(conf);
   matrixY2X = PCACommon.toDenseMatrix(distMatrix);
   try {
     zm = PCACommon.toDenseVector(zmPath, conf);
     ym = PCACommon.toDenseVector(meanPath, conf);
   } catch (IOException e) {
     e.printStackTrace();
   }
   xiCt = new DenseVector(matrixC.numRows());
   sumOfErr = new DenseVector(matrixC.numRows());
   sumOfyi = new DenseVector(matrixC.numRows());
   sumOfyc = new DenseVector(matrixC.numRows());
 }