private String copyFromLocal(FileSystem fs, Path basePath, String[] files) throws IOException {
  StringBuilder csv = new StringBuilder(files.length * (basePath.toString().length() + 16));
  for (String localFile : files) {
    Path src = new Path(localFile);
    String filename = src.getName();
    Path dst = new Path(basePath, filename);
    URI localFileURI = null;
    try {
      localFileURI = new URI(localFile);
    } catch (URISyntaxException e) {
      throw new IOException(e);
    }
    if (localFileURI.getScheme() == null || localFileURI.getScheme().startsWith("file")) {
      LOG.info("Copy {} from local filesystem to {}", localFile, dst);
      fs.copyFromLocalFile(false, true, src, dst);
    } else {
      LOG.info("Copy {} from DFS to {}", localFile, dst);
      FileUtil.copy(fs, src, fs, dst, false, true, conf);
    }
    if (csv.length() > 0) {
      csv.append(LIB_JARS_SEP);
    }
    csv.append(dst.toString());
  }
  return csv.toString();
}
protected static List<String> getFilesInHivePartition(Partition part, JobConf jobConf) {
  List<String> result = newArrayList();

  String ignoreFileRegex = jobConf.get(HCatTap.IGNORE_FILE_IN_PARTITION_REGEX, "");
  Pattern ignoreFilePattern = Pattern.compile(ignoreFileRegex);

  try {
    Path partitionDirPath = new Path(part.getSd().getLocation());
    FileStatus[] partitionContent =
        partitionDirPath.getFileSystem(jobConf).listStatus(partitionDirPath);
    for (FileStatus currStatus : partitionContent) {
      if (!currStatus.isDir()) {
        if (!ignoreFilePattern.matcher(currStatus.getPath().getName()).matches()) {
          result.add(currStatus.getPath().toUri().getPath());
        } else {
          LOG.debug(
              "Ignoring path {} since matches ignore regex {}",
              currStatus.getPath().toUri().getPath(),
              ignoreFileRegex);
        }
      }
    }
  } catch (IOException e) {
    logError("Unable to read the content of partition '" + part.getSd().getLocation() + "'", e);
  }

  return result;
}
public HdfsDirectory(Path hdfsDirPath, LockFactory lockFactory, Configuration configuration)
    throws IOException {
  super(lockFactory);
  this.hdfsDirPath = hdfsDirPath;
  this.configuration = configuration;
  fileSystem = FileSystem.get(hdfsDirPath.toUri(), configuration);
  fileContext = FileContext.getFileContext(hdfsDirPath.toUri(), configuration);

  if (fileSystem instanceof DistributedFileSystem) {
    // Make sure dfs is not in safe mode
    while (((DistributedFileSystem) fileSystem).setSafeMode(SafeModeAction.SAFEMODE_GET, true)) {
      LOG.warn("The NameNode is in SafeMode - Solr will wait 5 seconds and try again.");
      try {
        Thread.sleep(5000);
      } catch (InterruptedException e) {
        Thread.interrupted();
        // continue
      }
    }
  }

  try {
    if (!fileSystem.exists(hdfsDirPath)) {
      boolean success = fileSystem.mkdirs(hdfsDirPath);
      if (!success) {
        throw new RuntimeException("Could not create directory: " + hdfsDirPath);
      }
    }
  } catch (Exception e) {
    org.apache.solr.common.util.IOUtils.closeQuietly(fileSystem);
    throw new RuntimeException("Problem creating directory: " + hdfsDirPath, e);
  }
}
/** Check that the requested path is listed in the user permissions file. */
private boolean checkPath(String userID, X509Certificate cert, String pathInfo) {
  if (!checkUser(userID, cert)) {
    return false;
  }

  Set<Path> pathSet = permsMap.get(userID);
  if (pathSet == null) {
    LOG.info("User " + userID + " is not listed in the user permissions file");
    return false;
  }

  if (pathInfo == null || pathInfo.length() == 0) {
    LOG.info("Can't get file path from HTTPS request; user is " + userID);
    return false;
  }

  Path userPath = new Path(pathInfo);
  while (userPath != null) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("\n Checking file path " + userPath);
    }
    if (pathSet.contains(userPath)) {
      return true;
    }
    userPath = userPath.getParent();
  }
  LOG.info("User " + userID + " is not authorized to access " + pathInfo);
  return false;
}
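The parent-walk above is the reusable idea: a request is authorized if the requested path itself, or any of its ancestors, appears in the permitted set. A minimal, self-contained sketch of that check (class and path names are illustrative, not from the original code):

import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.fs.Path;

public class ParentWalkDemo {
  // Returns true if 'requested' equals an allowed path or lives underneath one.
  static boolean isCovered(Set<Path> allowed, Path requested) {
    for (Path p = requested; p != null; p = p.getParent()) {
      if (allowed.contains(p)) {
        return true;
      }
    }
    return false;
  }

  public static void main(String[] args) {
    Set<Path> allowed = new HashSet<>();
    allowed.add(new Path("/data/public"));
    System.out.println(isCovered(allowed, new Path("/data/public/2024/file.txt"))); // true
    System.out.println(isCovered(allowed, new Path("/data/private/file.txt")));     // false
  }
}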
public LinkedHashSet<Path> scan(FileSystem fs, Path filePath, Set<String> consumedFiles) {
  LinkedHashSet<Path> pathSet = Sets.newLinkedHashSet();
  try {
    LOG.debug("Scanning {} with pattern {}", filePath, this.filePatternRegexp);
    FileStatus[] files = fs.listStatus(filePath);
    for (FileStatus status : files) {
      Path path = status.getPath();
      String filePathStr = path.toString();
      if (consumedFiles.contains(filePathStr)) {
        continue;
      }
      if (ignoredFiles.contains(filePathStr)) {
        continue;
      }
      if (acceptFile(filePathStr)) {
        LOG.debug("Found {}", filePathStr);
        pathSet.add(path);
      } else {
        // don't look at it again
        ignoredFiles.add(filePathStr);
      }
    }
  } catch (FileNotFoundException e) {
    LOG.warn("Failed to list directory {}", filePath, e);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  return pathSet;
}
public void testCollect() throws Exception {
  Path p = new Path(this.ROOT_DIR, "rankfile");

  FSDataOutputStream o = this.getFileSystem().create(p);
  BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(o));
  bw.write("209.191.139.200\n");
  bw.write("twelve\n");
  bw.close();

  String jarFile;
  jarFile = GenericUDFGeoIP.class.getProtectionDomain().getCodeSource().getLocation().getFile();
  client.execute("add jar " + jarFile);
  jarFile =
      com.maxmind.geoip.LookupService.class
          .getProtectionDomain()
          .getCodeSource()
          .getLocation()
          .getFile();
  client.execute("add jar " + jarFile);
  // download this or put it in resources
  client.execute(" add file /tmp/GeoIP.dat");

  client.execute(
      "create temporary function geoip as 'com.jointhegrid.udf.geoip.GenericUDFGeoIP'");
  client.execute(
      "create table ips ( ip string) row format delimited fields terminated by '09' lines terminated by '10'");
  client.execute("load data local inpath '" + p.toString() + "' into table ips");
  client.execute("select geoip(ip, 'COUNTRY_NAME', './GeoIP.dat') FROM ips");

  List<String> expected = Arrays.asList("United States", "N/A");
  assertEquals(expected, client.fetchAll());
  client.execute("drop table ips");
}
public boolean refresh(final Path path) throws IOException {
  try (FileSystem fs = path.getFileSystem(new Configuration())) {
    if (_fileStatus.isPresent()) {
      Optional<FileStatus> oldStatus = this._fileStatus;
      try {
        Optional<FileStatus> newStatus = Optional.of(fs.getFileStatus(path));
        this.exists = newStatus.isPresent();
        return (oldStatus.isPresent() != this._fileStatus.isPresent()
            || oldStatus.get().getModificationTime() != newStatus.get().getModificationTime()
            || oldStatus.get().isDirectory() != newStatus.get().isDirectory()
            || oldStatus.get().getLen() != newStatus.get().getLen());
      } catch (FileNotFoundException e) {
        _fileStatus = Optional.absent();
        this.exists = false;
        return true;
      }
    } else {
      // Reuse the FileSystem handle opened above instead of creating a second one.
      if (fs.exists(path)) {
        _fileStatus = Optional.of(fs.getFileStatus(path));
        return true;
      } else {
        return false;
      }
    }
  }
}
@Test
public void testGetTokensForViewFS() throws IOException, URISyntaxException {
  Configuration conf = new Configuration(jConf);
  FileSystem dfs = dfsCluster.getFileSystem();
  String serviceName = dfs.getCanonicalServiceName();

  Path p1 = new Path("/mount1");
  Path p2 = new Path("/mount2");
  p1 = dfs.makeQualified(p1);
  p2 = dfs.makeQualified(p2);

  conf.set("fs.viewfs.mounttable.default.link./dir1", p1.toString());
  conf.set("fs.viewfs.mounttable.default.link./dir2", p2.toString());
  Credentials credentials = new Credentials();
  Path lp1 = new Path("viewfs:///dir1");
  Path lp2 = new Path("viewfs:///dir2");
  Path[] paths = new Path[] {lp1, lp2};
  TokenCache.obtainTokensForNamenodesInternal(credentials, paths, conf);
  Collection<Token<? extends TokenIdentifier>> tns = credentials.getAllTokens();
  assertEquals("number of tokens is not 1", 1, tns.size());

  boolean found = false;
  for (Token<? extends TokenIdentifier> tt : tns) {
    System.out.println("token=" + tt);
    if (tt.getKind().equals(DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
        && tt.getService().equals(new Text(serviceName))) {
      found = true;
    }
  }
  // assert once, after scanning all returned tokens
  assertTrue("didn't find token for [" + lp1 + ", " + lp2 + "]", found);
}
/** Convert a path to a File. */
public File pathToFile(Path path) {
  checkPath(path);
  if (!path.isAbsolute()) {
    path = new Path(getWorkingDirectory(), path);
  }
  return new File(path.toUri().getPath());
}
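The same resolution can be exercised outside a FileSystem subclass. A minimal sketch, assuming hadoop-common on the classpath; the working directory and file names are hypothetical:

import java.io.File;
import org.apache.hadoop.fs.Path;

public class PathToFileDemo {
  public static void main(String[] args) {
    Path workingDir = new Path("/user/demo");   // hypothetical working directory
    Path relative = new Path("data/input.txt"); // relative path to resolve
    Path resolved = relative.isAbsolute() ? relative : new Path(workingDir, relative);
    // toUri().getPath() drops any scheme/authority, leaving a plain local path
    File file = new File(resolved.toUri().getPath());
    System.out.println(file); // /user/demo/data/input.txt
  }
}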
/**
 * This method needs to be overridden for TimePartitionedDataPublisher: since the output folder
 * structure contains a timestamp, the files have to be moved recursively.
 *
 * <p>For example, move {writerOutput}/2015/04/08/15/output.avro to
 * {publisherOutput}/2015/04/08/15/output.avro
 */
@Override
protected void addWriterOutputToExistingDir(
    Path writerOutput,
    Path publisherOutput,
    WorkUnitState workUnitState,
    int branchId,
    ParallelRunner parallelRunner)
    throws IOException {
  for (FileStatus status :
      FileListUtils.listFilesRecursively(
          this.writerFileSystemByBranches.get(branchId), writerOutput)) {
    String filePathStr = status.getPath().toString();
    String pathSuffix =
        filePathStr.substring(
            filePathStr.indexOf(writerOutput.toString()) + writerOutput.toString().length() + 1);
    Path outputPath = new Path(publisherOutput, pathSuffix);

    WriterUtils.mkdirsWithRecursivePermission(
        this.publisherFileSystemByBranches.get(branchId),
        outputPath.getParent(),
        this.permissions.get(branchId));

    LOG.info(String.format("Moving %s to %s", status.getPath(), outputPath));
    parallelRunner.movePath(
        status.getPath(),
        this.publisherFileSystemByBranches.get(branchId),
        outputPath,
        Optional.<String>absent());
  }
}
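The suffix arithmetic is the piece worth isolating: everything after "{writerOutput}/" is preserved so the publisher mirrors the writer's directory layout. A standalone sketch of just that computation, with hypothetical paths:

import org.apache.hadoop.fs.Path;

public class PathSuffixDemo {
  public static void main(String[] args) {
    Path writerOutput = new Path("/writer/output");
    Path file = new Path("/writer/output/2015/04/08/15/output.avro");

    String filePathStr = file.toString();
    // Keep the portion after "<writerOutput>/" so the target mirrors the source layout.
    String pathSuffix =
        filePathStr.substring(
            filePathStr.indexOf(writerOutput.toString()) + writerOutput.toString().length() + 1);

    Path outputPath = new Path(new Path("/publisher/output"), pathSuffix);
    System.out.println(outputPath); // /publisher/output/2015/04/08/15/output.avro
  }
}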
@Override
public void execute() throws IOException {
  if (offset < 0) {
    throw new IllegalArgumentException("Offset cannot be less than 0.");
  }

  System.out.println("Getting file paths...");
  final Path[] sequenceFiles = SequenceFileUtility.getFilePaths(inputPathOrUri, "part");
  final ExtractionState nps = new ExtractionState();
  nps.setMaxFileExtract(max);

  if (random >= 0) {
    System.out.println("Counting records");
    int totalRecords = 0;
    for (final Path path : sequenceFiles) {
      System.out.println("... Counting from file: " + path);
      final SequenceFileUtility<Text, BytesWritable> utility =
          new TextBytesSequenceFileUtility(path.toUri(), true);
      totalRecords += utility.getNumberRecords();
    }
    System.out.println("Selecting random subset of " + random + " from " + totalRecords);
    nps.setRandomSelection(random, totalRecords);
  }

  ZipOutputStream zos = null;
  if (zipMode) {
    zos = SequenceFileUtility.openZipOutputStream(outputPathOrUri);
  }

  for (final Path path : sequenceFiles) {
    System.out.println("Extracting from " + path.getName());
    final SequenceFileUtility<Text, BytesWritable> utility =
        new TextBytesSequenceFileUtility(path.toUri(), true);

    if (queryKey == null) {
      if (zipMode) {
        utility.exportDataToZip(zos, np, nps, autoExtension, offset);
      } else {
        utility.exportData(outputPathOrUri, np, nps, autoExtension, offset);
      }
    } else {
      if (zipMode) {
        throw new UnsupportedOperationException("Not implemented yet");
      } else {
        if (!utility.findAndExport(new Text(queryKey), outputPathOrUri, offset)) {
          if (offset == 0) {
            System.err.format("Key '%s' was not found in the file.\n", queryKey);
          } else {
            System.err.format(
                "Key '%s' was not found in the file after offset %d.\n", queryKey, offset);
          }
        }
      }
    }

    if (nps.isFinished()) {
      break;
    }
  }

  if (zos != null) {
    zos.close();
  }
}
/**
 * Set up the input file which holds the list of input files.
 *
 * @return boolean
 * @throws IOException
 */
private boolean setup() throws IOException {
  estimateSavings();

  final String randomId = getRandomId();
  JobClient jClient = new JobClient(jobconf);
  Path jobdir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);

  LOG.info(JOB_DIR_LABEL + "=" + jobdir);
  jobconf.set(JOB_DIR_LABEL, jobdir.toString());
  Path log = new Path(jobdir, "_logs");

  // The control file should have small size blocks. This helps
  // in spreading out the load from mappers that will be spawned.
  jobconf.setInt("dfs.blocks.size", OP_LIST_BLOCK_SIZE);

  FileOutputFormat.setOutputPath(jobconf, log);
  LOG.info("log=" + log);

  // create operation list
  FileSystem fs = jobdir.getFileSystem(jobconf);
  Path opList = new Path(jobdir, "_" + OP_LIST_LABEL);
  jobconf.set(OP_LIST_LABEL, opList.toString());
  int opCount = 0, synCount = 0;
  SequenceFile.Writer opWriter = null;
  try {
    opWriter =
        SequenceFile.createWriter(
            fs, jobconf, opList, Text.class, PolicyInfo.class, SequenceFile.CompressionType.NONE);
    for (RaidPolicyPathPair p : raidPolicyPathPairList) {
      // If a large set of files are Raided for the first time, files
      // in the same directory that tend to have the same size will end up
      // with the same map. This shuffle mixes things up, allowing a better
      // mix of files.
      java.util.Collections.shuffle(p.srcPaths);
      for (FileStatus st : p.srcPaths) {
        opWriter.append(new Text(st.getPath().toString()), p.policy);
        opCount++;
        if (++synCount > SYNC_FILE_MAX) {
          opWriter.sync();
          synCount = 0;
        }
      }
    }
  } finally {
    if (opWriter != null) {
      opWriter.close();
    }
    // increase replication for the control file
    fs.setReplication(opList, OP_LIST_REPLICATION);
  }
  raidPolicyPathPairList.clear();

  jobconf.setInt(OP_COUNT_LABEL, opCount);
  LOG.info("Number of files=" + opCount);
  jobconf.setNumMapTasks(
      getMapCount(opCount, new JobClient(jobconf).getClusterStatus().getTaskTrackers()));
  LOG.info("jobName= " + jobName + " numMapTasks=" + jobconf.getNumMapTasks());
  return opCount != 0;
}
/** Run a FileOperation */
public void map(
    Text key,
    PolicyInfo policy,
    OutputCollector<WritableComparable, Text> out,
    Reporter reporter)
    throws IOException {
  this.reporter = reporter;
  try {
    LOG.info("Raiding file=" + key.toString() + " policy=" + policy);
    Path p = new Path(key.toString());
    FileStatus fs = p.getFileSystem(jobconf).getFileStatus(p);
    st.clear();
    RaidNode.doRaid(jobconf, policy, fs, st, reporter);

    ++succeedcount;

    reporter.incrCounter(Counter.PROCESSED_BLOCKS, st.numProcessedBlocks);
    reporter.incrCounter(Counter.PROCESSED_SIZE, st.processedSize);
    reporter.incrCounter(Counter.META_BLOCKS, st.numMetaBlocks);
    reporter.incrCounter(Counter.META_SIZE, st.metaSize);
    reporter.incrCounter(Counter.FILES_SUCCEEDED, 1);
  } catch (IOException e) {
    ++failcount;
    reporter.incrCounter(Counter.FILES_FAILED, 1);

    String s = "FAIL: " + policy + ", " + key + " " + StringUtils.stringifyException(e);
    out.collect(null, new Text(s));
    LOG.info(s);
  } finally {
    reporter.setStatus(getCountString());
  }
}
public static boolean runJobs(List<Jobby> jobs, HadoopDruidIndexerConfig config) {
  String failedMessage = null;
  for (Jobby job : jobs) {
    if (failedMessage == null) {
      if (!job.run()) {
        failedMessage = String.format("Job[%s] failed!", job.getClass());
      }
    }
  }

  if (!config.getSchema().getTuningConfig().isLeaveIntermediate()) {
    if (failedMessage == null || config.getSchema().getTuningConfig().isCleanupOnFailure()) {
      Path workingPath = config.makeIntermediatePath();
      log.info("Deleting path[%s]", workingPath);
      try {
        workingPath
            .getFileSystem(injectSystemProperties(new Configuration()))
            .delete(workingPath, true);
      } catch (IOException e) {
        log.error(e, "Failed to cleanup path[%s]", workingPath);
      }
    }
  }

  if (failedMessage != null) {
    throw new ISE(failedMessage);
  }

  return true;
}
public void testAbort() throws IOException {
  JobConf job = new JobConf();
  setConfForFileOutputCommitter(job);
  JobContext jContext = new JobContextImpl(job, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
  FileOutputCommitter committer = new FileOutputCommitter();
  FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext));

  // do setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);
  String file = "test.txt";

  // A reporter that does nothing
  Reporter reporter = Reporter.NULL;

  // write output
  FileSystem localFs = FileSystem.getLocal(job);
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
  writeOutput(theRecordWriter, reporter);

  // do abort
  committer.abortTask(tContext);
  File expectedFile =
      new File(new Path(committer.getTempTaskOutputPath(tContext), file).toString());
  assertFalse("task temp dir still exists", expectedFile.exists());

  committer.abortJob(jContext, JobStatus.State.FAILED);
  expectedFile = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString());
  assertFalse("job temp dir still exists", expectedFile.exists());
  assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
public Path write(Message... messages) throws Exception {
  synchronized (WriteUsingMR.class) {
    outputPath = TestUtils.someTemporaryFilePath();

    Path inputPath = TestUtils.someTemporaryFilePath();
    FileSystem fileSystem = inputPath.getFileSystem(conf);
    fileSystem.create(inputPath);

    inputMessages = Collections.unmodifiableList(Arrays.asList(messages));

    final Job job = new Job(conf, "write");

    // input not really used
    TextInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(WritingMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(ProtoParquetOutputFormat.class);
    ProtoParquetOutputFormat.setOutputPath(job, outputPath);
    ProtoParquetOutputFormat.setProtobufClass(job, TestUtils.inferRecordsClass(messages));

    waitForJob(job);

    inputMessages = null;
    return outputPath;
  }
}
/** Returns a qualified path object. */
@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
public Path makeQualified(URI defaultUri, Path workingDir) {
  Path path = this;
  if (!isAbsolute()) {
    path = new Path(workingDir, this);
  }

  URI pathUri = path.toUri();

  String scheme = pathUri.getScheme();
  String authority = pathUri.getAuthority();
  String fragment = pathUri.getFragment();

  if (scheme != null && (authority != null || defaultUri.getAuthority() == null)) {
    return path;
  }

  if (scheme == null) {
    scheme = defaultUri.getScheme();
  }

  if (authority == null) {
    authority = defaultUri.getAuthority();
    if (authority == null) {
      authority = "";
    }
  }

  URI newUri = null;
  try {
    newUri = new URI(scheme, authority, normalizePath(pathUri.getPath()), null, fragment);
  } catch (URISyntaxException e) {
    throw new IllegalArgumentException(e);
  }
  return new Path(newUri);
}
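A short illustration of the qualification rules above, with a hypothetical default URI and working directory: a relative path picks up both the working directory and the default scheme/authority, while a fully qualified path is returned unchanged.

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedDemo {
  public static void main(String[] args) {
    URI defaultUri = URI.create("hdfs://namenode:8020"); // hypothetical default filesystem
    Path workingDir = new Path("/user/demo");            // hypothetical working directory

    // Relative path: resolved against the working directory, then given the default
    // scheme and authority.
    System.out.println(new Path("logs/app.log").makeQualified(defaultUri, workingDir));
    // hdfs://namenode:8020/user/demo/logs/app.log

    // Path with its own scheme and authority: returned as-is.
    System.out.println(new Path("hdfs://other:9000/tmp/x").makeQualified(defaultUri, workingDir));
    // hdfs://other:9000/tmp/x
  }
}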
private List<String> makeBasicArgs(
    String optionsFile,
    String otherFiles,
    String statusdir,
    String completedUrl,
    boolean enablelog,
    Boolean enableJobReconnect,
    String libdir)
    throws URISyntaxException, FileNotFoundException, IOException, InterruptedException {
  ArrayList<String> args = new ArrayList<String>();
  ArrayList<String> allFiles = new ArrayList<String>();
  if (TempletonUtils.isset(optionsFile)) {
    allFiles.add(TempletonUtils.hadoopFsFilename(optionsFile, appConf, runAs));
  }
  if (TempletonUtils.isset(otherFiles)) {
    String[] ofs = TempletonUtils.hadoopFsListAsArray(otherFiles, appConf, runAs);
    allFiles.addAll(Arrays.asList(ofs));
  }
  if (TempletonUtils.isset(libdir) && TempletonUtils.isset(appConf.sqoopArchive())) {
    /**
     * Sqoop accesses databases via JDBC, so it needs the appropriate JDBC drivers available.
     * Normally, the user would install Sqoop and place these jars into SQOOP_HOME/lib. When
     * WebHCat is configured to auto-ship the Sqoop tar file, we need to make sure that the
     * relevant JDBC jars are available on the target node, but we cannot modify lib/ of the
     * exploded tar because the Distributed Cache intentionally prevents this. The user is
     * expected to place any JDBC jars into an HDFS directory and specify this dir in the
     * "libdir" parameter. WebHCat then ensures that these jars are localized for the launcher
     * task and made available to Sqoop. {@link
     * org.apache.hive.hcatalog.templeton.tool.LaunchMapper#handleSqoop(org.apache.hadoop.conf.Configuration,
     * java.util.Map)} {@link #makeArgs(String, String, String, String, String, boolean, String)}
     */
    LOG.debug("libdir=" + libdir);
    List<Path> jarList = TempletonUtils.hadoopFsListChildren(libdir, appConf, runAs);
    if (TempletonUtils.isset(jarList)) {
      StringBuilder sb = new StringBuilder();
      for (Path jar : jarList) {
        allFiles.add(jar.toString());
        sb.append(jar.getName()).append(',');
      }
      sb.setLength(sb.length() - 1);
      // We use the same mechanism to copy "files"/"otherFiles" and "libdir", but we only want to
      // put the contents of "libdir" in Sqoop/lib, thus we pass the list of names here.
      addDef(args, JobSubmissionConstants.Sqoop.LIB_JARS, sb.toString());
      addDef(args, AppConfig.SQOOP_HOME_PATH, appConf.get(AppConfig.SQOOP_HOME_PATH));
    }
  }
  args.addAll(
      makeLauncherArgs(
          appConf,
          statusdir,
          completedUrl,
          allFiles,
          enablelog,
          enableJobReconnect,
          JobType.SQOOP));
  if (TempletonUtils.isset(appConf.sqoopArchive())) {
    args.add("-archives");
    args.add(appConf.sqoopArchive());
  }
  return args;
}
@Override
public int execute(DriverContext driverContext) {
  PrintStream out = null;
  try {
    Path resFile = new Path(work.getResFile());
    OutputStream outS = resFile.getFileSystem(conf).create(resFile);
    out = new PrintStream(outS);

    QB qb = work.getQb();
    TokenRewriteStream stream = work.getCtx().getTokenRewriteStream();
    String program = "sq rewrite";
    ASTNode ast = work.getAst();

    try {
      addRewrites(stream, qb, program, out);
      out.println(
          "\nRewritten Query:\n"
              + stream.toString(program, ast.getTokenStartIndex(), ast.getTokenStopIndex()));
    } finally {
      stream.deleteProgram(program);
    }

    out.close();
    out = null;
    return (0);
  } catch (Exception e) {
    console.printError(
        "Failed with exception " + e.getMessage(), "\n" + StringUtils.stringifyException(e));
    return (1);
  } finally {
    IOUtils.closeStream(out);
  }
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  InputSplit split = context.getInputSplit();
  System.out.println("***************Mapper's setup is being executed***************");
  FileSplit fileSplit = (FileSplit) split;

  long dataStart = fileSplit.getStart();
  System.out.println("***************getStart() returns " + dataStart + " ***************");

  long dataLength = fileSplit.getLength();
  System.out.println("***************getLength() returns " + dataLength + " ***************");

  String[] dataLocations = fileSplit.getLocations();
  System.out.println(
      "***************getLocations() returns "
          + dataLocations.length
          + " locations***************");
  for (int i = 0; i < dataLocations.length; i++) {
    System.out.println(
        "***************No." + i + " location is : " + dataLocations[i] + " ***************");
  }

  Path path = fileSplit.getPath();
  System.out.println(
      "***************getPath() returns " + path.toString() + " ***************");
}
public FSDataOutputStream create(
    Path file,
    FsPermission permission,
    boolean overwrite,
    int bufferSize,
    short replication,
    long blockSize,
    Progressable progress)
    throws IOException {
  if (exists(file)) {
    if (overwrite) {
      delete(file);
    } else {
      throw new IOException("File already exists: " + file);
    }
  }

  Path parent = file.getParent();
  if (parent != null && !mkdirs(parent)) {
    throw new IOException("Mkdirs failed to create " + parent);
  }

  Path absolute = makeAbsolute(file);
  String srep = absolute.toUri().getPath();

  return kfsImpl.create(srep, replication, bufferSize);
}
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  GenericOptionsParser gop = new GenericOptionsParser(conf, args);
  conf = gop.getConfiguration();

  Job job = new Job(conf, conf.get("job_name"));
  FileInputFormat.addInputPaths(job, conf.get("input_dir"));
  Path output = new Path(conf.get("output_dir"));
  FileOutputFormat.setOutputPath(job, output);
  output.getFileSystem(conf).delete(output, true);

  job.setJarByClass(BrowerLogFormatMR.class);
  job.setMapperClass(BrowerLogFormatMapper.class);
  job.setReducerClass(BrowerLogFormatReducer.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);

  int code = job.waitForCompletion(true) ? 0 : 1;
  return code;
}
public void list(List<Path> dirs, Writer writer) throws Exception {
  writer.write("NAME\t\tGENERATED\tFETCHER START\t\tFETCHER END\t\tFETCHED\tPARSED\n");
  for (int i = 0; i < dirs.size(); i++) {
    Path dir = dirs.get(i);
    SegmentReaderStats stats = new SegmentReaderStats();
    getStats(dir, stats);
    writer.write(dir.getName() + "\t");
    if (stats.generated == -1) {
      writer.write("?");
    } else {
      writer.write(stats.generated + "");
    }
    writer.write("\t\t");
    if (stats.start == -1) {
      writer.write("?\t");
    } else {
      writer.write(sdf.format(new Date(stats.start)));
    }
    writer.write("\t");
    if (stats.end == -1) {
      writer.write("?");
    } else {
      writer.write(sdf.format(new Date(stats.end)));
    }
    writer.write("\t");
    if (stats.fetched == -1) {
      writer.write("?");
    } else {
      writer.write(stats.fetched + "");
    }
    writer.write("\t");
    if (stats.parsed == -1) {
      writer.write("?");
    } else {
      writer.write(stats.parsed + "");
    }
    writer.write("\n");
    writer.flush();
  }
}
@Override
public List<LuceneSegmentInputSplit> getSplits(JobContext context)
    throws IOException, InterruptedException {
  Configuration configuration = context.getConfiguration();

  LuceneStorageConfiguration lucene2SeqConfiguration =
      new LuceneStorageConfiguration(configuration);

  List<LuceneSegmentInputSplit> inputSplits = new ArrayList<>();
  List<Path> indexPaths = lucene2SeqConfiguration.getIndexPaths();
  for (Path indexPath : indexPaths) {
    ReadOnlyFileSystemDirectory directory =
        new ReadOnlyFileSystemDirectory(
            FileSystem.get(configuration), indexPath, false, configuration);
    SegmentInfos segmentInfos = new SegmentInfos();
    segmentInfos.read(directory);

    for (SegmentCommitInfo segmentInfo : segmentInfos) {
      LuceneSegmentInputSplit inputSplit =
          new LuceneSegmentInputSplit(
              indexPath, segmentInfo.info.name, segmentInfo.sizeInBytes());
      inputSplits.add(inputSplit);
      LOG.info(
          "Created {} byte input split for index '{}' segment {}",
          segmentInfo.sizeInBytes(),
          indexPath.toUri(),
          segmentInfo.info.name);
    }
  }

  return inputSplits;
}
/**
 * Add a {@link Path} to the list of inputs for the map-reduce job.
 *
 * @param job The {@link Job} to modify
 * @param path {@link Path} to be added to the list of inputs for the map-reduce job.
 */
public static void addInputPath(Job job, Path path) throws IOException {
  Configuration conf = job.getConfiguration();
  path = path.getFileSystem(conf).makeQualified(path);
  String dirStr = StringUtils.escapeString(path.toString());
  String dirs = conf.get(INPUT_DIR);
  conf.set(INPUT_DIR, dirs == null ? dirStr : dirs + "," + dirStr);
}
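Repeated calls accumulate a comma-separated list under INPUT_DIR, with each path qualified and escaped so commas inside a path do not split the list. A minimal usage sketch; the property key shown is the Hadoop 2.x name, and the exact qualified form depends on the default filesystem:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class AddInputPathDemo {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "demo");
    // Each call qualifies the path and appends it to the comma-separated input list.
    FileInputFormat.addInputPath(job, new Path("/data/day1"));
    FileInputFormat.addInputPath(job, new Path("/data/day2"));
    System.out.println(job.getConfiguration().get("mapreduce.input.fileinputformat.inputdir"));
    // e.g. file:/data/day1,file:/data/day2 when running against the local filesystem
  }
}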
@Override
protected synchronized void startInternal() throws Exception {
  // create filesystem only now, as part of service-start. By this time, RM is
  // authenticated with kerberos so we are good to create a file-system handle.
  fsConf = new Configuration(getConfig());
  fsConf.setBoolean("dfs.client.retry.policy.enabled", true);
  String retryPolicy =
      fsConf.get(
          YarnConfiguration.FS_RM_STATE_STORE_RETRY_POLICY_SPEC,
          YarnConfiguration.DEFAULT_FS_RM_STATE_STORE_RETRY_POLICY_SPEC);
  fsConf.set("dfs.client.retry.policy.spec", retryPolicy);

  String scheme = fsWorkingPath.toUri().getScheme();
  if (scheme == null) {
    scheme = FileSystem.getDefaultUri(fsConf).getScheme();
  }
  if (scheme != null) {
    String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme);
    fsConf.setBoolean(disableCacheName, true);
  }

  fs = fsWorkingPath.getFileSystem(fsConf);
  mkdirsWithRetries(rmDTSecretManagerRoot);
  mkdirsWithRetries(rmAppRoot);
  mkdirsWithRetries(amrmTokenSecretManagerRoot);
  mkdirsWithRetries(reservationRoot);
}
private FileSelection expandSelection(DrillFileSystem fs, FileSelection selection)
    throws IOException {
  if (metaDataFileExists(fs, selection.getFirstPath(fs))) {
    FileStatus metaRootDir = selection.getFirstPath(fs);
    Path metaFilePath = getMetadataPath(metaRootDir);

    // get the metadata for the directory by reading the metadata file
    ParquetTableMetadata_v1 metadata = Metadata.readBlockMeta(fs, metaFilePath.toString());
    List<String> fileNames = Lists.newArrayList();
    for (ParquetFileMetadata file : metadata.files) {
      fileNames.add(file.path);
    }
    // when creating the file selection, set the selection root in the form /a/b instead of
    // file:/a/b. The reason is that the file names above have been created in the form
    // /a/b/c.parquet and the format of the selection root must match that of the file names
    // otherwise downstream operations such as partition pruning can break.
    Path metaRootPath = Path.getPathWithoutSchemeAndAuthority(metaRootDir.getPath());
    return new FileSelection(
        fileNames, metaRootPath.toString(), metadata /* save metadata for future use */);
  } else {
    // don't expand yet; ParquetGroupScan's metadata gathering operation does that.
    return selection;
  }
}
/*
 * In order to make this update atomic as part of a write, we first write the
 * data to a .new file and then rename it. Here we are assuming that rename is
 * atomic for the underlying file system.
 */
protected void updateFile(Path outputPath, byte[] data, boolean makeUnreadableByAdmin)
    throws Exception {
  Path newPath = new Path(outputPath.getParent(), outputPath.getName() + ".new");
  // use writeFileWithRetries to make sure the .new file is created atomically
  writeFileWithRetries(newPath, data, makeUnreadableByAdmin);
  replaceFile(newPath, outputPath);
}
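The pattern behind updateFile is write-to-a-sibling-then-rename. A simplified, self-contained sketch of the same idea against a plain FileSystem, without retries; the delete-then-rename shown here leaves a small window that the store's replaceFile presumably handles differently:

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AtomicUpdateSketch {
  // Write to a ".new" sibling first, then rename it over the target. On filesystems with
  // atomic rename (e.g. HDFS), readers never observe a half-written target file.
  static void atomicWrite(FileSystem fs, Path target, byte[] data) throws IOException {
    Path tmp = new Path(target.getParent(), target.getName() + ".new");
    try (FSDataOutputStream out = fs.create(tmp, true)) {
      out.write(data);
    }
    if (fs.exists(target) && !fs.delete(target, false)) {
      throw new IOException("could not remove old file: " + target);
    }
    if (!fs.rename(tmp, target)) {
      throw new IOException("rename failed: " + tmp + " -> " + target);
    }
  }

  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    atomicWrite(fs, new Path("/tmp/state/app.bin"), "state".getBytes(StandardCharsets.UTF_8));
  }
}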
/** It asserts the equality between an original table desc and a restored table desc. */
private static void assertSchemaEquality(String tableName, Schema schema)
    throws IOException, TajoException {
  Path path = new Path(CommonTestingUtil.getTestDir(), tableName);
  TableDesc tableDesc =
      new TableDesc(
          IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, tableName),
          schema,
          "TEXT",
          new KeyValueSet(),
          path.toUri());

  // schema creation
  assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));
  catalog.createTable(tableDesc);
  assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));

  // change it for the equals test.
  schema.setQualifier(IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, tableName));
  TableDesc restored = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
  assertEquals(schema, restored.getSchema());

  // drop test
  catalog.dropTable(IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, tableName));
  assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));
}
@Override
public void setup(Context context) throws IOException {
  Configuration conf = context.getConfiguration();
  Path cMemMatrixPath = new Path(conf.get(RECONSTRUCTIONMATRIX));
  Path dMemMatrixPath = new Path(conf.get(MATRIXY2X));
  Path zmPath = new Path(conf.get(ZMPATH));
  Path meanPath = new Path(conf.get(YMPATH));
  int inMemMatrixNumRows = conf.getInt(YCOLS, 0);
  int inMemMatrixNumCols = conf.getInt(XCOLS, 0);
  ERR_SAMPLE_RATE = conf.getFloat(ERRSAMPLERATE, 1);

  Path tmpPath = cMemMatrixPath.getParent();
  DistributedRowMatrix distMatrix =
      new DistributedRowMatrix(cMemMatrixPath, tmpPath, inMemMatrixNumRows, inMemMatrixNumCols);
  distMatrix.setConf(conf);
  matrixC = PCACommon.toDenseMatrix(distMatrix);

  distMatrix =
      new DistributedRowMatrix(dMemMatrixPath, tmpPath, inMemMatrixNumRows, inMemMatrixNumCols);
  distMatrix.setConf(conf);
  matrixY2X = PCACommon.toDenseMatrix(distMatrix);

  try {
    zm = PCACommon.toDenseVector(zmPath, conf);
    ym = PCACommon.toDenseVector(meanPath, conf);
  } catch (IOException e) {
    e.printStackTrace();
  }

  xiCt = new DenseVector(matrixC.numRows());
  sumOfErr = new DenseVector(matrixC.numRows());
  sumOfyi = new DenseVector(matrixC.numRows());
  sumOfyc = new DenseVector(matrixC.numRows());
}