/**
 * This is the main driver for recursively copying directories across file systems. It takes at
 * least two command-line parameters: a source URL and a destination URL. It then essentially does
 * an "ls -lR" on the source URL and writes the output in a round-robin manner to all the map
 * input files. The mapper actually copies the files allotted to it. The reduce is empty.
 */
public int run(String[] args) {
  try {
    copy(conf, Arguments.valueOf(args, conf));
    return 0;
  } catch (IllegalArgumentException e) {
    System.err.println(StringUtils.stringifyException(e) + "\n" + usage);
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  } catch (DuplicationException e) {
    System.err.println(StringUtils.stringifyException(e));
    return DuplicationException.ERROR_CODE;
  } catch (RemoteException e) {
    final IOException unwrapped =
        e.unwrapRemoteException(
            FileNotFoundException.class,
            AccessControlException.class,
            QuotaExceededException.class);
    System.err.println(StringUtils.stringifyException(unwrapped));
    return -3;
  } catch (Exception e) {
    System.err.println(
        "With failures, global counters are inaccurate; " + "consider running with -i");
    System.err.println("Copy failed: " + StringUtils.stringifyException(e));
    return -999;
  }
}
/** Exercise 4 - Read a list of people from Cassandra */ private static void readPeopleFromCassandra(JavaSparkContext javaSparkContext) { SparkContextJavaFunctions sparkContextJavaFunctions = CassandraJavaUtil.javaFunctions(javaSparkContext); CassandraJavaRDD<Person> personCassandraJavaRDD = sparkContextJavaFunctions.cassandraTable( "test", "people", CassandraJavaUtil.mapRowTo(Person.class)); JavaRDD<String> readPeopleRDD = personCassandraJavaRDD.map(Person::toString); System.out.println( "Data as Person beans: \n" + StringUtils.join("\n", readPeopleRDD.collect())); // select id from test.people where id=45; JavaRDD<String> rdd4 = sparkContextJavaFunctions .cassandraTable("test", "people") .select("id") .where("id = 45") .map(CassandraRow::toString); System.out.println( "Data with only 'id' column fetched: \n" + StringUtils.join("\n", rdd4.collect())); }
public String getInodeLimitText() { long inodes = fsn.dir.totalInodes(); long blocks = fsn.getBlocksTotal(); long maxobjects = fsn.getMaxObjects(); MemoryMXBean mem = ManagementFactory.getMemoryMXBean(); MemoryUsage heap = mem.getHeapMemoryUsage(); long totalMemory = heap.getUsed(); long maxMemory = heap.getMax(); long used = (totalMemory * 100) / maxMemory; String str = inodes + " files and directories, " + blocks + " blocks = " + (inodes + blocks) + " total"; if (maxobjects != 0) { long pct = ((inodes + blocks) * 100) / maxobjects; str += " / " + maxobjects + " (" + pct + "%)"; } str += ". Heap Size is " + StringUtils.byteDesc(totalMemory) + " / " + StringUtils.byteDesc(maxMemory) + " (" + used + "%) <br>"; return str; }
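The heap figures above are rendered with StringUtils.byteDesc. Below is a minimal sketch of that helper, assuming org.apache.hadoop.util.StringUtils is on the classpath; the class name and heap numbers are made up for illustration.

// Minimal sketch (assumed: org.apache.hadoop.util.StringUtils) showing how byteDesc()
// renders raw byte counts like the heap figures above.
import org.apache.hadoop.util.StringUtils;

public class ByteDescDemo {
  public static void main(String[] args) {
    long heapUsed = 536_870_912L;   // 512 MB, an invented heap figure
    long heapMax = 4_294_967_296L;  // 4 GB
    System.out.println(StringUtils.byteDesc(heapUsed) + " / " + StringUtils.byteDesc(heapMax));
    // Expected output along the lines of: 512 MB / 4 GB
  }
}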
@Override public int run(Configuration conf, List<String> args) throws IOException { final String path = StringUtils.popOptionWithArgument("-path", args); if (path == null) { System.err.println("You must specify a path with -path."); return 1; } final String keyName = StringUtils.popOptionWithArgument("-keyName", args); if (keyName == null) { System.err.println("You must specify a key name with -keyName."); return 1; } if (!args.isEmpty()) { System.err.println("Can't understand argument: " + args.get(0)); return 1; } final DistributedFileSystem dfs = getDFS(conf); try { dfs.createEncryptionZone(new Path(path), keyName); System.out.println("Added encryption zone " + path); } catch (IOException e) { System.err.println(prettifyException(e)); return 2; } return 0; }
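A small sketch of the popOptionWithArgument() pattern used above, assuming the org.apache.hadoop.util.StringUtils overload that consumes a mutable List<String>; the class name and argument values are illustrative, not part of the original tool.

// Sketch of popOptionWithArgument(): it removes the flag and its value from a *mutable*
// argument list and returns the value (or null if the flag is absent).
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.util.StringUtils;

public class PopOptionDemo {
  public static void main(String[] rawArgs) {
    List<String> args = new ArrayList<>(Arrays.asList("-path", "/enc/zone1", "-keyName", "key1"));
    String path = StringUtils.popOptionWithArgument("-path", args);
    String keyName = StringUtils.popOptionWithArgument("-keyName", args);
    System.out.println("path=" + path + ", keyName=" + keyName + ", leftover=" + args);
    // Prints: path=/enc/zone1, keyName=key1, leftover=[]
  }
}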
/** * Get the list of input {@link Path}s for the map-reduce job. * * @param context The job * @return the list of input {@link Path}s for the map-reduce job. */ public static Path[] getInputPaths(JobContext context) { String dirs = context.getConfiguration().get(INPUT_DIR, ""); String[] list = StringUtils.split(dirs); Path[] result = new Path[list.length]; for (int i = 0; i < list.length; i++) { result[i] = new Path(StringUtils.unEscapeString(list[i])); } return result; }
Upgradeable instantiate() throws IOException {
  try {
    return (Upgradeable) Class.forName(getClassName()).newInstance();
  } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
    throw new IOException(StringUtils.stringifyException(e));
  }
}
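Many of these snippets wrap or log errors via StringUtils.stringifyException. Here is a minimal sketch of that idiom; the missing class name is hypothetical and only there to force the failure.

// Sketch of the stringifyException() idiom: it returns the full stack trace of a Throwable
// as a String, handy when re-wrapping checked exceptions or logging them.
import java.io.IOException;
import org.apache.hadoop.util.StringUtils;

public class StringifyExceptionDemo {
  public static void main(String[] args) {
    try {
      Class.forName("com.example.DoesNotExist"); // hypothetical class name, guaranteed to fail
    } catch (ClassNotFoundException e) {
      IOException wrapped = new IOException(StringUtils.stringifyException(e));
      System.out.println(wrapped.getMessage()); // prints the original stack trace text
    }
  }
}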
/** * Set the array of {@link Path}s as the list of inputs for the map-reduce job. * * @param job The job to modify * @param inputPaths the {@link Path}s of the input directories/files for the map-reduce job. */ public static void setInputPaths(Job job, Path... inputPaths) throws IOException { Configuration conf = job.getConfiguration(); Path path = inputPaths[0].getFileSystem(conf).makeQualified(inputPaths[0]); StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString())); for (int i = 1; i < inputPaths.length; i++) { str.append(StringUtils.COMMA_STR); path = inputPaths[i].getFileSystem(conf).makeQualified(inputPaths[i]); str.append(StringUtils.escapeString(path.toString())); } conf.set(INPUT_DIR, str.toString()); }
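A sketch of the escape/unescape round trip behind setInputPaths() and getInputPaths(): escapeString() protects commas inside a path, split() honors the escapes when breaking the stored property apart, and unEscapeString() restores the original text. The directory names below are invented.

// Round-trip sketch (assumed: org.apache.hadoop.util.StringUtils).
import org.apache.hadoop.util.StringUtils;

public class EscapeRoundTripDemo {
  public static void main(String[] args) {
    String dirA = "/data/logs,2024"; // a path that happens to contain a comma
    String dirB = "/data/clicks";
    String stored =
        StringUtils.escapeString(dirA) + StringUtils.COMMA_STR + StringUtils.escapeString(dirB);
    System.out.println("stored   = " + stored); // /data/logs\,2024,/data/clicks
    for (String piece : StringUtils.split(stored)) {
      System.out.println("restored = " + StringUtils.unEscapeString(piece));
    }
  }
}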
public void run() { while (running) { try { LOG.info("LocalBlockFixer continuing to run..."); doFix(); } catch (Exception e) { LOG.error(StringUtils.stringifyException(e)); } catch (Error err) { LOG.error("Exiting after encountering " + StringUtils.stringifyException(err)); throw err; } } }
public void run( RecordReader<Text, ArcFileItem> input, OutputCollector<Text, CrawlURLMetadata> output, Reporter reporter) throws IOException { int lastValidPos = 0; try { // allocate key & value instances that are re-used for all entries Text key = input.createKey(); ArcFileItem value = input.createValue(); while (input.next(key, value)) { lastValidPos = value.getArcFilePos(); // map pair to output map(key, value, output, reporter); } } catch (IOException e) { String errorMessage = "Exception processing Split:" + _splitDetails + " Exception:" + StringUtils.stringifyException(e); LOG.error(errorMessage); if (_attemptID.getId() == 0 || (lastValidPos == 0 && _attemptID.getId() != _maxAttemptsPerTask - 1)) { throw new IOException(errorMessage); } // and just ignore the message } catch (Throwable e) { String errorMessage = "Unknown Exception processing Split:" + _splitDetails + " Exception:" + StringUtils.stringifyException(e); LOG.error(errorMessage); // if attempt number is not max attempt number configured... if (_attemptID.getId() != _maxAttemptsPerTask - 1) { // then bubble up exception throw new IOException(errorMessage); } } finally { close(); } }
public String toString() {
  StringBuilder sb = new StringBuilder();
  sb.append("<");
  for (int i = 0; i < ResourceType.values().length; i++) {
    if (i != 0) {
      sb.append(", ");
    }
    ResourceType resourceType = ResourceType.values()[i];
    sb.append(StringUtils.toLowerCase(resourceType.name()));
    sb.append(StringUtils.format(" weight=%.1f", getWeight(resourceType)));
  }
  sb.append(">");
  return sb.toString();
}
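A small sketch of the two helpers used in this toString(): StringUtils.format (a varargs wrapper around String.format) and StringUtils.toLowerCase (lower-casing with a fixed locale). The ResourceType enum and weight value below are stand-ins, not the real YARN types.

// Sketch of StringUtils.format() and StringUtils.toLowerCase(); enum and weight are invented.
import org.apache.hadoop.util.StringUtils;

public class FormatDemo {
  enum ResourceType { MEMORY, CPU } // stand-in enum, not the real YARN type

  public static void main(String[] args) {
    double weight = 2.0; // illustrative weight value
    String entry =
        StringUtils.toLowerCase(ResourceType.MEMORY.name())
            + StringUtils.format(" weight=%.1f", weight);
    System.out.println("<" + entry + ">"); // prints: <memory weight=2.0>
  }
}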
public void logStats() { long milliseconds = this.timeSinceLastAccess.get() / 1000000; LOG.info( "For Slab of size " + this.blockSize + ": " + this.getOccupiedSize() / this.blockSize + " occupied, out of a capacity of " + this.numBlocks + " blocks. HeapSize is " + StringUtils.humanReadableInt(this.heapSize()) + " bytes." + ", " + "churnTime=" + StringUtils.formatTime(milliseconds)); LOG.info( "Slab Stats: " + "accesses=" + stats.getRequestCount() + ", " + "hits=" + stats.getHitCount() + ", " + "hitRatio=" + (stats.getHitCount() == 0 ? "0" : (StringUtils.formatPercent(stats.getHitRatio(), 2) + "%, ")) + "cachingAccesses=" + stats.getRequestCachingCount() + ", " + "cachingHits=" + stats.getHitCachingCount() + ", " + "cachingHitsRatio=" + (stats.getHitCachingCount() == 0 ? "0" : (StringUtils.formatPercent(stats.getHitCachingRatio(), 2) + "%, ")) + "evictions=" + stats.getEvictionCount() + ", " + "evicted=" + stats.getEvictedCount() + ", " + "evictedPerRun=" + stats.evictedPerEviction()); }
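The log line above leans on three formatters. Here is a rough sketch of their behavior; note that humanReadableInt() has been deprecated in newer Hadoop releases, and the exact output strings are approximate, so treat this as illustrative rather than canonical.

// Sketch of the formatters used in logStats(); all input values are invented.
import org.apache.hadoop.util.StringUtils;

public class StatsFormattingDemo {
  public static void main(String[] args) {
    long heapBytes = 268_435_456L; // 256 MB, a made-up heap size
    long churnMillis = 90_000L;    // 1.5 minutes
    double hitRatio = 0.8756;      // fraction of hits
    System.out.println("heap=" + StringUtils.humanReadableInt(heapBytes));
    System.out.println("churnTime=" + StringUtils.formatTime(churnMillis));
    // formatPercent() renders the fraction as a percentage string, roughly "87.56%"
    System.out.println("hitRatio=" + StringUtils.formatPercent(hitRatio, 2));
  }
}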
/** * Checks if the map-reduce job has completed. * * @return true if the job completed, false otherwise. * @throws IOException */ public boolean checkComplete() throws IOException { JobID jobID = runningJob.getID(); if (runningJob.isComplete()) { // delete job directory final String jobdir = jobconf.get(JOB_DIR_LABEL); if (jobdir != null) { final Path jobpath = new Path(jobdir); jobpath.getFileSystem(jobconf).delete(jobpath, true); } if (runningJob.isSuccessful()) { LOG.info("Job Complete(Succeeded): " + jobID); } else { LOG.info("Job Complete(Failed): " + jobID); } raidPolicyPathPairList.clear(); Counters ctrs = runningJob.getCounters(); if (ctrs != null) { RaidNodeMetrics metrics = RaidNodeMetrics.getInstance(RaidNodeMetrics.DEFAULT_NAMESPACE_ID); if (ctrs.findCounter(Counter.FILES_FAILED) != null) { long filesFailed = ctrs.findCounter(Counter.FILES_FAILED).getValue(); metrics.raidFailures.inc(filesFailed); } long slotSeconds = ctrs.findCounter(JobInProgress.Counter.SLOTS_MILLIS_MAPS).getValue() / 1000; metrics.raidSlotSeconds.inc(slotSeconds); } return true; } else { String report = (" job " + jobID + " map " + StringUtils.formatPercent(runningJob.mapProgress(), 0) + " reduce " + StringUtils.formatPercent(runningJob.reduceProgress(), 0)); if (!report.equals(lastReport)) { LOG.info(report); lastReport = report; } TaskCompletionEvent[] events = runningJob.getTaskCompletionEvents(jobEventCounter); jobEventCounter += events.length; for (TaskCompletionEvent event : events) { if (event.getTaskStatus() == TaskCompletionEvent.Status.FAILED) { LOG.info(" Job " + jobID + " " + event.toString()); } } return false; } }
/**
 * Checks whether jobs have completed and updates the job and file indexes so that failed files
 * can be restarted.
 */
void checkJobs() throws IOException {
  Iterator<Job> jobIter = jobIndex.keySet().iterator();
  while (jobIter.hasNext()) {
    Job job = jobIter.next();
    try {
      if (job.isComplete()) {
        long slotSeconds =
            job.getCounters().findCounter(JobInProgress.Counter.SLOTS_MILLIS_MAPS).getValue()
                / 1000;
        RaidNodeMetrics.getInstance().blockFixSlotSeconds.inc(slotSeconds);
        long filesSucceeded =
            job.getCounters().findCounter(Counter.FILES_SUCCEEDED) != null
                ? job.getCounters().findCounter(Counter.FILES_SUCCEEDED).getValue()
                : 0;
        long filesFailed =
            job.getCounters().findCounter(Counter.FILES_FAILED) != null
                ? job.getCounters().findCounter(Counter.FILES_FAILED).getValue()
                : 0;
        long filesNoAction =
            job.getCounters().findCounter(Counter.FILES_NOACTION) != null
                ? job.getCounters().findCounter(Counter.FILES_NOACTION).getValue()
                : 0;
        int files = jobIndex.get(job).size();
        if (job.isSuccessful()
            && (filesSucceeded + filesFailed + filesNoAction == ((long) files))) {
          // job has processed all files
          succeedJob(job, filesSucceeded, filesFailed);
        } else {
          failJob(job);
        }
        jobIter.remove();
      } else {
        LOG.info("Job " + job.getID() + " (" + job.getJobName() + ") still running");
      }
    } catch (Exception e) {
      LOG.error(StringUtils.stringifyException(e));
      failJob(job);
      try {
        job.killJob();
      } catch (Exception ee) {
        LOG.error(StringUtils.stringifyException(ee));
      }
      jobIter.remove();
    }
  }
  purgeFileIndex();
}
/** * Add a {@link Path} to the list of inputs for the map-reduce job. * * @param job The {@link Job} to modify * @param path {@link Path} to be added to the list of inputs for the map-reduce job. */ public static void addInputPath(Job job, Path path) throws IOException { Configuration conf = job.getConfiguration(); path = path.getFileSystem(conf).makeQualified(path); String dirStr = StringUtils.escapeString(path.toString()); String dirs = conf.get(INPUT_DIR); conf.set(INPUT_DIR, dirs == null ? dirStr : dirs + "," + dirStr); }
@Override public void run() { while (!isStopped()) { try { NavigableMap<HRegionInfo, ServerName> regions = MetaScanner.allTableRegions(connection, TABLENAME); LOG.info("-------"); byte[] lastEndKey = HConstants.EMPTY_START_ROW; for (HRegionInfo hri : regions.navigableKeySet()) { long startKey = 0, endKey = Long.MAX_VALUE; if (!Bytes.equals(HConstants.EMPTY_START_ROW, hri.getStartKey())) { startKey = Bytes.toLong(hri.getStartKey()); } if (!Bytes.equals(HConstants.EMPTY_END_ROW, hri.getEndKey())) { endKey = Bytes.toLong(hri.getEndKey()); } LOG.info("start:" + startKey + " end:" + endKey + " hri:" + hri); Assert.assertTrue( "lastEndKey=" + Bytes.toString(lastEndKey) + ", startKey=" + Bytes.toString(hri.getStartKey()), Bytes.equals(lastEndKey, hri.getStartKey())); lastEndKey = hri.getEndKey(); } Assert.assertTrue(Bytes.equals(lastEndKey, HConstants.EMPTY_END_ROW)); LOG.info("-------"); Threads.sleep(10 + random.nextInt(50)); } catch (Throwable e) { ex = e; Assert.fail(StringUtils.stringifyException(e)); } } }
/**
 * When auto-shipping the Hive tar (for example when a Hive query or Pig script is submitted via
 * webhcat), the Hive client is launched on some remote node where Hive has not been installed. We
 * need to pass some properties to that client to make sure it connects to the right Metastore,
 * configures Tez, etc. Here we look for such properties in the Hive config, and set a
 * comma-separated list of key=value pairs in {@link #HIVE_PROPS_NAME}. Note that the user may
 * choose to set the same keys in HIVE_PROPS_NAME directly, in which case those values should take
 * precedence.
 */
private void handleHiveProperties() {
  HiveConf hiveConf = new HiveConf(); // load hive-site.xml from classpath
  List<String> interestingPropNames =
      Arrays.asList(
          "hive.metastore.uris",
          "hive.metastore.sasl.enabled",
          "hive.metastore.execute.setugi",
          "hive.execution.engine");
  // each item is in "key=value" format
  List<String> webhcatHiveProps = new ArrayList<String>(hiveProps());
  for (String interestingPropName : interestingPropNames) {
    String value = hiveConf.get(interestingPropName);
    if (value != null) {
      boolean found = false;
      for (String whProp : webhcatHiveProps) {
        if (whProp.startsWith(interestingPropName + "=")) {
          found = true;
          break;
        }
      }
      if (!found) {
        webhcatHiveProps.add(interestingPropName + "=" + value);
      }
    }
  }
  StringBuilder hiveProps = new StringBuilder();
  for (String whProp : webhcatHiveProps) {
    // make sure to escape the separator char in prop values
    hiveProps.append(hiveProps.length() > 0 ? "," : "").append(StringUtils.escapeString(whProp));
  }
  set(HIVE_PROPS_NAME, hiveProps.toString());
}
@Override public int execute(DriverContext driverContext) { PrintStream out = null; try { Path resFile = new Path(work.getResFile()); OutputStream outS = resFile.getFileSystem(conf).create(resFile); out = new PrintStream(outS); QB qb = work.getQb(); TokenRewriteStream stream = work.getCtx().getTokenRewriteStream(); String program = "sq rewrite"; ASTNode ast = work.getAst(); try { addRewrites(stream, qb, program, out); out.println( "\nRewritten Query:\n" + stream.toString(program, ast.getTokenStartIndex(), ast.getTokenStopIndex())); } finally { stream.deleteProgram(program); } out.close(); out = null; return (0); } catch (Exception e) { console.printError( "Failed with exception " + e.getMessage(), "\n" + StringUtils.stringifyException(e)); return (1); } finally { IOUtils.closeStream(out); } }
/** Run a FileOperation */ public void map( Text key, PolicyInfo policy, OutputCollector<WritableComparable, Text> out, Reporter reporter) throws IOException { this.reporter = reporter; try { LOG.info("Raiding file=" + key.toString() + " policy=" + policy); Path p = new Path(key.toString()); FileStatus fs = p.getFileSystem(jobconf).getFileStatus(p); st.clear(); RaidNode.doRaid(jobconf, policy, fs, st, reporter); ++succeedcount; reporter.incrCounter(Counter.PROCESSED_BLOCKS, st.numProcessedBlocks); reporter.incrCounter(Counter.PROCESSED_SIZE, st.processedSize); reporter.incrCounter(Counter.META_BLOCKS, st.numMetaBlocks); reporter.incrCounter(Counter.META_SIZE, st.metaSize); reporter.incrCounter(Counter.FILES_SUCCEEDED, 1); } catch (IOException e) { ++failcount; reporter.incrCounter(Counter.FILES_FAILED, 1); String s = "FAIL: " + policy + ", " + key + " " + StringUtils.stringifyException(e); out.collect(null, new Text(s)); LOG.info(s); } finally { reporter.setStatus(getCountString()); } }
@Override
public int run(String[] args) throws Exception {
  if (args.length < 1) {
    System.err.println("Usage: InjectorJob <url_dir> [-crawlId <id>]");
    return -1;
  }
  for (int i = 1; i < args.length; i++) {
    if ("-crawlId".equals(args[i])) {
      getConf().set(Nutch.CRAWL_ID_KEY, args[i + 1]);
      i++;
    } else {
      System.err.println("Unrecognized arg " + args[i]);
      return -1;
    }
  }
  try {
    inject(new Path(args[0]));
    LOG.info("InjectorJob: finished");
    return 0;
  } catch (Exception e) {
    LOG.error("InjectorJob: " + StringUtils.stringifyException(e));
    return -1;
  }
}
private void shutdown() { try { this.connection.commit(); this.connection.close(); } catch (Throwable ex) { LOG.warn( "Exception occurred while closing connection :" + StringUtils.stringifyException(ex)); } finally { try { if (this.server != null) this.server.shutdown(); } catch (Throwable ex) { LOG.warn( "Exception occurred while shutting down HSQLDB :" + StringUtils.stringifyException(ex)); } } }
@Override public void run() { try { Thread.sleep(delay); containerExecutor.signalContainer( new ContainerSignalContext.Builder() .setContainer(container) .setUser(user) .setPid(pid) .setSignal(signal) .build()); } catch (InterruptedException e) { return; } catch (IOException e) { String message = "Exception when user " + user + " killing task " + pid + " in DelayedProcessKiller: " + StringUtils.stringifyException(e); LOG.warn(message); container.handle(new ContainerDiagnosticsUpdateEvent(container.getContainerId(), message)); } }
public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file, int bufferSize) throws IOException { super(file, fs.getFileStatus(file).getReplication()); this.datas = fs.getRawFileSystem().open(file, bufferSize); this.fs = fs; Path sumFile = fs.getChecksumFile(file); try { int sumBufferSize = fs.getSumBufferSize(fs.getBytesPerSum(), bufferSize); sums = fs.getRawFileSystem().open(sumFile, sumBufferSize); byte[] version = new byte[CHECKSUM_VERSION.length]; sums.readFully(version); if (!Arrays.equals(version, CHECKSUM_VERSION)) throw new IOException("Not a checksum file: " + sumFile); this.bytesPerSum = sums.readInt(); set(fs.verifyChecksum, new CRC32(), bytesPerSum, 4); } catch (FileNotFoundException e) { // quietly ignore set(fs.verifyChecksum, null, 1, 0); } catch (IOException e) { // loudly ignore LOG.warn( "Problem opening checksum file: " + file + ". Ignoring exception: " + StringUtils.stringifyException(e)); set(fs.verifyChecksum, null, 1, 0); } }
@Override public String toString() { String fsList = Joiner.on(", ") .join( Collections2.transform( Collections2.filter( this.getFiles(), new Predicate<StoreFile>() { public boolean apply(StoreFile sf) { return sf.getReader() != null; } }), new Function<StoreFile, String>() { public String apply(StoreFile sf) { return StringUtils.humanReadableInt( (sf.getReader() == null) ? 0 : sf.getReader().length()); } })); return "regionName=" + regionName + ", storeName=" + storeName + ", fileCount=" + this.getFiles().size() + ", fileSize=" + StringUtils.humanReadableInt(totalSize) + ((fsList.isEmpty()) ? "" : " (" + fsList + ")") + ", priority=" + priority + ", time=" + timeInNanos; }
public static void main(String[] args) throws IOException {
  Text text = new Text("\u0041");
  ObjectWritable writable = new ObjectWritable(text);
  System.out.println(StringUtils.byteToHexString(serialize(writable)));
  // 00196f72672e6170616368652e6861646f6f702e696f2e5465787400196f72672e6170616368652e6861646f6f702e696f2e546578740141
  // (a)0019 6f72672e6170616368652e6861646f6f702e696f2e54657874, (b)0019
  // 6f72672e6170616368652e6861646f6f702e696f2e54657874,(c)0141
  /*
  (1) Serializing the declared-class part of ObjectWritable:
      UTF8.writeString(out, declaredClass.getName());
      ==> 0019 6f72672e6170616368652e6861646f6f702e696f2e54657874
      (the first part is a short holding the length of the class-name string
      "org.apache.hadoop.io.Text", 25 characters = 0x0019)
  (2) Serializing the concrete class implementing the Writable interface:
      if (Writable.class.isAssignableFrom(declaredClass)) { // Writable implementation
        UTF8.writeString(out, instance.getClass().getName());
        ((Writable) instance).write(out);
      }
      ==> 0019 6f72672e6170616368652e6861646f6f702e696f2e54657874 0141
      (the serialized value of the variable-length Text: 0x01 is the length, 0x41 the content)
  */
  ObjectWritable srcWritable = new ObjectWritable(Integer.TYPE, 188);
  ObjectWritable destWritable = new ObjectWritable();
  cloneInto(srcWritable, destWritable);
  System.out.println(serializeToHexString(srcWritable)); // 0003696e74000000bc
  System.out.println((Integer) destWritable.get()); // 188
}
/**
 * Verify whether the NodeManager can identify disk failures.
 *
 * @param localORLogDirs <em>true</em> means nm-local-dirs and <em>false</em> means nm-log-dirs
 * @param expectedDirs expected nm-local-dirs/nm-log-dirs as a string
 * @param isHealthy <em>true</em> if the overall node should be healthy
 */
private void verifyDisksHealth(boolean localORLogDirs, String expectedDirs, boolean isHealthy) {
  // Wait for the NodeManager to identify disk failures.
  waitForDiskHealthCheck();
  List<String> list = localORLogDirs ? dirsHandler.getLocalDirs() : dirsHandler.getLogDirs();
  String seenDirs = StringUtils.join(",", list);
  LOG.info("ExpectedDirs=" + expectedDirs);
  LOG.info("SeenDirs=" + seenDirs);
  Assert.assertTrue(
      "NodeManager could not identify disk failure.", expectedDirs.equals(seenDirs));
  Assert.assertEquals(
      "Node's health in terms of disks is wrong", isHealthy, dirsHandler.areDisksHealthy());
  for (int i = 0; i < 10; i++) {
    Iterator<RMNode> iter =
        yarnCluster.getResourceManager().getRMContext().getRMNodes().values().iterator();
    if (iter.next().getNodeHealthStatus().getIsNodeHealthy() == isHealthy) {
      break;
    }
    // wait for the node health info to reach the RM
    try {
      Thread.sleep(1000);
    } catch (InterruptedException e) {
      LOG.error("Interrupted while waiting for NM->RM heartbeat.");
    }
  }
  Iterator<RMNode> iter =
      yarnCluster.getResourceManager().getRMContext().getRMNodes().values().iterator();
  Assert.assertEquals(
      "RM is not updated with the health status of a node",
      isHealthy,
      iter.next().getNodeHealthStatus().getIsNodeHealthy());
}
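The directory comparison above relies on StringUtils.join. A tiny sketch of that helper follows; the directory names are invented.

// Sketch of StringUtils.join(separator, iterable) turning a list of dirs into one string.
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.util.StringUtils;

public class JoinDemo {
  public static void main(String[] args) {
    List<String> dirs = Arrays.asList("/tmp/nm-local-dir-0", "/tmp/nm-local-dir-1");
    System.out.println(StringUtils.join(",", dirs)); // /tmp/nm-local-dir-0,/tmp/nm-local-dir-1
  }
}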
/** fix a stripe */ @Override public void map(LongWritable key, Text fileText, Context context) throws IOException, InterruptedException { BlockFixerHelper helper = new BlockFixerHelper(context.getConfiguration()); String fileStr = fileText.toString(); LOG.info("fixing " + fileStr); Path file = new Path(fileStr); try { boolean fixed = helper.fixFile(file, context); if (fixed) { context.getCounter(Counter.FILES_SUCCEEDED).increment(1L); } else { context.getCounter(Counter.FILES_NOACTION).increment(1L); } } catch (Exception e) { LOG.error(StringUtils.stringifyException(e)); // report file as failed context.getCounter(Counter.FILES_FAILED).increment(1L); String outkey = fileStr; String outval = "failed"; context.write(new Text(outkey), new Text(outval)); } context.progress(); }
/** * @return the total key count in the files being merged * @throws IOException */ private long prepareForMerge() throws IOException { LOG.info("Merging " + inputFileNames); LOG.info("Using block size: " + blockSize); inputStoreFiles = new ArrayList<StoreFile>(); long maxKeyCount = 0; for (String fileName : inputFileNames) { Path filePath = new Path(fileName); // Open without caching. StoreFile sf = openStoreFile(filePath, false); sf.createReader(); inputStoreFiles.add(sf); StoreFile.Reader r = sf.getReader(); if (r != null) { long keyCount = r.getFilterEntries(); maxKeyCount += keyCount; LOG.info( "Compacting: " + sf + "; keyCount = " + keyCount + "; Bloom Type = " + r.getBloomFilterType().toString() + "; Size = " + StringUtils.humanReadableInt(r.length())); } } return maxKeyCount; }
/**
 * Splits the given string around matches of the given separator, honoring the escape character.
 *
 * @param str the string to split
 * @param separator the separator; only its first character is used
 * @param trim whether to remove trailing empty splits
 * @return the array of strings computed by splitting this string around matches of the given
 *     separator
 */
public static String[] split(String str, String separator, boolean trim) {
  if (str == null) {
    return null;
  }
  char sep = separator.charAt(0);
  ArrayList<String> strList = new ArrayList<String>();
  StringBuilder split = new StringBuilder();
  int index = 0;
  while ((index = StringUtils.findNext(str, sep, StringUtils.ESCAPE_CHAR, index, split)) >= 0) {
    ++index; // move over the separator for next search
    strList.add(split.toString());
    split.setLength(0); // reset the buffer
  }
  strList.add(split.toString());
  // remove trailing empty split(s)
  if (trim) {
    int last = strList.size(); // last split
    while (--last >= 0 && "".equals(strList.get(last))) {
      strList.remove(last);
    }
  }
  return strList.toArray(new String[strList.size()]);
}
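A usage sketch of escape-aware splitting. This uses the built-in three-argument StringUtils.split(str, escapeChar, separator), assumed to behave like the helper above except that it does not trim trailing empty splits; the input line is invented.

// Sketch of escape-aware splitting: the escaped comma is not treated as a split point.
import java.util.Arrays;
import org.apache.hadoop.util.StringUtils;

public class EscapedSplitDemo {
  public static void main(String[] args) {
    String line = "alpha,beta\\,gamma,delta"; // the middle comma is escaped
    String[] parts = StringUtils.split(line, StringUtils.ESCAPE_CHAR, ',');
    System.out.println(Arrays.toString(parts));
    // Expected roughly: [alpha, beta\,gamma, delta]
  }
}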
<T> SummaryBuilder add(String key, T value) { String escapedString = StringUtils.escapeString(String.valueOf(value), StringUtils.ESCAPE_CHAR, charsToEscape) .replaceAll("\n", "\\\\n") .replaceAll("\r", "\\\\r"); return _add(key, escapedString); }
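A sketch of the three-argument escapeString overload used above. Since the snippet's charsToEscape field is not shown, the comma-and-equals set here is just an illustrative guess.

// Sketch of escapeString(String, char, char[]) with an illustrative charsToEscape set.
import org.apache.hadoop.util.StringUtils;

public class EscapeCustomCharsDemo {
  public static void main(String[] args) {
    char[] charsToEscape = {',', '='}; // assumption: not the original field's contents
    String raw = "counters=5, status=OK";
    String escaped = StringUtils.escapeString(raw, StringUtils.ESCAPE_CHAR, charsToEscape);
    System.out.println(escaped); // counters\=5\, status\=OK
  }
}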
// Mostly for setting up the symlinks. Note that when we set up the distributed
// cache, we didn't create the symlinks. This is done on a per-task basis
// by the currently executing task.
public static void setupWorkDir(JobConf conf) throws IOException {
  File workDir = new File(".").getAbsoluteFile();
  FileUtil.fullyDelete(workDir);
  if (DistributedCache.getSymlink(conf)) {
    URI[] archives = DistributedCache.getCacheArchives(conf);
    URI[] files = DistributedCache.getCacheFiles(conf);
    Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);
    Path[] localFiles = DistributedCache.getLocalCacheFiles(conf);
    if (archives != null) {
      for (int i = 0; i < archives.length; i++) {
        String link = archives[i].getFragment();
        if (link != null) {
          link = workDir.toString() + Path.SEPARATOR + link;
          File flink = new File(link);
          if (!flink.exists()) {
            FileUtil.symLink(localArchives[i].toString(), link);
          }
        }
      }
    }
    if (files != null) {
      for (int i = 0; i < files.length; i++) {
        String link = files[i].getFragment();
        if (link != null) {
          link = workDir.toString() + Path.SEPARATOR + link;
          File flink = new File(link);
          if (!flink.exists()) {
            FileUtil.symLink(localFiles[i].toString(), link);
          }
        }
      }
    }
  }
  File jobCacheDir = null;
  if (conf.getJar() != null) {
    jobCacheDir = new File(new Path(conf.getJar()).getParent().toString());
  }
  // create symlinks for all the files in the job cache dir in the current
  // working dir for streaming
  try {
    DistributedCache.createAllSymlink(conf, jobCacheDir, workDir);
  } catch (IOException ie) {
    // Do not exit even if symlinks have not been created.
    LOG.warn(StringUtils.stringifyException(ie));
  }
  // add java.io.tmpdir given by mapred.child.tmp
  String tmp = conf.get("mapred.child.tmp", "./tmp");
  Path tmpDir = new Path(tmp);
  // if the temp directory path is not absolute, prepend it with workDir
  if (!tmpDir.isAbsolute()) {
    tmpDir = new Path(workDir.toString(), tmp);
    FileSystem localFs = FileSystem.getLocal(conf);
    if (!localFs.mkdirs(tmpDir) && !localFs.getFileStatus(tmpDir).isDir()) {
      throw new IOException("Mkdirs failed to create " + tmpDir.toString());
    }
  }
}