ActiveFile(File f, List<Thread> list) { file = f; if (list != null) { threads.addAll(list); } threads.add(Thread.currentThread()); }
void checkFormat(Job job) throws Exception { TaskAttemptContext attemptContext = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID("123", 0, false, 1, 2)); MyClassMessagePackBase64LineInputFormat format = new MyClassMessagePackBase64LineInputFormat(); FileInputFormat.setInputPaths(job, workDir); List<InputSplit> splits = format.getSplits(job); for (int j = 0; j < splits.size(); j++) { RecordReader<LongWritable, MyClassWritable> reader = format.createRecordReader(splits.get(j), attemptContext); reader.initialize(splits.get(j), attemptContext); int count = 0; try { while (reader.nextKeyValue()) { LongWritable key = reader.getCurrentKey(); MyClassWritable val = reader.getCurrentValue(); MyClass mc = val.get(); assertEquals(mc.v, count); assertEquals(mc.s, Integer.toString(count)); count++; } } finally { reader.close(); } } }
protected static double Distance(List<Double> p1, List<Double> p2) { double sumOfSquaredDifferences = 0.0; for (int i = 0; i < p1.size(); i++) { sumOfSquaredDifferences += Math.pow(p1.get(i) - p2.get(i), 2.0); } return Math.pow(sumOfSquaredDifferences, 0.5); }
public static List<Double> GetPoint(String s) { StringTokenizer tokenizer = new StringTokenizer(s); List<Double> p = new ArrayList<Double>(58); while (tokenizer.hasMoreTokens()) { p.add(Double.parseDouble(tokenizer.nextToken())); } return p; }
public static List<String> tokenizeString(Analyzer analyzer, String string) { List<String> result = new ArrayList<String>(); try { TokenStream stream = analyzer.tokenStream(null, new StringReader(string)); stream.reset(); while (stream.incrementToken()) { result.add(stream.getAttribute(CharTermAttribute.class).toString()); } } catch (IOException e) { // not thrown b/c we're using a string reader... throw new RuntimeException(e); } return result; }
public static List<InetSocketAddress> readAddresses(Path path, Configuration conf) throws IOException { final List<InetSocketAddress> addrs = new ArrayList<InetSocketAddress>(); for (final String line : readConfig(path, conf)) { final StringTokenizer tokens = new StringTokenizer(line); if (tokens.hasMoreTokens()) { final String host = tokens.nextToken(); if (tokens.hasMoreTokens()) { final String port = tokens.nextToken(); addrs.add(new InetSocketAddress(host, Integer.parseInt(port))); } } } return addrs; }
public void configure(JobConf conf) { numberOfCenters = Integer.valueOf(conf.get("numberOfCenters")); centersDirectory = conf.get("centersReadDirectory"); try { Configuration c = new Configuration(); FileSystem fs = FileSystem.get(c); for (int index = 0; index < numberOfCenters; ++index) { SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(centersDirectory + "/centers/" + index), c); LongWritable key = new LongWritable(); Point value = new Point(); reader.next(key, value); Point center = (Point) value; centers.add(center); reader.close(); } } catch (IOException e) { // do nothing // I hope this doesn't happen System.out.println("well, damn."); e.printStackTrace(); } }
/** * Generate the list of files and make them into FileSplits. This needs to be copied to insert a * filter on acceptable data */ @Override public List<InputSplit> getSplits(JobContext job) throws IOException { long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job)); long maxSize = getMaxSplitSize(job); long desiredMappers = job.getConfiguration().getLong("org.systemsbiology.jxtandem.DesiredXMLInputMappers", 0); // generate splits List<InputSplit> splits = new ArrayList<InputSplit>(); List<FileStatus> fileStatuses = listStatus(job); boolean forceNumberMappers = fileStatuses.size() == 1; for (FileStatus file : fileStatuses) { Path path = file.getPath(); if (!isPathAcceptable(path)) // filter acceptable data continue; FileSystem fs = path.getFileSystem(job.getConfiguration()); long length = file.getLen(); BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length); if ((length != 0) && isSplitable(job, path)) { long blockSize = file.getBlockSize(); // use desired mappers to force more splits if (forceNumberMappers && desiredMappers > 0) maxSize = Math.min(maxSize, (length / desiredMappers)); long splitSize = computeSplitSize(blockSize, minSize, maxSize); long bytesRemaining = length; while (withinSlop(splitSize, bytesRemaining)) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); splits.add( new FileSplit( path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts())); bytesRemaining -= splitSize; } if (bytesRemaining != 0) { splits.add( new FileSplit( path, length - bytesRemaining, bytesRemaining, blkLocations[blkLocations.length - 1].getHosts())); } } else if (length != 0) { splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts())); } else { // Create empty hosts array for zero length files splits.add(new FileSplit(path, 0, length, new String[0])); } } System.out.println("Total # of splits: " + splits.size()); // LOG.debug("Total # of splits: " + splits.size()); return splits; }
public List<String> resolve(List<String> names) { List<String> m = new ArrayList<String>(names.size()); if (names.isEmpty()) { return m; } if (scriptName == null) { for (int i = 0; i < names.size(); i++) { m.add(NetworkTopology.DEFAULT_RACK); } return m; } String output = runResolveCommand(names); if (output != null) { StringTokenizer allSwitchInfo = new StringTokenizer(output); while (allSwitchInfo.hasMoreTokens()) { String switchInfo = allSwitchInfo.nextToken(); m.add(switchInfo); } if (m.size() != names.size()) { // invalid number of entries returned by the script LOG.warn( "Script " + scriptName + " returned " + Integer.toString(m.size()) + " values when " + Integer.toString(names.size()) + " were expected."); return null; } } else { // an error occurred. return null to signify this. // (exn was already logged in runResolveCommand) return null; } return m; }
public void map( LongWritable key, Point value, OutputCollector<LongWritable, Point> output, Reporter reporter) throws IOException { double min = value.sumOfSquares(centers.get(0)); int best = 0; for (int index = 1; index < numberOfCenters; ++index) { double current = value.sumOfSquares(centers.get(index)); if (current < min) { min = current; best = index; } } reporter.incrCounter("NUMBER", "NODES", 1); reporter.incrCounter("CENTER", "" + best, 1); output.collect(new LongWritable(best), value); }
private static void deleteCache(Configuration conf, MRAsyncDiskService asyncDiskService) throws IOException { List<CacheStatus> deleteSet = new LinkedList<CacheStatus>(); // try deleting cache Status with refcount of zero synchronized (cachedArchives) { for (Iterator<String> it = cachedArchives.keySet().iterator(); it.hasNext(); ) { String cacheId = (String) it.next(); CacheStatus lcacheStatus = cachedArchives.get(cacheId); if (lcacheStatus.refcount == 0) { // delete this cache entry from the global list // and mark the localized file for deletion deleteSet.add(lcacheStatus); it.remove(); } } } // do the deletion asynchronously, after releasing the global lock Thread cacheFileCleaner = new Thread(new CacheFileCleanTask(asyncDiskService, FileSystem.getLocal(conf), deleteSet)); cacheFileCleaner.start(); }
/** * list subfiles * * @param hdfsPath !null path probably exists * @return !null list of enclused file names - emptu if file of not exists */ @Override public String[] ls(final String hdfsPath) { if (isRunningAsUser()) { return super.ls(hdfsPath); } final FileSystem fs = getDFS(); try { FileStatus[] statuses = fs.listStatus(new Path(hdfsPath)); if (statuses == null) return new String[0]; List<String> holder = new ArrayList<String>(); for (int i = 0; i < statuses.length; i++) { FileStatus statuse = statuses[i]; String s = statuse.getPath().getName(); holder.add(s); } String[] ret = new String[holder.size()]; holder.toArray(ret); return ret; } catch (IOException e) { throw new RuntimeException(e); } }
protected void setup(Context context) throws IOException, InterruptedException { FileSystem fs = FileSystem.get(context.getConfiguration()); Path cFile = new Path(context.getConfiguration().get("CFILE")); DataInputStream d = new DataInputStream(fs.open(cFile)); BufferedReader reader = new BufferedReader(new InputStreamReader(d)); String line; while ((line = reader.readLine()) != null) { StringTokenizer tokenizer = new StringTokenizer(line.toString()); if (tokenizer.hasMoreTokens()) { List<Double> centroid = new ArrayList<Double>(58); while (tokenizer.hasMoreTokens()) { centroid.add(Double.parseDouble(tokenizer.nextToken())); } centroids.add(centroid); } } k = centroids.size(); }
private String runResolveCommand(List<String> args) { int loopCount = 0; if (args.size() == 0) { return null; } StringBuffer allOutput = new StringBuffer(); int numProcessed = 0; if (maxArgs < MIN_ALLOWABLE_ARGS) { LOG.warn( "Invalid value " + Integer.toString(maxArgs) + " for " + SCRIPT_ARG_COUNT_KEY + "; must be >= " + Integer.toString(MIN_ALLOWABLE_ARGS)); return null; } while (numProcessed != args.size()) { int start = maxArgs * loopCount; List<String> cmdList = new ArrayList<String>(); cmdList.add(scriptName); for (numProcessed = start; numProcessed < (start + maxArgs) && numProcessed < args.size(); numProcessed++) { cmdList.add(args.get(numProcessed)); } File dir = null; String userDir; if ((userDir = System.getProperty("user.dir")) != null) { dir = new File(userDir); } ShellCommandExecutor s = new ShellCommandExecutor(cmdList.toArray(new String[0]), dir); try { s.execute(); allOutput.append(s.getOutput() + " "); } catch (Exception e) { LOG.warn(StringUtils.stringifyException(e)); return null; } loopCount++; } return allOutput.toString(); }
public void reduce(IntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException { List<Double> newCentroid = null; int numPoints = 0; for (Text value : values) { ++numPoints; List<Double> p = GetPoint(value.toString()); if (newCentroid == null) { // initialize the new centroid to the first element newCentroid = new ArrayList<Double>(p); } else { for (int i = 0; i < newCentroid.size(); i++) { newCentroid.set(i, newCentroid.get(i) + p.get(i)); } } } // now the newCentroid contains the sum of all the points // so to get the average of all the points, we need to // divide each entry by the total number of points for (int i = 0; i < newCentroid.size(); i++) { newCentroid.set(i, newCentroid.get(i) / (double) numPoints); } // now create a string containing all the new centroid's coordinates String s = null; for (Double d : newCentroid) { if (s == null) { s = d.toString(); } else { s += " " + d.toString(); } } newCentroids.add(s); if (newCentroids.size() == 10) { WriteNewCentroids(context); } // output the centroid ID and the centroid data context.write(key, new Text(s)); }
/** * Search for pages matching a query, eliminating excessive hits with matching values for a named * field. Hits after the first <code>maxHitsPerDup</code> are removed from results. The remaining * hits have {@link Hit#moreFromDupExcluded()} set. * * <p>If maxHitsPerDup is zero then all hits are returned. * * @param query query * @param numHits number of requested hits * @param maxHitsPerDup the maximum hits returned with matching values, or zero * @param dedupField field name to check for duplicates * @param sortField Field to sort on (or null if no sorting). * @param reverse True if we are to reverse sort by <code>sortField</code>. * @return Hits the matching hits * @throws IOException */ public Hits search( Query query, int numHits, int maxHitsPerDup, String dedupField, String sortField, boolean reverse) throws IOException { if (maxHitsPerDup <= 0) // disable dup checking return search(query, numHits, dedupField, sortField, reverse); final float rawHitsFactor = this.conf.getFloat("searcher.hostgrouping.rawhits.factor", 2.0f); int numHitsRaw = (int) (numHits * rawHitsFactor); if (LOG.isInfoEnabled()) { LOG.info("searching for " + numHitsRaw + " raw hits"); } Hits hits = searchBean.search(query, numHitsRaw, dedupField, sortField, reverse); final long total = hits.getTotal(); final Map<String, DupHits> dupToHits = new HashMap<String, DupHits>(); final List<Hit> resultList = new ArrayList<Hit>(); final Set<Hit> seen = new HashSet<Hit>(); final List<String> excludedValues = new ArrayList<String>(); boolean totalIsExact = true; for (int rawHitNum = 0; rawHitNum < hits.getTotal(); rawHitNum++) { // get the next raw hit if (rawHitNum >= hits.getLength()) { // optimize query by prohibiting more matches on some excluded values final Query optQuery = (Query) query.clone(); for (int i = 0; i < excludedValues.size(); i++) { if (i == MAX_PROHIBITED_TERMS) break; optQuery.addProhibitedTerm(excludedValues.get(i), dedupField); } numHitsRaw = (int) (numHitsRaw * rawHitsFactor); if (LOG.isInfoEnabled()) { LOG.info("re-searching for " + numHitsRaw + " raw hits, query: " + optQuery); } hits = searchBean.search(optQuery, numHitsRaw, dedupField, sortField, reverse); if (LOG.isInfoEnabled()) { LOG.info("found " + hits.getTotal() + " raw hits"); } rawHitNum = -1; continue; } final Hit hit = hits.getHit(rawHitNum); if (seen.contains(hit)) continue; seen.add(hit); // get dup hits for its value final String value = hit.getDedupValue(); DupHits dupHits = dupToHits.get(value); if (dupHits == null) dupToHits.put(value, dupHits = new DupHits()); // does this hit exceed maxHitsPerDup? if (dupHits.size() == maxHitsPerDup) { // yes -- ignore the hit if (!dupHits.maxSizeExceeded) { // mark prior hits with moreFromDupExcluded for (int i = 0; i < dupHits.size(); i++) { dupHits.get(i).setMoreFromDupExcluded(true); } dupHits.maxSizeExceeded = true; excludedValues.add(value); // exclude dup } totalIsExact = false; } else { // no -- collect the hit resultList.add(hit); dupHits.add(hit); // are we done? // we need to find one more than asked for, so that we can tell if // there are more hits to be shown if (resultList.size() > numHits) break; } } final Hits results = new Hits(total, resultList.toArray(new Hit[resultList.size()])); results.setTotalIsExact(totalIsExact); return results; }
public void generateFileDetails(JspWriter out, HttpServletRequest req, Configuration conf) throws IOException, InterruptedException { int chunkSizeToView = 0; long startOffset = 0; int datanodePort; String blockIdStr = null; long currBlockId = 0; blockIdStr = req.getParameter("blockId"); if (blockIdStr == null) { out.print("Invalid input (blockId absent)"); return; } currBlockId = Long.parseLong(blockIdStr); String datanodePortStr = req.getParameter("datanodePort"); if (datanodePortStr == null) { out.print("Invalid input (datanodePort absent)"); return; } datanodePort = Integer.parseInt(datanodePortStr); String namenodeInfoPortStr = req.getParameter("namenodeInfoPort"); int namenodeInfoPort = -1; if (namenodeInfoPortStr != null) namenodeInfoPort = Integer.parseInt(namenodeInfoPortStr); String chunkSizeToViewStr = req.getParameter("chunkSizeToView"); if (chunkSizeToViewStr != null && Integer.parseInt(chunkSizeToViewStr) > 0) { chunkSizeToView = Integer.parseInt(chunkSizeToViewStr); } else { chunkSizeToView = JspHelper.getDefaultChunkSize(conf); } String startOffsetStr = req.getParameter("startOffset"); if (startOffsetStr == null || Long.parseLong(startOffsetStr) < 0) startOffset = 0; else startOffset = Long.parseLong(startOffsetStr); String filename = HtmlQuoting.unquoteHtmlChars(req.getParameter("filename")); if (filename == null || filename.length() == 0) { out.print("Invalid input"); return; } String blockSizeStr = req.getParameter("blockSize"); long blockSize = 0; if (blockSizeStr == null || blockSizeStr.length() == 0) { out.print("Invalid input"); return; } blockSize = Long.parseLong(blockSizeStr); String tokenString = req.getParameter(JspHelper.DELEGATION_PARAMETER_NAME); UserGroupInformation ugi = JspHelper.getUGI(req, conf); DFSClient dfs = JspHelper.getDFSClient(ugi, jspHelper.nameNodeAddr, conf); List<LocatedBlock> blocks = dfs.namenode.getBlockLocations(filename, 0, Long.MAX_VALUE).getLocatedBlocks(); // Add the various links for looking at the file contents // URL for downloading the full file String downloadUrl = "http://" + req.getServerName() + ":" + +req.getServerPort() + "/streamFile" + URLEncoder.encode(filename, "UTF-8") + "?" + JspHelper.DELEGATION_PARAMETER_NAME + "=" + tokenString; out.print("<a name=\"viewOptions\"></a>"); out.print("<a href=\"" + downloadUrl + "\">Download this file</a><br>"); DatanodeInfo chosenNode; // URL for TAIL LocatedBlock lastBlk = blocks.get(blocks.size() - 1); long blockId = lastBlk.getBlock().getBlockId(); try { chosenNode = jspHelper.bestNode(lastBlk); } catch (IOException e) { out.print(e.toString()); dfs.close(); return; } String fqdn = InetAddress.getByName(chosenNode.getHost()).getCanonicalHostName(); String tailUrl = "http://" + fqdn + ":" + chosenNode.getInfoPort() + "/tail.jsp?filename=" + URLEncoder.encode(filename, "UTF-8") + "&namenodeInfoPort=" + namenodeInfoPort + "&chunkSizeToView=" + chunkSizeToView + "&referrer=" + URLEncoder.encode(req.getRequestURL() + "?" + req.getQueryString(), "UTF-8") + JspHelper.getDelegationTokenUrlParam(tokenString); out.print("<a href=\"" + tailUrl + "\">Tail this file</a><br>"); out.print("<form action=\"/browseBlock.jsp\" method=GET>"); out.print("<b>Chunk size to view (in bytes, up to file's DFS block size): </b>"); out.print("<input type=\"hidden\" name=\"blockId\" value=\"" + currBlockId + "\">"); out.print("<input type=\"hidden\" name=\"blockSize\" value=\"" + blockSize + "\">"); out.print("<input type=\"hidden\" name=\"startOffset\" value=\"" + startOffset + "\">"); out.print("<input type=\"hidden\" name=\"filename\" value=\"" + filename + "\">"); out.print("<input type=\"hidden\" name=\"datanodePort\" value=\"" + datanodePort + "\">"); out.print( "<input type=\"hidden\" name=\"namenodeInfoPort\" value=\"" + namenodeInfoPort + "\">"); out.print( "<input type=\"text\" name=\"chunkSizeToView\" value=" + chunkSizeToView + " size=10 maxlength=10>"); out.print(" <input type=\"submit\" name=\"submit\" value=\"Refresh\">"); out.print("</form>"); out.print("<hr>"); out.print("<a name=\"blockDetails\"></a>"); out.print("<B>Total number of blocks: " + blocks.size() + "</B><br>"); // generate a table and dump the info out.println("\n<table>"); for (LocatedBlock cur : blocks) { out.print("<tr>"); blockId = cur.getBlock().getBlockId(); blockSize = cur.getBlock().getNumBytes(); String blk = "blk_" + Long.toString(blockId); out.print("<td>" + Long.toString(blockId) + ":</td>"); DatanodeInfo[] locs = cur.getLocations(); for (int j = 0; j < locs.length; j++) { String datanodeAddr = locs[j].getName(); datanodePort = Integer.parseInt( datanodeAddr.substring(datanodeAddr.indexOf(':') + 1, datanodeAddr.length())); fqdn = InetAddress.getByName(locs[j].getHost()).getCanonicalHostName(); String blockUrl = "http://" + fqdn + ":" + locs[j].getInfoPort() + "/browseBlock.jsp?blockId=" + Long.toString(blockId) + "&blockSize=" + blockSize + "&filename=" + URLEncoder.encode(filename, "UTF-8") + "&datanodePort=" + datanodePort + "&genstamp=" + cur.getBlock().getGenerationStamp() + "&namenodeInfoPort=" + namenodeInfoPort + "&chunkSizeToView=" + chunkSizeToView; out.print( "<td> </td>" + "<td><a href=\"" + blockUrl + "\">" + datanodeAddr + "</a></td>"); } out.println("</tr>"); } out.println("</table>"); out.print("<hr>"); String namenodeHost = jspHelper.nameNodeAddr.getHostName(); out.print( "<br><a href=\"http://" + InetAddress.getByName(namenodeHost).getCanonicalHostName() + ":" + namenodeInfoPort + "/dfshealth.jsp\">Go back to DFS home</a>"); dfs.close(); }
public void generateFileChunks(JspWriter out, HttpServletRequest req, Configuration conf) throws IOException, InterruptedException { long startOffset = 0; int datanodePort = 0; int chunkSizeToView = 0; String namenodeInfoPortStr = req.getParameter("namenodeInfoPort"); int namenodeInfoPort = -1; if (namenodeInfoPortStr != null) namenodeInfoPort = Integer.parseInt(namenodeInfoPortStr); String filename = HtmlQuoting.unquoteHtmlChars(req.getParameter("filename")); if (filename == null) { out.print("Invalid input (filename absent)"); return; } String blockIdStr = null; long blockId = 0; blockIdStr = req.getParameter("blockId"); if (blockIdStr == null) { out.print("Invalid input (blockId absent)"); return; } blockId = Long.parseLong(blockIdStr); String tokenString = req.getParameter(JspHelper.DELEGATION_PARAMETER_NAME); UserGroupInformation ugi = JspHelper.getUGI(req, conf); final DFSClient dfs = JspHelper.getDFSClient(ugi, jspHelper.nameNodeAddr, conf); Token<BlockTokenIdentifier> accessToken = BlockTokenSecretManager.DUMMY_TOKEN; if (conf.getBoolean(DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY, false)) { List<LocatedBlock> blks = dfs.namenode.getBlockLocations(filename, 0, Long.MAX_VALUE).getLocatedBlocks(); if (blks == null || blks.size() == 0) { out.print("Can't locate file blocks"); dfs.close(); return; } for (int i = 0; i < blks.size(); i++) { if (blks.get(i).getBlock().getBlockId() == blockId) { accessToken = blks.get(i).getBlockToken(); break; } } } String blockGenStamp = null; long genStamp = 0; blockGenStamp = req.getParameter("genstamp"); if (blockGenStamp == null) { out.print("Invalid input (genstamp absent)"); return; } genStamp = Long.parseLong(blockGenStamp); String blockSizeStr; long blockSize = 0; blockSizeStr = req.getParameter("blockSize"); if (blockSizeStr == null) { out.print("Invalid input (blockSize absent)"); return; } blockSize = Long.parseLong(blockSizeStr); String chunkSizeToViewStr = req.getParameter("chunkSizeToView"); if (chunkSizeToViewStr != null && Integer.parseInt(chunkSizeToViewStr) > 0) chunkSizeToView = Integer.parseInt(chunkSizeToViewStr); else chunkSizeToView = JspHelper.getDefaultChunkSize(conf); String startOffsetStr = req.getParameter("startOffset"); if (startOffsetStr == null || Long.parseLong(startOffsetStr) < 0) startOffset = 0; else startOffset = Long.parseLong(startOffsetStr); String datanodePortStr = req.getParameter("datanodePort"); if (datanodePortStr == null) { out.print("Invalid input (datanodePort absent)"); return; } datanodePort = Integer.parseInt(datanodePortStr); out.print("<h3>File: "); JspHelper.printPathWithLinks( HtmlQuoting.quoteHtmlChars(filename), out, namenodeInfoPort, tokenString); out.print("</h3><hr>"); String parent = new File(filename).getParent(); JspHelper.printGotoForm(out, namenodeInfoPort, tokenString, HtmlQuoting.quoteHtmlChars(parent)); out.print("<hr>"); out.print( "<a href=\"http://" + req.getServerName() + ":" + req.getServerPort() + "/browseDirectory.jsp?dir=" + URLEncoder.encode(parent, "UTF-8") + "&namenodeInfoPort=" + namenodeInfoPort + "\"><i>Go back to dir listing</i></a><br>"); out.print("<a href=\"#viewOptions\">Advanced view/download options</a><br>"); out.print("<hr>"); // Determine the prev & next blocks long nextStartOffset = 0; long nextBlockSize = 0; String nextBlockIdStr = null; String nextGenStamp = null; String nextHost = req.getServerName(); int nextPort = req.getServerPort(); int nextDatanodePort = datanodePort; // determine data for the next link if (startOffset + chunkSizeToView >= blockSize) { // we have to go to the next block from this point onwards List<LocatedBlock> blocks = dfs.namenode.getBlockLocations(filename, 0, Long.MAX_VALUE).getLocatedBlocks(); for (int i = 0; i < blocks.size(); i++) { if (blocks.get(i).getBlock().getBlockId() == blockId) { if (i != blocks.size() - 1) { LocatedBlock nextBlock = blocks.get(i + 1); nextBlockIdStr = Long.toString(nextBlock.getBlock().getBlockId()); nextGenStamp = Long.toString(nextBlock.getBlock().getGenerationStamp()); nextStartOffset = 0; nextBlockSize = nextBlock.getBlock().getNumBytes(); DatanodeInfo d = jspHelper.bestNode(nextBlock); String datanodeAddr = d.getName(); nextDatanodePort = Integer.parseInt( datanodeAddr.substring(datanodeAddr.indexOf(':') + 1, datanodeAddr.length())); nextHost = InetAddress.getByName(d.getHost()).getCanonicalHostName(); nextPort = d.getInfoPort(); } } } } else { // we are in the same block nextBlockIdStr = blockIdStr; nextStartOffset = startOffset + chunkSizeToView; nextBlockSize = blockSize; nextGenStamp = blockGenStamp; } String nextUrl = null; if (nextBlockIdStr != null) { nextUrl = "http://" + nextHost + ":" + nextPort + "/browseBlock.jsp?blockId=" + nextBlockIdStr + "&blockSize=" + nextBlockSize + "&startOffset=" + nextStartOffset + "&genstamp=" + nextGenStamp + "&filename=" + URLEncoder.encode(filename, "UTF-8") + "&chunkSizeToView=" + chunkSizeToView + "&datanodePort=" + nextDatanodePort + "&namenodeInfoPort=" + namenodeInfoPort + JspHelper.getDelegationTokenUrlParam(tokenString); out.print("<a href=\"" + nextUrl + "\">View Next chunk</a> "); } // determine data for the prev link String prevBlockIdStr = null; String prevGenStamp = null; long prevStartOffset = 0; long prevBlockSize = 0; String prevHost = req.getServerName(); int prevPort = req.getServerPort(); int prevDatanodePort = datanodePort; if (startOffset == 0) { List<LocatedBlock> blocks = dfs.namenode.getBlockLocations(filename, 0, Long.MAX_VALUE).getLocatedBlocks(); for (int i = 0; i < blocks.size(); i++) { if (blocks.get(i).getBlock().getBlockId() == blockId) { if (i != 0) { LocatedBlock prevBlock = blocks.get(i - 1); prevBlockIdStr = Long.toString(prevBlock.getBlock().getBlockId()); prevGenStamp = Long.toString(prevBlock.getBlock().getGenerationStamp()); prevStartOffset = prevBlock.getBlock().getNumBytes() - chunkSizeToView; if (prevStartOffset < 0) prevStartOffset = 0; prevBlockSize = prevBlock.getBlock().getNumBytes(); DatanodeInfo d = jspHelper.bestNode(prevBlock); String datanodeAddr = d.getName(); prevDatanodePort = Integer.parseInt( datanodeAddr.substring(datanodeAddr.indexOf(':') + 1, datanodeAddr.length())); prevHost = InetAddress.getByName(d.getHost()).getCanonicalHostName(); prevPort = d.getInfoPort(); } } } } else { // we are in the same block prevBlockIdStr = blockIdStr; prevStartOffset = startOffset - chunkSizeToView; if (prevStartOffset < 0) prevStartOffset = 0; prevBlockSize = blockSize; prevGenStamp = blockGenStamp; } String prevUrl = null; if (prevBlockIdStr != null) { prevUrl = "http://" + prevHost + ":" + prevPort + "/browseBlock.jsp?blockId=" + prevBlockIdStr + "&blockSize=" + prevBlockSize + "&startOffset=" + prevStartOffset + "&filename=" + URLEncoder.encode(filename, "UTF-8") + "&chunkSizeToView=" + chunkSizeToView + "&genstamp=" + prevGenStamp + "&datanodePort=" + prevDatanodePort + "&namenodeInfoPort=" + namenodeInfoPort + JspHelper.getDelegationTokenUrlParam(tokenString); out.print("<a href=\"" + prevUrl + "\">View Prev chunk</a> "); } out.print("<hr>"); out.print("<textarea cols=\"100\" rows=\"25\" wrap=\"virtual\" style=\"width:100%\" READONLY>"); try { jspHelper.streamBlockInAscii( new InetSocketAddress(req.getServerName(), datanodePort), blockId, accessToken, genStamp, blockSize, startOffset, chunkSizeToView, out, conf); } catch (Exception e) { out.print(e); } out.print("</textarea>"); dfs.close(); }
public static List<String> getText(BytesWritable value, Boolean tokenizep) throws InterruptedException { Session s = Session.getDefaultInstance(new Properties()); InputStream is = new ByteArrayInputStream(value.getBytes()); List<String> out = new ArrayList<String>(); try { MimeMessage message = new MimeMessage(s, is); message.getAllHeaderLines(); Analyzer standard_analyzer = new StandardAnalyzer(Version.LUCENE_43); Analyzer email_analyzer = new UAX29URLEmailAnalyzer(Version.LUCENE_43); Address[] fromAddrs = message.getFrom(); String fromAddrstr = ""; if (fromAddrs != null) { for (Address addr : fromAddrs) { fromAddrstr += (addr.toString() + " "); } } Address[] toAddrs = message.getAllRecipients(); String toAddrstr = ""; if (toAddrs != null) { for (Address addr : toAddrs) { toAddrstr += (addr.toString() + " "); } } String subject = message.getSubject(); String body = ""; try { Object content = message.getContent(); // System.err.println(content.getContentType()); if (content instanceof String) { body = (String) content; } else if (content instanceof Multipart) { Multipart mp = (Multipart) content; for (int i = 0; i < mp.getCount(); i++) { BodyPart bp = mp.getBodyPart(i); // System.err.println(bp.getContentType()); Object c = bp.getContent(); if (c instanceof String) { body = (String) c; } } } // people do really evil things with email, we're not sorting through it all now } catch (DecodingException e) { System.err.println("DecodingException"); } catch (UnsupportedEncodingException e) { System.err.println("UnsuportedEncodingException"); } catch (IOException e) { System.err.println("IOException"); } if (tokenizep) { List<String> fromData = new ArrayList<String>(); List<String> toData = new ArrayList<String>(); List<String> subjectData = new ArrayList<String>(); List<String> bodyData = new ArrayList<String>(); if (fromAddrstr != null) { fromData = tokenizeString(email_analyzer, fromAddrstr); } if (toAddrstr != null) { toData = tokenizeString(email_analyzer, toAddrstr); } if (subject != null) { subjectData = tokenizeString(standard_analyzer, subject); } if (body != null) { bodyData = tokenizeString(standard_analyzer, body); } out.add("FROM "); out.addAll(fromData); out.add("TO "); out.addAll(toData); out.add("SUBJECT "); out.addAll(subjectData); out.add("BODY "); out.addAll(bodyData); } else { // if not tokenizep, return list with from and subject fields only out.add(fromAddrstr); out.add(subject); } } catch (MessagingException e) { System.err.println("MessagineException"); } return out; }
public void generateFileChunks(JspWriter out, HttpServletRequest req) throws IOException { long startOffset = 0; int chunkSizeToView = 0; String referrer = req.getParameter("referrer"); boolean noLink = false; if (referrer == null) { noLink = true; } String filename = req.getParameter("filename"); if (filename == null) { out.print("Invalid input (file name absent)"); return; } String namenodeInfoPortStr = req.getParameter("namenodeInfoPort"); int namenodeInfoPort = -1; if (namenodeInfoPortStr != null) namenodeInfoPort = Integer.parseInt(namenodeInfoPortStr); String chunkSizeToViewStr = req.getParameter("chunkSizeToView"); if (chunkSizeToViewStr != null && Integer.parseInt(chunkSizeToViewStr) > 0) chunkSizeToView = Integer.parseInt(chunkSizeToViewStr); else chunkSizeToView = jspHelper.defaultChunkSizeToView; if (!noLink) { out.print("<h3>Tail of File: "); JspHelper.printPathWithLinks(filename, out, namenodeInfoPort); out.print("</h3><hr>"); out.print("<a href=\"" + referrer + "\">Go Back to File View</a><hr>"); } else { out.print("<h3>" + filename + "</h3>"); } out.print("<b>Chunk size to view (in bytes, up to file's DFS block size): </b>"); out.print( "<input type=\"text\" name=\"chunkSizeToView\" value=" + chunkSizeToView + " size=10 maxlength=10>"); out.print(" <input type=\"submit\" name=\"submit\" value=\"Refresh\"><hr>"); out.print("<input type=\"hidden\" name=\"filename\" value=\"" + filename + "\">"); out.print( "<input type=\"hidden\" name=\"namenodeInfoPort\" value=\"" + namenodeInfoPort + "\">"); if (!noLink) out.print("<input type=\"hidden\" name=\"referrer\" value=\"" + referrer + "\">"); // fetch the block from the datanode that has the last block for this file DFSClient dfs = new DFSClient(jspHelper.nameNodeAddr, jspHelper.conf); List<LocatedBlock> blocks = dfs.namenode.getBlockLocations(filename, 0, Long.MAX_VALUE).getLocatedBlocks(); if (blocks == null || blocks.size() == 0) { out.print("No datanodes contain blocks of file " + filename); dfs.close(); return; } LocatedBlock lastBlk = blocks.get(blocks.size() - 1); long blockSize = lastBlk.getBlock().getNumBytes(); long blockId = lastBlk.getBlock().getBlockId(); long genStamp = lastBlk.getBlock().getGenerationStamp(); DatanodeInfo chosenNode; try { chosenNode = jspHelper.bestNode(lastBlk); } catch (IOException e) { out.print(e.toString()); dfs.close(); return; } InetSocketAddress addr = NetUtils.createSocketAddr(chosenNode.getName()); // view the last chunkSizeToView bytes while Tailing if (blockSize >= chunkSizeToView) startOffset = blockSize - chunkSizeToView; else startOffset = 0; out.print("<textarea cols=\"100\" rows=\"25\" wrap=\"virtual\" style=\"width:100%\" READONLY>"); jspHelper.streamBlockInAscii( addr, blockId, genStamp, blockSize, startOffset, chunkSizeToView, out); out.print("</textarea>"); dfs.close(); }
public int run(String[] args) throws Exception { // printUsage(); /* * SETUP */ Configuration argConf = getConf(); Hashtable<String, String> confArg = new Hashtable<String, String>(); setup(confArg, argConf); Date currentTime = new Date(); Date endDate = new Date(new Long(confArg.get("timestamp_stop"))); Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*"); Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*"); logger.info("Running GeStore"); // ZooKeeper setup Configuration config = HBaseConfiguration.create(); zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config)); zkInstance = new ZooKeeper( ZKConfig.getZKQuorumServersString(config), config.getInt("zookeeper.session.timeout", -1), zkWatcher); if (!confArg.get("task_id").isEmpty()) { confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id")); } String lockRequest = confArg.get("file_id"); if (!confArg.get("run_id").isEmpty()) lockRequest = lockRequest + "_" + confArg.get("run_id") + "_"; if (!confArg.get("task_id").isEmpty()) lockRequest = lockRequest + "_" + confArg.get("task_id") + "_"; // Get type of movement toFrom type_move = checkArgs(confArg); if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) { List<String> arguments = new ArrayList<String>(); arguments.add("-Dinput=" + confArg.get("local_path")); arguments.add("-Dtable=" + confArg.get("file_id")); arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop")); arguments.add("-Dtype=" + confArg.get("format")); arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id")); arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path")); arguments.add("-Drun_id=" + confArg.get("run_id")); if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id")); if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id")); if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add")); String lockName = lock(lockRequest); String[] argumentString = arguments.toArray(new String[arguments.size()]); adddb.main(argumentString); unlock(lockName); System.exit(0); } // Database registration dbutil db_util = new dbutil(config); db_util.register_database(confArg.get("db_name_files"), true); db_util.register_database(confArg.get("db_name_runs"), true); db_util.register_database(confArg.get("db_name_updates"), true); FileSystem hdfs = FileSystem.get(config); FileSystem localFS = FileSystem.getLocal(config); // Get source type confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id"))); confArg.put( "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id"))); if (!confArg.get("source").equals("local") && type_move == toFrom.REMOTE2LOCAL && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) { confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util))); } /* * Get previous timestamp */ Get run_id_get = new Get(confArg.get("run_id").getBytes()); Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get); KeyValue run_file_prev = run_get.getColumnLatest( "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes()); String last_timestamp = new String("0"); if (null != run_file_prev && !confArg.get("source").equals("local")) { long last_timestamp_real = run_file_prev.getTimestamp(); Long current_timestamp = new Long(confArg.get("timestamp_real")); if ((current_timestamp - last_timestamp_real) > 36000) { last_timestamp = new String(run_file_prev.getValue()); Integer lastTimestamp = new Integer(last_timestamp); lastTimestamp += 1; last_timestamp = lastTimestamp.toString(); logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate); Date last_run = new Date(run_file_prev.getTimestamp()); if (last_run.before(endDate) && !full_run) { confArg.put("timestamp_start", last_timestamp); } } } Integer tse = new Integer(confArg.get("timestamp_stop")); Integer tss = new Integer(confArg.get("timestamp_start")); if (tss > tse) { logger.info("No new version of requested file."); return 0; } /* * Generate file */ String lockName = lock(lockRequest); Get file_id_get = new Get(confArg.get("file_id").getBytes()); Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get); if (!file_get.isEmpty()) { boolean found = hasFile( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg)); if (confArg.get("source").equals("fullfile")) { found = false; } String filenames_put = getFileNames( db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg)); // Filename not found in file database if (!found && type_move == toFrom.REMOTE2LOCAL) { if (!confArg.get("source").equals("local")) { // Generate intermediate file if (getFile(hdfs, confArg, db_util) == null) { unlock(lockName); return 1; } // Put generated file into file database if (!confArg.get("format").equals("fullfile")) { putFileEntry( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), confArg.get("full_file_name"), confArg.get("source")); } } else { logger.warn("Remote file not found, and cannot be generated! File: " + confArg); unlock(lockName); return 1; } } } else { if (type_move == toFrom.REMOTE2LOCAL) { logger.warn("Remote file not found, and cannot be generated."); unlock(lockName); return 1; } } /* * Copy file * Update tables */ if (type_move == toFrom.LOCAL2REMOTE) { if (!confArg.get("format").equals("fullfile")) { putFileEntry( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg), confArg.get("source")); } putRunEntry( db_util, confArg.get("db_name_runs"), confArg.get("run_id"), confArg.get("file_id"), confArg.get("type"), confArg.get("timestamp_real"), confArg.get("timestamp_stop"), getFullPath(confArg), confArg.get("delimiter")); hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg))); } else if (type_move == toFrom.REMOTE2LOCAL) { FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*")); putRunEntry( db_util, confArg.get("db_name_runs"), confArg.get("run_id"), confArg.get("file_id"), confArg.get("type"), confArg.get("timestamp_real"), confArg.get("timestamp_stop"), getFullPath(confArg), confArg.get("delimiter")); unlock(lockName); for (FileStatus file : files) { Path cur_file = file.getPath(); Path cur_local_path = new Path(new String(confArg.get("local_path") + confArg.get("file_id"))); String suffix = getSuffix(getFileName(confArg), cur_file.getName()); if (suffix.length() > 0) { cur_local_path = cur_local_path.suffix(new String("." + suffix)); } if (confArg.get("copy").equals("true")) { String crc = hdfs.getFileChecksum(cur_file).toString(); if (checksumLocalTest(cur_local_path, crc)) { continue; } else { hdfs.copyToLocalFile(cur_file, cur_local_path); writeChecksum(cur_local_path, crc); } } else { System.out.println(cur_local_path + "\t" + cur_file); } } } unlock(lockName); return 0; }