public int run(String[] args) throws Exception { Configuration argConf = getConf(); // JobConf conf = new JobConf(diffdb.class); Configuration config = HBaseConfiguration.create(); HBaseAdmin hbAdmin = new HBaseAdmin(config); dbutil db_util = new dbutil(config); HTable runTable = new HTable(config, "gestore_runs"); Get runGet = new Get(argConf.get("id").getBytes()); Result pipeline = runTable.get(runGet); NavigableMap<byte[], byte[]> pipeMap = pipeline.getFamilyMap("d".getBytes()); Map.Entry<byte[], byte[]> results = pipeMap.pollFirstEntry(); HashMap<String, HashMap<String, String>> resultMap = new HashMap<String, HashMap<String, String>>(); while (results != null) { String resultKey = new String(results.getKey()); String resultValue = new String(results.getValue()); String field = "type"; HashMap<String, String> tempMap = new HashMap<String, String>(); String entry = resultKey; if (resultKey.endsWith("_db_timestamp")) { field = "db_timestamp"; entry = resultKey.substring(0, resultKey.lastIndexOf("_db_timestamp")); } else if (resultKey.endsWith("_filename")) { field = "filename"; entry = resultKey.substring(0, resultKey.lastIndexOf("_filename")); } else if (resultKey.endsWith("_regex")) { field = "regex"; entry = resultKey.substring(0, resultKey.lastIndexOf("_regex")); } if (resultMap.containsKey(entry)) { tempMap = resultMap.get(entry); } tempMap.put(field, resultValue); resultMap.put(entry, tempMap); // System.out.println("Key: " + resultKey + " Value: " + resultValue); results = pipeMap.pollFirstEntry(); } for (String key : resultMap.keySet()) { System.out.println("File ID: " + key); for (String subKey : resultMap.get(key).keySet()) { // System.out.println("\t " + subKey + "\t\t" + resultMap.get(key).get(subKey)); System.out.format(" %1$-20s %2$s\n", subKey, resultMap.get(key).get(subKey)); } } return 0; }
/** Remove the temporary block file (if any) */ public synchronized void unfinalizeBlock(Block b) throws IOException { // remove the block from in-memory data structure ActiveFile activefile = ongoingCreates.remove(b); if (activefile == null) { return; } volumeMap.remove(b); // delete the on-disk temp file if (delBlockFromDisk(activefile.file, getMetaFile(activefile.file, b), b)) { DataNode.LOG.warn("Block " + b + " unfinalized and removed. "); } }
/** Turn the block identifier into a filename. */ public synchronized File getFile(Block b) { DatanodeBlockInfo info = volumeMap.get(b); if (info != null) { return info.getFile(); } return null; }
/** * Make a copy of the block if this block is linked to an existing snapshot. This ensures that * modifying this block does not modify data in any existing snapshots. * * @param block Block * @param numLinks Detach if the number of links exceed this value * @throws IOException * @return - true if the specified block was detached */ public boolean detachBlock(Block block, int numLinks) throws IOException { DatanodeBlockInfo info = null; synchronized (this) { info = volumeMap.get(block); } return info.detachBlock(block, numLinks); }
synchronized File createTmpFile(FSVolume vol, Block blk) throws IOException { if (vol == null) { vol = volumeMap.get(blk).getVolume(); if (vol == null) { throw new IOException("Could not find volume for block " + blk); } } return vol.createTmpFile(blk); }
/** Complete the block write! */ public synchronized void finalizeBlock(Block b) throws IOException { ActiveFile activeFile = ongoingCreates.get(b); if (activeFile == null) { throw new IOException("Block " + b + " is already finalized."); } File f = activeFile.file; if (f == null || !f.exists()) { throw new IOException("No temporary file " + f + " for block " + b); } FSVolume v = volumeMap.get(b).getVolume(); if (v == null) { throw new IOException("No volume for temporary file " + f + " for block " + b); } File dest = null; dest = v.addBlock(b, f); volumeMap.put(b, new DatanodeBlockInfo(v, dest)); ongoingCreates.remove(b); }
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { Configuration c = context.getConfiguration(); String s = value.toString(); String input[] = s.split(","); Text outputkey = new Text(); Text outputvalue = new Text(); double result = 0.0; /* multiplies matrix and vector entry with matching column value */ result = (Double.parseDouble(input[2])) * (vector.get(Long.parseLong(input[1]))); outputkey.set(input[0]); outputvalue.set(Double.toString(result)); context.write(outputkey, outputvalue); }
/** {@inheritDoc} */ public void validateBlockMetadata(Block b) throws IOException { DatanodeBlockInfo info = volumeMap.get(b); if (info == null) { throw new IOException("Block " + b + " does not exist in volumeMap."); } FSVolume v = info.getVolume(); File tmp = v.getTmpFile(b); File f = getFile(b); if (f == null) { f = tmp; } if (f == null) { throw new IOException("Block " + b + " does not exist on disk."); } if (!f.exists()) { throw new IOException("Block " + b + " block file " + f + " does not exist on disk."); } if (b.getNumBytes() != f.length()) { throw new IOException( "Block " + b + " length is " + b.getNumBytes() + " does not match block file length " + f.length()); } File meta = getMetaFile(f, b); if (meta == null) { throw new IOException("Block " + b + " metafile does not exist."); } if (!meta.exists()) { throw new IOException("Block " + b + " metafile " + meta + " does not exist on disk."); } if (meta.length() == 0) { throw new IOException("Block " + b + " metafile " + meta + " is empty."); } long stamp = parseGenerationStamp(f, meta); if (stamp != b.getGenerationStamp()) { throw new IOException( "Block " + b + " genstamp is " + b.getGenerationStamp() + " does not match meta file stamp " + stamp); } }
void getVolumeMap(HashMap<Block, DatanodeBlockInfo> volumeMap, FSVolume volume) { if (children != null) { for (int i = 0; i < children.length; i++) { children[i].getVolumeMap(volumeMap, volume); } } File blockFiles[] = dir.listFiles(); for (int i = 0; i < blockFiles.length; i++) { if (Block.isBlockFilename(blockFiles[i])) { long genStamp = getGenerationStampFromFile(blockFiles, blockFiles[i]); volumeMap.put( new Block(blockFiles[i], blockFiles[i].length(), genStamp), new DatanodeBlockInfo(volume, blockFiles[i])); } } }
/* called once at the beginning of the task */ public void setup(Context context) throws IOException, InterruptedException { BufferedReader br = null; Path[] files = DistributedCache.getLocalCacheFiles(context.getConfiguration()); if (files != null && files.length > 0) br = new BufferedReader(new FileReader(files[0].toString())); String line = null; /* reads the cached file into a hashmap */ try { while ((line = br.readLine()) != null) { String input[] = line.split(","); vector.put(Long.valueOf(input[0]), Double.valueOf(input[1])); } } finally { br.close(); } }
/** Returns handles to the block file and its metadata file */ public synchronized BlockInputStreams getTmpInputStreams(Block b, long blkOffset, long ckoff) throws IOException { DatanodeBlockInfo info = volumeMap.get(b); if (info == null) { throw new IOException("Block " + b + " does not exist in volumeMap."); } FSVolume v = info.getVolume(); File blockFile = v.getTmpFile(b); RandomAccessFile blockInFile = new RandomAccessFile(blockFile, "r"); if (blkOffset > 0) { blockInFile.seek(blkOffset); } File metaFile = getMetaFile(blockFile, b); RandomAccessFile metaInFile = new RandomAccessFile(metaFile, "r"); if (ckoff > 0) { metaInFile.seek(ckoff); } return new BlockInputStreams( new FileInputStream(blockInFile.getFD()), new FileInputStream(metaInFile.getFD())); }
/** Sets the offset in the block to which the the next write will write data to. */ public void setChannelPosition(Block b, BlockWriteStreams streams, long dataOffset, long ckOffset) throws IOException { long size = 0; synchronized (this) { FSVolume vol = volumeMap.get(b).getVolume(); size = vol.getTmpFile(b).length(); } if (size < dataOffset) { String msg = "Trying to change block file offset of block " + b + " to " + dataOffset + " but actual size of file is " + size; throw new IOException(msg); } FileOutputStream file = (FileOutputStream) streams.dataOut; file.getChannel().position(dataOffset); file = (FileOutputStream) streams.checksumOut; file.getChannel().position(ckOffset); }
public void reduce( IntWritable key, Iterator<Text> values, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { HashMap<String, Integer> countries_map = new HashMap<String, Integer>(); ArrayList<Integer> counties = new ArrayList<>(); String cp = new String(); while (values.hasNext()) { cp = values.next().toString(); if (countries_map.containsKey(cp)) { countries_map.put(cp, countries_map.get(cp) + 1); } else { countries_map.put(cp, 1); } } for (java.util.Map.Entry<String, Integer> entry : countries_map.entrySet()) { counties.add(entry.getValue()); } output.collect( key, new Text( "" + countries_map.entrySet().size() + " " + Collections.min(counties) + " " + median(counties) + " " + Collections.max(counties) + " " + mean(counties) + " " + standard_deviation(counties))); }
/** * Start writing to a block file If isRecovery is true and the block pre-exists, then we kill all * volumeMap.put(b, v); volumeMap.put(b, v); other threads that might be writing to this block, * and then reopen the file. */ public BlockWriteStreams writeToBlock(Block b, boolean isRecovery) throws IOException { // // Make sure the block isn't a valid one - we're still creating it! // if (isValidBlock(b)) { if (!isRecovery) { throw new BlockAlreadyExistsException( "Block " + b + " is valid, and cannot be written to."); } // If the block was successfully finalized because all packets // were successfully processed at the Datanode but the ack for // some of the packets were not received by the client. The client // re-opens the connection and retries sending those packets. // The other reason is that an "append" is occurring to this block. detachBlock(b, 1); } long blockSize = b.getNumBytes(); // // Serialize access to /tmp, and check if file already there. // File f = null; List<Thread> threads = null; synchronized (this) { // // Is it already in the create process? // ActiveFile activeFile = ongoingCreates.get(b); if (activeFile != null) { f = activeFile.file; threads = activeFile.threads; if (!isRecovery) { throw new BlockAlreadyExistsException( "Block " + b + " has already been started (though not completed), and thus cannot be created."); } else { for (Thread thread : threads) { thread.interrupt(); } } ongoingCreates.remove(b); } FSVolume v = null; if (!isRecovery) { v = volumes.getNextVolume(blockSize); // create temporary file to hold block in the designated volume f = createTmpFile(v, b); volumeMap.put(b, new DatanodeBlockInfo(v)); } else if (f != null) { DataNode.LOG.info("Reopen already-open Block for append " + b); // create or reuse temporary file to hold block in the // designated volume v = volumeMap.get(b).getVolume(); volumeMap.put(b, new DatanodeBlockInfo(v)); } else { // reopening block for appending to it. DataNode.LOG.info("Reopen Block for append " + b); v = volumeMap.get(b).getVolume(); f = createTmpFile(v, b); File blkfile = getBlockFile(b); File oldmeta = getMetaFile(b); File newmeta = getMetaFile(f, b); // rename meta file to tmp directory DataNode.LOG.debug("Renaming " + oldmeta + " to " + newmeta); if (!oldmeta.renameTo(newmeta)) { throw new IOException( "Block " + b + " reopen failed. " + " Unable to move meta file " + oldmeta + " to tmp dir " + newmeta); } // rename block file to tmp directory DataNode.LOG.debug("Renaming " + blkfile + " to " + f); if (!blkfile.renameTo(f)) { if (!f.delete()) { throw new IOException( "Block " + b + " reopen failed. " + " Unable to remove file " + f); } if (!blkfile.renameTo(f)) { throw new IOException( "Block " + b + " reopen failed. " + " Unable to move block file " + blkfile + " to tmp dir " + f); } } volumeMap.put(b, new DatanodeBlockInfo(v)); } if (f == null) { DataNode.LOG.warn("Block " + b + " reopen failed " + " Unable to locate tmp file."); throw new IOException("Block " + b + " reopen failed " + " Unable to locate tmp file."); } ongoingCreates.put(b, new ActiveFile(f, threads)); } try { if (threads != null) { for (Thread thread : threads) { thread.join(); } } } catch (InterruptedException e) { throw new IOException("Recovery waiting for thread interrupted."); } // // Finally, allow a writer to the block file // REMIND - mjc - make this a filter stream that enforces a max // block size, so clients can't go crazy // File metafile = getMetaFile(f, b); DataNode.LOG.debug("writeTo blockfile is " + f + " of size " + f.length()); DataNode.LOG.debug("writeTo metafile is " + metafile + " of size " + metafile.length()); return createBlockWriteStreams(f, metafile); }
/** * Try to update an old block to a new block. If there are ongoing create threads running for the * old block, the threads will be returned without updating the block. * * @return ongoing create threads if there is any. Otherwise, return null. */ private synchronized List<Thread> tryUpdateBlock(Block oldblock, Block newblock) throws IOException { // check ongoing create threads final ActiveFile activefile = ongoingCreates.get(oldblock); if (activefile != null && !activefile.threads.isEmpty()) { // remove dead threads for (Iterator<Thread> i = activefile.threads.iterator(); i.hasNext(); ) { final Thread t = i.next(); if (!t.isAlive()) { i.remove(); } } // return living threads if (!activefile.threads.isEmpty()) { return new ArrayList<Thread>(activefile.threads); } } // No ongoing create threads is alive. Update block. File blockFile = findBlockFile(oldblock.getBlockId()); if (blockFile == null) { throw new IOException("Block " + oldblock + " does not exist."); } File oldMetaFile = findMetaFile(blockFile); long oldgs = parseGenerationStamp(blockFile, oldMetaFile); // rename meta file to a tmp file File tmpMetaFile = new File( oldMetaFile.getParent(), oldMetaFile.getName() + "_tmp" + newblock.getGenerationStamp()); if (!oldMetaFile.renameTo(tmpMetaFile)) { throw new IOException("Cannot rename block meta file to " + tmpMetaFile); } // update generation stamp if (oldgs > newblock.getGenerationStamp()) { throw new IOException( "Cannot update block (id=" + newblock.getBlockId() + ") generation stamp from " + oldgs + " to " + newblock.getGenerationStamp()); } // update length if (newblock.getNumBytes() > oldblock.getNumBytes()) { throw new IOException( "Cannot update block file (=" + blockFile + ") length from " + oldblock.getNumBytes() + " to " + newblock.getNumBytes()); } if (newblock.getNumBytes() < oldblock.getNumBytes()) { truncateBlock(blockFile, tmpMetaFile, oldblock.getNumBytes(), newblock.getNumBytes()); } // rename the tmp file to the new meta file (with new generation stamp) File newMetaFile = getMetaFile(blockFile, newblock); if (!tmpMetaFile.renameTo(newMetaFile)) { throw new IOException("Cannot rename tmp meta file to " + newMetaFile); } updateBlockMap(ongoingCreates, oldblock, newblock); updateBlockMap(volumeMap, oldblock, newblock); // paranoia! verify that the contents of the stored block // matches the block file on disk. validateBlockMetadata(newblock); return null; }
/** * We're informed that a block is no longer valid. We could lazily garbage-collect the block, but * why bother? just get rid of it. */ public void invalidate(Block invalidBlks[]) throws IOException { boolean error = false; for (int i = 0; i < invalidBlks.length; i++) { File f = null; FSVolume v; synchronized (this) { f = getFile(invalidBlks[i]); DatanodeBlockInfo dinfo = volumeMap.get(invalidBlks[i]); if (dinfo == null) { DataNode.LOG.warn( "Unexpected error trying to delete block " + invalidBlks[i] + ". BlockInfo not found in volumeMap."); error = true; continue; } v = dinfo.getVolume(); if (f == null) { DataNode.LOG.warn( "Unexpected error trying to delete block " + invalidBlks[i] + ". Block not found in blockMap." + ((v == null) ? " " : " Block found in volumeMap.")); error = true; continue; } if (v == null) { DataNode.LOG.warn( "Unexpected error trying to delete block " + invalidBlks[i] + ". No volume for this block." + " Block found in blockMap. " + f + "."); error = true; continue; } File parent = f.getParentFile(); if (parent == null) { DataNode.LOG.warn( "Unexpected error trying to delete block " + invalidBlks[i] + ". Parent not found for file " + f + "."); error = true; continue; } v.clearPath(parent); volumeMap.remove(invalidBlks[i]); } File metaFile = getMetaFile(f, invalidBlks[i]); long blockSize = f.length() + metaFile.length(); if (!f.delete() || (!metaFile.delete() && metaFile.exists())) { DataNode.LOG.warn( "Unexpected error trying to delete block " + invalidBlks[i] + " at file " + f); error = true; continue; } v.decDfsUsed(blockSize); DataNode.LOG.info("Deleting block " + invalidBlks[i] + " file " + f); if (f.exists()) { // // This is a temporary check especially for hadoop-1220. // This will go away in the future. // DataNode.LOG.info("File " + f + " was deleted but still exists!"); } } if (error) { throw new IOException("Error in deleting blocks."); } }