// topK sort,write the result to hdfs public void reduce(DoubleWritable key, Iterable<DataPair> values, Context context) throws IOException, InterruptedException { for (DataPair val : values) { if (count < k) { hf.createNewHDFSFile(val.getLigandPath(), val.getVinaResult()); hf.createNewHDFSFile(val.getLogPath(), val.getVinaLog()); int position = val.getLigandPath().lastIndexOf("/"); String ligandName = val.getLigandPath().substring(position + 1); context.write(new DoubleWritable(val.getLigandDouble()), new Text(ligandName)); /* context.write(new DoubleWritable(val.getLigandDouble()), new Text(val.getLigandPath()));*/ count++; } else { break; } } }
/**
 * Docks a single ligand and keeps its result as a top-k candidate.
 *
 * <p>The input value is the ligand path. The ligand is copied to a local working
 * directory under {@code /home/hadoop/vinaJob/<tmpPath>/data/}, {@link RunVina} is run on
 * it, and the produced output and log files are read back. On success a {@link DataPair}
 * is added to {@code tree}, which is trimmed to at most {@code k} entries by dropping its
 * last element. When the run does not report {@code "success"}, the returned flag is
 * written to both the exception and the exception-backup paths. When the result cannot
 * be parsed, a "bad result" marker is written to the exception path only.
 *
 * @param key     the input key (not read here)
 * @param value   the path of the ligand to dock
 * @param context the mapper context (not read here)
 * @throws IOException          propagated from the file helpers
 * @throws InterruptedException declared by the Mapper contract
 * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
 *      org.apache.hadoop.mapreduce.Mapper.Context)
 */
public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
    final String path = value.toString();
    final String[] segments = path.split("/");
    final String ligandName = segments[segments.length - 1];

    final String localRoot = "/home/hadoop/vinaJob/" + tmpPath;
    final String outDir = localRoot + "/out/";
    final String hdfsRoot = "/vinaResult/vinaJobID/" + vinaJobID;

    // Stage the ligand locally, then make sure the output directory exists.
    final String ligandLocal = localRoot + "/data/" + ligandName;
    hf.HadoopToLocal(path, ligandLocal);
    final String out = outDir + ligandName;
    fo.createDir(outDir);
    final String log = outDir + ligandName + ".log";

    final String exceptionPath = hdfsRoot + "/exception/" + ligandName + ".exception";
    final String exceptionBackupPath =
            hdfsRoot + "/exceptionBackup/" + ligandName + ".exception";

    final RunVina vinajob =
            new RunVina(conf2Local, ligandLocal, receptorLocal, seed, out, log, path);
    final String flag = vinajob.runVina();

    if (!"success".equals(flag)) {
        // Failed run: record the returned flag in both exception locations.
        hf.createNewHDFSFile(exceptionPath, flag);
        hf.createNewHDFSFile(exceptionBackupPath, flag);
        return;
    }

    final String outHdfsPath = hdfsRoot + "/result/" + ligandName;
    final String logHdfsPath = outHdfsPath + ".log";
    final String result = fo.readFile(out);
    final String logInfo = fo.readFile(log);

    try {
        // The score is the second space-separated token on the second line of the result.
        final String secondLine = result.split("\n")[1];
        final String scoreToken = secondLine.split(" ")[1].trim();
        final double score = Double.parseDouble(scoreToken);

        tree.add(new DataPair(score, outHdfsPath, result, logHdfsPath, logInfo));
        if (tree.size() > k) {
            tree.pollLast();
        }
    } catch (Exception e) {
        hf.createNewHDFSFile(exceptionPath, path + " bad result");
    }
}
/**
 * Prepares the mapper: reads the job settings and stages the shared input files locally.
 *
 * <p>Reads {@code conf2HDFS}, {@code receptorHDFS}, {@code seed}, {@code vinaJobID} and
 * {@code k} (default 1000) from the job configuration, creates the file helpers, picks a
 * working directory name via {@code fo.randomString()}, and copies the vina configuration
 * file and the receptor file to {@code /home/hadoop/vinaJob/<tmpPath>/}, keeping their
 * original file names.
 *
 * @param context the mapper context providing the job configuration
 * @throws IOException          propagated from the HDFS-to-local copies
 * @throws InterruptedException declared by the Mapper contract
 * @see org.apache.hadoop.mapreduce.Mapper#setup(org.apache.hadoop.mapreduce.Mapper.Context)
 */
public void setup(Context context) throws IOException, InterruptedException {
    conf = context.getConfiguration();

    // Job-level settings.
    vinaJobID = conf.get("vinaJobID");
    seed = conf.get("seed");
    k = conf.getInt("k", 1000);
    conf2HDFS = conf.get("conf2HDFS");
    receptorHDFS = conf.get("receptorHDFS");

    hf = new HadoopFile();
    fo = new FileOperation();
    tmpPath = fo.randomString();

    final String workDir = "/home/hadoop/vinaJob/" + tmpPath + "/";

    // Local copies keep the file name (text after the last '/') of the HDFS source.
    final int confNameStart = conf2HDFS.lastIndexOf("/") + 1;
    conf2Local = workDir + conf2HDFS.substring(confNameStart);
    final int receptorNameStart = receptorHDFS.lastIndexOf("/") + 1;
    receptorLocal = workDir + receptorHDFS.substring(receptorNameStart);

    hf.HadoopToLocal(conf2HDFS, conf2Local);
    hf.HadoopToLocal(receptorHDFS, receptorLocal);
}
/** * start a vina hadoop job * * @param confLocalPath * @param receptorLocalPath * @param ligandPath * @param seed * @param topK * @param vinaJobID * @param node * @return */ public HashMap<String, String> startJob( String confLocalPath, String receptorLocalPath, ArrayList<String> ligandPath, String seed, int topK, String vinaJobID, int numPerNode, boolean verbose) { HashMap<String, String> hm = new HashMap<String, String>(); if (confLocalPath == null || receptorLocalPath == null || ligandPath == null || seed == null || vinaJobID == null || ligandPath.size() == 0 || topK < 0) { hm.put("flag", "false"); hm.put("hadoopID", "null"); hm.put("vinaJobID", vinaJobID); hm.put("log", "error arguments"); return hm; } GeneratePath gp = new GeneratePath(jobPath, srcDataPath); String confName = confLocalPath.substring(confLocalPath.lastIndexOf("/")); String confHDFSPath = jobPath + vinaJobID + confName; String receptorName = receptorLocalPath.substring(receptorLocalPath.lastIndexOf("/")); String receptorHDFSPATH = jobPath + vinaJobID + receptorName; HadoopFile hf; final String input = jobPath + vinaJobID + "/metadata"; final String output = jobPath + vinaJobID + "/order"; Path path = new Path(output); Configuration conf; FileSystem fs; Job job; try { gp.createMeta(ligandPath, vinaJobID, numPerNode); hf = new HadoopFile(); hf.mkdir(jobPath + "/" + vinaJobID + "/exception"); hf.mkdir(jobPath + "/" + vinaJobID + "/exceptionBackup"); hf.localToHadoop(confLocalPath, confHDFSPath); hf.localToHadoop(receptorLocalPath, receptorHDFSPATH); conf = (new HadoopConf()).getConf(); fs = FileSystem.get(conf); // set heart beat time 45min long milliSeconds = 45 * 60 * 1000; conf.setLong("mapred.task.timeout", milliSeconds); conf.set("vinaJobID", vinaJobID); conf.setInt("k", topK); conf.set("conf2HDFS", confHDFSPath); conf.set("receptorHDFS", receptorHDFSPATH); conf.set("seed", seed); if (fs.exists(path)) { fs.delete(path, true); } job = new Job(conf, vinaJobID); job.setNumReduceTasks(1); 
job.setJarByClass(VinaHadoop.class); job.setMapperClass(VinaMapper.class); job.setReducerClass(VinaReducer.class); job.setMapOutputKeyClass(DoubleWritable.class); job.setMapOutputValueClass(DataPair.class); job.setOutputKeyClass(DoubleWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(input)); FileOutputFormat.setOutputPath(job, new Path(output)); } catch (IOException e) { // TODO Auto-generated catch block hm.put("flag", "false"); hm.put("hadoopID", "null"); hm.put("vinaJobID", vinaJobID); hm.put("log", e.getMessage()); return hm; } try { if (verbose) { // System.exit(job.waitForCompletion(true) ? 0 : 1); job.waitForCompletion(true); } else { job.submit(); } } catch (ClassNotFoundException | IOException | InterruptedException e) { // TODO Auto-generated catch block hm.put("flag", "false"); hm.put("hadoopID", "null"); hm.put("vinaJobID", vinaJobID); hm.put("log", e.getMessage()); return hm; } hm.put("flag", "true"); hm.put("hadoopID", job.getJobID().toString()); hm.put("vinaJobID", vinaJobID); hm.put("log", "null"); return hm; }