private static Long latestVersion(Hashtable<String, String> config, dbutil db_util) throws Exception { if (!config.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) { return new Long(config.get("timestamp_stop")); } String rowName = config.get("file_id") + config.get("run_id") + "_"; if (config.get("task_id") != "") { try { rowName = rowName + String.format("%04d", new Integer(config.get("task_id"))); } catch (NumberFormatException E) { rowName = rowName + config.get("task_id"); } } Get timestampGet = new Get(rowName.getBytes()); timestampGet.addColumn("d".getBytes(), "update".getBytes()); Result timestampResult = db_util.doGet(config.get("db_name_updates"), timestampGet); KeyValue tsKv = timestampResult.getColumnLatest("d".getBytes(), "update".getBytes()); if (tsKv == null) { rowName = config.get("file_id") + "_"; timestampGet = new Get(rowName.getBytes()); timestampGet.addColumn("d".getBytes(), "update".getBytes()); timestampResult = db_util.doGet(config.get("db_name_updates"), timestampGet); tsKv = timestampResult.getColumnLatest("d".getBytes(), "update".getBytes()); } if (tsKv == null) { return new Long(Integer.MAX_VALUE); } Long latestVersion = new Long(tsKv.getTimestamp()); return latestVersion; }
private static boolean hasFile( dbutil db_util, FileSystem fs, String db_name, String file_id, String file_path) throws Exception { Get file_id_get = new Get(file_id.getBytes()); Result file_result = db_util.doGet(db_name, file_id_get); KeyValue file_names = file_result.getColumnLatest("d".getBytes(), "filenames".getBytes()); if (file_names == null) { return false; } String all_files = new String(file_names.getValue()); String[] files = all_files.split("\n"); for (String line : files) { if (line.equals(file_path)) { if (fs.globStatus(new Path(line + "*")).length == 0) { Put new_put = new Put(file_id.getBytes()); new_put.add( "d".getBytes(), "filenames".getBytes(), all_files.replace(file_path + "\n", "").getBytes()); db_util.doPut(db_name, new_put); return false; } return true; } } return false; }
private static boolean putFileEntry( dbutil db_util, FileSystem fs, String db_name, String file_id, String file_path, String source) throws Exception { String all_paths = file_path; if (hasFile(db_util, fs, db_name, file_id, file_path)) { logger.debug("File already found, putFileEntry aborting"); return false; } else { Get file_id_get = new Get(file_id.getBytes()); Result file_result = db_util.doGet(db_name, file_id_get); KeyValue file_names = file_result.getColumnLatest("d".getBytes(), "filenames".getBytes()); if (file_names != null) { String paths = new String(file_names.getValue()); all_paths = paths + "\n" + file_path; } } Put file_id_put = new Put(file_id.getBytes()); file_id_put.add("d".getBytes(), "source".getBytes(), source.getBytes()); if (!source.equals("fullfile")) { file_id_put.add("d".getBytes(), "filenames".getBytes(), all_paths.getBytes()); } db_util.doPut(db_name, file_id_put); return true; }
private static boolean putRunEntry( dbutil db_util, String db_name, String run_id, String file_id, String type, String timestamp, String timestamp_stop, String path, String regex) throws Exception { Put run_id_put = new Put(run_id.getBytes()); run_id_put.add("d".getBytes(), file_id.getBytes(), new Long(timestamp), type.getBytes()); run_id_put.add( "d".getBytes(), (file_id + "_db_timestamp").getBytes(), new Long(timestamp), timestamp_stop.getBytes()); run_id_put.add( "d".getBytes(), (file_id + "_filename").getBytes(), new Long(timestamp), path.getBytes()); run_id_put.add( "d".getBytes(), (file_id + "_regex").getBytes(), new Long(timestamp), regex.getBytes()); db_util.doPut(db_name, run_id_put); return true; }
private static String getSource(dbutil db_util, String db_name, String file_id) throws Exception { Get file_id_get = new Get(file_id.getBytes()); Result file_result = db_util.doGet(db_name, file_id_get); KeyValue file_source = file_result.getColumnLatest("d".getBytes(), "source".getBytes()); if (file_source == null) { return "local"; } return new String(file_source.getValue()); }
private static String isDatabase(dbutil db_util, String db_name, String file_id) throws Exception { Get file_id_get = new Get(file_id.getBytes()); Result file_result = db_util.doGet(db_name, file_id_get); KeyValue file_db = file_result.getColumnLatest("d".getBytes(), "database".getBytes()); if (file_db == null) { return "n"; } String db = new String(file_db.getValue()); if (db.equals("y")) { return "y"; } else { return "n"; } }
private static String getFileNames( dbutil db_util, String db_name, String file_id, String file_path) throws Exception { Get file_id_get = new Get(file_id.getBytes()); Result file_result = db_util.doGet(db_name, file_id_get); KeyValue file_names = file_result.getColumnLatest("d".getBytes(), "filenames".getBytes()); if (file_names == null) { return ""; } String all_files = new String(file_names.getValue()); String[] files = all_files.split("\n"); for (String line : files) { if (line.equals(file_path)) { return all_files; } } return all_files + "\n" + file_path; }
public int run(String[] args) throws Exception { // printUsage(); /* * SETUP */ Configuration argConf = getConf(); Hashtable<String, String> confArg = new Hashtable<String, String>(); setup(confArg, argConf); Date currentTime = new Date(); Date endDate = new Date(new Long(confArg.get("timestamp_stop"))); Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*"); Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*"); logger.info("Running GeStore"); // ZooKeeper setup Configuration config = HBaseConfiguration.create(); zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config)); zkInstance = new ZooKeeper( ZKConfig.getZKQuorumServersString(config), config.getInt("zookeeper.session.timeout", -1), zkWatcher); if (!confArg.get("task_id").isEmpty()) { confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id")); } String lockRequest = confArg.get("file_id"); if (!confArg.get("run_id").isEmpty()) lockRequest = lockRequest + "_" + confArg.get("run_id") + "_"; if (!confArg.get("task_id").isEmpty()) lockRequest = lockRequest + "_" + confArg.get("task_id") + "_"; // Get type of movement toFrom type_move = checkArgs(confArg); if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) { List<String> arguments = new ArrayList<String>(); arguments.add("-Dinput=" + confArg.get("local_path")); arguments.add("-Dtable=" + confArg.get("file_id")); arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop")); arguments.add("-Dtype=" + confArg.get("format")); arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id")); arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path")); arguments.add("-Drun_id=" + confArg.get("run_id")); if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id")); if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id")); if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add")); String lockName = lock(lockRequest); String[] argumentString = arguments.toArray(new String[arguments.size()]); adddb.main(argumentString); unlock(lockName); System.exit(0); } // Database registration dbutil db_util = new dbutil(config); db_util.register_database(confArg.get("db_name_files"), true); db_util.register_database(confArg.get("db_name_runs"), true); db_util.register_database(confArg.get("db_name_updates"), true); FileSystem hdfs = FileSystem.get(config); FileSystem localFS = FileSystem.getLocal(config); // Get source type confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id"))); confArg.put( "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id"))); if (!confArg.get("source").equals("local") && type_move == toFrom.REMOTE2LOCAL && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) { confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util))); } /* * Get previous timestamp */ Get run_id_get = new Get(confArg.get("run_id").getBytes()); Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get); KeyValue run_file_prev = run_get.getColumnLatest( "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes()); String last_timestamp = new String("0"); if (null != run_file_prev && !confArg.get("source").equals("local")) { long last_timestamp_real = run_file_prev.getTimestamp(); Long current_timestamp = new Long(confArg.get("timestamp_real")); if ((current_timestamp - last_timestamp_real) > 36000) { last_timestamp = new String(run_file_prev.getValue()); Integer lastTimestamp = new Integer(last_timestamp); lastTimestamp += 1; last_timestamp = lastTimestamp.toString(); logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate); Date last_run = new Date(run_file_prev.getTimestamp()); if (last_run.before(endDate) && !full_run) { confArg.put("timestamp_start", last_timestamp); } } } Integer tse = new Integer(confArg.get("timestamp_stop")); Integer tss = new Integer(confArg.get("timestamp_start")); if (tss > tse) { logger.info("No new version of requested file."); return 0; } /* * Generate file */ String lockName = lock(lockRequest); Get file_id_get = new Get(confArg.get("file_id").getBytes()); Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get); if (!file_get.isEmpty()) { boolean found = hasFile( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg)); if (confArg.get("source").equals("fullfile")) { found = false; } String filenames_put = getFileNames( db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg)); // Filename not found in file database if (!found && type_move == toFrom.REMOTE2LOCAL) { if (!confArg.get("source").equals("local")) { // Generate intermediate file if (getFile(hdfs, confArg, db_util) == null) { unlock(lockName); return 1; } // Put generated file into file database if (!confArg.get("format").equals("fullfile")) { putFileEntry( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), confArg.get("full_file_name"), confArg.get("source")); } } else { logger.warn("Remote file not found, and cannot be generated! File: " + confArg); unlock(lockName); return 1; } } } else { if (type_move == toFrom.REMOTE2LOCAL) { logger.warn("Remote file not found, and cannot be generated."); unlock(lockName); return 1; } } /* * Copy file * Update tables */ if (type_move == toFrom.LOCAL2REMOTE) { if (!confArg.get("format").equals("fullfile")) { putFileEntry( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg), confArg.get("source")); } putRunEntry( db_util, confArg.get("db_name_runs"), confArg.get("run_id"), confArg.get("file_id"), confArg.get("type"), confArg.get("timestamp_real"), confArg.get("timestamp_stop"), getFullPath(confArg), confArg.get("delimiter")); hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg))); } else if (type_move == toFrom.REMOTE2LOCAL) { FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*")); putRunEntry( db_util, confArg.get("db_name_runs"), confArg.get("run_id"), confArg.get("file_id"), confArg.get("type"), confArg.get("timestamp_real"), confArg.get("timestamp_stop"), getFullPath(confArg), confArg.get("delimiter")); unlock(lockName); for (FileStatus file : files) { Path cur_file = file.getPath(); Path cur_local_path = new Path(new String(confArg.get("local_path") + confArg.get("file_id"))); String suffix = getSuffix(getFileName(confArg), cur_file.getName()); if (suffix.length() > 0) { cur_local_path = cur_local_path.suffix(new String("." + suffix)); } if (confArg.get("copy").equals("true")) { String crc = hdfs.getFileChecksum(cur_file).toString(); if (checksumLocalTest(cur_local_path, crc)) { continue; } else { hdfs.copyToLocalFile(cur_file, cur_local_path); writeChecksum(cur_local_path, crc); } } else { System.out.println(cur_local_path + "\t" + cur_file); } } } unlock(lockName); return 0; }