Exemple #1
1
  private static Long latestVersion(Hashtable<String, String> config, dbutil db_util)
      throws Exception {
    if (!config.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) {
      return new Long(config.get("timestamp_stop"));
    }

    String rowName = config.get("file_id") + config.get("run_id") + "_";
    if (config.get("task_id") != "") {
      try {
        rowName = rowName + String.format("%04d", new Integer(config.get("task_id")));
      } catch (NumberFormatException E) {
        rowName = rowName + config.get("task_id");
      }
    }
    Get timestampGet = new Get(rowName.getBytes());
    timestampGet.addColumn("d".getBytes(), "update".getBytes());
    Result timestampResult = db_util.doGet(config.get("db_name_updates"), timestampGet);
    KeyValue tsKv = timestampResult.getColumnLatest("d".getBytes(), "update".getBytes());
    if (tsKv == null) {
      rowName = config.get("file_id") + "_";
      timestampGet = new Get(rowName.getBytes());
      timestampGet.addColumn("d".getBytes(), "update".getBytes());
      timestampResult = db_util.doGet(config.get("db_name_updates"), timestampGet);
      tsKv = timestampResult.getColumnLatest("d".getBytes(), "update".getBytes());
    }

    if (tsKv == null) {
      return new Long(Integer.MAX_VALUE);
    }
    Long latestVersion = new Long(tsKv.getTimestamp());
    return latestVersion;
  }
Exemple #2
0
  private static boolean hasFile(
      dbutil db_util, FileSystem fs, String db_name, String file_id, String file_path)
      throws Exception {
    Get file_id_get = new Get(file_id.getBytes());
    Result file_result = db_util.doGet(db_name, file_id_get);

    KeyValue file_names = file_result.getColumnLatest("d".getBytes(), "filenames".getBytes());
    if (file_names == null) {
      return false;
    }
    String all_files = new String(file_names.getValue());
    String[] files = all_files.split("\n");
    for (String line : files) {
      if (line.equals(file_path)) {
        if (fs.globStatus(new Path(line + "*")).length == 0) {
          Put new_put = new Put(file_id.getBytes());
          new_put.add(
              "d".getBytes(),
              "filenames".getBytes(),
              all_files.replace(file_path + "\n", "").getBytes());
          db_util.doPut(db_name, new_put);
          return false;
        }
        return true;
      }
    }
    return false;
  }
Exemple #3
0
 private static boolean putFileEntry(
     dbutil db_util,
     FileSystem fs,
     String db_name,
     String file_id,
     String file_path,
     String source)
     throws Exception {
   String all_paths = file_path;
   if (hasFile(db_util, fs, db_name, file_id, file_path)) {
     logger.debug("File already found, putFileEntry aborting");
     return false;
   } else {
     Get file_id_get = new Get(file_id.getBytes());
     Result file_result = db_util.doGet(db_name, file_id_get);
     KeyValue file_names = file_result.getColumnLatest("d".getBytes(), "filenames".getBytes());
     if (file_names != null) {
       String paths = new String(file_names.getValue());
       all_paths = paths + "\n" + file_path;
     }
   }
   Put file_id_put = new Put(file_id.getBytes());
   file_id_put.add("d".getBytes(), "source".getBytes(), source.getBytes());
   if (!source.equals("fullfile")) {
     file_id_put.add("d".getBytes(), "filenames".getBytes(), all_paths.getBytes());
   }
   db_util.doPut(db_name, file_id_put);
   return true;
 }
  public int run(String[] args) throws Exception {
    Configuration argConf = getConf();

    // JobConf conf = new JobConf(diffdb.class);
    Configuration config = HBaseConfiguration.create();
    HBaseAdmin hbAdmin = new HBaseAdmin(config);
    dbutil db_util = new dbutil(config);

    HTable runTable = new HTable(config, "gestore_runs");
    Get runGet = new Get(argConf.get("id").getBytes());
    Result pipeline = runTable.get(runGet);

    NavigableMap<byte[], byte[]> pipeMap = pipeline.getFamilyMap("d".getBytes());

    Map.Entry<byte[], byte[]> results = pipeMap.pollFirstEntry();

    HashMap<String, HashMap<String, String>> resultMap =
        new HashMap<String, HashMap<String, String>>();

    while (results != null) {
      String resultKey = new String(results.getKey());
      String resultValue = new String(results.getValue());
      String field = "type";
      HashMap<String, String> tempMap = new HashMap<String, String>();
      String entry = resultKey;

      if (resultKey.endsWith("_db_timestamp")) {
        field = "db_timestamp";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_db_timestamp"));
      } else if (resultKey.endsWith("_filename")) {
        field = "filename";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_filename"));
      } else if (resultKey.endsWith("_regex")) {
        field = "regex";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_regex"));
      }

      if (resultMap.containsKey(entry)) {
        tempMap = resultMap.get(entry);
      }

      tempMap.put(field, resultValue);
      resultMap.put(entry, tempMap);

      // System.out.println("Key: " + resultKey + " Value: " + resultValue);
      results = pipeMap.pollFirstEntry();
    }

    for (String key : resultMap.keySet()) {
      System.out.println("File ID: " + key);
      for (String subKey : resultMap.get(key).keySet()) {
        // System.out.println("\t " + subKey + "\t\t" + resultMap.get(key).get(subKey));
        System.out.format("  %1$-20s  %2$s\n", subKey, resultMap.get(key).get(subKey));
      }
    }

    return 0;
  }
Exemple #5
0
 private static String getSource(dbutil db_util, String db_name, String file_id) throws Exception {
   Get file_id_get = new Get(file_id.getBytes());
   Result file_result = db_util.doGet(db_name, file_id_get);
   KeyValue file_source = file_result.getColumnLatest("d".getBytes(), "source".getBytes());
   if (file_source == null) {
     return "local";
   }
   return new String(file_source.getValue());
 }
Exemple #6
0
 private static String isDatabase(dbutil db_util, String db_name, String file_id)
     throws Exception {
   Get file_id_get = new Get(file_id.getBytes());
   Result file_result = db_util.doGet(db_name, file_id_get);
   KeyValue file_db = file_result.getColumnLatest("d".getBytes(), "database".getBytes());
   if (file_db == null) {
     return "n";
   }
   String db = new String(file_db.getValue());
   if (db.equals("y")) {
     return "y";
   } else {
     return "n";
   }
 }
Exemple #7
0
  private static String getFileNames(
      dbutil db_util, String db_name, String file_id, String file_path) throws Exception {
    Get file_id_get = new Get(file_id.getBytes());
    Result file_result = db_util.doGet(db_name, file_id_get);

    KeyValue file_names = file_result.getColumnLatest("d".getBytes(), "filenames".getBytes());
    if (file_names == null) {
      return "";
    }
    String all_files = new String(file_names.getValue());
    String[] files = all_files.split("\n");
    for (String line : files) {
      if (line.equals(file_path)) {
        return all_files;
      }
    }
    return all_files + "\n" + file_path;
  }
Exemple #8
0
  public int run(String[] args) throws Exception {
    // printUsage();
    /*
     * SETUP
     */
    Configuration argConf = getConf();
    Hashtable<String, String> confArg = new Hashtable<String, String>();
    setup(confArg, argConf);
    Date currentTime = new Date();
    Date endDate = new Date(new Long(confArg.get("timestamp_stop")));
    Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*");
    Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*");
    logger.info("Running GeStore");

    // ZooKeeper setup
    Configuration config = HBaseConfiguration.create();
    zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config));
    zkInstance =
        new ZooKeeper(
            ZKConfig.getZKQuorumServersString(config),
            config.getInt("zookeeper.session.timeout", -1),
            zkWatcher);

    if (!confArg.get("task_id").isEmpty()) {
      confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id"));
    }

    String lockRequest = confArg.get("file_id");
    if (!confArg.get("run_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("run_id") + "_";
    if (!confArg.get("task_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("task_id") + "_";

    // Get type of movement
    toFrom type_move = checkArgs(confArg);
    if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) {
      List<String> arguments = new ArrayList<String>();
      arguments.add("-Dinput=" + confArg.get("local_path"));
      arguments.add("-Dtable=" + confArg.get("file_id"));
      arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop"));
      arguments.add("-Dtype=" + confArg.get("format"));
      arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id"));
      arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path"));
      arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id"));
      if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add"));
      String lockName = lock(lockRequest);
      String[] argumentString = arguments.toArray(new String[arguments.size()]);
      adddb.main(argumentString);
      unlock(lockName);
      System.exit(0);
    }

    // Database registration

    dbutil db_util = new dbutil(config);
    db_util.register_database(confArg.get("db_name_files"), true);
    db_util.register_database(confArg.get("db_name_runs"), true);
    db_util.register_database(confArg.get("db_name_updates"), true);
    FileSystem hdfs = FileSystem.get(config);
    FileSystem localFS = FileSystem.getLocal(config);

    // Get source type
    confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    confArg.put(
        "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    if (!confArg.get("source").equals("local")
        && type_move == toFrom.REMOTE2LOCAL
        && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) {
      confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util)));
    }

    /*
     * Get previous timestamp
     */
    Get run_id_get = new Get(confArg.get("run_id").getBytes());
    Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get);
    KeyValue run_file_prev =
        run_get.getColumnLatest(
            "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes());
    String last_timestamp = new String("0");
    if (null != run_file_prev && !confArg.get("source").equals("local")) {
      long last_timestamp_real = run_file_prev.getTimestamp();
      Long current_timestamp = new Long(confArg.get("timestamp_real"));
      if ((current_timestamp - last_timestamp_real) > 36000) {
        last_timestamp = new String(run_file_prev.getValue());
        Integer lastTimestamp = new Integer(last_timestamp);
        lastTimestamp += 1;
        last_timestamp = lastTimestamp.toString();
        logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate);
        Date last_run = new Date(run_file_prev.getTimestamp());
        if (last_run.before(endDate) && !full_run) {
          confArg.put("timestamp_start", last_timestamp);
        }
      }
    }

    Integer tse = new Integer(confArg.get("timestamp_stop"));
    Integer tss = new Integer(confArg.get("timestamp_start"));
    if (tss > tse) {
      logger.info("No new version of requested file.");
      return 0;
    }

    /*
     * Generate file
     */

    String lockName = lock(lockRequest);

    Get file_id_get = new Get(confArg.get("file_id").getBytes());
    Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get);
    if (!file_get.isEmpty()) {
      boolean found =
          hasFile(
              db_util,
              hdfs,
              confArg.get("db_name_files"),
              confArg.get("file_id"),
              getFullPath(confArg));
      if (confArg.get("source").equals("fullfile")) {
        found = false;
      }
      String filenames_put =
          getFileNames(
              db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg));
      // Filename not found in file database
      if (!found && type_move == toFrom.REMOTE2LOCAL) {
        if (!confArg.get("source").equals("local")) {
          // Generate intermediate file
          if (getFile(hdfs, confArg, db_util) == null) {
            unlock(lockName);
            return 1;
          }
          // Put generated file into file database
          if (!confArg.get("format").equals("fullfile")) {
            putFileEntry(
                db_util,
                hdfs,
                confArg.get("db_name_files"),
                confArg.get("file_id"),
                confArg.get("full_file_name"),
                confArg.get("source"));
          }
        } else {
          logger.warn("Remote file not found, and cannot be generated! File: " + confArg);
          unlock(lockName);
          return 1;
        }
      }
    } else {
      if (type_move == toFrom.REMOTE2LOCAL) {
        logger.warn("Remote file not found, and cannot be generated.");
        unlock(lockName);
        return 1;
      }
    }

    /*
     * Copy file
     * Update tables
     */

    if (type_move == toFrom.LOCAL2REMOTE) {
      if (!confArg.get("format").equals("fullfile")) {
        putFileEntry(
            db_util,
            hdfs,
            confArg.get("db_name_files"),
            confArg.get("file_id"),
            getFullPath(confArg),
            confArg.get("source"));
      }
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg)));
    } else if (type_move == toFrom.REMOTE2LOCAL) {
      FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*"));
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      unlock(lockName);
      for (FileStatus file : files) {
        Path cur_file = file.getPath();
        Path cur_local_path =
            new Path(new String(confArg.get("local_path") + confArg.get("file_id")));
        String suffix = getSuffix(getFileName(confArg), cur_file.getName());
        if (suffix.length() > 0) {
          cur_local_path = cur_local_path.suffix(new String("." + suffix));
        }
        if (confArg.get("copy").equals("true")) {
          String crc = hdfs.getFileChecksum(cur_file).toString();
          if (checksumLocalTest(cur_local_path, crc)) {
            continue;
          } else {
            hdfs.copyToLocalFile(cur_file, cur_local_path);
            writeChecksum(cur_local_path, crc);
          }
        } else {
          System.out.println(cur_local_path + "\t" + cur_file);
        }
      }
    }
    unlock(lockName);
    return 0;
  }