Esempio n. 1
0
 /**
  * This method checks to see if symlinks are to be create for the localized cache files in the
  * current working directory
  *
  * @param conf the jobconf
  * @return true if symlinks are to be created- else return false
  */
 public static boolean getSymlink(Configuration conf) {
   String result = conf.get("mapred.create.symlink");
   if ("yes".equals(result)) {
     return true;
   }
   return false;
 }
 /**
  * This method checks to see if symlinks are to be create for the localized cache files in the
  * current working directory Used by internal DistributedCache code.
  *
  * @param conf the jobconf
  * @return true if symlinks are to be created- else return false
  */
 public static boolean getSymlink(Configuration conf) {
   String result = conf.get(CACHE_SYMLINK);
   if ("yes".equals(result)) {
     return true;
   }
   return false;
 }
  public int run(String[] args) throws Exception {
    Configuration argConf = getConf();

    // JobConf conf = new JobConf(diffdb.class);
    Configuration config = HBaseConfiguration.create();
    HBaseAdmin hbAdmin = new HBaseAdmin(config);
    dbutil db_util = new dbutil(config);

    HTable runTable = new HTable(config, "gestore_runs");
    Get runGet = new Get(argConf.get("id").getBytes());
    Result pipeline = runTable.get(runGet);

    NavigableMap<byte[], byte[]> pipeMap = pipeline.getFamilyMap("d".getBytes());

    Map.Entry<byte[], byte[]> results = pipeMap.pollFirstEntry();

    HashMap<String, HashMap<String, String>> resultMap =
        new HashMap<String, HashMap<String, String>>();

    while (results != null) {
      String resultKey = new String(results.getKey());
      String resultValue = new String(results.getValue());
      String field = "type";
      HashMap<String, String> tempMap = new HashMap<String, String>();
      String entry = resultKey;

      if (resultKey.endsWith("_db_timestamp")) {
        field = "db_timestamp";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_db_timestamp"));
      } else if (resultKey.endsWith("_filename")) {
        field = "filename";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_filename"));
      } else if (resultKey.endsWith("_regex")) {
        field = "regex";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_regex"));
      }

      if (resultMap.containsKey(entry)) {
        tempMap = resultMap.get(entry);
      }

      tempMap.put(field, resultValue);
      resultMap.put(entry, tempMap);

      // System.out.println("Key: " + resultKey + " Value: " + resultValue);
      results = pipeMap.pollFirstEntry();
    }

    for (String key : resultMap.keySet()) {
      System.out.println("File ID: " + key);
      for (String subKey : resultMap.get(key).keySet()) {
        // System.out.println("\t " + subKey + "\t\t" + resultMap.get(key).get(subKey));
        System.out.format("  %1$-20s  %2$s\n", subKey, resultMap.get(key).get(subKey));
      }
    }

    return 0;
  }
Esempio n. 4
0
 public static InetSocketAddress getAddress(Configuration conf) {
   String jobTrackerStr = conf.get("mapred.job.tracker", "localhost:8012");
   int colon = jobTrackerStr.indexOf(":");
   if (colon < 0) {
     throw new RuntimeException("Bad mapred.job.tracker: " + jobTrackerStr);
   }
   String jobTrackerName = jobTrackerStr.substring(0, colon);
   int jobTrackerPort = Integer.parseInt(jobTrackerStr.substring(colon + 1));
   return new InetSocketAddress(jobTrackerName, jobTrackerPort);
 }
 /**
  * Add a file path to the current set of classpath entries. It adds the file to cache as well.
  * Intended to be used by user code.
  *
  * @param file Path of the file to be added
  * @param conf Configuration that contains the classpath setting
  * @param fs FileSystem with respect to which {@code archivefile} should be interpreted.
  */
 public static void addFileToClassPath(Path file, Configuration conf, FileSystem fs)
     throws IOException {
   String filepath = file.toUri().getPath();
   String classpath = conf.get("mapred.job.classpath.files");
   conf.set(
       "mapred.job.classpath.files",
       classpath == null ? filepath : classpath + System.getProperty("path.separator") + filepath);
   URI uri = fs.makeQualified(file).toUri();
   addCacheFile(uri, conf);
 }
Esempio n. 6
0
 /**
  * Get the archive entries in classpath as an array of Path
  *
  * @param conf Configuration that contains the classpath setting
  */
 public static Path[] getArchiveClassPaths(Configuration conf) {
   String classpath = conf.get("mapred.job.classpath.archives");
   if (classpath == null) return null;
   ArrayList list =
       Collections.list(new StringTokenizer(classpath, System.getProperty("path.separator")));
   Path[] paths = new Path[list.size()];
   for (int i = 0; i < list.size(); i++) {
     paths[i] = new Path((String) list.get(i));
   }
   return paths;
 }
  /**
   * Add an archive path to the current set of classpath entries. It adds the archive to cache as
   * well. Intended to be used by user code.
   *
   * @param archive Path of the archive to be added
   * @param conf Configuration that contains the classpath setting
   * @param fs FileSystem with respect to which {@code archive} should be interpreted.
   */
  public static void addArchiveToClassPath(Path archive, Configuration conf, FileSystem fs)
      throws IOException {
    String archivepath = archive.toUri().getPath();
    String classpath = conf.get("mapred.job.classpath.archives");
    conf.set(
        "mapred.job.classpath.archives",
        classpath == null
            ? archivepath
            : classpath + System.getProperty("path.separator") + archivepath);
    URI uri = fs.makeQualified(archive).toUri();

    addCacheArchive(uri, conf);
  }
Esempio n. 8
0
  private static URI addArchiveToClassPathHelper(Path archive, Configuration conf)
      throws IOException {

    String classpath = conf.get("mapred.job.classpath.archives");

    // the scheme/authority use ':' as separator. put the unqualified path in classpath
    String archivePath = archive.toUri().getPath();

    conf.set(
        "mapred.job.classpath.archives",
        classpath == null
            ? archivePath
            : classpath + System.getProperty("path.separator") + archivePath);
    return archive.makeQualified(archive.getFileSystem(conf)).toUri();
  }
Esempio n. 9
0
  /**
   * Construct in a named directory.
   *
   * @param conf
   * @param dir
   * @throws IOException
   */
  public NutchBean(Configuration conf, Path dir) throws IOException {
    this.conf = conf;
    this.fs = FileSystem.get(this.conf);
    if (dir == null) {
      dir = new Path(this.conf.get("searcher.dir", "crawl"));
    }
    final Path luceneConfig = new Path(dir, "search-servers.txt");
    final Path solrConfig = new Path(dir, "solr-servers.txt");

    if (fs.exists(luceneConfig) || fs.exists(solrConfig)) {
      searchBean = new DistributedSearchBean(conf, luceneConfig, solrConfig);
    } else {
      final Path indexDir = new Path(dir, "index");
      final Path indexesDir = new Path(dir, "indexes");
      searchBean = new LuceneSearchBean(conf, indexDir, indexesDir);
    }

    String tableName = conf.get("table.name", "webtable");
    table = new HTable(tableName);

    summarizerFactory = new SummarizerFactory(conf);
  }
Esempio n. 10
0
 /**
  * Add a file to be localized to the conf
  *
  * @param uri The uri of the cache to be localized
  * @param conf Configuration to add the cache to
  */
 public static void addCacheFile(URI uri, Configuration conf) {
   String files = conf.get("mapred.cache.files");
   conf.set("mapred.cache.files", files == null ? uri.toString() : files + "," + uri.toString());
 }
Esempio n. 11
0
 /**
  * Add a archives to be localized to the conf
  *
  * @param uri The uri of the cache to be localized
  * @param conf Configuration to add the cache to
  */
 public static void addSharedCacheArchive(URI uri, Configuration conf) {
   String archives = conf.get("mapred.cache.shared.archives");
   conf.set(
       "mapred.cache.shared.archives",
       archives == null ? uri.toString() : archives + "," + uri.toString());
 }
Esempio n. 12
0
  /** Sets up configuration based on params */
  private static boolean setup(Hashtable<String, String> curConf, Configuration argConf) {

    if (argConf.get("file") == null) {
      logger.fatal("Missing file parameter");
      System.exit(1);
    }

    if (argConf.get("hdfs_base_path") == null) {
      logger.fatal("Missing HDFS base path, check gestore-conf.xml");
      System.exit(1);
    }

    if (argConf.get("hdfs_temp_path") == null) {
      logger.fatal("Missing HDFS temp path, check gestore-conf.xml");
      System.exit(1);
    }

    if (argConf.get("local_temp_path") == null) {
      logger.fatal("Missing local temp path, check gestore-conf.xml");
      System.exit(1);
    }

    // Input paramaters
    curConf.put("run_id", argConf.get("run", ""));
    curConf.put("task_id", argConf.get("task", ""));
    curConf.put("file_id", argConf.get("file"));
    curConf.put("local_path", argConf.get("path", ""));
    curConf.put("type", argConf.get("type", "l2r"));
    curConf.put("timestamp_start", argConf.get("timestamp_start", "1"));
    curConf.put(
        "timestamp_stop", argConf.get("timestamp_stop", Integer.toString(Integer.MAX_VALUE)));
    curConf.put("delimiter", argConf.get("regex", "ID=.*"));
    curConf.put("taxon", argConf.get("taxon", "all"));
    curConf.put("intermediate", argConf.get("full_run", "false"));
    curConf.put("quick_add", argConf.get("quick_add", "false"));
    Boolean full_run = curConf.get("intermediate").matches("(?i).*true.*");
    curConf.put("format", argConf.get("format", "unknown"));
    curConf.put("split", argConf.get("split", "1"));
    curConf.put("copy", argConf.get("copy", "true"));

    // Constants
    curConf.put("base_path", argConf.get("hdfs_base_path"));
    curConf.put("temp_path", argConf.get("hdfs_temp_path"));
    curConf.put("local_temp_path", argConf.get("local_temp_path"));
    curConf.put("db_name_files", argConf.get("hbase_file_table"));
    curConf.put("db_name_runs", argConf.get("hbase_run_table"));
    curConf.put("db_name_updates", argConf.get("hbase_db_update_table"));

    // Timestamps
    Date currentTime = new Date();
    Date endDate = new Date(new Long(curConf.get("timestamp_stop")));
    curConf.put("timestamp_real", Long.toString(currentTime.getTime()));

    return true;
  }
Esempio n. 13
0
 /**
  * Add a file to be localized to the conf. Intended to be used by user code.
  *
  * @param uri The uri of the cache to be localized
  * @param conf Configuration to add the cache to
  */
 public static void addCacheFile(URI uri, Configuration conf) {
   String files = conf.get(CACHE_FILES);
   conf.set(CACHE_FILES, files == null ? uri.toString() : files + "," + uri.toString());
 }
Esempio n. 14
0
 /**
  * Add a archives to be localized to the conf. Intended to be used by user code.
  *
  * @param uri The uri of the cache to be localized
  * @param conf Configuration to add the cache to
  */
 public static void addCacheArchive(URI uri, Configuration conf) {
   String archives = conf.get(CACHE_ARCHIVES);
   conf.set(CACHE_ARCHIVES, archives == null ? uri.toString() : archives + "," + uri.toString());
 }
Esempio n. 15
0
 /**
  * Add a file that has been localized to the conf.. Used by internal DistributedCache code.
  *
  * @param conf The conf to modify to contain the localized caches
  * @param str a comma separated list of local files
  */
 public static void addLocalFiles(Configuration conf, String str) {
   String files = conf.get(CACHE_LOCALFILES);
   conf.set(CACHE_LOCALFILES, files == null ? str : files + "," + str);
 }
Esempio n. 16
0
 /**
  * Add a archive that has been localized to the conf. Used by internal DistributedCache code.
  *
  * @param conf The conf to modify to contain the localized caches
  * @param str a comma separated list of local archives
  */
 public static void addLocalArchives(Configuration conf, String str) {
   String archives = conf.get(CACHE_LOCALARCHIVES);
   conf.set(CACHE_LOCALARCHIVES, archives == null ? str : archives + "," + str);
 }