public int run(String[] args) throws Exception {
    Configuration argConf = getConf();

    // JobConf conf = new JobConf(diffdb.class);
    Configuration config = HBaseConfiguration.create();
    HBaseAdmin hbAdmin = new HBaseAdmin(config);
    dbutil db_util = new dbutil(config);

    HTable runTable = new HTable(config, "gestore_runs");
    Get runGet = new Get(argConf.get("id").getBytes());
    Result pipeline = runTable.get(runGet);

    NavigableMap<byte[], byte[]> pipeMap = pipeline.getFamilyMap("d".getBytes());

    Map.Entry<byte[], byte[]> results = pipeMap.pollFirstEntry();

    HashMap<String, HashMap<String, String>> resultMap =
        new HashMap<String, HashMap<String, String>>();

    while (results != null) {
      String resultKey = new String(results.getKey());
      String resultValue = new String(results.getValue());
      String field = "type";
      HashMap<String, String> tempMap = new HashMap<String, String>();
      String entry = resultKey;

      if (resultKey.endsWith("_db_timestamp")) {
        field = "db_timestamp";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_db_timestamp"));
      } else if (resultKey.endsWith("_filename")) {
        field = "filename";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_filename"));
      } else if (resultKey.endsWith("_regex")) {
        field = "regex";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_regex"));
      }

      if (resultMap.containsKey(entry)) {
        tempMap = resultMap.get(entry);
      }

      tempMap.put(field, resultValue);
      resultMap.put(entry, tempMap);

      // System.out.println("Key: " + resultKey + " Value: " + resultValue);
      results = pipeMap.pollFirstEntry();
    }

    for (String key : resultMap.keySet()) {
      System.out.println("File ID: " + key);
      for (String subKey : resultMap.get(key).keySet()) {
        // System.out.println("\t " + subKey + "\t\t" + resultMap.get(key).get(subKey));
        System.out.format("  %1$-20s  %2$s\n", subKey, resultMap.get(key).get(subKey));
      }
    }

    return 0;
  }
 /**
  * Add a file path to the current set of classpath entries. It adds the file to cache as well.
  * Intended to be used by user code.
  *
  * @param file Path of the file to be added
  * @param conf Configuration that contains the classpath setting
  * @param fs FileSystem with respect to which {@code archivefile} should be interpreted.
  */
 public static void addFileToClassPath(Path file, Configuration conf, FileSystem fs)
     throws IOException {
   String filepath = file.toUri().getPath();
   String classpath = conf.get("mapred.job.classpath.files");
   conf.set(
       "mapred.job.classpath.files",
       classpath == null ? filepath : classpath + System.getProperty("path.separator") + filepath);
   URI uri = fs.makeQualified(file).toUri();
   addCacheFile(uri, conf);
 }
  /**
   * Add an archive path to the current set of classpath entries. It adds the archive to cache as
   * well. Intended to be used by user code.
   *
   * @param archive Path of the archive to be added
   * @param conf Configuration that contains the classpath setting
   * @param fs FileSystem with respect to which {@code archive} should be interpreted.
   */
  public static void addArchiveToClassPath(Path archive, Configuration conf, FileSystem fs)
      throws IOException {
    String archivepath = archive.toUri().getPath();
    String classpath = conf.get("mapred.job.classpath.archives");
    conf.set(
        "mapred.job.classpath.archives",
        classpath == null
            ? archivepath
            : classpath + System.getProperty("path.separator") + archivepath);
    URI uri = fs.makeQualified(archive).toUri();

    addCacheArchive(uri, conf);
  }
Example #4
0
  private static URI addArchiveToClassPathHelper(Path archive, Configuration conf)
      throws IOException {

    String classpath = conf.get("mapred.job.classpath.archives");

    // the scheme/authority use ':' as separator. put the unqualified path in classpath
    String archivePath = archive.toUri().getPath();

    conf.set(
        "mapred.job.classpath.archives",
        classpath == null
            ? archivePath
            : classpath + System.getProperty("path.separator") + archivePath);
    return archive.makeQualified(archive.getFileSystem(conf)).toUri();
  }
Example #5
0
 /**
  * This method checks to see if symlinks are to be create for the localized cache files in the
  * current working directory
  *
  * @param conf the jobconf
  * @return true if symlinks are to be created- else return false
  */
 public static boolean getSymlink(Configuration conf) {
   String result = conf.get("mapred.create.symlink");
   if ("yes".equals(result)) {
     return true;
   }
   return false;
 }
 /**
  * This method checks to see if symlinks are to be create for the localized cache files in the
  * current working directory Used by internal DistributedCache code.
  *
  * @param conf the jobconf
  * @return true if symlinks are to be created- else return false
  */
 public static boolean getSymlink(Configuration conf) {
   String result = conf.get(CACHE_SYMLINK);
   if ("yes".equals(result)) {
     return true;
   }
   return false;
 }
Example #7
0
 private static String getConfigString(Configuration config) {
   String output = "";
   Iterator<Map.Entry<String, String>> iterConfig = config.iterator();
   while (iterConfig.hasNext()) {
     Map.Entry<String, String> curEntry = iterConfig.next();
     output = output + "Key: \t" + curEntry.getKey() + "\nValue: \t" + curEntry.getValue() + "\n";
   }
   return output;
 }
Example #8
0
 public static InetSocketAddress getAddress(Configuration conf) {
   String jobTrackerStr = conf.get("mapred.job.tracker", "localhost:8012");
   int colon = jobTrackerStr.indexOf(":");
   if (colon < 0) {
     throw new RuntimeException("Bad mapred.job.tracker: " + jobTrackerStr);
   }
   String jobTrackerName = jobTrackerStr.substring(0, colon);
   int jobTrackerPort = Integer.parseInt(jobTrackerStr.substring(colon + 1));
   return new InetSocketAddress(jobTrackerName, jobTrackerPort);
 }
Example #9
0
 /**
  * Get the archive entries in classpath as an array of Path
  *
  * @param conf Configuration that contains the classpath setting
  */
 public static Path[] getArchiveClassPaths(Configuration conf) {
   String classpath = conf.get("mapred.job.classpath.archives");
   if (classpath == null) return null;
   ArrayList list =
       Collections.list(new StringTokenizer(classpath, System.getProperty("path.separator")));
   Path[] paths = new Path[list.size()];
   for (int i = 0; i < list.size(); i++) {
     paths[i] = new Path((String) list.get(i));
   }
   return paths;
 }
 /** An FSDataset has a directory where it loads its data files. */
 public FSDataset(DataStorage storage, Configuration conf) throws IOException {
   this.maxBlocksPerDir = conf.getInt("dfs.datanode.numblocks", 64);
   FSVolume[] volArray = new FSVolume[storage.getNumStorageDirs()];
   for (int idx = 0; idx < storage.getNumStorageDirs(); idx++) {
     volArray[idx] = new FSVolume(storage.getStorageDir(idx).getCurrentDir(), conf);
   }
   volumes = new FSVolumeSet(volArray);
   volumeMap = new HashMap<Block, DatanodeBlockInfo>();
   volumes.getVolumeMap(volumeMap);
   registerMBean(storage.getStorageID());
 }
    FSVolume(File currentDir, Configuration conf) throws IOException {
      this.reserved = conf.getLong("dfs.datanode.du.reserved", 0);
      boolean supportAppends = conf.getBoolean("dfs.support.append", false);
      File parent = currentDir.getParentFile();

      this.detachDir = new File(parent, "detach");
      if (detachDir.exists()) {
        recoverDetachedBlocks(currentDir, detachDir);
      }

      // Files that were being written when the datanode was last shutdown
      // are now moved back to the data directory. It is possible that
      // in the future, we might want to do some sort of datanode-local
      // recovery for these blocks. For example, crc validation.
      //
      this.tmpDir = new File(parent, "tmp");
      if (tmpDir.exists()) {
        if (supportAppends) {
          recoverDetachedBlocks(currentDir, tmpDir);
        } else {
          FileUtil.fullyDelete(tmpDir);
        }
      }
      this.dataDir = new FSDir(currentDir);
      if (!tmpDir.mkdirs()) {
        if (!tmpDir.isDirectory()) {
          throw new IOException("Mkdirs failed to create " + tmpDir.toString());
        }
      }
      if (!detachDir.mkdirs()) {
        if (!detachDir.isDirectory()) {
          throw new IOException("Mkdirs failed to create " + detachDir.toString());
        }
      }
      this.usage = new DF(parent, conf);
      this.dfsUsage = new DU(parent, conf);
      this.dfsUsage.start();
    }
Example #12
0
  /**
   * Construct in a named directory.
   *
   * @param conf
   * @param dir
   * @throws IOException
   */
  public NutchBean(Configuration conf, Path dir) throws IOException {
    this.conf = conf;
    this.fs = FileSystem.get(this.conf);
    if (dir == null) {
      dir = new Path(this.conf.get("searcher.dir", "crawl"));
    }
    final Path luceneConfig = new Path(dir, "search-servers.txt");
    final Path solrConfig = new Path(dir, "solr-servers.txt");

    if (fs.exists(luceneConfig) || fs.exists(solrConfig)) {
      searchBean = new DistributedSearchBean(conf, luceneConfig, solrConfig);
    } else {
      final Path indexDir = new Path(dir, "index");
      final Path indexesDir = new Path(dir, "indexes");
      searchBean = new LuceneSearchBean(conf, indexDir, indexesDir);
    }

    String tableName = conf.get("table.name", "webtable");
    table = new HTable(tableName);

    summarizerFactory = new SummarizerFactory(conf);
  }
Example #13
0
  /** Start the JobTracker process, listen on the indicated port */
  JobTracker(Configuration conf) throws IOException {
    //
    // Grab some static constants
    //
    maxCurrentTasks = conf.getInt("mapred.tasktracker.tasks.maximum", 2);
    RETIRE_JOB_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.interval", 24 * 60 * 60 * 1000);
    RETIRE_JOB_CHECK_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.check", 60 * 1000);
    TASK_ALLOC_EPSILON = conf.getFloat("mapred.jobtracker.taskalloc.loadbalance.epsilon", 0.2f);
    PAD_FRACTION = conf.getFloat("mapred.jobtracker.taskalloc.capacitypad", 0.1f);
    MIN_SLOTS_FOR_PADDING = 3 * maxCurrentTasks;

    // This is a directory of temporary submission files.  We delete it
    // on startup, and can delete any files that we're done with
    this.conf = conf;
    JobConf jobConf = new JobConf(conf);
    this.systemDir = jobConf.getSystemDir();
    this.fs = FileSystem.get(conf);
    FileUtil.fullyDelete(fs, systemDir);
    fs.mkdirs(systemDir);

    // Same with 'localDir' except it's always on the local disk.
    jobConf.deleteLocalFiles(SUBDIR);

    // Set ports, start RPC servers, etc.
    InetSocketAddress addr = getAddress(conf);
    this.localMachine = addr.getHostName();
    this.port = addr.getPort();
    this.interTrackerServer = RPC.getServer(this, addr.getPort(), 10, false, conf);
    this.interTrackerServer.start();
    Properties p = System.getProperties();
    for (Iterator it = p.keySet().iterator(); it.hasNext(); ) {
      String key = (String) it.next();
      String val = (String) p.getProperty(key);
      LOG.info("Property '" + key + "' is " + val);
    }

    this.infoPort = conf.getInt("mapred.job.tracker.info.port", 50030);
    this.infoServer = new JobTrackerInfoServer(this, infoPort);
    this.infoServer.start();

    this.startTime = System.currentTimeMillis();

    new Thread(this.expireTrackers).start();
    new Thread(this.retireJobs).start();
    new Thread(this.initJobs).start();
  }
  /**
   * Test how IPC cache map works.
   *
   * @throws Exception If failed.
   */
  @SuppressWarnings("unchecked")
  public void testIpcCache() throws Exception {
    Field cacheField = GridGgfsHadoopIpcIo.class.getDeclaredField("ipcCache");

    cacheField.setAccessible(true);

    Field activeCntField = GridGgfsHadoopIpcIo.class.getDeclaredField("activeCnt");

    activeCntField.setAccessible(true);

    Map<String, GridGgfsHadoopIpcIo> cache =
        (Map<String, GridGgfsHadoopIpcIo>) cacheField.get(null);

    String name = "ggfs:" + getTestGridName(0) + "@";

    Configuration cfg = new Configuration();

    cfg.addResource(U.resolveGridGainUrl(HADOOP_FS_CFG));
    cfg.setBoolean("fs.ggfs.impl.disable.cache", true);
    cfg.setBoolean(String.format(GridGgfsHadoopUtils.PARAM_GGFS_ENDPOINT_NO_EMBED, name), true);

    // Ensure that existing IO is reused.
    FileSystem fs1 = FileSystem.get(new URI("ggfs://" + name + "/"), cfg);

    assertEquals(1, cache.size());

    GridGgfsHadoopIpcIo io = null;

    System.out.println("CACHE: " + cache);

    for (String key : cache.keySet()) {
      if (key.contains("10500")) {
        io = cache.get(key);

        break;
      }
    }

    assert io != null;

    assertEquals(1, ((AtomicInteger) activeCntField.get(io)).get());

    // Ensure that when IO is used by multiple file systems and one of them is closed, IO is not
    // stopped.
    FileSystem fs2 = FileSystem.get(new URI("ggfs://" + name + "/abc"), cfg);

    assertEquals(1, cache.size());
    assertEquals(2, ((AtomicInteger) activeCntField.get(io)).get());

    fs2.close();

    assertEquals(1, cache.size());
    assertEquals(1, ((AtomicInteger) activeCntField.get(io)).get());

    Field stopField = GridGgfsHadoopIpcIo.class.getDeclaredField("stopping");

    stopField.setAccessible(true);

    assert !(Boolean) stopField.get(io);

    // Ensure that IO is stopped when nobody else is need it.
    fs1.close();

    assert cache.isEmpty();

    assert (Boolean) stopField.get(io);
  }
Example #15
0
 FileDataGenNew(String HDFSMaster) {
   fsConf.set("fs.default.name", HDFSMaster);
 }
Example #16
0
 /**
  * Add a file to be localized to the conf
  *
  * @param uri The uri of the cache to be localized
  * @param conf Configuration to add the cache to
  */
 public static void addCacheFile(URI uri, Configuration conf) {
   String files = conf.get("mapred.cache.files");
   conf.set("mapred.cache.files", files == null ? uri.toString() : files + "," + uri.toString());
 }
Example #17
0
 /**
  * Add a archives to be localized to the conf
  *
  * @param uri The uri of the cache to be localized
  * @param conf Configuration to add the cache to
  */
 public static void addSharedCacheArchive(URI uri, Configuration conf) {
   String archives = conf.get("mapred.cache.shared.archives");
   conf.set(
       "mapred.cache.shared.archives",
       archives == null ? uri.toString() : archives + "," + uri.toString());
 }
Example #18
0
 /**
  * Set the conf to contain the location for localized files
  *
  * @param conf The conf to modify to contain the localized caches
  * @param str a comma separated list of local files
  */
 public static void setLocalSharedFiles(Configuration conf, String str) {
   conf.set("mapred.cache.shared.localFiles", str);
 }
Example #19
0
 /**
  * Set the conf to contain the location for localized archives
  *
  * @param conf The conf to modify to contain the localized caches
  * @param str a comma separated list of local archives
  */
 public static void setLocalArchives(Configuration conf, String str) {
   conf.set("mapred.cache.localArchives", str);
 }
Example #20
0
 /**
  * Get cache files set in the Configuration
  *
  * @param conf The configuration which contains the files
  * @return A URI array of the files set in the Configuration
  * @throws IOException
  */
 public static URI[] getSharedCacheFiles(Configuration conf) throws IOException {
   return StringUtils.stringToURI(conf.getStrings("mapred.cache.shared.files"));
 }
Example #21
0
 /**
  * This is to check the timestamp of the files to be localized
  *
  * @param conf Configuration which stores the timestamp's
  * @param timestamps comma separated list of timestamps of files. The order should be the same as
  *     the order in which the files are added.
  */
 public static void setFileTimestamps(Configuration conf, String timestamps) {
   conf.set("mapred.cache.files.timestamps", timestamps);
 }
Example #22
0
 /**
  * This is to check the timestamp of the archives to be localized
  *
  * @param conf Configuration which stores the timestamp's
  * @param timestamps comma separated list of timestamps of archives. The order should be the same
  *     as the order in which the archives are added.
  */
 public static void setArchiveTimestamps(Configuration conf, String timestamps) {
   conf.set("mapred.cache.archives.timestamps", timestamps);
 }
Example #23
0
 public static String[] getSharedFileLength(Configuration conf) {
   return conf.getStrings("mapred.cache.shared.files.length");
 }
Example #24
0
 /**
  * Get the timestamps of the files
  *
  * @param conf The configuration which stored the timestamps
  * @return a string array of timestamps
  * @throws IOException
  */
 public static String[] getFileTimestamps(Configuration conf) {
   return conf.getStrings("mapred.cache.files.timestamps");
 }
Example #25
0
 /**
  * Get the timestamps of the archives
  *
  * @param conf The configuration which stored the timestamps
  * @return a string array of timestamps
  * @throws IOException
  */
 public static String[] getArchiveTimestamps(Configuration conf) {
   return conf.getStrings("mapred.cache.archives.timestamps");
 }
Example #26
0
 /**
  * Set the configuration with the given set of files
  *
  * @param files The list of files that need to be localized
  * @param conf Configuration which will be changed
  */
 public static void setCacheFiles(URI[] files, Configuration conf) {
   String sfiles = StringUtils.uriToString(files);
   conf.set("mapred.cache.files", sfiles);
 }
Example #27
0
 /**
  * Return the path array of the localized caches
  *
  * @param conf Configuration that contains the localized archives
  * @return A path array of localized caches
  * @throws IOException
  */
 public static Path[] getLocalCacheArchives(Configuration conf) throws IOException {
   return StringUtils.stringToPath(conf.getStrings("mapred.cache.localArchives"));
 }
Example #28
0
 public static void setSharedFileLength(Configuration conf, String length) {
   conf.set("mapred.cache.shared.files.length", length);
 }
Example #29
0
 /**
  * Return the path array of the localized files
  *
  * @param conf Configuration that contains the localized files
  * @return A path array of localized files
  * @throws IOException
  */
 public static Path[] getLocalSharedCacheFiles(Configuration conf) throws IOException {
   return StringUtils.stringToPath(conf.getStrings("mapred.cache.shared.localFiles"));
 }
Example #30
0
 /**
  * Get cache archives set in the Configuration
  *
  * @param conf The configuration which contains the archives
  * @return A URI array of the caches set in the Configuration
  * @throws IOException
  */
 public static URI[] getCacheArchives(Configuration conf) throws IOException {
   return StringUtils.stringToURI(conf.getStrings("mapred.cache.archives"));
 }