Example #1
0
  public static void main(final String[] pArgs) throws Exception {
    TreasuryYieldMulti tym = new TreasuryYieldMulti();

    // Here is an example of how to use multiple collections as the input to
    // a hadoop job, from within Java code directly.
    MultiCollectionSplitBuilder mcsb = new MultiCollectionSplitBuilder();
    mcsb.add(
            new MongoURI("mongodb://localhost:27017/mongo_hadoop.yield_historical.in"),
            (MongoURI) null, // authuri
            true, // notimeout
            (DBObject) null, // fields
            (DBObject) null, // sort
            (DBObject) null, // query
            false)
        .add(
            new MongoURI("mongodb://localhost:27017/mongo_hadoop.yield_historical.in"),
            (MongoURI) null, // authuri
            true, // notimeout
            (DBObject) null, // fields
            (DBObject) null, // sort
            new BasicDBObject("_id", new BasicDBObject("$gt", new Date(883440000000L))),
            false); // range query

    Configuration conf = new Configuration();
    conf.set(MultiMongoCollectionSplitter.MULTI_COLLECTION_CONF_KEY, mcsb.toJSON());
    conf.setSplitterClass(conf, MultiMongoCollectionSplitter.class);

    System.exit(ToolRunner.run(conf, new TreasuryYieldXMLConfig(), pArgs));
  }
  public int run(String[] args) throws Exception {
    Configuration argConf = getConf();

    // JobConf conf = new JobConf(diffdb.class);
    Configuration config = HBaseConfiguration.create();
    HBaseAdmin hbAdmin = new HBaseAdmin(config);
    dbutil db_util = new dbutil(config);

    HTable runTable = new HTable(config, "gestore_runs");
    Get runGet = new Get(argConf.get("id").getBytes());
    Result pipeline = runTable.get(runGet);

    NavigableMap<byte[], byte[]> pipeMap = pipeline.getFamilyMap("d".getBytes());

    Map.Entry<byte[], byte[]> results = pipeMap.pollFirstEntry();

    HashMap<String, HashMap<String, String>> resultMap =
        new HashMap<String, HashMap<String, String>>();

    while (results != null) {
      String resultKey = new String(results.getKey());
      String resultValue = new String(results.getValue());
      String field = "type";
      HashMap<String, String> tempMap = new HashMap<String, String>();
      String entry = resultKey;

      if (resultKey.endsWith("_db_timestamp")) {
        field = "db_timestamp";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_db_timestamp"));
      } else if (resultKey.endsWith("_filename")) {
        field = "filename";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_filename"));
      } else if (resultKey.endsWith("_regex")) {
        field = "regex";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_regex"));
      }

      if (resultMap.containsKey(entry)) {
        tempMap = resultMap.get(entry);
      }

      tempMap.put(field, resultValue);
      resultMap.put(entry, tempMap);

      // System.out.println("Key: " + resultKey + " Value: " + resultValue);
      results = pipeMap.pollFirstEntry();
    }

    for (String key : resultMap.keySet()) {
      System.out.println("File ID: " + key);
      for (String subKey : resultMap.get(key).keySet()) {
        // System.out.println("\t " + subKey + "\t\t" + resultMap.get(key).get(subKey));
        System.out.format("  %1$-20s  %2$s\n", subKey, resultMap.get(key).get(subKey));
      }
    }

    return 0;
  }
 /**
  * Add a file path to the current set of classpath entries. It adds the file to cache as well.
  * Intended to be used by user code.
  *
  * @param file Path of the file to be added
  * @param conf Configuration that contains the classpath setting
  * @param fs FileSystem with respect to which {@code archivefile} should be interpreted.
  */
 public static void addFileToClassPath(Path file, Configuration conf, FileSystem fs)
     throws IOException {
   String filepath = file.toUri().getPath();
   String classpath = conf.get("mapred.job.classpath.files");
   conf.set(
       "mapred.job.classpath.files",
       classpath == null ? filepath : classpath + System.getProperty("path.separator") + filepath);
   URI uri = fs.makeQualified(file).toUri();
   addCacheFile(uri, conf);
 }
  /**
   * Add an archive path to the current set of classpath entries. It adds the archive to cache as
   * well. Intended to be used by user code.
   *
   * @param archive Path of the archive to be added
   * @param conf Configuration that contains the classpath setting
   * @param fs FileSystem with respect to which {@code archive} should be interpreted.
   */
  public static void addArchiveToClassPath(Path archive, Configuration conf, FileSystem fs)
      throws IOException {
    String archivepath = archive.toUri().getPath();
    String classpath = conf.get("mapred.job.classpath.archives");
    conf.set(
        "mapred.job.classpath.archives",
        classpath == null
            ? archivepath
            : classpath + System.getProperty("path.separator") + archivepath);
    URI uri = fs.makeQualified(archive).toUri();

    addCacheArchive(uri, conf);
  }
Example #5
0
  private static URI addArchiveToClassPathHelper(Path archive, Configuration conf)
      throws IOException {

    String classpath = conf.get("mapred.job.classpath.archives");

    // the scheme/authority use ':' as separator. put the unqualified path in classpath
    String archivePath = archive.toUri().getPath();

    conf.set(
        "mapred.job.classpath.archives",
        classpath == null
            ? archivePath
            : classpath + System.getProperty("path.separator") + archivePath);
    return archive.makeQualified(archive.getFileSystem(conf)).toUri();
  }
Example #6
0
 /**
  * This method checks to see if symlinks are to be create for the localized cache files in the
  * current working directory
  *
  * @param conf the jobconf
  * @return true if symlinks are to be created- else return false
  */
 public static boolean getSymlink(Configuration conf) {
   String result = conf.get("mapred.create.symlink");
   if ("yes".equals(result)) {
     return true;
   }
   return false;
 }
 /**
  * This method checks to see if symlinks are to be create for the localized cache files in the
  * current working directory Used by internal DistributedCache code.
  *
  * @param conf the jobconf
  * @return true if symlinks are to be created- else return false
  */
 public static boolean getSymlink(Configuration conf) {
   String result = conf.get(CACHE_SYMLINK);
   if ("yes".equals(result)) {
     return true;
   }
   return false;
 }
Example #8
0
 private static String getConfigString(Configuration config) {
   String output = "";
   Iterator<Map.Entry<String, String>> iterConfig = config.iterator();
   while (iterConfig.hasNext()) {
     Map.Entry<String, String> curEntry = iterConfig.next();
     output = output + "Key: \t" + curEntry.getKey() + "\nValue: \t" + curEntry.getValue() + "\n";
   }
   return output;
 }
Example #9
0
 /**
  * Get the archive entries in classpath as an array of Path
  *
  * @param conf Configuration that contains the classpath setting
  */
 public static Path[] getArchiveClassPaths(Configuration conf) {
   String classpath = conf.get("mapred.job.classpath.archives");
   if (classpath == null) return null;
   ArrayList list =
       Collections.list(new StringTokenizer(classpath, System.getProperty("path.separator")));
   Path[] paths = new Path[list.size()];
   for (int i = 0; i < list.size(); i++) {
     paths[i] = new Path((String) list.get(i));
   }
   return paths;
 }
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Path matFile = new Path(args[0]);
    FSDataInputStream matData = fs.open(matFile);
    BufferedReader br = new BufferedReader(new InputStreamReader(matData));

    int i = 0;
    String line;
    while ((line = br.readLine()) != null) {
      StringTokenizer tokenizer = new StringTokenizer(line);
      String iRow = tokenizer.nextToken();
      String iCol = tokenizer.nextToken();
      if (Integer.parseInt(iRow) == Integer.parseInt(iCol)) {
        i++;
      }
    }
    br.close();
    int dimention = i;

    conf.setInt("DIMENTION", dimention);
    Path xFile = new Path("preX/Result");
    FSDataOutputStream xData = fs.create(xFile);
    BufferedWriter iniX = new BufferedWriter(new OutputStreamWriter(xData));
    for (int j = 0; j < dimention; j++) {
      iniX.write(String.valueOf(j) + " 0");
      iniX.newLine();
    }
    iniX.close();

    URI matVec = new URI(args[0]);
    DistributedCache.addCacheFile(matVec, conf);

    int iteration = 0;
    do {
      ToolRunner.run(conf, new Jacobi(), args);
    } while (iteration++ < max_iter && (!stopIteration(conf)));
  }
  @Override
  public int run(String[] args) throws Exception {

    String locatorHost = args[0];
    int locatorPort = Integer.parseInt(args[1]);
    String hdfsHomeDir = args[2];

    System.out.println(
        "KnownKeysMRv2 invoked with args (locatorHost = "
            + locatorHost
            + " locatorPort = "
            + locatorPort
            + " hdfsHomeDir = "
            + hdfsHomeDir);

    Configuration conf = getConf();
    conf.set(GFInputFormat.INPUT_REGION, "partitionedRegion");
    conf.set(GFInputFormat.HOME_DIR, hdfsHomeDir);
    conf.setBoolean(GFInputFormat.CHECKPOINT, false);
    conf.set(GFOutputFormat.REGION, "validationRegion");
    conf.set(GFOutputFormat.LOCATOR_HOST, locatorHost);
    conf.setInt(GFOutputFormat.LOCATOR_PORT, locatorPort);

    Job job = Job.getInstance(conf, "knownKeysMRv2");
    job.setInputFormatClass(GFInputFormat.class);
    job.setOutputFormatClass(GFOutputFormat.class);

    job.setMapperClass(KnownKeysMRv2Mapper.class);
    job.setMapOutputKeyClass(GFKey.class);
    job.setMapOutputValueClass(PEIWritable.class);

    job.setReducerClass(KnownKeysMRv2Reducer.class);
    // job.setOutputKeyClass(String.class);
    // job.setOutputValueClass(ValueHolder.class);

    return job.waitForCompletion(false) ? 0 : 1;
  }
Example #12
0
 /**
  * Set the configuration with the given set of archives
  *
  * @param archives The list of archives that need to be localized
  * @param conf Configuration which will be changed
  */
 public static void setCacheArchives(URI[] archives, Configuration conf) {
   String sarchives = StringUtils.uriToString(archives);
   conf.set("mapred.cache.archives", sarchives);
 }
Example #13
0
 /**
  * Add a file to be localized to the conf
  *
  * @param uri The uri of the cache to be localized
  * @param conf Configuration to add the cache to
  */
 public static void addCacheFile(URI uri, Configuration conf) {
   String files = conf.get("mapred.cache.files");
   conf.set("mapred.cache.files", files == null ? uri.toString() : files + "," + uri.toString());
 }
Example #14
0
 /**
  * Set the conf to contain the location for localized files
  *
  * @param conf The conf to modify to contain the localized caches
  * @param str a comma separated list of local files
  */
 public static void setLocalSharedFiles(Configuration conf, String str) {
   conf.set("mapred.cache.shared.localFiles", str);
 }
Example #15
0
 /**
  * Add a archives to be localized to the conf
  *
  * @param uri The uri of the cache to be localized
  * @param conf Configuration to add the cache to
  */
 public static void addSharedCacheArchive(URI uri, Configuration conf) {
   String archives = conf.get("mapred.cache.shared.archives");
   conf.set(
       "mapred.cache.shared.archives",
       archives == null ? uri.toString() : archives + "," + uri.toString());
 }
Example #16
0
 public void setConf(Configuration conf) {
   this.scriptName = conf.get(SCRIPT_FILENAME_KEY);
   this.maxArgs = conf.getInt(SCRIPT_ARG_COUNT_KEY, DEFAULT_ARG_COUNT);
   this.conf = conf;
 }
Example #17
0
 /**
  * Get cache files set in the Configuration
  *
  * @param conf The configuration which contains the files
  * @return A URI array of the files set in the Configuration
  * @throws IOException
  */
 public static URI[] getSharedCacheFiles(Configuration conf) throws IOException {
   return StringUtils.stringToURI(conf.getStrings("mapred.cache.shared.files"));
 }
Example #18
0
 /**
  * This is to check the timestamp of the archives to be localized
  *
  * @param conf Configuration which stores the timestamp's
  * @param timestamps comma separated list of timestamps of archives. The order should be the same
  *     as the order in which the archives are added.
  */
 public static void setArchiveTimestamps(Configuration conf, String timestamps) {
   conf.set("mapred.cache.archives.timestamps", timestamps);
 }
Example #19
0
 /**
  * This is to check the timestamp of the files to be localized
  *
  * @param conf Configuration which stores the timestamp's
  * @param timestamps comma separated list of timestamps of files. The order should be the same as
  *     the order in which the files are added.
  */
 public static void setFileTimestamps(Configuration conf, String timestamps) {
   conf.set("mapred.cache.files.timestamps", timestamps);
 }
Example #20
0
 /**
  * Set the configuration with the given set of files
  *
  * @param files The list of files that need to be localized
  * @param conf Configuration which will be changed
  */
 public static void setCacheFiles(URI[] files, Configuration conf) {
   String sfiles = StringUtils.uriToString(files);
   conf.set("mapred.cache.files", sfiles);
 }
Example #21
0
 public static String[] getSharedFileLength(Configuration conf) {
   return conf.getStrings("mapred.cache.shared.files.length");
 }
Example #22
0
 /**
  * Get the timestamps of the archives
  *
  * @param conf The configuration which stored the timestamps
  * @return a string array of timestamps
  * @throws IOException
  */
 public static String[] getArchiveTimestamps(Configuration conf) {
   return conf.getStrings("mapred.cache.archives.timestamps");
 }
Example #23
0
 /**
  * Return the path array of the localized files
  *
  * @param conf Configuration that contains the localized files
  * @return A path array of localized files
  * @throws IOException
  */
 public static Path[] getLocalSharedCacheFiles(Configuration conf) throws IOException {
   return StringUtils.stringToPath(conf.getStrings("mapred.cache.shared.localFiles"));
 }
Example #24
0
 /**
  * Return the path array of the localized caches
  *
  * @param conf Configuration that contains the localized archives
  * @return A path array of localized caches
  * @throws IOException
  */
 public static Path[] getLocalCacheArchives(Configuration conf) throws IOException {
   return StringUtils.stringToPath(conf.getStrings("mapred.cache.localArchives"));
 }
Example #25
0
 static {
   // Load the XML config defined in hadoop-local.xml
   Configuration.addDefaultResource("src/examples/hadoop-local.xml");
   Configuration.addDefaultResource("src/examples/mongo-defaults.xml");
 }
Example #26
0
 public static void setSharedFileLength(Configuration conf, String length) {
   conf.set("mapred.cache.shared.files.length", length);
 }
Example #27
0
  /**
   * Get the locally cached file or archive; it could either be previously cached (and valid) or
   * copy it from the {@link FileSystem} now.
   *
   * @param cache the cache to be localized, this should be specified as new
   *     URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no schema or hostname:port is
   *     provided the file is assumed to be in the filesystem being used in the Configuration
   * @param conf The Confguration file which contains the filesystem
   * @param subDir The sub cache Dir where you want to localize the files/archives
   * @param fileStatus The file status on the dfs.
   * @param isArchive if the cache is an archive or a file. In case it is an archive with a .zip or
   *     .jar or .tar or .tgz or .tar.gz extension it will be unzipped/unjarred/untarred
   *     automatically and the directory where the archive is unzipped/unjarred/untarred is returned
   *     as the Path. In case of a file, the path to the file is returned
   * @param confFileStamp this is the hdfs file modification timestamp to verify that the file to be
   *     cached hasn't changed since the job started
   * @param fileLength this is the length of the cache file
   * @param currentWorkDir this is the directory where you would want to create symlinks for the
   *     locally cached files/archives
   * @param honorSymLinkConf if this is false, then the symlinks are not created even if conf says
   *     so (this is required for an optimization in task launches
   * @param lDirAllocator LocalDirAllocator of the tracker
   * @return the path to directory where the archives are unjarred in case of archives, the path to
   *     the file where the file is copied locally
   * @throws IOException
   */
  private static Path getLocalCache(
      URI cache,
      Configuration conf,
      Path subDir,
      FileStatus fileStatus,
      boolean isArchive,
      long confFileStamp,
      long fileLength,
      Path currentWorkDir,
      boolean honorSymLinkConf,
      MRAsyncDiskService asyncDiskService,
      LocalDirAllocator lDirAllocator)
      throws IOException {
    String key = getKey(cache, conf, confFileStamp);

    CacheStatus lcacheStatus;
    Path localizedPath;
    synchronized (cachedArchives) {
      lcacheStatus = cachedArchives.get(key);
      if (lcacheStatus == null) {
        // was never localized
        Path uniqueParentDir = new Path(subDir, String.valueOf(random.nextLong()));
        String cachePath = new Path(uniqueParentDir, makeRelative(cache, conf)).toString();
        Path localPath = lDirAllocator.getLocalPathForWrite(cachePath, fileLength, conf);
        lcacheStatus =
            new CacheStatus(
                new Path(localPath.toString().replace(cachePath, "")), localPath, uniqueParentDir);
        cachedArchives.put(key, lcacheStatus);
      }
      lcacheStatus.refcount++;
    }
    boolean initSuccessful = false;
    try {
      synchronized (lcacheStatus) {
        if (!lcacheStatus.isInited()) {
          localizedPath = localizeCache(conf, cache, confFileStamp, lcacheStatus, isArchive);
          lcacheStatus.initComplete();
        } else {
          if (fileStatus != null) {
            localizedPath =
                checkCacheStatusValidity(
                    conf, cache, confFileStamp, lcacheStatus, fileStatus, isArchive);
          } else {
            // if fileStatus is null, then the md5 must be correct
            // so there is no need to check for cache validity
            localizedPath = lcacheStatus.localizedLoadPath;
          }
        }
        createSymlink(conf, cache, lcacheStatus, isArchive, currentWorkDir, honorSymLinkConf);
      }

      // try deleting stuff if you can
      long size = 0;
      int numberSubDir = 0;
      synchronized (lcacheStatus) {
        synchronized (baseDirSize) {
          Long get = baseDirSize.get(lcacheStatus.getBaseDir());
          if (get != null) {
            size = get.longValue();
          } else {
            LOG.warn("Cannot find size of baseDir: " + lcacheStatus.getBaseDir());
          }
        }
        synchronized (baseDirNumberSubDir) {
          Integer get = baseDirNumberSubDir.get(lcacheStatus.getBaseDir());
          if (get != null) {
            numberSubDir = get.intValue();
          } else {
            LOG.warn("Cannot find subdirectories limit of baseDir: " + lcacheStatus.getBaseDir());
          }
        }
      }
      // setting the cache size to a default of 10GB
      long allowedSize = conf.getLong("local.cache.size", DEFAULT_CACHE_SIZE);
      long allowedNumberSubDir =
          conf.getLong("local.cache.numbersubdir", DEFAULT_CACHE_SUBDIR_LIMIT);
      if (allowedSize < size || allowedNumberSubDir < numberSubDir) {
        // try some cache deletions
        LOG.debug(
            "Start deleting released cache because"
                + " [size, allowedSize, numberSubDir, allowedNumberSubDir] ="
                + " ["
                + size
                + ", "
                + allowedSize
                + ", "
                + numberSubDir
                + ", "
                + allowedNumberSubDir
                + "]");
        deleteCache(conf, asyncDiskService);
      }
      initSuccessful = true;
      return localizedPath;
    } finally {
      if (!initSuccessful) {
        synchronized (cachedArchives) {
          lcacheStatus.refcount--;
        }
      }
    }
  }
Example #28
0
 /**
  * Set the conf to contain the location for localized archives
  *
  * @param conf The conf to modify to contain the localized caches
  * @param str a comma separated list of local archives
  */
 public static void setLocalArchives(Configuration conf, String str) {
   conf.set("mapred.cache.localArchives", str);
 }
Example #29
0
 /**
  * Get the timestamps of the files
  *
  * @param conf The configuration which stored the timestamps
  * @return a string array of timestamps
  * @throws IOException
  */
 public static String[] getFileTimestamps(Configuration conf) {
   return conf.getStrings("mapred.cache.files.timestamps");
 }
Example #30
0
 /**
  * Get cache archives set in the Configuration
  *
  * @param conf The configuration which contains the archives
  * @return A URI array of the caches set in the Configuration
  * @throws IOException
  */
 public static URI[] getCacheArchives(Configuration conf) throws IOException {
   return StringUtils.stringToURI(conf.getStrings("mapred.cache.archives"));
 }