@Override
 protected void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String line = value.toString();
   Matcher m = NUMBERS.matcher(line);
   m.find();
   VarLongWritable userID = new VarLongWritable(Long.parseLong(m.group()));
   VarLongWritable itemID = new VarLongWritable();
   while (m.find()) {
     itemID.set(Long.parseLong(m.group()));
     context.write(userID, itemID);
   }
 }
Example #2
0
  /** Sets up configuration based on params */
  private static boolean setup(Hashtable<String, String> curConf, Configuration argConf) {

    if (argConf.get("file") == null) {
      logger.fatal("Missing file parameter");
      System.exit(1);
    }

    if (argConf.get("hdfs_base_path") == null) {
      logger.fatal("Missing HDFS base path, check gestore-conf.xml");
      System.exit(1);
    }

    if (argConf.get("hdfs_temp_path") == null) {
      logger.fatal("Missing HDFS temp path, check gestore-conf.xml");
      System.exit(1);
    }

    if (argConf.get("local_temp_path") == null) {
      logger.fatal("Missing local temp path, check gestore-conf.xml");
      System.exit(1);
    }

    // Input paramaters
    curConf.put("run_id", argConf.get("run", ""));
    curConf.put("task_id", argConf.get("task", ""));
    curConf.put("file_id", argConf.get("file"));
    curConf.put("local_path", argConf.get("path", ""));
    curConf.put("type", argConf.get("type", "l2r"));
    curConf.put("timestamp_start", argConf.get("timestamp_start", "1"));
    curConf.put(
        "timestamp_stop", argConf.get("timestamp_stop", Integer.toString(Integer.MAX_VALUE)));
    curConf.put("delimiter", argConf.get("regex", "ID=.*"));
    curConf.put("taxon", argConf.get("taxon", "all"));
    curConf.put("intermediate", argConf.get("full_run", "false"));
    curConf.put("quick_add", argConf.get("quick_add", "false"));
    Boolean full_run = curConf.get("intermediate").matches("(?i).*true.*");
    curConf.put("format", argConf.get("format", "unknown"));
    curConf.put("split", argConf.get("split", "1"));
    curConf.put("copy", argConf.get("copy", "true"));

    // Constants
    curConf.put("base_path", argConf.get("hdfs_base_path"));
    curConf.put("temp_path", argConf.get("hdfs_temp_path"));
    curConf.put("local_temp_path", argConf.get("local_temp_path"));
    curConf.put("db_name_files", argConf.get("hbase_file_table"));
    curConf.put("db_name_runs", argConf.get("hbase_run_table"));
    curConf.put("db_name_updates", argConf.get("hbase_db_update_table"));

    // Timestamps
    Date currentTime = new Date();
    Date endDate = new Date(new Long(curConf.get("timestamp_stop")));
    curConf.put("timestamp_real", Long.toString(currentTime.getTime()));

    return true;
  }
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      Configuration c = context.getConfiguration();
      String s = value.toString();
      String input[] = s.split(",");
      Text outputkey = new Text();
      Text outputvalue = new Text();
      double result = 0.0;

      /* multiplies matrix and vector entry with matching column value */

      result = (Double.parseDouble(input[2])) * (vector.get(Long.parseLong(input[1])));
      outputkey.set(input[0]);
      outputvalue.set(Double.toString(result));
      context.write(outputkey, outputvalue);
    }
    /* called once at the beginning of the task */
    public void setup(Context context) throws IOException, InterruptedException {
      BufferedReader br = null;
      Path[] files = DistributedCache.getLocalCacheFiles(context.getConfiguration());
      if (files != null && files.length > 0)
        br = new BufferedReader(new FileReader(files[0].toString()));
      String line = null;

      /* reads the cached file into a hashmap */
      try {
        while ((line = br.readLine()) != null) {
          String input[] = line.split(",");
          vector.put(Long.valueOf(input[0]), Double.valueOf(input[1]));
        }
      } finally {
        br.close();
      }
    }
    /**
     * The work is done here for preparing the output Mutation. The TileIdWritable and
     * RasterWritable are transformed here.
     */
    @Override
    public void write(TileIdWritable key, RasterWritable value)
        throws IOException, InterruptedException {
      int zoom = zoomLevel;
      if (key instanceof TileIdZoomWritable) {
        zoom = ((TileIdZoomWritable) key).getZoom();
      }

      // ColumnVisibility cv = new ColumnVisibility();
      // transform the keys
      ByteBuffer buf = ByteBuffer.allocate(8);
      buf.putLong(key.get());
      Mutation m = new Mutation(new Text(buf.array()));
      // We only want the actual bytes for the value, not the full array of bytes
      Value v = new Value(value.copyBytes());
      m.put(Integer.toString(zoom), Long.toString(key.get()), cv, v);
      _inRecordWriter.write(outTable, m);
    } // end write
  public long calculatePreviousLinesInFile(IdSetterKey currentKey, TreeMap<String, Long> tempMap) {
    long result = 0;
    for (Map.Entry<String, Long> entry : tempMap.entrySet()) {
      String counterName = entry.getKey();

      IdSetterKey keyFromCounter =
          new IdSetterKey(
              counterName.split(IdSetterKey.DELIMITER)[0],
              Long.parseLong(counterName.split(IdSetterKey.DELIMITER)[1]),
              0);

      if (currentKey.compareTo(keyFromCounter) > 0) {
        result += entry.getValue();
      }
    }

    return result;
  }
Example #7
0
  /**
   * Parse arguments and then runs a map/reduce job. Print output in standard out.
   *
   * @return a non-zero if there is an error. Otherwise, return 0.
   */
  public int run(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("Usage: " + getClass().getName() + " <nMaps> <nSamples>");
      ToolRunner.printGenericCommandUsage(System.err);
      return 2;
    }

    final int nMaps = Integer.parseInt(args[0]);
    final long nSamples = Long.parseLong(args[1]);
    long now = System.currentTimeMillis();
    int rand = new Random().nextInt(Integer.MAX_VALUE);
    final Path tmpDir = new Path(TMP_DIR_PREFIX + "_" + now + "_" + rand);

    System.out.println("Number of Maps  = " + nMaps);
    System.out.println("Samples per Map = " + nSamples);

    System.out.println(
        "Estimated value of Pi is " + estimatePi(nMaps, nSamples, tmpDir, getConf()));
    return 0;
  }
Example #8
0
  public int run(String[] args) throws Exception {
    // printUsage();
    /*
     * SETUP
     */
    Configuration argConf = getConf();
    Hashtable<String, String> confArg = new Hashtable<String, String>();
    setup(confArg, argConf);
    Date currentTime = new Date();
    Date endDate = new Date(new Long(confArg.get("timestamp_stop")));
    Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*");
    Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*");
    logger.info("Running GeStore");

    // ZooKeeper setup
    Configuration config = HBaseConfiguration.create();
    zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config));
    zkInstance =
        new ZooKeeper(
            ZKConfig.getZKQuorumServersString(config),
            config.getInt("zookeeper.session.timeout", -1),
            zkWatcher);

    if (!confArg.get("task_id").isEmpty()) {
      confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id"));
    }

    String lockRequest = confArg.get("file_id");
    if (!confArg.get("run_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("run_id") + "_";
    if (!confArg.get("task_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("task_id") + "_";

    // Get type of movement
    toFrom type_move = checkArgs(confArg);
    if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) {
      List<String> arguments = new ArrayList<String>();
      arguments.add("-Dinput=" + confArg.get("local_path"));
      arguments.add("-Dtable=" + confArg.get("file_id"));
      arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop"));
      arguments.add("-Dtype=" + confArg.get("format"));
      arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id"));
      arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path"));
      arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id"));
      if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add"));
      String lockName = lock(lockRequest);
      String[] argumentString = arguments.toArray(new String[arguments.size()]);
      adddb.main(argumentString);
      unlock(lockName);
      System.exit(0);
    }

    // Database registration

    dbutil db_util = new dbutil(config);
    db_util.register_database(confArg.get("db_name_files"), true);
    db_util.register_database(confArg.get("db_name_runs"), true);
    db_util.register_database(confArg.get("db_name_updates"), true);
    FileSystem hdfs = FileSystem.get(config);
    FileSystem localFS = FileSystem.getLocal(config);

    // Get source type
    confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    confArg.put(
        "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    if (!confArg.get("source").equals("local")
        && type_move == toFrom.REMOTE2LOCAL
        && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) {
      confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util)));
    }

    /*
     * Get previous timestamp
     */
    Get run_id_get = new Get(confArg.get("run_id").getBytes());
    Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get);
    KeyValue run_file_prev =
        run_get.getColumnLatest(
            "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes());
    String last_timestamp = new String("0");
    if (null != run_file_prev && !confArg.get("source").equals("local")) {
      long last_timestamp_real = run_file_prev.getTimestamp();
      Long current_timestamp = new Long(confArg.get("timestamp_real"));
      if ((current_timestamp - last_timestamp_real) > 36000) {
        last_timestamp = new String(run_file_prev.getValue());
        Integer lastTimestamp = new Integer(last_timestamp);
        lastTimestamp += 1;
        last_timestamp = lastTimestamp.toString();
        logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate);
        Date last_run = new Date(run_file_prev.getTimestamp());
        if (last_run.before(endDate) && !full_run) {
          confArg.put("timestamp_start", last_timestamp);
        }
      }
    }

    Integer tse = new Integer(confArg.get("timestamp_stop"));
    Integer tss = new Integer(confArg.get("timestamp_start"));
    if (tss > tse) {
      logger.info("No new version of requested file.");
      return 0;
    }

    /*
     * Generate file
     */

    String lockName = lock(lockRequest);

    Get file_id_get = new Get(confArg.get("file_id").getBytes());
    Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get);
    if (!file_get.isEmpty()) {
      boolean found =
          hasFile(
              db_util,
              hdfs,
              confArg.get("db_name_files"),
              confArg.get("file_id"),
              getFullPath(confArg));
      if (confArg.get("source").equals("fullfile")) {
        found = false;
      }
      String filenames_put =
          getFileNames(
              db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg));
      // Filename not found in file database
      if (!found && type_move == toFrom.REMOTE2LOCAL) {
        if (!confArg.get("source").equals("local")) {
          // Generate intermediate file
          if (getFile(hdfs, confArg, db_util) == null) {
            unlock(lockName);
            return 1;
          }
          // Put generated file into file database
          if (!confArg.get("format").equals("fullfile")) {
            putFileEntry(
                db_util,
                hdfs,
                confArg.get("db_name_files"),
                confArg.get("file_id"),
                confArg.get("full_file_name"),
                confArg.get("source"));
          }
        } else {
          logger.warn("Remote file not found, and cannot be generated! File: " + confArg);
          unlock(lockName);
          return 1;
        }
      }
    } else {
      if (type_move == toFrom.REMOTE2LOCAL) {
        logger.warn("Remote file not found, and cannot be generated.");
        unlock(lockName);
        return 1;
      }
    }

    /*
     * Copy file
     * Update tables
     */

    if (type_move == toFrom.LOCAL2REMOTE) {
      if (!confArg.get("format").equals("fullfile")) {
        putFileEntry(
            db_util,
            hdfs,
            confArg.get("db_name_files"),
            confArg.get("file_id"),
            getFullPath(confArg),
            confArg.get("source"));
      }
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg)));
    } else if (type_move == toFrom.REMOTE2LOCAL) {
      FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*"));
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      unlock(lockName);
      for (FileStatus file : files) {
        Path cur_file = file.getPath();
        Path cur_local_path =
            new Path(new String(confArg.get("local_path") + confArg.get("file_id")));
        String suffix = getSuffix(getFileName(confArg), cur_file.getName());
        if (suffix.length() > 0) {
          cur_local_path = cur_local_path.suffix(new String("." + suffix));
        }
        if (confArg.get("copy").equals("true")) {
          String crc = hdfs.getFileChecksum(cur_file).toString();
          if (checksumLocalTest(cur_local_path, crc)) {
            continue;
          } else {
            hdfs.copyToLocalFile(cur_file, cur_local_path);
            writeChecksum(cur_local_path, crc);
          }
        } else {
          System.out.println(cur_local_path + "\t" + cur_file);
        }
      }
    }
    unlock(lockName);
    return 0;
  }
Example #9
0
  // Information needed to get a single file:
  // BASE_PATH, FILE_ID, TIMESTAMP_START, TIMESTAMP_STOP, SOURCE, FILESYSTEM
  private static Vector<Path> getFile(
      FileSystem fs, Hashtable<String, String> config, dbutil db_util) throws Exception {
    Long latestVersion = latestVersion(config, db_util);

    try {
      config.put("timestamp_start", config.get("timestamp_start"));
      config.put("timestamp_real", latestVersion.toString());
      config.put("timestamp_stop", latestVersion.toString());
    } catch (Exception E) {
      logger.error("Tryign to get file that is impossible to generate: " + getFullPath(config));
      return null;
    }
    if (Integer.parseInt(config.get("timestamp_start"))
        > Integer.parseInt(config.get("timestamp_stop"))) {
      return null;
    }
    logger.debug(
        "Getting DB for timestamp "
            + config.get("timestamp_start")
            + " to "
            + config.get("timestamp_stop"));

    String final_result = getFullPath(config);

    String temp_path_base =
        config.get("local_temp_path")
            + "_"
            + config.get("task_id")
            + "_"
            + config.get("run_id")
            + "/";
    Path newPath = new Path(final_result + "*");
    Vector<Path> ret_path = new Vector<Path>();
    String lockName = lock(final_result.replaceAll("/", "_"));
    if (fs.globStatus(newPath).length != 0) {
      ret_path.add(newPath);
      unlock(lockName);
      config.put("full_file_name", final_result);
      return ret_path;
    } else {
      if (!config.get("source").equals("local")) {
        config.put("temp_path_base", temp_path_base);

        config.put("timestamp_start", config.get("timestamp_start"));
        config.put("timestamp_real", latestVersion.toString());
        config.put("timestamp_stop", latestVersion.toString());

        Class<?> sourceClass =
            Class.forName("org.gestore.plugin.source." + config.get("source") + "Source");
        Method process_data = sourceClass.getMethod("process", Hashtable.class, FileSystem.class);
        Object processor = sourceClass.newInstance();
        Object retVal;
        try {
          retVal = process_data.invoke(processor, config, fs);
        } catch (InvocationTargetException E) {
          Throwable exception = E.getTargetException();
          logger.error("Unable to call method in child class: " + exception.toString());
          exception.printStackTrace(System.out);
          unlock(lockName);
          return null;
        }
        FileStatus[] files = (FileStatus[]) retVal;
        if (files == null) {
          logger.error("Error getting files, no files returned");
          return null;
        }

        for (FileStatus file : files) {
          Path cur_file = file.getPath();
          Path cur_local_path = new Path(temp_path_base + config.get("file_id"));
          String suffix = getSuffix(config.get("file_id"), cur_file.getName());
          cur_local_path = cur_local_path.suffix(suffix);
          Path res_path = new Path(new String(final_result + suffix));
          logger.debug("Moving file" + cur_file.toString() + " to " + res_path.toString());
          if (config.get("copy").equals("true")) {
            fs.moveFromLocalFile(cur_file, res_path);
          } else {
            fs.rename(cur_file, res_path);
          }
        }

        config.put("full_file_name", final_result);
      }
    }
    unlock(lockName);
    return ret_path;
  }
Example #10
0
    // The input video files are split into chunks of 64MB here...
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      String line = value.toString();
      System.out.println("job1:mapInp:-" + line);
      String[] info = line.split(" ");
      info[0] = info[0].trim();
      info[1] = info[1].trim();
      String lstfnames = "", fname = "";
      try {
        Configuration config = new Configuration();
        FileSystem hdfs = FileSystem.get(config);
        String prefixPath = "", fnm = "";
        Pattern x = Pattern.compile("(.*)/(.*)");
        Matcher xm = x.matcher(info[0]);
        while (xm.find()) {
          prefixPath = xm.group(1);
          fnm = xm.group(2);
        }
        String dst = "/home/" + fnm; // dst is path of the file on local system.
        hdfs.copyToLocalFile(new Path(info[0]), new Path(dst));

        Process p = Runtime.getRuntime().exec("ffmpeg -i " + dst);
        String s;

        BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream()));
        Pattern D = Pattern.compile("Duration:[ ]*([0-9]+):([0-9]+):([0-9]+)");
        long time = 0; // "time" is the duration of the input video file
        long sps = 0; // "sps" is the number of seconds(duration) of each video split
        while ((s = stdError.readLine()) != null) {
          Matcher md = D.matcher(s);
          while (md.find()) {
            time =
                Long.parseLong(md.group(1)) * 3600
                    + Long.parseLong(md.group(2)) * 60
                    + Long.parseLong(md.group(3));
          }
        }
        Process p1 = Runtime.getRuntime().exec("du -s " + dst);
        BufferedReader stdInput1 = new BufferedReader(new InputStreamReader(p1.getInputStream()));
        String s1 = "", size = ""; // "size" is the size of input video file
        while ((s1 = stdInput1.readLine()) != null) {
          String s11[] = s1.split("\t");
          size = s11[0];
        }
        sps = (64 * 1024) * time / (Long.parseLong(size)); // chunk size is 64MB
        String hr, min, sc;
        hr = Long.toString((sps / 3600));
        min = Long.toString((sps % 3600) / 60);
        sc = Long.toString(sps % 60);
        if (hr.length() < 2) hr = "0" + hr;
        if (min.length() < 2) min = "0" + min;
        if (sc.length() < 2) sc = "0" + sc;
        String splt = hr + ":" + min + ":" + sc;

        String query =
            "mencoder -oac copy -ovc copy -ss "; // building query to split the input video file
        String app = "", inpExt = "";
        Pattern xx = Pattern.compile("(.*)\\.(.*)");
        Matcher xxm = xx.matcher(dst);
        while (xxm.find()) {
          fname = xxm.group(1);
          inpExt = xxm.group(2);
        }
        String[] tmpArr = fname.split("/");
        String hdfsFname = "";
        long stSrt = 0;
        int cnt = 0;

        while (true) {
          if (stSrt > time) break;
          if (stSrt + sps > time) {
            long t = time - stSrt;
            hr = Long.toString((t / 3600));
            min = Long.toString((t % 3600) / 60);
            sc = Long.toString(t % 60);
            if (hr.length() < 2) hr = "0" + hr;
            if (min.length() < 2) min = "0" + min;
            if (sc.length() < 2) sc = "0" + sc;
            splt = hr + ":" + min + ":" + sc;
          }
          cnt++;
          hr = Long.toString((stSrt / 3600));
          min = Long.toString((stSrt % 3600) / 60);
          sc = Long.toString(stSrt % 60);
          if (hr.length() < 2) hr = "0" + hr;
          if (min.length() < 2) min = "0" + min;
          if (sc.length() < 2) sc = "0" + sc;
          app =
              hr
                  + ":"
                  + min
                  + ":"
                  + sc
                  + " -endPos "
                  + splt
                  + " "
                  + dst
                  + " -o "
                  + fname
                  + "_"
                  + Integer.toString(cnt)
                  + "."
                  + inpExt;

          Process p2 = Runtime.getRuntime().exec(query + app);
          String ls_str = "";
          DataInputStream ls_in = new DataInputStream(p2.getInputStream());
          while ((ls_str = ls_in.readLine()) != null) {}
          p2.destroy();
          String[] tmpArr1 = fnm.split("\\.");
          hdfs.copyFromLocalFile(
              true,
              true,
              new Path(fname + "_" + Integer.toString(cnt) + "." + inpExt),
              new Path(prefixPath + "/" + tmpArr1[0] + "_" + Integer.toString(cnt) + "." + inpExt));
          lstfnames +=
              prefixPath + "/" + tmpArr1[0] + "_" + Integer.toString(cnt) + "." + inpExt + " #!# ";
          stSrt += sps;
        }
        Runtime rt1 = Runtime.getRuntime();
        String[] cmd1 = {"/bin/bash", "-c", "rm " + dst}; // delete the file after use
        Process pr1 = rt1.exec(cmd1);
        pr1.waitFor();
        lstfnames += "*" + info[1];

        context.write(
            new Text(fname),
            new Text(
                lstfnames)); // "fname" contains name of the input video file with
                             // extension(eg.".avi") removed #### "lstfnames" is a string, contains
                             // all the names of video splits(concatenated)
        System.out.println("lstfnames : " + lstfnames);
      } catch (IOException e) {
        System.out.println("exception happened - here's what I know: ");
        e.printStackTrace();
        System.exit(-1);
      }
    }