Example #1
5
  /** Process incoming heartbeat messages from the task trackers. */
  public synchronized int emitHeartbeat(TaskTrackerStatus trackerStatus, boolean initialContact) {
    String trackerName = trackerStatus.getTrackerName();
    trackerStatus.setLastSeen(System.currentTimeMillis());

    synchronized (taskTrackers) {
      synchronized (trackerExpiryQueue) {
        boolean seenBefore = updateTaskTrackerStatus(trackerName, trackerStatus);
        if (initialContact) {
          // If this is the first contact, then clear out any state hanging around
          if (seenBefore) {
            lostTaskTracker(trackerName);
          }
        } else {
          // If not first contact, there should be some record of the tracker
          if (!seenBefore) {
            return InterTrackerProtocol.UNKNOWN_TASKTRACKER;
          }
        }

        if (initialContact) {
          trackerExpiryQueue.add(trackerStatus);
        }
      }
    }

    updateTaskStatuses(trackerStatus);
    // LOG.info("Got heartbeat from "+trackerName);
    return InterTrackerProtocol.TRACKERS_OK;
  }
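  // A hedged sketch of just the initialContact/seenBefore decision table above, with the
  // tracker bookkeeping stripped out; the constant values are placeholders, not the real
  // InterTrackerProtocol codes.
  static class HeartbeatDecisionSketch {
    static final int TRACKERS_OK = 0;          // assumed placeholder value
    static final int UNKNOWN_TASKTRACKER = 1;  // assumed placeholder value

    static int decide(boolean initialContact, boolean seenBefore) {
      if (!initialContact && !seenBefore) {
        // Not a first contact, yet there is no record of the tracker: reject it.
        return UNKNOWN_TASKTRACKER;
      }
      // First contact (any stale state is cleared) or a known tracker: accept it.
      return TRACKERS_OK;
    }
  }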
Example #2
0
  /** For debugging. */
  public static void main(String[] args) throws Exception {
    final String usage = "NutchBean query";

    if (args.length == 0) {
      System.err.println(usage);
      System.exit(-1);
    }

    final Configuration conf = NutchConfiguration.create();
    final NutchBean bean = new NutchBean(conf);
    try {
      final Query query = Query.parse(args[0], conf);
      final Hits hits = bean.search(query, 10);
      System.out.println("Total hits: " + hits.getTotal());
      final int length = (int) Math.min(hits.getTotal(), 10);
      final Hit[] show = hits.getHits(0, length);
      final HitDetails[] details = bean.getDetails(show);
      final Summary[] summaries = bean.getSummary(details, query);

      for (int i = 0; i < hits.getLength(); i++) {
        System.out.println(" " + i + " " + details[i] + "\n" + summaries[i]);
      }
    } catch (Throwable t) {
      LOG.error("Exception occurred while executing search: " + t, t);
      System.exit(1);
    }
    System.exit(0);
  }
Example #3
0
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: test.icde12.HadoopJoin <in> <out>");
      System.exit(2);
    }
    Job job = new Job(conf, "hadoop join");
    job.setJarByClass(HadoopJoin.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    job.setPartitionerClass(ICDEPartitioner.class);

    //    WritableComparator.define(Text.class,new ICDEComparator());

    job.setSortComparatorClass(ICDEComparator.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(8);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
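  // ICDEPartitioner and ICDEComparator are wired into the job above but not shown.
  // A hypothetical sketch of what such a partitioner could look like, only to illustrate
  // the Partitioner contract that job.setPartitionerClass(...) expects (plain hash
  // partitioning; the real ICDE logic is an assumption):
  public static class ICDEPartitionerSketch
      extends org.apache.hadoop.mapreduce.Partitioner<Text, Text> {
    @Override
    public int getPartition(Text key, Text value, int numPartitions) {
      // Send equal join keys to the same reducer.
      return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
  }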
Example #4
0
 private static void run(Callable c, boolean read, int size) {
   // Count all i/o time from here, including all retry overheads
   long start_io_ms = System.currentTimeMillis();
   while (true) {
     try {
       long start_ns = System.nanoTime(); // Blocking i/o call timing - without counting repeats
       c.call();
       TimeLine.record_IOclose(start_ns, start_io_ms, read ? 1 : 0, size, Value.HDFS);
       break;
        // Explicitly ignore the exceptions listed below (retry quietly); other
        // IOExceptions are also retried but flagged, and anything else is rethrown.
     } catch (EOFException e) {
       ignoreAndWait(e, false);
     } catch (SocketTimeoutException e) {
       ignoreAndWait(e, false);
     } catch (S3Exception e) {
        // Keep the S3Exception catch before IOException.
        // This is tricky because we support different HDFS versions:
        // newer versions declare S3Exception as an IOException, while old versions
        // (0.20.xxx) declare it as a RuntimeException, so it must be caught
        // before IOException.
       ignoreAndWait(e, false);
     } catch (IOException e) {
       ignoreAndWait(e, true);
     } catch (Exception e) {
       throw Log.errRTExcept(e);
     }
   }
 }
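 // ignoreAndWait(...) is not shown in this example. A minimal stand-in, consistent with
 // how the retry loop uses it (optionally report the exception, back off briefly, then
 // let the enclosing while(true) retry), might look like this; it is an assumption, not
 // the real implementation.
 private static void ignoreAndWait(final Exception e, boolean printException) {
   if (printException) {
     e.printStackTrace();
   }
   try {
     Thread.sleep(500); // short back-off before the caller retries the blocking i/o
   } catch (InterruptedException ie) {
     Thread.currentThread().interrupt();
   }
 }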
 public static void main(String[] args) throws Exception {
   if (ToolRunner.run(new FeatureMatching(), args) == 1) {
     System.out.println(".......Feature Match failure........");
     System.exit(1);
   }
   System.exit(0);
 }
 @Override
 public int run(String[] args) throws Exception {
   if (args.length < 4) {
     writeUsage();
     return 1;
   }
   Path secretsPath = new Path(args[0]);
   Path saltFilePath = new Path(args[1]);
   Path inputPath = new Path(args[2]);
   Path outputPath = new Path(args[3]);
   // Make sure the salt file exists
   generateSaltIfNeeded(saltFilePath, secretsPath);
   // Configure the job
   Job job = configureJob(secretsPath, saltFilePath, inputPath, outputPath);
   // Run it
   long startTime = System.currentTimeMillis();
   job.submit();
   if (job.waitForCompletion(true)) {
     System.out.printf(
         "Done obfuscating - took %d seconds.\n", (System.currentTimeMillis() - startTime) / 1000);
   } else {
     System.err.printf("Job finished with errors: %s\n", job.getStatus().getFailureInfo());
     return 2;
   }
   return 0;
 }
Example #7
0
  public void inject(Path crawlDb, Path urlDir) throws IOException {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    if (LOG.isInfoEnabled()) {
      LOG.info("Injector: starting at " + sdf.format(start));
      LOG.info("Injector: crawlDb: " + crawlDb);
      LOG.info("Injector: urlDir: " + urlDir);
    }

    Path tempDir =
        new Path(
            getConf().get("mapred.temp.dir", ".")
                + "/inject-temp-"
                + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    // map text input file to a <url,CrawlDatum> file
    if (LOG.isInfoEnabled()) {
      LOG.info("Injector: Converting injected urls to crawl db entries.");
    }
    JobConf sortJob = new NutchJob(getConf());
    sortJob.setJobName("inject " + urlDir);
    FileInputFormat.addInputPath(sortJob, urlDir);
    sortJob.setMapperClass(InjectMapper.class);

    FileOutputFormat.setOutputPath(sortJob, tempDir);
    sortJob.setOutputFormat(SequenceFileOutputFormat.class);
    sortJob.setOutputKeyClass(Text.class);
    sortJob.setOutputValueClass(CrawlDatum.class);
    sortJob.setLong("injector.current.time", System.currentTimeMillis());
    RunningJob mapJob = JobClient.runJob(sortJob);

    long urlsInjected = mapJob.getCounters().findCounter("injector", "urls_injected").getValue();
    long urlsFiltered = mapJob.getCounters().findCounter("injector", "urls_filtered").getValue();
    LOG.info("Injector: total number of urls rejected by filters: " + urlsFiltered);
    LOG.info(
        "Injector: total number of urls injected after normalization and filtering: "
            + urlsInjected);

    // merge with existing crawl db
    if (LOG.isInfoEnabled()) {
      LOG.info("Injector: Merging injected urls into crawl db.");
    }
    JobConf mergeJob = CrawlDb.createJob(getConf(), crawlDb);
    FileInputFormat.addInputPath(mergeJob, tempDir);
    mergeJob.setReducerClass(InjectReducer.class);
    JobClient.runJob(mergeJob);
    CrawlDb.install(mergeJob, crawlDb);

    // clean up
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(tempDir, true);

    long end = System.currentTimeMillis();
    LOG.info(
        "Injector: finished at "
            + sdf.format(end)
            + ", elapsed: "
            + TimingUtil.elapsedTime(start, end));
  }
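  // A hedged usage sketch for inject(): the no-arg constructor and setConf() call are
  // assumptions about the surrounding Injector class (a Configured Tool in Nutch), not
  // something this snippet defines.
  public static void injectExample() throws IOException {
    Configuration conf = NutchConfiguration.create();
    Injector injector = new Injector();
    injector.setConf(conf);                      // inject() reads getConf()
    injector.inject(new Path("crawl/crawldb"),   // existing crawl db
                    new Path("urls"));           // directory of seed-url text files
  }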
 static {
   System.load(
       (new File("/home/gathors/proj/v-opencv/FeatureMatching/libs/libopencv_java2412.so"))
           .getAbsolutePath());
   System.load(
       (new File("/home/gathors/proj/v-opencv/FeatureMatching/libs/libopencv_highgui.so"))
           .getAbsolutePath());
 }
Example #9
0
  /** Sets up configuration based on params */
  private static boolean setup(Hashtable<String, String> curConf, Configuration argConf) {

    if (argConf.get("file") == null) {
      logger.fatal("Missing file parameter");
      System.exit(1);
    }

    if (argConf.get("hdfs_base_path") == null) {
      logger.fatal("Missing HDFS base path, check gestore-conf.xml");
      System.exit(1);
    }

    if (argConf.get("hdfs_temp_path") == null) {
      logger.fatal("Missing HDFS temp path, check gestore-conf.xml");
      System.exit(1);
    }

    if (argConf.get("local_temp_path") == null) {
      logger.fatal("Missing local temp path, check gestore-conf.xml");
      System.exit(1);
    }

    // Input parameters
    curConf.put("run_id", argConf.get("run", ""));
    curConf.put("task_id", argConf.get("task", ""));
    curConf.put("file_id", argConf.get("file"));
    curConf.put("local_path", argConf.get("path", ""));
    curConf.put("type", argConf.get("type", "l2r"));
    curConf.put("timestamp_start", argConf.get("timestamp_start", "1"));
    curConf.put(
        "timestamp_stop", argConf.get("timestamp_stop", Integer.toString(Integer.MAX_VALUE)));
    curConf.put("delimiter", argConf.get("regex", "ID=.*"));
    curConf.put("taxon", argConf.get("taxon", "all"));
    curConf.put("intermediate", argConf.get("full_run", "false"));
    curConf.put("quick_add", argConf.get("quick_add", "false"));
    Boolean full_run = curConf.get("intermediate").matches("(?i).*true.*");
    curConf.put("format", argConf.get("format", "unknown"));
    curConf.put("split", argConf.get("split", "1"));
    curConf.put("copy", argConf.get("copy", "true"));

    // Constants
    curConf.put("base_path", argConf.get("hdfs_base_path"));
    curConf.put("temp_path", argConf.get("hdfs_temp_path"));
    curConf.put("local_temp_path", argConf.get("local_temp_path"));
    curConf.put("db_name_files", argConf.get("hbase_file_table"));
    curConf.put("db_name_runs", argConf.get("hbase_run_table"));
    curConf.put("db_name_updates", argConf.get("hbase_db_update_table"));

    // Timestamps
    Date currentTime = new Date();
    Date endDate = new Date(new Long(curConf.get("timestamp_stop")));
    curConf.put("timestamp_real", Long.toString(currentTime.getTime()));

    return true;
  }
    public void setup(Context context) {

      try {
        // System.setProperty("java.library.path", "/home/gathors/proj/libs");
        // System.loadLibrary(Core.NATIVE_xxx);
        // System.loadLibrary("/home/gathors/proj/libs/opencv-300.jar");
      } catch (UnsatisfiedLinkError e) {
        System.err.println("\nNATIVE LIBRARY failed to load...");
        System.err.println("ERROR:" + e);
        System.err.println("NATIVE_LIBRARY_NAME:" + Core.NATIVE_LIBRARY_NAME);
        System.err.println("#" + System.getProperty("java.library.path"));
        System.exit(1);
      }
    }
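    // A hedged sketch of what the commented-out body above likely intended: load the
    // OpenCV native library by its canonical name. This assumes java.library.path
    // already points at the directory containing the library.
    private static void loadOpenCvSketch() {
      System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
    }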
Example #11
0
    /**
     * The run method lives for the life of the JobTracker, and removes Jobs that are not still
     * running, but which finished a long time ago.
     */
    public void run() {
      while (shouldRun) {
        try {
          Thread.sleep(RETIRE_JOB_CHECK_INTERVAL);
        } catch (InterruptedException ie) {
        }

        synchronized (jobs) {
          synchronized (jobInitQueue) {
            synchronized (jobsByArrival) {
              for (Iterator it = jobs.keySet().iterator(); it.hasNext(); ) {
                String jobid = (String) it.next();
                JobInProgress job = (JobInProgress) jobs.get(jobid);

                if (job.getStatus().getRunState() != JobStatus.RUNNING
                    && job.getStatus().getRunState() != JobStatus.PREP
                    && (job.getFinishTime() + RETIRE_JOB_INTERVAL < System.currentTimeMillis())) {
                  it.remove();

                  jobInitQueue.remove(job);
                  jobsByArrival.remove(job);
                }
              }
            }
          }
        }
      }
    }
Example #12
0
  /** Implements basic throttling capabilities. */
  public static class Throttler {

    double bytesPerSec;
    long lastTime = System.currentTimeMillis();

    public Throttler(double bytesPerSec) {
      this.bytesPerSec = bytesPerSec;
    }

    public void incrementAndThrottle(int bytes) {
      if (bytesPerSec < 1) { // no throttle at all
        return;
      }
      long currentTime = System.currentTimeMillis();
      long timeDiff = currentTime - lastTime;
      if (timeDiff == 0) {
        timeDiff = 1;
      }

      double bytesPerSec = (bytes / (double) timeDiff) * 1000;
      if (bytesPerSec > this.bytesPerSec) {
        // Throttle
        double exceededByFactorOf = bytesPerSec / this.bytesPerSec;
        try {
          long mustSleep = (long) ((exceededByFactorOf - 1) * timeDiff);
          Thread.sleep(mustSleep);
        } catch (InterruptedException e) {
          e.printStackTrace();
        }
      }

      lastTime = System.currentTimeMillis();
    }
  }
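  // A hedged usage sketch for Throttler: copy a stream at roughly 1 MB/s. Only the
  // Throttler class itself comes from the example above; the stream handling is
  // illustrative.
  public static void throttledCopy(java.io.InputStream in, java.io.OutputStream out)
      throws java.io.IOException {
    Throttler throttler = new Throttler(1024 * 1024); // target ~1 MB/s
    byte[] buf = new byte[8192];
    int n;
    while ((n = in.read(buf)) != -1) {
      out.write(buf, 0, n);
      throttler.incrementAndThrottle(n); // sleeps if we are ahead of the byte budget
    }
  }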
Example #13
0
  /**
   * Start the JobTracker process. This is used only for debugging. As a rule, JobTracker should be
   * run as part of the DFS Namenode process.
   */
  public static void main(String argv[]) throws IOException, InterruptedException {
    if (argv.length != 0) {
      System.out.println("usage: JobTracker");
      System.exit(-1);
    }

    startTracker(new Configuration());
  }
Example #14
0
  /** Start the JobTracker process, listen on the indicated port */
  JobTracker(Configuration conf) throws IOException {
    //
    // Grab some static constants
    //
    maxCurrentTasks = conf.getInt("mapred.tasktracker.tasks.maximum", 2);
    RETIRE_JOB_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.interval", 24 * 60 * 60 * 1000);
    RETIRE_JOB_CHECK_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.check", 60 * 1000);
    TASK_ALLOC_EPSILON = conf.getFloat("mapred.jobtracker.taskalloc.loadbalance.epsilon", 0.2f);
    PAD_FRACTION = conf.getFloat("mapred.jobtracker.taskalloc.capacitypad", 0.1f);
    MIN_SLOTS_FOR_PADDING = 3 * maxCurrentTasks;

    // This is a directory of temporary submission files.  We delete it
    // on startup, and can delete any files that we're done with
    this.conf = conf;
    JobConf jobConf = new JobConf(conf);
    this.systemDir = jobConf.getSystemDir();
    this.fs = FileSystem.get(conf);
    FileUtil.fullyDelete(fs, systemDir);
    fs.mkdirs(systemDir);

    // Same with 'localDir' except it's always on the local disk.
    jobConf.deleteLocalFiles(SUBDIR);

    // Set ports, start RPC servers, etc.
    InetSocketAddress addr = getAddress(conf);
    this.localMachine = addr.getHostName();
    this.port = addr.getPort();
    this.interTrackerServer = RPC.getServer(this, addr.getPort(), 10, false, conf);
    this.interTrackerServer.start();
    Properties p = System.getProperties();
    for (Iterator it = p.keySet().iterator(); it.hasNext(); ) {
      String key = (String) it.next();
      String val = (String) p.getProperty(key);
      LOG.info("Property '" + key + "' is " + val);
    }

    this.infoPort = conf.getInt("mapred.job.tracker.info.port", 50030);
    this.infoServer = new JobTrackerInfoServer(this, infoPort);
    this.infoServer.start();

    this.startTime = System.currentTimeMillis();

    new Thread(this.expireTrackers).start();
    new Thread(this.retireJobs).start();
    new Thread(this.initJobs).start();
  }
Example #15
0
 public void configure(JobConf job) {
   this.jobConf = job;
   urlNormalizers = new URLNormalizers(job, URLNormalizers.SCOPE_INJECT);
   interval = jobConf.getInt("db.fetch.interval.default", 2592000);
   filters = new URLFilters(jobConf);
   scfilters = new ScoringFilters(jobConf);
   scoreInjected = jobConf.getFloat("db.score.injected", 1.0f);
   curTime = job.getLong("injector.current.time", System.currentTimeMillis());
 }
 /**
  * Add a file path to the current set of classpath entries. It also adds the file to the cache.
  * Intended to be used by user code.
  *
  * @param file Path of the file to be added
  * @param conf Configuration that contains the classpath setting
  * @param fs FileSystem with respect to which {@code file} should be interpreted.
  */
 public static void addFileToClassPath(Path file, Configuration conf, FileSystem fs)
     throws IOException {
   String filepath = file.toUri().getPath();
   String classpath = conf.get("mapred.job.classpath.files");
   conf.set(
       "mapred.job.classpath.files",
       classpath == null ? filepath : classpath + System.getProperty("path.separator") + filepath);
   URI uri = fs.makeQualified(file).toUri();
   addCacheFile(uri, conf);
 }
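 // A hedged usage sketch for addFileToClassPath: the jar path below is a placeholder,
 // not something the snippet above defines.
 public static void addFileToClassPathExample(Configuration conf) throws IOException {
   FileSystem fs = FileSystem.get(conf);
   Path dependency = new Path("/libs/my-udfs.jar"); // hypothetical jar already on HDFS
   addFileToClassPath(dependency, conf, fs);
   // conf now carries both the mapred.job.classpath.files entry and the cache-file URI.
 }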
Example #17
0
 /**
  * Get the archive entries in classpath as an array of Path
  *
  * @param conf Configuration that contains the classpath setting
  */
 public static Path[] getArchiveClassPaths(Configuration conf) {
   String classpath = conf.get("mapred.job.classpath.archives");
   if (classpath == null) return null;
   ArrayList list =
       Collections.list(new StringTokenizer(classpath, System.getProperty("path.separator")));
   Path[] paths = new Path[list.size()];
   for (int i = 0; i < list.size(); i++) {
     paths[i] = new Path((String) list.get(i));
   }
   return paths;
 }
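 // A hedged usage sketch for getArchiveClassPaths: the property value is illustrative,
 // and ':' is assumed to be path.separator (a Unix-like host).
 public static void printArchiveClassPathsExample() {
   Configuration conf = new Configuration();
   conf.set("mapred.job.classpath.archives", "/libs/a.zip:/libs/b.zip");
   for (Path p : getArchiveClassPaths(conf)) {
     System.out.println(p); // /libs/a.zip, then /libs/b.zip
   }
 }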
 @Override
 public int doWork() {
   try {
     setupProcedureStore();
     ExecutorService executor = Executors.newFixedThreadPool(numThreads);
     Future<?>[] futures = new Future<?>[numThreads];
     // Start worker threads.
     long start = System.currentTimeMillis();
     for (int i = 0; i < numThreads; i++) {
       futures[i] = executor.submit(this.new Worker(start));
     }
     boolean failure = false;
     try {
       for (Future<?> future : futures) {
         long timeout = start + WORKER_THREADS_TIMEOUT_SEC * 1000 - System.currentTimeMillis();
         failure |= (future.get(timeout, TimeUnit.MILLISECONDS).equals(EXIT_FAILURE));
       }
     } catch (Exception e) {
       System.err.println("Exception in worker thread.");
       e.printStackTrace();
       return EXIT_FAILURE;
     }
     executor.shutdown();
     if (failure) {
       return EXIT_FAILURE;
     }
     long timeTaken = System.currentTimeMillis() - start;
     System.out.println("******************************************");
     System.out.println("Num threads    : " + numThreads);
     System.out.println("Num procedures : " + numProcs);
     System.out.println("Sync type      : " + syncType);
     System.out.println("Time taken     : " + (timeTaken / 1000.0f) + "sec");
     System.out.println("******************************************");
     return EXIT_SUCCESS;
   } catch (IOException e) {
     e.printStackTrace();
     return EXIT_FAILURE;
   } finally {
     tearDownProcedureStore();
   }
 }
Example #19
0
 private static void run(Callable c, boolean read, int size) {
   // Count all i/o time from here, including all retry overheads
   long start_io_ms = System.currentTimeMillis();
   while (true) {
     try {
       long start_ns = System.nanoTime(); // Blocking i/o call timing - without counting repeats
       c.call();
       TimeLine.record_IOclose(start_ns, start_io_ms, read ? 1 : 0, size, Value.HDFS);
       break;
        // Explicitly ignore the exceptions listed below (retry quietly); other
        // IOExceptions are also retried but flagged, and anything else is rethrown.
     } catch (EOFException e) {
       ignoreAndWait(e, false);
     } catch (SocketTimeoutException e) {
       ignoreAndWait(e, false);
     } catch (IOException e) {
       ignoreAndWait(e, true);
     } catch (Exception e) {
       throw Log.errRTExcept(e);
     }
   }
 }
  /**
   * Add an archive path to the current set of classpath entries. It also adds the archive to
   * the cache. Intended to be used by user code.
   *
   * @param archive Path of the archive to be added
   * @param conf Configuration that contains the classpath setting
   * @param fs FileSystem with respect to which {@code archive} should be interpreted.
   */
  public static void addArchiveToClassPath(Path archive, Configuration conf, FileSystem fs)
      throws IOException {
    String archivepath = archive.toUri().getPath();
    String classpath = conf.get("mapred.job.classpath.archives");
    conf.set(
        "mapred.job.classpath.archives",
        classpath == null
            ? archivepath
            : classpath + System.getProperty("path.separator") + archivepath);
    URI uri = fs.makeQualified(archive).toUri();

    addCacheArchive(uri, conf);
  }
  /**
   * Default class initialization.
   *
   * @param fsuri path to Swift
   * @param conf Hadoop configuration
   * @throws IOException
   */
  @Override
  public void initialize(URI fsuri, Configuration conf) throws IOException {
    super.initialize(fsuri, conf);

    setConf(conf);
    if (store == null) {
      store = new SwiftNativeFileSystemStore();
    }
    this.uri = fsuri;
    this.workingDir =
        new Path("/user", System.getProperty("user.name"))
            .makeQualified(uri, new Path(System.getProperty("user.name")));
    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "Initializing SwiftNativeFileSystem against URI "
              + uri
              + " and working dir "
              + workingDir);
    }
    store.initialize(uri, conf);
    LOG.debug("SwiftFileSystem initialized");
  }
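  // A hedged sketch of how a client typically reaches initialize() above: register the
  // implementation for the swift:// scheme and ask FileSystem for it. The property name
  // fs.swift.impl and the URI are assumptions about the deployment.
  public static FileSystem openSwiftSketch() throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.swift.impl", SwiftNativeFileSystem.class.getName());
    // FileSystem.get() instantiates the class and calls initialize(uri, conf) on it.
    return FileSystem.get(URI.create("swift://container.service/"), conf);
  }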
Example #22
0
  private static URI addArchiveToClassPathHelper(Path archive, Configuration conf)
      throws IOException {

    String classpath = conf.get("mapred.job.classpath.archives");

    // The scheme/authority use ':' as a separator, so put the unqualified path in the classpath
    String archivePath = archive.toUri().getPath();

    conf.set(
        "mapred.job.classpath.archives",
        classpath == null
            ? archivePath
            : classpath + System.getProperty("path.separator") + archivePath);
    return archive.makeQualified(archive.getFileSystem(conf)).toUri();
  }
Example #23
0
    /**
     * The run method lives for the life of the JobTracker, and removes TaskTrackers that have not
     * checked in for some time.
     */
    public void run() {
      while (shouldRun) {
        //
        // Thread runs periodically to check whether trackers should be expired.
        // The sleep interval must be no more than half the maximum expiry time
        // for a task tracker.
        //
        try {
          Thread.sleep(TASKTRACKER_EXPIRY_INTERVAL / 3);
        } catch (InterruptedException ie) {
        }

        //
        // Loop through all expired items in the queue
        //
        synchronized (taskTrackers) {
          synchronized (trackerExpiryQueue) {
            long now = System.currentTimeMillis();
            TaskTrackerStatus leastRecent = null;
            while ((trackerExpiryQueue.size() > 0)
                && ((leastRecent = (TaskTrackerStatus) trackerExpiryQueue.first()) != null)
                && (now - leastRecent.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL)) {

              // Remove profile from head of queue
              trackerExpiryQueue.remove(leastRecent);
              String trackerName = leastRecent.getTrackerName();

              // Figure out if last-seen time should be updated, or if tracker is dead
              TaskTrackerStatus newProfile =
                  (TaskTrackerStatus) taskTrackers.get(leastRecent.getTrackerName());
              // Items might leave the taskTracker set through other means; the
              // status stored in 'taskTrackers' might be null, which means the
              // tracker has already been destroyed.
              if (newProfile != null) {
                if (now - newProfile.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL) {
                  // Remove completely
                  updateTaskTrackerStatus(trackerName, null);
                  lostTaskTracker(leastRecent.getTrackerName());
                } else {
                  // Update time by inserting latest profile
                  trackerExpiryQueue.add(newProfile);
                }
              }
            }
          }
        }
      }
    }
  /** Prints out usage */
  static void usage() {
    System.err.println(
        "Usage: hadoop com.tripadvisor.hadoop.BackupHdfs args\n"
            + "  --hdfs-path path/on/hdfs\n"
            + "  --local-path path/on/local/fs: path to hdfs backup\n"
            + "  --preserve-path path/on/local/fs: path to preserve old files\n"
            + "  [--no-preserve FILE]: list of file substrings to skip preserving\n"
            + "  [--ignore-tables FILE]: list of tables to ignore\n"
            + "  [--dry-run]: don't create any files on local fs\n"
            + "  --date yesterday|last-day|last-week|UNIX-time-T\n"
            + "  [--max-date UNIX-time-T]: don't backup any files newer than T\n"
            + "  [--sleep N]: sleep N seconds after each file copy\n"
            + "  [--max-bytes N]: don't back up more than N bytes\n");

    System.exit(1);
  }
Example #25
0
  public int run(String[] args) throws Exception {
    Path tempDir = new Path("/user/akhfa/temp");

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(AuthorCounter.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, tempDir);
    System.exit(job.waitForCompletion(true) ? 0 : 1);

    return 0;
  }
Example #26
0
      public void configure(JobConf job) {
        // 'key' == sortInput for sort-input; key == sortOutput for sort-output
        key = deduceInputFile(job);

        if (key == sortOutput) {
          partitioner = new HashPartitioner<WritableComparable, Writable>();

          // Figure the 'current' partition and no. of reduces of the 'sort'
          try {
            URI inputURI = new URI(job.get("map.input.file"));
            String inputFile = inputURI.getPath();
            partition =
                Integer.valueOf(inputFile.substring(inputFile.lastIndexOf("part") + 5)).intValue();
            noSortReducers = job.getInt("sortvalidate.sort.reduce.tasks", -1);
          } catch (Exception e) {
            System.err.println("Caught: " + e);
            System.exit(-1);
          }
        }
      }
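      // A worked example of the partition parsing above, with an illustrative path:
      //   inputFile = "/user/test/sort-output/part-00007"
      //   inputFile.substring(inputFile.lastIndexOf("part") + 5) -> "00007" -> partition 7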
  // Transform a JSON-encoded feature into an OpenCV Mat
  public static Mat json2mat(String json) {

    JsonParser parser = new JsonParser();
    JsonElement parseTree = parser.parse(json);

    // Verify the input is JSON type
    if (!parseTree.isJsonObject()) {
      System.out.println("The input is not a JSON type...\nExiting...");
      System.exit(1);
    }
    JsonObject jobj = parser.parse(json).getAsJsonObject();

    if (jobj == null || !jobj.isJsonObject() || jobj.isJsonNull()) {
      return null;
    }

    // Detect broken/null features
    JsonElement r = jobj.get("rows");
    if (r == null) {
      return null;
    }

    int rows = jobj.get("rows").getAsInt();
    int cols = jobj.get("cols").getAsInt();
    int type = jobj.get("type").getAsInt();
    String data = jobj.get("data").getAsString();
    String[] pixs = data.split(",");

    Mat descriptor = new Mat(rows, cols, type);
    for (String pix : pixs) {
      String[] tmp = pix.split(" ");
      int r_pos = Integer.valueOf(tmp[0]);
      int c_pos = Integer.valueOf(tmp[1]);
      double rgb = Double.valueOf(tmp[2]);
      descriptor.put(r_pos, c_pos, rgb);
    }
    return descriptor;
  }
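  // A hedged usage sketch for json2mat: a 2x2 descriptor in the "row col value" triple
  // format the parser above expects. The type code 6 (CV_64FC1) is an assumption about a
  // reasonable element type.
  public static void json2matExample() {
    String json = "{\"rows\":2,\"cols\":2,\"type\":6,"
        + "\"data\":\"0 0 1.0,0 1 2.0,1 0 3.0,1 1 4.0\"}";
    Mat m = json2mat(json);
    System.out.println(m.dump()); // prints the reconstructed 2x2 matrix
  }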
  public void testcheckOutputSpecsForbidRecordCompression() throws IOException {
    Job job = Job.getInstance(new Configuration(), "testcheckOutputSpecsForbidRecordCompression");
    FileSystem fs = FileSystem.getLocal(job.getConfiguration());
    Path outputdir = new Path(System.getProperty("test.build.data", "/tmp") + "/output");
    fs.delete(outputdir, true);

    // Without an output path, FileOutputFormat.checkOutputSpecs will throw
    // InvalidJobConfException
    FileOutputFormat.setOutputPath(job, outputdir);

    // SequenceFileAsBinaryOutputFormat doesn't support record compression
    // It should throw an exception when checked by checkOutputSpecs
    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);

    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    try {
      new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job);
    } catch (Exception e) {
      fail(
          "Block compression should be allowed for "
              + "SequenceFileAsBinaryOutputFormat: Caught "
              + e.getClass().getName());
    }

    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.RECORD);
    try {
      new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job);
      fail("Record compression should not be allowed for " + "SequenceFileAsBinaryOutputFormat");
    } catch (InvalidJobConfException ie) {
      // expected
    } catch (Exception e) {
      fail(
          "Expected "
              + InvalidJobConfException.class.getName()
              + " but caught "
              + e.getClass().getName());
    }
  }
 // TODO: Can also collect #procs, time taken by each thread to measure fairness.
 @Override
 public Integer call() throws IOException {
   while (true) {
     if (workersFailed.get()) {
       return EXIT_FAILURE;
     }
     long procId = procIds.getAndIncrement();
     if (procId >= numProcs) {
       break;
     }
     if (procId != 0 && procId % 10000 == 0) {
       long ms = System.currentTimeMillis() - start;
       System.out.println("Wrote " + procId + " procedures in " + StringUtils.humanTimeDiff(ms));
     }
     try {
       if (procId > 0 && procId % numProcsPerWal == 0) {
         store.rollWriterForTesting();
         System.out.println(
             "Starting new log : "
                 + store.getActiveLogs().get(store.getActiveLogs().size() - 1));
       }
     } catch (IOException ioe) {
       // Ask other threads to quit too.
       workersFailed.set(true);
       System.err.println("Exception when rolling log file. Current procId = " + procId);
       ioe.printStackTrace();
       return EXIT_FAILURE;
     }
     ProcedureTestingUtility.TestProcedure proc =
         new ProcedureTestingUtility.TestProcedure(procId);
     proc.setData(serializedState);
     store.insert(proc, null);
     store.update(proc);
   }
   return EXIT_SUCCESS;
 }
  public void testFormat() throws Exception {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");

    Reporter reporter = Reporter.NULL;

    int seed = new Random().nextInt();
    // LOG.info("seed = "+seed);
    Random random = new Random(seed);

    fs.delete(dir, true);

    FileInputFormat.setInputPaths(job, dir);

    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {

      // LOG.info("creating; entries = " + length);

      // create a file with length entries
      SequenceFile.Writer writer =
          SequenceFile.createWriter(fs, conf, file, IntWritable.class, BytesWritable.class);
      try {
        for (int i = 0; i < length; i++) {
          IntWritable key = new IntWritable(i);
          byte[] data = new byte[random.nextInt(10)];
          random.nextBytes(data);
          BytesWritable value = new BytesWritable(data);
          writer.append(key, value);
        }
      } finally {
        writer.close();
      }

      // try splitting the file in a variety of sizes
      InputFormat<IntWritable, BytesWritable> format =
          new SequenceFileInputFormat<IntWritable, BytesWritable>();
      IntWritable key = new IntWritable();
      BytesWritable value = new BytesWritable();
      for (int i = 0; i < 3; i++) {
        int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
        // LOG.info("splitting: requesting = " + numSplits);
        InputSplit[] splits = format.getSplits(job, numSplits);
        // LOG.info("splitting: got =        " + splits.length);

        // check each split
        BitSet bits = new BitSet(length);
        for (int j = 0; j < splits.length; j++) {
          RecordReader<IntWritable, BytesWritable> reader =
              format.getRecordReader(splits[j], job, reporter);
          try {
            int count = 0;
            while (reader.next(key, value)) {
              // if (bits.get(key.get())) {
              // LOG.info("splits["+j+"]="+splits[j]+" : " +
              // key.get());
              // LOG.info("@"+reader.getPos());
              // }
              assertFalse("Key in multiple partitions.", bits.get(key.get()));
              bits.set(key.get());
              count++;
            }
            // LOG.info("splits["+j+"]="+splits[j]+" count=" +
            // count);
          } finally {
            reader.close();
          }
        }
        assertEquals("Some keys in no partition.", length, bits.cardinality());
      }
    }
  }