Example #1
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: test.icde12.HadoopJoin <in> <out>");
      System.exit(2);
    }
    Job job = new Job(conf, "hadoop join");
    job.setJarByClass(HadoopJoin.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    job.setPartitionerClass(ICDEPartitioner.class);

    //    WritableComparator.define(Text.class,new ICDEComparator());

    job.setSortComparatorClass(ICDEComparator.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(8);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
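The ICDEPartitioner and ICDEComparator wired into this job are not shown in the listing. As a rough illustration only, a join-style partitioner that routes on the join key might look like the sketch below; the class name and the "#" tag separator are assumptions, not the original code.

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

// Hypothetical sketch of a join partitioner: partition on the join key alone so
// that tagged rows from both relations meet at the same reducer.
public class JoinKeyPartitioner extends Partitioner<Text, Text> {
  @Override
  public int getPartition(Text key, Text value, int numPartitions) {
    String joinKey = key.toString().split("#", 2)[0]; // "#" separator is assumed
    return (joinKey.hashCode() & Integer.MAX_VALUE) % numPartitions;
  }
}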
Example #2
 public static void main(String[] args) throws Exception {
   if (ToolRunner.run(new FeatureMatching(), args) != 0) { // any nonzero exit code means failure
     System.out.println(".......Feature Match failure........");
     System.exit(1);
   }
   System.exit(0);
 }
Example #3
File: RPC.java Project: baeeq/hadoop-20
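    // Client-side dynamic-proxy handler: the method call is serialized as an
    // Invocation and sent over the wire; connect-level failures flag the address
    // for a DNS re-check before the exception is rethrown.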
    public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
      final boolean logDebug = LOG.isDebugEnabled();
      long startTime = 0;
      if (logDebug) {
        startTime = System.currentTimeMillis();
      }

      ObjectWritable value = null;
      try {
        value =
            (ObjectWritable)
                client.call(
                    new Invocation(method, args), getAddress(), protocol, ticket, rpcTimeout);
      } catch (RemoteException re) {
        throw re;
      } catch (ConnectException ce) {
        needCheckDnsUpdate = true;
        throw ce;
      } catch (NoRouteToHostException nrhe) {
        needCheckDnsUpdate = true;
        throw nrhe;
      } catch (PortUnreachableException pue) {
        needCheckDnsUpdate = true;
        throw pue;
      } catch (UnknownHostException uhe) {
        needCheckDnsUpdate = true;
        throw uhe;
      }
      if (logDebug) {
        long callTime = System.currentTimeMillis() - startTime;
        LOG.debug("Call: " + method.getName() + " " + callTime);
      }
      return value.get();
    }
Example #4
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] remainArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (remainArgs.length != 2) {
      System.err.println("Usage: wordcount <input> <output>");
      System.exit(1);
    }

    Job job = new Job(conf, "wordcount");
    job.setJarByClass(WordCount.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setNumReduceTasks(4);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileSystem.get(conf).delete(new Path(remainArgs[1]), true);

    FileInputFormat.setInputPaths(job, new Path(remainArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(remainArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
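The Map and Reduce classes referenced by this driver are not part of the snippet; a minimal pair matching the declared Text/IntWritable output types might look like this (a sketch under those assumptions, not the original classes):

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// Sketch of the mapper: emit (word, 1) for every token in the input line.
class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
  private static final IntWritable ONE = new IntWritable(1);
  private final Text word = new Text();

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken());
      context.write(word, ONE);
    }
  }
}

// Sketch of the reducer (also usable as the combiner above): sum the counts per word.
class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
  private final IntWritable result = new IntWritable();

  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable v : values) {
      sum += v.get();
    }
    result.set(sum);
    context.write(key, result);
  }
}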
Example #5
File: RPC.java Project: baeeq/hadoop-20
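    // Server-side dispatch: deserialize the Invocation, look up the target method
    // reflectively, invoke it on the protocol implementation, and record queue
    // time (ms) and processing time (us) in the per-method RPC metrics.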
    public Writable call(Class<?> protocol, Writable param, long receivedTime) throws IOException {
      try {
        Invocation call = (Invocation) param;
        if (verbose) log("Call: " + call);

        Method method = protocol.getMethod(call.getMethodName(), call.getParameterClasses());
        method.setAccessible(true);

        int qTime = (int) (System.currentTimeMillis() - receivedTime);
        long startNanoTime = System.nanoTime();
        Object value = method.invoke(instance, call.getParameters());
        long processingMicroTime = (System.nanoTime() - startNanoTime) / 1000;
        if (LOG.isDebugEnabled()) {
          LOG.debug(
              "Served: "
                  + call.getMethodName()
                  + " queueTime (millisec)= "
                  + qTime
                  + " procesingTime (microsec)= "
                  + processingMicroTime);
        }
        rpcMetrics.rpcQueueTime.inc(qTime);
        rpcMetrics.rpcProcessingTime.inc(processingMicroTime);

        MetricsTimeVaryingRate m =
            (MetricsTimeVaryingRate) rpcMetrics.registry.get(call.getMethodName());
        if (m == null) {
          try {
            m = new MetricsTimeVaryingRate(call.getMethodName(), rpcMetrics.registry);
          } catch (IllegalArgumentException iae) {
            // the metric has already been registered; re-fetch the handle
            LOG.debug("Error registering " + call.getMethodName(), iae);
            m = (MetricsTimeVaryingRate) rpcMetrics.registry.get(call.getMethodName());
          }
        }
        // record call time in microseconds
        m.inc(processingMicroTime);

        if (verbose) log("Return: " + value);

        return new ObjectWritable(method.getReturnType(), value);

      } catch (InvocationTargetException e) {
        Throwable target = e.getTargetException();
        if (target instanceof IOException) {
          throw (IOException) target;
        } else {
          IOException ioe = new IOException(target.toString());
          ioe.setStackTrace(target.getStackTrace());
          throw ioe;
        }
      } catch (Throwable e) {
        if (!(e instanceof IOException)) {
          LOG.error("Unexpected throwable object ", e);
        }
        IOException ioe = new IOException(e.toString());
        ioe.setStackTrace(e.getStackTrace());
        throw ioe;
      }
    }
Example #6
  public void inject(Path crawlDb, Path urlDir) throws IOException {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    if (LOG.isInfoEnabled()) {
      LOG.info("Injector: starting at " + sdf.format(start));
      LOG.info("Injector: crawlDb: " + crawlDb);
      LOG.info("Injector: urlDir: " + urlDir);
    }

    Path tempDir =
        new Path(
            getConf().get("mapred.temp.dir", ".")
                + "/inject-temp-"
                + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    // map text input file to a <url,CrawlDatum> file
    if (LOG.isInfoEnabled()) {
      LOG.info("Injector: Converting injected urls to crawl db entries.");
    }
    JobConf sortJob = new NutchJob(getConf());
    sortJob.setJobName("inject " + urlDir);
    FileInputFormat.addInputPath(sortJob, urlDir);
    sortJob.setMapperClass(InjectMapper.class);

    FileOutputFormat.setOutputPath(sortJob, tempDir);
    sortJob.setOutputFormat(SequenceFileOutputFormat.class);
    sortJob.setOutputKeyClass(Text.class);
    sortJob.setOutputValueClass(CrawlDatum.class);
    sortJob.setLong("injector.current.time", System.currentTimeMillis());
    RunningJob mapJob = JobClient.runJob(sortJob);

    long urlsInjected = mapJob.getCounters().findCounter("injector", "urls_injected").getValue();
    long urlsFiltered = mapJob.getCounters().findCounter("injector", "urls_filtered").getValue();
    LOG.info("Injector: total number of urls rejected by filters: " + urlsFiltered);
    LOG.info(
        "Injector: total number of urls injected after normalization and filtering: "
            + urlsInjected);

    // merge with existing crawl db
    if (LOG.isInfoEnabled()) {
      LOG.info("Injector: Merging injected urls into crawl db.");
    }
    JobConf mergeJob = CrawlDb.createJob(getConf(), crawlDb);
    FileInputFormat.addInputPath(mergeJob, tempDir);
    mergeJob.setReducerClass(InjectReducer.class);
    JobClient.runJob(mergeJob);
    CrawlDb.install(mergeJob, crawlDb);

    // clean up
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(tempDir, true);

    long end = System.currentTimeMillis();
    LOG.info(
        "Injector: finished at "
            + sdf.format(end)
            + ", elapsed: "
            + TimingUtil.elapsedTime(start, end));
  }
Example #7
 static {
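   // Loads prebuilt OpenCV 2.4.12 native libraries from absolute, machine-specific
   // paths; System.loadLibrary plus java.library.path would be the portable variant.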
   System.load(
       (new File("/home/gathors/proj/v-opencv/FeatureMatching/libs/libopencv_java2412.so"))
           .getAbsolutePath());
   System.load(
       (new File("/home/gathors/proj/v-opencv/FeatureMatching/libs/libopencv_highgui.so"))
           .getAbsolutePath());
 }
Example #8
 /** Driver function. */
 public static void main(String[] args) throws Exception {
   if (args.length != 3) {
     System.err.println(
         "Usage: Mat_vect_mul <input matrix file path> <input vector file path> <output file path>");
     System.exit(-1);
   }
   int res = ToolRunner.run(new Configuration(), new Mat_vect_mul(), args);
   System.exit(res);
 }
Example #9
  /** Sets up configuration based on params */
  private static boolean setup(Hashtable<String, String> curConf, Configuration argConf) {

    if (argConf.get("file") == null) {
      logger.fatal("Missing file parameter");
      System.exit(1);
    }

    if (argConf.get("hdfs_base_path") == null) {
      logger.fatal("Missing HDFS base path, check gestore-conf.xml");
      System.exit(1);
    }

    if (argConf.get("hdfs_temp_path") == null) {
      logger.fatal("Missing HDFS temp path, check gestore-conf.xml");
      System.exit(1);
    }

    if (argConf.get("local_temp_path") == null) {
      logger.fatal("Missing local temp path, check gestore-conf.xml");
      System.exit(1);
    }

    // Input parameters
    curConf.put("run_id", argConf.get("run", ""));
    curConf.put("task_id", argConf.get("task", ""));
    curConf.put("file_id", argConf.get("file"));
    curConf.put("local_path", argConf.get("path", ""));
    curConf.put("type", argConf.get("type", "l2r"));
    curConf.put("timestamp_start", argConf.get("timestamp_start", "1"));
    curConf.put(
        "timestamp_stop", argConf.get("timestamp_stop", Integer.toString(Integer.MAX_VALUE)));
    curConf.put("delimiter", argConf.get("regex", "ID=.*"));
    curConf.put("taxon", argConf.get("taxon", "all"));
    curConf.put("intermediate", argConf.get("full_run", "false"));
    curConf.put("quick_add", argConf.get("quick_add", "false"));
    Boolean full_run = curConf.get("intermediate").matches("(?i).*true.*");
    curConf.put("format", argConf.get("format", "unknown"));
    curConf.put("split", argConf.get("split", "1"));
    curConf.put("copy", argConf.get("copy", "true"));

    // Constants
    curConf.put("base_path", argConf.get("hdfs_base_path"));
    curConf.put("temp_path", argConf.get("hdfs_temp_path"));
    curConf.put("local_temp_path", argConf.get("local_temp_path"));
    curConf.put("db_name_files", argConf.get("hbase_file_table"));
    curConf.put("db_name_runs", argConf.get("hbase_run_table"));
    curConf.put("db_name_updates", argConf.get("hbase_db_update_table"));

    // Timestamps
    Date currentTime = new Date();
    Date endDate = new Date(Long.parseLong(curConf.get("timestamp_stop")));
    curConf.put("timestamp_real", Long.toString(currentTime.getTime()));

    return true;
  }
Example #10
 public void setLocalElasticSearchInstallation(JobConf conf) {
   String esConfigPath = conf.get(ES_CONFIG_OPT, ES_CONFIG);
   String esPluginsPath = conf.get(ES_PLUGINS_OPT, ES_PLUGINS);
   System.setProperty(ES_CONFIG_OPT, esConfigPath);
   System.setProperty(ES_PLUGINS_OPT, esPluginsPath);
   LOG.info(
       "Using Elasticsearch configuration file at "
           + esConfigPath
           + " and plugin directory "
           + esPluginsPath);
 }
Example #11
    public void setup(Context context) {

      try {
        // System.setProperty("java.library.path", "/home/gathors/proj/libs");
        // System.loadLibrary(Core.NATIVE_xxx);
        // System.loadLibrary("/home/gathors/proj/libs/opencv-300.jar");
      } catch (UnsatisfiedLinkError e) {
        System.err.println("\nNATIVE LIBRARY failed to load...");
        System.err.println("ERROR:" + e);
        System.err.println("NATIVE_LIBRARY_NAME:" + Core.NATIVE_LIBRARY_NAME);
        System.err.println("#" + System.getProperty("java.library.path"));
        System.exit(1);
      }
    }
Example #12
  public static void main(String[] args) throws Exception {
    Configuration c = new Configuration();
    if (args.length != 2) {
      System.out.println("provide sufficient arguments");
      System.exit(-1);
    }
    Job job = Job.getInstance(c, "Wordcount");
    job.setJarByClass(Wordcount.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    //	job.submit();
    job.waitForCompletion(true);
  }
Example #13
  @Override
  public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), getClass());
    conf.setJobName("UFO count");

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: avro UFO counter <in> <out>");
      System.exit(2);
    }

    FileInputFormat.addInputPath(conf, new Path(otherArgs[0]));
    Path outputPath = new Path(otherArgs[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);
    outputPath.getFileSystem(conf).delete(outputPath, true);
    Schema input_schema = Schema.parse(getClass().getResourceAsStream("ufo.avsc"));
    AvroJob.setInputSchema(conf, input_schema);
    AvroJob.setMapOutputSchema(
        conf,
        Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG)));

    AvroJob.setOutputSchema(conf, OUTPUT_SCHEMA);
    AvroJob.setMapperClass(conf, AvroRecordMapper.class);
    AvroJob.setReducerClass(conf, AvroRecordReducer.class);
    conf.setInputFormat(AvroInputFormat.class);
    JobClient.runJob(conf);

    return 0;
  }
Example #14
File: RPC.java Project: baeeq/hadoop-20
  /**
   * Get a proxy connection to a remote server
   *
   * @param protocol protocol class
   * @param clientVersion client version
   * @param addr remote address
   * @param conf configuration to use
   * @param rpcTimeout timeout for each RPC
   * @param timeout time in milliseconds before giving up
   * @return the proxy
   * @throws IOException if the far end throws a RemoteException
   */
  static <T extends VersionedProtocol> ProtocolProxy<T> waitForProtocolProxy(
      Class<T> protocol,
      long clientVersion,
      InetSocketAddress addr,
      Configuration conf,
      long timeout,
      int rpcTimeout)
      throws IOException {
    long startTime = System.currentTimeMillis();
    UserGroupInformation ugi = null;
    try {
      ugi = UserGroupInformation.login(conf);
    } catch (LoginException le) {
      throw new RuntimeException("Couldn't login!");
    }
    IOException ioe;
    while (true) {
      try {
        return getProtocolProxy(
            protocol,
            clientVersion,
            addr,
            ugi,
            conf,
            NetUtils.getDefaultSocketFactory(conf),
            rpcTimeout);
      } catch (ConnectException se) { // namenode has not been started
        LOG.info("Server at " + addr + " not available yet, Zzzzz...");
        ioe = se;
      } catch (SocketTimeoutException te) { // namenode is busy
        LOG.info("Problem connecting to server: " + addr);
        ioe = te;
      }
      // check if timed out
      if (System.currentTimeMillis() - timeout >= startTime) {
        throw ioe;
      }

      // wait for retry
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ie) {
        // IGNORE
      }
    }
  }
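A hypothetical call site for this helper (from within the same package, since the method is package-private) is sketched below; ClientProtocol, the endpoint, and the timeout values are illustrative assumptions, not from the source.

// Wait up to 30 s for the server to come up, with a 10 s per-call RPC timeout.
Configuration conf = new Configuration();
ProtocolProxy<ClientProtocol> proxy =
    RPC.waitForProtocolProxy(
        ClientProtocol.class,     // assumed VersionedProtocol subinterface
        ClientProtocol.versionID, // assumed version constant
        new InetSocketAddress("namenode.example.com", 8020),
        conf,
        30 * 1000L, // overall wait, ms
        10 * 1000); // per-RPC timeout, ms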
Example #15
  /**
   * Load an edit log, and apply the changes to the in-memory structure
   *
   * <p>This is where we apply edits that we've been writing to disk all along.
   */
  int loadFSEdits(File edits) throws IOException {
    int numEdits = 0;

    if (edits.exists()) {
      DataInputStream in = new DataInputStream(new BufferedInputStream(new FileInputStream(edits)));
      try {
        while (in.available() > 0) {
          byte opcode = in.readByte();
          numEdits++;
          switch (opcode) {
            case OP_ADD:
              {
                UTF8 name = new UTF8();
                name.readFields(in);
                ArrayWritable aw = new ArrayWritable(Block.class);
                aw.readFields(in);
                Writable writables[] = (Writable[]) aw.get();
                Block blocks[] = new Block[writables.length];
                System.arraycopy(writables, 0, blocks, 0, blocks.length);
                unprotectedAddFile(name, blocks);
                break;
              }
            case OP_RENAME:
              {
                UTF8 src = new UTF8();
                UTF8 dst = new UTF8();
                src.readFields(in);
                dst.readFields(in);
                unprotectedRenameTo(src, dst);
                break;
              }
            case OP_DELETE:
              {
                UTF8 src = new UTF8();
                src.readFields(in);
                unprotectedDelete(src);
                break;
              }
            case OP_MKDIR:
              {
                UTF8 src = new UTF8();
                src.readFields(in);
                unprotectedMkdir(src.toString());
                break;
              }
            default:
              {
                throw new IOException("Never seen opcode " + opcode);
              }
          }
        }
      } finally {
        in.close();
      }
    }
    return numEdits;
  }
Example #16
File: RPC.java Project: baeeq/hadoop-20
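 // Re-resolve the server address after a connect-level failure, but at most once
 // per MIN_DNS_CHECK_INTERVAL_MSEC, so DNS changes are picked up without hammering
 // the resolver.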
 private synchronized InetSocketAddress getAddress() {
   if (needCheckDnsUpdate
       && address != null
       && address.getHostName() != null
       && System.currentTimeMillis() - this.timeLastDnsCheck > MIN_DNS_CHECK_INTERVAL_MSEC) {
     try {
       InetSocketAddress newAddr = NetUtils.resolveAddress(address);
       if (newAddr != null) {
         LOG.info("DNS change: " + newAddr);
         address = newAddr;
       }
     } finally {
       this.timeLastDnsCheck = System.currentTimeMillis();
     }
   }
   needCheckDnsUpdate = false;
   return address;
 }
Example #17
 public void configure(JobConf job) {
   this.jobConf = job;
   urlNormalizers = new URLNormalizers(job, URLNormalizers.SCOPE_INJECT);
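   // db.fetch.interval.default of 2592000 seconds = 30 days between re-fetches.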
   interval = jobConf.getInt("db.fetch.interval.default", 2592000);
   filters = new URLFilters(jobConf);
   scfilters = new ScoringFilters(jobConf);
   scoreInjected = jobConf.getFloat("db.score.injected", 1.0f);
   curTime = job.getLong("injector.current.time", System.currentTimeMillis());
 }
Example #18
  public void testInputFormat() {

    try {
      JobConf conf = new JobConf();
      String TMP_DIR = System.getProperty("test.build.data", "/tmp");
      Path filename = new Path("file:///" + TMP_DIR + "/tmpSeqFile");
      SequenceFile.Writer sfw =
          SequenceFile.createWriter(
              FileSystem.getLocal(conf),
              conf,
              filename,
              ChukwaArchiveKey.class,
              ChunkImpl.class,
              SequenceFile.CompressionType.NONE,
              Reporter.NULL);

      StringBuilder buf = new StringBuilder();
      int offsets[] = new int[lines.length];
      for (int i = 0; i < lines.length; ++i) {
        buf.append(lines[i]);
        buf.append("\n");
        offsets[i] = buf.length() - 1;
      }
      ChukwaArchiveKey key = new ChukwaArchiveKey(0, "datatype", "sname", 0);
      ChunkImpl val = new ChunkImpl("datatype", "sname", 0, buf.toString().getBytes(), null);
      val.setRecordOffsets(offsets);
      sfw.append(key, val);
      sfw.append(key, val); // write it twice
      sfw.close();

      long len = FileSystem.getLocal(conf).getFileStatus(filename).getLen();
      InputSplit split = new FileSplit(filename, 0, len, (String[]) null);
      ChukwaInputFormat in = new ChukwaInputFormat();
      RecordReader<LongWritable, Text> r = in.getRecordReader(split, conf, Reporter.NULL);

      LongWritable l = r.createKey();
      Text line = r.createValue();
      for (int i = 0; i < lines.length * 2; ++i) {
        boolean succeeded = r.next(l, line);
        assertTrue(succeeded);
        assertEquals(i, l.get());
        assertEquals(lines[i % lines.length], line.toString());
        System.out.println("read line: " + l.get() + " " + line);
      }
      boolean succeeded = r.next(l, line);
      assertFalse(succeeded);

    } catch (IOException e) {
      e.printStackTrace();
      fail("IO exception " + e);
    }
  }
Example #19
  public static boolean stopIteration(Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path preFile = new Path("preX/Result");
    Path curFile = new Path("curX/part-00000");

    if (!(fs.exists(preFile) && fs.exists(curFile))) {
      System.exit(1);
    }

    boolean stop = true;
    String line1, line2;
    FSDataInputStream in1 = fs.open(preFile);
    FSDataInputStream in2 = fs.open(curFile);
    InputStreamReader isr1 = new InputStreamReader(in1);
    InputStreamReader isr2 = new InputStreamReader(in2);
    BufferedReader br1 = new BufferedReader(isr1);
    BufferedReader br2 = new BufferedReader(isr2);

    while ((line1 = br1.readLine()) != null && (line2 = br2.readLine()) != null) {
      String[] str1 = line1.split("\\s+");
      String[] str2 = line2.split("\\s+");
      double preElem = Double.parseDouble(str1[1]);
      double curElem = Double.parseDouble(str2[1]);
      if (Math.abs(preElem - curElem) > eps) {
        stop = false;
        break;
      }
    }

    br1.close();
    br2.close();

    if (!stop) {
      fs.delete(preFile, true);
      if (!fs.rename(curFile, preFile)) {
        System.exit(1);
      }
    }
    return stop;
  }
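A driver loop built on this convergence test might look roughly like the following; MAX_ITERATIONS and runIterationJob are assumed placeholders, not part of the original code.

// Hypothetical outer loop: run one iteration pass, then compare
// preX/Result against curX/part-00000 until the change falls below eps.
Configuration conf = new Configuration();
int iteration = 0;
boolean converged = false;
while (!converged && iteration < MAX_ITERATIONS) { // MAX_ITERATIONS: assumed constant
  runIterationJob(conf, iteration); // assumed helper that submits one MapReduce pass
  converged = stopIteration(conf);
  iteration++;
}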
Example #20
  public static void main(String[] args) throws Exception {
    Job job = new Job();
    job.setJarByClass(Sort.class);
    job.setJobName("Sort");

    FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/input/"));
    FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/output/"));
    job.setMapperClass(Map.class);
    // job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(2);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
Example #21
 VerProtocolImpl[] getSupportedProtocolVersions(RPC.RpcKind rpcKind, String protocolName) {
   VerProtocolImpl[] resultk = new VerProtocolImpl[getProtocolImplMap(rpcKind).size()];
   int i = 0;
   for (Map.Entry<ProtoNameVer, ProtoClassProtoImpl> pv :
       getProtocolImplMap(rpcKind).entrySet()) {
     if (pv.getKey().protocol.equals(protocolName)) {
       resultk[i++] = new VerProtocolImpl(pv.getKey().version, pv.getValue());
     }
   }
   if (i == 0) {
     return null;
   }
   VerProtocolImpl[] result = new VerProtocolImpl[i];
   System.arraycopy(resultk, 0, result, 0, i);
   return result;
 }
Example #22
  public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();

    Job jobA = Job.getInstance(conf, "delaysum");
    jobA.setOutputKeyClass(Text.class);
    jobA.setOutputValueClass(IntWritable.class);

    jobA.setMapperClass(ArrivalMap.class);
    jobA.setReducerClass(ArrivalReduce.class);

    FileInputFormat.setInputPaths(jobA, new Path(args[0]));
    FileOutputFormat.setOutputPath(jobA, new Path(args[1]));

    jobA.setJarByClass(MeanDelayOriginDestination.class);

    System.exit(jobA.waitForCompletion(true) ? 0 : 1);
  }
Example #23
  public int run(String[] args) throws Exception {
    Path tempDir = new Path("/user/akhfa/temp");

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(AuthorCounter.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, tempDir);
    // Return the exit code instead of calling System.exit inside Tool.run,
    // so ToolRunner can propagate it; the old System.exit made return 0 dead code.
    return job.waitForCompletion(true) ? 0 : 1;
  }
Example #24
  public static void main(String[] args) throws IOException {
    if (args.length != 2) {
      System.err.println("Usage: OldMaxTemperature <input path> <output path>");
      System.exit(-1);
    }

    JobConf conf = new JobConf(OldMaxTemperature.class);
    conf.setJobName("Max temperature");

    FileInputFormat.addInputPath(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.setMapperClass(OldMaxTemperatureMapper.class);
    conf.setReducerClass(OldMaxTemperatureReducer.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    JobClient.runJob(conf);
  }
Example #25
  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    // Strip generic Hadoop options first, then validate what remains.
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    args = gop.getRemainingArgs();

    if (args.length != 2) {
      System.err.println("Usage: WordCountDriver <inpath> <outpath>");
      System.exit(1);
    }

    Path inpath = new Path(args[0]);
    Path outpath = new Path(args[1]);

    Job job = new Job(conf, "WordCountDriver");
    job.setJarByClass(WordCountDriver.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.setInputPaths(job, inpath);
    FileOutputFormat.setOutputPath(job, outpath);

    job.setNumReduceTasks(2);

    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setPartitionerClass(WordCountPart.class);

    return job.waitForCompletion(true) ? 0 : 1;
  }
Example #26
  public static void main(String[] args) throws Exception {
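    // Two chained jobs: jobA aggregates arrival delays per key into /w1/tmp,
    // and jobB reads that intermediate output to pick the best-performing carrier.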

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path tmpPath = new Path("/w1/tmp");
    fs.delete(tmpPath, true);

    Job jobA = Job.getInstance(conf, "delaysum");
    jobA.setOutputKeyClass(Text.class);
    jobA.setOutputValueClass(IntWritable.class);

    jobA.setMapperClass(ArrivalMap.class);
    jobA.setReducerClass(ArrivalReduce.class);

    FileInputFormat.setInputPaths(jobA, new Path(args[0]));
    FileOutputFormat.setOutputPath(jobA, tmpPath);
    jobA.setJarByClass(TopArrivalPerformanceCarriersOD.class);
    jobA.waitForCompletion(true);

    Job jobB = Job.getInstance(conf, "Best Airline");
    jobB.setOutputKeyClass(Text.class);
    jobB.setOutputValueClass(FloatWritable.class);

    jobB.setMapOutputKeyClass(NullWritable.class);
    jobB.setMapOutputValueClass(TextArrayWritable.class);

    jobB.setMapperClass(DelayMap.class);
    jobB.setReducerClass(DelayReduce.class);
    jobB.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(jobB, tmpPath);
    FileOutputFormat.setOutputPath(jobB, new Path(args[1]));

    jobB.setInputFormatClass(KeyValueTextInputFormat.class);
    jobB.setOutputFormatClass(TextOutputFormat.class);

    jobB.setJarByClass(TopArrivalPerformanceCarriersOD.class);
    System.exit(jobB.waitForCompletion(true) ? 0 : 1);
  }
Example #27
  // Transform a JSON-encoded feature into an OpenCV Mat.
  public static Mat json2mat(String json) {

    JsonParser parser = new JsonParser();
    JsonElement parseTree = parser.parse(json);

    // Verify the input is JSON type
    if (!parseTree.isJsonObject()) {
      System.out.println("The input is not a JSON type...\nExiting...");
      System.exit(1);
    }
    JsonObject jobj = parseTree.getAsJsonObject(); // reuse the tree parsed above

    if (jobj == null || !jobj.isJsonObject() || jobj.isJsonNull()) {
      return null;
    }

    // Detect broken/null features
    JsonElement r = jobj.get("rows");
    if (r == null) {
      return null;
    }

    int rows = jobj.get("rows").getAsInt();
    int cols = jobj.get("cols").getAsInt();
    int type = jobj.get("type").getAsInt();
    String data = jobj.get("data").getAsString();
    String[] pixs = data.split(",");

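    // Each comma-separated entry encodes one sparse element as "row col value".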
    Mat descriptor = new Mat(rows, cols, type);
    for (String pix : pixs) {
      String[] tmp = pix.split(" ");
      int r_pos = Integer.valueOf(tmp[0]);
      int c_pos = Integer.valueOf(tmp[1]);
      double rgb = Double.valueOf(tmp[2]);
      descriptor.put(r_pos, c_pos, rgb);
    }
    return descriptor;
  }
Example #28
  public void testcheckOutputSpecsForbidRecordCompression() throws IOException {
    Job job = Job.getInstance(new Configuration(), "testcheckOutputSpecsForbidRecordCompression");
    FileSystem fs = FileSystem.getLocal(job.getConfiguration());
    Path outputdir = new Path(System.getProperty("test.build.data", "/tmp") + "/output");
    fs.delete(outputdir, true);

    // Without outputpath, FileOutputFormat.checkoutputspecs will throw
    // InvalidJobConfException
    FileOutputFormat.setOutputPath(job, outputdir);

    // SequenceFileAsBinaryOutputFormat doesn't support record compression
    // It should throw an exception when checked by checkOutputSpecs
    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);

    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    try {
      new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job);
    } catch (Exception e) {
      fail(
          "Block compression should be allowed for "
              + "SequenceFileAsBinaryOutputFormat:Caught "
              + e.getClass().getName());
    }

    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.RECORD);
    try {
      new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job);
      fail("Record compression should not be allowed for " + "SequenceFileAsBinaryOutputFormat");
    } catch (InvalidJobConfException ie) {
      // expected
    } catch (Exception e) {
      fail(
          "Expected "
              + InvalidJobConfException.class.getName()
              + "but caught "
              + e.getClass().getName());
    }
  }
Example #29
  public void testFormat() throws Exception {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");

    Reporter reporter = Reporter.NULL;

    int seed = new Random().nextInt();
    // LOG.info("seed = "+seed);
    Random random = new Random(seed);

    fs.delete(dir, true);

    FileInputFormat.setInputPaths(job, dir);

    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {

      // LOG.info("creating; entries = " + length);

      // create a file with length entries
      SequenceFile.Writer writer =
          SequenceFile.createWriter(fs, conf, file, IntWritable.class, BytesWritable.class);
      try {
        for (int i = 0; i < length; i++) {
          IntWritable key = new IntWritable(i);
          byte[] data = new byte[random.nextInt(10)];
          random.nextBytes(data);
          BytesWritable value = new BytesWritable(data);
          writer.append(key, value);
        }
      } finally {
        writer.close();
      }

      // try splitting the file in a variety of sizes
      InputFormat<IntWritable, BytesWritable> format =
          new SequenceFileInputFormat<IntWritable, BytesWritable>();
      IntWritable key = new IntWritable();
      BytesWritable value = new BytesWritable();
      for (int i = 0; i < 3; i++) {
        int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
        // LOG.info("splitting: requesting = " + numSplits);
        InputSplit[] splits = format.getSplits(job, numSplits);
        // LOG.info("splitting: got =        " + splits.length);

        // check each split
        BitSet bits = new BitSet(length);
        for (int j = 0; j < splits.length; j++) {
          RecordReader<IntWritable, BytesWritable> reader =
              format.getRecordReader(splits[j], job, reporter);
          try {
            int count = 0;
            while (reader.next(key, value)) {
              // if (bits.get(key.get())) {
              // LOG.info("splits["+j+"]="+splits[j]+" : " +
              // key.get());
              // LOG.info("@"+reader.getPos());
              // }
              assertFalse("Key in multiple partitions.", bits.get(key.get()));
              bits.set(key.get());
              count++;
            }
            // LOG.info("splits["+j+"]="+splits[j]+" count=" +
            // count);
          } finally {
            reader.close();
          }
        }
        assertEquals("Some keys in no partition.", length, bits.cardinality());
      }
    }
  }
Example #30
public class TestFileOutputCommitter extends TestCase {
  private static Path outDir = new Path(System.getProperty("test.build.data", "/tmp"), "output");

  // A random task attempt id for testing.
  private static String attempt = "attempt_200707121733_0001_m_000000_0";
  private static TaskAttemptID taskID = TaskAttemptID.forName(attempt);
  private Text key1 = new Text("key1");
  private Text key2 = new Text("key2");
  private Text val1 = new Text("val1");
  private Text val2 = new Text("val2");

  @SuppressWarnings("unchecked")
  private void writeOutput(RecordWriter theRecordWriter, Reporter reporter) throws IOException {
    NullWritable nullWritable = NullWritable.get();

    try {
      theRecordWriter.write(key1, val1);
      theRecordWriter.write(null, nullWritable);
      theRecordWriter.write(null, val1);
      theRecordWriter.write(nullWritable, val2);
      theRecordWriter.write(key2, nullWritable);
      theRecordWriter.write(key1, null);
      theRecordWriter.write(null, null);
      theRecordWriter.write(key2, val2);
    } finally {
      theRecordWriter.close(reporter);
    }
  }

  private void setConfForFileOutputCommitter(JobConf job) {
    job.set(JobContext.TASK_ATTEMPT_ID, attempt);
    job.setOutputCommitter(FileOutputCommitter.class);
    FileOutputFormat.setOutputPath(job, outDir);
  }

  @SuppressWarnings("unchecked")
  public void testCommitter() throws Exception {
    JobConf job = new JobConf();
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext));

    committer.setupJob(jContext);
    committer.setupTask(tContext);
    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    // write output
    FileSystem localFs = FileSystem.getLocal(job);
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);

    // validate output
    File expectedFile = new File(new Path(outDir, file).toString());
    StringBuffer expectedOutput = new StringBuffer();
    expectedOutput.append(key1).append('\t').append(val1).append("\n");
    expectedOutput.append(val1).append("\n");
    expectedOutput.append(val2).append("\n");
    expectedOutput.append(key2).append("\n");
    expectedOutput.append(key1).append("\n");
    expectedOutput.append(key2).append('\t').append(val2).append("\n");
    String output = UtilsForTests.slurp(expectedFile);
    assertEquals(expectedOutput.toString(), output); // expected first, then actual

    FileUtil.fullyDelete(new File(outDir.toString()));
  }

  public void testAbort() throws IOException {
    JobConf job = new JobConf();
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext));

    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    // write output
    FileSystem localFs = FileSystem.getLocal(job);
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do abort
    committer.abortTask(tContext);
    File expectedFile =
        new File(new Path(committer.getTempTaskOutputPath(tContext), file).toString());
    assertFalse("task temp dir still exists", expectedFile.exists());

    committer.abortJob(jContext, JobStatus.State.FAILED);
    expectedFile = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString());
    assertFalse("job temp dir still exists", expectedFile.exists());
    assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);
    FileUtil.fullyDelete(new File(outDir.toString()));
  }

  public static class FakeFileSystem extends RawLocalFileSystem {
    public FakeFileSystem() {
      super();
    }

    public URI getUri() {
      return URI.create("faildel:///");
    }

    @Override
    public boolean delete(Path p, boolean recursive) throws IOException {
      throw new IOException("fake delete failed");
    }
  }

  public void testFailAbort() throws IOException {
    JobConf job = new JobConf();
    job.set(FileSystem.FS_DEFAULT_NAME_KEY, "faildel:///");
    job.setClass("fs.faildel.impl", FakeFileSystem.class, FileSystem.class);
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext));

    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    // write output
    FileSystem localFs = new FakeFileSystem();
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do abort
    Throwable th = null;
    try {
      committer.abortTask(tContext);
    } catch (IOException ie) {
      th = ie;
    }
    assertNotNull(th);
    assertTrue(th instanceof IOException);
    assertTrue(th.getMessage().contains("fake delete failed"));
    File jobTmpDir = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString());
    File taskTmpDir = new File(jobTmpDir, "_" + taskID);
    File expectedFile = new File(taskTmpDir, file);
    assertTrue(expectedFile + " does not exists", expectedFile.exists());

    th = null;
    try {
      committer.abortJob(jContext, JobStatus.State.FAILED);
    } catch (IOException ie) {
      th = ie;
    }
    assertNotNull(th);
    assertTrue(th instanceof IOException);
    assertTrue(th.getMessage().contains("fake delete failed"));
    assertTrue("job temp dir does not exists", jobTmpDir.exists());
  }
}