Java Job示例，org.apache.hadoop.mapreduce.Job Java示例

示例#1

1

显示文件

文件： JobFactoryBean.java 项目： nellaivijay/spring-hadoop

  /**
   * Inspects the generic superclass chain of {@code mapper} and, when four concrete type arguments
   * can be found, registers the third and fourth ones on the job as map output key and value
   * classes. Type arguments that are not plain classes, or that are interfaces, are ignored.
   *
   * @param j the job to configure
   * @param mapper the mapper class whose generic signature is inspected
   */
  private void configureMapperTypesIfPossible(Job j, Class<? extends Mapper> mapper) {
    // Climb the hierarchy; stop once the superclass just reached is Mapper, Object or null.
    Class<?> current = mapper;
    Type genericParent;
    while (true) {
      genericParent = current.getGenericSuperclass();
      current = current.getSuperclass();
      if (current == null || current == Object.class || Mapper.class.equals(current)) {
        break;
      }
    }

    if (!(genericParent instanceof ParameterizedType)) {
      return;
    }
    Type[] typeArgs = ((ParameterizedType) genericParent).getActualTypeArguments();
    if (typeArgs.length != 4) {
      return;
    }

    // Third type argument -> map output key class (only when it is a concrete class).
    Type keyType = typeArgs[2];
    if (keyType instanceof Class && !((Class<?>) keyType).isInterface()) {
      j.setMapOutputKeyClass((Class<?>) keyType);
    }

    // Fourth type argument -> map output value class (only when it is a concrete class).
    Type valueType = typeArgs[3];
    if (valueType instanceof Class && !((Class<?>) valueType).isInterface()) {
      j.setMapOutputValueClass((Class<?>) valueType);
    }
  }

示例#2

0

显示文件

文件： TestInputFormat.java 项目： ajay0221/parquet-mr

  /**
   * Checks that a FilterPredicate and an UnboundRecordFilter cannot both be configured, whichever
   * of the two is set first.
   */
  @Test
  public void testOnlyOneKindOfFilterSupported() throws Exception {
    IntColumn fooColumn = intColumn("foo");
    FilterPredicate predicate = or(eq(fooColumn, 10), eq(fooColumn, 11));

    // Case 1: unbound record filter first, predicate second.
    Job filterFirstJob = new Job();
    Configuration filterFirstConf = filterFirstJob.getConfiguration();
    ParquetInputFormat.setUnboundRecordFilter(filterFirstJob, DummyUnboundRecordFilter.class);
    try {
      ParquetInputFormat.setFilterPredicate(filterFirstConf, predicate);
      fail("this should throw");
    } catch (IllegalArgumentException expected) {
      String message = expected.getMessage();
      assertEquals(
          "You cannot provide a FilterPredicate after providing an UnboundRecordFilter", message);
    }

    // Case 2: predicate first, unbound record filter second (on a fresh job).
    Job predicateFirstJob = new Job();
    Configuration predicateFirstConf = predicateFirstJob.getConfiguration();
    ParquetInputFormat.setFilterPredicate(predicateFirstConf, predicate);
    try {
      ParquetInputFormat.setUnboundRecordFilter(predicateFirstJob, DummyUnboundRecordFilter.class);
      fail("this should throw");
    } catch (IllegalArgumentException expected) {
      String message = expected.getMessage();
      assertEquals(
          "You cannot provide an UnboundRecordFilter after providing a FilterPredicate", message);
    }
  }

示例#3

0

显示文件

文件： HandsomeSearch.java 项目： JiCaiCai/cloudproject

  /**
   * Computes the fingerprint of a fixed source photo, then runs a MapReduce job that reads from
   * and writes to MongoDB collections, exiting with 0 on success and 1 on failure.
   */
  public static void main(String[] args) throws Exception {
    sourcePhoto = "/home/hduser/workspace/images/source.jpg";
    sourceFingerprint = SimilarImageSearch.produceFingerPrint(sourcePhoto);

    // MongoDB endpoints for the job input and output
    final Configuration conf = new Configuration();
    MongoConfigUtil.setInputURI(conf, "mongodb://localhost/photo.fingerprint");
    MongoConfigUtil.setOutputURI(conf, "mongodb://localhost/photo.handsomeOut");
    System.out.println("Conf: " + conf);

    final Job searchJob = new Job(conf, "similar photo");
    searchJob.setJarByClass(MdbSimilarPhoto.class);

    // input/output formats
    searchJob.setInputFormatClass(MongoInputFormat.class);
    searchJob.setOutputFormatClass(MongoOutputFormat.class);

    // mapper, combiner and reducer (the reducer doubles as the combiner)
    searchJob.setMapperClass(PhotoMapper.class);
    searchJob.setCombinerClass(SimilarityReducer.class);
    searchJob.setReducerClass(SimilarityReducer.class);

    // output key/value types
    searchJob.setOutputKeyClass(Text.class);
    searchJob.setOutputValueClass(DoubleWritable.class);

    final int exitCode = searchJob.waitForCompletion(true) ? 0 : 1;
    System.exit(exitCode);
  }

示例#4

0

显示文件

文件： BrowerLogFormatMR.java 项目： wisgood/mobile-core

  /**
   * Builds and runs the log-format job. The job name, input directories and output directory are
   * read from the configuration keys {@code job_name}, {@code input_dir} and {@code output_dir}.
   * Any existing output directory is deleted before the job starts.
   *
   * @param args command-line arguments, parsed for generic Hadoop options
   * @return 0 if the job completed successfully, 1 otherwise
   */
  public int run(String[] args) throws Exception {
    Configuration conf = new GenericOptionsParser(getConf(), args).getConfiguration();

    Job job = new Job(conf, conf.get("job_name"));
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));

    Path outputDir = new Path(conf.get("output_dir"));
    FileOutputFormat.setOutputPath(job, outputDir);
    // remove the results of a previous run
    outputDir.getFileSystem(conf).delete(outputDir, true);

    job.setJarByClass(BrowerLogFormatMR.class);
    job.setMapperClass(BrowerLogFormatMapper.class);
    job.setReducerClass(BrowerLogFormatReducer.class);
    job.setNumReduceTasks(1);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    if (job.waitForCompletion(true)) {
      return 0;
    }
    return 1;
  }

示例#5

0

显示文件

文件： WriteUsingMR.java 项目： ajay0221/parquet-mr

  /**
   * Writes the given messages through a map-only MapReduce job and returns the path of the output.
   *
   * <p>The messages are handed to the mapper through the {@code inputMessages} field, which is why
   * the whole method is serialized on the class lock. The field is always reset once the job has
   * finished, whether it succeeded or threw.
   *
   * @param messages the messages to write
   * @return the output path that was passed to the job
   * @throws Exception if the input file cannot be created or the job fails
   */
  public Path write(Message... messages) throws Exception {

    synchronized (WriteUsingMR.class) {
      outputPath = TestUtils.someTemporaryFilePath();

      Path inputPath = TestUtils.someTemporaryFilePath();
      FileSystem fileSystem = inputPath.getFileSystem(conf);
      // create() hands back an open output stream; close it immediately so the empty placeholder
      // file does not leak a file handle.
      fileSystem.create(inputPath).close();

      inputMessages = Collections.unmodifiableList(Arrays.asList(messages));
      try {
        final Job job = new Job(conf, "write");

        // input not really used
        TextInputFormat.addInputPath(job, inputPath);
        job.setInputFormatClass(TextInputFormat.class);

        job.setMapperClass(WritingMapper.class);
        job.setNumReduceTasks(0);

        job.setOutputFormatClass(ProtoParquetOutputFormat.class);
        ProtoParquetOutputFormat.setOutputPath(job, outputPath);
        ProtoParquetOutputFormat.setProtobufClass(job, TestUtils.inferRecordsClass(messages));

        waitForJob(job);
      } finally {
        // never leave stale messages behind for the next caller, even when the job fails
        inputMessages = null;
      }
      return outputPath;
    }
  }

示例#6

0

显示文件

文件： TestMRSequenceFileAsBinaryOutputFormat.java 项目： Jude7/bc-hadoop2.0

  /**
   * Verifies that the sequence-file output key/value classes fall back to the job's output
   * key/value classes, and that explicitly set sequence-file classes are reported afterwards.
   */
  public void testSequenceOutputClassDefaultsToMapRedOutputClass() throws IOException {
    Job sequenceJob = new Job();

    // Arbitrary classes, used only to observe what getSequenceFileOutput{Key,Value}Class returns
    sequenceJob.setOutputKeyClass(FloatWritable.class);
    sequenceJob.setOutputValueClass(BooleanWritable.class);

    // Without explicit sequence-file classes, the job's output classes are expected
    assertEquals(
        "SequenceFileOutputKeyClass should default to ouputKeyClass",
        FloatWritable.class,
        SequenceFileAsBinaryOutputFormat.getSequenceFileOutputKeyClass(sequenceJob));
    assertEquals(
        "SequenceFileOutputValueClass should default to " + "ouputValueClass",
        BooleanWritable.class,
        SequenceFileAsBinaryOutputFormat.getSequenceFileOutputValueClass(sequenceJob));

    // Override both classes explicitly ...
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(sequenceJob, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(
        sequenceJob, DoubleWritable.class);

    // ... and expect the overrides to be returned
    assertEquals(
        "SequenceFileOutputKeyClass not updated",
        IntWritable.class,
        SequenceFileAsBinaryOutputFormat.getSequenceFileOutputKeyClass(sequenceJob));
    assertEquals(
        "SequenceFileOutputValueClass not updated",
        DoubleWritable.class,
        SequenceFileAsBinaryOutputFormat.getSequenceFileOutputValueClass(sequenceJob));
  }

示例#7

0

显示文件

文件： HubsAndSpokes.java 项目： kidaak/Hadoop-MapReduce-1

  /**
   * Runs the hubs-and-spokes job 32 times in a row, feeding the output directory of each run into
   * the next one. Exits the JVM with status 2 as soon as one run fails.
   *
   * @param input path read by the first iteration
   * @param output path written by the first iteration; later iterations write to this path with
   *     the previous iteration index appended
   */
  public static void dijkstra(String input, String output) throws Exception {

    final String outputPrefix = output;

    int iteration = 0;
    while (iteration < 32) {
      Job job = new Job(new Configuration(), "hubsandspokes");
      job.setJarByClass(HubsAndSpokes.class);
      job.setNumReduceTasks(1);

      job.setMapperClass(HubSpokeMapper.class);
      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(NodeWritable.class);

      job.setReducerClass(HubSpokeReducer.class);
      job.setOutputKeyClass(NodeWritable.class);
      job.setOutputValueClass(Text.class);

      FileInputFormat.addInputPath(job, new Path(input));
      FileOutputFormat.setOutputPath(job, new Path(output));

      // The next iteration reads what this one writes and gets a fresh output directory
      input = output;
      output = outputPrefix + Integer.toString(iteration);

      // Block until this iteration is done; abort everything on failure
      boolean succeeded = job.waitForCompletion(true);
      if (!succeeded) {
        System.exit(2);
      }

      iteration++;
    }
  }

示例#8

0

显示文件

文件： TopKRecommendationsJob.java 项目： SteamShon/collaborative-filtering-experiment

  /**
   * Parses the command line, configures the top-K recommendations job and runs it.
   *
   * @param args command-line arguments (input, output and recommendations-per-user options)
   * @return 0 if the job succeeded, -1 if the arguments could not be parsed or the job failed
   * @throws Exception if job preparation or execution throws
   */
  @Override
  public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption(RECOMMENDATIONS_PER_USER, "k", "recommendations per user.");

    if (parseArguments(args) == null) {
      return -1;
    }

    Job job =
        prepareJob(
            getInputPath(),
            getOutputPath(),
            SequenceFileInputFormat.class,
            TopKRecommendationsMapper.class,
            IntWritable.class,
            Text.class,
            SequenceFileOutputFormat.class);

    Configuration conf = job.getConfiguration();
    conf.setInt(RECOMMENDATIONS_PER_USER, Integer.parseInt(getOption(RECOMMENDATIONS_PER_USER)));

    // Propagate a failed job to the caller instead of always reporting success.
    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : -1;
  }

示例#9

0

显示文件

文件： TestMultithreadedTableMapper.java 项目： riyazaahil/hbase

 /**
  * Runs a multithreaded table-mapper job over {@code table}, asserts that it completes
  * successfully and verifies the result. The table is closed afterwards and, if a job was
  * created, the directory named by its {@code hadoop.tmp.dir} setting is deleted.
  */
 private void runTestOnTable(Table table)
     throws IOException, InterruptedException, ClassNotFoundException {
   Job mrJob = null;
   try {
     LOG.info("Before map/reduce startup");
     mrJob = new Job(table.getConfiguration(), "process column contents");
     mrJob.setNumReduceTasks(1);

     // map side: scan the input family with a multithreaded mapper
     Scan inputScan = new Scan();
     inputScan.addFamily(INPUT_FAMILY);
     TableMapReduceUtil.initTableMapperJob(
         table.getName(),
         inputScan,
         MultithreadedTableMapper.class,
         ImmutableBytesWritable.class,
         Put.class,
         mrJob);
     MultithreadedTableMapper.setMapperClass(mrJob, ProcessContentsMapper.class);
     MultithreadedTableMapper.setNumberOfThreads(mrJob, NUMBER_OF_THREADS);

     // reduce side: identity reducer targeting the same table
     TableMapReduceUtil.initTableReducerJob(
         table.getName().getNameAsString(), IdentityTableReducer.class, mrJob);
     FileOutputFormat.setOutputPath(mrJob, new Path("test"));

     LOG.info("Started " + table.getName());
     boolean completed = mrJob.waitForCompletion(true);
     assertTrue(completed);
     LOG.info("After map/reduce completion");

     // check what the job produced
     verify(table.getName());
   } finally {
     table.close();
     if (mrJob != null) {
       File tmpDir = new File(mrJob.getConfiguration().get("hadoop.tmp.dir"));
       FileUtil.fullyDelete(tmpDir);
     }
   }
 }

示例#10

0

显示文件

文件： DistBlockFixer.java 项目： richxu/hadoop-20-warehouse

 /**
  * Rebuilds {@link #lastStatus} from the current file and job indexes so that it can be viewed
  * from outside, and publishes the high/low priority file counts to the raid node metrics.
  */
 private void updateStatus() {
   // Split the corrupt files into high priority (priority > 0) and low priority
   List<String> highPriorityFileNames = new ArrayList<String>();
   int highPriorityFiles = 0;
   int lowPriorityFiles = 0;
   for (Map.Entry<String, CorruptFileInfo> entry : fileIndex.entrySet()) {
     boolean highPriority = entry.getValue().getHighestPriority() > 0;
     if (highPriority) {
       highPriorityFileNames.add(entry.getKey());
       highPriorityFiles++;
     } else {
       lowPriorityFiles++;
     }
   }

   // One status entry per tracked job
   List<JobStatus> jobs = new ArrayList<JobStatus>();
   for (Job trackedJob : jobIndex.keySet()) {
     String trackingUrl = trackedJob.getTrackingURL();
     String jobName = trackedJob.getJobName();
     JobID id = trackedJob.getID();
     jobs.add(new BlockFixer.JobStatus(id, jobName, trackingUrl));
   }

   lastStatus =
       new BlockFixer.Status(highPriorityFiles, lowPriorityFiles, jobs, highPriorityFileNames);
   RaidNodeMetrics.getInstance().corruptFilesHighPri.set(highPriorityFiles);
   RaidNodeMetrics.getInstance().corruptFilesLowPri.set(lowPriorityFiles);
   LOG.info("Update status done." + lastStatus.toString());
 }

示例#11

0

显示文件

文件： DistBlockFixer.java 项目： richxu/hadoop-20-warehouse

  /**
   * Handles a job that finished successfully: marks every file of the job as finished (flagging
   * the ones reported as failed), updates the fixed/failed metrics and decrements the running
   * job counter.
   *
   * @param job the finished job
   * @param filesSucceeded number of files the job fixed
   * @param filesFailed number of files the job could not fix
   */
  private void succeedJob(Job job, long filesSucceeded, long filesFailed) throws IOException {
    String jobName = job.getJobName();
    LOG.info("Job " + job.getID() + "(" + jobName + ") finished (succeeded)");

    // The job output only needs to be inspected when at least one file failed.
    final boolean inspectOutput = filesFailed != 0;
    Set<String> failedFiles = null;
    if (inspectOutput) {
      failedFiles = getFailedFiles(job);
    }

    for (CorruptFileInfo fileInfo : jobIndex.get(job)) {
      // Files that succeeded, or for which no action was taken, count as not failed.
      boolean failed = inspectOutput && failedFiles.contains(fileInfo.getFile().toString());
      fileInfo.finishJob(jobName, failed);
    }

    // report the outcome to metrics
    incrFilesFixed(filesSucceeded);
    incrFileFixFailures(filesFailed);
    numJobsRunning--;
  }

示例#12

0

显示文件

文件： AbstractInputFormat.java 项目： matthew-dailey/accumulo

  /**
   * Stores the connection credentials for Accumulo on the given job.
   *
   * <p>A {@link KerberosToken} is first exchanged for a delegation token; if that exchange fails,
   * a warning is logged and the original token is used. A {@link DelegationTokenImpl} is
   * additionally converted to a Hadoop token and added to the job's credentials.
   *
   * <p><b>WARNING:</b> Some tokens, when serialized, divulge sensitive information in the
   * configuration as a means to pass the token to MapReduce tasks. This information is BASE64
   * encoded to provide a charset safe conversion to a string, but this conversion is not intended
   * to be secure. {@link PasswordToken} is one example that is insecure in this way; however,
   * {@link DelegationToken}s acquired using {@link
   * SecurityOperations#getDelegationToken(DelegationTokenConfig)} are not subject to this concern.
   *
   * @param job the Hadoop job instance to be configured
   * @param principal a valid Accumulo user name (user must have Table.CREATE permission)
   * @param token the authentication token for the principal
   * @since 1.5.0
   */
  public static void setConnectorInfo(Job job, String principal, AuthenticationToken token)
      throws AccumuloSecurityException {
    AuthenticationToken effectiveToken = token;

    if (effectiveToken instanceof KerberosToken) {
      log.info("Received KerberosToken, attempting to fetch DelegationToken");
      try {
        Connector connector = getInstance(job).getConnector(principal, effectiveToken);
        effectiveToken =
            connector.securityOperations().getDelegationToken(new DelegationTokenConfig());
      } catch (Exception e) {
        log.warn(
            "Failed to automatically obtain DelegationToken, Mappers/Reducers will likely fail to communicate with Accumulo",
            e);
      }
    }

    // A delegation token travels with the job credentials rather than being serialized
    // insecurely into the configuration.
    if (effectiveToken instanceof DelegationTokenImpl) {
      DelegationTokenImpl delegation = (DelegationTokenImpl) effectiveToken;
      AuthenticationTokenIdentifier tokenId = delegation.getIdentifier();

      // Wrap it as a Hadoop token
      Token<AuthenticationTokenIdentifier> wrapped =
          new Token<>(
              tokenId.getBytes(),
              delegation.getPassword(),
              tokenId.getKind(),
              delegation.getServiceName());

      // Attach it to the job so that it is serialized and passed along
      job.getCredentials().addToken(wrapped.getService(), wrapped);
    }

    InputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, effectiveToken);
  }

示例#13

0

显示文件

文件： FeatureWriterJob.java 项目： ellisbjohns/geomesa

  /**
   * Configures and runs a map-only job that reads features matching a bounding-box query from
   * one catalog table and writes them to another, exiting with 0 on success and 1 on failure.
   */
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "simple feature writer");

    job.setJarByClass(FeatureWriterJob.class);
    job.setMapperClass(MyMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(ScalaSimpleFeature.class);
    job.setInputFormatClass(GeoMesaInputFormat.class);
    job.setOutputFormatClass(GeoMesaOutputFormat.class);

    // Input side: connection parameters plus the query selecting the features to read
    Map<String, String> inputParams = new HashMap<String, String>();
    inputParams.put("instanceId", "myinstance");
    inputParams.put("zookeepers", "zoo1,zoo2,zoo3");
    inputParams.put("user", "myuser");
    inputParams.put("password", "mypassword");
    inputParams.put("tableName", "mycatalog");
    Query featureQuery = new Query("myfeature", ECQL.toFilter("BBOX(geom, -165,5,-50,75)"));
    GeoMesaInputFormat.configure(job, inputParams, featureQuery);

    // Output side: same connection, different catalog table
    Map<String, String> outputParams = new HashMap<String, String>();
    outputParams.put("instanceId", "myinstance");
    outputParams.put("zookeepers", "zoo1,zoo2,zoo3");
    outputParams.put("user", "myuser");
    outputParams.put("password", "mypassword");
    outputParams.put("tableName", "mycatalog_2");
    GeoMesaOutputFormat.configureDataStore(job, outputParams);

    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
  }

示例#14

0

显示文件

文件： WhiteHouseVisitorDriver.java 项目： pikkapika/cs216

  /**
   * Configures and runs the visitor job.
   *
   * @param args exactly two arguments: the input path and the output path
   * @return -1 when the argument count is wrong, 0 when the job succeeds, 1 when it fails
   */
  @Override
  public int run(String[] args) throws Exception {
    final boolean wrongArgCount = args.length != 2;
    if (wrongArgCount) {
      System.err.println("Usage: WhiteHouseVisitorDriver <input path> <output path>");
      ToolRunner.printGenericCommandUsage(System.err);
      return -1;
    }
    final Path inputPath = new Path(args[0]);
    final Path outputPath = new Path(args[1]);

    // NOTE(review): a fresh Configuration is built here; if this class is run through ToolRunner,
    // options parsed into the tool's own configuration would not reach the job - confirm intent.
    Job job = new Job(new Configuration());
    job.setJarByClass(WhiteHouseVisitorDriver.class);

    // input and output locations
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // mapper, plus one reducer class serving as both combiner and reducer
    job.setMapperClass(WhiteHouseVisitorMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    // intermediate and final key/value types
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    if (job.waitForCompletion(true)) {
      return 0;
    }
    return 1;
  }

示例#15

0

显示文件

文件： SequenceFileMergeJob.java 项目： rahulj/hadoop-training-samples

 /**
  * Sets up the map side of the job: sequence-file input read from {@code arg}, an identity
  * mapper, and map output types taken from the {@code keyClass} and {@code valueClass} fields.
  *
  * @param arg the input path(s), passed to {@code SequenceFileInputFormat.setInputPaths}
  * @param job the job to configure
  */
 private void configureMapTasks(String arg, Job job) throws IOException {
   // input
   job.setInputFormatClass(SequenceFileInputFormat.class);
   SequenceFileInputFormat.setInputPaths(job, arg);

   // mapper and its output types
   job.setMapperClass(IdentityMapper.class);
   job.setMapOutputKeyClass(keyClass);
   job.setMapOutputValueClass(valueClass);
 }

示例#16

0

显示文件

文件： LevNestDissectJob.java 项目： swapster/hadoop

  /**
   * Configures and runs the "starting" job. It reads from the {@code outPath_count} subdirectory
   * of {@code out} and writes to the {@code outPath_start} subdirectory, which is deleted first if
   * it already exists. Afterwards {@code depth} is raised to 1 if it was 0 and {@code wasStart}
   * is set.
   */
  private static void StartingJob()
      throws IOException, InterruptedException, ClassNotFoundException {

    conf = new Configuration();
    fs = FileSystem.get(conf);
    conf.setLong("my.vertex.num", num);

    job = Job.getInstance(conf, "Levelized Nested Dissection Starting");
    job.setJarByClass(LevNestDissectJob.class);

    // map and reduce stages
    job.setMapperClass(StartVertexMapper.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(StartVertexReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(VertexWritable.class);

    // input
    in = out.suffix("/" + outPath_count);
    FileInputFormat.addInputPath(job, in);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // output: clear the remains of a previous run first
    out_start = out.suffix("/" + outPath_start);
    if (fs.exists(out_start)) {
      fs.delete(out_start, true);
    }
    FileOutputFormat.setOutputPath(job, out_start);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.waitForCompletion(true);

    if (depth == 0) {
      depth = depth + 1;
    }
    wasStart = true;
  }

示例#17

0

显示文件

文件： TestHFileOutputFormat2.java 项目： mringg/hbase

  /**
   * Runs an incremental-load job that writes HFiles to {@code outDir}. Asserts that the output
   * directory does not exist beforehand, that the job has one reduce task per region location,
   * and that the job completes successfully.
   */
  private void runIncrementalPELoad(
      Configuration conf,
      HTableDescriptor tableDescriptor,
      RegionLocator regionLocator,
      Path outDir)
      throws IOException, UnsupportedEncodingException, InterruptedException,
          ClassNotFoundException {
    Job loadJob = new Job(conf, "testLocalMRIncrementalLoad");
    loadJob.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));

    // Keep the serializations already configured and append the three HBase ones
    Configuration jobConf = loadJob.getConfiguration();
    jobConf.setStrings(
        "io.serializations",
        conf.get("io.serializations"),
        MutationSerialization.class.getName(),
        ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());

    setupRandomGeneratorMapper(loadJob);
    HFileOutputFormat2.configureIncrementalLoad(loadJob, tableDescriptor, regionLocator);
    FileOutputFormat.setOutputPath(loadJob, outDir);

    boolean outputAlreadyThere = util.getTestFileSystem().exists(outDir);
    assertFalse(outputAlreadyThere);

    int regionCount = regionLocator.getAllRegionLocations().size();
    assertEquals(regionCount, loadJob.getNumReduceTasks());

    boolean completed = loadJob.waitForCompletion(true);
    assertTrue(completed);
  }

示例#18

0

显示文件

文件： SSTableRecordReaderTest.java 项目： anddegs/KassandraMRHelper

  /**
   * Creates the job, configuration, attempt id and input split used by the tests, and stubs both
   * record readers so that they return a fixed descriptor, skip copying tables locally and open
   * the mocked SSTable reader.
   */
  @Before
  public void setup() throws IOException {
    job = Job.getInstance();
    conf = job.getConfiguration();
    attemptId = new TaskAttemptID();
    inputSplit = new FileSplit(new Path(TABLE_PATH_STR), 0, 1, null);

    Descriptor descriptor =
        new Descriptor(new File(TABLE_PATH_STR), "keyspace", "columnFamily", 1, false);

    // fixed descriptor for both readers
    doReturn(descriptor).when(ssTableColumnRecordReader).getDescriptor();
    doReturn(descriptor).when(ssTableRowRecordReader).getDescriptor();

    // copying tables to local disk becomes a no-op
    doNothing()
        .when(ssTableColumnRecordReader)
        .copyTablesToLocal(any(FileSplit.class), any(TaskAttemptContext.class));
    doNothing()
        .when(ssTableRowRecordReader)
        .copyTablesToLocal(any(FileSplit.class), any(TaskAttemptContext.class));

    // both readers open the mocked SSTable reader, which in turn yields the mocked scanner
    doReturn(ssTableReader)
        .when(ssTableColumnRecordReader)
        .openSSTableReader(any(IPartitioner.class), any(CFMetaData.class));
    doReturn(ssTableReader)
        .when(ssTableRowRecordReader)
        .openSSTableReader(any(IPartitioner.class), any(CFMetaData.class));
    when(ssTableReader.getDirectScanner(null)).thenReturn(tableScanner);
  }

示例#19

0

显示文件

文件： TestSequenceFileStorage.java 项目： hardiku/elephant-bird

  /**
   * Drives a {@code SequenceFileLoader} by hand - first the front-end calls, then the back-end
   * calls - and validates the tuples it reads from {@code tempFilename}.
   */
  @Test
  public void readOutsidePig()
      throws ClassCastException, ParseException, ClassNotFoundException, InstantiationException,
          IllegalAccessException, IOException, InterruptedException {
    // --- front-end: create the loader and point it at the file ---
    String keyConverterArg = "-c " + IntWritableConverter.class.getName();
    String valueConverterArg = "-c " + TextConverter.class.getName();
    final SequenceFileLoader<IntWritable, Text> loader =
        new SequenceFileLoader<IntWritable, Text>(keyConverterArg, valueConverterArg);
    Job job = new Job();
    loader.setUDFContextSignature("12345");
    loader.setLocation(tempFilename, job);

    // --- back-end: a raw record reader over a split covering the whole file ---
    RecordReader<DataInputBuffer, DataInputBuffer> rawReader = new RawSequenceFileRecordReader();
    long fileLength = new File(tempFilename).length();
    FileSplit wholeFile =
        new FileSplit(new Path(tempFilename), 0, fileLength, new String[] {"localhost"});
    TaskAttemptContext attemptContext =
        new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID());
    rawReader.initialize(wholeFile, attemptContext);

    // wrap the split the way the loader expects it
    List<OperatorKey> targetOps = Arrays.asList(new OperatorKey("54321", 0));
    final int inputIndex = 0;
    final int splitIndex = 0;
    PigSplit pigSplit =
        new PigSplit(new InputSplit[] {wholeFile}, inputIndex, targetOps, splitIndex);
    pigSplit.setConf(job.getConfiguration());
    loader.prepareToRead(rawReader, pigSplit);

    // read tuples and validate
    validate(new LoadFuncTupleIterator(loader));
  }

示例#20

0

显示文件

文件： TestLzoTextInputFormat.java 项目： huagetai/elephant-bird

  /**
   * Writes random LZO-compressed data (optionally indexing it), reads it back through {@code
   * LzoTextInputFormat} and checks that the MD5 of what was read matches the MD5 of what was
   * written. Also checks the number of input splits produced.
   *
   * @param testWithIndex whether an LZO index is created for the written file
   * @param charsToOutput how many characters of random data to write
   * @throws IOException
   * @throws NoSuchAlgorithmException
   * @throws InterruptedException
   */
  private void runTest(boolean testWithIndex, int charsToOutput)
      throws IOException, NoSuchAlgorithmException, InterruptedException {

    Configuration conf = new Configuration();
    // a small block size forces the tiny file to be split
    conf.setLong("fs.local.block.size", charsToOutput / 2);
    conf.set("io.compression.codecs", LzopCodec.class.getName());

    Assume.assumeTrue(CoreTestUtil.okToRunLzoTests(conf));

    // drop any cached local filesystem, then start from an empty output directory
    FileSystem.getLocal(conf).close();
    FileSystem localFs = FileSystem.getLocal(conf);
    localFs.delete(outputDir_, true);
    localFs.mkdirs(outputDir_);

    // job configured to write LZO-compressed text into the output directory
    Job job = new Job(conf);
    TextOutputFormat.setCompressOutput(job, true);
    TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
    TextOutputFormat.setOutputPath(job, outputDir_);

    TaskAttemptID attemptId = new TaskAttemptID("123", 0, false, 1, 2);
    TaskAttemptContext attemptContext = new TaskAttemptContext(job.getConfiguration(), attemptId);

    // write the test data and remember its digest
    byte[] expectedMd5 = createTestInput(outputDir_, localFs, attemptContext, charsToOutput);

    if (testWithIndex) {
      LzoIndex.createIndex(localFs, new Path(outputDir_, lzoFileName_));
    }

    LzoTextInputFormat inputFormat = new LzoTextInputFormat();
    TextInputFormat.setInputPaths(job, outputDir_);

    // the indexed big file is expected to yield three splits, everything else one
    List<InputSplit> splits = inputFormat.getSplits(job);
    int expectedSplits;
    if (testWithIndex && OUTPUT_BIG == charsToOutput) {
      expectedSplits = 3;
    } else {
      expectedSplits = 1;
    }
    assertEquals(expectedSplits, splits.size());

    // read everything back, feeding each line into the digest
    for (InputSplit split : splits) {
      RecordReader<LongWritable, Text> reader =
          inputFormat.createRecordReader(split, attemptContext);
      reader.initialize(split, attemptContext);

      while (reader.nextKeyValue()) {
        Text line = reader.getCurrentValue();
        md5_.update(line.getBytes(), 0, line.getLength());
      }

      reader.close();
    }

    localFs.close();
    boolean digestsMatch = Arrays.equals(expectedMd5, md5_.digest());
    assertTrue(digestsMatch);
  }

示例#21

0

显示文件

文件： ESIndexCreator.java 项目： morpheus-lab/java-bigdata-201506

  /**
   * Runs the map-only ESIndexCreator job.
   *
   * <p>Expected arguments:
   *
   * <ol>
   *   <li>input file path
   *   <li>output file path
   *   <li>Elasticsearch server host name (stored under the {@code host} configuration key)
   * </ol>
   *
   * <p>The process exits with status 2 when arguments are missing, 0 when the job succeeds and 1
   * when it fails.
   *
   * @param args the three program arguments described above
   * @throws Exception if the job cannot be configured or submitted
   */
  public static void main(String[] args) throws Exception {
    // Fail with a usage message instead of an ArrayIndexOutOfBoundsException further down.
    if (args.length < 3) {
      System.err.println(
          "Usage: ESIndexCreator <input path> <output path> <elasticsearch host name>");
      System.exit(2);
    }

    Configuration conf = new Configuration();
    Job job = new Job(conf, "ESIndexCreator");

    job.setJarByClass(ESIndexCreator.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(ESIndexCreator.MyMapper.class);

    job.setNumReduceTasks(0); // Skip Reduce Task

    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Program arguments
    // 0: input file path
    // 1: output file path
    // 2: elastic search server's host name
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.getConfiguration().set("host", args[2]);

    // Report job failure through the exit status rather than always exiting with 0.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }

示例#22

0

显示文件

文件： InputSampler.java 项目： AndreSchumacher/adam

 /**
  * Write a partition file for the given job, using the Sampler provided. Queries the sampler for a
  * sample keyset, sorts it with the job's sort comparator, selects the keys for each rank, and
  * writes them to the destination returned from {@link TotalOrderPartitioner#getPartitionFile}.
  * An existing file at that destination is deleted first.
  *
  * @param job the job whose input format, reduce-task count and configuration are used
  * @param sampler the sampler that provides the sample keys
  * @throws IOException if the partition file cannot be deleted, created or written
  * @throws ClassNotFoundException if the job's input format class cannot be resolved
  * @throws InterruptedException if sampling is interrupted
  */
 @SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
 public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
     throws IOException, ClassNotFoundException, InterruptedException {
   Configuration conf = job.getConfiguration();
   final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
   int numPartitions = job.getNumReduceTasks();
   K[] samples = sampler.getSample(inf, job);
   RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
   Arrays.sort(samples, comparator);
   Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
   FileSystem fs = dst.getFileSystem(conf);
   if (fs.exists(dst)) {
     fs.delete(dst, false);
   }
   SequenceFile.Writer writer =
       SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(), NullWritable.class);
   // Close the writer even when appending fails, so the file handle is not leaked.
   try {
     NullWritable nullValue = NullWritable.get();
     float stepSize = samples.length / (float) numPartitions;
     int last = -1;
     for (int i = 1; i < numPartitions; ++i) {
       int k = Math.round(stepSize * i);
       // Skip forward past samples equal to the previously written split point.
       // NOTE(review): k is not bounds-checked against samples.length here - confirm callers
       // always supply enough distinct samples.
       while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
         ++k;
       }
       writer.append(samples[k], nullValue);
       last = k;
     }
   } finally {
     writer.close();
   }
 }

示例#23

0

显示文件

文件： CountRowsMR.java 项目： davidbuttler/hbase-demo

  /**
   * Runs a map-only job over the given HBase table.
   *
   * @param args exactly one argument: the table name
   * @return 0 if the job completed successfully; 1 if the arguments are wrong, the job failed, or
   *     an exception was thrown while setting it up or running it
   * @throws Exception declared for the interface; exceptions raised by the job are logged and
   *     turned into a return value of 1
   */
  public int run(String[] args) throws Exception {
    if (args.length != 1) {
      System.out.println("usage: CountRows <table name>");
      return 1;
    }
    Configuration conf = getConf();

    try {
      String tableName = args[0];

      LOG.info("Before map/reduce startup");
      Job job = new Job(conf, "query: count rows");
      job.setJarByClass(this.getClass());
      job.getConfiguration().set(TABLE_NAME, tableName);

      Scan scan = new Scan();

      TableMapReduceUtil.initTableMapperJob(
          tableName, scan, CountRowMapper.class, ImmutableBytesWritable.class, Put.class, job);
      // map-only job: no reducer is configured
      job.setNumReduceTasks(0);

      LOG.info("Started " + tableName);
      // A job that ran but failed must not be reported as success.
      if (!job.waitForCompletion(true)) {
        LOG.error("Map/reduce job failed for table " + tableName);
        return 1;
      }
      LOG.info("After map/reduce completion");

    } catch (Exception e) {
      // Route the failure through the logger instead of printing straight to stderr.
      LOG.error("Map/reduce job could not be run", e);
      return 1;
    }

    return 0;
  }

示例#24

0

显示文件

文件： InputSampler.java 项目： AndreSchumacher/adam

  /**
   * Driver for InputSampler from the command line. Configures a Job instance and calls {@link
   * #writePartitionFile}.
   *
   * <p>Recognized options: {@code -r <reduces>}, {@code -inFormat <class>}, {@code -keyClass
   * <class>}, {@code -splitSample <numSamples> <maxSplits>}, {@code -splitRandom <pcnt>
   * <numSamples> <maxSplits>} and {@code -splitInterval <pcnt> <maxSplits>}. All remaining
   * arguments are input paths, except the last one, which is the partition file to write. When no
   * sampler option is given, a {@code RandomSampler(0.1, 10000, 10)} is used.
   *
   * @param args the command-line arguments described above
   * @return 0 on success, otherwise the value returned by {@code printUsage()}
   * @throws Exception if a named class cannot be loaded or sampling/writing fails
   */
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    ArrayList<String> otherArgs = new ArrayList<String>();
    Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
      try {
        if ("-r".equals(args[i])) {
          job.setNumReduceTasks(Integer.parseInt(args[++i]));
        } else if ("-inFormat".equals(args[i])) {
          job.setInputFormatClass(Class.forName(args[++i]).asSubclass(InputFormat.class));
        } else if ("-keyClass".equals(args[i])) {
          job.setMapOutputKeyClass(Class.forName(args[++i]).asSubclass(WritableComparable.class));
        } else if ("-splitSample".equals(args[i])) {
          int numSamples = Integer.parseInt(args[++i]);
          int maxSplits = Integer.parseInt(args[++i]);
          // a non-positive split limit means "no limit"
          if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE;
          sampler = new SplitSampler<K, V>(numSamples, maxSplits);
        } else if ("-splitRandom".equals(args[i])) {
          double pcnt = Double.parseDouble(args[++i]);
          int numSamples = Integer.parseInt(args[++i]);
          int maxSplits = Integer.parseInt(args[++i]);
          if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE;
          sampler = new RandomSampler<K, V>(pcnt, numSamples, maxSplits);
        } else if ("-splitInterval".equals(args[i])) {
          double pcnt = Double.parseDouble(args[++i]);
          int maxSplits = Integer.parseInt(args[++i]);
          if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE;
          sampler = new IntervalSampler<K, V>(pcnt, maxSplits);
        } else {
          otherArgs.add(args[i]);
        }
      } catch (NumberFormatException except) {
        System.out.println("ERROR: Integer expected instead of " + args[i]);
        return printUsage();
      } catch (ArrayIndexOutOfBoundsException except) {
        // i was already advanced past the end by ++i, so i - 1 is the last real argument
        System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
        return printUsage();
      }
    }
    if (job.getNumReduceTasks() <= 1) {
      System.err.println("Sampler requires more than one reducer");
      return printUsage();
    }
    if (otherArgs.size() < 2) {
      System.out.println("ERROR: Wrong number of parameters: ");
      return printUsage();
    }
    if (null == sampler) {
      sampler = new RandomSampler<K, V>(0.1, 10000, 10);
    }

    // The last positional argument is the partition file; the rest are input paths.
    Path outf = new Path(otherArgs.remove(otherArgs.size() - 1));
    // The Job works on its own copy of the configuration, and writePartitionFile reads the
    // partition file location from job.getConfiguration(), so the location has to be set there.
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), outf);
    // Also kept on the tool's configuration, as before, for callers that read it afterwards.
    TotalOrderPartitioner.setPartitionFile(getConf(), outf);
    for (String s : otherArgs) {
      FileInputFormat.addInputPath(job, new Path(s));
    }
    InputSampler.<K, V>writePartitionFile(job, sampler);

    return 0;
  }

示例#25

0

显示文件

文件： HDFToText.java 项目： XkhldY/spatialhadoop2

  /**
   * Runs the HDF-to-text conversion as a map-only MapReduce job and returns the value of the
   * job's map-output-records counter.
   *
   * @param inPath the input path given to the input format
   * @param outPath the output path given to the output format
   * @param datasetName stored in the job configuration under {@code dataset}
   * @param skipFillValue stored in the job configuration under {@code skipfillvalue}
   * @param params the base configuration for the job; its {@code verbose} flag controls whether
   *     progress is reported, and {@code shape} defaults to {@code NASAPoint} when unset
   * @return the value of the {@code MAP_OUTPUT_RECORDS} counter once the job has finished
   * @throws IOException
   * @throws ClassNotFoundException
   * @throws InterruptedException
   */
  public static long HDFToTextMapReduce(
      Path inPath, Path outPath, String datasetName, boolean skipFillValue, OperationsParams params)
      throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "HDFToText");
    job.setJarByClass(HDFToText.class);
    job.setJobName("HDFToText");
    Configuration jobConf = job.getConfiguration();

    // Map-only job
    job.setMapperClass(HDFToTextMap.class);
    job.setNumReduceTasks(0);

    // Input side
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inPath);
    if (jobConf.get("shape") == null) {
      jobConf.setClass("shape", NASAPoint.class, Shape.class);
    }
    jobConf.set("dataset", datasetName);
    jobConf.setBoolean("skipfillvalue", skipFillValue);

    // Output side
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outPath);

    // Run, then report how many records the mappers emitted
    job.waitForCompletion(jobConf.getBoolean("verbose", false));
    Counter mapOutputRecords = job.getCounters().findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
    return mapOutputRecords.getValue();
  }

示例#26

0

显示文件

文件： Util.java 项目： FloodDragon/hadoop

  /**
   * Run a job.
   *
   * <p>Initialization, submission and the post-submit separation sleep happen while holding
   * {@code JOB_SEMAPHORE}; the permit is released before blocking on job completion. The elapsed
   * time is reported through {@code timer} whether or not the job succeeds, provided the start
   * tick was recorded.
   *
   * @param name label used in progress and failure messages
   * @param job the job to initialize, submit and wait for
   * @param machine used to initialize the job via {@code machine.init(job)}
   * @param startmessage extra text appended to the "starting" message
   * @param timer source of timestamps and sink for progress messages
   * @throws RuntimeException if the job reports failure, or wrapping any checked exception raised
   *     while running it; if that exception is an {@link InterruptedException} the calling
   *     thread's interrupt flag is restored first
   */
  static void runJob(String name, Job job, Machine machine, String startmessage, Util.Timer timer) {
    JOB_SEMAPHORE.acquireUninterruptibly();
    Long starttime = null;
    try {
      try {
        starttime = timer.tick("starting " + name + " ...\n  " + startmessage);

        // initialize and submit a job
        machine.init(job);
        job.submit();

        // Separate jobs
        final long sleeptime = 1000L * job.getConfiguration().getInt(JOB_SEPARATION_PROPERTY, 10);
        if (sleeptime > 0) {
          Util.out.println(name + "> sleep(" + Util.millis2String(sleeptime) + ")");
          Thread.sleep(sleeptime);
        }
      } finally {
        // Release the permit before the (potentially long) wait for completion.
        JOB_SEMAPHORE.release();
      }

      if (!job.waitForCompletion(false)) throw new RuntimeException(name + " failed.");
    } catch (InterruptedException e) {
      // Wrapping alone would lose the interruption; restore the flag so callers can observe it.
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    } catch (Exception e) {
      throw e instanceof RuntimeException ? (RuntimeException) e : new RuntimeException(e);
    } finally {
      if (starttime != null)
        timer.tick(name + "> timetaken=" + Util.millis2String(timer.tick() - starttime));
    }
  }

示例#27

0

显示文件

文件： TCMReasoner.java 项目： hualichenxi/bio-tcm-cloud

  /**
   * Command-line entry point: configures and runs the "reasoner" MapReduce job with
   * {@code TCMMapper} and {@code TCMReducer}, then prints the job's
   * {@code REDUCE_OUTPUT_RECORDS} counter.
   *
   * <p>After Hadoop generic options are stripped, the first remaining argument is used as the
   * input path and the second as the output path. If fewer than two remain, a usage line is
   * printed and the method returns. If the job fails, an error is printed and the JVM exits with
   * status 1.
   *
   * @param args command-line arguments, optionally including Hadoop generic options
   * @throws IOException propagated from job setup or execution
   * @throws InterruptedException propagated from {@code Job.waitForCompletion}
   * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
   */
  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Both paths are read from otherArgs below, so validate that array (not the raw args) and
    // require two entries.
    if (otherArgs.length < 2) {
      System.out.println("USAGE: TCMReasoner [generic options] <input path> <output path>");
      return;
    }

    Job job = new Job(conf, "reasoner");
    job.setJarByClass(TCMReasoner.class);
    // otherArgs is drawn from args, so args has at least two entries here.
    System.out.println(args[0]);

    job.setMapperClass(TCMMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Triple.class);

    job.setReducerClass(TCMReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Triple.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // Do not report counters for a job that did not complete successfully.
    if (!job.waitForCompletion(true)) {
      System.err.println("ERROR: reasoner job failed");
      System.exit(1);
    }

    Counter derivedTriples =
        job.getCounters()
            .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS");
    System.out.println(derivedTriples.getValue());
  }

示例#28

0

显示文件

文件： NotInFinder.java 项目： KGayan/Acacia

  /**
   * Command-line entry point for the "NotInFinder" job. Input and output locations are fixed:
   * input is read from {@code /user/miyuru/wcout} and output is written to
   * {@code /user/miyuru/notinverts}, which is removed recursively first if it already exists.
   *
   * @param args ignored
   * @throws Exception propagated from file-system access or job execution
   */
  public static void main(String[] args) throws Exception {
    final Path inputDir = new Path("/user/miyuru/wcout");
    final Path outputDir = new Path("/user/miyuru/notinverts");

    // Clear any output left over from a previous run.
    FileSystem fileSystem = FileSystem.get(new JobConf());
    if (fileSystem.exists(outputDir)) {
      fileSystem.delete(outputDir, true);
    }

    // NOTE(review): mapper/reducer/formats are set through the JobConf API and the resulting
    // configuration is then wrapped in a Job; confirm that the Job actually honours them.
    JobConf jobConf = new JobConf();
    jobConf.setNumMapTasks(96);
    jobConf.setMapperClass(TokenizerMapper.class);
    jobConf.setCombinerClass(IntSumReducer.class);
    jobConf.setReducerClass(IntSumReducer.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setInputFormat(NLinesInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(jobConf, inputDir);
    FileOutputFormat.setOutputPath(jobConf, outputDir);

    Job notInFinderJob = new Job(jobConf, "NotInFinder");
    notInFinderJob.setJarByClass(WordCount.class);
    notInFinderJob.setSortComparatorClass(SortComparator.class);

    // The completion status is not inspected.
    notInFinderJob.waitForCompletion(true);
  }

示例#29

0

显示文件

文件： TopNJob.java 项目： sdgdsffdsfff/grade

 /**
  * Runs two MapReduce jobs in sequence. The first reads the comma-separated paths in
  * {@code strings[0]} and is configured with {@code strings[1]} as its reducer path; the second
  * reads {@code strings[1]} and is configured with {@code strings[2]} as its reducer path. Both
  * {@code strings[1]} and {@code strings[2]} are passed to {@code HadoopUtils.deleteIfExist}
  * before the corresponding job is set up.
  *
  * <p>The second job is only started if the first one succeeds.
  *
  * @param strings {@code [0]} comma-separated input paths, {@code [1]} path shared between the
  *     two jobs, {@code [2]} path for the second job's reducer
  * @return 0 if both jobs succeed, 1 if either fails
  * @throws IllegalArgumentException if fewer than three arguments are supplied
  * @throws Exception propagated from job setup or execution
  */
 @Override
 public int run(String[] strings) throws Exception {
   // Fail before touching the file system rather than after the first job has already run.
   if (strings == null || strings.length < 3) {
     throw new IllegalArgumentException(
         "Expected 3 arguments but got " + (strings == null ? 0 : strings.length));
   }

   Configuration configuration = getConf();
   configuration.setLong("mapred.min.split.size", 512 * 1024 * 1024L);
   Job numJob = new Job(configuration, "calculate film program seed num job ");
   Path[] paths = getPaths(strings[0].split(","));
   HadoopUtils.deleteIfExist(strings[1]);
   MapReduceUtils.initMapperJob(
       NumCountMapper.class, Text.class, Text.class, this.getClass(), numJob, paths);
   MapReduceUtils.initReducerJob(new Path(strings[1]), NumCountReducer.class, numJob);
   // The second job consumes strings[1]; do not run it on the output of a failed job.
   if (!numJob.waitForCompletion(true)) {
     return 1;
   }

   Job programeSets = new Job(configuration, "calculate program set num job");
   HadoopUtils.deleteIfExist(strings[2]);
   MapReduceUtils.initMapperJob(
       NumProgramSetsMapper.class,
       Text.class,
       Text.class,
       this.getClass(),
       programeSets,
       new Path(strings[1]));
   programeSets.setCombinerClass(NumProgramSetCombiner.class);
   MapReduceUtils.initReducerJob(new Path(strings[2]), NumProgramSetsReducer.class, programeSets);
   return programeSets.waitForCompletion(true) ? 0 : 1;
 }

示例#30

0

显示文件

文件： CachingRowCounter.java 项目： fundatureanu-sever/hbase-rdf

  /**
   * Builds (but does not submit) a map-only row-counting job over an HBase table.
   *
   * <p>{@code args[0]} is the table name. Every following argument is treated as a column
   * selector: a value without a {@code ':'} separator selects a whole family, otherwise the part
   * before the first {@code ':'} is the family and the part after it is the qualifier. The
   * selectors are joined with single spaces and split on spaces again, so an argument that itself
   * contains a space yields several selectors.
   *
   * @param conf The current configuration.
   * @param args The command line parameters: table name followed by optional column selectors.
   * @return The newly created job.
   * @throws IOException When setting up the job fails.
   */
  public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String table = args[0];
    Job job = new Job(conf, NAME + "_" + table);
    job.setJarByClass(CachingRowCounter.class);

    // Everything after the table name is a column selector; join them with single spaces.
    StringBuilder columnSpec = new StringBuilder();
    String separator = "";
    for (int idx = 1; idx < args.length; idx++) {
      columnSpec.append(separator).append(args[idx]);
      separator = " ";
    }

    Scan scan = new Scan();
    scan.setCaching(100);
    scan.setFilter(new FirstKeyOnlyFilter());
    if (columnSpec.length() != 0) {
      String[] selectors = columnSpec.toString().split(" ");
      for (int s = 0; s < selectors.length; s++) {
        String[] parts = selectors[s].split(":");
        byte[] family = Bytes.toBytes(parts[0]);
        if (parts.length == 1) {
          scan.addFamily(family);
        } else {
          scan.addColumn(family, Bytes.toBytes(parts[1]));
        }
      }
    }

    // No output is written and no reducers run.
    job.setOutputFormatClass(NullOutputFormat.class);
    TableMapReduceUtil.initTableMapperJob(
        table, scan, RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(0);
    return job;
  }