Java JobConfの例

プログラミング言語: Java

名前空間/パッケージ名: org.apache.hadoop.mapred

クラス/型: JobConf

hotexamples.comのコード掲載数: 30

JobConfは、Apache Hadoopのmapred（マップリデュース）フレームワークで使用される、Javaプログラミング言語で記述されたクラスです。このクラスは、Hadoopのジョブ設定を管理し、マップリデュースタスクを実行するための情報とパラメータを提供します。JobConfは、入力ファイルの場所、入出力の形式、マップとリデュースのタスクの設定など、ジョブの実行に必要なすべての情報を保持します。このクラスを使用することで、Hadoopジョブを簡単に設定およびカスタマイズすることができます。

Java JobConf - 30件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたJavaのorg.apache.hadoop.mapred.JobConfの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示非表示

setMapOutputValueClass(30)

setOutputValueClass(30)

setJobName(30)

setMapperClass(30)

setInputFormat(30)

set(30)

setNumMapTasks(30)

setNumReduceTasks(30)

setOutputFormat(30)

setMapOutputKeyClass(30)

setOutputKeyClass(30)

getInt(30)

setReducerClass(30)

get(30)

setCombinerClass(27)

setInt(25)

setBoolean(23)

getBoolean(18)

setJarByClass(16)

getLong(14)

setLong(12)

setPartitionerClass(12)

setMapSpeculativeExecution(10)

getFloat(8)

setClass(7)

setJar(6)

setOutputKeyComparatorClass(6)

setReduceSpeculativeExecution(5)

getCredentials(5)

setOutputValueGroupingComparator(5)

getNumMapTasks(5)

setNumTasksToExecutePerJvm(4)

getJobName(4)

setMapRunnerClass(4)

addResource(4)

getNumReduceTasks(4)

setMaxMapAttempts(4)

setCompressMapOutput(4)

getInputFormat(4)

setSpeculativeExecution(4)

setStrings(3)

setClassLoader(3)

setOutputPath(3)

getMapOutputValueClass(3)

getMapOutputKeyClass(3)

setJobPriority(3)

setFloat(3)

setQueueName(2)

setMaxReduceAttempts(2)

addInputPath(2)

コード例 #1

ファイルを表示

ファイル: SortByTemperatureUsingTotalOrderPartitioner.java プロジェクト: JuneShi0315/hadoop-book

  @Override
  public int run(String[] args) throws Exception {
    JobConf conf = JobBuilder.parseInputAndOutput(this, getConf(), args);
    if (conf == null) {
      return -1;
    }

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(conf, true);
    SequenceFileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(conf, CompressionType.BLOCK);

    conf.setPartitionerClass(TotalOrderPartitioner.class);

    InputSampler.Sampler<IntWritable, Text> sampler =
        new InputSampler.RandomSampler<IntWritable, Text>(0.1, 10000, 10);

    Path input = FileInputFormat.getInputPaths(conf)[0];
    input = input.makeQualified(input.getFileSystem(conf));

    Path partitionFile = new Path(input, "_partitions");
    TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
    InputSampler.writePartitionFile(conf, sampler);

    // Add to DistributedCache
    URI partitionUri = new URI(partitionFile.toString() + "#_partitions");
    DistributedCache.addCacheFile(partitionUri, conf);
    DistributedCache.createSymlink(conf);

    JobClient.runJob(conf);
    return 0;
  }

コード例 #2

ファイルを表示

ファイル: TestTokenCache.java プロジェクト: pwendell/mr2-fairscheduler

  /**
   * run a distributed job and verify that TokenCache is available
   *
   * @throws IOException
   */
  @Test
  public void testTokenCache() throws IOException {

    System.out.println("running dist job");

    // make sure JT starts
    jConf = mrCluster.createJobConf();

    // provide namenodes names for the job to get the delegation tokens for
    String nnUri = dfsCluster.getURI(0).toString();
    jConf.set(MRJobConfig.JOB_NAMENODES, nnUri + "," + nnUri);
    // job tracker principla id..
    jConf.set(JTConfig.JT_USER_NAME, "jt_id/foo@BAR");

    // using argument to pass the file name
    String[] args = {
      "-tokenCacheFile", tokenFileName.toString(), "-m", "1", "-r", "1", "-mt", "1", "-rt", "1"
    };

    int res = -1;
    try {
      res = ToolRunner.run(jConf, new MySleepJob(), args);
    } catch (Exception e) {
      System.out.println("Job failed with" + e.getLocalizedMessage());
      e.printStackTrace(System.out);
      fail("Job failed");
    }
    assertEquals("dist job res is not 0", res, 0);
  }

コード例 #3

ファイルを表示

ファイル: MapReduceInputFormatWrapper.java プロジェクト: HimanshuBhardwaj/elephant-bird

  @Override
  public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {

    JobConf jobConf = (JobConf) HadoopCompat.getConfiguration(context);

    initInputFormat(jobConf);

    org.apache.hadoop.mapred.InputSplit[] splits =
        realInputFormat.getSplits(jobConf, jobConf.getNumMapTasks());

    if (splits == null) {
      return null;
    }

    List<InputSplit> resultSplits = new ArrayList<InputSplit>(splits.length);

    for (org.apache.hadoop.mapred.InputSplit split : splits) {
      if (split.getClass() == org.apache.hadoop.mapred.FileSplit.class) {
        org.apache.hadoop.mapred.FileSplit mapredFileSplit =
            ((org.apache.hadoop.mapred.FileSplit) split);
        resultSplits.add(
            new FileSplit(
                mapredFileSplit.getPath(),
                mapredFileSplit.getStart(),
                mapredFileSplit.getLength(),
                mapredFileSplit.getLocations()));
      } else {
        resultSplits.add(new InputSplitWrapper(split));
      }
    }

    return resultSplits;
  }

コード例 #4

ファイルを表示

ファイル: MaxTemperatureDriver.java プロジェクト: saintstack/hadoop-book

  @Override
  public int run(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.printf(
          "Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
      ToolRunner.printGenericCommandUsage(System.err);
      return -1;
    }

    JobConf conf = new JobConf(getConf(), getClass());
    conf.setJobName("Max temperature");

    FileInputFormat.addInputPath(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MaxTemperatureMapper.class);
    conf.setCombinerClass(MaxTemperatureReducer.class);
    conf.setReducerClass(MaxTemperatureReducer.class);

    // vv MaxTemperatureDriverV6
    conf.setProfileEnabled(true);
    conf.setProfileParams(
        "-agentlib:hprof=cpu=samples,heap=sites,depth=6," + "force=n,thread=y,verbose=n,file=%s");
    conf.setProfileTaskRange(true, "0-2");
    // ^^ MaxTemperatureDriverV6

    JobClient.runJob(conf);
    return 0;
  }

コード例 #5

ファイルを表示

ファイル: JobUtil.java プロジェクト: riteshbagrecha/jumbune

  /**
   * This method call when injected into a class will add the GenericOptionParser with '-libjars'
   * parameter
   *
   * @param job JobConf to which the classpath to be added
   * @param jars comma separated jars to be added to the classpath
   * @param resources comma separated files to be added to the classpath
   * @throws IOException
   */
  public static void addClassPath(final JobConf job, String jars, String resources)
      throws IOException {
    LOGGER.debug(
        "Libraries being added to the classpath: "
            + job.get(JOB_CONF_JARS)
            + "Resources : "
            + job.get(JOB_CONF_RESOURCES));
    // taking libjars and files values passed from console while executing
    // job
    StringBuilder oldJars = new StringBuilder().append(job.get(JOB_CONF_JARS));
    StringBuilder oldFiles = new StringBuilder().append(job.get(JOB_CONF_RESOURCES));
    String jarsTmp = jars;
    String resourcesTmp = resources;
    if (oldJars != null && !oldJars.toString().equals(NULL) && oldJars.length() > 0) {
      oldJars.append(SEPARATOR_COMMA);
      oldJars.append(jarsTmp);
      jarsTmp = oldJars.toString();
    }

    if (resourcesTmp != null && resourcesTmp.length() > 0) {
      if (oldFiles != null && !oldFiles.toString().equals(NULL) && oldFiles.length() > 0) {
        oldFiles.append(SEPARATOR_COMMA);
        oldFiles.append(resourcesTmp);
        resourcesTmp = oldFiles.toString();
      }
      new GenericOptionsParser(
          job, new String[] {GENERIC_PARSER_LIB_JARS, jarsTmp, GENERIC_PARSER_FILES, resourcesTmp});
    } else {
      new GenericOptionsParser(job, new String[] {GENERIC_PARSER_LIB_JARS, jarsTmp});
    }
  }

コード例 #6

ファイルを表示

ファイル: DistCp.java プロジェクト: neutronsharc/hdfsbackup

  /**
   * Driver to copy srcPath to destPath depending on required protocol.
   *
   * @param args arguments
   */
  static void copy(final Configuration conf, final Arguments args) throws IOException {
    LOG.info("srcPaths=" + args.srcs);
    LOG.info("destPath=" + args.dst);
    checkSrcPath(conf, args.srcs);

    JobConf job = createJobConf(conf);
    if (args.preservedAttributes != null) {
      job.set(PRESERVE_STATUS_LABEL, args.preservedAttributes);
    }
    if (args.mapredSslConf != null) {
      job.set("dfs.https.client.keystore.resource", args.mapredSslConf);
    }

    // Initialize the mapper
    try {
      setup(conf, job, args);
      JobClient.runJob(job);
      finalize(conf, job, args.dst, args.preservedAttributes);
    } finally {
      // delete tmp
      fullyDelete(job.get(TMP_DIR_LABEL), job);
      // delete jobDirectory
      fullyDelete(job.get(JOB_DIR_LABEL), job);
    }
  }

コード例 #7

ファイルを表示

ファイル: RandomWriter.java プロジェクト: heipacker/wordcount

 /**
  * Generate the requested number of file splits, with the filename set to the filename of the
  * output file.
  */
 public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
   /** 设置输入分片的个数* */
   JobClient client = new JobClient(job);
   ClusterStatus cluster = client.getClusterStatus();
   /** 如果属性不存在 则返回默认的值 * */
   int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
   long numBytesToWritePerMap =
       job.getLong("test.randomwrite.bytes_per_map", 1 * 1024 * 1024 * 1024);
   if (numBytesToWritePerMap == 0) {
     System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
   }
   long totalBytesToWrite =
       job.getLong(
           "test.randomwrite.total_bytes",
           numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
   int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
   if (numMaps == 0 && totalBytesToWrite > 0) {
     numMaps = 1;
   }
   System.out.println("numMaps-------" + numMaps);
   InputSplit[] result = new InputSplit[numMaps];
   Path outDir = FileOutputFormat.getOutputPath(job);
   for (int i = 0; i < result.length; ++i) {
     result[i] = new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1, (String[]) null);
   }
   return result;
 }

コード例 #8

ファイルを表示

ファイル: JobBuilder.java プロジェクト: Prasadidasi/commoncrawl-crawler

 public JobBuilder reducer(Class<? extends Reducer> reducer, boolean hasCombiner)
     throws IOException {
   if (reducer != IdentityReducer.class) _jobConf.setReducerClass(reducer);
   if (hasCombiner) _jobConf.setCombinerClass(reducer);
   _jobConf.setJarByClass(reducer);
   return this;
 }

コード例 #9

ファイルを表示

ファイル: JobBuilder.java プロジェクト: Prasadidasi/commoncrawl-crawler

 public JobBuilder compressor(CompressionType type, Class<? extends CompressionCodec> codec)
     throws IOException {
   _jobConf.setBoolean("mapred.output.compress", true);
   _jobConf.set("mapred.output.compression.type", type.toString());
   _jobConf.setClass("mapred.output.compression.codec", codec, CompressionCodec.class);
   return this;
 }

コード例 #10

ファイルを表示

ファイル: RBMMapper.java プロジェクト: dery-hit/MapReduce-Based-Deep-Learning

  public void configure(JobConf conf) {
    /*
     * It reads all the configurations and distributed cache from outside.
     */

    // Read number of nodes in input layer and output layer from configuration
    inputNumdims = conf.get("numdims");
    inputNumhid = conf.get("numhid");

    // Read the weights from distributed cache
    Path[] pathwaysFiles = new Path[0];
    try {
      pathwaysFiles = DistributedCache.getLocalCacheFiles(conf);
      for (Path path : pathwaysFiles) {
        /*
         * this loop reads all the distributed cache files
         * In fact, the driver program ensures that there is only one distributed cache file
         */
        BufferedReader fis = new BufferedReader(new FileReader(path.toString()));
        weightline = fis.readLine();
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

コード例 #11

ファイルを表示

ファイル: AvroMR.java プロジェクト: EricDoug/Hadoop-Beginner-s-Guide-Code

  @Override
  public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), getClass());
    conf.setJobName("UFO count");

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: avro UFO counter <in> <out>");
      System.exit(2);
    }

    FileInputFormat.addInputPath(conf, new Path(otherArgs[0]));
    Path outputPath = new Path(otherArgs[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);
    outputPath.getFileSystem(conf).delete(outputPath);
    Schema input_schema = Schema.parse(getClass().getResourceAsStream("ufo.avsc"));
    AvroJob.setInputSchema(conf, input_schema);
    AvroJob.setMapOutputSchema(
        conf,
        Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG)));

    AvroJob.setOutputSchema(conf, OUTPUT_SCHEMA);
    AvroJob.setMapperClass(conf, AvroRecordMapper.class);
    AvroJob.setReducerClass(conf, AvroRecordReducer.class);
    conf.setInputFormat(AvroInputFormat.class);
    JobClient.runJob(conf);

    return 0;
  }

コード例 #12

ファイルを表示

ファイル: TestHive2ActionExecutor.java プロジェクト: jthelin/oozie

  private RunningJob submitAction(Context context, Namespace ns) throws Exception {
    Hive2ActionExecutor ae = new Hive2ActionExecutor();

    WorkflowAction action = context.getAction();

    ae.prepareActionDir(getFileSystem(), context);
    ae.submitLauncher(getFileSystem(), context, action);

    String jobId = action.getExternalId();
    String jobTracker = action.getTrackerUri();
    String consoleUrl = action.getConsoleUrl();
    assertNotNull(jobId);
    assertNotNull(jobTracker);
    assertNotNull(consoleUrl);
    Element e = XmlUtils.parseXml(action.getConf());
    XConfiguration conf =
        new XConfiguration(
            new StringReader(XmlUtils.prettyPrint(e.getChild("configuration", ns)).toString()));
    conf.set("mapred.job.tracker", e.getChildTextTrim("job-tracker", ns));
    conf.set("fs.default.name", e.getChildTextTrim("name-node", ns));
    conf.set("user.name", context.getProtoActionConf().get("user.name"));
    conf.set("group.name", getTestGroup());

    JobConf jobConf = Services.get().get(HadoopAccessorService.class).createJobConf(jobTracker);
    XConfiguration.copy(conf, jobConf);
    String user = jobConf.get("user.name");
    JobClient jobClient =
        Services.get().get(HadoopAccessorService.class).createJobClient(user, jobConf);
    final RunningJob runningJob = jobClient.getJob(JobID.forName(jobId));
    assertNotNull(runningJob);
    return runningJob;
  }

コード例 #13

ファイルを表示

ファイル: AbstractHadoopBdbStoreBuilderMapper.java プロジェクト: rsumbaly/voldemort

  @Override
  @SuppressWarnings("unchecked")
  public void configure(JobConf conf) {
    super.configure(conf);

    keySerializerDefinition = getStoreDef().getKeySerializer();
    valueSerializerDefinition = getStoreDef().getValueSerializer();

    try {
      SerializerFactory factory = new DefaultSerializerFactory();

      if (conf.get("serializer.factory") != null) {
        factory = (SerializerFactory) Class.forName(conf.get("serializer.factory")).newInstance();
      }

      keySerializer = (Serializer<Object>) factory.getSerializer(keySerializerDefinition);
      valueSerializer = (Serializer<Object>) factory.getSerializer(valueSerializerDefinition);
    } catch (Exception e) {
      throw new RuntimeException(e);
    }

    keyCompressor = new CompressionStrategyFactory().get(keySerializerDefinition.getCompression());
    valueCompressor =
        new CompressionStrategyFactory().get(valueSerializerDefinition.getCompression());

    routingStrategy =
        new ConsistentRoutingStrategy(
            getCluster().getNodes(), getStoreDef().getReplicationFactor());
  }

コード例 #14

ファイルを表示

ファイル: HadoopConverterJob.java プロジェクト: AlexanderSaydakov/druid

 private static void setJobName(JobConf jobConf, List<DataSegment> segments) {
   if (segments.size() == 1) {
     final DataSegment segment = segments.get(0);
     jobConf.setJobName(
         String.format(
             "druid-convert-%s-%s-%s",
             segment.getDataSource(), segment.getInterval(), segment.getVersion()));
   } else {
     final Set<String> dataSources =
         Sets.newHashSet(
             Iterables.transform(
                 segments,
                 new Function<DataSegment, String>() {
                   @Override
                   public String apply(DataSegment input) {
                     return input.getDataSource();
                   }
                 }));
     final Set<String> versions =
         Sets.newHashSet(
             Iterables.transform(
                 segments,
                 new Function<DataSegment, String>() {
                   @Override
                   public String apply(DataSegment input) {
                     return input.getVersion();
                   }
                 }));
     jobConf.setJobName(
         String.format(
             "druid-convert-%s-%s",
             Arrays.toString(dataSources.toArray()), Arrays.toString(versions.toArray())));
   }
 }

コード例 #15

ファイルを表示

ファイル: JavaScriptMRBase.java プロジェクト: rdingwell/hadoop-gateway

 public List<JavaScriptSource> setUpSource(JobConf job) {
   List<JavaScriptSource> js = new ArrayList<JavaScriptSource>();
   js.add(
       new JavaScriptSource("emit.js", "function emit(k,v){$mapper.emit(k,v,$output_collector)}"));
   js.add(new JavaScriptSource("map.js", job.get("map.js")));
   js.add(new JavaScriptSource("reduce.js", job.get("reduce.js")));
   js.add(new JavaScriptSource("functions.js", job.get("functions.js")));
   js.add(new JavaScriptSource("filter.js", job.get("filter.js")));
   js.add(new JavaScriptSource("query_id", job.get("query_id")));
   try {
     Path[] files = DistributedCache.getLocalCacheFiles(job);
     if (files != null) {
       for (int i = 0; i < files.length; i++) {
         Path path = files[i];
         if (path.getName().endsWith(".js")) {
           String source = Files.toString(new File(path.toString()), Charset.forName("UTF-8"));
           js.add(new JavaScriptSource(path.getName(), source));
         }
       }
     }
   } catch (IOException ioe) {
     throw new RuntimeException("Couldn't read from DistributedCache", ioe);
   }
   return js;
 }

コード例 #16

ファイルを表示

ファイル: InstrumentDataflow.java プロジェクト: MarvinLiu0810/HiTune

 private void init() {
   String nodes = conf.get(AnalysisProcessorConfiguration.nodes);
   this.nodelist = String2List(nodes, SEPERATOR_COMMA);
   String status = conf.get("status");
   this.statuslist = String2List(status, SEPERATOR_COMMA);
   parsePhase();
 }

コード例 #17

ファイルを表示

ファイル: ItemCFJob.java プロジェクト: bytegriffin/recsys-offline

 public void run() throws Exception {
   long startTime = System.currentTimeMillis();
   JobConf conf = new JobConf(ItemCFJob.class);
   conf.setJobName("ItemCF" + System.currentTimeMillis());
   conf.setNumMapTasks(10);
   conf.set(
       "io.serializations",
       "org.apache.hadoop.io.serializer.JavaSerialization,"
           + "org.apache.hadoop.io.serializer.WritableSerialization");
   StringBuilder sb = new StringBuilder();
   sb.append("--input ").append(input);
   sb.append(" --output ").append(output);
   if (flag) {
     sb.append(" --booleanData true");
   } else {
     sb.append(" --booleanData false");
   }
   sb.append(" --similarityClassname " + Constants.mahout_similarityclassname);
   sb.append(" --tempDir ").append(tmp);
   String[] args = sb.toString().split(" ");
   RecommenderJob job = new RecommenderJob();
   job.setConf(conf);
   job.run(args);
   long endTime = System.currentTimeMillis();
   logger.info(
       "recommdation job ["
           + conf.getJobName()
           + "] run finish. it costs"
           + (endTime - startTime) / 1000
           + "s.");
 }

コード例 #18

ファイルを表示

ファイル: TestHiveAccumuloTableInputFormat.java プロジェクト: Leolh/hive

  @Test
  public void testConfigureAccumuloInputFormatWithIterators() throws Exception {
    AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
    ColumnMapper columnMapper =
        new ColumnMapper(
            conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS),
            conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE),
            columnNames,
            columnTypes);
    Set<Pair<Text, Text>> cfCqPairs =
        inputformat.getPairCollection(columnMapper.getColumnMappings());
    List<IteratorSetting> iterators = new ArrayList<IteratorSetting>();
    Set<Range> ranges = Collections.singleton(new Range());
    String instanceName = "realInstance";
    String zookeepers = "host1:2181,host2:2181,host3:2181";

    IteratorSetting cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
    cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, StringCompare.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "dave");
    cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:name");
    iterators.add(cfg);

    cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
    cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, IntCompare.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "50");
    cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:age");
    iterators.add(cfg);

    ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class);
    HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);

    // Stub out the ZKI mock
    Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName);
    Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers);

    // Call out to the real configure method
    Mockito.doCallRealMethod()
        .when(mockInputFormat)
        .configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);

    // Also compute the correct cf:cq pairs so we can assert the right argument was passed
    Mockito.doCallRealMethod()
        .when(mockInputFormat)
        .getPairCollection(columnMapper.getColumnMappings());

    mockInputFormat.configure(
        conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);

    // Verify that the correct methods are invoked on AccumuloInputFormat
    Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers, false);
    Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
    Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
    Mockito.verify(mockInputFormat)
        .setScanAuthorizations(conf, con.securityOperations().getUserAuthorizations(USER));
    Mockito.verify(mockInputFormat).addIterators(conf, iterators);
    Mockito.verify(mockInputFormat).setRanges(conf, ranges);
    Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs);
  }

コード例 #19

ファイルを表示

ファイル: FileCombiner.java プロジェクト: hfausta/thesis-file-combiner

  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {
    // TODO Auto-generated method stub
    JobConf conf = new JobConf();
    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(5);

    FileSystem fs = FileSystem.get(conf);
    Path dir = new Path(args[0]);
    FileStatus[] stats = fs.listStatus(dir);
    numFiles = stats.length;

    Job job = new Job(conf);
    job.setJarByClass(FileCombiner.class);
    job.setJobName("File Combiner");

    job.setMapperClass(FileCombinerMapper.class);
    job.setReducerClass(FileCombinerReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
  }

コード例 #20

ファイルを表示

ファイル: Docnos2Titles.java プロジェクト: AdeDZY/Ivory

    public void configure(JobConf job) {
      sLogger.setLevel(Level.INFO);
      srcLang = job.get("fLang");
      mJob = job;
      pwsimMapping = new HMapIV<ArrayListOfIntsWritable>();
      valOut = new PairOfIntString();
      keyOut = new PairOfInts();

      // read doc ids of sample into vectors
      String samplesFile = job.get("Ivory.SampleFile");
      if (samplesFile != null) {
        try {
          samplesMap = readSamplesFromCache(getFilename(samplesFile), job);
        } catch (NumberFormatException e) {
          e.printStackTrace();
          throw new RuntimeException("Incorrect format in " + samplesFile);
        } catch (IOException e) {
          e.printStackTrace();
          throw new RuntimeException("I/O error in " + samplesFile);
        } catch (Exception e) {
          e.printStackTrace();
          throw new RuntimeException("Error reading sample file: " + samplesFile);
        }
      }
    }

コード例 #21

ファイルを表示

ファイル: CompactorMR.java プロジェクト: nkeywal/hive

    @Override
    public void map(
        WritableComparable key,
        CompactorInputSplit split,
        OutputCollector<NullWritable, NullWritable> nullWritableVOutputCollector,
        Reporter reporter)
        throws IOException {
      // This will only get called once, since CompactRecordReader only returns one record,
      // the input split.
      // Based on the split we're passed we go instantiate the real reader and then iterate on it
      // until it finishes.
      @SuppressWarnings("unchecked") // since there is no way to parametrize instance of Class
      AcidInputFormat<WritableComparable, V> aif =
          instantiate(AcidInputFormat.class, jobConf.get(INPUT_FORMAT_CLASS_NAME));
      ValidTxnList txnList = new ValidReadTxnList(jobConf.get(ValidTxnList.VALID_TXNS_KEY));

      boolean isMajor = jobConf.getBoolean(IS_MAJOR, false);
      AcidInputFormat.RawReader<V> reader =
          aif.getRawReader(
              jobConf,
              isMajor,
              split.getBucket(),
              txnList,
              split.getBaseDir(),
              split.getDeltaDirs());
      RecordIdentifier identifier = reader.createKey();
      V value = reader.createValue();
      getWriter(reporter, reader.getObjectInspector(), split.getBucket());
      while (reader.next(identifier, value)) {
        if (isMajor && reader.isDelete(value)) continue;
        writer.write(value);
        reporter.progress();
      }
    }

コード例 #22

ファイルを表示

ファイル: TestMerger.java プロジェクト: jonathangizmo/HadoopDistJ

 @SuppressWarnings({"deprecation", "unchecked"})
 public void testMergeShouldReturnProperProgress(List<Segment<Text, Text>> segments)
     throws IOException {
   Path tmpDir = new Path("localpath");
   Class<Text> keyClass = (Class<Text>) jobConf.getMapOutputKeyClass();
   Class<Text> valueClass = (Class<Text>) jobConf.getMapOutputValueClass();
   RawComparator<Text> comparator = jobConf.getOutputKeyComparator();
   Counter readsCounter = new Counter();
   Counter writesCounter = new Counter();
   Progress mergePhase = new Progress();
   RawKeyValueIterator mergeQueue =
       Merger.merge(
           conf,
           fs,
           keyClass,
           valueClass,
           segments,
           2,
           tmpDir,
           comparator,
           getReporter(),
           readsCounter,
           writesCounter,
           mergePhase);
   Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), 0.0f);
 }

コード例 #23

ファイルを表示

ファイル: Similarity.java プロジェクト: gdfm/similarity-self-join

 @Override
 public void configure(JobConf conf) {
   this.threshold = conf.getFloat(PARAM_APS_THRESHOLD, DEFAULT_THRESHOLD);
   int reducerID = conf.getInt("mapred.task.partition", -1);
   int max = conf.getInt(PARAM_APS_MAXKEY, 0);
   int nstripes = conf.getInt(PARAM_APS_STRIPES, 1);
   int spread = conf.getInt(PARAM_APS_REDUCER_PER_STRIPE, 1);
   if (reducerID < 0 || max == 0) {
     LOG.error("Could not find stripe ID, reverting to whole rest file loading");
     LOG.debug("reducer = " + reducerID + "\t max = " + max + "\t nstripes = " + nstripes);
     // open the pruned part file in the DistrubutedCache
     haspruned = FileUtils.readRestFile(conf, pruned);
   } else {
     int stripe = GenericKey.StripePartitioner.findStripe(reducerID, spread);
     int from = GenericKey.StripePartitioner.minKeyInStripe(stripe, nstripes, max);
     int to = from + GenericKey.StripePartitioner.numKeysInStripe(stripe, nstripes, max);
     // read from 'from' included, to 'to' excluded
     LOG.info(
         "Reducer "
             + reducerID
             + " loading stripe "
             + stripe
             + " of "
             + nstripes
             + " ("
             + from
             + ","
             + (to - 1)
             + ")");
     haspruned = FileUtils.readRestFile(conf, pruned, from, to);
   }
   if (!haspruned) LOG.warn("No pruned file provided in DistributedCache");
   else LOG.info("Read " + pruned.size() + " entries from pruned file");
 }

コード例 #24

ファイルを表示

ファイル: KMeansMapper.java プロジェクト: RamGhadiyaram/cloud-class

  public void configure(JobConf conf) {
    numberOfCenters = Integer.valueOf(conf.get("numberOfCenters"));
    centersDirectory = conf.get("centersReadDirectory");

    try {
      Configuration c = new Configuration();
      FileSystem fs = FileSystem.get(c);

      for (int index = 0; index < numberOfCenters; ++index) {
        SequenceFile.Reader reader =
            new SequenceFile.Reader(fs, new Path(centersDirectory + "/centers/" + index), c);

        LongWritable key = new LongWritable();
        Point value = new Point();

        reader.next(key, value);

        Point center = (Point) value;

        centers.add(center);

        reader.close();
      }
    } catch (IOException e) {
      // do nothing
      // I hope this doesn't happen
      System.out.println("well, damn.");
      e.printStackTrace();
    }
  }

コード例 #25

ファイルを表示

ファイル: BuildGraph.java プロジェクト: avijitgupta/contrail-bio

  public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: BuildGraph");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    JobConf conf = new JobConf(BuildGraph.class);
    conf.setJobName("BuildGraph " + inputPath + " " + ContrailConfig.K);

    ContrailConfig.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(BuildGraphMapper.class);
    conf.setReducerClass(BuildGraphReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
  }

コード例 #26

ファイルを表示

ファイル: CorpusVocabNormalizerAndNumberizer.java プロジェクト: rahulbhawsar/Cloud9

    @Override
    public void close() {
      System.err.println(
          "Target: " + vocE.size() + " types. Writing to " + job_.get("root", null) + "/vocab.E");
      System.err.println(
          "Source: " + vocF.size() + " types .Writing to " + job_.get("root", null) + "/vocab.F");

      // write out vocabulary to file
      try {
        FileSystem fs = FileSystem.get(job_);

        DataOutputStream dos =
            new DataOutputStream(
                new BufferedOutputStream(fs.create(new Path(job_.get("root", null) + "/vocab.E"))));
        ((VocabularyWritable) vocE).write(dos);
        dos.close();
        DataOutputStream dos2 =
            new DataOutputStream(
                new BufferedOutputStream(fs.create(new Path(job_.get("root", null) + "/vocab.F"))));
        ((VocabularyWritable) vocF).write(dos2);
        dos2.close();

      } catch (IOException e) {
        throw new RuntimeException("Vocab couldn't be written to disk.\n" + e.toString());
      }
    }

コード例 #27

ファイルを表示

ファイル: WordCountExtended.java プロジェクト: st3ffwo3/hadoop_samples

  /**
   * {@inheritDoc}
   *
   * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
   */
  @Override
  public int run(String[] args) throws Exception {
    JobConf configuration = new JobConf(getConf(), WordCountExtended.class);
    configuration.setJobName(JOB_NAME);

    configuration.setOutputKeyClass(Text.class);
    configuration.setOutputValueClass(IntWritable.class);

    configuration.setMapperClass(Map.class);
    configuration.setCombinerClass(Reduce.class);
    configuration.setReducerClass(Reduce.class);

    configuration.setInputFormat(TextInputFormat.class);
    configuration.setOutputFormat(TextOutputFormat.class);

    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
      if (JOB_SKIP_ARGUMENT.equals(args[i])) {
        DistributedCache.addCacheFile(new Path(args[++i]).toUri(), configuration);
        configuration.setBoolean(JOB_PARAMETER_SKIP_PATTERNS, true);
      } else {
        otherArgs.add(args[i]);
      }
    }

    FileInputFormat.setInputPaths(configuration, new Path(otherArgs.get(0)));
    FileOutputFormat.setOutputPath(configuration, new Path(otherArgs.get(1)));

    JobClient.runJob(configuration);
    return 0;
  }

コード例 #28

ファイルを表示

ファイル: PagerankData.java プロジェクト: ConeyLiu/HiBench

  private void setPageRankLinksOptions(JobConf job) throws URISyntaxException {
    job.setLong("pages", options.getNumPages());
    job.setLong("slotpages", options.getNumSlotPages());
    job.set("delimiter", cdelim);

    Utils.shareLinkZipfCore(options, job);
  }

コード例 #29

ファイルを表示

ファイル: Compressible.java プロジェクト: nabsrock786/Hadoop_NGS_Lakshman

  public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: Compressible");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    // JobConf conf = new JobConf(Stats.class);
    JobConf conf = new JobConf(Compressible.class);
    conf.setJobName("Compressible " + inputPath);

    BrushConfig.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(CompressibleMapper.class);
    conf.setReducerClass(CompressibleReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
  }

コード例 #30

ファイルを表示

ファイル: CDMahoutEvaluator.java プロジェクト: maximzhao/Mahout-GSOC-LibLinear

  /**
   * Configure the job
   *
   * @param conf Job to configure
   * @param rules classification rules to evaluate
   * @param target label value to evaluate the rules for
   * @param inpath input path (the dataset)
   * @param outpath output <code>Path</code>
   * @param split DatasetSplit used to separate training and testing input
   */
  private static void configureJob(
      JobConf conf,
      List<? extends Rule> rules,
      int target,
      Path inpath,
      Path outpath,
      DatasetSplit split) {
    split.storeJobParameters(conf);

    FileInputFormat.setInputPaths(conf, inpath);
    FileOutputFormat.setOutputPath(conf, outpath);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(CDFitness.class);

    conf.setMapperClass(CDMapper.class);
    conf.setCombinerClass(CDReducer.class);
    conf.setReducerClass(CDReducer.class);

    conf.setInputFormat(DatasetTextInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    // store the parameters
    conf.set(CDMapper.CLASSDISCOVERY_RULES, StringUtils.toString(rules));
    conf.set(CDMapper.CLASSDISCOVERY_DATASET, StringUtils.toString(DataSet.getDataSet()));
    conf.setInt(CDMapper.CLASSDISCOVERY_TARGET_LABEL, target);
  }