Example #1
  /**
   * Adds the given jars and resources to the job classpath by invoking GenericOptionsParser
   * with the '-libjars' (and, when resources are supplied, '-files') options.
   *
   * @param job JobConf to which the classpath to be added
   * @param jars comma separated jars to be added to the classpath
   * @param resources comma separated files to be added to the classpath
   * @throws IOException
   */
  public static void addClassPath(final JobConf job, String jars, String resources)
      throws IOException {
    LOGGER.debug(
        "Libraries being added to the classpath: "
            + job.get(JOB_CONF_JARS)
            + "Resources : "
            + job.get(JOB_CONF_RESOURCES));
    // Pick up any libjars/files values already passed on the command line when
    // the job was launched.
    StringBuilder oldJars = new StringBuilder().append(job.get(JOB_CONF_JARS));
    StringBuilder oldFiles = new StringBuilder().append(job.get(JOB_CONF_RESOURCES));
    String jarsTmp = jars;
    String resourcesTmp = resources;
    if (oldJars != null && !oldJars.toString().equals(NULL) && oldJars.length() > 0) {
      oldJars.append(SEPARATOR_COMMA);
      oldJars.append(jarsTmp);
      jarsTmp = oldJars.toString();
    }

    if (resourcesTmp != null && resourcesTmp.length() > 0) {
      if (oldFiles != null && !oldFiles.toString().equals(NULL) && oldFiles.length() > 0) {
        oldFiles.append(SEPARATOR_COMMA);
        oldFiles.append(resourcesTmp);
        resourcesTmp = oldFiles.toString();
      }
      new GenericOptionsParser(
          job, new String[] {GENERIC_PARSER_LIB_JARS, jarsTmp, GENERIC_PARSER_FILES, resourcesTmp});
    } else {
      new GenericOptionsParser(job, new String[] {GENERIC_PARSER_LIB_JARS, jarsTmp});
    }
  }
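
For context, here is a minimal, self-contained sketch of the technique this example builds on: registering extra jars on a JobConf by handing GenericOptionsParser a '-libjars' argument. The temporary files and the check of the "tmpjars" key are illustrative assumptions, not part of the snippet above.

import java.io.File;
import java.io.IOException;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.GenericOptionsParser;

public class LibJarsSketch {
  public static void main(String[] args) throws IOException {
    // GenericOptionsParser verifies that -libjars entries exist, so create two stand-in files.
    File jarA = File.createTempFile("dep-a", ".jar");
    File jarB = File.createTempFile("dep-b", ".jar");
    jarA.deleteOnExit();
    jarB.deleteOnExit();

    JobConf job = new JobConf();
    String jars = jarA.getAbsolutePath() + "," + jarB.getAbsolutePath();
    new GenericOptionsParser(job, new String[] {"-libjars", jars});

    // GenericOptionsParser records the -libjars list under the "tmpjars" configuration key,
    // presumably the key behind the example's JOB_CONF_JARS constant.
    System.out.println("tmpjars = " + job.get("tmpjars"));
  }
}
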
Example #2
    @Override
    public void map(
        WritableComparable key,
        CompactorInputSplit split,
        OutputCollector<NullWritable, NullWritable> nullWritableVOutputCollector,
        Reporter reporter)
        throws IOException {
      // This will only get called once, since CompactRecordReader only returns one record,
      // the input split.
      // Based on the split we are passed, we instantiate the real reader and then iterate on it
      // until it finishes.
      @SuppressWarnings("unchecked") // since there is no way to parametrize instance of Class
      AcidInputFormat<WritableComparable, V> aif =
          instantiate(AcidInputFormat.class, jobConf.get(INPUT_FORMAT_CLASS_NAME));
      ValidTxnList txnList = new ValidReadTxnList(jobConf.get(ValidTxnList.VALID_TXNS_KEY));

      boolean isMajor = jobConf.getBoolean(IS_MAJOR, false);
      AcidInputFormat.RawReader<V> reader =
          aif.getRawReader(
              jobConf,
              isMajor,
              split.getBucket(),
              txnList,
              split.getBaseDir(),
              split.getDeltaDirs());
      RecordIdentifier identifier = reader.createKey();
      V value = reader.createValue();
      getWriter(reporter, reader.getObjectInspector(), split.getBucket());
      while (reader.next(identifier, value)) {
        if (isMajor && reader.isDelete(value)) continue;
        writer.write(value);
        reporter.progress();
      }
    }
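
The mapper relies on an instantiate helper that is not shown in the snippet. A plausible minimal stand-in, assuming it only needs the class name read from the JobConf and a public no-argument constructor (a sketch, not the project's actual helper):

final class InstantiateSketch {
  /** Hypothetical stand-in for the instantiate(...) helper used above. */
  static <T> T instantiate(Class<T> expected, String className) {
    try {
      // Resolve the configured class name and check it against the expected supertype.
      Class<? extends T> impl = Class.forName(className).asSubclass(expected);
      return impl.getDeclaredConstructor().newInstance(); // assumes a public no-arg constructor
    } catch (ReflectiveOperationException e) {
      throw new RuntimeException("Could not instantiate " + className, e);
    }
  }
}
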
Example #3
  @Override
  @SuppressWarnings("unchecked")
  public void configure(JobConf conf) {
    super.configure(conf);

    keySerializerDefinition = getStoreDef().getKeySerializer();
    valueSerializerDefinition = getStoreDef().getValueSerializer();

    try {
      SerializerFactory factory = new DefaultSerializerFactory();

      if (conf.get("serializer.factory") != null) {
        factory = (SerializerFactory) Class.forName(conf.get("serializer.factory")).newInstance();
      }

      keySerializer = (Serializer<Object>) factory.getSerializer(keySerializerDefinition);
      valueSerializer = (Serializer<Object>) factory.getSerializer(valueSerializerDefinition);
    } catch (Exception e) {
      throw new RuntimeException(e);
    }

    keyCompressor = new CompressionStrategyFactory().get(keySerializerDefinition.getCompression());
    valueCompressor =
        new CompressionStrategyFactory().get(valueSerializerDefinition.getCompression());

    routingStrategy =
        new ConsistentRoutingStrategy(
            getCluster().getNodes(), getStoreDef().getReplicationFactor());
  }
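
As an aside, the optional "serializer.factory" lookup above can also be written with Configuration.getClass, which falls back to a default class when the key is unset, plus ReflectionUtils for instantiation. The sketch below uses Runnable as a stand-in interface so it stays self-contained; the example's SerializerFactory types would take its place in real code.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;

public class FactoryLookupSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // conf.set("serializer.factory", "com.example.CustomImpl");  // hypothetical override

    // Resolve an optional class name from the configuration, defaulting when absent,
    // instead of the null check plus Class.forName(...).newInstance() in the example.
    Class<? extends Runnable> impl =
        conf.getClass("serializer.factory", DefaultImpl.class, Runnable.class);
    Runnable factory = ReflectionUtils.newInstance(impl, conf);
    factory.run();
  }

  /** Stand-in default; the example's DefaultSerializerFactory plays this role. */
  public static class DefaultImpl implements Runnable {
    @Override
    public void run() {
      System.out.println("using default implementation: " + getClass().getName());
    }
  }
}
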
Example #4
  @Test
  public void testConfigureAccumuloInputFormatWithIterators() throws Exception {
    AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
    ColumnMapper columnMapper =
        new ColumnMapper(
            conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS),
            conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE),
            columnNames,
            columnTypes);
    Set<Pair<Text, Text>> cfCqPairs =
        inputformat.getPairCollection(columnMapper.getColumnMappings());
    List<IteratorSetting> iterators = new ArrayList<IteratorSetting>();
    Set<Range> ranges = Collections.singleton(new Range());
    String instanceName = "realInstance";
    String zookeepers = "host1:2181,host2:2181,host3:2181";

    IteratorSetting cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
    cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, StringCompare.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "dave");
    cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:name");
    iterators.add(cfg);

    cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
    cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, IntCompare.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "50");
    cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:age");
    iterators.add(cfg);

    ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class);
    HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);

    // Stub out the ZKI mock
    Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName);
    Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers);

    // Call out to the real configure method
    Mockito.doCallRealMethod()
        .when(mockInputFormat)
        .configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);

    // Also compute the correct cf:cq pairs so we can assert the right argument was passed
    Mockito.doCallRealMethod()
        .when(mockInputFormat)
        .getPairCollection(columnMapper.getColumnMappings());

    mockInputFormat.configure(
        conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);

    // Verify that the correct methods are invoked on AccumuloInputFormat
    Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers, false);
    Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
    Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
    Mockito.verify(mockInputFormat)
        .setScanAuthorizations(conf, con.securityOperations().getUserAuthorizations(USER));
    Mockito.verify(mockInputFormat).addIterators(conf, iterators);
    Mockito.verify(mockInputFormat).setRanges(conf, ranges);
    Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs);
  }
Example #5
  public void configure(JobConf conf) {
    numberOfCenters = Integer.valueOf(conf.get("numberOfCenters"));
    centersDirectory = conf.get("centersReadDirectory");

    try {
      Configuration c = new Configuration();
      FileSystem fs = FileSystem.get(c);

      for (int index = 0; index < numberOfCenters; ++index) {
        SequenceFile.Reader reader =
            new SequenceFile.Reader(fs, new Path(centersDirectory + "/centers/" + index), c);

        LongWritable key = new LongWritable();
        Point value = new Point();

        reader.next(key, value);

        Point center = (Point) value;

        centers.add(center);

        reader.close();
      }
    } catch (IOException e) {
      // Without the centers the mapper cannot do its work, so report the failure clearly.
      System.err.println("Unable to read cluster centers from " + centersDirectory);
      e.printStackTrace();
    }
  }
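
One note on the reader construction: the (FileSystem, Path, Configuration) constructor used above is deprecated on Hadoop 2 and later. A sketch of the newer option-based style, with Text standing in for the example's custom Point writable and the file path taken from the command line:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class CenterReaderSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path(args[0]); // a SequenceFile of LongWritable/Text pairs

    // Reader options name the file directly, and try-with-resources replaces the manual close().
    try (SequenceFile.Reader reader =
        new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))) {
      LongWritable key = new LongWritable();
      Text value = new Text(); // stand-in for the example's Point writable
      while (reader.next(key, value)) {
        System.out.println(key + "\t" + value);
      }
    }
  }
}
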
Example #6
  public void configure(JobConf conf) {
    /*
     * It reads all the configurations and distributed cache from outside.
     */

    // Read number of nodes in input layer and output layer from configuration
    inputNumdims = conf.get("numdims");
    inputNumhid = conf.get("numhid");

    // Read the weights from distributed cache
    Path[] pathwaysFiles = new Path[0];
    try {
      pathwaysFiles = DistributedCache.getLocalCacheFiles(conf);
      for (Path path : pathwaysFiles) {
        /*
         * This loop reads every distributed cache file; in practice the driver program
         * ensures that there is only one.
         */
        try (BufferedReader fis = new BufferedReader(new FileReader(path.toString()))) {
          weightline = fis.readLine();
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
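
The comment above says the driver guarantees exactly one distributed cache file. For completeness, a hedged sketch of what that driver-side setup might look like with the same old-API DistributedCache calls; the property values and the weights path are made up.

import java.net.URI;

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.mapred.JobConf;

public class CacheSetupSketch {
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf();
    // Hypothetical layer sizes; the mapper above reads these back with conf.get(...).
    conf.set("numdims", "784");
    conf.set("numhid", "500");
    // Ship a single weights file so getLocalCacheFiles(conf) sees exactly one entry.
    DistributedCache.addCacheFile(new URI("/user/me/weights.txt"), conf);
  }
}
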
Example #7
    @Override
    public void close() {
      System.err.println(
          "Target: " + vocE.size() + " types. Writing to " + job_.get("root", null) + "/vocab.E");
      System.err.println(
          "Source: " + vocF.size() + " types .Writing to " + job_.get("root", null) + "/vocab.F");

      // write out vocabulary to file
      try {
        FileSystem fs = FileSystem.get(job_);

        DataOutputStream dos =
            new DataOutputStream(
                new BufferedOutputStream(fs.create(new Path(job_.get("root", null) + "/vocab.E"))));
        ((VocabularyWritable) vocE).write(dos);
        dos.close();
        DataOutputStream dos2 =
            new DataOutputStream(
                new BufferedOutputStream(fs.create(new Path(job_.get("root", null) + "/vocab.F"))));
        ((VocabularyWritable) vocF).write(dos2);
        dos2.close();

      } catch (IOException e) {
        throw new RuntimeException("Vocab couldn't be written to disk.\n" + e.toString());
      }
    }
Example #8
  /**
   * Driver to copy srcPath to destPath depending on required protocol.
   *
   * @param conf the job configuration
   * @param args the parsed arguments: source paths, destination path, and copy options
   */
  static void copy(final Configuration conf, final Arguments args) throws IOException {
    LOG.info("srcPaths=" + args.srcs);
    LOG.info("destPath=" + args.dst);
    checkSrcPath(conf, args.srcs);

    JobConf job = createJobConf(conf);
    if (args.preservedAttributes != null) {
      job.set(PRESERVE_STATUS_LABEL, args.preservedAttributes);
    }
    if (args.mapredSslConf != null) {
      job.set("dfs.https.client.keystore.resource", args.mapredSslConf);
    }

    // Initialize the mapper
    try {
      setup(conf, job, args);
      JobClient.runJob(job);
      finalize(conf, job, args.dst, args.preservedAttributes);
    } finally {
      // delete tmp
      fullyDelete(job.get(TMP_DIR_LABEL), job);
      // delete jobDirectory
      fullyDelete(job.get(JOB_DIR_LABEL), job);
    }
  }
Example #9
    public void configure(JobConf job) {
      sLogger.setLevel(Level.INFO);
      srcLang = job.get("fLang");
      mJob = job;
      pwsimMapping = new HMapIV<ArrayListOfIntsWritable>();
      valOut = new PairOfIntString();
      keyOut = new PairOfInts();

      // read doc ids of sample into vectors
      String samplesFile = job.get("Ivory.SampleFile");
      if (samplesFile != null) {
        try {
          samplesMap = readSamplesFromCache(getFilename(samplesFile), job);
        } catch (NumberFormatException e) {
          e.printStackTrace();
          throw new RuntimeException("Incorrect format in " + samplesFile);
        } catch (IOException e) {
          e.printStackTrace();
          throw new RuntimeException("I/O error in " + samplesFile);
        } catch (Exception e) {
          e.printStackTrace();
          throw new RuntimeException("Error reading sample file: " + samplesFile);
        }
      }
    }
Example #10
 private void init() {
   String nodes = conf.get(AnalysisProcessorConfiguration.nodes);
   this.nodelist = String2List(nodes, SEPERATOR_COMMA);
   String status = conf.get("status");
   this.statuslist = String2List(status, SEPERATOR_COMMA);
   parsePhase();
 }
Example #11
 public List<JavaScriptSource> setUpSource(JobConf job) {
   List<JavaScriptSource> js = new ArrayList<JavaScriptSource>();
   js.add(
       new JavaScriptSource("emit.js", "function emit(k,v){$mapper.emit(k,v,$output_collector)}"));
   js.add(new JavaScriptSource("map.js", job.get("map.js")));
   js.add(new JavaScriptSource("reduce.js", job.get("reduce.js")));
   js.add(new JavaScriptSource("functions.js", job.get("functions.js")));
   js.add(new JavaScriptSource("filter.js", job.get("filter.js")));
   js.add(new JavaScriptSource("query_id", job.get("query_id")));
   try {
     Path[] files = DistributedCache.getLocalCacheFiles(job);
     if (files != null) {
       for (int i = 0; i < files.length; i++) {
         Path path = files[i];
         if (path.getName().endsWith(".js")) {
           String source = Files.toString(new File(path.toString()), Charset.forName("UTF-8"));
           js.add(new JavaScriptSource(path.getName(), source));
         }
       }
     }
   } catch (IOException ioe) {
     throw new RuntimeException("Couldn't read from DistributedCache", ioe);
   }
   return js;
 }
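
A small modernization note on the cache-file read above: current Guava releases deprecate Files.toString in favor of asCharSource(...).read(), and StandardCharsets.UTF_8 replaces the Charset.forName("UTF-8") lookup. A sketch:

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import com.google.common.io.Files;

public class ScriptReadSketch {
  // Reads a cached .js file the same way the loop above does, using the non-deprecated API.
  static String readScript(File file) throws IOException {
    return Files.asCharSource(file, StandardCharsets.UTF_8).read();
  }
}
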
Example #12
  /* (non-Javadoc)
   * @see org.apache.hadoop.chukwa.analysis.HiTune.AnalysisProcessor#run()
   */
  @Override
  public void run() {

    long timestamp = System.currentTimeMillis();

    JobConf conf = new JobConf(this.conf, InstrumentDataflow.class);
    try {
      conf.setJobName(this.getClass().getSimpleName() + timestamp);
      conf.setInputFormat(MultiSequenceFileInputFormat.class);
      conf.setMapperClass(InstrumentDataflow.MapClass.class);
      conf.setReducerClass(InstrumentDataflow.ReduceClass.class);
      conf.setOutputKeyClass(Text.class);
      Class<? extends WritableComparable> outputKeyClass =
          Class.forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
              .asSubclass(WritableComparable.class);
      Class<? extends Writable> outputValueClass =
          Class.forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
              .asSubclass(Writable.class);
      conf.setMapOutputKeyClass(outputKeyClass);
      conf.setMapOutputValueClass(outputValueClass);

      conf.setOutputValueClass(TextArrayWritable.class);
      conf.setOutputFormat(CSVFileOutputFormat.class);

      String outputPaths =
          conf.get(AnalysisProcessorConfiguration.reportfolder)
              + "/"
              + conf.get(AnalysisProcessorConfiguration.reportfile);
      String temp_outputPaths = getTempOutputDir(outputPaths);

      if (this.inputfiles != null) {
        log.debug("inputPaths:" + inputfiles);
        FileInputFormat.setInputPaths(conf, inputfiles);
        FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));

        // FileInputFormat.setInputPathFilter(conf, evtFileFilter.class);
        // conf.setNumReduceTasks(1);

        try {
          JobClient.runJob(conf);
          moveResults(conf, outputPaths, temp_outputPaths);
        } catch (IOException e) {
          log.warn("For " + getOutputFileName() + ": job failed!");
          log.warn(e);
          e.printStackTrace();
          this.MOVE_DONE = false;
        }

      } else {
        log.warn("For " + getOutputFileName() + " :No input path!");
      }
    } catch (Exception e) {
      log.warn("Job preparation failure!");
      log.warn(e);
      e.printStackTrace();
    }
  }
Example #13
  /**
   * The main driver for word count map/reduce program. Invoke this method to submit the map/reduce
   * job.
   *
   * @throws IOException When there are communication problems with the job tracker.
   */
  public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), WordCountSeqOutput.class);
    conf.setJobName("wordcount_seqOF");

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts, but they are emitted as Text rather than IntWritable
    // conf.setOutputValueClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Combiner.class);
    conf.setReducerClass(Reduce.class);

    conf.setOutputFormat(SequenceFileOutputFormat.class);

    //      // compress Mapper output
    //      conf.setCompressMapOutput(true);
    //      conf.setMapOutputCompressorClass(org.apache.hadoop.io.compress.GzipCodec.class);

    // compress final output
    conf.set("mapred.output.compress", conf.get("mapred.output.compress", "true"));
    conf.set("mapred.output.compression.type", conf.get("mapred.output.compression.type", "BLOCK"));
    conf.set(
        "mapred.output.compression.codec",
        conf.get("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec"));

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
      try {
        if ("-m".equals(args[i])) {
          conf.setNumMapTasks(Integer.parseInt(args[++i]));
        } else if ("-r".equals(args[i])) {
          conf.setNumReduceTasks(Integer.parseInt(args[++i]));
        } else {
          other_args.add(args[i]);
        }
      } catch (NumberFormatException except) {
        System.out.println("ERROR: Integer expected instead of " + args[i]);
        return printUsage();
      } catch (ArrayIndexOutOfBoundsException except) {
        System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
        return printUsage();
      }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
      System.out.println(
          "ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
      return printUsage();
    }
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
  }
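
The raw mapred.output.compress* keys above can also be set through the typed helpers of the old mapred API. A sketch with the same Gzip/BLOCK choices; note that, unlike the conf.get(key, default) pattern above, this sets the values unconditionally.

import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

public class CompressionSketch {
  static void configureCompression(JobConf conf) {
    // Equivalent to setting "mapred.output.compress", "...compression.type" and
    // "...compression.codec" by hand.
    FileOutputFormat.setCompressOutput(conf, true);
    FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);
  }
}
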
Example #14
  public void configure(JobConf job) {
    double fpLat = Double.parseDouble(job.get("fpLat"));
    double fpLong = Double.parseDouble(job.get("fpLong"));
    fp = new FocalPoint(fpLong, fpLat);

    k = Integer.parseInt(job.get("k"));
    Comparator<Tuple> comparer = new TupleAscComparer();
    queue = new PriorityQueue<Tuple>(50, comparer);
  }
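
A hedged alternative to the manual parsing above: Configuration's typed getters take a default value and avoid a NullPointerException when a key is unset. getDouble is available on Hadoop 2+ Configuration; the default values below are placeholders.

import org.apache.hadoop.mapred.JobConf;

public class TypedGetterSketch {
  static void configure(JobConf job) {
    double fpLat = job.getDouble("fpLat", 0.0);   // falls back to 0.0 if "fpLat" is unset
    double fpLong = job.getDouble("fpLong", 0.0);
    int k = job.getInt("k", 10);                  // placeholder default
    System.out.printf("focal point=(%f,%f), k=%d%n", fpLong, fpLat, k);
  }
}
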
Example #15
 public void setLocalElasticSearchInstallation(JobConf conf) {
   String esConfigPath = conf.get(ES_CONFIG_OPT, ES_CONFIG);
   String esPluginsPath = conf.get(ES_PLUGINS_OPT, ES_PLUGINS);
   System.setProperty(ES_CONFIG_OPT, esConfigPath);
   System.setProperty(ES_PLUGINS_OPT, esPluginsPath);
   LOG.info(
       "Using Elasticsearch configuration file at "
           + esConfigPath
           + " and plugin directory "
           + esPluginsPath);
 }
Example #16
  /**
   * Configure language configuration field.
   *
   * @param path a local file taken from the distributed cache
   * @param jobConf the job configuration
   * @throws MinerApplicationException if the expected language file is not found in the cache
   */
  private void updateLanguageConfiguration(Path path, JobConf jobConf)
      throws MinerApplicationException {

    if (path.getName().equals(new Path(jobConf.get(AbstractStep.LANG_FILE)).getName())) {
      languageConfiguration = new LanguageConfiguration(jobConf.get(AbstractStep.LANG_CODE), path);
    }

    if (languageConfiguration == null) {
      throw new MinerApplicationException(
          String.format(NOT_IN_CACHE, jobConf.get(AbstractStep.LANG_FILE)));
    }
  }
Example #17
    @Override
    public void commitJob(JobContext context) throws IOException {
      JobConf conf = ShimLoader.getHadoopShims().getJobConf(context);
      Path tmpLocation = new Path(conf.get(TMP_LOCATION));
      Path finalLocation = new Path(conf.get(FINAL_LOCATION));
      FileSystem fs = tmpLocation.getFileSystem(conf);
      LOG.debug("Moving contents of " + tmpLocation.toString() + " to " + finalLocation.toString());

      FileStatus[] contents = fs.listStatus(tmpLocation);
      for (int i = 0; i < contents.length; i++) {
        Path newPath = new Path(finalLocation, contents[i].getPath().getName());
        fs.rename(contents[i].getPath(), newPath);
      }
      fs.delete(tmpLocation, true);
    }
Example #18
    public void configure(JobConf job) {
      String inputFile = job.get("map.input.file");
      String deltaInputPath = job.get(DELTA_FILE_PATH_CONF);

      if (inputFile.contains(deltaInputPath)) {
        isDeltaChar = IS_DELTA_TRUE;
      }

      StringTokenizer st = new StringTokenizer(job.get(PRIMARY_KEYS_CONF), ",");
      primaryKeyIndexes = new int[st.countTokens()];

      for (int i = 0; st.hasMoreTokens(); i++) {
        primaryKeyIndexes[i] = Integer.parseInt(st.nextToken());
      }
    }
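
For the comma-separated key list above, Configuration.getStrings already performs the splitting, so the StringTokenizer loop can be collapsed. A sketch, with the key name written out literally in place of the example's PRIMARY_KEYS_CONF constant:

import org.apache.hadoop.mapred.JobConf;

public class PrimaryKeySketch {
  static int[] primaryKeyIndexes(JobConf job) {
    // getStrings splits the comma-separated value; the key name here is hypothetical.
    String[] tokens = job.getStrings("delta.primary.keys", new String[0]);
    int[] indexes = new int[tokens.length];
    for (int i = 0; i < tokens.length; i++) {
      indexes[i] = Integer.parseInt(tokens[i].trim());
    }
    return indexes;
  }
}
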
Example #19
 @Override
 public String getTaskAttemptLogUrl(
     JobConf conf, String taskTrackerHttpAddress, String taskAttemptId)
     throws MalformedURLException {
   if (conf.get("mapreduce.framework.name") != null
       && conf.get("mapreduce.framework.name").equals("yarn")) {
     // if the cluster is running in MR2 mode, return null
     LOG.warn("Can't fetch tasklog: TaskLogServlet is not supported in MR2 mode.");
     return null;
   } else {
      // Was using a Hadoop-internal API to get task logs; disabled until MAPREDUCE-5857 is fixed.
     LOG.warn("Can't fetch tasklog: TaskLogServlet is not supported in MR1 mode.");
     return null;
   }
 }
Example #20
  private RunningJob submitAction(Context context, Namespace ns) throws Exception {
    Hive2ActionExecutor ae = new Hive2ActionExecutor();

    WorkflowAction action = context.getAction();

    ae.prepareActionDir(getFileSystem(), context);
    ae.submitLauncher(getFileSystem(), context, action);

    String jobId = action.getExternalId();
    String jobTracker = action.getTrackerUri();
    String consoleUrl = action.getConsoleUrl();
    assertNotNull(jobId);
    assertNotNull(jobTracker);
    assertNotNull(consoleUrl);
    Element e = XmlUtils.parseXml(action.getConf());
    XConfiguration conf =
        new XConfiguration(
            new StringReader(XmlUtils.prettyPrint(e.getChild("configuration", ns)).toString()));
    conf.set("mapred.job.tracker", e.getChildTextTrim("job-tracker", ns));
    conf.set("fs.default.name", e.getChildTextTrim("name-node", ns));
    conf.set("user.name", context.getProtoActionConf().get("user.name"));
    conf.set("group.name", getTestGroup());

    JobConf jobConf = Services.get().get(HadoopAccessorService.class).createJobConf(jobTracker);
    XConfiguration.copy(conf, jobConf);
    String user = jobConf.get("user.name");
    JobClient jobClient =
        Services.get().get(HadoopAccessorService.class).createJobClient(user, jobConf);
    final RunningJob runningJob = jobClient.getJob(JobID.forName(jobId));
    assertNotNull(runningJob);
    return runningJob;
  }
Example #21
  /**
   * Gets a set of input splits for a MapReduce job running over a Kiji table. One split is created
   * per region in the input Kiji table.
   *
   * @param configuration of the job using the splits. The configuration should specify the input
   *     Kiji table being used, through the configuration variable {@link
   *     KijiConfKeys#KIJI_INPUT_TABLE_URI}.
   * @param numSplits desired for the job. This framework hint is ignored by this method.
   * @return an array of input splits to be operated on in the MapReduce job.
   * @throws IOException if an I/O error occurs while communicating with HBase to determine the
   *     regions in the Kiji table.
   */
  @Override
  public InputSplit[] getSplits(JobConf configuration, int numSplits) throws IOException {
    final String uriString =
        Preconditions.checkNotNull(configuration.get(KijiConfKeys.KIJI_INPUT_TABLE_URI));
    final KijiURI inputTableURI = KijiURI.newBuilder(uriString).build();
    final Kiji kiji = Kiji.Factory.open(inputTableURI, configuration);
    try {
      final KijiTable table = kiji.openTable(inputTableURI.getTable());
      try {
        final HTableInterface htable = HBaseKijiTable.downcast(table).getHTable();

        final List<InputSplit> splits = Lists.newArrayList();
        for (KijiRegion region : table.getRegions()) {
          final byte[] startKey = region.getStartKey();
          // TODO(KIJIMR-65): For now pick the first available location (ie. region server), if any.
          final String location =
              region.getLocations().isEmpty() ? null : region.getLocations().iterator().next();
          final TableSplit tableSplit =
              new TableSplit(htable.getTableName(), startKey, region.getEndKey(), location);
          splits.add(new KijiTableSplit(tableSplit));
        }
        return splits.toArray(new InputSplit[0]);

      } finally {
        table.release();
      }
    } finally {
      kiji.release();
    }
  }
Example #22
  private static void finalize(
      Configuration conf, JobConf jobconf, final Path destPath, String presevedAttributes)
      throws IOException {
    if (presevedAttributes == null) {
      return;
    }
    EnumSet<FileAttribute> preseved = FileAttribute.parse(presevedAttributes);
    if (!preseved.contains(FileAttribute.USER)
        && !preseved.contains(FileAttribute.GROUP)
        && !preseved.contains(FileAttribute.PERMISSION)) {
      return;
    }

    FileSystem dstfs = destPath.getFileSystem(conf);
    Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
    SequenceFile.Reader in = null;
    try {
      in = new SequenceFile.Reader(dstdirlist.getFileSystem(jobconf), dstdirlist, jobconf);
      Text dsttext = new Text();
      FilePair pair = new FilePair();
      for (; in.next(dsttext, pair); ) {
        Path absdst = new Path(destPath, pair.output);
        updatePermissions(pair.input, dstfs.getFileStatus(absdst), preseved, dstfs);
      }
    } finally {
      checkAndClose(in);
    }
  }
Example #23
  /**
   * Start the child process to handle the task for us.
   *
   * @param conf the task's configuration
   * @param recordReader the fake record reader to update progress with
   * @param output the collector to send output to
   * @param reporter the reporter for the task
   * @param outputKeyClass the class of the output keys
   * @param outputValueClass the class of the output values
   * @throws IOException
   * @throws InterruptedException
   */
  Application(
      JobConf conf,
      RecordReader<FloatWritable, NullWritable> recordReader,
      OutputCollector<K2, V2> output,
      Reporter reporter,
      Class<? extends K2> outputKeyClass,
      Class<? extends V2> outputValueClass)
      throws IOException, InterruptedException {
    serverSocket = new ServerSocket(0);
    Map<String, String> env = new HashMap<String, String>();
    // add TMPDIR environment variable with the value of java.io.tmpdir
    env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
    env.put("hadoop.pipes.command.port", Integer.toString(serverSocket.getLocalPort()));
    List<String> cmd = new ArrayList<String>();
    String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
    FileUtil.chmod(executable, "a+x");
    cmd.add(executable);
    // wrap the command in a stdout/stderr capture
    TaskAttemptID taskid = TaskAttemptID.forName(conf.get("mapred.task.id"));
    File stdout = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(conf);
    cmd = TaskLog.captureOutAndError(cmd, stdout, stderr, logLength);

    process = runClient(cmd, env);
    clientSocket = serverSocket.accept();
    handler = new OutputHandler<K2, V2>(output, reporter, recordReader);
    K2 outputKey = (K2) ReflectionUtils.newInstance(outputKeyClass, conf);
    V2 outputValue = (V2) ReflectionUtils.newInstance(outputValueClass, conf);
    downlink =
        new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, outputKey, outputValue, conf);
    downlink.start();
    downlink.setJobConf(conf);
  }
Example #24
  protected static List<String> getFilesInHivePartition(Partition part, JobConf jobConf) {
    List<String> result = newArrayList();

    String ignoreFileRegex = jobConf.get(HCatTap.IGNORE_FILE_IN_PARTITION_REGEX, "");
    Pattern ignoreFilePattern = Pattern.compile(ignoreFileRegex);

    try {
      Path partitionDirPath = new Path(part.getSd().getLocation());
      FileStatus[] partitionContent =
          partitionDirPath.getFileSystem(jobConf).listStatus(partitionDirPath);
      for (FileStatus currStatus : partitionContent) {
        if (!currStatus.isDir()) {
          if (!ignoreFilePattern.matcher(currStatus.getPath().getName()).matches()) {
            result.add(currStatus.getPath().toUri().getPath());
          } else {
            LOG.debug(
                "Ignoring path {} since matches ignore regex {}",
                currStatus.getPath().toUri().getPath(),
                ignoreFileRegex);
          }
        }
      }

    } catch (IOException e) {
      logError("Unable to read the content of partition '" + part.getSd().getLocation() + "'", e);
    }

    return result;
  }
Example #25
  private static IntWritable deduceInputFile(JobConf job) {
    Path[] inputPaths = FileInputFormat.getInputPaths(job);
    Path inputFile = new Path(job.get("map.input.file"));

    // value == one for sort-input; value == two for sort-output
    return (inputFile.getParent().equals(inputPaths[0])) ? sortInput : sortOutput;
  }
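
A portability note on the "map.input.file" lookup above: on Hadoop 2 the same value is also exposed under "mapreduce.map.input.file", so code that may run against either version often checks both keys. A sketch:

import org.apache.hadoop.mapred.JobConf;

public class InputFileSketch {
  static String currentInputFile(JobConf job) {
    // Prefer the newer key, then fall back to the MR1 name used in the example.
    String file = job.get("mapreduce.map.input.file");
    return file != null ? file : job.get("map.input.file");
  }
}
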
Example #26
  public void testActionCheck() throws Exception {
    JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job =
        this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action =
        this.addRecordToWfActionTable(job.getId(), "1", WorkflowAction.Status.PREP);
    WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(action.getId());

    new ActionStartXCommand(action.getId(), "map-reduce").call();
    action = jpaService.execute(wfActionGetCmd);

    ActionExecutorContext context =
        new ActionXCommand.ActionExecutorContext(job, action, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf =
        actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action.getConf()));
    String user = conf.get("user.name");
    JobClient jobClient =
        Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);

    String launcherId = action.getExternalId();

    final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));

    waitFor(
        120 * 1000,
        new Predicate() {
          public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
          }
        });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData =
        LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));

    new ActionCheckXCommand(action.getId()).call();
    action = jpaService.execute(wfActionGetCmd);
    String mapperId = action.getExternalId();
    String childId = action.getExternalChildIDs();

    assertTrue(launcherId.equals(mapperId));

    final RunningJob mrJob = jobClient.getJob(JobID.forName(childId));

    waitFor(
        120 * 1000,
        new Predicate() {
          public boolean evaluate() throws Exception {
            return mrJob.isComplete();
          }
        });
    assertTrue(mrJob.isSuccessful());

    new ActionCheckXCommand(action.getId()).call();
    action = jpaService.execute(wfActionGetCmd);

    assertEquals("SUCCEEDED", action.getExternalStatus());
  }
Example #27
    /**
     * Passes the dataset and population size so that they are available before the mapper is
     * called.
     *
     * @param job this mapreduce job
     */
    @Override
    public void configure(JobConf job) {

      popsize = job.getInt("popsize", popsize);
      data = job.get("dataset", data);

      new Population(data, popsize);
    }
Example #28
  @Test
  public void testConfigureAccumuloInputFormatWithAuthorizations() throws Exception {
    AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
    conf.set(AccumuloSerDeParameters.AUTHORIZATIONS_KEY, "foo,bar");
    ColumnMapper columnMapper =
        new ColumnMapper(
            conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS),
            conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE),
            columnNames,
            columnTypes);
    Set<Pair<Text, Text>> cfCqPairs =
        inputformat.getPairCollection(columnMapper.getColumnMappings());
    List<IteratorSetting> iterators = Collections.emptyList();
    Set<Range> ranges = Collections.singleton(new Range());
    String instanceName = "realInstance";
    String zookeepers = "host1:2181,host2:2181,host3:2181";

    ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class);
    HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);

    // Stub out the ZKI mock
    Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName);
    Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers);

    // Call out to the real configure method
    Mockito.doCallRealMethod()
        .when(mockInputFormat)
        .configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);

    // Also compute the correct cf:cq pairs so we can assert the right argument was passed
    Mockito.doCallRealMethod()
        .when(mockInputFormat)
        .getPairCollection(columnMapper.getColumnMappings());

    mockInputFormat.configure(
        conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);

    // Verify that the correct methods are invoked on AccumuloInputFormat
    Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers, false);
    Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
    Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
    Mockito.verify(mockInputFormat).setScanAuthorizations(conf, new Authorizations("foo,bar"));
    Mockito.verify(mockInputFormat).addIterators(conf, iterators);
    Mockito.verify(mockInputFormat).setRanges(conf, ranges);
    Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs);
  }
Example #29
 @Override
 public void abortJob(JobContext context, int status) throws IOException {
   JobConf conf = ShimLoader.getHadoopShims().getJobConf(context);
   Path tmpLocation = new Path(conf.get(TMP_LOCATION));
   FileSystem fs = tmpLocation.getFileSystem(conf);
   LOG.debug("Removing " + tmpLocation.toString());
   fs.delete(tmpLocation, true);
 }
Example #30
 /**
  * Mapper configuration. Extracts source and destination file system, as well as top-level paths
  * on source and destination directories. Gets the named file systems, to be used later in map.
  */
 public void configure(JobConf job) {
   destPath = new Path(job.get(DST_DIR_LABEL, "/"));
   try {
     destFileSys = destPath.getFileSystem(job);
   } catch (IOException ex) {
     throw new RuntimeException("Unable to get the named file system.", ex);
   }
   sizeBuf = job.getInt("copy.buf.size", 128 * 1024);
   buffer = new byte[sizeBuf];
   ignoreReadFailures = job.getBoolean(Options.IGNORE_READ_FAILURES.propertyname, false);
   preserve_status = job.getBoolean(Options.PRESERVE_STATUS.propertyname, false);
   if (preserve_status) {
     preseved = FileAttribute.parse(job.get(PRESERVE_STATUS_LABEL));
   }
   update = job.getBoolean(Options.UPDATE.propertyname, false);
   overwrite = !update && job.getBoolean(Options.OVERWRITE.propertyname, false);
   this.job = job;
 }