@Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);

      InputSplit split = context.getInputSplit();

      System.out.println("***************Mapper's setup is being executed***************");
      FileSplit fileSplit = (FileSplit) split;

      long dataStart = fileSplit.getStart();
      System.out.println("***************getStart() returns " + dataStart + " ***************");

      long dataLength = fileSplit.getLength();
      System.out.println("***************getLength() returns " + dataLength + " ***************");

      String[] dataLocations = fileSplit.getLocations();
      System.out.println(
          "***************getLocations() returns "
              + dataLocations.length
              + " locations***************");

      for (int i = 0; i < dataLocations.length; i++) {
        System.out.println(
            "***************No." + i + " location is : " + dataLocations[i] + " ***************");
      }

      Path path = fileSplit.getPath();
      System.out.println(
          "***************getPath() returns " + path.toString() + " ***************");
    }
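None of these snippets includes a driver; for context, a minimal map-only job that would exercise a setup() like the one above might look as follows (SplitInfoJob, SplitInfoMapper, the job name, and the input path argument are placeholders, not taken from the original code):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

// Placeholder driver; SplitInfoMapper stands in for the mapper shown above.
public class SplitInfoJob {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "split-info");
    job.setJarByClass(SplitInfoJob.class);
    job.setMapperClass(SplitInfoMapper.class);
    job.setNumReduceTasks(0); // map-only: the setup() logging is all we want
    job.setOutputFormatClass(NullOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}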
 @Override
 protected void cleanup(Context context) throws IOException, InterruptedException {
   for (Map.Entry<Integer, WeightedVectorWritable> entry : mostDistantPoints.entrySet()) {
     context.write(new IntWritable(entry.getKey()), entry.getValue());
   }
   super.cleanup(context);
 }
Example #3
 @Override
 protected void cleanup(Context context) throws IOException, InterruptedException {
   super.cleanup(context);
   table.flushCommits();
   table.close();
   connection.close();
 }
Example #4
    /*
     * Load the document centers from HDFS into memory.
     */
    protected void setup(Context context) throws IOException, InterruptedException { // read the cluster-center vectors
      Configuration conf = context.getConfiguration();
      Path cents = new Path(CENT_PATH);
      // FileSystem fs = FileSystem.get(conf);
      FileSystem fs = cents.getFileSystem(conf);

      SequenceFile.Reader reader = new SequenceFile.Reader(fs, cents, conf);
      Text key = new Text(); // the question id
      Text value = new Text(); // that id's "word"=TFIDF,"word"=TFIDF string
      Pattern p = Pattern.compile("\"([^\"]+)\"=([^,}]+)"); // pulls out each word and its TFIDF
      while (reader.next(key, value)) {
        Map<String, Double> tfidfAndword = new HashMap<String, Double>(); // word -> TFIDF
        Matcher m = p.matcher(value.toString());
        while (m.find()) {
          String[] strs = m.group().split("=");
          if (strs.length == 2) {
            tfidfAndword.put(
                strs[0].replace("\"", "").trim(),
                Double.parseDouble(strs[1].replace("}", "").trim()));
          }
        }
        centers.put(key.toString(), tfidfAndword);
      }
      reader.close();

      super.setup(context);
    }
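The map() that consumes `centers` is not shown; as a minimal sketch under the assumption that each incoming document is parsed into the same word -> TFIDF map, the nearest center can be picked with a plain cosine similarity over the two sparse vectors (CenterAssigner and both method names are hypothetical):

import java.util.Map;

// Hypothetical helper; nothing below comes from the original example.
final class CenterAssigner {

  // Cosine similarity between two sparse TFIDF vectors stored as word -> weight maps.
  static double cosine(Map<String, Double> a, Map<String, Double> b) {
    double dot = 0.0, normA = 0.0, normB = 0.0;
    for (Map.Entry<String, Double> e : a.entrySet()) {
      normA += e.getValue() * e.getValue();
      Double w = b.get(e.getKey());
      if (w != null) {
        dot += e.getValue() * w;
      }
    }
    for (double w : b.values()) {
      normB += w * w;
    }
    return (normA == 0 || normB == 0) ? 0.0 : dot / (Math.sqrt(normA) * Math.sqrt(normB));
  }

  // Returns the id of the most similar center, or null if centers is empty.
  static String nearestCenter(Map<String, Double> doc, Map<String, Map<String, Double>> centers) {
    String best = null;
    double bestSim = Double.NEGATIVE_INFINITY;
    for (Map.Entry<String, Map<String, Double>> c : centers.entrySet()) {
      double sim = cosine(doc, c.getValue());
      if (sim > bestSim) {
        bestSim = sim;
        best = c.getKey();
      }
    }
    return best;
  }
}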
 protected void setup(Context context) throws IOException, InterruptedException {
   super.setup(context);
   this.dimIpRuleDAO = new DMIPRuleDAOImpl<Long, Map<ConstantEnum, String>>();
   this.dimIpRuleDAO.parseDMObj(new File(ConstantEnum.IP_TABLE.name().toLowerCase()));
   this.flagId = context.getConfiguration().get("flags");
 }
 @Override
 protected void map(Text key, PageInvertWritable value, Context context)
     throws IOException, InterruptedException {
   if (key.getBytes()[0] == 'W') {
     super.map(new Text(key.toString().substring(1)), value, context);
   }
 }
 @Override
 protected void cleanup(Context context) throws IOException, InterruptedException {
   try {
     // Write the last & final Mutation Batch
     if (mutationState != null) {
       writeBatch(mutationState, context);
     }
     // We are writing some dummy key-value as map output here so that we commit only one
     // output to reducer.
     context.write(
         new ImmutableBytesWritable(UUID.randomUUID().toString().getBytes()), new IntWritable(0));
     super.cleanup(context);
   } catch (SQLException e) {
     LOG.error(" Error {}  while read/write of a record ", e.getMessage());
     context.getCounter(PhoenixJobCounters.FAILED_RECORDS).increment(1);
     throw new RuntimeException(e);
   } finally {
     if (connection != null) {
       try {
         connection.close();
       } catch (SQLException e) {
         LOG.error(
             "Error {} while closing connection in the PhoenixIndexMapper class ", e.getMessage());
       }
     }
     if (writer != null) {
       writer.close();
     }
   }
 }
 @Override
 protected void setup(Mapper.Context context) throws IOException, InterruptedException {
   super.setup(context);
   logger.info("in setup of " + context.getTaskAttemptID().toString());
   String fileName = ((FileSplit) context.getInputSplit()).getPath().toString();
   System.out.println("in stdout " + context.getTaskAttemptID().toString() + " " + fileName);
   System.err.println("in stderr " + context.getTaskAttemptID().toString());
 }
  /** {@inheritDoc} */
  @Override
  protected void setup(Context ctx) throws IOException, InterruptedException {
    super.setup(ctx);

    wasSetUp = true;

    HadoopErrorSimulator.instance().onMapSetup();
  }
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);

      Configuration conf = context.getConfiguration();
      keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
      valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
    }
 @Override
 protected void setup(Context context) throws IOException, InterruptedException {
   super.setup(context);
   Configuration conf = context.getConfiguration();
   measure =
       ClassUtils.instantiateAs(
           conf.get(RepresentativePointsDriver.DISTANCE_MEASURE_KEY), DistanceMeasure.class);
   representativePoints = getRepresentativePoints(conf);
 }
 @Override
 protected void cleanup(Context ctx) throws IOException, InterruptedException {
   super.cleanup(ctx);
   // dirty trick
   ctx.write(new IntWritable(NORM_VECTOR_MARKER), new VectorWritable(norms));
   ctx.write(
       new IntWritable(NUM_NON_ZERO_ENTRIES_VECTOR_MARKER), new VectorWritable(nonZeroEntries));
   ctx.write(new IntWritable(MAXVALUE_VECTOR_MARKER), new VectorWritable(maxValues));
 }
 @Override
 protected void setup(Context context) throws IOException, InterruptedException {
   super.setup(context);
   batchID = context.getConfiguration().get(ConfigConstants.BATCH_ID);
   commandPath = context.getConfiguration().get(HADOOP_CONVERTER_PATH);
   String outputFolder = context.getConfiguration().get(HADOOP_CONVERTER_OUTPUT_PATH);
   resultExtention = context.getConfiguration().get(HADOOP_CONVERTER_OUTPUT_EXTENSION_PATH);
   batchFolder = new File(outputFolder, batchID);
   batchFolder.mkdirs();
 }
Example #14
 @Override
 protected void setup(Context context) throws IOException, InterruptedException {
   super.setup(context);
   configuration = context.getConfiguration();
   connection = HConnectionManager.createConnection(configuration);
   table = connection.getTable("");
   table.setAutoFlush(false, true);
   table.setWriteBufferSize(12 * 1024 * 1024);
   wal = true;
 }
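HConnectionManager and table-level setAutoFlush were deprecated in later HBase client releases; as a rough equivalent sketch (assuming an HBase 1.x+ client, with "my_table" and the row/column names as placeholders), a BufferedMutator gives the same write-buffering behaviour:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.BufferedMutator;
import org.apache.hadoop.hbase.client.BufferedMutatorParams;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

// Sketch only: table, row, and column names are placeholders, not from the original example.
public class BufferedWriteSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    try (Connection connection = ConnectionFactory.createConnection(conf);
        BufferedMutator mutator =
            connection.getBufferedMutator(
                new BufferedMutatorParams(TableName.valueOf("my_table"))
                    .writeBufferSize(12 * 1024 * 1024))) { // same 12 MB buffer as above
      Put put = new Put(Bytes.toBytes("row1"));
      put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes("value"));
      mutator.mutate(put); // buffered client-side until the buffer fills
      mutator.flush(); // explicit flush, analogous to table.flushCommits()
    }
  }
}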
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);

      final Configuration conf = context.getConfiguration();

      // get permissible file extensions from the configuration
      Extensions.clear();
      Extensions.addAll(conf.getStringCollection("extensions"));
    }
 @Override
 protected void setup(Context context) throws IOException, InterruptedException {
   super.setup(context);
   filter =
       PcapFilters.valueOf(
               context.getConfiguration().get(PcapFilterConfigurator.PCAP_FILTER_NAME_CONF))
           .create();
   filter.configure(context.getConfiguration());
   start = Long.parseUnsignedLong(context.getConfiguration().get(START_TS_CONF));
   end = Long.parseUnsignedLong(context.getConfiguration().get(END_TS_CONF));
 }
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {

      super.setup(context);

      id = context.getTaskAttemptID().getTaskID().getId();
      increment = context.getConfiguration().getInt("mapred.map.tasks", 0);
      if (increment == 0) {
        throw new IllegalArgumentException("mapred.map.tasks is zero");
      }
    }
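The map() that pairs with this setup isn't shown; the usual point of reading the task id and mapred.map.tasks is to let each map task emit keys from a disjoint arithmetic progression (task 0 emits 0, n, 2n, ..., task 1 emits 1, n+1, ...). A hypothetical continuation inside the same mapper class, with made-up names nextId and nextUniqueId:

     // Hypothetical helper: produces longs that are unique across all map tasks,
     // using the id and increment fields initialized in setup() above.
     private long nextId = -1;

     private long nextUniqueId() {
       if (nextId < 0) {
         nextId = id; // first value for this task is its own task id
       } else {
         nextId += increment; // subsequent values step by the total number of map tasks
       }
       return nextId;
     }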
  private void runNewMapper(
      final JobConf job, MRTaskReporter reporter, final MRInputLegacy in, KeyValueWriter out)
      throws IOException, InterruptedException {

    // Initialize input in-line since it sets parameters which may be used by the processor.
    // Done only for MRInput.
    // TODO use new method in MRInput to get required info
    // in.initialize(job, master);

    // make a task context so we can get the classes
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext = getTaskAttemptContext();

    // make a mapper
    org.apache.hadoop.mapreduce.Mapper mapper;
    try {
      mapper =
          (org.apache.hadoop.mapreduce.Mapper)
              ReflectionUtils.newInstance(taskContext.getMapperClass(), job);
    } catch (ClassNotFoundException cnfe) {
      throw new IOException(cnfe);
    }

    org.apache.hadoop.mapreduce.RecordReader input = new NewRecordReader(in);

    org.apache.hadoop.mapreduce.RecordWriter output = new NewOutputCollector(out);

    org.apache.hadoop.mapreduce.InputSplit split = in.getNewInputSplit();

    org.apache.hadoop.mapreduce.MapContext mapContext =
        new MapContextImpl(
            job, taskAttemptId, input, output, getCommitter(), processorContext, split);

    org.apache.hadoop.mapreduce.Mapper.Context mapperContext =
        new WrappedMapper().getMapContext(mapContext);

    input.initialize(split, mapperContext);
    mapper.run(mapperContext);
    this.statusUpdate();
    input.close();
    output.close(mapperContext);
  }
 @Override
 protected void setup(Context context) throws IOException, InterruptedException {
   super.setup(context);
   Configuration config = context.getConfiguration();
   log.info("*** MAPPER PATH -> " + config.get(ProClusConfigKeys.SET_PATH));
   Path medoidsSetPath = new Path(config.get(ProClusConfigKeys.SET_PATH));
   try {
     medoidSet = readMedoidsSet(medoidsSetPath, config);
   } catch (Exception e) {
     log.error("Error reading medoid set in mapper ", e);
   }
 }
Example #20
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);

      Path pt = new Path("/user/yao/query/query");
      FileSystem fs = FileSystem.get(new Configuration());
      BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(pt)));
      String line = br.readLine();
      String[] keywords = line.split(",");
      k0 = keywords[0];
      k1 = keywords[1];
      k2 = keywords[2];
      br.close();
    }
 @Override
 protected void map(
     final LongWritable key,
     final DoubleWritable value,
     final org.apache.hadoop.mapreduce.Mapper.Context context)
     throws IOException, InterruptedException {
   long positiveKey = key.get();
   double adjustedValue = value.get();
   if (positiveKey < 0) {
     // fold negative keys: k < 0 maps to index (-k - 1) and the value's sign is flipped
     positiveKey = -positiveKey - 1;
     adjustedValue *= -1;
   }
   super.map(new LongWritable(positiveKey), new DoubleWritable(adjustedValue), context);
 }
Example #22
  @Override
  public void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    String fieldStrs = context.getConfiguration().get("higo.index.fields");
    split = context.getConfiguration().get("higo.column.split", split);
    String custfields = context.getConfiguration().get("higo.column.custfields", "");
    usedthedate = context.getConfiguration().getBoolean("higo.column.userthedate", usedthedate);
    this.thedate = null;
    if (usedthedate) {
      InputSplit inputSplit = context.getInputSplit();
      Path filepath = ((FileSplit) inputSplit).getPath();
      String inputbase = context.getConfiguration().get("higo.input.base");
      this.thedate = JobIndexPublic.parseThedate(new Path(inputbase), filepath);
      System.out.println("thedatepath: " + thedate + "@" + filepath.toString() + "@" + inputbase);
    }

    if (custfields == null || custfields.isEmpty()) {

      String[] fieldslist = fieldStrs.split(",");
      this.fields = new String[fieldslist.length];
      this.isDate = new Boolean[fieldslist.length];
      this.isString = new Boolean[fieldslist.length];
      this.isStore = new Boolean[fieldslist.length];

      for (int i = 0; i < fieldslist.length; i++) {
        String[] fieldSchema = fieldslist[i].split(":");
        String fieldName = fieldSchema[0].trim().toLowerCase();
        String type = fieldSchema[1];
        this.isStore[i] = Boolean.valueOf(fieldSchema[3]);
        this.fields[i] = fieldName;
        this.isDate[i] = type.equalsIgnoreCase("tdate");
        this.isString[i] = type.equalsIgnoreCase("string");
      }
    } else {
      String[] fieldslist = custfields.split(",");
      this.fields = new String[fieldslist.length];
      this.isDate = new Boolean[fieldslist.length];
      this.isString = new Boolean[fieldslist.length];
      this.isStore = new Boolean[fieldslist.length];

      for (int i = 0; i < fieldslist.length; i++) {
        this.isStore[i] = false;
        this.fields[i] = fieldslist[i];
        this.isDate[i] = false;
        this.isString[i] = true;
      }
    }
  }
Example #23
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);
      Configuration conf = context.getConfiguration();
      Path centroids = new Path(conf.get(CENTERS_CONF_KEY));
      FileSystem fs = FileSystem.get(conf);

      SequenceFile.Reader reader = new SequenceFile.Reader(fs, centroids, conf);

      Centroid key = new Centroid();
      IntWritable value = new IntWritable();
      while (reader.next(key, value)) {
        Centroid clusterCenter = new Centroid(key);
        centers.add(clusterCenter);
      }
      reader.close();
    }
Example #24
 @Override
 protected void setup(Context context) throws IOException, InterruptedException {
   super.setup(context);
   String json = context.getConfiguration().get("m");
   model = new Gson().fromJson(json, OPEModel.class);
   String l = context.getConfiguration().get("l");
   String r = context.getConfiguration().get("r");
   int i = l.indexOf(":");
   left = Double.parseDouble(l.substring(0, i));
   bl = l.substring(i + 1);
   i = r.indexOf(":");
   right = Double.parseDouble(r.substring(0, i));
   br = r.substring(i + 1);
   li = model.cIndex(left);
   ri = model.cIndex(right);
 }
Example #25
 protected void setup(Context context) throws IOException, InterruptedException {
   super.setup(context);
   scriptEngine = scriptEngineManager.getEngineByName("JavaScript");
   try {
     Configuration configuration = context.getConfiguration();
     mapOutputKey = MrUtils.getWritableComparableType("js.map.output.key.type", configuration);
     mapOutputValue = MrUtils.getWritableComparableType("js.map.output.value.type", configuration);
     scriptEngine.eval(
         MrUtils.getScripts(
             context.getConfiguration(), MrUtils.getPathFilter("js.map.filename", configuration)));
     scriptEngine.put("mapOutputKey", mapOutputKey);
     scriptEngine.put("mapOutputValue", mapOutputValue);
     OUTPUT_KEY_CLASS = mapOutputKey.getClass();
     OUTPUT_VALUE_CLASS = mapOutputValue.getClass();
   } catch (ScriptException se) {
     IOException ioe = new IOException(se);
     ioe.setStackTrace(se.getStackTrace());
     throw ioe;
   }
 }
    @SuppressWarnings("deprecation")
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);
      _currKMeansClusters.clear();
      _currCanopyClusters.clear();

      Configuration conf = context.getConfiguration();
      Path centroids = new Path(conf.get("centers.path"));
      FileSystem fs = FileSystem.get(conf);
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, centroids, conf);

      Cluster canopyCenter = new Cluster();
      Cluster kMeansCenter = new Cluster();

      while (reader.next(canopyCenter, kMeansCenter)) {
        _currKMeansClusters.add(new Cluster(kMeansCenter));
        _currCanopyClusters.add(new Cluster(canopyCenter));
      }

      reader.close();
    }
  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    // read the centroids from all the files
    for (int i = 0; i < 5; i++) {
      Path path = new Path("/kmeansDM/centroid/centroid_" + i + ".txt");
      FileSystem fs = FileSystem.get(new Configuration());
      BufferedReader bf = new BufferedReader(new InputStreamReader(fs.open(path)));
      String centroidsLine = bf.readLine();
      String[] arr = centroidsLine.split("\t");
      if (!initDone) {
        centroidsList = new double[5][arr.length];
        initDone = true;
      }
      for (int j = 0; j < arr.length; j++) {
        centroidsList[i][j] = Double.parseDouble(arr[j].trim());
      }
      bf.close();
      // fs.close();
    }
  }
  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    // load invalid characters
    invalid = new ArrayList<String>();
    BufferedReader br =
        new BufferedReader(
            new InputStreamReader(
                SearchKeywordMapper.class.getResourceAsStream(Const.INVALID_CHARACTOR_PATH)));
    String line = null;
    while (null != (line = br.readLine())) {
      line = line.trim();
      invalid.add(line);
    }
    br.close();

    String column = context.getConfiguration().get("conf.column");
    byte[][] colkey = KeyValue.parseColumn(Bytes.toBytes(column));
    family = colkey[0];
    if (colkey.length > 1) {
      qualifier = colkey[1];
    }
  }
Example #29
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);
      this.dmIPRuleDAO = new DMIPRuleDAOImpl<Long, Map<ConstantEnum, String>>();
      this.dmOuterURLRuleDAO = new DMOuterURLRuleImpl<String, Map<ConstantEnum, String>>();
      this.dmKeywordRuleDAO = new DMKeywordRuleDAOImpl<String, Map<ConstantEnum, String>>();
      this.dmInterURLRuleDAO = new DMInterURLImpl();

      this.dmIPRuleDAO.parseDMObj(new File(ConstantEnum.IP_TABLE.name().toLowerCase()));
      this.dmOuterURLRuleDAO.parseDMObj(new File(ConstantEnum.DM_OUTER_URL.name().toLowerCase()));
      this.dmInterURLRuleDAO.parseDMObj(new File(ConstantEnum.DM_INTER_URL.name().toLowerCase()));
      this.dmKeywordRuleDAO.parseDMObj(
          new File(ConstantEnum.DM_URL_KEYWORD_2.name().toLowerCase()));

      multipleOutputs = new MultipleOutputs<Text, Text>(context);

      dateId = context.getConfiguration().get("dateid");

      keyText = new Text();
      valueText = new Text();

      FileSplit fileSplit = (FileSplit) context.getInputSplit();
      filePath = fileSplit.getPath().getParent().toString();
    }
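The matching cleanup() isn't part of this snippet; MultipleOutputs keeps its own record writers and has to be closed explicitly or the named outputs can be lost. A minimal sketch, assuming the multipleOutputs field initialized above:

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
      multipleOutputs.close(); // flushes and closes all named-output writers
      super.cleanup(context);
    }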
  @Override
  protected void setup(final Context context) throws IOException, InterruptedException {
    super.setup(context);
    final Configuration configuration = context.getConfiguration();
    writer = new DirectHTableWriter(configuration);

    try {
      indxTblColumnMetadata = PhoenixConfigurationUtil.getUpsertColumnMetadataList(configuration);
      indxWritable.setColumnMetadata(indxTblColumnMetadata);

      final Properties overrideProps = new Properties();
      String scn = configuration.get(PhoenixConfigurationUtil.CURRENT_SCN_VALUE);
      String txScnValue = configuration.get(PhoenixConfigurationUtil.TX_SCN_VALUE);
      if (txScnValue == null) {
        overrideProps.put(PhoenixRuntime.CURRENT_SCN_ATTRIB, scn);
      }
      connection = ConnectionUtil.getOutputConnection(configuration, overrideProps);
      connection.setAutoCommit(false);
      // Get BatchSize
      ConnectionQueryServices services = ((PhoenixConnection) connection).getQueryServices();
      int maxSize =
          services
              .getProps()
              .getInt(
                  QueryServices.MAX_MUTATION_SIZE_ATTRIB,
                  QueryServicesOptions.DEFAULT_MAX_MUTATION_SIZE);
      batchSize = Math.min(((PhoenixConnection) connection).getMutateBatchSize(), maxSize);
      LOG.info("Mutation Batch Size = " + batchSize);

      final String upsertQuery = PhoenixConfigurationUtil.getUpsertStatement(configuration);
      this.pStatement = connection.prepareStatement(upsertQuery);

    } catch (SQLException e) {
      throw new RuntimeException(e);
    }
  }