Code Example #1
    @Override
    protected void map(IntWritable column, VectorWritable occurrenceVector, Context ctx)
        throws IOException, InterruptedException {
      Vector.Element[] occurrences = Vectors.toArray(occurrenceVector);
      Arrays.sort(occurrences, BY_INDEX);

      int cooccurrences = 0;
      int prunedCooccurrences = 0;
      for (int n = 0; n < occurrences.length; n++) {
        Vector.Element occurrenceA = occurrences[n];
        Vector dots = new RandomAccessSparseVector(Integer.MAX_VALUE);
        for (int m = n; m < occurrences.length; m++) {
          Vector.Element occurrenceB = occurrences[m];
          if (threshold == NO_THRESHOLD || consider(occurrenceA, occurrenceB)) {
            dots.setQuick(
                occurrenceB.index(), similarity.aggregate(occurrenceA.get(), occurrenceB.get()));
            cooccurrences++;
          } else {
            prunedCooccurrences++;
          }
        }
        ctx.write(new IntWritable(occurrenceA.index()), new VectorWritable(dots));
      }
      ctx.getCounter(Counters.COOCCURRENCES).increment(cooccurrences);
      ctx.getCounter(Counters.PRUNED_COOCCURRENCES).increment(prunedCooccurrences);
    }
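The Counters enum referenced above is not part of the snippet. A minimal sketch, assuming the usual Mahout-style declaration (Hadoop groups such counters under the enum's class name):

    // Assumed declaration, not from the original source: the enum whose
    // constants the mapper above increments via ctx.getCounter(...).
    enum Counters {
      COOCCURRENCES,
      PRUNED_COOCCURRENCES
    }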
Code Example #2
  /** Called for every record in the data */
  @Override
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // Skip enormous documents: they cause memory problems and the regex cannot handle them.
    if (value.getLength() > MAX_DOC_SIZE_IN_BYTES) {
      context.getCounter(ProcessingTime.SKIPPED).increment(1);
      return;
    }

    // Parse the document and measure the time taken
    t1 = System.nanoTime();
    Spinn3rDocument d = new Spinn3rDocument(value.toString());
    t2 = System.nanoTime();
    context.getCounter(ProcessingTime.PARSING).increment(t2 - t1);

    // Check whether the document satisfies the search conditions, timing the filter
    t1 = System.nanoTime();
    t = filter.documentSatisfies(d);
    t2 = System.nanoTime();
    context.getCounter(ProcessingTime.FILTERING).increment(t2 - t1);

    // Emit the document if it satisfied the filter
    if (t) {
      if (cmdMap.hasOption("formatF5")) {
        context.write(new Text(d.toStringF5()), NullWritable.get());
      } else {
        context.write(new Text(d.toString()), NullWritable.get());
      }
    }
  }
Code Example #3
    /** fix a stripe */
    @Override
    public void map(LongWritable key, Text fileText, Context context)
        throws IOException, InterruptedException {

      BlockFixerHelper helper = new BlockFixerHelper(context.getConfiguration());

      String fileStr = fileText.toString();
      LOG.info("fixing " + fileStr);

      Path file = new Path(fileStr);

      try {
        boolean fixed = helper.fixFile(file, context);

        if (fixed) {
          context.getCounter(Counter.FILES_SUCCEEDED).increment(1L);
        } else {
          context.getCounter(Counter.FILES_NOACTION).increment(1L);
        }
      } catch (Exception e) {
        LOG.error(StringUtils.stringifyException(e));

        // report file as failed
        context.getCounter(Counter.FILES_FAILED).increment(1L);
        String outkey = fileStr;
        String outval = "failed";
        context.write(new Text(outkey), new Text(outval));
      }

      context.progress();
    }
Code Example #4
    @SuppressWarnings("deprecation")
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
      super.cleanup(context);
      List<Cluster> newKMeansClusters = new ArrayList<Cluster>();
      List<Cluster> newCanopyClusters = new ArrayList<Cluster>();

      for (Cluster kMeansCluster : _clusters.keySet()) {
        Cluster canopyCluster = _kMeansToCanopyMap.get(kMeansCluster);

        // Set a new Cluster center
        Vector center = new Vector();
        center.setElements(new double[kMeansCluster.getCenterVector().getElements().length]);
        List<Vector> vectors = new ArrayList<Vector>();

        for (Vector currentVector : _clusters.get(kMeansCluster)) {
          vectors.add(new Vector(currentVector));

          // Sum the vectors element-wise; after dividing by the count below, the
          // result becomes the new cluster center.
          for (int i = 0; i < currentVector.getElements().length; i++)
            center.getElements()[i] += currentVector.getElements()[i];
        }

        // Divide each element by the number of member vectors to get the mean
        // (the center is a synthetic vector, not one of the input points)
        for (int i = 0; i < center.getElements().length; i++)
          center.getElements()[i] = center.getElements()[i] / vectors.size();

        Cluster newKMeansCluster = new Cluster(center);
        canopyCluster.setIsCovered(newKMeansCluster.isConvergedWithOtherCluster(kMeansCluster));
        newKMeansClusters.add(newKMeansCluster);
        newCanopyClusters.add(canopyCluster);

        // Emit each member vector keyed by the new cluster center
        for (Vector vector : vectors) {
          context.write(newKMeansCluster, vector);
        }
      }

      Configuration conf = context.getConfiguration();
      Path outPath = new Path(conf.get("centers.path"));
      FileSystem fs = FileSystem.get(conf);

      if (fs.exists(outPath)) fs.delete(outPath, true);

      SequenceFile.Writer writer =
          SequenceFile.createWriter(
              fs, context.getConfiguration(), outPath, Cluster.class, Cluster.class);
      context.getCounter(Counter.CONVERGED).setValue(0);

      for (int i = 0; i < newKMeansClusters.size(); i++) {
        writer.append(newCanopyClusters.get(i), newKMeansClusters.get(i));

        if (newCanopyClusters.get(i).getIsCovered())
          context.getCounter(Counter.CONVERGED).increment(1);
      }

      writer.close();
    }
Code Example #5
  @Override
  protected void map(NullWritable key, PhoenixIndexDBWritable record, Context context)
      throws IOException, InterruptedException {

    context.getCounter(PhoenixJobCounters.INPUT_RECORDS).increment(1);

    try {
      final List<Object> values = record.getValues();
      indxWritable.setValues(values);
      indxWritable.write(this.pStatement);
      this.pStatement.execute();

      final PhoenixConnection pconn = connection.unwrap(PhoenixConnection.class);
      MutationState currentMutationState = pconn.getMutationState();
      if (mutationState == null) {
        mutationState = currentMutationState;
        return;
      }
      // Keep accumulating mutations until the batch size is reached
      mutationState.join(currentMutationState);

      // Write Mutation Batch
      if (context.getCounter(PhoenixJobCounters.INPUT_RECORDS).getValue() % batchSize == 0) {
        writeBatch(mutationState, context);
        mutationState = null;
      }

      // Make sure progress is reported to Application Master.
      context.progress();
    } catch (SQLException e) {
      LOG.error(" Error {}  while read/write of a record ", e.getMessage());
      context.getCounter(PhoenixJobCounters.FAILED_RECORDS).increment(1);
      throw new RuntimeException(e);
    }
  }
Code Example #6
File: IndexMapper.java Project: haiyang1987/mdrill
  private boolean line(String record, Context context) throws IOException, InterruptedException {
    context.getCounter("higo", "totalrecord").increment(1);
    String[] values = record.split(split, -1);
    if (!this.validate(values, record, context)) {
      return false;
    }

    HashMap<String, String> res = new HashMap<String, String>(fields.length);
    for (int i = 0; i < fields.length; i++) {
      String fieldName = fields[i];
      String string = (i < values.length) ? values[i] : null;
      String val = parseDefault(string);

      if (this.isDate[i]) {
        res.put(fieldName, TdateFormat.ensureTdate(val, fieldName));
      } else if (val != null) {
        res.put(fieldName, val);
      } else if (this.isString[i]) {
        res.put(fieldName, "_");
      }
    }

    if (usedthedate) {
      if (thedate != null) {
        res.put("thedate", thedate); // 从文件的路径中获取
      }

      res.put(
          "thedate", String.valueOf(res.get("thedate")).replaceAll("-", "").replaceAll("_", ""));

      if (res.get("thedate").length() != 8) {
        if (debuglines < 100) {
          debuglines++;
          System.out.println("miss thedate values: " + record.replaceAll(split, "#") + "");
        }
        context.getCounter("higo", "skiprecords").increment(1);
      }

      context.getCounter("higo", "dayrecord_" + String.valueOf(res.get("thedate"))).increment(1);

      CRC32 crc32 = new CRC32();
      crc32.update(java.util.UUID.randomUUID().toString().getBytes());
      res.put("higo_uuid", Long.toString(crc32.getValue()));
    }

    if (printlines < 10) {
      printlines++;
      System.out.println("res: " + res.toString() + " arrays," + Arrays.toString(values));
    }

    this.doclist.add(res);
    if (this.doclist.size() >= PER) {
      context.write(new LongWritable(this.Index++), new DocumentList(this.doclist));
      this.doclist.clear();
    }

    return true;
  }
Code Example #7
File: LoadTest.java Project: hypertable/hypertable
 public void reduce(KeyWritable key, Iterable<BytesWritable> values, Context ctx)
     throws IOException, InterruptedException {
   for (BytesWritable value : values) {
     long startTime = System.currentTimeMillis();
     ctx.write(key, value);
     long elapsedTime = System.currentTimeMillis() - startTime;
     ctx.getCounter(Counters.ELAPSED_TIME_MS).increment(elapsedTime);
     ctx.getCounter(Counters.NUM_CELLS).increment(1);
   }
 }
Code Example #8
  @SuppressWarnings("deprecation")
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    if (conn == null) {
      throw new RuntimeException("Connection not initialized.");
    }
    try {
      RECORD record = null;
      try {
        record = getLineParser().parse(value.toString());
      } catch (IOException e) {
        context.getCounter(COUNTER_GROUP_NAME, "Parser errors").increment(1L);
        return;
      }

      if (record == null) {
        context.getCounter(COUNTER_GROUP_NAME, "Empty records").increment(1L);
        return;
      }
      upsertExecutor.execute(ImmutableList.<RECORD>of(record));
      Map<Integer, List<KeyValue>> map = new HashMap<>();
      Iterator<Pair<byte[], List<KeyValue>>> uncommittedDataIterator =
          PhoenixRuntime.getUncommittedDataIterator(conn, true);
      while (uncommittedDataIterator.hasNext()) {
        Pair<byte[], List<KeyValue>> kvPair = uncommittedDataIterator.next();
        List<KeyValue> keyValueList = kvPair.getSecond();
        keyValueList = preUpdateProcessor.preUpsert(kvPair.getFirst(), keyValueList);
        byte[] first = kvPair.getFirst();
        // Create a list of KV for each table
        for (int i = 0; i < tableNames.size(); i++) {
          if (Bytes.compareTo(Bytes.toBytes(tableNames.get(i)), first) == 0) {
            if (!map.containsKey(i)) {
              map.put(i, new ArrayList<KeyValue>());
            }
            List<KeyValue> list = map.get(i);
            for (KeyValue kv : keyValueList) {
              list.add(kv);
            }
            break;
          }
        }
      }
      for (Map.Entry<Integer, List<KeyValue>> rowEntry : map.entrySet()) {
        int tableIndex = rowEntry.getKey();
        List<KeyValue> lkv = rowEntry.getValue();
        // Combine all KVs for the table into a single byte array
        writeAggregatedRow(context, tableNames.get(tableIndex), lkv);
      }
      conn.rollback();
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
Code Example #9
    /**
     * Maps the data.
     *
     * @param row The current table row key.
     * @param values The columns.
     * @param context The current context.
     * @throws IOException When something is broken with the data.
     * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
     *     org.apache.hadoop.mapreduce.Mapper.Context)
     */
    @Override
    public void map(ImmutableBytesWritable row, Result values, Context context) throws IOException {
      String currentFamilyName = null;
      String currentQualifierName = null;
      String currentRowKey = null;
      Configuration config = context.getConfiguration();
      String separator = config.get("ReportSeparator", ":");

      try {
        if (values != null) {
          context.getCounter(Counters.ROWS).increment(1);
          context.write(new Text("Total ROWS"), new IntWritable(1));
        }

        for (KeyValue value : values.list()) {
          currentRowKey = Bytes.toStringBinary(value.getRow());
          String thisRowFamilyName = Bytes.toStringBinary(value.getFamily());
          if (thisRowFamilyName != null && !thisRowFamilyName.equals(currentFamilyName)) {
            currentFamilyName = thisRowFamilyName;
            context.getCounter("CF", thisRowFamilyName).increment(1);
            context.write(new Text("Total Families Across all Rows"), new IntWritable(1));
            context.write(new Text(thisRowFamilyName), new IntWritable(1));
          }
          String thisRowQualifierName =
              thisRowFamilyName + separator + Bytes.toStringBinary(value.getQualifier());
          if (thisRowQualifierName != null && !thisRowQualifierName.equals(currentQualifierName)) {
            currentQualifierName = thisRowQualifierName;
            context.getCounter("CFQL", thisRowQualifierName).increment(1);
            context.write(new Text("Total Qualifiers across all Rows"), new IntWritable(1));
            context.write(new Text(thisRowQualifierName), new IntWritable(1));
            // Initialize versions
            context
                .getCounter("QL_VERSIONS", currentRowKey + separator + thisRowQualifierName)
                .increment(1);
            context.write(
                new Text(currentRowKey + separator + thisRowQualifierName + "_Versions"),
                new IntWritable(1));

          } else {
            // Increment versions
            currentQualifierName = thisRowQualifierName;
            context
                .getCounter("QL_VERSIONS", currentRowKey + separator + thisRowQualifierName)
                .increment(1);
            context.write(
                new Text(currentRowKey + separator + thisRowQualifierName + "_Versions"),
                new IntWritable(1));
          }
        }
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }
Code Example #10
File: GATKReducer.java Project: svandenhoek/halvade
  protected void baseQualityScoreRecalibration(
      Context context,
      String region,
      ChromosomeRange r,
      PreprocessingTools tools,
      GATKTools gatk,
      String input,
      String output)
      throws InterruptedException, IOException, URISyntaxException {
    String table = tmpFileBase + ".table";

    // get snp database(s)
    String[] snpslocal = HalvadeFileUtils.downloadSites(context, taskId);
    String[] newKnownSites = new String[snpslocal.length];
    for (int i = 0; i < snpslocal.length; i++) {
      if (filterDBsnp) {
        newKnownSites[i] =
            tools.filterDBSnps(
                ref.replaceAll("fasta", "dict"), snpslocal[i], r, tmpFileBase, threads);
      } else {
        newKnownSites[i] = snpslocal[i];
      }
      if (newKnownSites[i].endsWith(".gz")) {
        newKnownSites[i] = HalvadeFileUtils.Unzip(newKnownSites[i]);
      }
    }

    // should be created automatically by GATK v3.0 or higher
    //        Logger.DEBUG("build bam index");
    //        context.setStatus("build bam index");
    //        tools.runBuildBamIndex(tmpFile1);
    Logger.DEBUG("run baseRecalibrator");
    context.setStatus("run baseRecalibrator");
    context.getCounter(HalvadeCounters.TOOLS_GATK).increment(1);
    gatk.runBaseRecalibrator(input, table, ref, newKnownSites, region);

    Logger.DEBUG("run printReads");
    context.setStatus("run printReads");
    context.getCounter(HalvadeCounters.TOOLS_GATK).increment(1);
    gatk.runPrintReads(input, output, ref, table, region);

    HalvadeFileUtils.removeLocalFile(keep, input, context, HalvadeCounters.FOUT_GATK_TMP);
    HalvadeFileUtils.removeLocalFile(keep, input.replaceAll(".bam", ".bai"));
    HalvadeFileUtils.removeLocalFile(keep, table, context, HalvadeCounters.FOUT_GATK_TMP);
    for (int i = 0; i < newKnownSites.length; i++) {
      if (filterDBsnp) {
        HalvadeFileUtils.removeLocalFile(
            keep, newKnownSites[i], context, HalvadeCounters.FOUT_GATK_TMP);
      }
    }
  }
Code Example #11
File: GATKReducer.java Project: svandenhoek/halvade
  protected void RnaVariantCalling(
      Context context, String region, GATKTools gatk, String input, String output)
      throws InterruptedException {
    // choose between UnifiedGenotyper and HaplotypeCaller
    Logger.DEBUG("run variantCaller");
    context.setStatus("run variantCaller");
    context.getCounter(HalvadeCounters.TOOLS_GATK).increment(1);
    gatk.runHaplotypeCaller(input, output, true, scc, sec, ref, null, region);

    context.setStatus("cleanup");
    context.getCounter(HalvadeCounters.OUT_VCF_FILES).increment(1);

    HalvadeFileUtils.removeLocalFile(keep, input, context, HalvadeCounters.FOUT_GATK_TMP);
    HalvadeFileUtils.removeLocalFile(keep, input.replaceAll(".bam", ".bai"));
  }
Code Example #12
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String val = value.toString();
   String[] tokens = val.split(" "); // split once instead of re-splitting on every access
   String label = "";
   String input_label = "";
   if (val.contains("Processed")) context.getCounter(Driver.Progress.Completion).increment(1);
   if (val.contains("Training")) {
     if (!tokens[1].contains(":")) {
       label = tokens[1]; // first label
       for (int i = 2; i < tokens.length; i++) {
         input_label = input_label + " " + tokens[i];
       }
       input_label = tokens[0] + "#" + label + " " + input_label;
       context.write(
           new Text(tokens[0].substring(tokens[0].lastIndexOf("g") + 1) + label),
           new Text(input_label));
     }
   } else {
     if (!tokens[2].contains(":")) {
       label = tokens[2];
       for (int i = 1; i < tokens.length; i++) {
         input_label = input_label + tokens[i];
       }
       input_label = tokens[0] + "#" + label + " " + input_label;
       context.write(
           new Text(tokens[0].substring(tokens[0].lastIndexOf("t") + 1) + label),
           new Text(input_label));
     }
   }
 } // map ends
Code Example #13
  @Override
  protected void map(Object key, Object value, Context context)
      throws IOException, InterruptedException {
    try {
      final InputRow inputRow;
      try {
        inputRow = parseInputRow(value, parser);
      } catch (Exception e) {
        if (config.isIgnoreInvalidRows()) {
          log.debug(e, "Ignoring invalid row [%s] due to parsing error", value.toString());
          context
              .getCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER)
              .increment(1);
          return; // we're ignoring this invalid row
        } else {
          throw e;
        }
      }

      if (!granularitySpec.bucketIntervals().isPresent()
          || granularitySpec
              .bucketInterval(new DateTime(inputRow.getTimestampFromEpoch()))
              .isPresent()) {
        innerMap(inputRow, value, context);
      }
    } catch (RuntimeException e) {
      throw new RE(e, "Failure on row[%s]", value);
    }
  }
Code Example #14
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      HashMap<String, Integer> wordCounts = new HashMap<String, Integer>();
      String article = value.toString();
      String articleName = article.substring(0, article.indexOf("\t"));
      String articleContent = article.substring(article.indexOf("\t") + 1);
      Iterable<String> words = parseWords(context, articleContent);
      int wordCount = 0;

      for (String word : words) {
        if (!topWords.contains(word)) {
          addCount(wordCounts, word);
        }
        wordCount++;
      }

      for (java.util.Map.Entry<String, Integer> wordAndCount : wordCounts.entrySet()) {
        double frequency = ((double) wordAndCount.getValue()) / wordCount;
        String word = wordAndCount.getKey();
        textWritable.set(word);
        articleNameAndFrequency.set(articleName, frequency);
        context.write(textWritable, articleNameAndFrequency);
      }
      context.getCounter(Counters.ARTICLE_COUNTER).increment(1);
    }
Code Example #15
 @Override
 protected void cleanup(Context context) throws IOException, InterruptedException {
   try {
     // Write the last & final Mutation Batch
     if (mutationState != null) {
       writeBatch(mutationState, context);
     }
     // Write a single dummy key-value as map output so that exactly one record
     // reaches the reducer.
     context.write(
         new ImmutableBytesWritable(UUID.randomUUID().toString().getBytes()), new IntWritable(0));
     super.cleanup(context);
   } catch (SQLException e) {
     LOG.error(" Error {}  while read/write of a record ", e.getMessage());
     context.getCounter(PhoenixJobCounters.FAILED_RECORDS).increment(1);
     throw new RuntimeException(e);
   } finally {
     if (connection != null) {
       try {
         connection.close();
       } catch (SQLException e) {
         LOG.error(
             "Error {} while closing the connection in the PhoenixIndexMapper class", e.getMessage());
       }
     }
     if (writer != null) {
       writer.close();
     }
   }
 }
Code Example #16
 @Override
 public void map(LongWritable key, Aquaint2Document doc, Context context)
     throws IOException, InterruptedException {
   context.getCounter(Count.DOCS).increment(1);
   docid.set(doc.getDocid());
   context.write(docid, one);
 }
Code Example #17
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    String valueStr = value.toString();

    String[] arr = valueStr.split("\t", -1);
    if (arr.length == 2) {
      uid = arr[0];
      keyword = arr[1];

      // clean data
      for (String ic : invalid) {
        if (keyword.indexOf(ic) >= 0) {
          System.out.println(ic + "||" + keyword);
          return;
        }
      }

      byte[] rowkey = uid.getBytes();
      Put put = new Put(rowkey);
      put.add(family, qualifier, Bytes.toBytes(keyword));
      context.write(new ImmutableBytesWritable(rowkey), put);
      context.getCounter("hbase-import", "keyword-line").increment(1);
    }
  }
Code Example #18
    @Override
    protected void map(IntWritable row, VectorWritable vectorWritable, Context ctx)
        throws IOException, InterruptedException {

      Vector rowVector = similarity.normalize(vectorWritable.get());

      int numNonZeroEntries = 0;
      double maxValue = Double.MIN_VALUE;

      Iterator<Vector.Element> nonZeroElements = rowVector.iterateNonZero();
      while (nonZeroElements.hasNext()) {
        Vector.Element element = nonZeroElements.next();
        RandomAccessSparseVector partialColumnVector =
            new RandomAccessSparseVector(Integer.MAX_VALUE);
        partialColumnVector.setQuick(row.get(), element.get());
        ctx.write(new IntWritable(element.index()), new VectorWritable(partialColumnVector));

        numNonZeroEntries++;
        if (maxValue < element.get()) {
          maxValue = element.get();
        }
      }

      if (threshold != NO_THRESHOLD) {
        nonZeroEntries.setQuick(row.get(), numNonZeroEntries);
        maxValues.setQuick(row.get(), maxValue);
      }
      norms.setQuick(row.get(), similarity.norm(rowVector));

      ctx.getCounter(Counters.ROWS).increment(1);
    }
Code Example #19
File: LineCountMapper.java Project: YzPaul3/ankus
 @Override
 protected void setup(Context context) throws IOException, InterruptedException {
   long start = ((FileSplit) context.getInputSplit()).getStart();
   logger.info("Input Split : ", context.getInputSplit().toString());
   logger.info("Input Split Start : {}", start);
   counter = context.getCounter(getClass().getName(), String.valueOf(start));
 }
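Example #19 only caches a Counter object in setup(). A minimal sketch of a matching map() (assumed usage and assumed LongWritable/Text types, not from the ankus source) that increments the cached counter once per record, avoiding a getCounter() lookup on every call:

   // Hypothetical companion to the setup() above.
   @Override
   protected void map(LongWritable key, Text value, Context context)
       throws IOException, InterruptedException {
     counter.increment(1); // one increment per input line of this split
   }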
Code Example #20
File: KMeansMapper.java Project: mackstone/mdrill
  @Override
  protected void map(WritableComparable<?> key, Text point, Context context)
      throws IOException, InterruptedException {

    Cluster nearestCluster = null;
    double nearestDistance = Double.MAX_VALUE;
    Vector pointv = parse.parseVector(point.toString());
    if (pointv == null) {
      return;
    }
    pointv.setNumPoints(1);
    for (Cluster cluster : clusters) {
      Vector clusterCenter = cluster.getCenter();

      boolean isDeny = pointv.Deny(clusterCenter);
      if (isDeny) {
        continue;
      }
      double distance = clusterCenter.distiance(pointv);
      context.getCounter("Clustering", "similar").increment(1);

      if (distance <= nearestDistance || nearestCluster == null) {
        nearestCluster = cluster;
        nearestDistance = distance;
      }
    }
    if (nearestCluster != null) {
      context.write(new Text(String.valueOf(nearestCluster.getId())), pointv);
    }
  }
Code Example #21
  @Override
  public void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {

    int ratingsCount = 0;

    /*
     * Sum the rating counts for this key.
     */
    for (IntWritable value : values) {
      ratingsCount += value.get();
    }

    /*
     * Reducer output is the userID and number of ratings.
     */
    context.getCounter(UserIdCounter.NUM_USERS).increment(1);
    rValue.set(ratingsCount);
    context.write(key, rValue);
  }
Code Example #22
 /** Called once at the very beginning */
 @Override
 public void setup(Context context) {
   t1 = System.nanoTime();
   cmdMap = ParseCLI.parse(context.getConfiguration().getStrings("args"));
   filter = new DocumentFilter(cmdMap);
   t2 = System.nanoTime();
   context.getCounter(ProcessingTime.SETUP).increment(t2 - t1);
 }
Code Example #23
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {

    String[] fields = null;
    String id = null,
        symbol = null,
        date = null,
        cap = null,
        price = null,
        rowKey = null,
        rowVal = null;
    try {
      fields = value.toString().split(",");
    } catch (Exception ex) {
      context.getCounter("HBaseKVMapper", "PARSE_ERRORS").increment(1);
      return;
    }

    if (fields.length > 0 && fields[0] != null && !fields[0].equals("")) {
      id = fields[0];
    }

    if (fields.length > 1 && fields[1] != null && !fields[1].equals("")) {
      date = fields[1];
    }

    if (fields.length > 2 && fields[2] != null && !fields[2].equals("")) {
      symbol = fields[2];
    }

    if (fields.length > 3 && fields[3] != null && !fields[3].equals("")) {
      price = fields[3];
    }

    if (fields.length > 4 && fields[4] != null && !fields[4].equals("")) {
      cap = fields[4];
    }
    if (id != null && symbol != null) {
      rowKey = id + "_" + symbol;
      rowVal = date + "_" + price + "_" + cap;
      context.write(new Text(rowKey), new Text(rowVal));
    }

    context.getCounter("HBaseKVMapper", "NUM_MSGS").increment(1);
  }
Code Example #24
    /**
     * @param row the row key; it cannot be converted directly via toString
     * @param value when read, yields jack/course:english/1436449989754/Put/vlen=2/seqid=0; the value cannot be extracted on its own
     * @param context
     */
    @Override
    public void map(ImmutableBytesWritable row, Result value, Context context)
        throws UnsupportedEncodingException {
      context.getCounter(Counters.ROWS).increment(1);
      System.out.println(context.getCounter(Counters.ROWS).getValue());
      String s = new String(row.copyBytes(), "GB2312");
      System.out.println(s);

      List<Cell> valuelist = value.getColumnCells("course".getBytes(), "english".getBytes());
      // The result is valuelist[0] = "jack/course:english/1436449989754/Put/vlen=2/seqid=0"

      for (Cell cell : valuelist) {
        System.out.println(cell.toString());
      }
      // System.out.println(value.getColumnCells("course".getBytes(),"english".getBytes())+"\n");

    }
Code Example #25
 /**
  * Maps the data.
  *
  * @param row The current table row key.
  * @param values The columns.
  * @param context The current context.
  * @throws IOException When something is broken with the data.
  * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
  *     org.apache.hadoop.mapreduce.Mapper.Context)
  */
 @Override
 public void map(ImmutableBytesWritable row, Result values, Context context) throws IOException {
   for (KeyValue value : values.list()) {
     if (value.getValue().length > 0) {
       context.getCounter(Counters.ROWS).increment(1);
       break;
     }
   }
 }
Code Example #26
 @Override
 protected void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   int partitionId = context.getConfiguration().getInt("mapreduce.task.partition", 0);
   System.out.println("partitionId = " + partitionId + "    " + this);
   context.write(new IntWritable(partitionId), new Text(counter + "_-_" + value));
   context.getCounter("LN", "partition_" + partitionId).setValue(counter);
   counter++;
 }
Code Example #27
    @Override
    public void map(IntWritable nid, PersonalizedPageRankNode node, Context context)
        throws IOException, InterruptedException {
      // Pass along node structure.
      intermediateStructure.setNodeId(node.getNodeId());
      intermediateStructure.setType(PersonalizedPageRankNode.Type.Structure);
      intermediateStructure.setAdjacencyList(node.getAdjacenyList());

      context.write(nid, intermediateStructure);

      int massMessages = 0;

      // Distribute PageRank mass to neighbors (along outgoing edges).
      if (node.getAdjacenyList().size() > 0) {
        // Each neighbor gets an equal share of PageRank mass.
        ArrayListOfIntsWritable list = node.getAdjacenyList();
        float[] mass = new float[sources.size()];
        for (int i = 0; i < sources.size(); i++) {
          mass[i] = node.getPageRank(i) - (float) StrictMath.log(list.size());
        }

        context.getCounter(PageRank.edges).increment(list.size());

        // Iterate over neighbors.
        for (int i = 0; i < list.size(); i++) {
          neighbor.set(list.get(i));
          intermediateMass.setNodeId(list.get(i));
          intermediateMass.setType(PersonalizedPageRankNode.Type.Mass);

          for (int j = 0; j < sources.size(); j++) {
            intermediateMass.setPageRank(j, mass[j]);
          }

          // Emit messages with PageRank mass to neighbors.
          context.write(neighbor, intermediateMass);

          massMessages++;
        }
      }

      // Bookkeeping.
      context.getCounter(PageRank.nodes).increment(1);
      context.getCounter(PageRank.massMessages).increment(massMessages);
    }
Code Example #28
File: ImportMRMapper.java Project: boiler256/lumify
  private void safeMap(LongWritable filePosition, Text line, Context context)
      throws IOException, InterruptedException {
    String lineString = line.toString();
    int colonOffet = lineString.indexOf(':');
    if (colonOffet < 1) {
      return;
    }
    long userId = Long.parseLong(lineString.substring(0, colonOffet));
    context.setStatus("User: "******"Friendster MR", "", user, visibility);
    context.write(key, AccumuloSession.createMutationFromRow(audit));

    String friends = lineString.substring(colonOffet + 1).trim();
    if ("notfound".equals(friends) || "private".equals(friends)) {
      // do nothing?
    } else {
      String[] friendsArray = friends.split(",");
      for (String friend : friendsArray) {
        friend = friend.trim();
        if (friend.length() == 0) {
          continue;
        }
        long friendId = Long.parseLong(friend);
        Vertex friendVertex = createUserVertex(friendId);
        addEdge(
            ImportMR.getFriendEdgeId(userVertex, friendVertex),
            userVertex,
            friendVertex,
            FriendsterOntology.EDGE_LABEL_FRIEND,
            visibility,
            authorizations);
        context.getCounter(FriendsterImportCounters.FRIEND_EDGES_CREATED).increment(1);
      }
    }

    context.getCounter(FriendsterImportCounters.USERS_PROCESSED).increment(1);
  }
Code Example #29
  @Override
  protected void reduce(final Key key, final Iterable<Value> values, final Context context)
      throws IOException, InterruptedException {
    final Iterator<Value> iter = values.iterator();
    final Value firstValue = iter.next();
    final boolean isMulti = iter.hasNext();

    context.write(key, reduceValue(key, isMulti, iter, firstValue));
    context.getCounter("Bulk import", getCounterId(isMulti)).increment(1L);
  }
Code Example #30
File: EMRReducer.java Project: khsibr/hadoop
 @Override
 public void reduce(Text key, Iterable<IntWritable> values, Context context)
     throws IOException, InterruptedException {
   int sum = 0;
   for (IntWritable value : values) {
     sum += value.get();
   }
   context.write(key, new Text(sum + ""));
   context.getCounter(EMRDriver.STATE_COUNTER_GROUP, EMRDriver.TOTAL_PROFILE_COUNT).increment(1);
 }