예제 #1
1
    /**
     * Emits one record per key, chosen by the key's leading letter.
     *
     * <p>Keys starting with "O", "P" or "S" are written with all of their values concatenated,
     * each value preceded by a single space. Keys starting with "L" are written with a
     * single-space value. Keys with any other prefix produce no output.
     *
     * @param key the grouping key; its first character selects the behaviour
     * @param values the values grouped under {@code key}
     * @param context the reducer context that receives the output record
     * @throws IOException declared by the context write
     * @throws InterruptedException declared by the context write
     */
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

      String keyS = key.toString();
      if (keyS.startsWith("O") || keyS.startsWith("P") || keyS.startsWith("S")) {
        // A StringBuilder avoids re-copying the accumulated text for every value.
        StringBuilder joined = new StringBuilder();
        for (Text val : values) {
          joined.append(' ').append(val.toString());
        }
        result.set(joined.toString());
        context.write(key, result);
      } else if (keyS.startsWith("L")) {
        // The prefixes are mutually exclusive, so chaining the branches changes nothing.
        result.set(" ");
        context.write(key, result);
      }
    }
 /**
  * Emits the first token of a whitespace-delimited line as the key.
  *
  * <p>The name of the input split's grandparent directory is compared with the "train_file"
  * configuration entry. On a match the second token becomes the value. Otherwise the second
  * token is consumed and discarded, and the remaining tokens are read in pairs and appended as
  * {@code first + dlt + second + "\t"} to a builder created from {@code dlt}.
  *
  * @param key the input key (unused)
  * @param value one input line
  * @param context the mapper context that supplies the split and receives the output
  * @throws IOException declared by the context write
  * @throws InterruptedException declared by the context write
  */
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String grandparentDir =
       ((FileSplit) context.getInputSplit()).getPath().getParent().getParent().getName();
   String trainDir = context.getConfiguration().get("train_file");
   boolean fromTrainingInput = grandparentDir.equals(trainDir);

   // Both input layouts start with the same two tokens.
   StringTokenizer tokens = new StringTokenizer(value.toString());
   String word = tokens.nextToken();
   String secondToken = tokens.nextToken();

   String payload;
   if (fromTrainingInput) {
     payload = secondToken;
   } else {
     StringBuilder pairs = new StringBuilder(dlt);
     while (tokens.hasMoreTokens()) {
       String filename = tokens.nextToken();
       String tfIdf = tokens.nextToken();
       pairs.append(filename).append(dlt).append(tfIdf).append("\t");
     }
     payload = pairs.toString();
   }

   myKey.set(word);
   myVal.set(payload);
   context.write(myKey, myVal);
 }
예제 #3
1
  /**
   * Turns one comma-separated line into a keyed record.
   *
   * <p>Lines starting with {@code OTPConsts.HEADER_START} are skipped. Lines starting with "P"
   * are treated as prediction input: the key comes from {@code extractKey(fields, 1, 5)} and the
   * value is fields 6 through 9 joined with {@code OTPConsts.COMMA}. Every other line is treated
   * as query input: the key comes from {@code extractKey(fields, 0, -1)} and the value is "Q".
   *
   * @param key the input key (unused)
   * @param value one input line
   * @param context the mapper context that receives the output record
   * @throws IOException declared by the context write
   * @throws InterruptedException declared by the context write
   */
  @Override
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    final String record = value.toString();

    // Header rows carry no data.
    if (record.startsWith(OTPConsts.HEADER_START)) {
      return;
    }

    final String[] fields = record.split(",");
    final String outKey;
    final String outValue;
    if (record.startsWith("P")) {
      // Prediction input.
      outKey = extractKey(fields, 1, 5);
      outValue =
          fields[6]
              + OTPConsts.COMMA
              + fields[7]
              + OTPConsts.COMMA
              + fields[8]
              + OTPConsts.COMMA
              + fields[9];
    } else {
      // Query input.
      outKey = extractKey(fields, 0, -1);
      outValue = "Q";
    }
    context.write(new Text(outKey), new Text(outValue));
  }
예제 #4
0
    /**
     * Forwards structure nodes unchanged and folds all other nodes' PageRank values into one
     * mass record for the node id.
     *
     * @param nid the node id shared by all values
     * @param values structure nodes and mass messages for {@code nid}
     * @param context the context that receives the forwarded nodes and the aggregated mass
     * @throws IOException declared by the context write
     * @throws InterruptedException declared by the context write
     */
    @Override
    public void reduce(IntWritable nid, Iterable<PageRankNode> values, Context context)
        throws IOException, InterruptedException {
      // Mass is stored as a log probability, so the running total starts at negative infinity.
      float accumulated = Float.NEGATIVE_INFINITY;
      int contributions = 0;

      for (PageRankNode node : values) {
        if (node.getType() != PageRankNode.Type.Structure) {
          // A mass message: add its contribution to the running total.
          accumulated = sumLogProbs(accumulated, node.getPageRank());
          contributions++;
          continue;
        }
        // A structure node: pass it straight through.
        context.write(nid, node);
      }

      // Nothing to aggregate unless at least one mass message arrived.
      if (contributions <= 0) {
        return;
      }

      intermediateMass.setNodeId(nid.get());
      intermediateMass.setType(PageRankNode.Type.Mass);
      intermediateMass.setPageRank(accumulated);
      context.write(nid, intermediateMass);
    }
    /**
     * Joins an input record with the in-memory {@code userIdToInfo} lookup on its "UserId"
     * attribute.
     *
     * <p>Records without a "UserId" are dropped. When the lookup has an entry, the record is
     * written with that entry as its value. When it has none, the record is written with an
     * empty value only if {@code joinType} equals "leftouter" (case-insensitive).
     *
     * @param key the input key (unused)
     * @param value the raw record, also used as the output key
     * @param context the mapper context that receives the joined record
     * @throws IOException declared by the context write
     * @throws InterruptedException declared by the context write
     */
    @Override
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      Map<String, String> attributes = MRDPUtils.transformXmlToMap(value.toString());

      String userId = attributes.get("UserId");
      if (userId == null) {
        return;
      }

      String matchedInfo = userIdToInfo.get(userId);
      if (matchedInfo == null) {
        // No match: only a left outer join still emits the record, with an empty value.
        if (joinType.equalsIgnoreCase("leftouter")) {
          context.write(value, new Text(""));
        }
        return;
      }

      outvalue.set(matchedInfo);
      context.write(value, outvalue);
    }
    /**
     * Forwards structure nodes unchanged and aggregates, per source index, the PageRank values
     * of all other nodes into one mass record for the node id.
     *
     * @param nid the node id shared by all values
     * @param values structure nodes and mass messages for {@code nid}
     * @param context the context that receives the forwarded nodes and the aggregated mass
     * @throws IOException declared by the context write
     * @throws InterruptedException declared by the context write
     */
    @Override
    public void reduce(IntWritable nid, Iterable<PersonalizedPageRankNode> values, Context context)
        throws IOException, InterruptedException {
      final int sourceCount = sources.size();

      // Mass is stored as a log probability, so every slot starts at negative infinity.
      float[] totals = new float[sourceCount];
      for (int s = 0; s < sourceCount; s++) {
        totals[s] = Float.NEGATIVE_INFINITY;
      }

      int contributions = 0;
      for (PersonalizedPageRankNode node : values) {
        if (node.getType() != PersonalizedPageRankNode.Type.Structure) {
          // A mass message: add its contribution for every source.
          for (int s = 0; s < sourceCount; s++) {
            totals[s] = sumLogProbs(totals[s], node.getPageRank(s));
          }
          contributions++;
          continue;
        }
        // A structure node: pass it straight through.
        context.write(nid, node);
      }

      // Nothing to aggregate unless at least one mass message arrived.
      if (contributions <= 0) {
        return;
      }

      intermediateMass.setNodeId(nid.get());
      intermediateMass.setType(PersonalizedPageRankNode.Type.Mass);
      for (int s = 0; s < sourceCount; s++) {
        intermediateMass.setPageRank(s, totals[s]);
      }
      context.write(nid, intermediateMass);
    }
예제 #7
0
파일: Job4.java 프로젝트: kkiran13/HBase
    /**
     * Adds every value to the shared {@code treeset}, then walks the set in iteration order and
     * writes an HBase Put for each entry whose running counter {@code c} is below 11 or above
     * {@code treeset.size() - 10}.
     *
     * <p>Each entry is split on "!": part 1 is the row key and part 0 is stored under family
     * "stock", qualifier "volatility".
     *
     * <p>NOTE(review): {@code treeset} and {@code c} are declared outside this method and are
     * not reset here, so their state carries over between calls - confirm this is intended.
     *
     * @param key the grouping key (unused)
     * @param values strings added to {@code treeset}
     * @param context the context that receives the Puts
     * @throws IOException declared by the context write
     * @throws InterruptedException declared by the context write
     */
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      for (Text val : values) {
        treeset.add(val.toString());
      }

      int limit = treeset.size() - 10;
      Iterator<?> entries = treeset.iterator();
      while (entries.hasNext()) {
        Object entry = entries.next();
        c++;

        // Entries in the middle of the ordering are skipped.
        if (c >= 11 && c <= limit) {
          continue;
        }

        String[] parts = entry.toString().split("!");
        Put put = new Put(parts[1].getBytes());
        put.add(Bytes.toBytes("stock"), Bytes.toBytes("volatility"), Bytes.toBytes(parts[0]));
        context.write(new ImmutableBytesWritable(parts[1].getBytes()), put);
      }
    }
  /**
   * Merges the partial covariance results produced upstream and writes the final covariance
   * matrix (key 0) and mean vector (key 1).
   *
   * <p>The {@link DaalContext} is disposed in a {@code finally} block, so it is released even
   * when deserialization, computation or the context write throws.
   *
   * @param key the grouping key (unused)
   * @param values serialized partial results
   * @param context the context that receives the covariance and mean tables
   * @throws IOException declared by the context write
   * @throws InterruptedException declared by the context write
   */
  @Override
  public void reduce(IntWritable key, Iterable<WriteableData> values, Context context)
      throws IOException, InterruptedException {

    DaalContext daalContext = new DaalContext();
    try {
      /* Create an algorithm to compute a sparse variance-covariance matrix on the master node */
      DistributedStep2Master covarianceSparseMaster =
          new DistributedStep2Master(daalContext, Double.class, Method.fastCSR);

      for (WriteableData value : values) {
        PartialResult pr = (PartialResult) value.getObject(daalContext);
        covarianceSparseMaster.input.add(DistributedStep2MasterInputId.partialResults, pr);
      }

      /* Compute a sparse variance-covariance matrix on the master node */
      covarianceSparseMaster.compute();

      /* Finalize computations and retrieve the results */
      Result result = covarianceSparseMaster.finalizeCompute();

      HomogenNumericTable covariance = (HomogenNumericTable) result.get(ResultId.covariance);
      HomogenNumericTable mean = (HomogenNumericTable) result.get(ResultId.mean);

      context.write(new IntWritable(0), new WriteableData(covariance));
      context.write(new IntWritable(1), new WriteableData(mean));
    } finally {
      // Previously skipped whenever any statement above threw.
      daalContext.dispose();
    }
  }
예제 #9
0
    /**
     * Sums the counts for a key, optionally filtering the key out.
     *
     * <p>A key made of a single space-separated token, or whose first or second token is "*",
     * is always written with the sum of all its values. Any other key is written only if at
     * least one of its values is -1; those -1 values are left out of the sum.
     *
     * @param key one token, or several tokens separated by single spaces
     * @param values the counts for {@code key}; -1 acts as a marker rather than a count
     * @param context the context that receives the summed count
     * @throws IOException declared by the context write
     * @throws InterruptedException declared by the context write
     */
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
      String[] tokens = key.toString().split(" ");
      boolean alwaysEmit =
          tokens.length == 1 || tokens[0].equals("*") || tokens[1].equals("*");

      int total = 0;
      if (alwaysEmit) {
        for (IntWritable count : values) {
          total += count.get();
        }
        context.write(key, new IntWritable(total));
        return;
      }

      boolean markerSeen = false;
      for (IntWritable count : values) {
        int n = count.get();
        if (n == -1) {
          markerSeen = true;
        } else {
          total += n;
        }
      }

      // Keys that never received the marker are dropped.
      if (markerSeen) {
        context.write(key, new IntWritable(total));
      }
    }
예제 #10
0
    /**
     * Generates sample points and counts how many fall inside or outside the circle inscribed
     * in the unit square.
     *
     * @param offset samples start from the (offset+1)th sample
     * @param size the number of samples for this map
     * @param context output {true-&gt;numInside, false-&gt;numOutside}
     * @throws IOException declared by the context write
     * @throws InterruptedException declared by the context write
     */
    public void map(LongWritable offset, LongWritable size, Context context)
        throws IOException, InterruptedException {

      final HaltonSequence sequence = new HaltonSequence(offset.get());
      final long requested = size.get();
      long inside = 0L;
      long outside = 0L;

      long generated = 0;
      while (generated < requested) {
        // Next point in the unit square, shifted so the circle is centred on the origin.
        final double[] p = sequence.nextPoint();
        final double dx = p[0] - 0.5;
        final double dy = p[1] - 0.5;

        if (dx * dx + dy * dy > 0.25) {
          outside++;
        } else {
          inside++;
        }

        generated++;
        // Report progress every 1000 samples.
        if (generated % 1000 == 0) {
          context.setStatus("Generated " + generated + " samples.");
        }
      }

      context.write(new BooleanWritable(true), new LongWritable(inside));
      context.write(new BooleanWritable(false), new LongWritable(outside));
    }
예제 #11
0
  /**
   * Parses one record into a {@code Spinn3rDocument}, applies {@code filter}, and writes the
   * document when it passes.
   *
   * <p>Records longer than {@code MAX_DOC_SIZE_IN_BYTES} are counted as skipped and not parsed.
   * Parse and filter durations (nanoseconds) are added to the PARSING and FILTERING counters.
   * Output uses {@code toStringF5()} when the "formatF5" option is set, else {@code toString()}.
   *
   * @param key the input key (unused)
   * @param value the raw record
   * @param context the context that receives counters and matching documents
   * @throws IOException declared by the context write
   * @throws InterruptedException declared by the context write
   */
  @Override
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // Oversized documents are skipped (memory pressure, regex limits).
    if (value.getLength() > MAX_DOC_SIZE_IN_BYTES) {
      context.getCounter(ProcessingTime.SKIPPED).increment(1);
      return;
    }

    // Time the parse.
    t1 = System.nanoTime();
    Spinn3rDocument document = new Spinn3rDocument(value.toString());
    t2 = System.nanoTime();
    context.getCounter(ProcessingTime.PARSING).increment(t2 - t1);

    // Time the filter.
    t1 = System.nanoTime();
    t = filter.documentSatisfies(document);
    t2 = System.nanoTime();
    context.getCounter(ProcessingTime.FILTERING).increment(t2 - t1);

    if (!t) {
      return;
    }

    Text rendered;
    if (cmdMap.hasOption("formatF5")) {
      rendered = new Text(document.toStringF5());
    } else {
      rendered = new Text(document.toString());
    }
    context.write(rendered, NullWritable.get());
  }
 /**
  * Tags tab-separated records from two inputs so they can be joined on a shared key.
  *
  * <p>For paths containing "data.txt", column 1 is the join key (tag "1") and columns 0 and 2
  * form the value. For paths containing "info.txt", column 0 is the join key (tag "0") and
  * column 1 is the value. Rows with too few columns are discarded.
  *
  * @param key the input key (unused)
  * @param value one tab-separated line
  * @param context the context that supplies the split path and receives the tagged record
  * @throws IOException declared by the context write
  * @throws InterruptedException declared by the context write
  */
 @Override
 protected void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   // Full path (including file name) of the split being read.
   final String sourcePath = ((FileSplit) context.getInputSplit()).getPath().toString();

   if (sourcePath.contains("data.txt")) {
     final String[] cols = value.toString().split("\t");
     if (cols.length < 3) {
       // Malformed data row (fewer than 3 fields): discard.
       return;
     }
     // Well-formed data row: tag with "1".
     final TextPair taggedKey = new TextPair(new Text(cols[1]), new Text("1"));
     context.write(taggedKey, new Text(cols[0] + "\t" + cols[2]));
   }

   if (sourcePath.contains("info.txt")) {
     final String[] cols = value.toString().split("\t");
     if (cols.length < 2) {
       // Malformed info row (fewer than 2 fields): discard.
       return;
     }
     // Well-formed info row: tag with "0".
     final TextPair taggedKey = new TextPair(new Text(cols[0]), new Text("0"));
     context.write(taggedKey, new Text(cols[1]));
   }
 }
예제 #13
0
  /**
   * Expands one "user friend1,friend2,..." row into friend-suggestion tuples.
   *
   * <p>For every friend F in the list, the method writes (F, "G,1") for every other friend G
   * in the same list, followed by (F, "user,-1").
   *
   * <p>Rows with fewer than two whitespace-separated tokens are skipped. That covers a user
   * without a friend list and also a whitespace-only line, which splits into zero tokens and
   * previously failed on {@code userRow[1]}.
   *
   * <p>The working variables ({@code userRow}, {@code friendList}, {@code i}, {@code j} and the
   * reusable writables) are declared outside this method and are assigned here as before.
   *
   * @param key the input key (unused)
   * @param value one input row
   * @param context the context that receives the tuples
   * @throws IOException declared by the context write
   * @throws InterruptedException declared by the context write
   */
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {

    userRow = value.toString().split("\\s");
    // length can be 0 as well as 1: String.split drops trailing empty strings.
    if (userRow.length < 2) {
      userRow = null;
      return;
    }

    friendList = userRow[1].split(",");
    for (i = 0; i < friendList.length; i++) {
      keyUser.set(new Text(friendList[i]));
      for (j = 0; j < friendList.length; j++) {
        if (j == i) {
          continue;
        }
        suggTuple.set(friendList[j] + ",1");
        context.write(keyUser, suggTuple);
      }
      existingFriend.set(userRow[0] + ",-1");
      context.write(keyUser, existingFriend);
    }
  }
예제 #14
0
    /**
     * Re-emits a node under its own id and sends a copy of it to each of its out-link and
     * in-link neighbours.
     *
     * <p>The self copy is flagged isNode="YES", isInList="***". Copies sent to out-link targets
     * are flagged isInList="YES", isNode="NO"; copies sent to in-link sources are flagged
     * isInList="NO", isNode="NO". Neighbours with the node's own id are skipped.
     *
     * @param key the input key (unused)
     * @param value the serialized node
     * @param context the context that receives the node copies
     * @throws IOException declared by the context write
     * @throws InterruptedException declared by the context write
     */
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {

      String serialized = value.toString().trim();
      NodeWritable node = new NodeWritable(serialized);

      // Carry the node itself forward under its own id.
      NodeWritable self = new NodeWritable(serialized);
      self.setIsNode(new Text("YES"));
      self.setIsInList(new Text("***"));
      context.write(new Text(self.getNid().toString()), self);

      // Send this node to every out-link target.
      for (NodeWritable target : node.getOuts()) {
        if (target.getNid().toString().equals(node.getNid().toString())) {
          continue;
        }
        node.setIsInList(new Text("YES"));
        node.setIsNode(new Text("NO"));
        context.write(new Text(target.getNid().toString()), node);
      }

      // Send this node to every in-link source.
      for (NodeWritable source : node.getIns()) {
        if (source.getNid().toString().equals(node.getNid().toString())) {
          continue;
        }
        node.setIsInList(new Text("NO"));
        node.setIsNode(new Text("NO"));
        context.write(new Text(source.getNid().toString()), node);
      }
    }
    /**
     * Emits, for every non-zero similarity in the row, a one-entry transposed vector keyed by
     * the column index, and finally the row's top similarities keyed by the row itself.
     *
     * @param row the row index
     * @param similaritiesWritable the similarity vector for {@code row}
     * @param ctx the context that receives the transposed partials and the top-K row
     * @throws IOException declared by the context write
     * @throws InterruptedException declared by the context write
     */
    @Override
    protected void map(IntWritable row, VectorWritable similaritiesWritable, Context ctx)
        throws IOException, InterruptedException {
      Vector rowSimilarities = similaritiesWritable.get();
      int cardinality = rowSimilarities.size();
      int rowIndex = row.get();

      // One scratch vector is reused for every emitted partial to avoid per-element allocation.
      Vector scratch = new RandomAccessSparseVector(cardinality, 1);
      TopElementsQueue best = new TopElementsQueue(maxSimilaritiesPerRow);

      for (Iterator<Vector.Element> it = rowSimilarities.iterateNonZero(); it.hasNext(); ) {
        Vector.Element element = it.next();

        // Replace the queue's current top slot when this value is larger.
        MutableElement top = best.top();
        double similarity = element.get();
        if (similarity > top.get()) {
          top.setIndex(element.index());
          top.set(similarity);
          best.updateTop();
        }

        // Set the single entry, emit, then clear it for the next round.
        scratch.setQuick(rowIndex, similarity);
        ctx.write(new IntWritable(element.index()), new VectorWritable(scratch));
        scratch.setQuick(rowIndex, 0.0);
      }

      Vector topRow = new RandomAccessSparseVector(cardinality, maxSimilaritiesPerRow);
      for (Vector.Element kept : best.getTopElements()) {
        topRow.setQuick(kept.index(), kept.get());
      }
      ctx.write(row, new VectorWritable(topRow));
    }
 /**
  * Re-keys labelled records by "id suffix + label".
  *
  * <p>Lines containing "Processed" increment the completion counter. Lines containing
  * "Training" take their label from token 1; all other lines take it from token 2. A record is
  * emitted only when that label token contains no ":". The output key is the part of token 0
  * after its last "g" (training) or last "t" (other), followed by the label. The output value
  * is {@code token0 + "#" + label + " "} followed by the remaining tokens.
  *
  * <p>The line is split once; previously it was re-split on every token access, including the
  * loop condition. The emitted records are unchanged.
  *
  * <p>NOTE(review): the non-training branch joins tokens 1..n with no separator, unlike the
  * training branch. This is preserved as-is - confirm it is intended.
  *
  * @param key the input key (unused)
  * @param value one space-separated line
  * @param context the context that receives the counter update and the re-keyed record
  * @throws IOException declared by the context write
  * @throws InterruptedException declared by the context write
  */
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String val = value.toString();
   if (val.contains("Processed")) {
     context.getCounter(Driver.Progress.Completion).increment(1);
   }

   String[] tokens = val.split(" ");
   if (val.contains("Training")) {
     if (!tokens[1].contains(":")) {
       String label = tokens[1]; // FIRST LABEL
       StringBuilder rest = new StringBuilder();
       for (int i = 2; i < tokens.length; i++) {
         rest.append(' ').append(tokens[i]);
       }
       String inputLabel = tokens[0] + "#" + label + " " + rest;
       context.write(
           new Text(tokens[0].substring(tokens[0].lastIndexOf("g") + 1) + label),
           new Text(inputLabel));
     }
   } else {
     if (!tokens[2].contains(":")) {
       String label = tokens[2];
       StringBuilder rest = new StringBuilder();
       for (int i = 1; i < tokens.length; i++) {
         rest.append(tokens[i]);
       }
       String inputLabel = tokens[0] + "#" + label + " " + rest;
       context.write(
           new Text(tokens[0].substring(tokens[0].lastIndexOf("t") + 1) + label),
           new Text(inputLabel));
     }
   }
 } // map ends
예제 #17
0
  /**
   * Marks a login record as "online" at each 5-minute sample point of a one-hour window.
   *
   * <p>The window starts one hour before the configured initial date (or before now when none
   * is configured), with minutes and seconds set to zero. For each of the 12 sample points
   * where the login time is not after the point and the online duration exceeds
   * {@code point - loginTime}, two records are written: one keyed by app, platform, game
   * server and point, and one where the game server is replaced by
   * {@code MRConstants.INVALID_PLACE_HOLDER_CHAR} for an all-servers total.
   *
   * @param key the input key (unused)
   * @param value one delimited login record
   * @param context the context that supplies the configuration and receives the output
   * @throws IOException declared by the context write
   * @throws InterruptedException declared by the context write
   */
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    String[] columns = value.toString().split(MRConstants.SEPERATOR_IN);

    int loginTime = StringUtil.convertInt(columns[Index_LoginTime], 0);
    int onlineTime = StringUtil.convertInt(columns[Index_OnlineTime], 0);

    Calendar windowStart = Calendar.getInstance();
    Date initialDate = ConfigManager.getInitialDate(context.getConfiguration());
    if (initialDate != null) {
      windowStart.setTime(initialDate);
    }
    // By default use the hour before the scheduler's initial time.
    windowStart.add(Calendar.HOUR_OF_DAY, -1);
    windowStart.set(Calendar.MINUTE, 0);
    windowStart.set(Calendar.SECOND, 0);
    // Start of the window, in seconds.
    int startPoint = (int) (windowStart.getTimeInMillis() / 1000);

    if (loginTime <= 0 || onlineTime <= 0) {
      return;
    }

    for (int step = 0; step < 12; step++) {
      // Sample points are 5 minutes apart.
      int point = startPoint + step * 5 * 60;

      // Online at this point: logged in no later than the point and still online when it
      // was reached.
      boolean onlineAtPoint = loginTime <= point && onlineTime > (point - loginTime);
      if (!onlineAtPoint) {
        continue;
      }

      // ACU/PCU is counted per game server only (not per channel), plus one total across all
      // game servers.

      // Per-game-server record.
      String[] perServerKey =
          new String[] {
            columns[Index_Appid], columns[Index_Platform], columns[Index_gameServer], point + ""
          };
      mapKeyObj.setOutFields(perServerKey);
      context.write(mapKeyObj, one);

      // All-servers record: the game server field holds the placeholder constant.
      String[] allServersKey =
          new String[] {
            columns[Index_Appid],
            columns[Index_Platform],
            MRConstants.INVALID_PLACE_HOLDER_CHAR,
            point + ""
          };
      mapKeyObj.setOutFields(allServersKey);
      context.write(mapKeyObj, one);
    }
  }
 /**
  * Writes the accumulated loss under key -1, then every non-zero gradient component under its
  * index.
  *
  * @param context the context that receives the loss and gradient entries
  * @throws IOException declared by the context write
  * @throws InterruptedException declared by the context write
  */
 @Override
 public void cleanup(Context context) throws IOException, InterruptedException {
   // Key -1 is used for the loss so it cannot collide with a gradient index.
   context.write(new IntWritable(-1), new FloatWritable((float) loss));

   for (int index = 0; index < gradient.length; index++) {
     // Zero components are not written.
     if (gradient[index] == (float) 0.0) {
       continue;
     }
     context.write(new IntWritable(index), new FloatWritable((float) gradient[index]));
   }
 }
 /**
  * After the superclass cleanup, writes the three accumulated side vectors (norms, non-zero
  * entry counts, maximum values), each under its own marker key.
  *
  * @param ctx the context that receives the marker-keyed vectors
  * @throws IOException declared by the context write
  * @throws InterruptedException declared by the context write
  */
 @Override
 protected void cleanup(Context ctx) throws IOException, InterruptedException {
   super.cleanup(ctx);

   // Dirty trick: side data goes through the normal output under reserved marker keys.
   IntWritable normsKey = new IntWritable(NORM_VECTOR_MARKER);
   ctx.write(normsKey, new VectorWritable(norms));

   IntWritable nonZeroKey = new IntWritable(NUM_NON_ZERO_ENTRIES_VECTOR_MARKER);
   ctx.write(nonZeroKey, new VectorWritable(nonZeroEntries));

   IntWritable maxValuesKey = new IntWritable(MAXVALUE_VECTOR_MARKER);
   ctx.write(maxValuesKey, new VectorWritable(maxValues));
 }
예제 #20
0
    /**
     * Keys play-log records and dimension records by infohash / media id so they can be joined.
     *
     * <p>When {@code filePath} contains "play" and the line has enough columns, the line is
     * normalised with {@code getPlayFormatStr} and keyed by the upper-cased infohash (media
     * type "1" or a "subject/play" URL) or else by the media id. Otherwise the line is treated
     * as a dimension record from an "infohash" or "mediainfo" file and keyed by its upper-cased
     * IH column.
     *
     * <p>Any failure while handling a record is written to the "_error/part" output instead of
     * failing the task. {@link InterruptedException} is the exception: it is rethrown, where
     * the catch-all previously swallowed it.
     *
     * @param key the input key (unused)
     * @param value one tab-separated line
     * @param context the context that receives the keyed records
     * @throws IOException declared by the error-output write
     * @throws InterruptedException if a context write is interrupted
     */
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      try {
        String line = value.toString();
        String[] fields = line.split(FIELD_TAB_SEPARATOR, -1);
        String lowerPath = filePath.toLowerCase();

        if (lowerPath.contains("play") && fields.length > PlayFormatEnum.MEDIA_TYPE_ID.ordinal()) {
          String playETLStr = getPlayFormatStr(line);

          String[] playField = playETLStr.split(FIELD_TAB_SEPARATOR, -1);
          String infohashStr = null;
          if (playField[PlayFormatEnum.MEDIA_TYPE_ID.ordinal()].trim().equals("1")
              || playField[PlayFormatEnum.URL.ordinal()].contains("subject/play")) {

            infohashStr = playField[PlayFormatEnum.INFOHASH_ID.ordinal()].toUpperCase();
          } else {
            infohashStr = playField[PlayFormatEnum.MEDIA_ID.ordinal()];
          }
          // Emit only records whose normalised form has the full column count.
          if (null != infohashStr && playField.length == PlayFormatEnum.SEIDCOUNT.ordinal() + 1) {
            keyText.set(infohashStr.trim());
            valueText.set(playETLStr);
            context.write(keyText, valueText);
          }

        } else {
          String dimLine = "";
          String dimInfo = null;
          if (lowerPath.contains("infohash")) {
            if (fields.length > DMInfoHashEnum.MEDIA_ID.ordinal()) {
              dimLine = line.trim();
              dimInfo = fields[DMInfoHashEnum.IH.ordinal()];
            }
          } else if (lowerPath.contains("mediainfo")) {
            // Prefix the default infohash and serial id columns.
            StringBuilder dimStrSb = new StringBuilder();
            dimStrSb.append(DEFAULT_INFOHASH + FIELD_TAB_SEPARATOR);
            dimStrSb.append(DEFAULT_SERIAL_ID + FIELD_TAB_SEPARATOR);
            dimStrSb.append(line.trim());
            dimLine = dimStrSb.toString();
            dimInfo = fields[DMInfoHashEnum.IH.ordinal()];
          }
          if (null != dimInfo && !dimInfo.isEmpty()) {
            String mediaInfo = dimInfo.trim().toUpperCase();
            keyText.set(mediaInfo);
            valueText.set(dimLine);
            context.write(keyText, valueText);
          }
        }
      } catch (InterruptedException e) {
        // Interruption is a signal to stop, not a bad record: let it propagate.
        throw e;
      } catch (Exception e) {
        multipleOutputs.write(
            new Text(null == e.getMessage() ? ("error:" + filePath) : e.getMessage()),
            new Text(value.toString()),
            "_error/part");
        e.printStackTrace();
      }
    }
예제 #21
0
    /**
     * Emits one flight twice: under its origin as "Out,&lt;destination&gt;" and under its
     * destination as "In,&lt;origin&gt;".
     *
     * @param key the input key (unused)
     * @param value the raw flight record
     * @param context the context that receives both records
     * @throws IOException declared by the context write
     * @throws InterruptedException declared by the context write
     */
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {

      FlightDetails details = new FlightDetails(value.toString());
      Text from = details.getOrigin();
      Text to = details.getDest();

      Text outbound = new Text("Out," + to.toString());
      context.write(from, outbound);

      Text inbound = new Text("In," + from.toString());
      context.write(to, inbound);
    }
예제 #22
0
 /**
  * Emits the third comma-separated column twice: once under "u,&lt;column 0&gt;" and once
  * under "v,&lt;column 1&gt;".
  *
  * @param key the input key (unused)
  * @param value one comma-separated line
  * @param context the context that receives both records
  * @throws IOException declared by the context write
  * @throws InterruptedException declared by the context write
  */
 @Override
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   final String[] columns = value.toString().split(",");

   // First-column view.
   outKey.set("u," + columns[0]);
   outText.set(columns[2]);
   context.write(outKey, outText);

   // Second-column view.
   outKey.set("v," + columns[1]);
   outText.set(columns[2]);
   context.write(outKey, outText);
 }
예제 #23
0
    /**
     * Collapses all persisted events for one key into the single entry expected in the
     * validation region.
     *
     * <p>A key sees a create alone, a create plus an update, or a create plus a destroy. If any
     * destroy is seen, nothing is written. Otherwise the update value is written when there is
     * one, else the create value.
     *
     * @param key the wrapped entry key
     * @param values the persisted events recorded for {@code key}
     * @param context the context that receives the surviving entry
     * @throws IOException declared by the context write
     * @throws InterruptedException declared by the context write
     */
    public void reduce(GFKey key, Iterable<PEIWritable> values, Context context)
        throws IOException, InterruptedException {
      String keyStr = (String) key.getKey();
      ValueHolder fromUpdate = null;
      ValueHolder fromCreate = null;
      boolean sawDestroy = false;
      System.out.println("KnownKeysMRv2.reduce() invoked with " + keyStr);

      for (PEIWritable record : values) {
        PersistedEventImpl event = record.getEvent();
        Operation op = event.getOperation();

        if (op.isDestroy()) {
          sawDestroy = true;
        } else {
          ValueHolder holder = null;
          try {
            holder = (ValueHolder) event.getDeserializedValue();
          } catch (ClassNotFoundException e) {
            System.out.println(
                "KnownKeysMRv2.map() caught " + e + " : " + TestHelper.getStackTrace(e));
          }
          // Anything that is neither destroy nor update is treated as the create.
          if (op.isUpdate()) {
            fromUpdate = holder;
          } else {
            fromCreate = holder;
          }
        }

        System.out.println(
            "KnownKeysMRv2.reduce() record: "
                + op.toString()
                + ": key = "
                + keyStr
                + " and op "
                + op.toString());
      }

      // A destroyed entry must not appear in the validation region.
      if (sawDestroy) {
        return;
      }
      context.write(key.getKey(), fromUpdate != null ? fromUpdate : fromCreate);
    }
    /**
     * Orients an undirected edge from its lower-degree endpoint to the other endpoint. When the
     * degrees are equal, the second vertex is the key.
     *
     * @param edge the edge with both endpoints' degrees
     * @param value the input value (unused)
     * @param ctx the context that receives the oriented vertex pair
     * @throws IOException declared by the context write
     * @throws InterruptedException declared by the context write
     */
    @Override
    protected void map(UndirectedEdgeWithDegrees edge, Object value, Context ctx)
        throws IOException, InterruptedException {
      VertexWithDegree a = edge.getFirstVertexWithDegree();
      VertexWithDegree b = edge.getSecondVertexWithDegree();

      boolean firstIsLower = a.getDegree() < b.getDegree();
      VertexWithDegree keyEnd = firstIsLower ? a : b;
      VertexWithDegree valueEnd = firstIsLower ? b : a;

      ctx.write(keyEnd.getVertex(), valueEnd.getVertex());
    }
예제 #25
0
    /**
     * Tags tab-separated records by source file, judged from {@code path}.
     *
     * <p>Paths containing "hs_log" emit column 3 as the key with value "1\t" + column 0. Paths
     * containing "1_history_mac" emit column 0 as the key with value "2". Records from any
     * other path are ignored.
     *
     * @param key the input key (unused)
     * @param value one tab-separated line
     * @param context the context that receives the tagged record
     * @throws IOException declared by the context write
     * @throws InterruptedException declared by the context write
     */
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {

      String[] columns = value.toString().split("\t");
      String source = path.toString();

      if (source.contains("hs_log")) {
        Text logKey = new Text(columns[3]);
        context.write(logKey, new Text("1" + "\t" + columns[0]));
        return;
      }

      if (source.contains("1_history_mac")) {
        Text macKey = new Text(columns[0]);
        context.write(macKey, new Text("2"));
      }
    }
예제 #26
0
 /**
  * Extracts a page title and its distinct outgoing links from one wiki page record.
  *
  * <p>The title is the text between {@code <title>} and {@code </title>} with spaces replaced
  * by underscores; records without both tags are skipped. Links are taken from the body
  * between the end of the opening {@code <text ...>} tag and {@code </text>}: each match of
  * {@code patterLinks} goes through {@code linksCatcher}, null and empty results are dropped,
  * the rest are trimmed and deduplicated in first-seen order, and links equal to the title are
  * excluded. The page is written with its links tab-separated, or with an empty value when it
  * has no usable text section.
  *
  * <p>Changes from the earlier version: the missing-{@code <title>} check now runs before the
  * closing tag is searched, so -1 is never passed as a search offset; {@code </text>} is
  * searched after the opening tag, so the body length cannot be negative; deduplication uses
  * one {@link LinkedHashSet} and the output is built with a {@link StringBuilder}.
  *
  * @param key the input key (unused)
  * @param value the raw page record
  * @param context the context that receives (title, tab-separated links)
  * @throws IOException declared by the context write and by decoding
  * @throws InterruptedException declared by the context write
  */
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   int start = value.find("<title>");
   if (start == -1) return;
   int end = value.find("</title>", start);
   if (end == -1) return;
   start += 7; // skip past "<title>"
   String title = Text.decode(value.getBytes(), start, end - start);
   title = title.replace(' ', '_');
   Text titleKey = new Text(title);

   // Locate the end of the opening <text ...> tag.
   start = value.find("<text");
   if (start != -1) {
     start = value.find(">", start);
   }
   if (start == -1) {
     context.write(titleKey, new Text(""));
     return;
   }
   start += 1;
   // Search for the closing tag only after the opening one.
   end = value.find("</text>", start);
   if (end == -1) {
     context.write(titleKey, new Text(""));
     return;
   }

   String text = Text.decode(value.getBytes(), start, end - start);
   Matcher wikiLinksMatcher = patterLinks.matcher(text);
   // Insertion-ordered set: keeps the first occurrence of each link.
   LinkedHashSet<String> distinctLinks = new LinkedHashSet<String>();
   while (wikiLinksMatcher.find()) {
     String outLinkPage = linksCatcher(wikiLinksMatcher.group());
     if (outLinkPage != null && !outLinkPage.isEmpty()) {
       distinctLinks.add(outLinkPage.trim());
     }
   }

   StringBuilder outLinks = new StringBuilder();
   boolean first = true;
   for (String link : distinctLinks) {
     // A page linking to itself is not an out-link.
     if (link.equals(title)) {
       continue;
     }
     if (!first) {
       outLinks.append("\t");
     }
     outLinks.append(link);
     first = false;
   }
   context.write(titleKey, new Text(outLinks.toString()));
 }
예제 #27
0
    /**
     * Keeps the {@code N} largest (count, title) pairs seen so far in {@code countToTitleMap}
     * and writes the mean, sum, minimum, maximum and variance of their counts.
     *
     * <p>The statistics are computed over the pairs actually retained. The earlier version
     * always iterated over {@code N} slots, which failed on an unfilled slot when fewer than
     * {@code N} pairs were available. When no pair is retained, nothing is written.
     *
     * <p>Mean and variance use integer arithmetic, matching the {@code IntWritable} output.
     *
     * @param key the single grouping key (unused)
     * @param values (title, count) pairs as two-element text arrays
     * @param context the context that receives the five statistics
     * @throws IOException declared by the context write
     * @throws InterruptedException declared by the context write
     */
    @Override
    public void reduce(NullWritable key, Iterable<TextArrayWritable> values, Context context)
        throws IOException, InterruptedException {
      for (TextArrayWritable val : values) {
        Text[] pair = (Text[]) val.toArray();

        String title = pair[0].toString();
        Integer count = Integer.parseInt(pair[1].toString());

        countToTitleMap.add(new Pair<Integer, String>(count, title));

        // Evict the smallest entry once more than N are held.
        if (countToTitleMap.size() > this.N) {
          countToTitleMap.remove(countToTitleMap.first());
        }
      }

      int retained = countToTitleMap.size();
      if (retained == 0) {
        // No data: the statistics are undefined and the divisions below would fail.
        return;
      }

      int sum = 0;
      int min = Integer.MAX_VALUE;
      int max = Integer.MIN_VALUE;
      for (Pair<Integer, String> entry : countToTitleMap) {
        int count = entry.first;
        sum += count;
        min = Math.min(min, count);
        max = Math.max(max, count);
      }

      int mean = sum / retained;
      int var = 0;
      for (Pair<Integer, String> entry : countToTitleMap) {
        int deviation = entry.first - mean;
        var += deviation * deviation;
      }
      var /= retained;

      context.write(new Text("Mean"), new IntWritable(mean));
      context.write(new Text("Sum"), new IntWritable(sum));
      context.write(new Text("Min"), new IntWritable(min));
      context.write(new Text("Max"), new IntWritable(max));
      context.write(new Text("Var"), new IntWritable(var));
    }
예제 #28
0
 /**
  * Merges the Puts for one row into as few Puts as possible.
  *
  * <p>The first Put of a batch is the merge target; the cells of each later Put are added to
  * its family map. The heap size of the merged cells is tracked, and once it exceeds
  * "putcombiner.row.threshold" (default 1 GiB) the merged Put is written and a new batch
  * starts. A row may therefore produce several Puts, which is acceptable because a combiner is
  * only an optimization.
  *
  * <p>The size counter is reset with each flush. Previously it was not, so after the first
  * flush every later merge exceeded the threshold again and flushed immediately.
  *
  * @param row the row key
  * @param vals the Puts for {@code row}
  * @param context the context that supplies the configuration and receives the merged Puts
  * @throws IOException declared by the context write
  * @throws InterruptedException declared by the context write
  */
 @Override
 protected void reduce(K row, Iterable<Put> vals, Context context)
     throws IOException, InterruptedException {
   long threshold =
       context.getConfiguration().getLong("putcombiner.row.threshold", 1L * (1 << 30));
   int cnt = 0;
   long curSize = 0;
   Put put = null;
   Map<byte[], List<Cell>> familyMap = null;
   for (Put p : vals) {
     cnt++;
     if (put == null) {
       // Start a new batch with this Put as the merge target.
       put = p;
       familyMap = put.getFamilyCellMap();
     } else {
       for (Entry<byte[], List<Cell>> entry : p.getFamilyCellMap().entrySet()) {
         List<Cell> cells = familyMap.get(entry.getKey());
         for (Cell cell : entry.getValue()) {
           KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
           curSize += kv.heapSize();
           if (cells != null) {
             cells.add(kv);
           }
         }
         // Family not present in the target yet: adopt the incoming list as a whole.
         if (cells == null) {
           familyMap.put(entry.getKey(), entry.getValue());
         }
       }
       if (cnt % 10 == 0) context.setStatus("Combine " + cnt);
       if (curSize > threshold) {
         LOG.info(String.format("Combined %d Put(s) into %d.", cnt, 1));
         context.write(row, put);
         put = null;
         curSize = 0;
         cnt = 0;
       }
     }
   }
   if (put != null) {
     LOG.info(String.format("Combined %d Put(s) into %d.", cnt, 1));
     context.write(row, put);
   }
 }
 /**
  * Serializes the column values of a row into one byte array per distinct row key and writes
  * each array under (table name, row key).
  *
  * <p>Row keys may differ within the list when indexes are involved, so a new aggregate starts
  * whenever the row key changes from the previous cell. Each cell is encoded as: type byte,
  * column index (vint), value length (vint), value bytes.
  *
  * @param context Current mapper context
  * @param tableName Table index in tableNames list
  * @param lkv List of KV values that will be combined in a single ImmutableBytesWritable
  * @throws IOException declared by the stream and context writes
  * @throws InterruptedException declared by the context write
  */
 private void writeAggregatedRow(Context context, String tableName, List<KeyValue> lkv)
     throws IOException, InterruptedException {
   ByteArrayOutputStream buffer = new ByteArrayOutputStream(1024);
   DataOutputStream encoder = new DataOutputStream(buffer);
   if (lkv.isEmpty()) {
     return;
   }

   ImmutableBytesWritable currentRow = null;
   for (KeyValue kv : lkv) {
     boolean sameRowAsPrevious =
         currentRow != null
             && Bytes.compareTo(
                     currentRow.get(),
                     currentRow.getOffset(),
                     currentRow.getLength(),
                     kv.getRowArray(),
                     kv.getRowOffset(),
                     kv.getRowLength())
                 == 0;

     if (!sameRowAsPrevious) {
       // Row key changed: flush the previous row's aggregate, if there was one.
       if (currentRow != null) {
         ImmutableBytesWritable finished = new ImmutableBytesWritable(buffer.toByteArray());
         encoder.close();
         context.write(new TableRowkeyPair(tableName, currentRow), finished);
       }
       currentRow =
           new ImmutableBytesWritable(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength());
       buffer = new ByteArrayOutputStream(1024);
       encoder = new DataOutputStream(buffer);
     }

     int columnIndex = findIndex(kv);
     if (columnIndex != -1) {
       // Encoding order: type, column index, value length, value bytes.
       encoder.writeByte(kv.getTypeByte());
       WritableUtils.writeVInt(encoder, columnIndex);
       WritableUtils.writeVInt(encoder, kv.getValueLength());
       encoder.write(kv.getValueArray(), kv.getValueOffset(), kv.getValueLength());
     }
     // An index of -1 can occur when only local indexes are loaded: table and local-index
     // KVs share one physical table, and KVs outside the local index are skipped.
   }

   // Flush the aggregate of the last row key.
   ImmutableBytesWritable finished = new ImmutableBytesWritable(buffer.toByteArray());
   encoder.close();
   context.write(new TableRowkeyPair(tableName, currentRow), finished);
 }
예제 #30
0
    /**
     * Parses one XML article with XOM and emits ("term,articleId", one) for every word token in
     * its body.
     *
     * <p>The id and body come from the first {@code eecs485_article_id} and
     * {@code eecs485_article_body} elements. If parsing fails, a message is printed and both
     * stay empty, so nothing is emitted.
     *
     * @param key the input key (unused)
     * @param value the raw XML article
     * @param context the context that receives the term records
     * @throws IOException declared by the context write
     * @throws InterruptedException declared by the context write
     */
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {

      String articleId = "";
      String articleBody = "";

      // Read the page id and article body with the XOM library.
      Builder parser = new Builder();
      try {
        Document article = parser.build(value.toString(), null);
        articleId = article.query("//eecs485_article_id").get(0).getChild(0).getValue();
        articleBody = article.query("//eecs485_article_body").get(0).getChild(0).getValue();
      } catch (ParsingException ex) {
        System.out.println("Not well-formed.");
        System.out.println(ex.getMessage());
      } catch (IOException ex) {
        System.out.println("io exception");
      }

      // Each run of word characters is one term; key = "term,docid", value = 1.
      Matcher terms = Pattern.compile("\\w+").matcher(articleBody);
      while (terms.find()) {
        context.write(new Text(terms.group() + "," + articleId), one);
      }
    }