Пример #1
0
    /**
     * Replaces the text field of a {@code \001}-delimited record with its word statistics.
     *
     * <p>The trimmed input line is split on {@code \001}. Nothing is emitted when the number of
     * fields differs from {@code loc_field_num}, when the text field (index {@code loc_text_index})
     * fails {@code SSO.tnoe}, or when the result of {@code WordStatis.wordStatis} fails
     * {@code SSO.tnoe} (presumably a "non-null and non-empty" check; confirm against SSO).
     *
     * <p>Otherwise the output key is field 0 followed by {@code \001}, and the output value is the
     * remaining fields joined by {@code \001}, with the text field replaced by the statistics.
     *
     * @param key input key, not used
     * @param value one input record
     * @param context sink for the (key, value) pair
     */
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {

      String[] fields = value.toString().trim().split("\001");
      System.out.println("field_num:" + loc_field_num + "  seg_arr.length:" + fields.length);
      if (fields.length != loc_field_num) {
        return;
      }

      String text = fields[loc_text_index];
      if (!SSO.tnoe(text)) {
        return;
      }

      String stats = WordStatis.wordStatis(text.trim());
      if (!SSO.tnoe(stats)) {
        return;
      }

      // Fields before the text field (field 0 goes into the key, so start at 1).
      StringBuilder joined = new StringBuilder();
      for (int i = 1; i < loc_text_index; i++) {
        joined.append(fields[i]).append("\001");
      }
      joined.append(stats.trim()).append("\001");
      // Fields after the text field.
      for (int i = loc_text_index + 1; i < loc_field_num; i++) {
        joined.append(fields[i]).append("\001");
      }

      word.set(fields[0] + "\001");
      // String.trim() strips every char <= U+0020, so this also removes the trailing \001
      // delimiter (and any leading/trailing empty fields).
      word1.set(joined.toString().trim());
      context.write(word, word1);
    } // map
Пример #2
0
 /**
  * Adds up every value received for {@code key} and emits the total.
  *
  * @param key grouping key, written through unchanged
  * @param values partial counts for the key
  * @param context sink for the (key, total) pair
  */
 @Override
 public void reduce(Text key, Iterable<LongWritable> values, Context context)
     throws IOException, InterruptedException {
   long total = 0L;
   for (LongWritable partial : values) {
     total += partial.get();
   }
   LongWritable out = new LongWritable(total);
   context.write(key, out);
 }
 //		@Override
 /**
  * Collects the item IDs seen for one user into a sparse vector and emits it.
  *
  * <p>Each item ID is used as a vector index whose entry is set to 1.0.
  *
  * @param userID user the items belong to, written through as the output key
  * @param itemPrefs item IDs for this user
  * @param context sink for the (userID, vector) pair
  */
 public void reduce(VarLongWritable userID, Iterable<VarLongWritable> itemPrefs, Context context)
     throws IOException, InterruptedException {
   Vector prefs = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
   for (VarLongWritable item : itemPrefs) {
     // NOTE(review): the long item ID is narrowed to int here; IDs outside the int range
     // would wrap. Confirm IDs always fit.
     int index = (int) item.get();
     prefs.set(index, 1.0f);
   }
   VectorWritable wrapped = new VectorWritable(prefs);
   context.write(userID, wrapped);
 }
Пример #4
0
  /**
   * Emits the two components of the composite key as an output pair: {@code key.key1} becomes
   * the output key and {@code key.key2}, parsed as a decimal int, becomes the output value.
   *
   * <p>The incoming {@code values} are not read. NOTE(review): a summation over the values was
   * previously commented out here; confirm that ignoring them is intended.
   *
   * @param key composite key carrying both output components
   * @param values not used
   * @param context sink for the output pair
   * @throws NumberFormatException if {@code key.key2} does not hold a parsable int
   */
  public void reduce(PairsKey key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    context.write(key.key1, new IntWritable(Integer.parseInt(key.key2.toString())));
  }
Пример #5
0
 /**
  * Totals the counts for {@code key} and emits a single line of the form
  * {@code key + fieldDelim + total} as the value, with a null key.
  *
  * <p>The running total is kept in the {@code sum} field and the output line in the
  * {@code valueOut} field, both declared outside this method.
  *
  * @param key grouping key, rendered into the output line
  * @param values counts to add up
  * @param context sink for the output line
  */
 protected void reduce(Text key, Iterable<IntWritable> values, Context context)
     throws IOException, InterruptedException {
   sum = 0;
   for (IntWritable count : values) {
     sum += count.get();
   }
   String line = key.toString() + fieldDelim + sum;
   valueOut.set(line);
   context.write(NullWritable.get(), valueOut);
 }
Пример #6
0
 /**
  * Concatenates all values for a key into one string, each value followed by a single space,
  * and writes it under the {@code voidtxt} key (a field declared outside this method).
  *
  * @param key grouping key; not part of the output
  * @param values values to concatenate, in iteration order
  * @param context sink for the concatenated line
  */
 @Override
 public void reduce(Text key, Iterable<Text> values, Context context)
     throws IOException, InterruptedException {
   StringBuilder joined = new StringBuilder();
   for (Text value : values) {
     joined.append(value).append(' ');
   }
   Text out = new Text(joined.toString());
   // Use context.write(key, value) to emit the reducer output.
   context.write(voidtxt, out);
 }
Пример #7
0
 /**
  * Sums the counts for {@code key}, formats the result as {@code "<key> <sum>"} and emits it
  * encrypted: the output key is a freshly generated IV of {@code AES_BLOCK_SIZE} bytes and the
  * output value is what {@code crypto.encrypt_word_rnd} returns for that text and IV.
  *
  * @param key word (or other grouping key) being counted
  * @param vals partial counts for the key
  * @param context sink for the (IV, ciphertext) pair
  */
 @Override
 public void reduce(Text key, Iterable<LongWritable> vals, Context context)
     throws IOException, InterruptedException {
   // Accumulate in a long: "int s; s += long" compiles because compound assignment narrows
   // implicitly, which would silently wrap once the total exceeds Integer.MAX_VALUE.
   long s = 0;
   for (LongWritable lw : vals) {
     s += lw.get();
   }
   String pt = key.toString() + " " + s;
   byte[] iv = crypto.randomBytes(AES_BLOCK_SIZE);
   byte[] ct = crypto.encrypt_word_rnd(pt, iv);
   context.write(new BytesWritable(iv), new BytesWritable(ct));
 }
Пример #8
0
  /**
   * Emits the top 10 users by count.
   *
   * <p>The accumulated {@code userCountMap} is ordered by {@code descendSortByValues} (defined
   * elsewhere in this class) and the first ten entries, in that map's iteration order, are
   * written out.
   *
   * @param context sink for the (user, count) pairs
   */
  @Override
  protected void cleanup(Context context) throws IOException, InterruptedException {

    Map<Text, IntWritable> ranked = descendSortByValues(userCountMap);
    int written = 0;
    for (Map.Entry<Text, IntWritable> entry : ranked.entrySet()) {
      if (written == 10) { // stop after the top 10 users
        break;
      }
      context.write(entry.getKey(), entry.getValue());
      written++;
    }
  }
 /**
  * Splits one input line into a user ID and its item IDs and emits one (user, item) pair per
  * item.
  *
  * <p>The first match of the {@code NUMBERS} pattern (defined elsewhere; presumably a run of
  * digits) is parsed as the user ID; every further match is parsed as an item ID. Lines
  * without any match are skipped.
  *
  * @param key input key, not used
  * @param value one input line
  * @param context sink for the (userID, itemID) pairs
  * @throws NumberFormatException if a match cannot be parsed as a long
  */
 @Override
 protected void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String line = value.toString();
   Matcher m = NUMBERS.matcher(line);
   // Matcher.group() throws IllegalStateException unless the preceding find() succeeded, so a
   // blank or malformed line must be skipped rather than read.
   if (!m.find()) {
     return;
   }
   VarLongWritable userID = new VarLongWritable(Long.parseLong(m.group()));
   // A single writable is reused for all item IDs of this line.
   VarLongWritable itemID = new VarLongWritable();
   while (m.find()) {
     itemID.set(Long.parseLong(m.group()));
     context.write(userID, itemID);
   }
 }
Пример #10
0
  /**
   * Decrypts a record and emits a count of 1 for every token found in the plaintext.
   *
   * <p>The input key carries the IV and the input value the ciphertext; both are passed to
   * {@code crypto.decrypt_word_rnd}. Each match of {@code pattern} (defined elsewhere) in the
   * decrypted text is written as (match, 1).
   *
   * @param key IV bytes
   * @param val ciphertext bytes
   * @param context sink for the (token, 1) pairs
   */
  @Override
  public void map(BytesWritable key, BytesWritable val, Context context)
      throws IOException, InterruptedException {
    byte[] initVector = key.copyBytes();
    byte[] cipherText = val.copyBytes();

    // NOTE(review): new String(...) without a charset decodes with the platform default;
    // confirm this matches whatever the encrypting side used.
    String plain = new String(crypto.decrypt_word_rnd(cipherText, initVector));

    for (Matcher tokens = pattern.matcher(plain); tokens.find(); ) {
      context.write(new Text(tokens.group()), new LongWritable(1));
    }
  }
Пример #11
0
    /**
     * Emits (search keyword, url + TAB + title) for page views that were referred by a Baidu
     * search.
     *
     * <p>The input line is split on TAB and must have exactly 11 fields; field 5 is read as the
     * page URL, field 6 as the page title and field 10 as the referrer. A record is emitted only
     * when the referrer is non-empty, {@code isBaiduSearch} accepts it, and {@code extract_word}
     * returns a keyword that is not blank. All other records are dropped silently.
     *
     * @param key input key, not used
     * @param value one TAB-separated log line
     * @param context sink for the (keyword, url/title) pair
     */
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {

      String[] seg_arr = value.toString().split("\t");
      if (seg_arr.length != 11) {
        return;
      }

      String input_url = seg_arr[5].trim();
      // The title comes out of a split on TAB, so it can never contain a TAB itself; the former
      // "input_title.replaceAll("\t", " ")" call (whose result was discarded anyway) is gone.
      String input_title = seg_arr[6].trim();
      String input_refer = seg_arr[10].trim();

      if (input_refer.isEmpty() || !isBaiduSearch(input_refer)) {
        return;
      }

      String wd = extract_word(input_refer);
      if (wd == null || wd.trim().isEmpty()) {
        return;
      }

      // The keyword is emitted exactly as extracted (not trimmed).
      word.set(wd);
      word1.set(input_url + "\t" + input_title);
      context.write(word, word1);
    }
Пример #12
0
  /**
   * Treats the first value of the group as a number and attaches it to every following value.
   *
   * <p>If the first value passes {@code isLong} (defined elsewhere), each remaining value is
   * emitted as an output key paired with that number. If the group is empty, or the first value
   * is not accepted by {@code isLong}, nothing is emitted.
   *
   * <p>NOTE(review): this depends on the numeric value arriving first in the group; confirm the
   * job's sort/grouping guarantees that.
   *
   * @param key grouping key; not part of the output
   * @param values first the number, then the values to tag with it
   * @param context sink for the (value, number) pairs
   */
  @Override
  protected void reduce(Text key, Iterable<Text> values, Context context)
      throws IOException, InterruptedException {

    Iterator<Text> remaining = values.iterator();
    if (!remaining.hasNext()) {
      return;
    }

    String head = remaining.next().toString();
    if (!isLong(head)) {
      return;
    }

    while (remaining.hasNext()) {
      // Copy the value: the framework may reuse the Text instance between iterations.
      Text outKey = new Text(remaining.next());
      LongWritable outValue = new LongWritable(Long.parseLong(head));
      context.write(outKey, outValue);
    }
  }
Пример #13
0
 /**
  * Sums the counts for one {@code "<id>:<flag>"} key and stores the total in an HBase row.
  *
  * <p>The key is split on {@code ':'}. The part before the colon is parsed as an int and used
  * as the row key; the part after it becomes the column qualifier in family {@code "count"};
  * the summed count is the cell value.
  *
  * @param key text of the form {@code "<id>:<flag>"}
  * @param values partial counts for the key
  * @param context sink for the (row, Put) pair
  * @throws NumberFormatException if the id part is not a parsable int
  * @throws ArrayIndexOutOfBoundsException if the key contains no flag part after the colon
  */
 @Override
 protected void reduce(Text key, Iterable<IntWritable> values, Context context)
     throws IOException, InterruptedException {
   int total = 0;
   for (IntWritable count : values) {
     total += count.get();
   }

   String[] parts = key.toString().split(":");
   byte[] row = Bytes.toBytes(Integer.parseInt(parts[0])); // id as row key
   Put put = new Put(row);
   // addColumn(family, qualifier, value): "count" family, flag as qualifier, total as value.
   put.addColumn(Bytes.toBytes("count"), Bytes.toBytes(parts[1]), Bytes.toBytes(total));
   context.write(new ImmutableBytesWritable(put.getRow()), put);
 }
Пример #14
0
 /**
  * Emits ({@code "<categoryId>:<flag>"}, 1) for one HBase row.
  *
  * <p>The category ID is read from {@code products:product_category_id} and the order date
  * from {@code orders:order_date}. The date string is split on runs of non-word characters;
  * the flag is {@code "Y"} when the second component equals {@code "11"} and the third, parsed
  * as an int, is at least 21, and {@code "N"} otherwise.
  *
  * <p>NOTE(review): a row without an {@code order_date} cell, or a date with fewer than three
  * components when needed, makes this method throw; confirm the table guarantees both.
  *
  * @param rowkey row key of the scanned row, not used
  * @param result cells of the scanned row
  * @param context sink for the (key, 1) pair, e.g. (123:Y, 1), (124:N, 1)
  */
 @Override
 protected void map(ImmutableBytesWritable rowkey, Result result, Context context)
     throws IOException, InterruptedException {
   byte[] categoryId =
       result.getValue(Bytes.toBytes("products"), Bytes.toBytes("product_category_id"));
   byte[] orderDate = result.getValue(Bytes.toBytes("orders"), Bytes.toBytes("order_date"));

   String[] dateParts = Bytes.toString(orderDate).split("\\W+");
   boolean inWindow = dateParts[1].equals("11") && Integer.parseInt(dateParts[2]) >= 21;
   String flag = inWindow ? "Y" : "N";

   Text outKey = new Text(Bytes.toString(categoryId) + ":" + flag);
   context.write(outKey, new IntWritable(1));
 }
Пример #15
0
    /**
     * Emits one line {@code key + \001 + value} for every usable value of the group.
     *
     * <p>Key and values are trimmed first. Nothing is emitted when the trimmed key fails
     * {@code SSO.tnoe}; individual values failing {@code SSO.tnoe} are skipped. ({@code SSO.tnoe}
     * is defined elsewhere; presumably a "non-null and non-empty" check.) The line is written as
     * the output key with a null value.
     *
     * @param key grouping key
     * @param values values to pair with the key
     * @param context sink for the output lines
     */
    protected void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

      String trimmedKey = key.toString().trim();
      if (!SSO.tnoe(trimmedKey)) {
        return;
      }

      for (Text value : values) {
        String info = value.toString().trim();
        if (!SSO.tnoe(info)) {
          continue;
        }
        result_key.set(trimmedKey + "\001" + info);
        context.write(result_key, NullWritable.get());
      }
    }
Пример #16
0
    /**
     * Emits one (field ordinal + delimiter + bucket, {@code outVal}) pair per schema field of
     * the input record.
     *
     * <p>The line is split with {@code fieldDelimRegex}. For a categorical field the bucket is
     * the raw field text; for an integer field it is the parsed value divided by the field's
     * bucket width; for a double field the value is truncated to int before the same division.
     * Fields of any other type are skipped.
     *
     * @param key input key, not used
     * @param value one delimited input record
     * @param context sink for the per-field pairs
     * @throws NumberFormatException if a numeric field cannot be parsed
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String[] items = value.toString().split(fieldDelimRegex);

      for (RichAttribute field : schema.getFields()) {
        String keyPrefix = "" + field.getOrdinal() + fieldDelim;
        if (field.isCategorical()) {
          outKey.set(keyPrefix + items[field.getOrdinal()]);
        } else if (field.isInteger()) {
          bucket = Integer.parseInt(items[field.getOrdinal()]) / field.getBucketWidth();
          outKey.set(keyPrefix + bucket);
        } else if (field.isDouble()) {
          bucket = ((int) Double.parseDouble(items[field.getOrdinal()])) / field.getBucketWidth();
          outKey.set(keyPrefix + bucket);
        } else {
          // No key was built for this field. Writing here would re-emit the key left over
          // from the previous field (or previous record) and inflate its count.
          continue;
        }
        context.write(outKey, outVal);
      }
    }
Пример #17
0
    /**
     * Emits a roughly 1% random sample of records keyed by page title.
     *
     * <p>The line is split on {@code \001} and must have exactly 7 fields, read in order as
     * cookie, curl, ctext, url, title, host and cate (each trimmed). Records with an empty host
     * or a title rejected by {@code isValidTitle} are dropped. For the rest, {@code word} is set
     * to the title and {@code word1} to the other six fields, each preceded by {@code \001}; the
     * pair is written only when a random integer in 0..99 equals 11 and the title is longer than
     * 10 characters.
     *
     * @param key input key, not used
     * @param value one {@code \001}-delimited record
     * @param context sink for the sampled (title, fields) pairs
     */
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {

      String[] fields = value.toString().split("\001");
      if (fields.length != 7) {
        return;
      }

      String cookie = fields[0].trim();
      String curl = fields[1].trim();
      String ctext = fields[2].trim();
      String url = fields[3].trim();
      String title = fields[4].trim();
      String host = fields[5].trim();
      String cate = fields[6].trim();

      if (host.equals("") || !isValidTitle(title)) {
        return;
      }

      word.set(title);
      word1.set(
          "\001" + cookie + "\001" + curl + "\001" + ctext + "\001" + url + "\001" + host
              + "\001" + cate);

      // Keep exactly one of the hundred equally likely integer outcomes.
      int draw = (int) (100 * Math.random());
      if (draw == 11 && title.length() > 10) {
        context.write(word, word1);
      }
    }
Пример #18
0
    /**
     * Collects the cookie of every value in the group into one space-separated list.
     *
     * <p>Each value is split on {@code \001}; values that do not yield exactly 7 parts are
     * ignored, otherwise part 1 (trimmed) is taken as the cookie. The output value is
     * {@code \001} followed by the cookies separated by single spaces, with leading and trailing
     * whitespace removed. A pair is written even when no value contributed a cookie.
     *
     * @param key grouping key, written through unchanged
     * @param values {@code \001}-delimited records for the key
     * @param context sink for the (key, cookie list) pair
     */
    protected void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

      StringBuilder cookies = new StringBuilder();
      for (Text value : values) {
        String[] parts = value.toString().split("\001");
        if (parts.length != 7) {
          continue;
        }
        cookies.append(parts[1].trim()).append(" ");
      }

      result.set("\001" + cookies.toString().trim());
      context.write(key, result);
    }
Пример #19
0
    /**
     * Aggregates the (url, title) values of one key into a single sorted summary line.
     *
     * <p>Each value is trimmed and split on TAB into a URL and an optional title; blank values
     * and values with an empty URL are skipped. Per URL the method keeps an occurrence count and
     * one title: the first one seen, replaced by a later one only if that one is shorter.
     *
     * <p>The URLs are then sorted with {@code Collections.sort} (order defined by
     * {@code SortElement}, declared elsewhere) and written as one value of the form
     * {@code \001 count TAB url TAB title \001 count TAB url TAB title \001 ...}.
     *
     * @param key grouping key, written through unchanged
     * @param values {@code url TAB title} records for the key
     * @param context sink for the (key, summary) pair
     */
    protected void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

      // url -> "<count>\t<title>"
      Hashtable<String, String> url_hash = new Hashtable<String, String>();

      Iterator<Text> it = values.iterator();
      while (it.hasNext()) {
        String val = it.next().toString().trim();
        if (val.equals("")) {
          continue;
        }
        String[] seg_arr = val.split("\t");
        String url = seg_arr[0].trim();
        String title = seg_arr.length > 1 ? seg_arr[1].trim() : "";
        if (url.equals("")) {
          continue;
        }

        if (!url_hash.containsKey(url)) {
          url_hash.put(url, "1\t" + title);
          continue;
        }

        // Stored entries always start with the count, so index 0 is always present.
        String[] old_seg_arr = url_hash.get(url).split("\t");
        int new_count = Integer.parseInt(old_seg_arr[0].trim()) + 1;

        // Keep the shorter of the stored and the incoming title. An empty stored title has no
        // second segment after the split and therefore stays empty.
        String kept_title = "";
        if (old_seg_arr.length > 1) {
          kept_title = old_seg_arr[1].trim();
          if (kept_title.length() > title.length()) {
            kept_title = title;
          }
        }

        // NOTE(review): remove-then-put is kept from the original; it may influence the
        // table's enumeration order and hence the order of equally ranked URLs after sorting.
        url_hash.remove(url);
        url_hash.put(url, new_count + "\t" + kept_title);
      }

      ArrayList<SortElement> al = new ArrayList<SortElement>();
      Enumeration<String> url_enum = url_hash.keys();
      while (url_enum.hasMoreElements()) {
        String temp_url = url_enum.nextElement();
        String[] tct_seg = url_hash.get(temp_url).split("\t");
        if (tct_seg.length < 1) {
          continue;
        }
        int temp_count = Integer.parseInt(tct_seg[0]);
        String temp_title = tct_seg.length > 1 ? tct_seg[1] : "";
        al.add(new SortElement(temp_url, temp_count, temp_title));
      }

      Collections.sort(al);

      StringBuilder url_info = new StringBuilder("\001");
      for (SortElement tst : al) {
        if (tst == null) {
          continue;
        }
        String temp_url = tst.key;
        int temp_count = (int) tst.val;
        if ((temp_url == null) || ((temp_url.trim()).equals("")) || (temp_count < 1)) {
          continue;
        }

        // Look the title up for THIS url. Previously the title variable left over from the
        // last enumerated entry was appended to every row.
        String temp_title = "";
        String stored = url_hash.get(temp_url);
        if (stored != null) {
          String[] stored_seg = stored.split("\t");
          if (stored_seg.length > 1) {
            temp_title = stored_seg[1];
          }
        }

        url_info
            .append(temp_count)
            .append("\t")
            .append(temp_url)
            .append("\t")
            .append(temp_title)
            .append("\001");
      }

      result.set(url_info.toString());
      context.write(key, result);
    }
Пример #20
0
    /**
     * Converts one 11-field {@code \001}-delimited access record into an area-keyed record
     * tagged {@code qunar}.
     *
     * <p>Fields are read in order as area, access time, pname, source IP, destination IP, host,
     * url, referrer, cookie, loc and agent. Records with a different field count, or whose host
     * fails {@code SSO.tnoe}, are dropped. An area code is derived from host + url via
     * {@code getAreaFromLink} and {@code getDecodeUrl} with every {@code &} removed; date and
     * time are derived from the access time via {@code TimeOpera}. The record is emitted only if
     * the area code, date, time and area all pass {@code SSO.tnoe} (defined elsewhere; presumably
     * a "non-null and non-empty" check).
     *
     * <p>Output key: area. Output value, {@code \001}-joined: date, time, source IP, destination
     * IP, area code, cookie, loc, agent, the result of {@code getDADateFromLink}, and the
     * literal {@code qunar}.
     *
     * @param key input key, not used
     * @param value one {@code \001}-delimited record
     * @param context sink for the (area, record) pair
     */
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {

      String[] fields = value.toString().trim().split("\001");
      if (fields.length != 11) {
        return;
      }

      String area = fields[0].trim();
      String atime = fields[1].trim();
      String sip = fields[3].trim();
      String dip = fields[4].trim();
      String host = fields[5].trim();
      String url = fields[6].trim();
      String cookie = fields[8].trim();
      String loc = fields[9].trim();
      String agent = fields[10].trim();

      if (!SSO.tnoe(host)) {
        return;
      }
      if (!SSO.tnoe(url)) {
        url = "";
      }
      String link = host + url;

      // NOTE(review): the area code is dereferenced before it is validated below; a null from
      // either helper would throw here. Confirm the helpers never return null.
      String areaCode = getDecodeUrl(getAreaFromLink(link)).replaceAll("&", "");
      String datePart = TimeOpera.getDateFromStr(atime);
      String timePart = TimeOpera.getTimeFromStr(atime);

      boolean usable =
          SSO.tnoe(areaCode) && SSO.tnoe(datePart) && SSO.tnoe(timePart) && SSO.tnoe(area);
      if (!usable) {
        return;
      }

      String linkDate = getDADateFromLink(link);

      StringBuilder record = new StringBuilder();
      record.append(datePart.trim()).append("\001");
      record.append(timePart.trim()).append("\001");
      record.append(sip).append("\001");
      record.append(dip).append("\001");
      record.append(areaCode.trim()).append("\001");
      record.append(cookie).append("\001");
      record.append(loc).append("\001");
      record.append(agent).append("\001");
      record.append(linkDate).append("\001qunar");

      word.set(area.trim());
      word1.set(record.toString());
      context.write(word, word1);
    }