Пример #1
0
 @Override
 public void myInferenceReduce(MyKeyValue inKv, MyKeyValue outKv, TaskInputOutputContext context)
     throws IOException, InterruptedException {
   boolean isCartoon = false;
   boolean isTv = false;
   String name = null, url = null;
   for (int i = 0; i < inKv.values.size(); i++) {
     String val = inKv.values.get(i);
     if (val == null) continue;
     String[] tks = val.split("\t");
     if (tks.length == 6) {
       url = tks[1];
       name = tks[2];
       if (tks[3].contains("电视剧_")) {
         isTv = true;
       }
       if (tks[3].equals("电视剧_类型")) {
         if (tks[4].contains("动画") || tks[4].contains("儿童")) {
           isCartoon = true;
         }
       }
     }
   }
   if (isTv && !isCartoon && name != null && url != null) {
     MyMR.reduceOutput(inKv.key, url + "\t" + name + "\tTRUETYPE\t电视剧\t-1", context);
   }
 }
Пример #2
0
 @Override
 public void myInferenceReduce(MyKeyValue inKv, MyKeyValue outKv, TaskInputOutputContext context)
     throws IOException, InterruptedException {
   String name = null;
   boolean isNovel = false, isQidian = false;
   Float pv = (float) 0.0, click = (float) 0.0;
   for (int i = 0; i < inKv.values.size(); i++) {
     String val = inKv.values.get(i);
     if (val == null) continue;
     String[] tks = val.split("\t");
     if (tks.length == 6) {
       if (name == null) {
         name = tks[2];
       }
       if (tks[3].contains("小说_")) {
         isNovel = true;
       }
       if (tks[1].contains("http://www.qidian.com/book")
           || tks[1].contains("http://www.qdmm.com/mmweb")) {
         isQidian = true;
       }
       if (tks[3].equals("查询热度0")) {
         try {
           pv = (float) Integer.parseInt(tks[4]);
         } catch (Exception e) {
         }
       }
       if (tks[3].equals("点击")) {
         tks[4] = tks[4].replaceAll(",", "");
         try {
           click = (float) Integer.parseInt(tks[4]);
         } catch (Exception e) {
         }
       }
     }
   }
   if (isNovel) {
     for (int i = 0; i < inKv.values.size(); i++) {
       String val = inKv.values.get(i);
       if (val == null) continue;
       String[] tks = val.split("\t");
       if (tks.length == 6) {
         if (tks[3].equals("查询热度1") || tks[3].equals("重要性")) {
           inKv.values.set(i, null);
         }
       }
     }
     if (isQidian && click > 2000000) {
       MyMR.reduceOutput(
           inKv.key,
           "SOGOUID_"
               + inKv.key
               + "\t"
               + name
               + "\t查询热度1\t"
               + String.format("%.4f", pv / 1000)
               + "\t-1",
           context);
       MyMR.reduceOutput(
           inKv.key,
           "SOGOUID_"
               + inKv.key
               + "\t"
               + name
               + "\t重要性\t"
               + String.format("%.4f", pv / 10000)
               + "\t-1",
           context);
     }
   }
 }