/**
 * Emits one (word, payload) pair per input line; the payload depends on where the line came from.
 *
 * <p>The name of the input split's grandparent directory is compared with the {@code train_file}
 * configuration value. On a match the payload is the second token of the line. Otherwise the
 * second token is discarded and the remaining tokens are consumed in pairs, each pair being
 * appended as {@code first + dlt + second + "\t"} to a buffer that starts with {@code dlt}.
 *
 * @param key input key; not used
 * @param value whitespace-separated input line
 * @param context task context providing the split, the configuration and the output
 * @throws IOException if writing the output fails
 * @throws InterruptedException if the task is interrupted while writing
 * @throws java.util.NoSuchElementException if the line has fewer than two tokens, or an odd
 *     number of tokens after the first two on the non-train branch
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  FileSplit inputSplit = (FileSplit) context.getInputSplit();
  String sourceDir = inputSplit.getPath().getParent().getParent().getName();
  String trainDir = context.getConfiguration().get("train_file");
  boolean fromTrainFile = sourceDir.equals(trainDir);

  // Both kinds of line start with the same two tokens.
  StringTokenizer tokens = new StringTokenizer(value.toString());
  String word = tokens.nextToken();
  String secondToken = tokens.nextToken();

  String payload;
  if (fromTrainFile) {
    payload = secondToken;
  } else {
    StringBuilder joined = new StringBuilder(dlt);
    while (tokens.hasMoreTokens()) {
      String first = tokens.nextToken();
      String second = tokens.nextToken();
      joined.append(first);
      joined.append(dlt);
      joined.append(second);
      joined.append("\t");
    }
    payload = joined.toString();
  }

  myKey.set(word);
  myVal.set(payload);
  context.write(myKey, myVal);
}
Ejemplo n.º 2
1
    /**
     * Emits at most one record per matching key prefix, chosen by the key's leading character.
     *
     * <p>Keys starting with {@code O}, {@code P} or {@code S} are written with all their values
     * concatenated, each value preceded by a single space. Keys starting with {@code L} are
     * written with a single-space value. Any other key produces no output.
     *
     * @param key grouping key whose first character selects the branch
     * @param values values grouped under {@code key}
     * @param context task context used to write the output
     * @throws IOException if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      String keyS = key.toString();

      if (keyS.startsWith("O") || keyS.startsWith("P") || keyS.startsWith("S")) {
        // StringBuilder keeps this linear in the total value length; String += would re-copy
        // the accumulated text on every iteration.
        StringBuilder joined = new StringBuilder();
        for (Text val : values) {
          joined.append(' ').append(val.toString());
        }
        result.set(joined.toString());
        context.write(key, result);
      }

      if (keyS.startsWith("L")) {
        result.set(" ");
        context.write(key, result);
      }
    }
Ejemplo n.º 3
0
 /**
  * Emits one record per ad-position entry of a tab-separated log line.
  *
  * <p>Zero-based fields 11, 4 and 14 of the line are read as the output key, the area and the
  * request info. Nothing is emitted when field 11 is {@code "-"}. Otherwise the request info is
  * split on {@code |}; each entry is split on {@code :} or {@code ;}, its first part is mapped
  * through {@code adpCode2Id}, and the status is {@code "1"} when any later part has a
  * {@code #}-prefix other than {@code "-"}, else {@code "2"}. The emitted value is
  * {@code num + ":" + area + "," + adpId + "," + status}.
  *
  * @param key input key; not used
  * @param value tab-separated log line
  * @param context task context used to write the output
  * @throws IOException if an entry's position code cannot be resolved (the original failure is
  *     attached as the cause) or if writing the output fails
  * @throws InterruptedException if the task is interrupted while writing
  */
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String[] fields = value.toString().split("\t");
   String fck = fields[11];
   String area = fields[4];
   String requestInfo = fields[14];
   newKey.set(fck);
   if (fck.equals("-")) {
     return;
   }

   for (String entry : requestInfo.split("\\|")) {
     String[] apAdMat = entry.split("[:;]");
     String adpId;
     try {
       adpId = adpCode2Id.get(apAdMat[0]).trim();
     } catch (Exception e) {
       // Same message as before, but chain the cause so the original stack trace survives.
       throw new IOException(e.getMessage() + ":" + value.toString(), e);
     }

     String returnStatus = "2"; // no ad returned for this position
     for (int j = 1; j < apAdMat.length; j++) {
       String ad = apAdMat[j].split("#")[0];
       if (!ad.equals("-")) {
         returnStatus = "1"; // at least one ad returned
         break;
       }
     }
     newValue.set(num + ":" + area + "," + adpId + "," + returnStatus);
     context.write(newKey, newValue);
   }
 }
    /**
     * Populates the output writables from a file-system entry and writes them under the input key.
     *
     * <p>Entries whose extension is not in {@code Extensions} are ignored. For the rest, the path,
     * extension and hex-encoded {@code sha1}/{@code md5} values are stored, followed by either
     * the HDFS content path (with an empty content buffer) or the in-memory content (with an
     * empty HDFS path). Entries with neither HDFS content nor a content buffer are logged and
     * skipped.
     *
     * @param key raw key bytes, copied into the output key
     * @param value entry to describe
     * @param context task context used to write the output
     * @throws IOException if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void map(ImmutableHexWritable key, FsEntry value, Context context)
        throws IOException, InterruptedException {
      if (!Extensions.contains(value.extension())) {
        return;
      }

      FullPath.set(value.fullPath());
      Ext.set(value.extension());
      encodeHex(Sha, value, "sha1");
      encodeHex(Md5, value, "md5");

      if (!value.isContentHDFS()) {
        final byte[] content = value.getContentBuffer();
        if (content == null) {
          LOG.warn(value.fullPath() + " didn't have a content buffer, skipping.");
          return;
        }
        Vid.set(content, 0, content.length);
        HdfsPath.set("");
      } else {
        Vid.setSize(0);
        HdfsPath.set(value.getContentHdfsPath());
      }

      // NOTE(review): Fields presumably aggregates the writables set above - confirm.
      final byte[] rawKey = key.get();
      OutKey.set(rawKey, 0, rawKey.length);
      context.write(OutKey, Fields);
    }
Ejemplo n.º 5
0
 /**
  * URL-decodes the given text, using GBK when {@code isgbk} accepts the trimmed input and UTF-8
  * otherwise.
  *
  * <p>If decoding throws, the two-argument {@code evaluate} overload is tried with the
  * {@code "jsescape"} flag. If that also throws, the URL is returned as it stood when decoding
  * failed.
  *
  * @param urlText URL to decode; may be {@code null}
  * @param flag1 not used by this method
  * @param flag2 not used by this method
  * @return the shared {@code dstURL} instance holding the result, or {@code null} when
  *     {@code urlText} is {@code null}
  */
 public Text evaluate(Text urlText, String flag1, String flag2) {
   if (urlText == null) {
     return null;
   }
   String url = urlText.toString();
   if (url != null) {
     try {
       url = url.trim();
       String charset = isgbk(url) ? "GBK" : "UTF-8";
       url = URLDecoder.decode(url, charset);
     } catch (Exception decodeFailure) {
       try {
         url = evaluate(urlText, new Text("jsescape")).toString();
       } catch (Exception fallbackFailure) {
         // Both strategies failed: fall through and return whatever url currently holds.
       }
     }
   }
   dstURL.set(url);
   return dstURL;
 }
Ejemplo n.º 6
0
  /**
   * Reads the next line into {@code value}, replacing every {@code "$#$"} with the
   * {@code \001} character, and stores the line's starting position in {@code key}.
   *
   * <p>Lines for which the reader consumes {@code maxLineLength} bytes or more are logged and
   * skipped.
   *
   * @param key receives the position at which the line starts
   * @param value receives the line text after delimiter replacement
   * @return {@code true} if a line was read; {@code false} when the reader consumed nothing or
   *     {@code pos} has reached {@code end}
   * @throws IOException if the underlying reader fails
   */
  @Override
  public boolean next(LongWritable key, Text value) throws IOException {
    while (pos < end) {
      key.set(pos);

      int remaining = (int) Math.min(Integer.MAX_VALUE, end - pos);
      int consumed =
          lineReader.readLine(value, maxLineLength, Math.max(remaining, maxLineLength));

      // The replacement runs before the zero-length check, exactly as in the original flow.
      value.set(value.toString().replace("$#$", "\001"));

      if (consumed == 0) {
        return false;
      }
      pos += consumed;
      if (consumed < maxLineLength) {
        return true;
      }
      // line too long. try again
      log.info("Skipped line of size " + consumed + " at pos " + (pos - consumed));
    }
    return false;
  }
Ejemplo n.º 7
0
  /**
   * Emits a (noun, adjective) pair for every noun/adjective combination found in one line.
   *
   * <p>The line is truncated to the configured {@code MAX_LINE_LENGTH}, normalized and then
   * tokenized. Nouns that fully match {@code pattern} are skipped.
   *
   * @param key input key; not used
   * @param value the line of text to analyze
   * @param context the Hadoop context providing the configuration and the output
   * @throws IOException if writing the output fails
   * @throws InterruptedException if the task is interrupted while writing
   * @throws NumberFormatException if either configuration value is missing or not an integer
   */
  @Override
  public void map(Object key, Text value, Context context)
      throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    int maxLineLength = Integer.parseInt(conf.get(MAX_LINE_LENGTH));
    // The result is unused here, but the parse still rejects a missing or malformed setting.
    Integer.parseInt(conf.get(NUM_OF_AROUND_WORD));

    String line = value.toString();
    if (line.length() > maxLineLength) {
      line = line.substring(0, maxLineLength);
    }
    String normalized = net.broomie.utils.Normalizer.normalize(line);
    tokenizer.extractToken2(normalized);

    String[] nouns = tokenizer.getNoun();
    String[] adjectives = tokenizer.getAdj();
    for (String noun : nouns) {
      if (pattern.matcher(noun).matches()) {
        continue;
      }
      targetToken.set(noun);
      for (String adjective : adjectives) {
        aroundToken.set(adjective);
        context.write(targetToken, aroundToken);
      }
    }
  }
Ejemplo n.º 8
0
    /**
     * Pairs the {@code E} and {@code F} titles that arrive under one document-number pair.
     *
     * <p>Each incoming title is stored in {@code eTitle} or {@code fTitle} according to its left
     * element. The pair {@code (fTitle, eTitle)} is collected only when exactly two titles were
     * seen; otherwise the incomplete data is logged.
     *
     * @param docnoPair grouping key
     * @param titles titles tagged with a language identifier
     * @param output collector receiving the title pair
     * @param reporter not used
     * @throws IOException if collecting the output fails
     * @throws RuntimeException if a title carries an unknown language identifier
     */
    @Override
    public void reduce(
        PairOfInts docnoPair,
        Iterator<PairOfIntString> titles,
        OutputCollector<Text, Text> output,
        Reporter reporter)
        throws IOException {
      eTitle.clear();
      fTitle.clear();
      sLogger.info(docnoPair);

      int seen = 0;
      while (titles.hasNext()) {
        PairOfIntString entry = titles.next();
        sLogger.info(entry);
        if (entry.getLeftElement() == CLIRUtils.E) {
          eTitle.set(entry.getRightElement());
        } else if (entry.getLeftElement() == CLIRUtils.F) {
          fTitle.set(entry.getRightElement());
        } else {
          throw new RuntimeException("Unknown language ID: " + entry.getLeftElement());
        }
        seen++;
      }

      if (seen != 2) {
        sLogger.info("Incomplete data for " + docnoPair + ":" + fTitle + "," + eTitle);
        return;
      }
      output.collect(fTitle, eTitle);
    }
Ejemplo n.º 9
0
 /**
  * Averages the integer points assigned to one cluster and emits the mean with its members.
  *
  * <p>Each value is an {@code "x,y"} pair of integers. The output key is
  * {@code "Cluster: " + key}; the output value is {@code meanX + "," + meanY}, a tab, and the
  * member points joined with {@code ;} in iteration order.
  *
  * @param key cluster identifier
  * @param values member points as {@code "x,y"} strings
  * @param context task context used to write the output
  * @throws IOException if writing the output fails
  * @throws InterruptedException if the task is interrupted while writing
  * @throws NumberFormatException if a coordinate is not an integer
  */
 public void reduce(Text key, Iterable<Text> values, Context context)
     throws IOException, InterruptedException {
   // long accumulators: summing many int coordinates can exceed Integer.MAX_VALUE.
   long sumX = 0L;
   long sumY = 0L;
   int counter = 0;
   // StringBuilder avoids re-copying the growing point list on every iteration.
   StringBuilder clusterPoints = new StringBuilder();
   for (Text value : values) {
     String line = value.toString();
     String[] coordinates = line.split("\\,");
     sumX += Integer.parseInt(coordinates[0]);
     sumY += Integer.parseInt(coordinates[1]);
     if (counter > 0) {
       clusterPoints.append(';');
     }
     clusterPoints.append(line);
     counter++;
   }
   float newX = (float) sumX / counter;
   float newY = (float) sumY / counter;
   emitKey.set("Cluster: " + key.toString());
   emitValue.set(newX + "," + newY + "\t" + clusterPoints);
   context.write(emitKey, emitValue);
 }
Ejemplo n.º 10
0
 /**
  * Assigns an integer point to its nearest centroid by Euclidean distance.
  *
  * <p>The input line is an {@code "x,y"} pair. The output key is the chosen centroid as
  * {@code "cx,cy"} and the output value is the point itself. When several centroids are equally
  * close, the one with the highest index wins. With no centroids the key is the empty string.
  *
  * @param key input key; not used
  * @param value point as an {@code "x,y"} string
  * @param context task context used to write the output
  * @throws InterruptedException if the task is interrupted while writing
  * @throws IOException if writing the output fails
  * @throws NumberFormatException if a coordinate is not an integer
  */
 public void map(LongWritable key, Text value, Context context)
     throws InterruptedException, IOException {
   String[] dataPoints = value.toString().split("\\,");
   // Parse once rather than once per centroid. Holding the coordinates as double keeps the
   // squared differences from overflowing int for widely separated points.
   double x = Integer.parseInt(dataPoints[0]);
   double y = Integer.parseInt(dataPoints[1]);

   float min = Float.MAX_VALUE;
   String clusterPoint = "";
   for (int i = 0; i < centroids.length; i++) {
     double xdiff = centroids[i][0] - x;
     double ydiff = centroids[i][1] - y;
     float current = (float) Math.sqrt(xdiff * xdiff + ydiff * ydiff);
     if (min >= current) { // >= so that a later centroid wins a tie
       min = current;
       clusterPoint = centroids[i][0] + "," + centroids[i][1];
     }
   }

   Text emitKey = new Text();
   Text emitValue = new Text();
   emitKey.set(clusterPoint);
   emitValue.set(dataPoints[0] + "," + dataPoints[1]);
   context.write(emitKey, emitValue);
 }
Ejemplo n.º 11
0
  /**
   * Emits pairwise products of the behavior values found on one input line.
   *
   * <p>The line is a comma-separated list of {@code value:userId} entries. For every index pair
   * {@code (i, j)} with {@code j >= i}, the record {@code (userId[i], userId[j] + ":" + product)}
   * is written, unless both ids start with {@code 'm'} and the two ids differ.
   *
   * @param key input key; not used
   * @param value comma-separated {@code value:userId} entries
   * @param context task context used to write the output
   * @throws IOException if writing the output fails
   * @throws InterruptedException if the task is interrupted while writing
   */
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    String[] vectors = value.toString().split(","); // all behavior vectors of one sub-brand
    int count = vectors.length; // number of behavior vectors for this sub-brand

    String[] userId = new String[count];
    double[] behavior = new double[count];
    for (int idx = 0; idx < count; idx++) {
      String[] parts = vectors[idx].split(":"); // one behavior vector
      userId[idx] = parts[1];
      behavior[idx] = Double.parseDouble(parts[0]);
    }

    for (int a = 0; a < count; a++) {
      for (int b = a; b < count; b++) {
        boolean twoDistinctMIds =
            userId[a].charAt(0) == 'm'
                && userId[b].charAt(0) == 'm'
                && !userId[a].equals(userId[b]);
        if (twoDistinctMIds) {
          continue;
        }
        keyText.set(userId[a]);
        valueText.set(userId[b] + ":" + Double.toString(behavior[a] * behavior[b]));
        context.write(keyText, valueText);
      }
    }
  }
Ejemplo n.º 12
0
    /**
     * Re-keys a delimited record by source entity and rank.
     *
     * <p>The first two fields are the source and target entity ids; the last field is parsed as
     * the integer rank. When {@code recordInOutput} is set, the fields in between are treated as
     * two equally long records (source, then target) whose length is derived from the first line
     * seen, and both are carried into the output. Otherwise only the target id and the last field
     * are emitted.
     *
     * @param key input key; not used
     * @param value delimited input record
     * @param context task context used to write the output
     * @throws IOException if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String[] items = value.toString().split(fieldDelimRegex);

      srcEntityId = items[0];
      trgEntityId = items[1];
      String lastField = items[items.length - 1];
      rank = Integer.parseInt(lastField);

      outKey.initialize();
      if (!recordInOutput) {
        // only target entity id and distance
        outKey.add(srcEntityId, rank);
        outVal.set(trgEntityId + fieldDelim + lastField);
      } else {
        // include source and target record
        if (recLength == -1) {
          // Computed once from the first line; later lines reuse the same offsets.
          recLength = (items.length - 3) / 2;
          srcRecBeg = 2;
          trgRecBeg = 2 + recLength;
          srcRecEnd = trgRecBeg;
          trgRecEnd = trgRecBeg + recLength;
        }
        srcRec = org.chombo.util.Utility.join(items, srcRecBeg, srcRecEnd, fieldDelim);
        trgRec = org.chombo.util.Utility.join(items, trgRecBeg, trgRecEnd, fieldDelim);
        outKey.add(srcEntityId, srcRec, rank);
        outVal.set(trgEntityId + fieldDelim + trgRec + fieldDelim + lastField);
      }
      context.write(outKey, outVal);
    }
  /**
   * Reads the next line and splits it into the current key and value.
   *
   * <p>The line is split on tabs; if that yields fewer than two parts it is split on single
   * spaces instead. The first part becomes the key and the second the value.
   *
   * @return {@code true} if a line was read; {@code false} (with key and value reset to
   *     {@code null}) when the reader consumed nothing
   * @throws IOException if the underlying reader fails
   * @throws InterruptedException declared by the signature; not thrown by the visible code
   * @throws ArrayIndexOutOfBoundsException if the line has no second part under either split
   */
  @Override
  public boolean nextKeyValue() throws IOException, InterruptedException {
    if (key == null) {
      key = new Text();
    }
    if (value == null) {
      value = new Text();
    }

    Text edge = new Text();
    int maxBytes = (int) Math.min((long) Integer.MAX_VALUE, end - pos);
    int consumed = in.readLine(edge, Integer.MAX_VALUE, maxBytes);
    if (consumed == 0) {
      key = null;
      value = null;
      return false;
    }

    String line = edge.toString();
    String[] parts = line.split("\t");
    if (parts.length < 2) {
      parts = line.split(" ");
    }
    key.set(parts[0]);
    value.set(parts[1]);
    pos += consumed;
    return true;
  }
Ejemplo n.º 14
0
    /**
     * Joins the values of one key that came from two tagged sources.
     *
     * <p>Each value carries a one-character tag: {@code '1'} marks the first source, anything
     * else the second. The tag is stripped and the cross product of both sides is written. Keys
     * with no first-source value produce nothing; keys with no second-source value produce one
     * row per first-source value with the second half left empty.
     *
     * @param key join key
     * @param values tagged values from both sources
     * @param context task context used to write the output and report status
     * @throws IOException if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      List<String> first = new ArrayList<String>();
      List<String> second = new ArrayList<String>();

      // Route each value to its side by the leading tag, dropping the tag itself.
      for (Text value : values) {
        List<String> side = (value.charAt(0) == '1') ? first : second;
        side.add(value.toString().substring(1));
        context.setStatus("OK");
      }
      context.setStatus("OK");

      if (first.isEmpty()) {
        return;
      }
      if (second.isEmpty()) {
        second.add(null); // placeholder so the loop below still emits one row per left value
      }

      // Do the cross product
      String keyText = key.toString();
      for (String left : first) {
        for (String right : second) {
          if (right != null) {
            OUT.set(keyText + "\t" + left + "\t" + keyText + "\t" + right);
          } else {
            OUT.set(keyText + "\t" + left + "\t\t");
          }
          context.write(NULL, OUT);
        }
      }
    }
Ejemplo n.º 15
0
    /**
     * Replaces the text field of a {@code \001}-delimited record with its word statistics.
     *
     * <p>Records whose field count differs from {@code loc_field_num} are ignored, as are records
     * for which {@code SSO.tnoe} rejects the text field or the computed statistics. The output
     * key is the first field followed by {@code \001}; the output value is the remaining fields
     * (with the text field replaced), each followed by {@code \001}, then trimmed.
     *
     * @param key input key; not used
     * @param value {@code \001}-delimited input record
     * @param context task context used to write the output
     * @throws IOException if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      String[] segments = value.toString().trim().split("\001");
      System.out.println("field_num:" + loc_field_num + "  seg_arr.length:" + segments.length);
      if (segments.length != loc_field_num) {
        return;
      }

      String text = segments[loc_text_index];
      if (!SSO.tnoe(text)) {
        return;
      }
      String stats = WordStatis.wordStatis(text.trim());
      if (!SSO.tnoe(stats)) {
        return;
      }
      stats = stats.trim();

      StringBuilder record = new StringBuilder();
      for (int j = 1; j < loc_text_index; j++) {
        record.append(segments[j]).append("\001");
      }
      record.append(stats).append("\001");
      for (int j = loc_text_index + 1; j < loc_field_num; j++) {
        record.append(segments[j]).append("\001");
      }

      word.set(segments[0] + "\001");
      // trim() also strips the trailing \001, since it removes every char <= U+0020.
      word1.set(record.toString().trim());
      context.write(word, word1);
    } // map
Ejemplo n.º 16
0
  /**
   * Emits friend-suggestion tuples for every entry in a user's friend list.
   *
   * <p>The line is split on single whitespace characters; the first part is the user and the
   * second a comma-separated friend list. For each friend {@code f} the mapper writes
   * {@code (f, other + ",1")} for every other position in the list, followed by
   * {@code (f, user + ",-1")}. Lines without a friend list are skipped.
   *
   * @param key input key; not used
   * @param value input line: user, whitespace, comma-separated friends
   * @param context task context used to write the output
   * @throws IOException if writing the output fails
   * @throws InterruptedException if the task is interrupted while writing
   */
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    userRow = value.toString().split("\\s");
    // "< 2" rather than "== 1": a whitespace-only line splits into a zero-length array, which
    // used to pass the guard and then fail on userRow[1].
    if (userRow.length < 2) {
      userRow = null;
      return;
    }

    friendList = userRow[1].split(",");
    // i and j are fields declared outside this method; they are kept as the loop counters.
    for (i = 0; i < friendList.length; i++) {
      keyUser.set(new Text(friendList[i]));
      for (j = 0; j < friendList.length; j++) {
        if (j == i) {
          continue;
        }
        suggTuple.set(friendList[j] + ",1");
        context.write(keyUser, suggTuple);
      }
      existingFriend.set(userRow[0] + ",-1");
      context.write(keyUser, existingFriend);
    }
  }
Ejemplo n.º 17
0
    /**
     * Re-keys a review record by (category, helpful score) with (reviewer id, reviewer name) as
     * the value.
     *
     * <p>Records rejected by {@code FieldsUtil.isValid} produce no output.
     *
     * @param key input key; not used
     * @param value raw review record
     * @param context task context used to write the output
     * @throws IOException if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String record = value.toString();
      if (!FieldsUtil.isValid(record)) {
        return;
      }

      String reviewerIdStr = FieldsUtil.getFieldValue(record, FieldsIndex.REVIEWER_ID);
      String reviewerNameStr = FieldsUtil.getFieldValue(record, FieldsIndex.REVIEWER_NAME);
      String categoryStr = FieldsUtil.getFieldValue(record, FieldsIndex.CATEGORY);
      String helpfulScoreStr = FieldsUtil.getFieldValue(record, FieldsIndex.REVIEWER_SCORE);

      Text reviewerId = new Text(reviewerIdStr);
      Text reviewerName = new Text(reviewerNameStr);
      Text category = new Text(categoryStr);
      Text helpfulScore = new Text(helpfulScoreStr);

      reviewerInfo.set(reviewerId, reviewerName);
      categoryHelpful.set(category, helpfulScore);
      context.write(categoryHelpful, reviewerInfo);
    }
    /**
     * Writes the accumulated counters, one (name, count) record each, in a fixed order.
     *
     * @param context task context used to write the output
     * @throws IOException if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    protected void cleanup(Context context) throws IOException, InterruptedException {
      // The two arrays are parallel: labels[n] names counts[n].
      final String[] labels = {
        "thereIsAChange",
        "onlyAdded",
        "onlyRemoved",
        "bothAddedAndRemoved",
        "totalIncreased",
        "totalDecresed",
        "nochange"
      };
      final int[] counts = {
        thereIsAChange,
        onlyAdded,
        onlyRemoved,
        bothAddedAndRemoved,
        totalIncreased,
        totalDecresed,
        nochange
      };

      final Text label = new Text();
      final Text count = new Text();
      int position = 0;
      for (String name : labels) {
        label.set(name);
        count.set(Integer.toString(counts[position]));
        context.write(label, count);
        position++;
      }
    }
  /**
   * Keys both kinds of input record by item id.
   *
   * <p>A line with exactly four tab-separated tokens is treated as a {@code u.data} record: the
   * second token is the item id and the third is emitted as the value. Any other line is treated
   * as a {@code u.item} record split on {@code |}: the first token is the item id and the next
   * three tokens are emitted tab-joined. The {@code MyCounter/TOTALRECORDS} counter is
   * incremented once per line.
   *
   * @param key input key; not used
   * @param value input line
   * @param context task context used to write the output and update the counter
   * @throws IOException if writing the output fails
   * @throws InterruptedException if the task is interrupted while writing
   * @throws java.util.NoSuchElementException if a {@code u.item} line has fewer than four tokens
   */
  @Override
  protected void map(LongWritable key, Text value, Mapper.Context context)
      throws IOException, InterruptedException {
    String line = value.toString();
    StringTokenizer tabTokens = new StringTokenizer(line, "\t");

    String itemId;
    String payload;
    if (tabTokens.countTokens() != 4) { // u.item record
      StringTokenizer pipeTokens = new StringTokenizer(line, "|");
      itemId = pipeTokens.nextToken();
      String title = pipeTokens.nextToken();
      String release = pipeTokens.nextToken();
      String imdb = pipeTokens.nextToken();
      payload = title + "\t" + release + "\t" + imdb;
    } else { // u.data record
      tabTokens.nextToken(); // skip the first column
      itemId = tabTokens.nextToken();
      payload = tabTokens.nextToken();
    }

    item.set(itemId);
    fields.set(payload);
    context.write(item, fields);

    // TotalRecords counter
    context.getCounter("MyCounter", "TOTALRECORDS").increment(1);
  }
Ejemplo n.º 20
0
    /**
     * Sums the {@code a}- and {@code b}-tagged values of a key and emits their product.
     *
     * <p>Each value is {@code tag TAB number}. Numbers tagged {@code "a"} are added to the
     * starting value {@code a}, numbers tagged {@code "b"} to the starting value {@code b}; other
     * tags are ignored. The product is written as {@code product + TAB + "null"} only when it is
     * greater than zero.
     *
     * @param key grouping key, copied to the output key
     * @param values tagged numeric values
     * @param context task context used to write the output
     * @throws IOException if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      double sumA = a;
      double sumB = b;
      for (Text tagged : values) {
        StringTokenizer parts = new StringTokenizer(tagged.toString(), TAB);
        String tag = parts.nextToken();
        if (tag.equals("a")) {
          sumA += Double.parseDouble(parts.nextToken());
        } else if (tag.equals("b")) {
          sumB += Double.parseDouble(parts.nextToken());
        }
      }

      double product = sumA * sumB;
      if (product <= 0) {
        return;
      }
      Text outputKey = new Text();
      Text outputValue = new Text();
      outputKey.set(key);
      outputValue.set(product + TAB + "null"); // combined similarity
      context.write(outputKey, outputValue);
    }
 /**
  * Reads one fixed-length record and splits it into key and value.
  *
  * <p>Exactly {@code RECORD_LENGTH} bytes are read into {@code buffer}; the first
  * {@code KEY_LENGTH} bytes become the key and the following {@code VALUE_LENGTH} bytes the
  * value.
  *
  * @return {@code true} if a full record was read; {@code false} when {@code offset} has reached
  *     {@code length} or the stream ends exactly on a record boundary
  * @throws EOFException if the stream ends in the middle of a record
  * @throws IOException if the underlying stream fails
  */
 public boolean nextKeyValue() throws IOException {
   if (offset >= length) {
     return false;
   }

   int filled = 0;
   while (filled < RECORD_LENGTH) {
     long chunk = in.read(buffer, filled, RECORD_LENGTH - filled);
     if (chunk != -1) {
       filled += chunk;
       continue;
     }
     if (filled == 0) {
       return false; // clean end of input
     }
     throw new EOFException("read past eof");
   }

   if (key == null) {
     key = new Text();
   }
   if (value == null) {
     value = new Text();
   }
   key.set(buffer, 0, KEY_LENGTH);
   value.set(buffer, KEY_LENGTH, VALUE_LENGTH);
   offset += RECORD_LENGTH;
   return true;
 }
Ejemplo n.º 22
0
 /**
  * Scans forward for the next title/text pair and stores it in {@code key} and {@code value}.
  *
  * <p>The four markers are searched for in order: start of title, end of title, start of text,
  * end of text. The key receives the buffered bytes that precede the end-of-title marker; the
  * value receives the buffered text content after XML un-escaping, encoded as UTF-8. The shared
  * {@code buffer} is always reset before returning.
  *
  * @param key receives the raw title bytes
  * @param value receives the un-escaped text bytes
  * @return {@code true} if all four markers were found; {@code false} if the stream position is
  *     already at or past {@code end}, or any marker search fails
  * @throws IOException if reading from the underlying stream fails
  */
 protected boolean next(Text key, Text value) throws IOException {
   if (fsin.getPos() < end) {
     try {
       // NOTE(review): the boolean argument presumably controls whether bytes read while
       // searching are kept in buffer - confirm against readUntilMatch.
       if (readUntilMatch(START_TITLE_MARKER, false)) {
         if (readUntilMatch(END_TITLE_MARKER, true)) {
           // Drop the trailing end-of-title marker from the buffered bytes.
           int stop = buffer.getLength() - END_TITLE_MARKER.length;
           key.set(buffer.getData(), 0, stop);
           // Start with an empty buffer before collecting the text body.
           buffer.reset();
           if (readUntilMatch(START_TEXT_MARKER, false)) {
             if (readUntilMatch(END_TEXT_MARKER, true)) {
               // un-escape the XML entities encoding and
               // re-encode the result as raw UTF8 bytes
               // NOTE(review): this subtracts END_TITLE_MARKER.length (not END_TEXT_MARKER.length)
               // and then uses stop + 1 as the length; the two only cancel out if the title
               // marker is exactly one byte longer than the text marker - confirm.
               stop = buffer.getLength() - END_TITLE_MARKER.length;
               String xmlEscapedContent = new String(buffer.getData(), 0, stop + 1, UTF8);
               value.set(StringEscapeUtils.unescapeXml(xmlEscapedContent).getBytes(UTF8));
               return true;
             }
           }
         }
       }
     } finally {
       // Runs on every path, including the successful return above.
       buffer.reset();
     }
   }
   return false;
 }
Ejemplo n.º 23
0
 /**
  * Copies the local tab-separated file {@code /home/naga/dept} into a MapFile at {@code /nyse/}
  * on the {@code hdfs://hadoop:9000} file system.
  *
  * <p>Each input line must contain at least two tab-separated fields; the first becomes the key
  * and the second the value. Both the local reader and the MapFile writer are closed when the
  * method exits, whether it completes or fails.
  *
  * @param args not used
  * @throws IOException if the local file cannot be read or the MapFile cannot be written
  * @throws URISyntaxException if the hard-coded file-system URI is malformed
  */
 @SuppressWarnings("deprecation")
 public static void main(String[] args) throws IOException, URISyntaxException {
   String name = "/home/naga/dept";
   String uri = "/nyse/";
   Text key = new Text();
   Text value = new Text();

   BufferedReader br = null;
   MapFile.Writer writer = null;
   try {
     br = new BufferedReader(new FileReader(name));
     String line = br.readLine();
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(new URI("hdfs://hadoop:9000"), conf);
     writer = new MapFile.Writer(conf, fs, uri, key.getClass(), value.getClass());
     while (line != null) {
       String[] parts = line.split("\\t");
       key.set(parts[0]);
       value.set(parts[1]);
       writer.append(key, value);
       line = br.readLine();
     }
   } finally {
     // closeStream tolerates null, so this is safe even if opening either resource failed.
     IOUtils.closeStream(writer);
     IOUtils.closeStream(br);
   }
 }
Ejemplo n.º 24
0
    /**
     * Emits every sentence of a document that matches one of the configured patterns, keyed by
     * the pattern's source string.
     *
     * <p>Documents that are already sentence-segmented supply their own sentences. For any other
     * document, newlines in the content are replaced by spaces and the result is passed to
     * {@code mSentenceDetector}.
     *
     * @param key input key; not used
     * @param doc document to scan
     * @param context task context used to write the output
     * @throws IOException if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    @Override
    public void map(
        Writable key, Indexable doc, Mapper<Writable, Indexable, Text, Text>.Context context)
        throws IOException, InterruptedException {

      final List<String> sentences;
      if (!(doc instanceof SentenceSegmentedDocument)) {
        String flattened = doc.getContent().replace('\n', ' ');
        sentences = Arrays.asList(mSentenceDetector.sentDetect(flattened));
      } else {
        List<SentenceWritable> segmented = ((SentenceSegmentedDocument) doc).getSentences();
        sentences = new ArrayList<String>();
        for (SentenceWritable piece : segmented) {
          sentences.add(piece.toString());
        }
      }

      for (String sentence : sentences) {
        for (Pattern candidate : mPatterns) {
          mKey.set(candidate.pattern());
          if (!candidate.matcher(sentence).find()) {
            continue;
          }
          mValue.set(sentence);
          context.write(mKey, mValue);
        }
      }
    }
Ejemplo n.º 25
0
 /**
  * Flushes every entry buffered in {@code logMap} to the output collector as a
  * (key, value) record.
  *
  * @throws IOException if collecting an entry fails
  */
 @Override
 public void close() throws IOException {
   for (Entry<String, String> buffered : logMap.entrySet()) {
     String name = buffered.getKey();
     String payload = buffered.getValue();
     log.set(name);
     mean.set(payload);
     output.collect(log, mean);
   }
 }
Ejemplo n.º 26
0
 /**
  * Emits (token, split description) for every whitespace-separated token of the input line.
  *
  * @param key input key; not used
  * @param value line to tokenize
  * @param context task context providing the input split and the output
  * @throws IOException if writing the output fails
  * @throws InterruptedException if the task is interrupted while writing
  */
 public void map(Object key, Text value, Context context)
     throws IOException, InterruptedException {
   StringTokenizer itr = new StringTokenizer(value.toString());
   if (!itr.hasMoreTokens()) {
     return;
   }
   // The split description does not depend on the token, so build it once per record instead
   // of once per token.
   split.set(context.getInputSplit().toString());
   while (itr.hasMoreTokens()) {
     word.set(itr.nextToken());
     context.write(word, split);
   }
 }
Ejemplo n.º 27
0
 /**
  * Advances the wrapped line reader and copies the current record into the caller's writables.
  *
  * @param key receives a copy of {@code lineKey}
  * @param value receives a copy of {@code lineValue}
  * @return {@code true} if the wrapped reader produced a record, {@code false} otherwise
  * @throws IOException if the wrapped reader fails
  */
 @Override
 public boolean next(Text key, Text value) throws IOException {
   boolean advanced = lineReader.next(lineReaderKey, lineValue);
   if (advanced) {
     // NOTE(review): the reader fills lineReaderKey, but lineKey is what gets copied out;
     // presumably lineKey is maintained elsewhere - confirm.
     key.set(lineKey);
     value.set(lineValue);
   }
   return advanced;
 }
Ejemplo n.º 28
0
    /**
     * Keys play records and dimension records so that they meet in the same reducer.
     *
     * <p>The branch is chosen from the lower-cased {@code filePath}:
     *
     * <ul>
     *   <li>Paths containing {@code "play"} (with enough fields): the line is reformatted with
     *       {@code getPlayFormatStr}. The key is the upper-cased infohash when the media type is
     *       {@code "1"} or the URL contains {@code "subject/play"}, otherwise the media id. The
     *       record is written only if it has exactly {@code SEIDCOUNT.ordinal() + 1} fields.
     *   <li>Paths containing {@code "infohash"}: the trimmed line is written under the
     *       upper-cased {@code IH} field, provided the line has enough fields.
     *   <li>Paths containing {@code "mediainfo"}: the trimmed line is prefixed with the default
     *       infohash and serial id and written under the upper-cased {@code IH} field.
     * </ul>
     *
     * <p>Any exception is written to the {@code _error/part} output together with the raw line.
     *
     * @param key input key; not used
     * @param value tab-separated input line
     * @param context task context used to write the output
     * @throws IOException if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      try {
        String line = value.toString();
        String[] fields = line.split(FIELD_TAB_SEPARATOR, -1);
        String lowerPath = filePath.toLowerCase();

        boolean isPlayRecord =
            lowerPath.contains("play") && fields.length > PlayFormatEnum.MEDIA_TYPE_ID.ordinal();
        if (isPlayRecord) {
          String playETLStr = getPlayFormatStr(line);
          String[] playField = playETLStr.split(FIELD_TAB_SEPARATOR, -1);

          boolean keyedByInfohash =
              playField[PlayFormatEnum.MEDIA_TYPE_ID.ordinal()].trim().equals("1")
                  || playField[PlayFormatEnum.URL.ordinal()].contains("subject/play");
          String joinKey;
          if (keyedByInfohash) {
            joinKey = playField[PlayFormatEnum.INFOHASH_ID.ordinal()].toUpperCase();
          } else {
            joinKey = playField[PlayFormatEnum.MEDIA_ID.ordinal()];
          }

          if (null != joinKey && playField.length == PlayFormatEnum.SEIDCOUNT.ordinal() + 1) {
            keyText.set(joinKey.trim());
            valueText.set(playETLStr);
            context.write(keyText, valueText);
          }
          return;
        }

        String dimLine = "";
        String dimInfo = null;
        if (lowerPath.contains("infohash")) {
          if (fields.length > DMInfoHashEnum.MEDIA_ID.ordinal()) {
            dimLine = line.trim();
            dimInfo = fields[DMInfoHashEnum.IH.ordinal()];
          }
        } else if (lowerPath.contains("mediainfo")) {
          dimLine =
              DEFAULT_INFOHASH
                  + FIELD_TAB_SEPARATOR
                  + DEFAULT_SERIAL_ID
                  + FIELD_TAB_SEPARATOR
                  + line.trim();
          // The key is taken from the original fields, not from the prefixed line.
          dimInfo = fields[DMInfoHashEnum.IH.ordinal()];
        }

        if (null != dimInfo && !dimInfo.isEmpty()) {
          keyText.set(dimInfo.trim().toUpperCase());
          valueText.set(dimLine);
          context.write(keyText, valueText);
        }
      } catch (Exception e) {
        String reason = (null == e.getMessage()) ? ("error:" + filePath) : e.getMessage();
        multipleOutputs.write(new Text(reason), new Text(value.toString()), "_error/part");
        e.printStackTrace();
      }
    }
 /**
  * Stores the hex encoding of a byte-array field of {@code entry} in {@code val}.
  *
  * <p>If the field is missing or is not a {@code byte[]}, a warning is logged and {@code val} is
  * set to the empty string.
  *
  * @param val writable that receives the hex string
  * @param entry entry to read the field from
  * @param field name of the field holding the raw hash bytes
  */
 void encodeHex(Text val, FsEntry entry, String field) {
   Object raw = entry.get(field);
   // instanceof is false for null, so this also covers a missing field.
   if (!(raw instanceof byte[])) {
     LOG.warn(entry.fullPath() + " didn't have a hash for " + field);
     val.set("");
     return;
   }
   byte[] hash = (byte[]) raw;
   val.set(new String(Hex.encodeHex(hash)));
 }
Ejemplo n.º 30
0
    /**
     * Counts installs and "effective" installs for one key and writes both totals.
     *
     * <p>Each value is {@code installDates,hsDates}, both {@code :}-separated. Every install date
     * counts as one install; it also counts as an effective install when the same date appears
     * among the hs dates (entries equal to {@code "-"} are ignored). The first character of the
     * key selects the named output; for every flag except {@code "d"} the rest of the key is
     * appended to the date in the output key. The totals are written with the prefixes
     * {@code 13} and {@code 14}.
     *
     * @param key flag character followed by the dimension value
     * @param values {@code installDates,hsDates} records
     * @param context task context; not used directly
     * @throws IOException if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      long installNum = 0;
      long effectInstallNum = 0;

      for (Text val : values) {
        String[] fields = val.toString().split(",");
        String[] installDates = fields[0].split(":");
        String[] hsDates = fields[1].split(":");

        Set<String> activeDates = new HashSet<String>();
        for (String hsDate : hsDates) {
          if (!hsDate.equals("-")) {
            activeDates.add(hsDate);
          }
        }

        // Counting per occurrence gives the same totals as grouping by date first.
        for (String installDate : installDates) {
          installNum++;
          if (activeDates.contains(installDate)) {
            effectInstallNum++;
          }
        }
      }

      String keyText = key.toString();
      String flag = keyText.substring(0, 1);

      String outputDir = "F_CLIENT_MONTH_DATE";
      if (flag.equals("p")) {
        outputDir = "F_CLIENT_MONTH_DATE_AREA";
      } else if (flag.equals("i")) {
        outputDir = "F_CLIENT_MONTH_DATE_ISP";
      } else if (flag.equals("v")) {
        outputDir = "F_CLIENT_MONTH_DATE_VERSION";
      }

      if (!flag.equals("d")) {
        newKey.set(date + "\t" + keyText.substring(1));
      } else {
        newKey.set(date);
      }

      multipleOutputs.write(newKey, new Text("13" + "\t" + installNum), outputDir);
      multipleOutputs.write(newKey, new Text("14" + "\t" + effectInstallNum), outputDir);
    }