/**
 * Overwrites id/name fields of every play record of one key with the values from that key's
 * dimension record and emits each record as a single separator-joined line.
 *
 * <p>A value whose field count equals {@code DMInfoHashEnum.MEDIA_NAME.ordinal() + 1} is taken as
 * the dimension record (when several match, the last one seen wins). Every other value is treated
 * as a play record.
 *
 * <ul>
 *   <li>With a dimension record: SERIAL_ID, MEDIA_ID, CHANNEL_ID and MEDIA_NAME of the play
 *       record are replaced by the dimension record's values.
 *   <li>Without one: SERIAL_ID and MEDIA_ID are set to {@code DEFAULT_NEGATIVE_NUM}, but only if
 *       the record has more than {@code PlayFormatEnum.SERIAL_ID.ordinal() + 1} fields.
 * </ul>
 *
 * <p>All whitespace runs are removed from every field before the fields are joined with
 * {@code FIELD_TAB_SEPARATOR}. The joined line is written as the output key with a
 * {@code NullWritable} value.
 *
 * @param key the grouping key; not read by this method
 * @param values dimension and play records sharing {@code key}
 * @param context sink for the output lines
 * @throws IOException if writing to {@code context} fails
 * @throws InterruptedException if writing to {@code context} is interrupted
 */
public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    String[] dimFields = null;
    List<String[]> playRecords = new ArrayList<String[]>();

    // Split every value exactly once; the field count decides which kind of record it is.
    for (Text val : values) {
        String[] valueFields = val.toString().split(FIELD_TAB_SEPARATOR, -1);
        if (valueFields.length == DMInfoHashEnum.MEDIA_NAME.ordinal() + 1) {
            dimFields = valueFields;
        } else {
            playRecords.add(valueFields);
        }
    }

    // Compiled once per call instead of once per field (String.replaceAll recompiles each time).
    java.util.regex.Pattern whitespace = java.util.regex.Pattern.compile("\\s+");

    for (String[] playFields : playRecords) {
        if (null != dimFields) {
            // dimFields always has MEDIA_NAME.ordinal() + 1 entries here, so no length re-check
            // on the dimension side is needed.
            // NOTE(review): the play record's length is not checked in this branch; a record
            // shorter than the target positions fails with an index error, as it did before.
            playFields[PlayFormatEnum.SERIAL_ID.ordinal()] =
                    dimFields[DMInfoHashEnum.SERIAL_ID.ordinal()];
            playFields[PlayFormatEnum.MEDIA_ID.ordinal()] =
                    dimFields[DMInfoHashEnum.MEDIA_ID.ordinal()];
            playFields[PlayFormatEnum.CHANNEL_ID.ordinal()] =
                    dimFields[DMInfoHashEnum.CHANNEL_ID.ordinal()];
            playFields[PlayFormatEnum.MEDIA_NAME.ordinal()] =
                    dimFields[DMInfoHashEnum.MEDIA_NAME.ordinal()];
        } else if (playFields.length > PlayFormatEnum.SERIAL_ID.ordinal() + 1) {
            playFields[PlayFormatEnum.SERIAL_ID.ordinal()] = DEFAULT_NEGATIVE_NUM;
            playFields[PlayFormatEnum.MEDIA_ID.ordinal()] = DEFAULT_NEGATIVE_NUM;
        }

        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < playFields.length; i++) {
            if (i > 0) {
                joined.append(FIELD_TAB_SEPARATOR);
            }
            joined.append(whitespace.matcher(playFields[i]).replaceAll(""));
        }

        keyText.set(joined.toString());
        context.write(keyText, NullWritable.get());
    }
}
/**
 * Keys each input line so that play records and their dimension records meet in the same
 * reduce group.
 *
 * <p>The branch is chosen from {@code filePath} (a field set outside this method):
 *
 * <ul>
 *   <li>Path contains "play" and the line has more than
 *       {@code PlayFormatEnum.MEDIA_TYPE_ID.ordinal()} fields: the line is normalised with
 *       {@code getPlayFormatStr}. The key is the upper-cased INFOHASH_ID field when the
 *       MEDIA_TYPE_ID field is "1" or the URL field contains "subject/play"; otherwise it is the
 *       MEDIA_ID field. The record is emitted only if the normalised line has exactly
 *       {@code PlayFormatEnum.SEIDCOUNT.ordinal() + 1} fields.
 *   <li>Otherwise, path contains "infohash": the trimmed line is emitted under its IH field,
 *       provided the line has more than {@code DMInfoHashEnum.MEDIA_ID.ordinal()} fields.
 *   <li>Otherwise, path contains "mediainfo": the trimmed line is prefixed with
 *       {@code DEFAULT_INFOHASH} and {@code DEFAULT_SERIAL_ID} and emitted under the field found
 *       at position {@code DMInfoHashEnum.IH.ordinal()} of the raw line.
 * </ul>
 *
 * <p>Dimension keys are trimmed and upper-cased, and lines with an empty key are dropped. Any
 * other failure while handling a line is written to the "_error/part" named output together
 * with the raw line, and processing continues.
 *
 * @param key input key; not read by this method
 * @param value one raw input line
 * @param context sink for the keyed records
 * @throws IOException if writing the error record fails
 * @throws InterruptedException if the task is interrupted while writing
 */
public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    try {
        String line = value.toString();
        String[] fields = line.split(FIELD_TAB_SEPARATOR, -1);

        // Locale.ROOT keeps the ASCII matching below independent of the JVM default locale
        // (a Turkish locale lower-cases "I" to a dotless "ı", which would defeat "infohash").
        // Computed inside the try so that a null filePath is still routed to the error output.
        String lowerPath = filePath.toLowerCase(java.util.Locale.ROOT);

        if (lowerPath.contains("play")
                && fields.length > PlayFormatEnum.MEDIA_TYPE_ID.ordinal()) {
            String playETLStr = getPlayFormatStr(line);
            String[] playField = playETLStr.split(FIELD_TAB_SEPARATOR, -1);

            String infohashStr;
            if (playField[PlayFormatEnum.MEDIA_TYPE_ID.ordinal()].trim().equals("1")
                    || playField[PlayFormatEnum.URL.ordinal()].contains("subject/play")) {
                infohashStr = playField[PlayFormatEnum.INFOHASH_ID.ordinal()]
                        .toUpperCase(java.util.Locale.ROOT);
            } else {
                infohashStr = playField[PlayFormatEnum.MEDIA_ID.ordinal()];
            }

            if (null != infohashStr
                    && playField.length == PlayFormatEnum.SEIDCOUNT.ordinal() + 1) {
                keyText.set(infohashStr.trim());
                valueText.set(playETLStr);
                context.write(keyText, valueText);
            }
        } else {
            String dimLine = "";
            String dimInfo = null;

            if (lowerPath.contains("infohash")) {
                if (fields.length > DMInfoHashEnum.MEDIA_ID.ordinal()) {
                    dimLine = line.trim();
                    dimInfo = fields[DMInfoHashEnum.IH.ordinal()];
                }
            } else if (lowerPath.contains("mediainfo")) {
                StringBuilder dimStrSb = new StringBuilder();
                dimStrSb.append(DEFAULT_INFOHASH + FIELD_TAB_SEPARATOR);
                dimStrSb.append(DEFAULT_SERIAL_ID + FIELD_TAB_SEPARATOR);
                dimStrSb.append(line.trim());
                dimLine = dimStrSb.toString();
                dimInfo = fields[DMInfoHashEnum.IH.ordinal()];
            }

            if (null != dimInfo && !dimInfo.isEmpty()) {
                keyText.set(dimInfo.trim().toUpperCase(java.util.Locale.ROOT));
                valueText.set(dimLine);
                context.write(keyText, valueText);
            }
        }
    } catch (InterruptedException e) {
        // An interrupt is a cancellation signal, not a bad record: let the caller see it
        // instead of filing it under "_error".
        throw e;
    } catch (Exception e) {
        // Best-effort: keep the offending line and the reason, then carry on with the next line.
        multipleOutputs.write(
                new Text(null == e.getMessage() ? ("error:" + filePath) : e.getMessage()),
                new Text(value.toString()),
                "_error/part");
        e.printStackTrace();
    }
}