@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
    String word = tuple.getString(0);
    // look up the running count for this word, defaulting to 0 on first sight
    Integer count = counts.get(word);
    if (count == null) {
        count = 0;
    }
    count++;
    counts.put(word, count);
    // emit the word together with its updated count
    collector.emit(new Values(word, count));
}
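// Context sketch (assumption): an execute() like the one above typically lives in a
// BaseBasicBolt that keeps an in-memory count map and declares the fields it emits.
// The class name, field name, and output field names below are illustrative, not taken
// from the original source.
import java.util.HashMap;
import java.util.Map;

import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

public class WordCountBolt extends BaseBasicBolt {
    // running counts, local to this bolt instance
    private final Map<String, Integer> counts = new HashMap<>();

    @Override
    public void execute(Tuple tuple, BasicOutputCollector collector) {
        String word = tuple.getString(0);
        // merge() folds the null check and increment into one call
        int count = counts.merge(word, 1, Integer::sum);
        collector.emit(new Values(word, count));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // downstream bolts can group on "word"
        declarer.declare(new Fields("word", "count"));
    }
}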
@Override
public void execute(Tuple input, BasicOutputCollector collector) {
    String line = input.getString(0);
    String[] fields = line.split(seprator);
    // try-with-resources: a local statement replaces the pstmt field so it is
    // always closed, even when the insert fails
    try (PreparedStatement pstmt = conn.prepareStatement(insertSql)) {
        for (int i = 0; i < fields.length; i++) {
            // JDBC parameter indexes are 1-based
            pstmt.setString(i + 1, fields[i]);
        }
        pstmt.executeUpdate();
    } catch (SQLException e) {
        log.error("Failed to insert record", e);
        throw new RuntimeException("Failed to insert record", e);
    }
    collector.emit(new Values(line));
}
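// Context sketch (assumption): conn, insertSql, seprator and log are fields set up in
// prepare(). The class name, config keys, and connection details below are illustrative
// only, and the exact prepare() signature depends on the Storm version in use.
private Connection conn;
private String insertSql;
private String seprator;
private static final Logger log = LoggerFactory.getLogger(JdbcInsertBolt.class);

@Override
public void prepare(Map<String, Object> stormConf, TopologyContext context) {
    seprator = (String) stormConf.getOrDefault("record.separator", ",");
    insertSql = (String) stormConf.get("insert.sql");
    try {
        // open one connection per bolt instance; pooling is out of scope for this sketch
        conn = DriverManager.getConnection(
                (String) stormConf.get("jdbc.url"),
                (String) stormConf.get("jdbc.user"),
                (String) stormConf.get("jdbc.password"));
    } catch (SQLException e) {
        throw new RuntimeException("Could not open JDBC connection", e);
    }
}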
@Override
public void execute(Tuple tuple) {
    Long val = (Long) tuple.getValue(0);
    // increment the incoming value, double it, and emit the result
    collector.emit(new Values(2 * (val + 1)));
    // acknowledge the input tuple so it is not replayed
    collector.ack(tuple);
}
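// Context sketch (assumption): an execute(Tuple) that calls collector.ack() explicitly
// implies a BaseRichBolt-style bolt that stores the OutputCollector handed to prepare().
// The field and output field names below are illustrative.
private OutputCollector collector;

@Override
public void prepare(Map<String, Object> conf, TopologyContext context, OutputCollector collector) {
    // keep a reference so execute() can emit and ack later
    this.collector = collector;
}

@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
    declarer.declare(new Fields("value"));
}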
@Override
public void execute(Tuple input, BasicOutputCollector collector) {
    // pass-through: re-emit the incoming tuple's values unchanged
    collector.emit(input.getValues());
}
@Override
public void execute(Tuple tuple) {
    Object key = tuple.getValue(1);
    // Utils.get returns the stored sum for this key, or 0 if none exists yet
    Number curr = Utils.get(_sums, key, 0);
    // accumulate the value from field 2 into the running per-key sum
    _sums.put(key, Numbers.add(curr, tuple.getValue(2)));
}
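// Context sketch (assumption): _sums is an in-memory map local to the bolt instance,
// declared roughly as below; the concrete key type is whatever field 1 of the tuple carries.
private final Map<Object, Number> _sums = new HashMap<>();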
@Override
public void execute(Tuple tuple) {
    Metadata metadata = (Metadata) tuple.getValueByField("metadata");
    byte[] content = tuple.getBinaryByField("content");
    String url = tuple.getStringByField("url");

    boolean isfeed = Boolean.valueOf(metadata.getFirstValue(isFeedKey));

    // the document doesn't carry the expected metadata key
    if (!isfeed) {
        if (sniffWhenNoMDKey) {
            // fall back to the mime-type
            // won't work when servers return text/xml
            // TODO use Tika instead?
            String ct = metadata.getFirstValue(HttpHeaders.CONTENT_TYPE);
            if (ct != null && ct.contains("rss+xml"))
                isfeed = true;
        }
    }

    // still not a feed file: just pass it on
    if (!isfeed) {
        this.collector.emit(tuple, tuple.getValues());
        this.collector.ack(tuple);
        return;
    } else {
        // can be used later on for custom scheduling
        metadata.setValue(isFeedKey, "true");
    }

    List<Outlink> outlinks;
    try {
        outlinks = parseFeed(url, content, metadata);
    } catch (Exception e) {
        // exception while parsing the feed
        String errorMessage = "Exception while parsing " + url + ": " + e;
        LOG.error(errorMessage);
        // send to the status stream in case another component wants to update its status
        metadata.setValue(Constants.STATUS_ERROR_SOURCE, "feed parsing");
        metadata.setValue(Constants.STATUS_ERROR_MESSAGE, errorMessage);
        collector.emit(Constants.StatusStreamName, tuple, new Values(url, metadata, Status.ERROR));
        this.collector.ack(tuple);
        return;
    }

    // apply the parse filters, if any, to the current document
    try {
        ParseResult parse = new ParseResult();
        parse.setOutlinks(outlinks);
        ParseData parseData = parse.get(url);
        parseData.setMetadata(metadata);
        parseFilters.filter(url, content, null, parse);
    } catch (RuntimeException e) {
        String errorMessage = "Exception while running parse filters on " + url + ": " + e;
        LOG.error(errorMessage);
        metadata.setValue(Constants.STATUS_ERROR_SOURCE, "content filtering");
        metadata.setValue(Constants.STATUS_ERROR_MESSAGE, errorMessage);
        collector.emit(Constants.StatusStreamName, tuple, new Values(url, metadata, Status.ERROR));
        collector.ack(tuple);
        return;
    }

    // send each outlink to the status stream as a newly discovered URL
    for (Outlink ol : outlinks) {
        Values v = new Values(ol.getTargetURL(), ol.getMetadata(), Status.DISCOVERED);
        collector.emit(Constants.StatusStreamName, tuple, v);
    }

    // finally, mark the main URL as successfully fetched
    collector.emit(Constants.StatusStreamName, tuple, new Values(url, metadata, Status.FETCHED));
    this.collector.ack(tuple);
}
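// Context sketch (assumption): for the status-stream emits above to be routed anywhere,
// the bolt also has to declare that stream alongside its default output. The field names
// below follow the usual StormCrawler convention but are an assumption here, not taken
// from the original source.
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
    // default stream: the document itself, passed on unchanged when it is not a feed
    declarer.declare(new Fields("url", "content", "metadata"));
    // status stream: (url, metadata, status) tuples consumed by a status updater bolt
    declarer.declareStream(Constants.StatusStreamName, new Fields("url", "metadata", "status"));
}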