/** * Filters for records of the visits relation where the year of visit is equal to a specified * value. The URL of all visit records passing the filter is emitted. * * <p>Output Format: 0: URL */ @Override public void map(PactRecord record, Collector<PactRecord> out) throws Exception { // Parse date string with the format YYYY-MM-DD and extract the year String dateString = record.getField(1, PactString.class).getValue(); int year = Integer.parseInt(dateString.substring(0, 4)); if (year == YEARFILTER) { record.setNull(1); out.collect(record); } }
/** * Filters for documents that contain all of the given keywords and projects the records on the * URL field. * * <p>Output Format: 0: URL */ @Override public void map(PactRecord record, Collector<PactRecord> out) throws Exception { // FILTER // Only collect the document if all keywords are contained String docText = record.getField(1, PactString.class).toString(); boolean allContained = true; for (String kw : KEYWORDS) { if (!docText.contains(kw)) { allContained = false; break; } } if (allContained) { record.setNull(1); out.collect(record); } }