コード例 #1
0
    /**
     * Selects the visit records whose year of visit equals {@code YEARFILTER} and emits them
     * projected on the URL.
     *
     * <p>The visit date is read from field 1 as a string of the form {@code YYYY-MM-DD}; its first
     * four characters are parsed as the year. A matching record has field 1 set to null and is then
     * handed to the collector. Non-matching records are dropped.
     *
     * <p>Output Format: 0: URL
     *
     * @param record the visit record to test; field 1 is nulled in place when the record passes
     * @param out the collector that receives every passing record
     * @throws Exception if the date cannot be processed, e.g. a {@link NumberFormatException} when
     *     the first four characters of the date are not an integer
     */
    @Override
    public void map(PactRecord record, Collector<PactRecord> out) throws Exception {

      // The date has the format YYYY-MM-DD, so the year is its four-character prefix.
      String visitDate = record.getField(1, PactString.class).getValue();
      String yearDigits = visitDate.substring(0, 4);
      int visitYear = Integer.parseInt(yearDigits);

      // Guard: anything outside the requested year is dropped.
      if (visitYear != YEARFILTER) {
        return;
      }

      // Clear the date field before emitting.
      record.setNull(1);
      out.collect(record);
    }
コード例 #2
0
    /**
     * Keeps only the documents whose text contains every entry of {@code KEYWORDS} and emits them
     * projected on the URL.
     *
     * <p>The document text is taken from field 1 via {@code toString()}. Matching uses
     * {@link String#contains(CharSequence)}, so it is a case-sensitive substring test. A document
     * that passes has field 1 set to null and is then handed to the collector; a document missing
     * any keyword is dropped.
     *
     * <p>Output Format: 0: URL
     *
     * @param record the document record to test; field 1 is nulled in place when the record passes
     * @param out the collector that receives every passing record
     * @throws Exception if reading the record or emitting it fails
     */
    @Override
    public void map(PactRecord record, Collector<PactRecord> out) throws Exception {

      String text = record.getField(1, PactString.class).toString();

      // FILTER
      // A single missing keyword disqualifies the document, so stop at the first miss.
      for (String keyword : KEYWORDS) {
        if (!text.contains(keyword)) {
          return;
        }
      }

      // PROJECT
      // All keywords were found: clear the text field and emit the record.
      record.setNull(1);
      out.collect(record);
    }