Ejemplo n.º 1
0
  @Override
  public SearchRequestBuilder initRequestBuilder(SearchRequestBuilder srb) {
    // the dateFilter should not apply to the date facets!
    dateFilter = null;
    srb = super.initRequestBuilder(srb);

    if (dateFilter != null) srb.setFilter(dateFilter);

    if (isDateFacets()) {
      // too much work to convert the generic case with all the date math
      // so cheat for our case:
      String name = ElasticTweetSearch.DATE_FACET;
      RangeFacetBuilder rfb = FacetBuilders.rangeFacet(name).field(ElasticTweetSearch.DATE);
      MyDate date = new MyDate();

      // latest
      rfb.addUnboundedTo(Helper.toLocalDateTime(date.clone().minusHours(8).castToHour().toDate()));
      // first day
      rfb.addUnboundedTo(Helper.toLocalDateTime(date.castToDay().toDate()));

      for (int i = 0; i < 7; i++) {
        // 'from' must be smaller than 'to'!
        Date oldDate = date.toDate();
        rfb.addRange(
            Helper.toLocalDateTime(date.minusDays(1).toDate()), Helper.toLocalDateTime(oldDate));
      }

      // oldest
      rfb.addUnboundedFrom(Helper.toLocalDateTime(date.toDate()));
      srb.addFacet(rfb);
    }

    return srb;
  }
Ejemplo n.º 2
0
  public static void parseOutputAggregation(
      AdvancedQueryPojo.QueryOutputPojo.AggregationOutputPojo aggregation,
      AliasLookupTable aliasLookup,
      boolean geoLowAccuracy,
      String[] entTypeFilterStrings,
      String[] assocVerbFilterStrings,
      SearchRequestBuilder searchSettings,
      BoolFilterBuilder parentFilterObj,
      String[] communityIdStrs) {
    // 1.] Go through aggregation list

    // 1.1] Apply "simple specifications" if necessary

    // Geo

    if ((null != aggregation)
        && (null != aggregation.geoNumReturn)
        && (aggregation.geoNumReturn > 0)) {
      CrossVersionFacetBuilder.TermsFacetBuilder fb =
          CrossVersionFacetBuilders.termsFacet("geo")
              .field(DocumentPojo.locs_)
              .size(aggregation.geoNumReturn);
      // Gross raw handling for facets
      if (null != parentFilterObj) {
        fb = fb.facetFilter(parentFilterObj);
      }
      searchSettings.addFacet(fb);
    } // (TESTED)

    // Temporal

    if ((null != aggregation) && (null != aggregation.timesInterval)) {
      if (aggregation.timesInterval.contains("m")) {
        aggregation.timesInterval = "month";
      }
      CrossVersionFacetBuilder.DateHistogramFacetBuilder fb =
          CrossVersionFacetBuilders.dateHistogramFacet("time")
              .field(DocumentPojo.publishedDate_)
              .interval(aggregation.timesInterval);
      // Gross raw handling for facets
      if (null != parentFilterObj) {
        fb = fb.facetFilter(parentFilterObj);
      }
      searchSettings.addFacet(fb);

      // TODO (INF-2688): if using certain types of moments then don't want this?
    } // (TESTED)

    // Temporal Moments

    if ((null != aggregation) && (null != aggregation.moments)) {
      if (null == aggregation.moments.timesInterval) {
        if (null != aggregation.timesInterval) {
          aggregation.moments.timesInterval = aggregation.timesInterval;
        } else {
          aggregation.moments.timesInterval = "m";
        }
      }
      if (aggregation.moments.timesInterval.contains("m")) {
        aggregation.moments.timesInterval = "month";
      }

      // TODO (INF-2688): Other cross filter type things
      if (!geoLowAccuracy
          && (null != aggregation.moments.geoNumReturn)
          && (aggregation.moments.geoNumReturn > 0)) {
        DateHistogramBuilder timeAgg =
            AggregationBuilders.dateHistogram("moments")
                .field(DocumentPojo.publishedDate_)
                .interval(new Interval(aggregation.moments.timesInterval));
        TermsBuilder geoAgg =
            AggregationBuilders.terms("geo")
                .field(DocumentPojo.locs_)
                .size(aggregation.moments.geoNumReturn);
        timeAgg.subAggregation(geoAgg);
        searchSettings.addAggregation(timeAgg);
      }

      // TODO (CORE-89)
      if (null != aggregation.moments.associationsNumReturn
          && aggregation.moments.associationsNumReturn >= 0) {
        // TODO need to check if indexes mapping use doc.associations.assoc_index == docValue
        // fail out or don't include those communities if they don't
        if (validateAssociationMapping(communityIdStrs)) {
          DateHistogramBuilder assocTimeAgg =
              AggregationBuilders.dateHistogram("moments.assoc")
                  .field(DocumentPojo.publishedDate_)
                  .interval(new Interval(aggregation.moments.timesInterval));
          TermsBuilder assocAgg =
              AggregationBuilders.terms("assoc")
                  .field(AssociationPojo.assoc_index_)
                  .size(aggregation.moments.associationsNumReturn);
          NestedBuilder nested =
              AggregationBuilders.nested("moments.assoc.nested")
                  .path(DocumentPojo.associations_)
                  .subAggregation(assocAgg);
          assocTimeAgg.subAggregation(nested);
          searchSettings.addAggregation(assocTimeAgg);
        }
      }

      if (null != aggregation.moments.entityList) {
        for (String entIndex : aggregation.moments.entityList) {

          CrossVersionFacetBuilder.DateHistogramFacetBuilder fb =
              CrossVersionFacetBuilders.dateHistogramFacet("moments." + entIndex)
                  .field(DocumentPojo.publishedDate_)
                  .interval(aggregation.moments.timesInterval);

          EntityFeaturePojo alias = null;
          if (null != aliasLookup) {
            alias = aliasLookup.getAliases(entIndex);
          }
          if (null == alias) { // no alias
            fb =
                fb.facetFilter(
                    FilterBuilders.nestedFilter(
                        DocumentPojo.entities_,
                        FilterBuilders.termFilter(EntityPojo.index_, entIndex)));
          } // TESTED
          else {
            QueryFilterBuilder qfb = null;
            if ((null != alias.getSemanticLinks()) && !alias.getSemanticLinks().isEmpty()) {
              BoolQueryBuilder qb = QueryBuilders.boolQuery();
              for (String textAlias : alias.getSemanticLinks()) {
                qb =
                    qb.should(
                        CrossVersionQueryBuilders.matchPhraseQuery(
                            DocumentPojo.fullText_, textAlias));
              }
              qfb = FilterBuilders.queryFilter(qb);
            } // TESTED
            if (!alias.getAlias().isEmpty()) {
              NestedFilterBuilder nfb =
                  FilterBuilders.nestedFilter(
                      DocumentPojo.entities_,
                      FilterBuilders.termsFilter(EntityPojo.index_, entIndex, alias.getAlias()));
              if (null == qfb) {
                fb = fb.facetFilter(nfb);
              } // TESTED
              else {
                BoolFilterBuilder bfb = FilterBuilders.boolFilter().should(nfb).should(qfb);
                fb = fb.facetFilter(bfb);
              } // TESTED
            } else if (null != qfb) {
              fb = fb.facetFilter(qfb);
            } // TESTED
          } // TESTED

          // Gross raw handling for facets
          if (null != parentFilterObj) {
            fb = fb.facetFilter(parentFilterObj);
          }
          searchSettings.addFacet(fb);
        }
      } // (end list over entities)
    } // TESTED

    // Entities - due to problems with significance, handled on a document by document basis, see
    // Significance helper class

    // Associations (Events/Facts)

    // Association verb category filter
    StringBuilder verbCatRegex = null;
    StringBuilder entTypeRegex = null;

    if (((null != aggregation)
            && (null != aggregation.eventsNumReturn)
            && (aggregation.eventsNumReturn > 0))
        || ((null != aggregation)
            && (null != aggregation.factsNumReturn)
            && (aggregation.factsNumReturn > 0))) {
      if (null != entTypeFilterStrings) {
        boolean bNegative = false;
        if ('-' != entTypeFilterStrings[0].charAt(0)) { // positive filtering
          entTypeRegex = new StringBuilder("(?:");
        } else {
          bNegative = true;
          entTypeRegex = new StringBuilder("(?!");
          // (this is a lookahead but will be fine because of the .*/ in front of it)
        }
        for (String entType : entTypeFilterStrings) {
          if (bNegative && ('-' == entType.charAt(0))) {
            entType = entType.substring(1);
          }
          entType = entType.replace("|", "%7C");
          entTypeRegex.append(".*?/").append(Pattern.quote(entType.toLowerCase())).append('|');
          // (can't match greedily because of the 2nd instance of entity type)
        }
        entTypeRegex.setLength(entTypeRegex.length() - 1); // (remove trailing |)
        entTypeRegex.append(")");
        if (bNegative) {
          entTypeRegex.append("[^|]*"); // (now the actual verb, if a -ve lookahead)				
        }
      } // TESTED

      if (null != assocVerbFilterStrings) {
        boolean bNegative = false;
        if ('-' != assocVerbFilterStrings[0].charAt(0)) { // positive filtering
          verbCatRegex = new StringBuilder("\\|(?:");
        } else {
          bNegative = true;
          verbCatRegex = new StringBuilder("\\|(?!");
          // (this is a lookahead but will be fine because of the "^[^|]*\\" in front of it)

          // eg say I have -VERB then subject|VERB|object will match because if the
        }
        for (String assocVerbFilterString : assocVerbFilterStrings) {
          if (bNegative && ('-' == assocVerbFilterString.charAt(0))) {
            assocVerbFilterString = assocVerbFilterString.substring(1);
          }
          assocVerbFilterString = assocVerbFilterString.replace("|", "%7C");
          verbCatRegex.append(Pattern.quote(assocVerbFilterString)).append('|');
        }
        verbCatRegex.setLength(verbCatRegex.length() - 1); // (remove trailing |)
        verbCatRegex.append(")");
        if (bNegative) {
          verbCatRegex.append("[^|]*"); // (now the actual verb, if a -ve lookahead)
        }
      } // TESTED
    }
    // TESTED (all combinations of 1/2 people, 1/2 verbs)

    if ((null != aggregation)
        && (null != aggregation.eventsNumReturn)
        && (aggregation.eventsNumReturn > 0)) {
      StringBuffer regex = new StringBuffer("^Event\\|");
      if (null != entTypeRegex) {
        regex.append(entTypeRegex);
      } else {
        regex.append("[^|]*");
      }
      if (null != verbCatRegex) {
        regex.append(verbCatRegex);
      } else if (null != entTypeRegex) {
        regex.append("\\|[^|]*");
      } else {
        regex.append(".*");
      }
      if (null != entTypeRegex) {
        regex.append("\\|").append(entTypeRegex);
        regex.append(".*");
      } else {
        regex.append("\\|.*");
      }
      // DEBUG
      // System.out.println("REGEX==" + regex.toString());
      // TESTED (all combinations of 1/2 people, 1/2 verbs)

      CrossVersionFacetBuilder.TermsFacetBuilder fb =
          CrossVersionFacetBuilders.termsFacet("events")
              .field(AssociationPojo.assoc_index_)
              .size(aggregation.eventsNumReturn)
              .nested(DocumentPojo.associations_);
      fb.regex(regex.toString());

      // Gross raw handling for facets
      if (null != parentFilterObj) {
        fb = fb.facetFilter(parentFilterObj);
      }
      searchSettings.addFacet(fb);
    }
    if ((null != aggregation)
        && (null != aggregation.factsNumReturn)
        && (aggregation.factsNumReturn > 0)) {
      StringBuffer regex = new StringBuffer("^Fact\\|");
      if (null != entTypeRegex) {
        regex.append(entTypeRegex);
      } else {
        regex.append("[^|]*");
      }
      if (null != verbCatRegex) {
        regex.append(verbCatRegex);
      } else if (null != entTypeRegex) {
        regex.append("\\|[^|]*");
      } else {
        regex.append(".*");
      }
      if (null != entTypeRegex) {
        regex.append("\\|").append(entTypeRegex);
        regex.append(".*");
      } else {
        regex.append("\\|.*");
      }
      // DEBUG
      // System.out.println("REGEX==" + regex.toString());
      // TESTED (all combinations of 1/2 people, 1/2 verbs)

      CrossVersionFacetBuilder.TermsFacetBuilder fb =
          CrossVersionFacetBuilders.termsFacet("facts")
              .field(AssociationPojo.assoc_index_)
              .size(aggregation.factsNumReturn)
              .nested(DocumentPojo.associations_);
      fb.regex(regex.toString());

      // Gross raw handling for facets
      if (null != parentFilterObj) {
        fb = fb.facetFilter(parentFilterObj);
      }
      searchSettings.addFacet(fb);
    }

    // Source management/monitoring

    if ((null != aggregation)
        && (null != aggregation.sourceMetadata)
        && (aggregation.sourceMetadata > 0)) {
      CrossVersionFacetBuilder.TermsFacetBuilder fb =
          CrossVersionFacetBuilders.termsFacet("sourceTags")
              .field(DocumentPojo.tags_)
              .size(aggregation.sourceMetadata)
              .facetFilter(parentFilterObj);
      CrossVersionFacetBuilder.TermsFacetBuilder fb1 =
          CrossVersionFacetBuilders.termsFacet("sourceTypes")
              .field(DocumentPojo.mediaType_)
              .size(aggregation.sourceMetadata)
              .facetFilter(parentFilterObj);
      // Gross raw handling for facets
      if (null != parentFilterObj) {
        fb = fb.facetFilter(parentFilterObj);
        fb1 = fb1.facetFilter(parentFilterObj);
      }
      searchSettings.addFacet(fb);
      searchSettings.addFacet(fb1);
    }

    if ((null != aggregation) && (null != aggregation.sources) && (aggregation.sources > 0)) {
      CrossVersionFacetBuilder.TermsFacetBuilder fb =
          CrossVersionFacetBuilders.termsFacet("sourceKeys")
              .field(DocumentPojo.sourceKey_)
              .size(aggregation.sources);
      // Gross raw handling for facets
      if (null != parentFilterObj) {
        fb = fb.facetFilter(parentFilterObj);
      }
      searchSettings.addFacet(fb);
    }
  } // TESTED