/**
 * Loads the task definitions stored in this river's {@code _meta} document and registers them.
 *
 * <p>Searches the {@code _river} index, restricted to the type named after this river, for the
 * document whose id is {@code _meta}. For every hit the {@code my.tasks} entries are wrapped in a
 * {@link Task} and recorded in both {@code taskArr} and {@code taskMap}. Afterwards the first
 * element of {@code taskArr} becomes the current task.
 *
 * <p>Hits without a {@code my} map or without an iterable {@code tasks} entry are skipped instead
 * of failing with a {@code NullPointerException}.
 *
 * @throws IllegalStateException if {@code taskArr} is still empty after all hits were processed
 * @throws Exception declared by the original signature; anything raised by the search or by
 *     building a task propagates to the caller
 */
private void InitTasks() throws Exception {
  String riverName = river.riverName().name();

  QueryBuilder builder = QueryBuilders.boolQuery().must(QueryBuilders.termQuery("_id", "_meta"));
  SearchRequestBuilder search = client.prepareSearch("_river");
  search.setTypes(riverName);
  search.setQuery(builder);
  SearchResponse resp = search.execute().actionGet();

  int hitCount = 0;
  for (SearchHit hit : resp.hits().getHits()) {
    logger.info("Task Manager: Query response hits[ " + hitCount + "]: " + hit.sourceAsString());
    hitCount++;

    Map<String, Object> sourceMap = hit.sourceAsMap();
    Object my = (sourceMap == null) ? null : sourceMap.get("my");
    if (!(my instanceof Map)) {
      logger.info("Task Manager: hit has no 'my' section, skipping it");
      continue;
    }
    Object tasks = ((Map<?, ?>) my).get("tasks");
    if (!(tasks instanceof Iterable)) {
      logger.info("Task Manager: hit has no 'my.tasks' list, skipping it");
      continue;
    }

    for (Object taskObj : (Iterable<?>) tasks) {
      // Each task entry is handed to Task as a string-to-string settings map; the element type
      // cannot be verified at runtime because of erasure.
      @SuppressWarnings("unchecked")
      Map<String, String> taskSettings = (Map<String, String>) taskObj;
      Task newTask = new Task(taskSettings, client, riverName);
      taskArr.add(newTask);
      taskMap.put(newTask.id(), newTask);
    }
  }

  currentTaskIndex = 0;
  if (taskArr.isEmpty()) {
    // Fail with a descriptive message rather than an IndexOutOfBoundsException from get(0).
    throw new IllegalStateException(
        "Task Manager: no tasks found in _river/" + riverName + "/_meta");
  }
  currentTask = (Task) taskArr.get(currentTaskIndex);
}
@Override public SearchRequestBuilder initRequestBuilder(SearchRequestBuilder srb) { // the dateFilter should not apply to the date facets! dateFilter = null; srb = super.initRequestBuilder(srb); if (dateFilter != null) srb.setFilter(dateFilter); if (isDateFacets()) { // too much work to convert the generic case with all the date math // so cheat for our case: String name = ElasticTweetSearch.DATE_FACET; RangeFacetBuilder rfb = FacetBuilders.rangeFacet(name).field(ElasticTweetSearch.DATE); MyDate date = new MyDate(); // latest rfb.addUnboundedTo(Helper.toLocalDateTime(date.clone().minusHours(8).castToHour().toDate())); // first day rfb.addUnboundedTo(Helper.toLocalDateTime(date.castToDay().toDate())); for (int i = 0; i < 7; i++) { // 'from' must be smaller than 'to'! Date oldDate = date.toDate(); rfb.addRange( Helper.toLocalDateTime(date.minusDays(1).toDate()), Helper.toLocalDateTime(oldDate)); } // oldest rfb.addUnboundedFrom(Helper.toLocalDateTime(date.toDate())); srb.addFacet(rfb); } return srb; }
/**
 * Executes the given query and returns the raw search response.
 *
 * <p>The explain flag of the request is taken from {@code query.isExplain()}.
 *
 * @param query the query that configures the request builder
 * @param log when {@code true}, the JSON form of the request is written to the info log before
 *     execution
 * @param explain currently not read by this method (NOTE(review): the explain setting comes
 *     from {@code query.isExplain()} -- confirm whether this parameter should override it)
 * @return the response of the executed search
 */
public SearchResponse query(JetwickQuery query, boolean log, boolean explain) {
  SearchRequestBuilder srb = createSearchBuilder();
  srb.setExplain(query.isExplain());
  query.initRequestBuilder(srb);

  if (log) {
    try {
      logger.info(
          srb.internalBuilder().toXContent(JsonXContent.unCachedContentBuilder(), null).string());
    } catch (Exception ex) {
      // Rendering the request is diagnostic only: report the problem but still run the search.
      logger.warn("Cannot render search request for logging", ex);
    }
  }

  return srb.execute().actionGet();
}
/**
 * Produces a search request as a JSON string from a generated Sensei client request.
 *
 * <p>Every selection of the generated request is translated into a filter (terms, range or
 * prefix), all filters are combined with a logical OR, and a {@code terms} facet of size 300 is
 * added for each terms selection.
 *
 * @param includeFacets forwarded to the Sensei query producer
 * @param simpleSelections forwarded to the Sensei query producer
 * @param rangeSelections forwarded to the Sensei query producer
 * @param pathSelections forwarded to the Sensei query producer
 * @return the request body as a JSON string
 * @throws IllegalArgumentException if a selection is neither a terms, range nor path selection
 * @throws RuntimeException wrapping any failure while assembling the JSON document
 */
public String createQuery(
    boolean includeFacets, int simpleSelections, int rangeSelections, int pathSelections) {
  SenseiClientRequest clientRequest =
      senseiQueryProducer.createQuery(
          includeFacets, simpleSelections, rangeSelections, pathSelections);

  FilterBuilder[] filters = new FilterBuilder[clientRequest.getSelections().size()];
  for (int i = 0; i < filters.length; i++) {
    Selection selection = clientRequest.getSelections().get(i);
    if (selection instanceof Terms) {
      filters[i] = createFilterFromTerm((Terms) selection);
    } else if (selection instanceof Range) {
      Range range = (Range) selection;
      RangeFilterBuilder rangeFilter = FilterBuilders.rangeFilter(range.getField());
      // "*" marks an open bound, so that side of the range is left unset.
      if (!"*".equals(range.getFrom())) {
        rangeFilter.from(range.getFrom());
      }
      if (!"*".equals(range.getTo())) {
        rangeFilter.to(range.getTo());
      }
      filters[i] = rangeFilter;
    } else if (selection instanceof Path) {
      Path path = (Path) selection;
      filters[i] = FilterBuilders.prefixFilter(path.getField(), path.getValue());
    } else {
      // Previously such a selection left a null entry in the array handed to orFilter.
      throw new IllegalArgumentException(
          "Unsupported selection type: "
              + (selection == null ? "null" : selection.getClass().getName()));
    }
  }

  FilterBuilder selections = FilterBuilders.orFilter(filters);
  SearchRequestBuilder requestBuilder = new SearchRequestBuilder(new MockClient());
  requestBuilder.setFilter(selections);

  try {
    JSONObject request = new JSONObject(requestBuilder.toString());
    JSONObject facets = new JSONObject();
    for (Selection selection : clientRequest.getSelections()) {
      if (selection instanceof Terms) {
        String field = selection.getField();
        // Build the facet structurally so field names containing quotes or backslashes cannot
        // corrupt the JSON (the facet used to be parsed from a concatenated string).
        JSONObject terms = new JSONObject();
        terms.put("field", field);
        terms.put("size", 300);
        JSONObject facet = new JSONObject();
        facet.put("terms", terms);
        facets.put(field, facet);
      }
    }
    request.put("facets", facets);
    return request.toString();
  } catch (Exception ex) {
    throw new RuntimeException(ex);
  }
}
/**
 * Runs a scan/scroll search over the index returned by {@code getIndexName()} and hands every
 * collected object to the given executor.
 *
 * <p>The initial search is issued outside the guarded section, so its failures propagate. Any
 * exception raised while scrolling or executing is logged and ends the scan without being
 * rethrown.
 *
 * @param any callback invoked once per object returned by {@code collectObjects}
 * @param query the query selecting the documents to scan
 * @param keepTimeInMinutes scroll keep-alive, in minutes, used for every request
 * @param pageSize size passed to the initial search request
 */
public void scanThis(
    AnyExecutor<T> any, QueryBuilder query, long keepTimeInMinutes, int pageSize) {
  SearchResponse rsp =
      client
          .prepareSearch(getIndexName())
          .setQuery(query)
          .setSize(pageSize)
          .setSearchType(SearchType.SCAN)
          .setScroll(TimeValue.timeValueMinutes(keepTimeInMinutes))
          .execute()
          .actionGet();

  try {
    for (int page = 0; ; page++) {
      rsp =
          client
              .prepareSearchScroll(rsp.scrollId())
              .setScroll(TimeValue.timeValueMinutes(keepTimeInMinutes))
              .execute()
              .actionGet();

      long hitsInPage = rsp.hits().hits().length;
      logger.info(
          "(" + page + ") scanquery with " + pageSize + " page size and " + hitsInPage + " hits");

      // An empty page ends the scan.
      if (hitsInPage == 0) {
        break;
      }

      for (T item : collectObjects(rsp)) {
        any.execute(item);
      }
    }
  } catch (Exception ex) {
    logger.error("Cannot run scanThis", ex);
  }
}
public static void parseOutputAggregation( AdvancedQueryPojo.QueryOutputPojo.AggregationOutputPojo aggregation, AliasLookupTable aliasLookup, boolean geoLowAccuracy, String[] entTypeFilterStrings, String[] assocVerbFilterStrings, SearchRequestBuilder searchSettings, BoolFilterBuilder parentFilterObj, String[] communityIdStrs) { // 1.] Go through aggregation list // 1.1] Apply "simple specifications" if necessary // Geo if ((null != aggregation) && (null != aggregation.geoNumReturn) && (aggregation.geoNumReturn > 0)) { CrossVersionFacetBuilder.TermsFacetBuilder fb = CrossVersionFacetBuilders.termsFacet("geo") .field(DocumentPojo.locs_) .size(aggregation.geoNumReturn); // Gross raw handling for facets if (null != parentFilterObj) { fb = fb.facetFilter(parentFilterObj); } searchSettings.addFacet(fb); } // (TESTED) // Temporal if ((null != aggregation) && (null != aggregation.timesInterval)) { if (aggregation.timesInterval.contains("m")) { aggregation.timesInterval = "month"; } CrossVersionFacetBuilder.DateHistogramFacetBuilder fb = CrossVersionFacetBuilders.dateHistogramFacet("time") .field(DocumentPojo.publishedDate_) .interval(aggregation.timesInterval); // Gross raw handling for facets if (null != parentFilterObj) { fb = fb.facetFilter(parentFilterObj); } searchSettings.addFacet(fb); // TODO (INF-2688): if using certain types of moments then don't want this? 
} // (TESTED) // Temporal Moments if ((null != aggregation) && (null != aggregation.moments)) { if (null == aggregation.moments.timesInterval) { if (null != aggregation.timesInterval) { aggregation.moments.timesInterval = aggregation.timesInterval; } else { aggregation.moments.timesInterval = "m"; } } if (aggregation.moments.timesInterval.contains("m")) { aggregation.moments.timesInterval = "month"; } // TODO (INF-2688): Other cross filter type things if (!geoLowAccuracy && (null != aggregation.moments.geoNumReturn) && (aggregation.moments.geoNumReturn > 0)) { DateHistogramBuilder timeAgg = AggregationBuilders.dateHistogram("moments") .field(DocumentPojo.publishedDate_) .interval(new Interval(aggregation.moments.timesInterval)); TermsBuilder geoAgg = AggregationBuilders.terms("geo") .field(DocumentPojo.locs_) .size(aggregation.moments.geoNumReturn); timeAgg.subAggregation(geoAgg); searchSettings.addAggregation(timeAgg); } // TODO (CORE-89) if (null != aggregation.moments.associationsNumReturn && aggregation.moments.associationsNumReturn >= 0) { // TODO need to check if indexes mapping use doc.associations.assoc_index == docValue // fail out or don't include those communities if they don't if (validateAssociationMapping(communityIdStrs)) { DateHistogramBuilder assocTimeAgg = AggregationBuilders.dateHistogram("moments.assoc") .field(DocumentPojo.publishedDate_) .interval(new Interval(aggregation.moments.timesInterval)); TermsBuilder assocAgg = AggregationBuilders.terms("assoc") .field(AssociationPojo.assoc_index_) .size(aggregation.moments.associationsNumReturn); NestedBuilder nested = AggregationBuilders.nested("moments.assoc.nested") .path(DocumentPojo.associations_) .subAggregation(assocAgg); assocTimeAgg.subAggregation(nested); searchSettings.addAggregation(assocTimeAgg); } } if (null != aggregation.moments.entityList) { for (String entIndex : aggregation.moments.entityList) { CrossVersionFacetBuilder.DateHistogramFacetBuilder fb = 
CrossVersionFacetBuilders.dateHistogramFacet("moments." + entIndex) .field(DocumentPojo.publishedDate_) .interval(aggregation.moments.timesInterval); EntityFeaturePojo alias = null; if (null != aliasLookup) { alias = aliasLookup.getAliases(entIndex); } if (null == alias) { // no alias fb = fb.facetFilter( FilterBuilders.nestedFilter( DocumentPojo.entities_, FilterBuilders.termFilter(EntityPojo.index_, entIndex))); } // TESTED else { QueryFilterBuilder qfb = null; if ((null != alias.getSemanticLinks()) && !alias.getSemanticLinks().isEmpty()) { BoolQueryBuilder qb = QueryBuilders.boolQuery(); for (String textAlias : alias.getSemanticLinks()) { qb = qb.should( CrossVersionQueryBuilders.matchPhraseQuery( DocumentPojo.fullText_, textAlias)); } qfb = FilterBuilders.queryFilter(qb); } // TESTED if (!alias.getAlias().isEmpty()) { NestedFilterBuilder nfb = FilterBuilders.nestedFilter( DocumentPojo.entities_, FilterBuilders.termsFilter(EntityPojo.index_, entIndex, alias.getAlias())); if (null == qfb) { fb = fb.facetFilter(nfb); } // TESTED else { BoolFilterBuilder bfb = FilterBuilders.boolFilter().should(nfb).should(qfb); fb = fb.facetFilter(bfb); } // TESTED } else if (null != qfb) { fb = fb.facetFilter(qfb); } // TESTED } // TESTED // Gross raw handling for facets if (null != parentFilterObj) { fb = fb.facetFilter(parentFilterObj); } searchSettings.addFacet(fb); } } // (end list over entities) } // TESTED // Entities - due to problems with significance, handled on a document by document basis, see // Significance helper class // Associations (Events/Facts) // Association verb category filter StringBuilder verbCatRegex = null; StringBuilder entTypeRegex = null; if (((null != aggregation) && (null != aggregation.eventsNumReturn) && (aggregation.eventsNumReturn > 0)) || ((null != aggregation) && (null != aggregation.factsNumReturn) && (aggregation.factsNumReturn > 0))) { if (null != entTypeFilterStrings) { boolean bNegative = false; if ('-' != 
entTypeFilterStrings[0].charAt(0)) { // positive filtering entTypeRegex = new StringBuilder("(?:"); } else { bNegative = true; entTypeRegex = new StringBuilder("(?!"); // (this is a lookahead but will be fine because of the .*/ in front of it) } for (String entType : entTypeFilterStrings) { if (bNegative && ('-' == entType.charAt(0))) { entType = entType.substring(1); } entType = entType.replace("|", "%7C"); entTypeRegex.append(".*?/").append(Pattern.quote(entType.toLowerCase())).append('|'); // (can't match greedily because of the 2nd instance of entity type) } entTypeRegex.setLength(entTypeRegex.length() - 1); // (remove trailing |) entTypeRegex.append(")"); if (bNegative) { entTypeRegex.append("[^|]*"); // (now the actual verb, if a -ve lookahead) } } // TESTED if (null != assocVerbFilterStrings) { boolean bNegative = false; if ('-' != assocVerbFilterStrings[0].charAt(0)) { // positive filtering verbCatRegex = new StringBuilder("\\|(?:"); } else { bNegative = true; verbCatRegex = new StringBuilder("\\|(?!"); // (this is a lookahead but will be fine because of the "^[^|]*\\" in front of it) // eg say I have -VERB then subject|VERB|object will match because if the } for (String assocVerbFilterString : assocVerbFilterStrings) { if (bNegative && ('-' == assocVerbFilterString.charAt(0))) { assocVerbFilterString = assocVerbFilterString.substring(1); } assocVerbFilterString = assocVerbFilterString.replace("|", "%7C"); verbCatRegex.append(Pattern.quote(assocVerbFilterString)).append('|'); } verbCatRegex.setLength(verbCatRegex.length() - 1); // (remove trailing |) verbCatRegex.append(")"); if (bNegative) { verbCatRegex.append("[^|]*"); // (now the actual verb, if a -ve lookahead) } } // TESTED } // TESTED (all combinations of 1/2 people, 1/2 verbs) if ((null != aggregation) && (null != aggregation.eventsNumReturn) && (aggregation.eventsNumReturn > 0)) { StringBuffer regex = new StringBuffer("^Event\\|"); if (null != entTypeRegex) { regex.append(entTypeRegex); } else { 
regex.append("[^|]*"); } if (null != verbCatRegex) { regex.append(verbCatRegex); } else if (null != entTypeRegex) { regex.append("\\|[^|]*"); } else { regex.append(".*"); } if (null != entTypeRegex) { regex.append("\\|").append(entTypeRegex); regex.append(".*"); } else { regex.append("\\|.*"); } // DEBUG // System.out.println("REGEX==" + regex.toString()); // TESTED (all combinations of 1/2 people, 1/2 verbs) CrossVersionFacetBuilder.TermsFacetBuilder fb = CrossVersionFacetBuilders.termsFacet("events") .field(AssociationPojo.assoc_index_) .size(aggregation.eventsNumReturn) .nested(DocumentPojo.associations_); fb.regex(regex.toString()); // Gross raw handling for facets if (null != parentFilterObj) { fb = fb.facetFilter(parentFilterObj); } searchSettings.addFacet(fb); } if ((null != aggregation) && (null != aggregation.factsNumReturn) && (aggregation.factsNumReturn > 0)) { StringBuffer regex = new StringBuffer("^Fact\\|"); if (null != entTypeRegex) { regex.append(entTypeRegex); } else { regex.append("[^|]*"); } if (null != verbCatRegex) { regex.append(verbCatRegex); } else if (null != entTypeRegex) { regex.append("\\|[^|]*"); } else { regex.append(".*"); } if (null != entTypeRegex) { regex.append("\\|").append(entTypeRegex); regex.append(".*"); } else { regex.append("\\|.*"); } // DEBUG // System.out.println("REGEX==" + regex.toString()); // TESTED (all combinations of 1/2 people, 1/2 verbs) CrossVersionFacetBuilder.TermsFacetBuilder fb = CrossVersionFacetBuilders.termsFacet("facts") .field(AssociationPojo.assoc_index_) .size(aggregation.factsNumReturn) .nested(DocumentPojo.associations_); fb.regex(regex.toString()); // Gross raw handling for facets if (null != parentFilterObj) { fb = fb.facetFilter(parentFilterObj); } searchSettings.addFacet(fb); } // Source management/monitoring if ((null != aggregation) && (null != aggregation.sourceMetadata) && (aggregation.sourceMetadata > 0)) { CrossVersionFacetBuilder.TermsFacetBuilder fb = 
CrossVersionFacetBuilders.termsFacet("sourceTags") .field(DocumentPojo.tags_) .size(aggregation.sourceMetadata) .facetFilter(parentFilterObj); CrossVersionFacetBuilder.TermsFacetBuilder fb1 = CrossVersionFacetBuilders.termsFacet("sourceTypes") .field(DocumentPojo.mediaType_) .size(aggregation.sourceMetadata) .facetFilter(parentFilterObj); // Gross raw handling for facets if (null != parentFilterObj) { fb = fb.facetFilter(parentFilterObj); fb1 = fb1.facetFilter(parentFilterObj); } searchSettings.addFacet(fb); searchSettings.addFacet(fb1); } if ((null != aggregation) && (null != aggregation.sources) && (aggregation.sources > 0)) { CrossVersionFacetBuilder.TermsFacetBuilder fb = CrossVersionFacetBuilders.termsFacet("sourceKeys") .field(DocumentPojo.sourceKey_) .size(aggregation.sources); // Gross raw handling for facets if (null != parentFilterObj) { fb = fb.facetFilter(parentFilterObj); } searchSettings.addFacet(fb); } } // TESTED
/** All indices has to be created before! */ public void mergeIndices( Collection<String> indexList, String intoIndex, int hitsPerPage, boolean forceRefresh, CreateObjectsInterface<T> createObj, FilterBuilder additionalFilter) { if (forceRefresh) { refresh(indexList); refresh(intoIndex); } int keepTime = 100; for (String fromIndex : indexList) { SearchRequestBuilder srb = client .prepareSearch(fromIndex) .setVersion(true) .setQuery(QueryBuilders.matchAllQuery()) .setSize(hitsPerPage) .setSearchType(SearchType.SCAN) .setScroll(TimeValue.timeValueMinutes(keepTime)); if (additionalFilter != null) srb.setFilter(additionalFilter); SearchResponse rsp = srb.execute().actionGet(); try { long total = rsp.hits().totalHits(); int collectedResults = 0; while (true) { StopWatch queryWatch = new StopWatch().start(); rsp = client .prepareSearchScroll(rsp.scrollId()) .setScroll(TimeValue.timeValueMinutes(keepTime)) .execute() .actionGet(); long currentResults = rsp.hits().hits().length; if (currentResults == 0) break; queryWatch.stop(); Collection<T> objs = createObj.collectObjects(rsp); StopWatch updateWatch = new StopWatch().start(); int failed = bulkUpdate(objs, intoIndex, false, false).size(); // trying to enable flushing to avoid memory issues on the server side? flush(intoIndex); updateWatch.stop(); collectedResults += currentResults; logger.info( "Progress " + collectedResults + "/" + total + " fromIndex=" + fromIndex + " update:" + updateWatch.totalTime().getSeconds() + " query:" + queryWatch.totalTime().getSeconds() + " failed:" + failed); } logger.info( "Finished copying of index:" + fromIndex + ". Total:" + total + " collected:" + collectedResults); } catch (Exception ex) { // throw new RuntimeException(ex); logger.error( "Failed to copy data from index " + fromIndex + " into " + intoIndex + ".", ex); } } if (forceRefresh) refresh(intoIndex); }
/**
 * Executes the given query on a freshly created search builder and waits for the result.
 *
 * @param queryBuilder the query to run
 * @return the response of the executed search
 */
public SearchResponse query(QueryBuilder queryBuilder) {
  return createSearchBuilder().setQuery(queryBuilder).execute().actionGet();
}