public static void deleteCommunityDocIndex( String nameOrCommunityIdStr, ObjectId parentCommunityId, boolean bPersonalGroup) { String sGroupIndex = null; try { sGroupIndex = new StringBuffer("doc_").append(new ObjectId(nameOrCommunityIdStr).toString()).toString(); } catch (Exception e) { sGroupIndex = nameOrCommunityIdStr; } if (bPersonalGroup) { ElasticSearchManager dummyGroupIndex = IndexManager.getIndex(DocumentPojoIndexMap.dummyDocumentIndex_); dummyGroupIndex.removeAlias(sGroupIndex); } else if (null != parentCommunityId) { String sParentGroupIndex = new StringBuffer("doc_").append(parentCommunityId.toString()).toString(); ElasticSearchManager docIndex = IndexManager.getIndex(sParentGroupIndex); docIndex.removeAlias(sGroupIndex); docIndex.closeIndex(); } else { ElasticSearchManager docIndex = IndexManager.getIndex(sGroupIndex); docIndex.deleteMe(); } // TESTED (parent, children, and personal) }
public static void synchronizeEntityFeature( EntityFeaturePojo entityFeature, ObjectId communityId) { DBCollection entityFeatureDb = DbManager.getFeature().getEntity(); // NOTE: Important that feeds update occurs before synchronization, since the sync "corrupts" // the entity if (_diagnosticMode || (null != entityFeature .getDbSyncTime())) { // Else this is a new feature so don't need to update the // feature DB, only the index long nCurrTime = System.currentTimeMillis(); // (query from top of the function, basically lookup on gaz_index) BasicDBObject update2 = new BasicDBObject(); update2.put(EntityFeaturePojo.db_sync_time_, Long.toString(nCurrTime)); update2.put(EntityFeaturePojo.db_sync_doccount_, entityFeature.getDoccount()); BasicDBObject update = new BasicDBObject(MongoDbManager.set_, update2); BasicDBObject query = new BasicDBObject(EntityFeaturePojo.index_, entityFeature.getIndex()); query.put(EntityFeaturePojo.communityId_, communityId); if (_diagnosticMode) { System.out.println( "EntityAggregationUtils.synchronizeEntityFeature, featureDB: " + query.toString() + " / " + update.toString()); } else { entityFeatureDb.update(query, update, false, true); } } if (_diagnosticMode) { System.out.println( "EntityAggregationUtils.synchronizeEntityFeature, synchronize: " + new StringBuffer(entityFeature.getIndex()) .append(':') .append(communityId) .toString() + " = " + IndexManager.mapToIndex(entityFeature, new EntityFeaturePojoIndexMap())); } else { ElasticSearchManager esm = IndexManager.getIndex(EntityFeaturePojoIndexMap.indexName_); esm.addDocument(entityFeature, new EntityFeaturePojoIndexMap(), null, true); // (_id is set by the index map to index:communityId) } } // TESTED (by eye, mostly cut-and-paste from test Beta)
public static void createCommunityDocIndex( String nameOrCommunityIdStr, ObjectId parentCommunityId, boolean bPersonalGroup, boolean bSystemGroup, boolean bClearIndex, boolean bParentsOnly) { // create elasticsearch indexes PropertiesManager pm = new PropertiesManager(); boolean languageNormalization = pm.getNormalizeEncoding(); int nPreferredReplicas = pm.getMaxIndexReplicas(); String docMapping = new Gson().toJson(new DocumentPojoIndexMap.Mapping(), DocumentPojoIndexMap.Mapping.class); String sGroupIndex = null; try { sGroupIndex = new StringBuffer("doc_").append(new ObjectId(nameOrCommunityIdStr).toString()).toString(); } catch (Exception e) { sGroupIndex = nameOrCommunityIdStr; } if (!bPersonalGroup) { String parentCommunityIdStr = null; if (null != parentCommunityId) { parentCommunityIdStr = parentCommunityId.toString(); } if ((null == parentCommunityIdStr) || (parentCommunityIdStr.equals("4c927585d591d31d7b37097a"))) { // (system community is hardwired - children of this community are ignored) int nShards = bSystemGroup ? 10 : 5; // (system group is largest) // Remove the alias, in case it exists: // Then create an index with this name: Builder localSettingsGroupIndex = ImmutableSettings.settingsBuilder(); localSettingsGroupIndex .put("number_of_shards", nShards) .put("number_of_replicas", nPreferredReplicas); if (languageNormalization) { localSettingsGroupIndex.put("index.analysis.analyzer.default.tokenizer", "standard"); localSettingsGroupIndex.putArray( "index.analysis.analyzer.default.filter", "icu_normalizer", "icu_folding", "standard", "lowercase", "stop"); } // TESTED ElasticSearchManager docIndex = null; try { docIndex = IndexManager.createIndex( sGroupIndex, DocumentPojoIndexMap.documentType_, false, null, docMapping, localSettingsGroupIndex); } catch ( RuntimeException e) { // illegal arg exception, probably the language normalization? if (languageNormalization) { // (likely the required plugins have not been installed, just // regress back to normal) localSettingsGroupIndex = ImmutableSettings.settingsBuilder(); localSettingsGroupIndex .put("number_of_shards", nShards) .put("number_of_replicas", nPreferredReplicas); docIndex = IndexManager.createIndex( sGroupIndex, DocumentPojoIndexMap.documentType_, false, null, docMapping, localSettingsGroupIndex); } // TESTED else throw e; } // TOTEST if (bClearIndex) { docIndex.deleteMe(); docIndex = IndexManager.createIndex( sGroupIndex, DocumentPojoIndexMap.documentType_, false, null, docMapping, localSettingsGroupIndex); } if (null != docIndex) { try { docIndex.pingIndex(); // (wait until it's created itself) } catch (Exception e) { } // (just make sure this doesn't die horribly) docIndex.closeIndex(); } } else if (!bParentsOnly) { String sParentGroupIndex = new StringBuffer("doc_") .append(new ObjectId(parentCommunityIdStr).toString()) .toString(); ElasticSearchManager docIndex = IndexManager.getIndex(sParentGroupIndex); // DEBUG (alias corruption) // if (null == _aliasInfo) { // ClusterStateResponse clusterState = // docIndex.getRawClient().admin().cluster().state(new ClusterStateRequest()).actionGet(); // _aliasInfo = clusterState.getState().getMetaData().getAliases(); // } // else { // if (_aliasInfo.containsKey(sGroupIndex)) { // has no aliases, we're not good // return; // } // else { // //DEBUG // System.out.println("Alias " + sGroupIndex + " has no aliases (but should)"); // ElasticSearchManager docIndex2 = IndexManager.getIndex(sGroupIndex); // docIndex2.deleteMe(); // } // } docIndex.createAlias(sGroupIndex); docIndex.closeIndex(); // (do nothing on delete - that will be handled at the parent index level) } // TESTED (parents, children, and personal) } else { // Just create the dummy index, no different to getting it in practice Builder localSettingsGroupIndex = ImmutableSettings.settingsBuilder(); localSettingsGroupIndex .put("number_of_shards", 1) .put("number_of_replicas", 0); // (ie guaranteed to be local to each ES node) ElasticSearchManager dummyGroupIndex = IndexManager.createIndex( DocumentPojoIndexMap.dummyDocumentIndex_, DocumentPojoIndexMap.documentType_, false, null, docMapping, localSettingsGroupIndex); if (null == dummyGroupIndex) { dummyGroupIndex = IndexManager.getIndex(DocumentPojoIndexMap.dummyDocumentIndex_); } // Just create an alias, so that queries work arbitrarily: dummyGroupIndex.createAlias(sGroupIndex); // (do nothing on delete since don't have any docs in here anyway) } }
public void InitializeIndex( boolean bDeleteDocs, boolean bDeleteEntityFeature, boolean bDeleteEventFeature, boolean bRebuildDocsIndex) { try { // create elasticsearch indexes PropertiesManager pm = new PropertiesManager(); if (!pm.getAggregationDisabled()) { Builder localSettingsEvent = ImmutableSettings.settingsBuilder(); localSettingsEvent.put("number_of_shards", 1).put("number_of_replicas", 0); localSettingsEvent.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard"); localSettingsEvent.putArray( "index.analysis.analyzer.suggestAnalyzer.filter", "standard", "lowercase"); localSettingsEvent.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard"); localSettingsEvent.putArray( "index.analysis.analyzer.suggestAnalyzer.filter", "standard", "lowercase"); Builder localSettingsGaz = ImmutableSettings.settingsBuilder(); localSettingsGaz.put("number_of_shards", 1).put("number_of_replicas", 0); localSettingsGaz.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard"); localSettingsGaz.putArray( "index.analysis.analyzer.suggestAnalyzer.filter", "standard", "lowercase"); // event feature String eventGazMapping = new Gson() .toJson( new AssociationFeaturePojoIndexMap.Mapping(), AssociationFeaturePojoIndexMap.Mapping.class); ElasticSearchManager eventIndex = IndexManager.createIndex( AssociationFeaturePojoIndexMap.indexName_, null, false, null, eventGazMapping, localSettingsEvent); if (bDeleteEventFeature) { eventIndex.deleteMe(); eventIndex = IndexManager.createIndex( AssociationFeaturePojoIndexMap.indexName_, null, false, null, eventGazMapping, localSettingsEvent); } // entity feature String gazMapping = new Gson() .toJson( new EntityFeaturePojoIndexMap.Mapping(), EntityFeaturePojoIndexMap.Mapping.class); ElasticSearchManager entityIndex = IndexManager.createIndex( EntityFeaturePojoIndexMap.indexName_, null, false, null, gazMapping, localSettingsGaz); if (bDeleteEntityFeature) { entityIndex.deleteMe(); entityIndex = IndexManager.createIndex( EntityFeaturePojoIndexMap.indexName_, null, false, null, gazMapping, localSettingsGaz); } } // DOCS - much more complicated than anything else boolean bPingMainIndexFailed = !ElasticSearchManager.pingIndex(DocumentPojoIndexMap.globalDocumentIndex_); // (ie if main doc index doesn't exist then always rebuild all indexes) if (bPingMainIndexFailed) { // extra level of robustness... sleep for a minute then double // check the index is really missing... try { Thread.sleep(60000); } catch (Exception e) { } bPingMainIndexFailed = !ElasticSearchManager.pingIndex(DocumentPojoIndexMap.globalDocumentIndex_); } bRebuildDocsIndex |= bPingMainIndexFailed; createCommunityDocIndex( DocumentPojoIndexMap.globalDocumentIndex_, null, false, true, bDeleteDocs); createCommunityDocIndex( DocumentPojoIndexMap.manyGeoDocumentIndex_, null, false, false, bDeleteDocs); // Some hardwired dummy communities createCommunityDocIndex( "4e3706c48d26852237078005", null, true, false, bDeleteDocs); // (admin) createCommunityDocIndex( "4e3706c48d26852237079004", null, true, false, bDeleteDocs); // (test user) // (create dummy index used to keep personal group aliases) // OK, going to have different shards for different communities: // Get a list of all the communities: BasicDBObject query = new BasicDBObject(); BasicDBObject fieldsToDrop = new BasicDBObject("members", 0); fieldsToDrop.put("communityAttributes", 0); fieldsToDrop.put("userAttributes", 0); DBCursor dbc = DbManager.getSocial().getCommunity().find(query, fieldsToDrop); if (bRebuildDocsIndex || bDeleteDocs) { List<DBObject> tmparray = dbc.toArray(); // (brings the entire thing into memory so don't get cursor timeouts) int i = 0; System.out.println("Initializing " + dbc.size() + " indexes:"); for (int j = 0; j < 2; ++j) { for (DBObject dbotmp : tmparray) { if ((++i % 100) == 0) { System.out.println("Initialized " + i + " indexes."); } BasicDBObject dbo = (BasicDBObject) dbotmp; // OK, going to see if there are any sources with this group id, create a new index if // so: // (Don't use CommunityPojo data model here for performance reasons.... // (Also, haven't gotten round to porting CommunityPojo field access to using static // fields)) ObjectId communityId = (ObjectId) dbo.get("_id"); boolean bPersonalGroup = dbo.getBoolean("isPersonalCommunity", false); boolean bSystemGroup = dbo.getBoolean("isSystemCommunity", false); ObjectId parentCommunityId = (ObjectId) dbo.get("parentId"); createCommunityDocIndex( communityId.toString(), parentCommunityId, bPersonalGroup, bSystemGroup, bDeleteDocs, j == 0); } // end loop over communities } // end loop over communities - first time parents only } // (end if need to do big loop over all sources) } catch (Exception e) { e.printStackTrace(); throw new RuntimeException(e.getMessage()); } } // TESTED (not changed since by-eye test in Beta - retested after moving code into
public static void synchronizeEventFeature( AssociationFeaturePojo eventFeature, ObjectId communityId) { DBCollection eventFeatureDb = DbManager.getFeature().getAssociation(); // NOTE: Important that feeds update occurs before synchronization, since the sync "corrupts" // the event if (_diagnosticMode || (null != eventFeature.getDb_sync_time()) || (null != eventFeature.getDb_sync_prio())) { // Else this is a new feature so don't need to update the feature DB, only the index (if // db_sync_prio null then have to update to avoid b/g aggergation loop) // (note that db_sync_prio will in practice not be set when this is a new feature because it // will have same sync_doccount as doc_count) long nCurrTime = System.currentTimeMillis(); // (query from top of the function, basically lookup on gaz_index) BasicDBObject update2 = new BasicDBObject(); update2.put(AssociationFeaturePojo.db_sync_time_, Long.toString(nCurrTime)); update2.put(AssociationFeaturePojo.db_sync_doccount_, eventFeature.getDoccount()); BasicDBObject update = new BasicDBObject(MongoDbManager.set_, update2); // (also can be added to below) BasicDBObject update3 = new BasicDBObject(EntityFeaturePojo.db_sync_prio_, 1); update.put(MongoDbManager.unset_, update3); BasicDBObject query = new BasicDBObject(AssociationFeaturePojo.index_, eventFeature.getIndex()); query.put(AssociationFeaturePojo.communityId_, communityId); // Keep the number of entity1 and entity2 sets down to a reasonable number // (In the end would like to be able to do this based on date rather than (essentially) // completely randomly) int nSize; BasicDBObject toPull = null; if (null != eventFeature.getEntity1()) { if ((nSize = eventFeature.getEntity1().size()) > AssociationFeaturePojo.entity_MAXFIELDS) { if (null == toPull) toPull = new BasicDBObject(); ArrayList<String> ent1ToRemove = new ArrayList<String>( eventFeature.getEntity1().size() - AssociationFeaturePojo.entity_MAXFIELDS); Iterator<String> it = eventFeature.getEntity1().iterator(); while (it.hasNext() && (nSize > AssociationFeaturePojo.entity_MAXFIELDS)) { String ent = it.next(); if (-1 == ent.indexOf('/')) { // (ie don't remove the index) nSize--; it.remove(); // (this removes from the index) ent1ToRemove.add(ent); } } toPull.put(AssociationFeaturePojo.entity1_, ent1ToRemove); // (this removes from the database) } } if (null != eventFeature.getEntity2()) { if ((nSize = eventFeature.getEntity2().size()) > AssociationFeaturePojo.entity_MAXFIELDS) { if (null == toPull) toPull = new BasicDBObject(); ArrayList<String> ent2ToRemove = new ArrayList<String>( eventFeature.getEntity2().size() - AssociationFeaturePojo.entity_MAXFIELDS); Iterator<String> it = eventFeature.getEntity2().iterator(); while (it.hasNext() && (nSize > AssociationFeaturePojo.entity_MAXFIELDS)) { String ent = it.next(); if (-1 == ent.indexOf('/')) { // (ie don't remove the index) nSize--; it.remove(); // (this removes from the index) ent2ToRemove.add(ent); } } toPull.put(AssociationFeaturePojo.entity2_, ent2ToRemove); // (this removes from the database) } } if (null != toPull) { update.put(MongoDbManager.pullAll_, toPull); // (this removes from the database) } // TESTED (2.1.4.3b, including no index removal clause) if (_diagnosticMode) { if ((null != eventFeature.getDb_sync_time()) || (null != eventFeature.getDb_sync_prio())) { if (_logInDiagnosticMode) System.out.println( "AssociationAggregationUtils.synchronizeEventFeature, featureDB: " + query.toString() + " / " + update.toString()); } else { if (_logInDiagnosticMode) System.out.println( "(WOULD NOT RUN) EventAggregationUtils.synchronizeEventFeature, featureDB: " + query.toString() + " / " + update.toString()); } } else { eventFeatureDb.update(query, update, false, true); } } if (_diagnosticMode) { if (_logInDiagnosticMode) System.out.println( "AssociationAggregationUtils.synchronizeEventFeature, synchronize: " + new StringBuffer(eventFeature.getIndex()) .append(':') .append(communityId) .toString() + " = " + IndexManager.mapToIndex(eventFeature, new AssociationFeaturePojoIndexMap())); } else { ElasticSearchManager esm = IndexManager.getIndex(AssociationFeaturePojoIndexMap.indexName_); esm.addDocument(eventFeature, new AssociationFeaturePojoIndexMap(), null, true); } } // TESTED