public static void updateMatchingEntities(EntityFeaturePojo entFeature, ObjectId communityId) {
    String index = entFeature.getIndex();
    long totalFreq = entFeature.getTotalfreq();
    long docCount = entFeature.getDoccount();
    try {
        DBCollection docDb = DbManager.getDocument().getMetadata();

        BasicDBObject query1 = new BasicDBObject();
        query1.put(EntityPojo.docQuery_index_, index);
        query1.put(DocumentPojo.communityId_, communityId);

        BasicDBObject multiopB = new BasicDBObject();
        multiopB.put(EntityPojo.docUpdate_totalfrequency_, totalFreq);
        multiopB.put(EntityPojo.docUpdate_doccount_, docCount);
        BasicDBObject multiopA = new BasicDBObject(MongoDbManager.set_, multiopB);

        if (_diagnosticMode) {
            System.out.println("EntityAggregationUtils.updateMatchingEntities: " + query1.toString() + " / " + multiopA.toString());
        }
        else {
            synchronized (GenericProcessingController.class) {
                // Because this op can be slow and traverse a lot of disk, ensure all the threads
                // don't hammer it at once (otherwise the updates yield to each other enough that
                // the disk thrashes)
                docDb.update(query1, multiopA, false, true);
                DbManager.getDocument().getLastError(DbManager.getDocument().getMetadata().getName());
                    // (enforce consecutive accesses for this potentially very slow operation)
            }
            // This originally checked updatedExisting, but for INF-1406 it sometimes seemed to be
            // checking the wrong command. The reason for the check has likely gone away, and it
            // doesn't much matter if this update occasionally fails (the entity stats will just be
            // slightly out of date), so the check/retry has been removed.
        }
    }
    catch (Exception ex) {
        logger.error(ex.getMessage(), ex);
    }
} // TESTED (by eye, mostly cut-and-paste from test Beta)
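// A minimal usage sketch (hypothetical caller, not part of this file): after an aggregation
// pass has recomputed an entity's feature-level stats, push them down to the per-document
// entity copies, e.g.:
//
//     EntityFeaturePojo entFeature = ...; // freshly (re-)aggregated feature
//     EntityAggregationUtils.updateMatchingEntities(entFeature, communityId);
//
// (The multi-update above relies on the {EntityPojo.docQuery_index_, DocumentPojo.communityId_}
// compound index created in InitializeDatabase() below when aggregation is enabled.)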
public void InitializeDatabase() {
    // Add indices:
    try {
        PropertiesManager pm = new PropertiesManager();

        DbManager.getDocument().getContent().ensureIndex(new BasicDBObject(DocumentPojo.url_, 1)); // (annoyingly necessary)
        DbManager.getDocument().getMetadata().ensureIndex(new BasicDBObject(DocumentPojo.sourceUrl_, 2), new BasicDBObject(MongoDbManager.sparse_, true));
        try {
            DbManager.getDocument().getMetadata().dropIndex(new BasicDBObject(DocumentPojo.sourceUrl_, 1));
        }
        catch (Exception e) {} // (leave this in for a while until all legacy DBs are removed)

        // Compound index lets me access {url, sourceKey} and {url} efficiently ... but need
        // sourceKey separately to do {sourceKey}
        BasicDBObject compIndex = new BasicDBObject(DocumentPojo.url_, 1);
        compIndex.put(DocumentPojo.sourceKey_, 1);
        DbManager.getDocument().getMetadata().ensureIndex(compIndex);
        // Add {_id:-1} to the "standalone" sourceKey index so docs matching a source key sort
        // by "time" (sort of! - ObjectIds lead with a timestamp, so descending _id approximates
        // newest-first)
        compIndex = new BasicDBObject(DocumentPojo.sourceKey_, 1);
        compIndex.put(DocumentPojo._id_, -1);
        DbManager.getDocument().getMetadata().ensureIndex(compIndex);
        try {
            DbManager.getDocument().getMetadata().dropIndex(new BasicDBObject(DocumentPojo.sourceKey_, 1));
        }
        catch (Exception e) {} // (leave this in for a while until all legacy DBs are removed)
        // The title index simply isn't needed; it was a mistake from an early iteration:
        try {
            DbManager.getDocument().getMetadata().dropIndex(new BasicDBObject(DocumentPojo.title_, 1));
        }
        catch (Exception e) {} // (leave this in for a while until all legacy DBs are removed)
        DbManager.getDocument().getMetadata().ensureIndex(new BasicDBObject(DocumentPojo.updateId_, 2), new BasicDBObject(MongoDbManager.sparse_, true));
        try {
            DbManager.getDocument().getMetadata().dropIndex(new BasicDBObject(DocumentPojo.updateId_, 1));
        }
        catch (Exception e) {} // (leave this in for a while until all legacy DBs are removed)
        if (!pm.getAggregationDisabled()) {
            compIndex = new BasicDBObject(EntityPojo.docQuery_index_, 1);
            compIndex.put(DocumentPojo.communityId_, 1);
            DbManager.getDocument().getMetadata().ensureIndex(compIndex);
        }
        compIndex = new BasicDBObject(DocCountPojo._id_, 1);
        compIndex.put(DocCountPojo.doccount_, 1);
        DbManager.getDocument().getCounts().ensureIndex(compIndex);

        DbManager.getFeature().getEntity().ensureIndex(new BasicDBObject(EntityFeaturePojo.disambiguated_name_, 1));
        DbManager.getFeature().getEntity().ensureIndex(new BasicDBObject(EntityFeaturePojo.index_, 1));
        DbManager.getFeature().getEntity().ensureIndex(new BasicDBObject(EntityFeaturePojo.alias_, 1));
        DbManager.getFeature().getEntity().ensureIndex(new BasicDBObject(EntityFeaturePojo.db_sync_prio_, 2), new BasicDBObject(MongoDbManager.sparse_, true));
        DbManager.getFeature().getAssociation().ensureIndex(new BasicDBObject(AssociationFeaturePojo.index_, 1));
        DbManager.getFeature().getGeo().ensureIndex(new BasicDBObject("country", 1));
        DbManager.getFeature().getGeo().ensureIndex(new BasicDBObject("search_field", 1));
        DbManager.getFeature().getGeo().ensureIndex(new BasicDBObject("geoindex", "2d"));

        DbManager.getIngest().getSource().ensureIndex(new BasicDBObject(SourcePojo.key_, 1));
        DbManager.getIngest().getSource().ensureIndex(new BasicDBObject(SourcePojo.communityIds_, 1));
        DbManager.getIngest().getSource().ensureIndex(new BasicDBObject(SourceHarvestStatusPojo.sourceQuery_harvested_, 1));
        DbManager.getIngest().getSource().ensureIndex(new BasicDBObject(SourceHarvestStatusPojo.sourceQuery_synced_, 1));
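        // (Index-prefix note: a MongoDB compound index such as {url:1, sourceKey:1} serves
        // queries on {url} and on {url, sourceKey}, but not on {sourceKey} alone - which is
        // why sourceKey gets its own sourceKey-leading {sourceKey:1, _id:-1} index above, and
        // why the standalone {type:1} index is dropped below once {type, communities._id}
        // covers {type}-only queries.)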
        // Compound index lets me access {type, communities._id} and {type} efficiently
        compIndex = new BasicDBObject("type", 1);
        compIndex.put("communities._id", 1);
        DbManager.getSocial().getShare().ensureIndex(compIndex);
        try {
            DbManager.getSocial().getShare().dropIndex(new BasicDBObject("type", 1));
        }
        catch (Exception e) {} // (leave this in for a while until all legacy DBs are removed)
        DbManager.getSocial().getCookies().ensureIndex(new BasicDBObject("apiKey", 2), new BasicDBObject(MongoDbManager.sparse_, true));
        try {
            DbManager.getSocial().getCookies().dropIndex(new BasicDBObject("apiKey", 1));
        }
        catch (Exception e) {} // (leave this in for a while until all legacy DBs are removed)
        DbManager.getCustom().getLookup().ensureIndex(new BasicDBObject(CustomMapReduceJobPojo.jobtitle_, 1));

        // TODO (): MOVE THESE TO SPARSE INDEXES AFTER YOU'VE UPDATED THE LOGIC (SWAP THE 1 AND 2)
        DbManager.getCustom().getLookup().ensureIndex(new BasicDBObject(CustomMapReduceJobPojo.jobidS_, 1), new BasicDBObject(MongoDbManager.sparse_, false));
        try {
            DbManager.getCustom().getLookup().dropIndex(new BasicDBObject(CustomMapReduceJobPojo.jobidS_, 2));
        }
        catch (Exception e) {} // (leave this in for a while until all legacy DBs are removed)
        //DbManager.getCustom().getLookup().ensureIndex(new BasicDBObject(CustomMapReduceJobPojo.jobidS_, 2), new BasicDBObject(MongoDbManager.sparse_, true));
        //try { DbManager.getCustom().getLookup().dropIndex(new BasicDBObject(CustomMapReduceJobPojo.jobidS_, 1)); } catch (Exception e) {} // (leave this in for a while until all legacy DBs are removed)
        DbManager.getCustom().getLookup().ensureIndex(new BasicDBObject(CustomMapReduceJobPojo.waitingOn_, 1), new BasicDBObject(MongoDbManager.sparse_, false));
        try {
            DbManager.getCustom().getLookup().dropIndex(new BasicDBObject(CustomMapReduceJobPojo.waitingOn_, 2));
        }
        catch (Exception e) {} // (leave this in for a while until all legacy DBs are removed)
        //DbManager.getCustom().getLookup().ensureIndex(new BasicDBObject(CustomMapReduceJobPojo.waitingOn_, 2), new BasicDBObject(MongoDbManager.sparse_, true));
        //try { DbManager.getCustom().getLookup().dropIndex(new BasicDBObject(CustomMapReduceJobPojo.waitingOn_, 1)); } catch (Exception e) {} // (leave this in for a while until all legacy DBs are removed)
    }
    catch (Exception e) {
        e.printStackTrace();
        throw new RuntimeException(e.getMessage());
    }
} // TESTED (not changed since by-eye test in Beta)
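// A sketch of the "swap the 1 and 2" migration trick used above (an assumption, based on the
// pre-3.x MongoDB behavior of naming indexes after their key spec and treating any positive
// direction as ascending): ensureIndex on {field:1} vs {field:2} creates differently named
// indexes ("field_1" vs "field_2"), so re-declaring a field with 2 plus an options document
// builds a fresh index with the new options, while the paired dropIndex({field:1}) retires
// the legacy one, e.g. (hypothetical DBCollection "coll"):
//
//     coll.ensureIndex(new BasicDBObject("apiKey", 2), new BasicDBObject(MongoDbManager.sparse_, true));
//     try { coll.dropIndex(new BasicDBObject("apiKey", 1)); } catch (Exception e) {} // legacy index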