Ejemplo n.º 1
0
  /**
   * Copies an entity feature's aggregate statistics onto the matching document metadata records.
   *
   * <p>Builds a selector on the feature's index plus the given community id, and an update
   * (keyed by {@code MongoDbManager.set_}) that writes the feature's total frequency and document
   * count. In diagnostic mode the selector and update are only printed to standard output;
   * otherwise the update is applied with upsert disabled and multi-document enabled.
   *
   * <p>Any exception raised while talking to the database is logged and swallowed, so a failed
   * update simply leaves the stored statistics out of date.
   *
   * @param entFeature the entity feature supplying the index, total frequency and document count
   * @param communityId the community whose documents should be updated
   */
  public static void updateMatchingEntities(EntityFeaturePojo entFeature, ObjectId communityId) {
    final String entityIndex = entFeature.getIndex();
    final long totalFrequency = entFeature.getTotalfreq();
    final long documentCount = entFeature.getDoccount();

    try {
      final DBCollection metadataCollection = DbManager.getDocument().getMetadata();

      // Selector: entity index + community.
      final BasicDBObject selector = new BasicDBObject(EntityPojo.docQuery_index_, entityIndex);
      selector.put(DocumentPojo.communityId_, communityId);

      // Update: overwrite the two aggregate statistics.
      final BasicDBObject newStats =
          new BasicDBObject(EntityPojo.docUpdate_totalfrequency_, totalFrequency);
      newStats.put(EntityPojo.docUpdate_doccount_, documentCount);
      final BasicDBObject setCommand = new BasicDBObject(MongoDbManager.set_, newStats);

      if (_diagnosticMode) {
        // Dry run: report what would have been sent, touch nothing.
        System.out.println(
            "EntityAggregationUtils.updateMatchingEntities: " + selector + " / " + setCommand);
        return;
      }

      // This operation can be slow and touch a lot of disk. Serialise it across threads: when
      // many of these updates run at once they keep yielding to each other and thrash the disk.
      synchronized (GenericProcessingController.class) {
        metadataCollection.update(selector, setCommand, false, true);
        // Waiting on the last-error result keeps consecutive accesses strictly one at a time.
        DbManager.getDocument().getLastError(DbManager.getDocument().getMetadata().getName());
      }

      // Historical note (INF-1406): an "updatedExisting" check/retry used to follow here, but it
      // sometimes appeared to inspect the wrong command. The original reason for it is believed
      // to be gone, and an occasional missed update only leaves stale values, so it was removed.
    } catch (Exception ex) {
      logger.error(ex.getMessage(), ex);
    }
  } // TESTED (by eye, mostly cut-and-paste from test Beta)
  /**
   * Creates the MongoDB indexes used by the platform and removes superseded legacy ones.
   *
   * <p>Indexes are ensured on the document (content, metadata, counts), feature (entity,
   * association, geo), ingest (source), social (share, cookies) and custom (lookup) collections.
   * The entity-index/community compound index on document metadata is only created when
   * aggregation is not disabled in the properties.
   *
   * <p>Each legacy {@code dropIndex} call is best-effort: a failure there is deliberately ignored
   * so that initialization still succeeds on databases that never had the old index.
   *
   * <p>Several indexes use the key value {@code 2} together with the sparse option, while the
   * matching legacy index used {@code 1} (and vice versa for the custom lookup collection).
   * NOTE(review): presumably the differing value keeps the new index distinct from the legacy
   * one it replaces - confirm before changing any of these values.
   *
   * @throws RuntimeException if any step other than a legacy index drop fails; the message is
   *     that of the underlying exception, which is attached as the cause
   */
  public void InitializeDatabase() {
    // Add indices:
    try {
      PropertiesManager pm = new PropertiesManager();

      // ---- Document content / metadata ----
      DbManager.getDocument()
          .getContent()
          .ensureIndex(new BasicDBObject(DocumentPojo.url_, 1)); // (annoyingly necessary)
      DbManager.getDocument()
          .getMetadata()
          .ensureIndex(
              new BasicDBObject(DocumentPojo.sourceUrl_, 2),
              new BasicDBObject(MongoDbManager.sparse_, true));
      try {
        DbManager.getDocument()
            .getMetadata()
            .dropIndex(new BasicDBObject(DocumentPojo.sourceUrl_, 1));
      } catch (Exception ignored) {
        // Best-effort legacy cleanup: failure is expected when the old index does not exist.
      } // (leave this in for a while until all legacy DBs are removed)

      // Compound index lets me access {url, sourceKey}, {url} efficiently ... but need sourceKey
      // separately to do {sourceKey}
      BasicDBObject compIndex = new BasicDBObject(DocumentPojo.url_, 1);
      compIndex.put(DocumentPojo.sourceKey_, 1);
      DbManager.getDocument().getMetadata().ensureIndex(compIndex);
      // Add {_id:-1} to "standalone" sourceKey, sort docs matching source key by "time" (sort of!)
      compIndex = new BasicDBObject(DocumentPojo.sourceKey_, 1);
      compIndex.put(DocumentPojo._id_, -1);
      DbManager.getDocument().getMetadata().ensureIndex(compIndex);
      try {
        DbManager.getDocument()
            .getMetadata()
            .dropIndex(new BasicDBObject(DocumentPojo.sourceKey_, 1));
      } catch (Exception ignored) {
        // Best-effort legacy cleanup: failure is expected when the old index does not exist.
      } // (leave this in for a while until all legacy DBs are removed)
      // Title simply not needed, that was a mistake from an early iteration:
      try {
        DbManager.getDocument().getMetadata().dropIndex(new BasicDBObject(DocumentPojo.title_, 1));
      } catch (Exception ignored) {
        // Best-effort legacy cleanup: failure is expected when the old index does not exist.
      } // (leave this in for a while until all legacy DBs are removed)
      DbManager.getDocument()
          .getMetadata()
          .ensureIndex(
              new BasicDBObject(DocumentPojo.updateId_, 2),
              new BasicDBObject(MongoDbManager.sparse_, true));
      try {
        DbManager.getDocument()
            .getMetadata()
            .dropIndex(new BasicDBObject(DocumentPojo.updateId_, 1));
      } catch (Exception ignored) {
        // Best-effort legacy cleanup: failure is expected when the old index does not exist.
      } // (leave this in for a while until all legacy DBs are removed)
      // The entity-index/community compound index is only created when aggregation is enabled.
      if (!pm.getAggregationDisabled()) {
        compIndex = new BasicDBObject(EntityPojo.docQuery_index_, 1);
        compIndex.put(DocumentPojo.communityId_, 1);
        DbManager.getDocument().getMetadata().ensureIndex(compIndex);
      }

      // ---- Document counts ----
      compIndex = new BasicDBObject(DocCountPojo._id_, 1);
      compIndex.put(DocCountPojo.doccount_, 1);
      DbManager.getDocument().getCounts().ensureIndex(compIndex);

      // ---- Features: entity / association / geo ----
      DbManager.getFeature()
          .getEntity()
          .ensureIndex(new BasicDBObject(EntityFeaturePojo.disambiguated_name_, 1));
      DbManager.getFeature()
          .getEntity()
          .ensureIndex(new BasicDBObject(EntityFeaturePojo.index_, 1));
      DbManager.getFeature()
          .getEntity()
          .ensureIndex(new BasicDBObject(EntityFeaturePojo.alias_, 1));
      DbManager.getFeature()
          .getEntity()
          .ensureIndex(
              new BasicDBObject(EntityFeaturePojo.db_sync_prio_, 2),
              new BasicDBObject(MongoDbManager.sparse_, true));
      DbManager.getFeature()
          .getAssociation()
          .ensureIndex(new BasicDBObject(AssociationFeaturePojo.index_, 1));
      DbManager.getFeature().getGeo().ensureIndex(new BasicDBObject("country", 1));
      DbManager.getFeature().getGeo().ensureIndex(new BasicDBObject("search_field", 1));
      DbManager.getFeature().getGeo().ensureIndex(new BasicDBObject("geoindex", "2d"));

      // ---- Ingest: sources ----
      DbManager.getIngest().getSource().ensureIndex(new BasicDBObject(SourcePojo.key_, 1));
      DbManager.getIngest().getSource().ensureIndex(new BasicDBObject(SourcePojo.communityIds_, 1));
      DbManager.getIngest()
          .getSource()
          .ensureIndex(new BasicDBObject(SourceHarvestStatusPojo.sourceQuery_harvested_, 1));
      DbManager.getIngest()
          .getSource()
          .ensureIndex(new BasicDBObject(SourceHarvestStatusPojo.sourceQuery_synced_, 1));

      // ---- Social: shares / cookies ----
      // Compound index lets me access {type, communities._id}, {type} efficiently
      compIndex = new BasicDBObject("type", 1);
      compIndex.put("communities._id", 1);
      DbManager.getSocial().getShare().ensureIndex(compIndex);
      try {
        DbManager.getSocial().getShare().dropIndex(new BasicDBObject("type", 1));
      } catch (Exception ignored) {
        // Best-effort legacy cleanup: failure is expected when the old index does not exist.
      } // (leave this in for a while until all legacy DBs are removed)
      DbManager.getSocial()
          .getCookies()
          .ensureIndex(
              new BasicDBObject("apiKey", 2), new BasicDBObject(MongoDbManager.sparse_, true));
      try {
        DbManager.getSocial().getCookies().dropIndex(new BasicDBObject("apiKey", 1));
      } catch (Exception ignored) {
        // Best-effort legacy cleanup: failure is expected when the old index does not exist.
      } // (leave this in for a while until all legacy DBs are removed)

      // ---- Custom: map/reduce job lookup ----
      DbManager.getCustom()
          .getLookup()
          .ensureIndex(new BasicDBObject(CustomMapReduceJobPojo.jobtitle_, 1));
      // TODO (): MOVE THESE TO SPARSE INDEXES AFTER YOU'VE UPDATED THE LOGIC (SWAP THE 1 AND 2)
      // i.e. for both jobidS_ and waitingOn_ below, the intended end state is to ensure the index
      // with value 2 and sparse=true, and to drop the legacy index with value 1 instead.
      DbManager.getCustom()
          .getLookup()
          .ensureIndex(
              new BasicDBObject(CustomMapReduceJobPojo.jobidS_, 1),
              new BasicDBObject(MongoDbManager.sparse_, false));
      try {
        DbManager.getCustom()
            .getLookup()
            .dropIndex(new BasicDBObject(CustomMapReduceJobPojo.jobidS_, 2));
      } catch (Exception ignored) {
        // Best-effort legacy cleanup: failure is expected when the old index does not exist.
      } // (leave this in for a while until all legacy DBs are removed)
      DbManager.getCustom()
          .getLookup()
          .ensureIndex(
              new BasicDBObject(CustomMapReduceJobPojo.waitingOn_, 1),
              new BasicDBObject(MongoDbManager.sparse_, false));
      try {
        DbManager.getCustom()
            .getLookup()
            .dropIndex(new BasicDBObject(CustomMapReduceJobPojo.waitingOn_, 2));
      } catch (Exception ignored) {
        // Best-effort legacy cleanup: failure is expected when the old index does not exist.
      } // (leave this in for a while until all legacy DBs are removed)
    } catch (Exception e) {
      // Kept so the failure is still written to stderr exactly as before.
      e.printStackTrace();
      // Same exception type and message as before, but keep the original as the cause so its
      // type and stack trace are not lost to callers.
      throw new RuntimeException(e.getMessage(), e);
    }
  } // TESTED (not changed since by-eye test in Beta)