Ejemplo n.º 1
0
  /**
   * Synchronizes one entity feature with the feature database and the entity feature index.
   *
   * <p>When the feature already has a DB sync time (or diagnostic mode is enabled), an update that
   * stores the current time and the feature's doc count is built for the record(s) matching the
   * feature's index and the given community. The feature is then added to the entity feature
   * index. In diagnostic mode nothing is written: the operations that would have been issued are
   * printed to stdout instead.
   *
   * @param entityFeature the feature to synchronize
   * @param communityId the community used to select the database record(s)
   */
  public static void synchronizeEntityFeature(
      EntityFeaturePojo entityFeature, ObjectId communityId) {
    DBCollection featureCollection = DbManager.getFeature().getEntity();

    // NOTE: Important that the feeds update occurs before synchronization, since the sync
    // "corrupts" the entity

    // (a feature without a sync time is new: only the index needs it, not the feature DB)
    if (_diagnosticMode || (entityFeature.getDbSyncTime() != null)) {
      String syncTimestamp = Long.toString(System.currentTimeMillis());

      BasicDBObject syncFields = new BasicDBObject();
      syncFields.put(EntityFeaturePojo.db_sync_time_, syncTimestamp);
      syncFields.put(EntityFeaturePojo.db_sync_doccount_, entityFeature.getDoccount());
      BasicDBObject setCommand = new BasicDBObject(MongoDbManager.set_, syncFields);

      // (basically a lookup on gaz_index, restricted to the community)
      BasicDBObject selector =
          new BasicDBObject(EntityFeaturePojo.index_, entityFeature.getIndex());
      selector.put(EntityFeaturePojo.communityId_, communityId);

      if (!_diagnosticMode) {
        // (no upsert, update every matching record)
        featureCollection.update(selector, setCommand, false, true);
      } else {
        System.out.println(
            "EntityAggregationUtils.synchronizeEntityFeature, featureDB: "
                + selector.toString()
                + " / "
                + setCommand.toString());
      }
    }

    if (!_diagnosticMode) {
      // (_id is set by the index map to index:communityId)
      IndexManager.getIndex(EntityFeaturePojoIndexMap.indexName_)
          .addDocument(entityFeature, new EntityFeaturePojoIndexMap(), null, true);
      return;
    }

    String documentId =
        new StringBuilder(entityFeature.getIndex()).append(':').append(communityId).toString();
    System.out.println(
        "EntityAggregationUtils.synchronizeEntityFeature, synchronize: "
            + documentId
            + " = "
            + IndexManager.mapToIndex(entityFeature, new EntityFeaturePojoIndexMap()));
  } // TESTED (by eye, mostly cut-and-paste from test Beta)
  /**
   * Removes the document index, or the alias standing in for it, of a community.
   *
   * <ul>
   *   <li>Personal groups: the group's alias is removed from the dummy document index.
   *   <li>Groups with a parent: the group's alias is removed from the parent's index.
   *   <li>Otherwise: the group's own index is deleted.
   * </ul>
   *
   * @param nameOrCommunityIdStr a community id (mapped to {@code doc_<id>}) or, when it cannot be
   *     parsed as an ObjectId, the index name itself
   * @param parentCommunityId id of the parent community, or null if there is none
   * @param bPersonalGroup true if the community is a personal group
   */
  public static void deleteCommunityDocIndex(
      String nameOrCommunityIdStr, ObjectId parentCommunityId, boolean bPersonalGroup) {

    String groupIndexName;
    try {
      groupIndexName = "doc_" + new ObjectId(nameOrCommunityIdStr).toString();
    } catch (Exception e) {
      // (not an object id, so treat the argument as the index name)
      groupIndexName = nameOrCommunityIdStr;
    }

    if (bPersonalGroup) {
      IndexManager.getIndex(DocumentPojoIndexMap.dummyDocumentIndex_).removeAlias(groupIndexName);
      return;
    }

    if (null == parentCommunityId) {
      IndexManager.getIndex(groupIndexName).deleteMe();
      return;
    }

    String parentIndexName = "doc_" + parentCommunityId.toString();
    ElasticSearchManager parentIndex = IndexManager.getIndex(parentIndexName);
    parentIndex.removeAlias(groupIndexName);
    parentIndex.closeIndex();
    // TESTED (parent, children, and personal)
  }
Ejemplo n.º 3
0
  /**
   * Checks that the document indexes of the given communities map doc.associations.assoc_index
   * with doc values enabled.
   *
   * <p>Until it can be verified that all instances have moved over to the new mapping, every index
   * mapping has to be hand-checked.
   *
   * @param communityIdStrs community ids; the indexes inspected are those matching {@code
   *     doc_<id>*} for each id
   * @return true if every returned mapping has "doc_values" set to true on the assoc_index field;
   *     false as soon as one does not, or its mapping cannot be navigated
   */
  @SuppressWarnings("unchecked")
  private static boolean validateAssociationMapping(String[] communityIdStrs) {
    // one wildcard pattern per community, plus the same patterns joined into a single string
    String[] indexPatterns = new String[communityIdStrs.length];
    StringBuilder joinedPatterns = new StringBuilder();
    int slot = 0;
    for (String communityIdStr : communityIdStrs) {
      String pattern = "doc_" + communityIdStr + "*";
      indexPatterns[slot++] = pattern;
      joinedPatterns.append(pattern).append(", "); // (every entry is followed by ", ")
    }

    ElasticSearchManager esm = ElasticSearchManager.getIndex(joinedPatterns.toString());
    GetMappingsResponse response =
        esm.getRawClient().admin().indices().prepareGetMappings(indexPatterns).get();

    // keys to follow from the mapping source down to the assoc_index field definition
    String[] pathToAssocIndex = {
      "properties", DocumentPojo.associations_, "properties", AssociationPojo.assoc_index_
    };

    for (ObjectObjectCursor<String, ImmutableOpenMap<String, MappingMetaData>> indexEntry :
        response.getMappings()) {
      MappingMetaData docTypeMapping = indexEntry.value.get("document_index");
      try {
        Map<String, Object> node = docTypeMapping.getSourceAsMap();
        for (String key : pathToAssocIndex) {
          node = (Map<String, Object>) node.get(key);
        }
        boolean docValuesEnabled =
            node.containsKey("doc_values") && ((Boolean) node.get("doc_values"));
        if (!docValuesEnabled) {
          // doc values doesn't exist in mapping or was false
          return false;
        }
      } catch (Exception ex) {
        // a missing level or an unexpected type anywhere along the path counts as a failure
        return false;
      }
    }
    // if we fell through, all the checked indexes had the doc_value field set
    return true;
  }
  /**
   * Creates, clears or aliases the Elasticsearch document index used by a community.
   *
   * <ul>
   *   <li>Personal groups: the dummy document index is created (or fetched, if creation returns
   *       nothing) and an alias named after the group is added to it.
   *   <li>Groups with no parent, or whose parent is the hardwired system community: a dedicated
   *       index is created (10 shards for system groups, otherwise 5). If {@code bClearIndex} is
   *       set the index is deleted and created again. The index is then pinged and {@code
   *       closeIndex()} is called on it.
   *   <li>Any other group: unless {@code bParentsOnly} is set, an alias named after the group is
   *       added to the parent's index.
   * </ul>
   *
   * @param nameOrCommunityIdStr a community id (mapped to {@code doc_<id>}) or, when it cannot be
   *     parsed as an ObjectId, the index name itself
   * @param parentCommunityId id of the parent community, or null if there is none
   * @param bPersonalGroup true if the community is a personal group
   * @param bSystemGroup true if the community is the system group (gets more shards)
   * @param bClearIndex true to delete and re-create a dedicated index
   * @param bParentsOnly true to skip communities that only need an alias on their parent's index
   * @throws RuntimeException if index creation fails and language normalization is not enabled
   *     (with normalization enabled, creation is retried once without the ICU analysis settings)
   */
  public static void createCommunityDocIndex(
      String nameOrCommunityIdStr,
      ObjectId parentCommunityId,
      boolean bPersonalGroup,
      boolean bSystemGroup,
      boolean bClearIndex,
      boolean bParentsOnly) {
    // create elasticsearch indexes
    PropertiesManager pm = new PropertiesManager();
    boolean languageNormalization = pm.getNormalizeEncoding();
    int nPreferredReplicas = pm.getMaxIndexReplicas();

    String docMapping =
        new Gson().toJson(new DocumentPojoIndexMap.Mapping(), DocumentPojoIndexMap.Mapping.class);

    String sGroupIndex;
    try {
      sGroupIndex = "doc_" + new ObjectId(nameOrCommunityIdStr).toString();
    } catch (Exception e) {
      // Not parseable as an object id: the caller passed an index name, use it as is
      sGroupIndex = nameOrCommunityIdStr;
    }

    if (bPersonalGroup) {
      // Just create the dummy index, no different to getting it in practice
      Builder localSettingsGroupIndex = ImmutableSettings.settingsBuilder();
      localSettingsGroupIndex
          .put("number_of_shards", 1)
          .put("number_of_replicas", 0); // (ie guaranteed to be local to each ES node)
      ElasticSearchManager dummyGroupIndex =
          IndexManager.createIndex(
              DocumentPojoIndexMap.dummyDocumentIndex_,
              DocumentPojoIndexMap.documentType_,
              false,
              null,
              docMapping,
              localSettingsGroupIndex);
      if (null == dummyGroupIndex) {
        dummyGroupIndex = IndexManager.getIndex(DocumentPojoIndexMap.dummyDocumentIndex_);
      }

      // Just create an alias, so that queries work arbitrarily:
      dummyGroupIndex.createAlias(sGroupIndex);
      // (do nothing on delete since don't have any docs in here anyway)
      return;
    }

    String parentCommunityIdStr =
        (null != parentCommunityId) ? parentCommunityId.toString() : null;

    if ((null == parentCommunityIdStr)
        || parentCommunityIdStr.equals("4c927585d591d31d7b37097a")) {
      // (system community is hardwired - children of this community are ignored)

      int nShards = bSystemGroup ? 10 : 5; // (system group is largest)

      Builder localSettingsGroupIndex = ImmutableSettings.settingsBuilder();
      localSettingsGroupIndex
          .put("number_of_shards", nShards)
          .put("number_of_replicas", nPreferredReplicas);
      if (languageNormalization) {
        localSettingsGroupIndex.put("index.analysis.analyzer.default.tokenizer", "standard");
        localSettingsGroupIndex.putArray(
            "index.analysis.analyzer.default.filter",
            "icu_normalizer",
            "icu_folding",
            "standard",
            "lowercase",
            "stop");
      } // TESTED

      ElasticSearchManager docIndex = null;
      try {
        docIndex =
            IndexManager.createIndex(
                sGroupIndex,
                DocumentPojoIndexMap.documentType_,
                false,
                null,
                docMapping,
                localSettingsGroupIndex);
      } catch (RuntimeException e) {
        // illegal arg exception, probably the language normalization?
        if (!languageNormalization) {
          throw e;
        }
        // (likely the required plugins have not been installed, just regress back to normal;
        //  the plain settings are also the ones used if the index is re-created below)
        localSettingsGroupIndex = ImmutableSettings.settingsBuilder();
        localSettingsGroupIndex
            .put("number_of_shards", nShards)
            .put("number_of_replicas", nPreferredReplicas);

        docIndex =
            IndexManager.createIndex(
                sGroupIndex,
                DocumentPojoIndexMap.documentType_,
                false,
                null,
                docMapping,
                localSettingsGroupIndex);
        // TESTED
      } // TOTEST

      if (bClearIndex) {
        if (null == docIndex) {
          // This method already allows for createIndex handing back null (see the null checks
          // further down and in the personal-group branch), so fetch a handle to the index
          // instead of failing with a NullPointerException before it can be cleared.
          docIndex = IndexManager.getIndex(sGroupIndex);
        }
        docIndex.deleteMe();
        docIndex =
            IndexManager.createIndex(
                sGroupIndex,
                DocumentPojoIndexMap.documentType_,
                false,
                null,
                docMapping,
                localSettingsGroupIndex);
      }
      if (null != docIndex) {
        try {
          docIndex.pingIndex(); // (wait until it's created itself)
        } catch (Exception ignored) {
          // (deliberately best-effort: just make sure this doesn't die horribly)
        }

        docIndex.closeIndex();
      }
    } else if (!bParentsOnly) {
      // Child community: its "index" is an alias on the parent's index
      String sParentGroupIndex = "doc_" + parentCommunityIdStr;
      ElasticSearchManager docIndex = IndexManager.getIndex(sParentGroupIndex);

      docIndex.createAlias(sGroupIndex);
      docIndex.closeIndex();
      // (do nothing on delete - that will be handled at the parent index level)
    }
    // TESTED (parents, children, and personal)
  }
  /**
   * Creates (and optionally clears) the feature indexes and the per-community document indexes.
   *
   * <p>Unless aggregation is disabled in the properties, the association ("event") and entity
   * feature indexes are created, each being deleted and re-created when the matching flag is set.
   * The global and "many geo" document indexes and two hardwired dummy communities are then
   * created. Finally, if a rebuild or a document delete is requested - or the main document index
   * cannot be pinged, even after waiting a minute and retrying - every community in the social
   * database is processed twice: a first pass with {@code bParentsOnly} set, then a second pass
   * that also handles the communities needing an alias on their parent's index.
   *
   * @param bDeleteDocs true to clear the document indexes
   * @param bDeleteEntityFeature true to delete and re-create the entity feature index
   * @param bDeleteEventFeature true to delete and re-create the association feature index
   * @param bRebuildDocsIndex true to (re)create the document index of every community
   * @throws RuntimeException wrapping any exception raised during initialization (same message,
   *     original exception attached as the cause)
   */
  public void InitializeIndex(
      boolean bDeleteDocs,
      boolean bDeleteEntityFeature,
      boolean bDeleteEventFeature,
      boolean bRebuildDocsIndex) {

    try { // create elasticsearch indexes

      PropertiesManager pm = new PropertiesManager();

      if (!pm.getAggregationDisabled()) {

        Builder localSettingsEvent = ImmutableSettings.settingsBuilder();
        localSettingsEvent.put("number_of_shards", 1).put("number_of_replicas", 0);
        localSettingsEvent.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard");
        localSettingsEvent.putArray(
            "index.analysis.analyzer.suggestAnalyzer.filter", "standard", "lowercase");

        Builder localSettingsGaz = ImmutableSettings.settingsBuilder();
        localSettingsGaz.put("number_of_shards", 1).put("number_of_replicas", 0);
        localSettingsGaz.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard");
        localSettingsGaz.putArray(
            "index.analysis.analyzer.suggestAnalyzer.filter", "standard", "lowercase");

        // event feature
        String eventGazMapping =
            new Gson()
                .toJson(
                    new AssociationFeaturePojoIndexMap.Mapping(),
                    AssociationFeaturePojoIndexMap.Mapping.class);
        ElasticSearchManager eventIndex =
            IndexManager.createIndex(
                AssociationFeaturePojoIndexMap.indexName_,
                null,
                false,
                null,
                eventGazMapping,
                localSettingsEvent);
        if (bDeleteEventFeature) {
          eventIndex.deleteMe();
          eventIndex =
              IndexManager.createIndex(
                  AssociationFeaturePojoIndexMap.indexName_,
                  null,
                  false,
                  null,
                  eventGazMapping,
                  localSettingsEvent);
        }
        // entity feature
        String gazMapping =
            new Gson()
                .toJson(
                    new EntityFeaturePojoIndexMap.Mapping(),
                    EntityFeaturePojoIndexMap.Mapping.class);
        ElasticSearchManager entityIndex =
            IndexManager.createIndex(
                EntityFeaturePojoIndexMap.indexName_,
                null,
                false,
                null,
                gazMapping,
                localSettingsGaz);
        if (bDeleteEntityFeature) {
          entityIndex.deleteMe();
          entityIndex =
              IndexManager.createIndex(
                  EntityFeaturePojoIndexMap.indexName_,
                  null,
                  false,
                  null,
                  gazMapping,
                  localSettingsGaz);
        }
      }

      // DOCS - much more complicated than anything else

      boolean bPingMainIndexFailed =
          !ElasticSearchManager.pingIndex(DocumentPojoIndexMap.globalDocumentIndex_);
      // (ie if main doc index doesn't exist then always rebuild all indexes)

      if (bPingMainIndexFailed) {
        // extra level of robustness... sleep for a minute then double check the index is really
        // missing...
        try {
          Thread.sleep(60000);
        } catch (InterruptedException e) {
          // Cut the wait short, but keep the interrupt visible to whoever runs this thread
          Thread.currentThread().interrupt();
        }
        bPingMainIndexFailed =
            !ElasticSearchManager.pingIndex(DocumentPojoIndexMap.globalDocumentIndex_);
      }
      bRebuildDocsIndex |= bPingMainIndexFailed;

      createCommunityDocIndex(
          DocumentPojoIndexMap.globalDocumentIndex_, null, false, true, bDeleteDocs);
      createCommunityDocIndex(
          DocumentPojoIndexMap.manyGeoDocumentIndex_, null, false, false, bDeleteDocs);

      // Some hardwired dummy communities
      createCommunityDocIndex(
          "4e3706c48d26852237078005", null, true, false, bDeleteDocs); // (admin)
      createCommunityDocIndex(
          "4e3706c48d26852237079004", null, true, false, bDeleteDocs); // (test user)
      // (create dummy index used to keep personal group aliases)

      // OK, going to have different shards for different communities:
      // Get a list of all the communities:

      BasicDBObject query = new BasicDBObject();
      BasicDBObject fieldsToDrop = new BasicDBObject("members", 0);
      fieldsToDrop.put("communityAttributes", 0);
      fieldsToDrop.put("userAttributes", 0);
      DBCursor dbc = DbManager.getSocial().getCommunity().find(query, fieldsToDrop);

      if (bRebuildDocsIndex || bDeleteDocs) {

        List<DBObject> tmparray =
            dbc.toArray(); // (brings the entire thing into memory so don't get cursor timeouts)
        int i = 0;
        // (report the size of the in-memory list rather than asking the cursor again)
        System.out.println("Initializing " + tmparray.size() + " indexes:");
        for (int j = 0; j < 2; ++j) {
          for (DBObject dbotmp : tmparray) {
            // (i keeps counting across both passes)
            if ((++i % 100) == 0) {
              System.out.println("Initialized " + i + " indexes.");
            }
            BasicDBObject dbo = (BasicDBObject) dbotmp;

            // OK, going to see if there are any sources with this group id, create a new index if
            // so:
            // (Don't use CommunityPojo data model here for performance reasons....
            //  (Also, haven't gotten round to porting CommunityPojo field access to using static
            // fields))
            ObjectId communityId = (ObjectId) dbo.get("_id");
            boolean bPersonalGroup = dbo.getBoolean("isPersonalCommunity", false);
            boolean bSystemGroup = dbo.getBoolean("isSystemCommunity", false);
            ObjectId parentCommunityId = (ObjectId) dbo.get("parentId");

            createCommunityDocIndex(
                communityId.toString(),
                parentCommunityId,
                bPersonalGroup,
                bSystemGroup,
                bDeleteDocs,
                j == 0);
          } // end loop over communities
        } // end loop over communities - first time parents only
      } // (end if need to do big loop over all sources)
    } catch (Exception e) {
      e.printStackTrace();
      // Same message as before, but keep the original exception as the cause
      throw new RuntimeException(e.getMessage(), e);
    }
  } // TESTED (not changed since by-eye test in Beta - retested after the code was moved)
  /**
   * Synchronizes one association ("event") feature with the feature database and the association
   * feature index.
   *
   * <p>When the feature has a DB sync time or a DB sync priority (or diagnostic mode is enabled),
   * an update is built that stores the current time and the feature's doc count, unsets the sync
   * priority and pulls any entity1/entity2 values trimmed from oversized sets. The feature is then
   * added to the association feature index. In diagnostic mode nothing is written; the operations
   * are printed to stdout instead when diagnostic logging is enabled.
   *
   * @param eventFeature the feature to synchronize; its entity1/entity2 sets may be trimmed in
   *     place
   * @param communityId the community used to select the database record(s)
   */
  public static void synchronizeEventFeature(
      AssociationFeaturePojo eventFeature, ObjectId communityId) {
    DBCollection featureCollection = DbManager.getFeature().getAssociation();

    // NOTE: Important that the feeds update occurs before synchronization, since the sync
    // "corrupts" the event

    boolean dbUpdateNeeded =
        _diagnosticMode
            || (eventFeature.getDb_sync_time() != null)
            || (eventFeature.getDb_sync_prio() != null);
    if (dbUpdateNeeded) {
      // Otherwise this is a new feature, so only the index needs updating, not the feature DB
      // (if db_sync_prio is null then have to update to avoid a b/g aggregation loop)
      // (note that db_sync_prio will in practice not be set when this is a new feature because it
      // will have the same sync_doccount as doc_count)

      String syncTimestamp = Long.toString(System.currentTimeMillis());
      BasicDBObject syncFields = new BasicDBObject();
      syncFields.put(AssociationFeaturePojo.db_sync_time_, syncTimestamp);
      syncFields.put(AssociationFeaturePojo.db_sync_doccount_, eventFeature.getDoccount());
      BasicDBObject update = new BasicDBObject(MongoDbManager.set_, syncFields);
      // (also can be added to below)
      // NOTE(review): the key comes from EntityFeaturePojo, not AssociationFeaturePojo -
      // presumably both name the same field; confirm.
      update.put(
          MongoDbManager.unset_, new BasicDBObject(EntityFeaturePojo.db_sync_prio_, 1));

      // (basically a lookup on gaz_index, restricted to the community)
      BasicDBObject query =
          new BasicDBObject(AssociationFeaturePojo.index_, eventFeature.getIndex());
      query.put(AssociationFeaturePojo.communityId_, communityId);

      // Keep the number of entity1 and entity2 sets down to a reasonable number
      // (In the end would like to be able to do this based on date rather than (essentially)
      // completely randomly)
      BasicDBObject pullCommand = null;
      ArrayList<String> droppedEntity1 = pruneNonIndexEntities(eventFeature.getEntity1());
      if (null != droppedEntity1) {
        pullCommand = new BasicDBObject();
        pullCommand.put(AssociationFeaturePojo.entity1_, droppedEntity1);
      }
      ArrayList<String> droppedEntity2 = pruneNonIndexEntities(eventFeature.getEntity2());
      if (null != droppedEntity2) {
        if (null == pullCommand) {
          pullCommand = new BasicDBObject();
        }
        pullCommand.put(AssociationFeaturePojo.entity2_, droppedEntity2);
      }
      if (null != pullCommand) {
        // (this removes from the database)
        update.put(MongoDbManager.pullAll_, pullCommand);
      }
      // TESTED (2.1.4.3b, including no index removal clause)

      if (!_diagnosticMode) {
        // (no upsert, update every matching record)
        featureCollection.update(query, update, false, true);
      } else {
        boolean wouldRun =
            (eventFeature.getDb_sync_time() != null) || (eventFeature.getDb_sync_prio() != null);
        if (_logInDiagnosticMode) {
          String prefix =
              wouldRun
                  ? "AssociationAggregationUtils.synchronizeEventFeature, featureDB: "
                  : "(WOULD NOT RUN) EventAggregationUtils.synchronizeEventFeature, featureDB: ";
          System.out.println(prefix + query.toString() + " / " + update.toString());
        }
      }
    }

    if (!_diagnosticMode) {
      IndexManager.getIndex(AssociationFeaturePojoIndexMap.indexName_)
          .addDocument(eventFeature, new AssociationFeaturePojoIndexMap(), null, true);
    } else if (_logInDiagnosticMode) {
      String documentId =
          new StringBuilder(eventFeature.getIndex()).append(':').append(communityId).toString();
      System.out.println(
          "AssociationAggregationUtils.synchronizeEventFeature, synchronize: "
              + documentId
              + " = "
              + IndexManager.mapToIndex(eventFeature, new AssociationFeaturePojoIndexMap()));
    }
  } // TESTED

  /**
   * Trims an oversized entity collection in place, down towards
   * AssociationFeaturePojo.entity_MAXFIELDS entries. Values containing a '/' are never removed,
   * so the collection can stay above the limit.
   *
   * @param entities the collection to trim (may be null)
   * @return the removed values (possibly empty), or null if the collection is null or not larger
   *     than the limit
   */
  private static ArrayList<String> pruneNonIndexEntities(java.util.Collection<String> entities) {
    if (null == entities) {
      return null;
    }
    int remaining = entities.size();
    if (remaining <= AssociationFeaturePojo.entity_MAXFIELDS) {
      return null;
    }
    ArrayList<String> removed =
        new ArrayList<String>(entities.size() - AssociationFeaturePojo.entity_MAXFIELDS);
    Iterator<String> it = entities.iterator();
    while (it.hasNext() && (remaining > AssociationFeaturePojo.entity_MAXFIELDS)) {
      String candidate = it.next();
      if (candidate.indexOf('/') == -1) { // (ie don't remove the index)
        remaining--;
        it.remove(); // (this removes from the index)
        removed.add(candidate);
      }
    }
    return removed;
  }