/**
 * Tells whether a field takes part in at least one primary key of a class, for a given source.
 *
 * <p>The set of key field names for a (model, class, source) combination is built on first use
 * by collecting the field names of every primary key that {@code getPrimaryKeys} reports for
 * each ClassDescriptor the model returns for the class. The set is then stored in the map
 * obtained from {@code primaryKeyCache} and reused on later calls.
 *
 * @param model the Model in which to find ClassDescriptors
 * @param clazz the Class in which to look
 * @param fieldName the name of the field to check
 * @param source the Source that the keys belong to
 * @return true if the field is named by any of those primary keys
 */
public static boolean fieldIsPrimaryKey(
    Model model, Class<?> clazz, String fieldName, Source source) {
  Map<PrimaryKeyCacheKey, Set<String>> knownKeyFields = primaryKeyCache.get();
  PrimaryKeyCacheKey lookupKey = new PrimaryKeyCacheKey(model, clazz, source);

  Set<String> keyFieldNames = knownKeyFields.get(lookupKey);
  if (keyFieldNames != null) {
    // Already computed for this model/class/source combination.
    return keyFieldNames.contains(fieldName);
  }

  keyFieldNames = new HashSet<String>();
  for (ClassDescriptor descriptor : model.getClassDescriptorsForClass(clazz)) {
    for (PrimaryKey primaryKey : getPrimaryKeys(descriptor, source, null)) {
      keyFieldNames.addAll(primaryKey.getFieldNames());
    }
  }
  knownKeyFields.put(lookupKey, keyFieldNames);
  return keyFieldNames.contains(fieldName);
}
/**
 * Decides whether a primary key can be fetched at this point.
 *
 * <p>A key has to wait when any of its reference fields points at a class that is
 * type-compatible (assignable in either direction) with the class of a primary key that is
 * still listed in {@code pksNotDone}. Non-reference fields never block a key.
 *
 * @param pk the PrimaryKey
 * @param cld the ClassDescriptor that the PrimaryKey is in
 * @param pksNotDone a Map of pks not yet fetched
 * @return true if no reference field of the key clashes with a pending class
 */
protected boolean canDoPkNow(
    PrimaryKey pk, ClassDescriptor cld, Map<PrimaryKey, ClassDescriptor> pksNotDone) {
  for (String keyField : pk.getFieldNames()) {
    FieldDescriptor descriptor = cld.getFieldDescriptorByName(keyField);
    if (!descriptor.isReference()) {
      continue;
    }
    for (ClassDescriptor pendingCld : pksNotDone.values()) {
      Class<? extends FastPathObject> referencedType =
          ((ReferenceDescriptor) descriptor).getReferencedClassDescriptor().getType();
      Class<?> pendingType = pendingCld.getType();
      boolean related =
          pendingType.isAssignableFrom(referencedType)
              || referencedType.isAssignableFrom(pendingType);
      if (related) {
        // The referenced class still has an unfetched key, so this key must wait.
        return false;
      }
    }
  }
  return true;
}
/** * Fetches equivalent objects for a particular primary key. * * @param pk the PrimaryKey * @param cld the ClassDescriptor of the PrimaryKey * @param results a Map to hold results that are to be added to the cache * @param objectsForCld a List of objects relevant to this PrimaryKey * @param fetchedObjectIds a Set to hold ids of objects that are fetched, to prefetch from the * data tracker later * @throws ObjectStoreException if something goes wrong */ protected void doPk( PrimaryKey pk, ClassDescriptor cld, Map<InterMineObject, Set<InterMineObject>> results, List<InterMineObject> objectsForCld, Set<Integer> fetchedObjectIds) throws ObjectStoreException { Iterator<InterMineObject> objectsForCldIter = objectsForCld.iterator(); while (objectsForCldIter.hasNext()) { int objCount = 0; int origObjCount = 0; Query q = new Query(); QueryClass qc = new QueryClass(cld.getType()); q.addFrom(qc); q.addToSelect(qc); ConstraintSet cs = new ConstraintSet(ConstraintOp.AND); q.setConstraint(cs); Map<String, Set<Object>> fieldNameToValues = new HashMap<String, Set<Object>>(); for (String fieldName : pk.getFieldNames()) { try { QueryField qf = new QueryField(qc, fieldName); q.addToSelect(qf); Set<Object> values = new HashSet<Object>(); fieldNameToValues.put(fieldName, values); cs.addConstraint(new BagConstraint(qf, ConstraintOp.IN, values)); } catch (IllegalArgumentException e) { QueryForeignKey qf = new QueryForeignKey(qc, fieldName); q.addToSelect(qf); Set<Object> values = new HashSet<Object>(); fieldNameToValues.put(fieldName, values); cs.addConstraint(new BagConstraint(qf, ConstraintOp.IN, values)); } } // Now make a map from the primary key values to source objects Map<List<Object>, InterMineObject> keysToSourceObjects = new HashMap<List<Object>, InterMineObject>(); while (objectsForCldIter.hasNext() && (objCount < 500)) { InterMineObject object = objectsForCldIter.next(); origObjCount++; try { if (DataLoaderHelper.objectPrimaryKeyNotNull(model, object, cld, pk, source, 
idMap)) { List<Collection<Object>> values = new ArrayList<Collection<Object>>(); boolean skipObject = false; Map<String, Set<Object>> fieldsValues = new HashMap<String, Set<Object>>(); for (String fieldName : pk.getFieldNames()) { try { Object value = object.getFieldProxy(fieldName); Set<Object> fieldValues; if (value instanceof InterMineObject) { Integer id = idMap.get(((InterMineObject) value).getId()); if (id == null) { Set<InterMineObject> eqs = results.get(value); if (eqs == null) { value = object.getFieldValue(fieldName); eqs = queryEquivalentObjects((InterMineObject) value, source); } fieldValues = new HashSet<Object>(); for (InterMineObject obj : eqs) { fieldValues.add(obj.getId()); } } else { fieldValues = Collections.singleton((Object) id); } } else { fieldValues = Collections.singleton(value); } values.add(fieldValues); fieldsValues.put(fieldName, fieldValues); for (Object fieldValue : fieldValues) { long time = System.currentTimeMillis(); boolean pkQueryFruitless = hints.pkQueryFruitless(cld.getType(), fieldName, fieldValue); String summaryName = Util.getFriendlyName(cld.getType()) + "." + fieldName; if (!savedTimes.containsKey(summaryName)) { savedTimes.put(summaryName, new Long(System.currentTimeMillis() - time)); savedCounts.put(summaryName, new Integer(0)); } if (pkQueryFruitless) { skipObject = true; } } } catch (IllegalAccessException e) { throw new RuntimeException(e); } } if (!skipObject) { objCount++; for (String fieldName : pk.getFieldNames()) { fieldNameToValues.get(fieldName).addAll(fieldsValues.get(fieldName)); } for (List<Object> valueSet : CollectionUtil.fanOutCombinations(values)) { if (keysToSourceObjects.containsKey(valueSet)) { throw new ObjectStoreException( "Duplicate objects found for pk " + cld.getName() + "." + pk.getName() + ": " + object); } keysToSourceObjects.put(valueSet, object); } } } } catch (MetaDataException e) { throw new ObjectStoreException(e); } } // Prune BagConstraints using the hints system. 
// boolean emptyQuery = false; // Iterator<String> fieldNameIter = pk.getFieldNames().iterator(); // while (fieldNameIter.hasNext() && (!emptyQuery)) { // String fieldName = fieldNameIter.next(); // Set values = fieldNameToValues.get(fieldName); // Iterator valueIter = values.iterator(); // while (valueIter.hasNext()) { // if (hints.pkQueryFruitless(cld.getType(), fieldName, valueIter.next())) { // valueIter.remove(); // } // } // if (values.isEmpty()) { // emptyQuery = true; // } // } if (objCount > 0) { // Iterate through query, and add objects to results // long time = System.currentTimeMillis(); int matches = 0; Results res = lookupOs.execute(q, 2000, false, false, false); @SuppressWarnings("unchecked") List<ResultsRow<Object>> tmpRes = (List) res; for (ResultsRow<Object> row : tmpRes) { List<Object> values = new ArrayList<Object>(); for (int i = 1; i <= pk.getFieldNames().size(); i++) { values.add(row.get(i)); } Set<InterMineObject> set = results.get(keysToSourceObjects.get(values)); if (set != null) { set.add((InterMineObject) row.get(0)); matches++; } fetchedObjectIds.add(((InterMineObject) row.get(0)).getId()); } // LOG.info("Fetched " + res.size() + " equivalent objects for " + objCount // + " objects in " + (System.currentTimeMillis() - time) + " ms for " // + cld.getName() + "." + pk.getName()); } } }
/** * Look a the values of the given primary key in the object and return true if and only if some * part of the primary key is null. If the primary key contains a reference it is sufficient for * any of the primary keys of the referenced object to be non-null (ie objectPrimaryKeyIsNull() * returning true). * * @param model the Model in which to find ClassDescriptors * @param obj the Object to check * @param cld one of the classes that obj is. Only primary keys for this classes will be checked * @param pk the primary key to check * @param source the Source database * @param idMap an IntToIntMap from source IDs to destination IDs * @return true if the the given primary key is non-null for the given object * @throws MetaDataException if anything goes wrong */ public static boolean objectPrimaryKeyNotNull( Model model, InterMineObject obj, ClassDescriptor cld, PrimaryKey pk, Source source, IntToIntMap idMap) throws MetaDataException { for (String fieldName : pk.getFieldNames()) { FieldDescriptor fd = cld.getFieldDescriptorByName(fieldName); if (fd instanceof AttributeDescriptor) { Object value; try { value = obj.getFieldValue(fieldName); } catch (IllegalAccessException e) { throw new MetaDataException( "Failed to get field " + fieldName + " for key " + pk + " from " + obj, e); } if (value == null) { return false; } } else if (fd instanceof CollectionDescriptor) { throw new MetaDataException( "Primary key " + pk.getName() + " for class " + cld.getName() + " cannot contain collection " + fd.getName() + ": collections cannot be part of a primary key. 
Please edit" + model.getName() + "_keyDefs.properties"); } else if (fd instanceof ReferenceDescriptor) { InterMineObject refObj; try { refObj = (InterMineObject) obj.getFieldProxy(fieldName); } catch (IllegalAccessException e) { throw new MetaDataException( "Failed to get field " + fieldName + " for key " + pk + " from " + obj, e); } if (refObj == null) { return false; } if ((refObj.getId() != null) && (idMap.get(refObj.getId()) != null)) { // We have previously loaded the object in this reference. continue; } if (refObj instanceof ProxyReference) { refObj = ((ProxyReference) refObj).getObject(); } boolean foundNonNullKey = false; boolean foundKey = false; Set<ClassDescriptor> classDescriptors = model.getClassDescriptorsForClass(refObj.getClass()); CLDS: for (ClassDescriptor refCld : classDescriptors) { Set<PrimaryKey> primaryKeys; if (source == null) { primaryKeys = new LinkedHashSet<PrimaryKey>(PrimaryKeyUtil.getPrimaryKeys(refCld).values()); } else { primaryKeys = DataLoaderHelper.getPrimaryKeys(refCld, source, null); } for (PrimaryKey refPK : primaryKeys) { foundKey = true; if (objectPrimaryKeyNotNull(model, refObj, refCld, refPK, source, idMap)) { foundNonNullKey = true; break CLDS; } } } if (foundKey && (!foundNonNullKey)) { return false; } } } return true; }
/**
 * Return a Set of PrimaryKeys relevant to a given Source for a ClassDescriptor. The Set contains
 * all the primary keys that exist on a particular class that are used by the source, without
 * performing any recursion. The Model.getClassDescriptorsForClass() method is recommended if you
 * wish for all the primary keys of the class' parents as well.
 *
 * <p>Results are cached per (ClassDescriptor, Source) in {@code getPrimaryKeyCache}; the whole
 * lookup runs while holding that cache's monitor. The first time a source is seen, every
 * un-dotted entry of its key properties is checked against the keys defined for the
 * corresponding class. Keys are then gathered from two kinds of property entries:
 *
 * <ul>
 *   <li>{@code ClassName = key1, key2}: names of keys that must already be defined for the
 *       class;
 *   <li>{@code ClassName.keyName = field1, field2}: a key defined inline. When such a key is new
 *       and {@code os} is an ObjectStoreInterMineImpl, an index on the key's columns is created
 *       in the database; failure to create it is logged and otherwise ignored.
 * </ul>
 *
 * @param cld the ClassDescriptor
 * @param source the Source
 * @param os the ObjectStore that these PrimaryKeys are used in, for creating indexes
 * @return a Set of PrimaryKeys
 * @throws IllegalArgumentException if no key properties are found for the source, or if a key
 *     name listed for a class is not defined for that class
 */
public static Set<PrimaryKey> getPrimaryKeys(ClassDescriptor cld, Source source, ObjectStore os) {
  GetPrimaryKeyCacheKey key = new GetPrimaryKeyCacheKey(cld, source);
  synchronized (getPrimaryKeyCache) {
    Set<PrimaryKey> keySet = getPrimaryKeyCache.get(key);
    if (keySet == null) {
      keySet = new LinkedHashSet<PrimaryKey>();
      Properties keys = getKeyProperties(source);
      if (keys != null) {
        if (!verifiedSources.contains(source)) {
          // One-off sanity check of the whole key configuration of this source.
          String packageNameWithDot =
              cld.getName().substring(0, cld.getName().lastIndexOf('.') + 1);
          LOG.info(
              "Verifying primary key config for source "
                  + source
                  + ", packageName = "
                  + packageNameWithDot);
          for (Map.Entry<Object, Object> entry : keys.entrySet()) {
            String entryClassName = (String) entry.getKey();
            String entryKeyList = (String) entry.getValue();
            if (!entryClassName.contains(".")) {
              ClassDescriptor iCld =
                  cld.getModel().getClassDescriptorByName(packageNameWithDot + entryClassName);
              if (iCld != null) {
                Map<String, PrimaryKey> definedKeys = PrimaryKeyUtil.getPrimaryKeys(iCld);
                String[] tokens = entryKeyList.split(",");
                for (int i = 0; i < tokens.length; i++) {
                  String token = tokens[i].trim();
                  if (definedKeys.get(token) == null) {
                    throw new IllegalArgumentException(
                        "Primary key "
                            + token
                            + " for class "
                            + entryClassName
                            + " required by datasource "
                            + source.getName()
                            + " in "
                            + source.getName()
                            + "_keys.properties is not defined in "
                            + cld.getModel().getName()
                            + "_keyDefs.properties");
                  }
                }
              } else {
                // NOTE(review): the entry being ignored comes from the source's key properties,
                // yet the message names the model's _keyDefs.properties file - confirm which
                // file should be reported.
                LOG.warn(
                    "Ignoring entry for "
                        + entryClassName
                        + " in file "
                        + cld.getModel().getName()
                        + "_keyDefs.properties - not in model!");
              }
            }
          }
          verifiedSources.add(source);
        }

        // Keys referenced by name for this class.
        Map<String, PrimaryKey> map = PrimaryKeyUtil.getPrimaryKeys(cld);
        String cldName = TypeUtil.unqualifiedName(cld.getName());
        String keyList = (String) keys.get(cldName);
        if (keyList != null) {
          String[] tokens = keyList.split(",");
          for (int i = 0; i < tokens.length; i++) {
            String token = tokens[i].trim();
            PrimaryKey definedKey = map.get(token);
            if (definedKey == null) {
              throw new IllegalArgumentException(
                  "Primary key "
                      + token
                      + " for class "
                      + cld.getName()
                      + " required by data source "
                      + source.getName()
                      + " in "
                      + source.getName()
                      + "_keys.properties is not defined in "
                      + cld.getModel().getName()
                      + "_keyDefs.properties");
            }
            keySet.add(definedKey);
          }
        }

        // Keys defined inline as "ClassName.keyName = field, field".
        for (Map.Entry<Object, Object> entry : keys.entrySet()) {
          String propKey = (String) entry.getKey();
          String fieldList = (String) entry.getValue();
          int posOfDot = propKey.indexOf('.');
          if (posOfDot > 0) {
            String propCldName = propKey.substring(0, posOfDot);
            if (cldName.equals(propCldName)) {
              String keyName = propKey.substring(posOfDot + 1);
              PrimaryKey pk = new PrimaryKey(keyName, fieldList, cld);
              if (!keySet.contains(pk)) {
                keySet.add(pk);
                if (os instanceof ObjectStoreInterMineImpl) {
                  createIndexForInlinePrimaryKey((ObjectStoreInterMineImpl) os, cld, pk, keyName);
                }
              }
            }
          }
        }
      } else {
        throw new IllegalArgumentException(
            "Unable to find keys for source "
                + source.getName()
                + " in file "
                + source.getName()
                + "_keys.properties");
      }
      getPrimaryKeyCache.put(key, keySet);
    }
    return keySet;
  }
}

/**
 * Creates a database index covering the columns of an inline-defined primary key. Failures are
 * logged and not propagated. The statement is closed and the connection is handed back to the
 * object store whether or not the index could be created.
 *
 * @param osimi the object store whose database receives the index
 * @param cld the ClassDescriptor the key belongs to
 * @param pk the primary key whose fields are indexed
 * @param keyName the name of the key, used as the suffix of the index name
 */
private static void createIndexForInlinePrimaryKey(
    ObjectStoreInterMineImpl osimi, ClassDescriptor cld, PrimaryKey pk, String keyName) {
  DatabaseSchema schema = osimi.getSchema();
  ClassDescriptor tableMaster = schema.getTableMaster(cld);
  String tableName = DatabaseUtil.getTableName(tableMaster);
  List<String> fields = new ArrayList<String>();
  for (String field : pk.getFieldNames()) {
    String colName = DatabaseUtil.generateSqlCompatibleName(field);
    // Fields that are references on the table master get an "id" suffix on the column name.
    if (tableMaster.getReferenceDescriptorByName(field, true) != null) {
      colName += "id";
    }
    fields.add(colName);
  }
  String sql =
      "CREATE INDEX "
          + tableName
          + "__"
          + keyName
          + " ON "
          + tableName
          + " ("
          + StringUtil.join(fields, ", ")
          + ")";
  System.out.println("Creating index: " + sql);
  LOG.info("Creating index: " + sql);
  Connection conn = null;
  java.sql.Statement statement = null;
  try {
    conn = osimi.getConnection();
    statement = conn.createStatement();
    statement.execute(sql);
  } catch (SQLException e) {
    LOG.warn("Index creation failed", e);
  } finally {
    if (statement != null) {
      try {
        statement.close();
      } catch (SQLException e) {
        LOG.warn("Failed to close statement after index creation", e);
      }
    }
    if (conn != null) {
      osimi.releaseConnection(conn);
    }
  }
}