// RULE: Feature can only belong to one Protocol in a DataSet (see issue #1136)
private void checkFeatureCanOnlyBelongToOneProtocolForOneDataSet()
{
	Iterable<DataSet> dataSets = dataService.findAll(DataSet.ENTITY_NAME, DataSet.class);
	for (DataSet dataSet : dataSets)
	{
		List<Protocol> dataSetProtocols = ProtocolUtils.getProtocolDescendants(dataSet.getProtocolUsed(), true);

		for (Protocol protocol : dataSetProtocols)
		{
			for (ObservableFeature feature : protocol.getFeatures())
			{
				for (Protocol p : dataSetProtocols)
				{
					if (!p.equals(protocol) && p.getFeatures().contains(feature))
					{
						String message = String.format(
								"An ObservableFeature can only belong to one Protocol but feature '%s' belongs to both '%s' and '%s'",
								feature.getIdentifier(), p.getIdentifier(), protocol.getIdentifier());
						throw new MolgenisValidationException(Sets.newHashSet(new ConstraintViolation(message,
								feature.getIdentifier(), feature, null, null, 0)));
					}
				}
			}
		}
	}
}
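// Illustrative sketch (a hypothetical helper, not part of the importer) of a situation that
// violates the rule checked above: one ObservableFeature attached to two Protocols. The
// setters mirror the omx model classes already used elsewhere in this section.
private static List<Protocol> protocolsSharingOneFeature()
{
	ObservableFeature bodyHeight = new ObservableFeature();
	bodyHeight.setIdentifier("body_height");

	Protocol anthropometry = new Protocol();
	anthropometry.setIdentifier("protocol_anthropometry");
	anthropometry.setFeatures(Arrays.asList(bodyHeight));

	Protocol physicalExam = new Protocol();
	physicalExam.setIdentifier("protocol_physical_exam");
	physicalExam.setFeatures(Arrays.asList(bodyHeight));

	// If both protocols are descendants of one DataSet's protocolUsed,
	// checkFeatureCanOnlyBelongToOneProtocolForOneDataSet() throws a
	// MolgenisValidationException naming 'body_height' and both protocol identifiers.
	return Arrays.asList(anthropometry, physicalExam);
}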
@RequestMapping(method = RequestMethod.GET)
public String init(@RequestParam(value = "selectedDataSet", required = false) String selectedDataSetId, Model model)
{
	Iterable<DataSet> allDataSets = dataService.findAll(DataSet.ENTITY_NAME, new QueryImpl());

	List<DataSet> dataSets = new ArrayList<DataSet>();
	for (DataSet dataSet : allDataSets)
	{
		// Default to the first data set when no selection was made
		if (selectedDataSetId == null) selectedDataSetId = dataSet.getId().toString();
		if (!dataSet.getProtocolUsed().getIdentifier().equals(PROTOCOL_IDENTIFIER)) dataSets.add(dataSet);
	}
	model.addAttribute("dataSets", dataSets);

	List<String> mappedDataSets = new ArrayList<String>();
	if (selectedDataSetId != null)
	{
		model.addAttribute("selectedDataSet", selectedDataSetId);

		Iterable<DataSet> it = dataService.findAll(DataSet.ENTITY_NAME,
				new QueryImpl().like(DataSet.IDENTIFIER, selectedDataSetId));
		for (DataSet dataSet : it)
		{
			if (dataSet.getIdentifier().startsWith(SecurityUtils.getCurrentUsername() + "-" + selectedDataSetId))
			{
				String[] dataSetIds = dataSet.getIdentifier().split("-");
				// The mapped data set id is the third token, so guard against shorter identifiers
				if (dataSetIds.length > 2) mappedDataSets.add(dataSetIds[2]);
			}
		}
	}
	model.addAttribute("mappedDataSets", mappedDataSets);

	return "EvaluationPlugin";
}
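// Worked example of the mapping identifier convention assumed above: verify() below builds
// mappingDataSetIdentifier as username + "-" + selectedDataSetId + "-" + dataSet.getId(),
// and init() recovers the target id by splitting on '-'. The helper name is hypothetical
// and only illustrates that parsing.
private static String extractMappedDataSetId(String mappingIdentifier)
{
	// e.g. "admin-1-5" -> ["admin", "1", "5"] -> "5" (the mapped/target data set id)
	String[] tokens = mappingIdentifier.split("-");
	return tokens.length > 2 ? tokens[2] : null;
}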
public static void setUp(DataService dataService) throws Exception
{
	features0 = new ArrayList<ObservableFeature>();
	features1 = new ArrayList<ObservableFeature>();
	allEntities = new ArrayList<Entity>();
	subProtocols = new ArrayList<Protocol>();
	subProtocols1 = new ArrayList<Protocol>();
	categories = new ArrayList<Category>();

	feature0 = new ObservableFeature();
	feature0.setId(0);
	feature0.setName("featureName" + 0);
	feature0.setIdentifier("feature" + 0);

	feature1 = new ObservableFeature();
	feature1.setId(1);
	feature1.setName("featureName" + 1);
	feature1.setIdentifier("feature" + 1);

	features0.add(feature0);
	features1.add(feature1);

	protocol0 = new Protocol();
	protocol0.setDescription("description0");
	protocol0.setIdentifier("identifier0");
	protocol0.setId(0);
	protocol0.setFeatures(features0);

	protocol1 = new Protocol();
	protocol1.setDescription("description1");
	protocol1.setIdentifier("identifier1");
	protocol1.setId(1);
	protocol1.setFeatures(features1);

	subProtocols1.add(protocol1);

	protocol2 = new Protocol();
	protocol2.setDescription("description2");
	protocol2.setIdentifier("identifier2");
	protocol2.setSubprotocols(subProtocols1);
	protocol2.setId(2);

	protocol3 = new Protocol();
	protocol3.setDescription("description3");
	protocol3.setIdentifier("identifier3");
	protocol3.setSubprotocols(subProtocols1);
	protocol3.setId(3);

	allEntities.add(protocol0);
	allEntities.add(protocol1);
	allEntities.add(protocol2);
	allEntities.add(protocol3);

	subProtocols.add(protocol0);

	protocolUsed = new Protocol();
	protocolUsed.setDescription("protocolUsed_description");
	protocolUsed.setIdentifier("protocolUsed_identifier");
	protocolUsed.setId(100);
	protocolUsed.setSubprotocols(subProtocols);
	allEntities.add(protocolUsed);

	dataset = new DataSet();
	dataset.setId(0);
	dataset.setIdentifier("dataset" + 0);
	dataset.setName("datasetname" + 0);
	dataset.setProtocolUsed(protocolUsed);

	observationSet0 = new ObservationSet();
	observationSet0.setId(0);
	observationSet0.setPartOfDataSet(dataset);

	observationSets0 = new ArrayList<Entity>();
	observationSets0.add(observationSet0);

	observationSet1 = new ObservationSet();
	observationSet1.setId(1);

	ObservedValue observedValue0 = new ObservedValue();
	observedValue0.setId(0);
	observedValue0.setValue(new Value());
	observedValue0.setFeature(feature0);
	observedValue0.setObservationSet(observationSet0);

	observedValues0 = new ArrayList<ObservedValue>();
	observedValues0.add(observedValue0);

	ObservedValue observedValue1 = new ObservedValue();
	observedValue1.setId(1);
	Value v1 = new Value();
	v1.setId(1);
	observedValue1.setValue(v1);
	observedValue1.setFeature(feature1);
	observedValue1.setObservationSet(observationSet1);

	observedValues1 = new ArrayList<ObservedValue>();
	observedValues1.add(observedValue1);

	category0 = new Category();
	category0.setId(0);
	category0.setIdentifier("category" + 0);
	categories.add(category0);

	List<DataSet> datasets = new ArrayList<DataSet>();
	datasets.add(dataset);

	when(dataService.findAllAsList(DataSet.ENTITY_NAME, new QueryImpl().eq(DataSet.IDENTIFIER, "dataset1")))
			.thenReturn(Arrays.<Entity> asList(dataset));
	when(dataService.findAllAsList(ObservedValue.ENTITY_NAME,
			new QueryImpl().eq(ObservedValue.OBSERVATIONSET, observationSet0))).thenReturn(
			Arrays.<Entity> asList(observedValue0));
	when(dataService.findAllAsList(ObservationSet.ENTITY_NAME,
			new QueryImpl().eq(ObservationSet.PARTOFDATASET, 0))).thenReturn(
			Arrays.<Entity> asList(observationSet0));
}
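// A minimal usage sketch for the fixture above, assuming a Mockito-mocked DataService and a
// TestNG-style test (the when(...) stubbing above already implies Mockito). It also assumes
// QueryImpl implements value equality; otherwise the stub would not match. The test name is
// hypothetical.
@Test
public void partOfDataSetStubReturnsObservationSet0() throws Exception
{
	DataService dataService = mock(DataService.class);
	setUp(dataService);

	// The last stub in setUp maps PARTOFDATASET == 0 to observationSet0
	List<Entity> observationSets = dataService.findAllAsList(ObservationSet.ENTITY_NAME,
			new QueryImpl().eq(ObservationSet.PARTOFDATASET, 0));
	assertEquals(observationSets, Arrays.<Entity> asList(observationSet0));
}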
@RequestMapping(value = "/verify", method = RequestMethod.POST, headers = "Content-Type=multipart/form-data")
public void verify(@RequestParam(value = "selectedDataSet", required = false) String selectedDataSetId,
		@RequestParam Part file, HttpServletResponse response, Model model) throws IOException
{
	EntitySource reader = null;
	ExcelWriter<Entity> excelWriterRanks = null;
	try
	{
		if (selectedDataSetId != null)
		{
			String origFileName = FileUploadUtils.getOriginalFileName(file);
			File uploadFile = fileStore.store(file.getInputStream(), origFileName);

			response.setContentType("application/vnd.ms-excel");
			response.addHeader("Content-Disposition",
					"attachment; filename=" + getCsvFileName(file.getName() + "-ranks"));

			excelWriterRanks = new ExcelWriter<Entity>(response.getOutputStream());
			excelWriterRanks.addCellProcessor(new LowerCaseProcessor(true, false));

			Writable<Entity> sheetWriterRank = null;
			Writable<Entity> sheetWriterRankStatistics = null;
			Writable<Entity> sheetWriteBiobankRanks = null;
			Writable<Entity> sheetWriteSpssInput = null;

			reader = new ExcelEntitySourceFactory().create(uploadFile);
			Repository<? extends Entity> inputSheet = reader.getRepositoryByEntityName("Sheet1");

			List<String> biobankNames = new ArrayList<String>();
			for (AttributeMetaData attr : inputSheet.getAttributes())
			{
				biobankNames.add(attr.getName());
			}
			String firstColumn = biobankNames.get(0);
			biobankNames.remove(0);

			// First column has to correspond to the selected data set
			DataSet ds = dataService.findOne(DataSet.ENTITY_NAME, Integer.parseInt(selectedDataSetId));
			if (ds.getName().equalsIgnoreCase(firstColumn))
			{
				// Collect the manual mappings per variable per biobank from the uploaded sheet
				Map<String, Map<String, List<String>>> manualMappings = new HashMap<String, Map<String, List<String>>>();
				for (Entity row : inputSheet)
				{
					String variableName = row.getString(firstColumn);
					if (!manualMappings.containsKey(variableName))
						manualMappings.put(variableName, new HashMap<String, List<String>>());

					for (String biobank : biobankNames)
					{
						if (row.get(biobank) != null)
						{
							String mappingString = row.get(biobank).toString();
							if (!manualMappings.get(variableName).containsKey(biobank.toLowerCase()))
							{
								manualMappings.get(variableName).put(biobank.toLowerCase(), new ArrayList<String>());
							}
							manualMappings.get(variableName).get(biobank.toLowerCase())
									.addAll(Arrays.asList(mappingString.split(",")));
						}
					}
				}

				List<String> lowerCaseBiobankNames = new ArrayList<String>();
				for (String element : biobankNames)
				{
					lowerCaseBiobankNames.add(element.toLowerCase());
				}

				List<DataSet> dataSets = dataService.findAllAsList(DataSet.ENTITY_NAME,
						new QueryImpl().in(DataSet.NAME, lowerCaseBiobankNames));

				lowerCaseBiobankNames.add(0, firstColumn.toLowerCase());
				sheetWriterRank = excelWriterRanks.createWritable("result", lowerCaseBiobankNames);

				Map<String, Map<String, List<Integer>>> rankCollection = new HashMap<String, Map<String, List<Integer>>>();
				List<Object> allRanks = new ArrayList<Object>();

				for (Entry<String, Map<String, List<String>>> entry : manualMappings.entrySet())
				{
					String variableName = entry.getKey();
					List<String> ranks = new ArrayList<String>();
					ranks.add(variableName);
					Map<String, List<String>> mappingDetail = entry.getValue();

					List<ObservableFeature> features = dataService.findAllAsList(ObservableFeature.ENTITY_NAME,
							new QueryImpl().eq(ObservableFeature.NAME, variableName));
					// Skip variables for which no ObservableFeature exists
					if (!features.isEmpty())
					{
						String description = features.get(0).getDescription();
						if (!rankCollection.containsKey(description))
							rankCollection.put(description, new HashMap<String, List<Integer>>());

						Entity row = new MapEntity();
						row.set(firstColumn.toLowerCase(), description);

						for (DataSet dataSet : dataSets)
						{
							List<Integer> ranksBiobank = new ArrayList<Integer>();
							if (mappingDetail.containsKey(dataSet.getName().toLowerCase()))
							{
								Map<String, Hit> mappedFeatureIds = findFeaturesFromIndex("name",
										mappingDetail.get(dataSet.getName().toLowerCase()), dataSet.getId());

								String mappingDataSetIdentifier = SecurityUtils.getCurrentUsername() + "-"
										+ selectedDataSetId + "-" + dataSet.getId();

								Query q = new QueryImpl().eq("store_mapping_feature", features.get(0).getId())
										.pageSize(50).sort(new Sort(Direction.DESC, "store_mapping_score"));
								SearchRequest searchRequest = new SearchRequest(mappingDataSetIdentifier, q, null);
								SearchResult result = searchService.search(searchRequest);

								if (mappedFeatureIds.size() == 0)
								{
									row.set(dataSet.getName().toLowerCase(), "N/A2");
									continue;
								}

								List<String> ids = new ArrayList<String>();
								for (Hit hit : result.getSearchHits())
								{
									Map<String, Object> columnValueMap = hit.getColumnValueMap();
									ids.add(columnValueMap.get("store_mapping_mapped_feature").toString());
								}
								Map<String, Hit> featureInfos = findFeaturesFromIndex("id", ids, dataSet.getId());

								String previousDescription = null;
								int rank = 0;
								for (Hit hit : result.getSearchHits())
								{
									Map<String, Object> columnValueMap = hit.getColumnValueMap();
									String mappedFeatureId = columnValueMap.get("store_mapping_mapped_feature")
											.toString();
									String mappedFeatureDescription = featureInfos.get(mappedFeatureId)
											.getColumnValueMap().get("description").toString()
											.replaceAll("[^0-9a-zA-Z ]", " ");
									rank++;
									// Candidates with the same description share a rank
									if (previousDescription != null
											&& previousDescription.equalsIgnoreCase(mappedFeatureDescription)) rank--;
									if (mappedFeatureIds.containsKey(mappedFeatureId))
									{
										ranksBiobank.add(rank);
										allRanks.add(rank);
										mappedFeatureIds.remove(mappedFeatureId);
									}
									previousDescription = mappedFeatureDescription;
								}

								if (mappedFeatureIds.size() == 0)
								{
									String output = StringUtils.join(ranksBiobank, ',');
									if (ranksBiobank.size() > 1)
									{
										output += " (" + averageRank(ranksBiobank) + ")";
									}
									row.set(dataSet.getName().toLowerCase(), output);
								}
								else
								{
									for (int i = 0; i < mappedFeatureIds.size(); i++)
										allRanks.add("Not mapped");
									row.set(dataSet.getName().toLowerCase(), "Not mapped");
									ranksBiobank.clear();
								}
							}
							else row.set(dataSet.getName().toLowerCase(), "N/A1");

							rankCollection.get(description).put(dataSet.getName().toLowerCase(), ranksBiobank);
						}
						sheetWriterRank.add(row);
					}
				}

				Map<String, List<Integer>> rankCollectionPerBiobank = new HashMap<String, List<Integer>>();
				{
					sheetWriterRankStatistics = excelWriterRanks.createWritable("rank statistics", Arrays.asList(
							firstColumn.toLowerCase(), "average rank", "round-up rank", "median rank", "minimum",
							"maximum"));
					for (Entry<String, Map<String, List<Integer>>> entry : rankCollection.entrySet())
					{
						String variableName = entry.getKey();
						Entity row = new MapEntity();
						row.set(firstColumn.toLowerCase(), variableName);

						List<Integer> rankAllBiobanks = new ArrayList<Integer>();
						for (Entry<String, List<Integer>> rankBiobanks : entry.getValue().entrySet())
						{
							if (!rankCollectionPerBiobank.containsKey(rankBiobanks.getKey()))
								rankCollectionPerBiobank.put(rankBiobanks.getKey(), new ArrayList<Integer>());
							rankCollectionPerBiobank.get(rankBiobanks.getKey()).addAll(rankBiobanks.getValue());
							rankAllBiobanks.addAll(rankBiobanks.getValue());
						}

						row.set("average rank", averageRank(rankAllBiobanks));
						row.set("round-up rank", Math.ceil(averageRank(rankAllBiobanks)));

						Collections.sort(rankAllBiobanks);
						if (!rankAllBiobanks.isEmpty())
						{
							row.set("minimum", rankAllBiobanks.get(0));
							row.set("maximum", rankAllBiobanks.get(rankAllBiobanks.size() - 1));

							double medianRank = 0;
							if (rankAllBiobanks.size() % 2 == 0)
							{
								medianRank = (double) (rankAllBiobanks.get(rankAllBiobanks.size() / 2 - 1) + rankAllBiobanks
										.get(rankAllBiobanks.size() / 2)) / 2;
							}
							else
							{
								medianRank = rankAllBiobanks.get(rankAllBiobanks.size() / 2);
							}
							row.set("median rank", medianRank);
						}
						sheetWriterRankStatistics.add(row);
					}
				}
				{
					lowerCaseBiobankNames.remove(0);
					sheetWriteBiobankRanks = excelWriterRanks.createWritable("biobank average ranks",
							lowerCaseBiobankNames);
					Entity entity = new MapEntity();
					for (Entry<String, List<Integer>> entry : rankCollectionPerBiobank.entrySet())
					{
						entity.set(entry.getKey(), averageRank(entry.getValue()));
					}
					sheetWriteBiobankRanks.add(entity);
				}
				{
					sheetWriteSpssInput = excelWriterRanks.createWritable("spss ranks", Arrays.asList("rank"));
					for (Object rank : allRanks)
					{
						Entity entity = new MapEntity("rank", rank);
						sheetWriteSpssInput.add(entity);
					}
				}
			}
		}
	}
	finally
	{
		if (reader != null) reader.close();
		if (excelWriterRanks != null) IOUtils.closeQuietly(excelWriterRanks);
	}
}
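// A minimal sketch of the averageRank(List<Integer>) helper used throughout verify(); its real
// implementation is not shown in this section, so treat this as an assumption: the arithmetic
// mean of the collected ranks, with 0 for an empty list. Its double result is what verify()
// appends in parentheses to the rank string and feeds to Math.ceil for the round-up rank.
private double averageRank(List<Integer> ranks)
{
	if (ranks.isEmpty()) return 0;
	double sum = 0;
	for (Integer rank : ranks)
	{
		sum += rank;
	}
	return sum / ranks.size();
}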
@Override
@Transactional(rollbackFor = IOException.class)
public EntityImportReport doImport(RepositoryCollection repositories, DatabaseAction databaseAction)
		throws IOException
{
	// All new repository identifiers
	List<String> newRepoIdentifiers = new ArrayList<String>();

	// First import entities, the data sheets are ignored in the entities importer
	EntityImportReport importReport = entitiesImporter.importEntities(repositories, databaseAction);

	// RULE: Feature can only belong to one Protocol in a DataSet. Check it (see issue #1136)
	checkFeatureCanOnlyBelongToOneProtocolForOneDataSet();

	// Import data sheets
	for (String name : repositories.getEntityNames())
	{
		Repository repository = repositories.getRepositoryByEntityName(name);

		if (repository.getName().startsWith(DATASET_SHEET_PREFIX))
		{
			// Import DataSet sheet, create new OmxRepository
			String identifier = repository.getName().substring(DATASET_SHEET_PREFIX.length());

			if (!dataService.hasRepository(identifier))
			{
				dataService.addRepository(new AggregateableCrudRepositorySecurityDecorator(new OmxRepository(
						dataService, searchService, identifier, entityValidator)));
				newRepoIdentifiers.add(identifier);

				DataSet dataSet = dataService.findOne(DataSet.ENTITY_NAME,
						new QueryImpl().eq(DataSet.IDENTIFIER, identifier), DataSet.class);

				List<Protocol> protocols = ProtocolUtils.getProtocolDescendants(dataSet.getProtocolUsed());
				List<ObservableFeature> categoricalFeatures = new ArrayList<ObservableFeature>();
				for (Protocol protocol : protocols)
				{
					List<ObservableFeature> observableFeatures = protocol.getFeatures();
					if (observableFeatures != null)
					{
						for (ObservableFeature observableFeature : observableFeatures)
						{
							String dataType = observableFeature.getDataType();
							FieldType type = MolgenisFieldTypes.getType(dataType);
							if (type.getEnumType() == FieldTypeEnum.CATEGORICAL)
							{
								categoricalFeatures.add(observableFeature);
							}
						}
					}
				}
				for (ObservableFeature categoricalFeature : categoricalFeatures)
				{
					if (!dataService.hasRepository(OmxLookupTableEntityMetaData
							.createOmxLookupTableEntityMetaDataName(categoricalFeature.getIdentifier())))
					{
						dataService.addRepository(new OmxLookupTableRepository(dataService, categoricalFeature
								.getIdentifier(), queryResolver));
						newRepoIdentifiers.add(OmxLookupTableEntityMetaData
								.createOmxLookupTableEntityMetaDataName(categoricalFeature.getIdentifier()));
					}
				}
			}

			// Check if all column names in the excel sheet exist as attributes of the entity
			Set<ConstraintViolation> violations = Sets.newLinkedHashSet();
			EntityMetaData meta = dataService.getEntityMetaData(identifier);
			for (AttributeMetaData attr : repository.getEntityMetaData().getAttributes())
			{
				if (meta.getAttribute(attr.getName()) == null)
				{
					String message = String.format("Unknown attribute name '%s' for entity '%s'. Sheet: '%s'",
							attr.getName(), meta.getName(), repository.getName());
					violations.add(new ConstraintViolation(message, attr.getName(), null, null, meta, 0));
				}
			}
			if (!violations.isEmpty())
			{
				throw new MolgenisValidationException(violations);
			}

			// Import data into new OmxRepository
			try
			{
				dataService.add(identifier, repository);
			}
			catch (MolgenisValidationException e)
			{
				// Add sheet info
				for (ConstraintViolation violation : e.getViolations())
				{
					if (violation.getRownr() > 0)
					{
						// Rownr + 1 for header
						violation.setImportInfo(String.format("Sheet: '%s', row: %d", repository.getName(),
								violation.getRownr() + 1));
					}
					else
					{
						violation.setImportInfo(String.format("Sheet: '%s'", repository.getName()));
					}
				}

				// Roll back the repositories registered during this import before rethrowing
				for (String newRepoIdentifier : newRepoIdentifiers)
				{
					dataService.removeRepository(newRepoIdentifier);
				}

				throw e;
			}

			int count = (int) RepositoryUtils.count(repository);
			importReport.addEntityCount(identifier, count);
			importReport.addNrImported(count);
		}
	}

	return importReport;
}