/**
 * Searches the {@code "protocolTree-" + dataSetId} index for hits whose {@code field} equals any
 * of the supplied values (the values are combined with OR, at most 10000 hits are requested).
 *
 * @param field name of the indexed column to match on
 * @param featureNames values to look for; {@code null} or empty yields an empty result
 * @param dataSetId id used to build the index name
 * @return a new, mutable map from each hit's {@code "id"} column (as a string) to the hit itself;
 *     never {@code null}
 */
private Map<String, Hit> findFeaturesFromIndex(
    String field, List<String> featureNames, Integer dataSetId) {
  Map<String, Hit> featureIds = new HashMap<String, Hit>();

  // Nothing to look for: skip the search. Without this guard a null list fails in the loop below
  // and an empty list sends a query without any rule (presumably matching every document in the
  // index - confirm against the search service), which is never what a caller asking for
  // "none of these values" wants.
  if (featureNames == null || featureNames.isEmpty()) {
    return featureIds;
  }

  QueryImpl q = new QueryImpl();
  q.pageSize(10000);
  for (String featureName : featureNames) {
    // Rules are chained as: rule OR rule OR rule ...
    if (q.getRules().size() > 0) {
      q.addRule(new QueryRule(Operator.OR));
    }
    q.addRule(new QueryRule(field, Operator.EQUALS, featureName));
  }

  SearchResult result =
      searchService.search(new SearchRequest("protocolTree-" + dataSetId, q, null));
  for (Hit hit : result.getSearchHits()) {
    featureIds.put(hit.getColumnValueMap().get("id").toString(), hit);
  }
  return featureIds;
}
@RequestMapping( value = "/verify", method = RequestMethod.POST, headers = "Content-Type=multipart/form-data") public void verify( @RequestParam(value = "selectedDataSet", required = false) String selectedDataSetId, @RequestParam Part file, HttpServletResponse response, Model model) throws IOException { EntitySource reader = null; ExcelWriter<Entity> excelWriterRanks = null; try { if (selectedDataSetId != null) { String origFileName = FileUploadUtils.getOriginalFileName(file); File uploadFile = fileStore.store(file.getInputStream(), origFileName); response.setContentType("application/vnd.ms-excel"); response.addHeader( "Content-Disposition", "attachment; filename=" + getCsvFileName(file.getName() + "-ranks")); excelWriterRanks = new ExcelWriter<Entity>(response.getOutputStream()); excelWriterRanks.addCellProcessor(new LowerCaseProcessor(true, false)); Writable<Entity> sheetWriterRank = null; Writable<Entity> sheetWriterRankStatistics = null; Writable<Entity> sheetWriteBiobankRanks = null; Writable<Entity> sheetWriteSpssInput = null; reader = new ExcelEntitySourceFactory().create(uploadFile); Repository<? 
extends Entity> inputSheet = reader.getRepositoryByEntityName("Sheet1"); List<String> biobankNames = new ArrayList<String>(); for (AttributeMetaData attr : inputSheet.getAttributes()) { biobankNames.add(attr.getName()); } String firstColumn = biobankNames.get(0); biobankNames.remove(0); // First column has to correspond to the selected dataset DataSet ds = dataService.findOne(DataSet.ENTITY_NAME, Integer.parseInt(selectedDataSetId)); if (ds.getName().equalsIgnoreCase(firstColumn)) { Map<String, Map<String, List<String>>> maunalMappings = new HashMap<String, Map<String, List<String>>>(); for (Entity row : inputSheet) { String variableName = row.getString(firstColumn); if (!maunalMappings.containsKey(variableName)) maunalMappings.put(variableName, new HashMap<String, List<String>>()); for (String biobank : biobankNames) { if (row.get(biobank) != null) { String mappingString = row.get(biobank).toString(); if (!maunalMappings.containsKey(variableName)) { maunalMappings.put(variableName, new HashMap<String, List<String>>()); } if (!maunalMappings.get(variableName).containsKey(biobank.toLowerCase())) { maunalMappings .get(variableName) .put(biobank.toLowerCase(), new ArrayList<String>()); } maunalMappings .get(variableName) .get(biobank.toLowerCase()) .addAll(Arrays.asList(mappingString.split(","))); } } } List<String> lowerCaseBiobankNames = new ArrayList<String>(); for (String element : biobankNames) { lowerCaseBiobankNames.add(element.toLowerCase()); } List<DataSet> dataSets = dataService.findAllAsList( DataSet.ENTITY_NAME, new QueryImpl().in(DataSet.NAME, lowerCaseBiobankNames)); lowerCaseBiobankNames.add(0, firstColumn.toLowerCase()); sheetWriterRank = excelWriterRanks.createWritable("result", lowerCaseBiobankNames); Map<String, Map<String, List<Integer>>> rankCollection = new HashMap<String, Map<String, List<Integer>>>(); List<Object> allRanks = new ArrayList<Object>(); for (Entry<String, Map<String, List<String>>> entry : maunalMappings.entrySet()) { String 
variableName = entry.getKey(); List<String> ranks = new ArrayList<String>(); ranks.add(variableName); Map<String, List<String>> mappingDetail = entry.getValue(); List<ObservableFeature> features = dataService.findAllAsList( ObservableFeature.ENTITY_NAME, new QueryImpl().eq(ObservableFeature.NAME, variableName)); String description = features.get(0).getDescription(); if (!rankCollection.containsKey(description)) rankCollection.put(description, new HashMap<String, List<Integer>>()); if (!features.isEmpty()) { Entity row = new MapEntity(); row.set(firstColumn.toLowerCase(), description); for (DataSet dataSet : dataSets) { List<Integer> ranksBiobank = new ArrayList<Integer>(); if (mappingDetail.containsKey(dataSet.getName().toLowerCase())) { Map<String, Hit> mappedFeatureIds = findFeaturesFromIndex( "name", mappingDetail.get(dataSet.getName().toLowerCase()), dataSet.getId()); String mappingDataSetIdentifier = SecurityUtils.getCurrentUsername() + "-" + selectedDataSetId + "-" + dataSet.getId(); Query q = new QueryImpl() .eq("store_mapping_feature", features.get(0).getId()) .pageSize(50) .sort(new Sort(Direction.DESC, "store_mapping_score")); SearchRequest searchRequest = new SearchRequest(mappingDataSetIdentifier, q, null); SearchResult result = searchService.search(searchRequest); if (mappedFeatureIds.size() == 0) { row.set(dataSet.getName().toLowerCase(), "N/A2"); continue; } List<String> ids = new ArrayList<String>(); for (Hit hit : result.getSearchHits()) { Map<String, Object> columnValueMap = hit.getColumnValueMap(); ids.add(columnValueMap.get("store_mapping_mapped_feature").toString()); } Map<String, Hit> featureInfos = findFeaturesFromIndex("id", ids, dataSet.getId()); String previousDescription = null; int rank = 0; for (Hit hit : result.getSearchHits()) { Map<String, Object> columnValueMap = hit.getColumnValueMap(); String mappedFeatureId = columnValueMap.get("store_mapping_mapped_feature").toString(); String mappedFeatureDescription = featureInfos 
.get(mappedFeatureId) .getColumnValueMap() .get("description") .toString() .replaceAll("[^0-9a-zA-Z ]", " "); rank++; if (previousDescription != null && previousDescription.equalsIgnoreCase(mappedFeatureDescription)) rank--; if (mappedFeatureIds.containsKey(mappedFeatureId)) { ranksBiobank.add(rank); allRanks.add(rank); mappedFeatureIds.remove(mappedFeatureId); } previousDescription = mappedFeatureDescription; } if (mappedFeatureIds.size() == 0) { String output = StringUtils.join(ranksBiobank, ','); if (ranksBiobank.size() > 1) { output += " (" + averageRank(ranksBiobank) + ")"; } row.set(dataSet.getName().toLowerCase(), output); } else { for (int i = 0; i < mappedFeatureIds.size(); i++) allRanks.add("Not mapped"); row.set(dataSet.getName().toLowerCase(), "Not mapped"); ranksBiobank.clear(); } } else row.set(dataSet.getName().toLowerCase(), "N/A1"); rankCollection.get(description).put(dataSet.getName().toLowerCase(), ranksBiobank); } sheetWriterRank.add(row); } } Map<String, List<Integer>> rankCollectionPerBiobank = new HashMap<String, List<Integer>>(); { sheetWriterRankStatistics = excelWriterRanks.createWritable( "rank statistics", Arrays.asList( firstColumn.toLowerCase(), "average rank", "round-up rank", "median rank", "minium", "maximum")); for (Entry<String, Map<String, List<Integer>>> entry : rankCollection.entrySet()) { String variableName = entry.getKey(); Entity row = new MapEntity(); row.set(firstColumn.toLowerCase(), variableName); List<Integer> rankAllBiobanks = new ArrayList<Integer>(); for (Entry<String, List<Integer>> rankBiobanks : entry.getValue().entrySet()) { if (!rankCollectionPerBiobank.containsKey(rankBiobanks.getKey())) rankCollectionPerBiobank.put(rankBiobanks.getKey(), new ArrayList<Integer>()); rankCollectionPerBiobank.get(rankBiobanks.getKey()).addAll(rankBiobanks.getValue()); rankAllBiobanks.addAll(rankBiobanks.getValue()); } row.set("average rank", averageRank(rankAllBiobanks)); row.set("round-up rank", 
Math.ceil(averageRank(rankAllBiobanks))); Collections.sort(rankAllBiobanks); if (!rankAllBiobanks.isEmpty()) { row.set("minium", rankAllBiobanks.get(0)); row.set("maximum", rankAllBiobanks.get(rankAllBiobanks.size() - 1)); double medianRank = 0; if (rankAllBiobanks.size() % 2 == 0) { medianRank = (double) (rankAllBiobanks.get(rankAllBiobanks.size() / 2 - 1) + rankAllBiobanks.get(rankAllBiobanks.size() / 2)) / 2; } else { medianRank = rankAllBiobanks.get(rankAllBiobanks.size() / 2); } row.set("median rank", medianRank); } sheetWriterRankStatistics.add(row); } } { lowerCaseBiobankNames.remove(0); sheetWriteBiobankRanks = excelWriterRanks.createWritable("biobank average ranks", lowerCaseBiobankNames); Entity entity = new MapEntity(); for (Entry<String, List<Integer>> entry : rankCollectionPerBiobank.entrySet()) { entity.set(entry.getKey(), averageRank(entry.getValue())); } sheetWriteBiobankRanks.add(entity); } { sheetWriteSpssInput = excelWriterRanks.createWritable("spss ranks", Arrays.asList("rank")); for (Object rank : allRanks) { Entity entity = new MapEntity("rank", rank); sheetWriteSpssInput.add(entity); } } } } } finally { if (reader != null) reader.close(); if (excelWriterRanks != null) IOUtils.closeQuietly(excelWriterRanks); } }