예제 #1
0
  /**
   * Searches the {@code "protocolTree-" + dataSetId} index for hits whose {@code field} equals any
   * of the supplied values and returns them keyed by the string form of their {@code "id"} column.
   *
   * <p>The values are combined into a single query as an OR-chain of EQUALS rules, with the page
   * size set to 10000.
   *
   * @param field name of the indexed column to match against
   * @param featureNames values to look for in {@code field}
   * @param dataSetId identifier appended to {@code "protocolTree-"} to form the searched document
   *     type
   * @return map from each hit's {@code "id"} column value (as a string) to the hit itself; when
   *     several hits share an id the last one returned by the search wins
   */
  private Map<String, Hit> findFeaturesFromIndex(
      String field, List<String> featureNames, Integer dataSetId) {
    QueryImpl query = new QueryImpl();
    query.pageSize(10000);

    for (String candidate : featureNames) {
      // Every rule after the first is joined to its predecessor with OR.
      if (!query.getRules().isEmpty()) {
        query.addRule(new QueryRule(Operator.OR));
      }
      query.addRule(new QueryRule(field, Operator.EQUALS, candidate));
    }

    SearchRequest request = new SearchRequest("protocolTree-" + dataSetId, query, null);
    SearchResult searchResult = searchService.search(request);

    Map<String, Hit> hitsById = new HashMap<String, Hit>();
    for (Hit found : searchResult.getSearchHits()) {
      Object idValue = found.getColumnValueMap().get("id");
      hitsById.put(idValue.toString(), found);
    }
    return hitsById;
  }
예제 #2
0
  /**
   * Handles a multipart POST to {@code /verify}: ranks the manually curated mappings in an uploaded
   * Excel sheet against the stored mapping search results and streams the outcome back as an Excel
   * workbook.
   *
   * <p>The upload must contain a sheet named {@code "Sheet1"}. Its first column header has to equal
   * (ignoring case) the name of the selected data set; every further column header is treated as
   * the name of a data set to compare against. Each cell holds a comma-separated list of feature
   * names that were manually mapped to the variable in the first column.
   *
   * <p>For every variable the stored mappings are fetched from the index (top 50 by descending
   * {@code store_mapping_score}) and the rank at which each manually mapped feature appears is
   * recorded. Consecutive hits with the same (sanitised) description share one rank.
   *
   * <p>Four sheets are written: {@code "result"}, {@code "rank statistics"}, {@code "biobank
   * average ranks"} and {@code "spss ranks"}. When {@code selectedDataSetId} is {@code null}
   * nothing is done; when the first column header does not match the selected data set no sheets
   * are written.
   *
   * <p>Variables for which no {@code ObservableFeature} with that name exists are skipped.
   *
   * @param selectedDataSetId id of the data set the first spreadsheet column refers to; parsed
   *     with {@link Integer#parseInt(String)}, may be {@code null}
   * @param file the uploaded Excel file
   * @param response servlet response the resulting workbook is written to
   * @param model MVC model (not used by this handler)
   * @throws IOException if reading the upload or writing the response fails
   */
  @RequestMapping(
      value = "/verify",
      method = RequestMethod.POST,
      headers = "Content-Type=multipart/form-data")
  public void verify(
      @RequestParam(value = "selectedDataSet", required = false) String selectedDataSetId,
      @RequestParam Part file,
      HttpServletResponse response,
      Model model)
      throws IOException {
    EntitySource reader = null;
    ExcelWriter<Entity> excelWriterRanks = null;

    try {
      if (selectedDataSetId != null) {
        String origFileName = FileUploadUtils.getOriginalFileName(file);
        File uploadFile = fileStore.store(file.getInputStream(), origFileName);
        response.setContentType("application/vnd.ms-excel");
        // NOTE(review): if Part is the servlet Part, getName() is the form field name rather than
        // the uploaded file name (origFileName) - confirm which one is intended here.
        response.addHeader(
            "Content-Disposition",
            "attachment; filename=" + getCsvFileName(file.getName() + "-ranks"));
        excelWriterRanks = new ExcelWriter<Entity>(response.getOutputStream());
        excelWriterRanks.addCellProcessor(new LowerCaseProcessor(true, false));

        Writable<Entity> sheetWriterRank = null;
        Writable<Entity> sheetWriterRankStatistics = null;
        Writable<Entity> sheetWriteBiobankRanks = null;
        Writable<Entity> sheetWriteSpssInput = null;

        reader = new ExcelEntitySourceFactory().create(uploadFile);
        Repository<? extends Entity> inputSheet = reader.getRepositoryByEntityName("Sheet1");

        // Column headers: the first is the selected data set, the rest are the compared data sets.
        List<String> biobankNames = new ArrayList<String>();
        for (AttributeMetaData attr : inputSheet.getAttributes()) {
          biobankNames.add(attr.getName());
        }
        String firstColumn = biobankNames.get(0);
        biobankNames.remove(0);

        // First column has to correspond to the selected dataset
        DataSet ds = dataService.findOne(DataSet.ENTITY_NAME, Integer.parseInt(selectedDataSetId));

        if (ds.getName().equalsIgnoreCase(firstColumn)) {
          // variable name -> (lower-cased data set name -> manually mapped feature names)
          Map<String, Map<String, List<String>>> manualMappings =
              new HashMap<String, Map<String, List<String>>>();
          for (Entity row : inputSheet) {
            String variableName = row.getString(firstColumn);
            if (!manualMappings.containsKey(variableName)) {
              manualMappings.put(variableName, new HashMap<String, List<String>>());
            }
            Map<String, List<String>> mappingsForVariable = manualMappings.get(variableName);
            for (String biobank : biobankNames) {
              if (row.get(biobank) != null) {
                String mappingString = row.get(biobank).toString();
                String biobankKey = biobank.toLowerCase();
                if (!mappingsForVariable.containsKey(biobankKey)) {
                  mappingsForVariable.put(biobankKey, new ArrayList<String>());
                }
                mappingsForVariable
                    .get(biobankKey)
                    .addAll(Arrays.asList(mappingString.split(",")));
              }
            }
          }

          List<String> lowerCaseBiobankNames = new ArrayList<String>();
          for (String element : biobankNames) {
            lowerCaseBiobankNames.add(element.toLowerCase());
          }

          List<DataSet> dataSets =
              dataService.findAllAsList(
                  DataSet.ENTITY_NAME, new QueryImpl().in(DataSet.NAME, lowerCaseBiobankNames));

          // The "result" sheet has the selected data set as its first column.
          lowerCaseBiobankNames.add(0, firstColumn.toLowerCase());
          sheetWriterRank = excelWriterRanks.createWritable("result", lowerCaseBiobankNames);

          // feature description -> (lower-cased data set name -> ranks of the manual mappings)
          Map<String, Map<String, List<Integer>>> rankCollection =
              new HashMap<String, Map<String, List<Integer>>>();
          // Every rank (Integer) or "Not mapped" marker (String), in encounter order.
          List<Object> allRanks = new ArrayList<Object>();

          for (Entry<String, Map<String, List<String>>> entry : manualMappings.entrySet()) {
            String variableName = entry.getKey();
            Map<String, List<String>> mappingDetail = entry.getValue();
            List<ObservableFeature> features =
                dataService.findAllAsList(
                    ObservableFeature.ENTITY_NAME,
                    new QueryImpl().eq(ObservableFeature.NAME, variableName));

            // Must be checked before touching features.get(0): a variable name without a matching
            // feature would otherwise raise IndexOutOfBoundsException and abort the whole export.
            if (features.isEmpty()) {
              continue;
            }

            ObservableFeature feature = features.get(0);
            String description = feature.getDescription();
            if (!rankCollection.containsKey(description)) {
              rankCollection.put(description, new HashMap<String, List<Integer>>());
            }

            Entity row = new MapEntity();
            row.set(firstColumn.toLowerCase(), description);

            for (DataSet dataSet : dataSets) {
              String dataSetKey = dataSet.getName().toLowerCase();
              List<Integer> ranksBiobank = new ArrayList<Integer>();
              if (mappingDetail.containsKey(dataSetKey)) {
                // Manually mapped features of this data set, keyed by id; entries are removed as
                // they are found in the ranked search result.
                Map<String, Hit> mappedFeatureIds =
                    findFeaturesFromIndex("name", mappingDetail.get(dataSetKey), dataSet.getId());

                String mappingDataSetIdentifier =
                    SecurityUtils.getCurrentUsername()
                        + "-"
                        + selectedDataSetId
                        + "-"
                        + dataSet.getId();

                Query q =
                    new QueryImpl()
                        .eq("store_mapping_feature", feature.getId())
                        .pageSize(50)
                        .sort(new Sort(Direction.DESC, "store_mapping_score"));

                SearchRequest searchRequest = new SearchRequest(mappingDataSetIdentifier, q, null);

                SearchResult result = searchService.search(searchRequest);

                if (mappedFeatureIds.isEmpty()) {
                  // None of the manually mapped names exists in the index for this data set.
                  row.set(dataSetKey, "N/A2");
                  continue;
                }

                List<String> ids = new ArrayList<String>();
                for (Hit hit : result.getSearchHits()) {
                  Map<String, Object> columnValueMap = hit.getColumnValueMap();
                  ids.add(columnValueMap.get("store_mapping_mapped_feature").toString());
                }
                Map<String, Hit> featureInfos = findFeaturesFromIndex("id", ids, dataSet.getId());

                String previousDescription = null;
                int rank = 0;
                for (Hit hit : result.getSearchHits()) {
                  Map<String, Object> columnValueMap = hit.getColumnValueMap();
                  String mappedFeatureId =
                      columnValueMap.get("store_mapping_mapped_feature").toString();
                  // NOTE(review): assumes every mapped feature id is present in featureInfos and
                  // carries a "description"; a missing entry would raise a NullPointerException.
                  String mappedFeatureDescription =
                      featureInfos
                          .get(mappedFeatureId)
                          .getColumnValueMap()
                          .get("description")
                          .toString()
                          .replaceAll("[^0-9a-zA-Z ]", " ");

                  // Consecutive hits with the same description share a rank.
                  rank++;
                  if (previousDescription != null
                      && previousDescription.equalsIgnoreCase(mappedFeatureDescription)) {
                    rank--;
                  }

                  if (mappedFeatureIds.containsKey(mappedFeatureId)) {
                    ranksBiobank.add(rank);
                    allRanks.add(rank);
                    mappedFeatureIds.remove(mappedFeatureId);
                  }
                  previousDescription = mappedFeatureDescription;
                }

                if (mappedFeatureIds.isEmpty()) {
                  // Every manual mapping was found among the ranked hits.
                  String output = StringUtils.join(ranksBiobank, ',');
                  if (ranksBiobank.size() > 1) {
                    output += " (" + averageRank(ranksBiobank) + ")";
                  }
                  row.set(dataSetKey, output);
                } else {
                  // At least one manual mapping is missing from the ranked hits: discard the
                  // ranks collected for this data set and record one marker per missing mapping.
                  for (int i = 0; i < mappedFeatureIds.size(); i++) {
                    allRanks.add("Not mapped");
                  }
                  row.set(dataSetKey, "Not mapped");
                  ranksBiobank.clear();
                }
              } else {
                row.set(dataSetKey, "N/A1");
              }

              rankCollection.get(description).put(dataSetKey, ranksBiobank);
            }
            sheetWriterRank.add(row);
          }

          // lower-cased data set name -> all ranks collected for that data set
          Map<String, List<Integer>> rankCollectionPerBiobank =
              new HashMap<String, List<Integer>>();
          {
            // Sheet "rank statistics": one row per feature description, over all data sets.
            sheetWriterRankStatistics =
                excelWriterRanks.createWritable(
                    "rank statistics",
                    Arrays.asList(
                        firstColumn.toLowerCase(),
                        "average rank",
                        "round-up rank",
                        "median rank",
                        "minium",
                        "maximum"));

            for (Entry<String, Map<String, List<Integer>>> entry : rankCollection.entrySet()) {
              String variableName = entry.getKey();
              Entity row = new MapEntity();
              row.set(firstColumn.toLowerCase(), variableName);
              List<Integer> rankAllBiobanks = new ArrayList<Integer>();
              for (Entry<String, List<Integer>> rankBiobanks : entry.getValue().entrySet()) {
                if (!rankCollectionPerBiobank.containsKey(rankBiobanks.getKey())) {
                  rankCollectionPerBiobank.put(rankBiobanks.getKey(), new ArrayList<Integer>());
                }
                rankCollectionPerBiobank.get(rankBiobanks.getKey()).addAll(rankBiobanks.getValue());
                rankAllBiobanks.addAll(rankBiobanks.getValue());
              }

              row.set("average rank", averageRank(rankAllBiobanks));
              row.set("round-up rank", Math.ceil(averageRank(rankAllBiobanks)));
              Collections.sort(rankAllBiobanks);
              if (!rankAllBiobanks.isEmpty()) {
                row.set("minium", rankAllBiobanks.get(0));
                row.set("maximum", rankAllBiobanks.get(rankAllBiobanks.size() - 1));

                // Median of the sorted ranks: mean of the two middle values for an even count.
                double medianRank = 0;
                if (rankAllBiobanks.size() % 2 == 0) {
                  medianRank =
                      (double)
                              (rankAllBiobanks.get(rankAllBiobanks.size() / 2 - 1)
                                  + rankAllBiobanks.get(rankAllBiobanks.size() / 2))
                          / 2;
                } else {
                  medianRank = rankAllBiobanks.get(rankAllBiobanks.size() / 2);
                }
                row.set("median rank", medianRank);
              }

              sheetWriterRankStatistics.add(row);
            }
          }

          {
            // Sheet "biobank average ranks": a single row with the average rank per data set.
            // Drop the selected data set's column that was prepended for the "result" sheet.
            lowerCaseBiobankNames.remove(0);
            sheetWriteBiobankRanks =
                excelWriterRanks.createWritable("biobank average ranks", lowerCaseBiobankNames);
            Entity entity = new MapEntity();
            for (Entry<String, List<Integer>> entry : rankCollectionPerBiobank.entrySet()) {
              entity.set(entry.getKey(), averageRank(entry.getValue()));
            }
            sheetWriteBiobankRanks.add(entity);
          }

          {
            // Sheet "spss ranks": every collected rank / "Not mapped" marker on its own row.
            sheetWriteSpssInput =
                excelWriterRanks.createWritable("spss ranks", Arrays.asList("rank"));
            for (Object rank : allRanks) {
              Entity entity = new MapEntity("rank", rank);
              sheetWriteSpssInput.add(entity);
            }
          }
        }
      }
    } finally {
      if (reader != null) reader.close();
      if (excelWriterRanks != null) IOUtils.closeQuietly(excelWriterRanks);
    }
  }