// RULE: a Feature can only belong to one Protocol in a DataSet (see issue #1136).
  /**
   * Enforces the rule that, within one DataSet, an ObservableFeature is listed by at most one
   * Protocol (see issue #1136).
   *
   * <p>For every DataSet returned by the data service, the protocols obtained from
   * {@code ProtocolUtils.getProtocolDescendants(dataSet.getProtocolUsed(), true)} are compared
   * pairwise; the first feature found in two different protocols aborts the check.
   *
   * @throws MolgenisValidationException carrying a single ConstraintViolation that names the
   *     feature and both protocols, as soon as a shared feature is found
   */
  private void checkFeatureCanOnlyBelongToOneProtocolForOneDataSet() {
    Iterable<DataSet> allDataSets = dataService.findAll(DataSet.ENTITY_NAME, DataSet.class);
    for (DataSet currentDataSet : allDataSets) {
      List<Protocol> protocolTree =
          ProtocolUtils.getProtocolDescendants(currentDataSet.getProtocolUsed(), true);

      for (Protocol owner : protocolTree) {
        for (ObservableFeature feature : owner.getFeatures()) {
          for (Protocol other : protocolTree) {
            // A protocol trivially shares its features with itself.
            if (other.equals(owner)) {
              continue;
            }
            if (!other.getFeatures().contains(feature)) {
              continue;
            }

            String message =
                String.format(
                    "An ObservableFeature can only belong to one Protocol but feature '%s' belongs to both '%s' and '%s'",
                    feature.getIdentifier(), other.getIdentifier(), owner.getIdentifier());
            ConstraintViolation violation =
                new ConstraintViolation(message, feature.getIdentifier(), feature, null, null, 0);

            throw new MolgenisValidationException(Sets.newHashSet(violation));
          }
        }
      }
    }
  }
예제 #2
0
  /**
   * Populates the model for the evaluation plugin page.
   *
   * <p>Model attributes set:
   *
   * <ul>
   *   <li>{@code dataSets}: all data sets whose protocol identifier differs from
   *       {@code PROTOCOL_IDENTIFIER};
   *   <li>{@code selectedDataSet}: the requested data set id, or the id of the first data set
   *       returned by the data service when none was requested (only set when non-null);
   *   <li>{@code mappedDataSets}: the third dash-separated segment of every data set identifier
   *       that starts with {@code <current username>-<selected id>}.
   * </ul>
   *
   * @param selectedDataSetId id of the selected data set, may be {@code null}
   * @param model model to populate
   * @return the view name {@code "EvaluationPlugin"}
   */
  @RequestMapping(method = RequestMethod.GET)
  public String init(
      @RequestParam(value = "selectedDataSet", required = false) String selectedDataSetId,
      Model model) {
    Iterable<DataSet> allDataSets = dataService.findAll(DataSet.ENTITY_NAME, new QueryImpl());

    List<DataSet> dataSets = new ArrayList<DataSet>();
    for (DataSet dataSet : allDataSets) {
      // Default the selection to the first data set encountered.
      if (selectedDataSetId == null) {
        selectedDataSetId = dataSet.getId().toString();
      }
      if (!dataSet.getProtocolUsed().getIdentifier().equals(PROTOCOL_IDENTIFIER)) {
        dataSets.add(dataSet);
      }
    }
    model.addAttribute("dataSets", dataSets);

    List<String> mappedDataSets = new ArrayList<String>();
    if (selectedDataSetId != null) {
      model.addAttribute("selectedDataSet", selectedDataSetId);

      String mappingPrefix = SecurityUtils.getCurrentUsername() + "-" + selectedDataSetId;
      Iterable<DataSet> candidates =
          dataService.findAll(
              DataSet.ENTITY_NAME, new QueryImpl().like(DataSet.IDENTIFIER, selectedDataSetId));
      for (DataSet candidate : candidates) {
        String identifier = candidate.getIdentifier();
        if (identifier.startsWith(mappingPrefix)) {
          String[] identifierParts = identifier.split("-");
          // Index 2 is read, so at least three parts are required (the previous "> 1" guard
          // allowed an ArrayIndexOutOfBoundsException for two-part identifiers).
          if (identifierParts.length > 2) {
            mappedDataSets.add(identifierParts[2]);
          }
        }
      }
    }
    model.addAttribute("mappedDataSets", mappedDataSets);
    return "EvaluationPlugin";
  }
  /**
   * Builds the shared static test fixture (features, protocols, data set, observation sets,
   * observed values, categories) and stubs the {@code findAllAsList} lookups of the given data
   * service.
   *
   * @param dataService mocked data service on which the lookups are stubbed
   * @throws Exception declared for callers; nothing in this method throws a checked exception
   *     directly
   */
  public static void setUp(DataService dataService) throws Exception {
    features0 = new ArrayList<ObservableFeature>();
    features1 = new ArrayList<ObservableFeature>();
    allEntities = new ArrayList<Entity>();
    subProtocols = new ArrayList<Protocol>();
    subProtocols1 = new ArrayList<Protocol>();
    categories = new ArrayList<Category>();

    // Two features, each owned by its own protocol below.
    feature0 = new ObservableFeature();
    feature0.setId(0);
    feature0.setName("featureName" + 0);
    feature0.setIdentifier("feature" + 0);

    feature1 = new ObservableFeature();
    feature1.setId(1);
    feature1.setName("featureName" + 1);
    feature1.setIdentifier("feature" + 1);

    features0.add(feature0);
    features1.add(feature1);

    // protocol0 and protocol1 carry features; protocol2 and protocol3 both list protocol1 as
    // their sub-protocol.
    protocol0 = new Protocol();
    protocol0.setDescription("description0");
    protocol0.setIdentifier("identifier0");
    protocol0.setId(0);
    protocol0.setFeatures(features0);
    protocol1 = new Protocol();
    protocol1.setDescription("description1");
    protocol1.setIdentifier("identifier1");
    protocol1.setId(1);
    protocol1.setFeatures(features1);
    subProtocols1.add(protocol1);
    protocol2 = new Protocol();
    protocol2.setDescription("description2");
    protocol2.setIdentifier("identifier2");
    protocol2.setSubprotocols(subProtocols1);
    protocol2.setId(2);
    protocol3 = new Protocol();
    protocol3.setDescription("description3");
    protocol3.setIdentifier("identifier3");
    protocol3.setSubprotocols(subProtocols1);
    protocol3.setId(3);

    allEntities.add(protocol0);
    allEntities.add(protocol1);
    allEntities.add(protocol2);
    allEntities.add(protocol3);

    subProtocols.add(protocol0);

    // Root protocol of the data set; its only sub-protocol is protocol0.
    protocolUsed = new Protocol();
    protocolUsed.setDescription("protocolUsed_description");
    protocolUsed.setIdentifier("protocolUsed_identifier");
    protocolUsed.setId(100);
    protocolUsed.setSubprotocols(subProtocols);
    allEntities.add(protocolUsed);

    dataset = new DataSet();
    dataset.setId(0);
    dataset.setIdentifier("dataset" + 0);
    dataset.setName("datasetname" + 0);
    dataset.setProtocolUsed(protocolUsed);

    observationSet0 = new ObservationSet();
    observationSet0.setId(0);
    observationSet0.setPartOfDataSet(dataset);

    observationSets0 = new ArrayList<Entity>();
    observationSets0.add(observationSet0);

    // observationSet1 is intentionally left without a data set, as in the original fixture.
    observationSet1 = new ObservationSet();
    observationSet1.setId(1);

    ObservedValue observedValue0 = new ObservedValue();
    observedValue0.setId(0);
    observedValue0.setObservationSet(observationSet0);
    observedValue0.setValue(new Value());
    observedValue0.setFeature(feature0);
    observedValues0 = new ArrayList<ObservedValue>();
    observedValues0.add(observedValue0);

    ObservedValue observedValue1 = new ObservedValue();
    observedValue1.setId(1);
    observedValue1.setObservationSet(observationSet1);
    Value v1 = new Value();
    v1.setId(1);
    observedValue1.setValue(v1);
    observedValue1.setFeature(feature1);
    observedValues1 = new ArrayList<ObservedValue>();
    observedValues1.add(observedValue1);

    category0 = new Category();
    category0.setId(0);
    category0.setIdentifier("category" + 0);
    categories.add(category0);

    // NOTE(review): the fixture data set has identifier "dataset0" but this stub matches the
    // query for "dataset1" - confirm that this is intended.
    when(dataService.findAllAsList(
            DataSet.ENTITY_NAME, new QueryImpl().eq(DataSet.IDENTIFIER, "dataset1")))
        .thenReturn(Arrays.<Entity>asList(dataset));
    when(dataService.findAllAsList(
            ObservedValue.ENTITY_NAME,
            new QueryImpl().eq(ObservedValue.OBSERVATIONSET, observationSet0)))
        .thenReturn(Arrays.<Entity>asList(observedValue0));

    when(dataService.findAllAsList(
            ObservationSet.ENTITY_NAME, new QueryImpl().eq(ObservationSet.PARTOFDATASET, 0)))
        .thenReturn(Arrays.<Entity>asList(observationSet0));
  }
예제 #4
0
  /**
   * Compares manually curated mappings from an uploaded Excel file with the stored mapping
   * search results and writes the resulting ranks to the response as an Excel workbook.
   *
   * <p>The upload is read from the sheet named {@code "Sheet1"}. Its first column name must
   * equal (ignoring case) the name of the selected data set; all remaining column names are
   * looked up as data set names. Each non-null cell is split on {@code ","} into the names of the
   * manually mapped features for that variable and data set.
   *
   * <p>When the first column matches, four sheets are written: {@code "result"},
   * {@code "rank statistics"}, {@code "biobank average ranks"} and {@code "spss ranks"}. When
   * {@code selectedDataSetId} is {@code null} nothing is written to the response; when the first
   * column does not match, the response headers are set but no sheets are added.
   *
   * <p>Variables for which no ObservableFeature with that name exists are skipped.
   *
   * @param selectedDataSetId id of the selected data set (parsed with
   *     {@link Integer#parseInt(String)}), may be {@code null}
   * @param file uploaded Excel file holding the manual mappings
   * @param response response that receives the generated workbook
   * @param model not used by this method
   * @throws IOException if storing the upload, reading it or writing the response fails
   */
  @RequestMapping(
      value = "/verify",
      method = RequestMethod.POST,
      headers = "Content-Type=multipart/form-data")
  public void verify(
      @RequestParam(value = "selectedDataSet", required = false) String selectedDataSetId,
      @RequestParam Part file,
      HttpServletResponse response,
      Model model)
      throws IOException {
    EntitySource reader = null;
    ExcelWriter<Entity> excelWriterRanks = null;

    try {
      if (selectedDataSetId != null) {
        String origFileName = FileUploadUtils.getOriginalFileName(file);
        File uploadFile = fileStore.store(file.getInputStream(), origFileName);
        response.setContentType("application/vnd.ms-excel");
        response.addHeader(
            "Content-Disposition",
            "attachment; filename=" + getCsvFileName(file.getName() + "-ranks"));
        excelWriterRanks = new ExcelWriter<Entity>(response.getOutputStream());
        excelWriterRanks.addCellProcessor(new LowerCaseProcessor(true, false));

        Writable<Entity> sheetWriterRank = null;
        Writable<Entity> sheetWriterRankStatistics = null;
        Writable<Entity> sheetWriteBiobankRanks = null;
        Writable<Entity> sheetWriteSpssInput = null;

        reader = new ExcelEntitySourceFactory().create(uploadFile);
        Repository<? extends Entity> inputSheet = reader.getRepositoryByEntityName("Sheet1");

        List<String> biobankNames = new ArrayList<String>();
        for (AttributeMetaData attr : inputSheet.getAttributes()) {
          biobankNames.add(attr.getName());
        }
        String firstColumn = biobankNames.get(0);
        biobankNames.remove(0);

        // First column has to correspond to the selected dataset
        DataSet ds = dataService.findOne(DataSet.ENTITY_NAME, Integer.parseInt(selectedDataSetId));

        if (ds.getName().equalsIgnoreCase(firstColumn)) {
          // variable name -> lower-cased biobank name -> manually mapped feature names
          Map<String, Map<String, List<String>>> manualMappings =
              new HashMap<String, Map<String, List<String>>>();
          for (Entity row : inputSheet) {
            String variableName = row.getString(firstColumn);
            if (!manualMappings.containsKey(variableName)) {
              manualMappings.put(variableName, new HashMap<String, List<String>>());
            }
            Map<String, List<String>> mappingsOfVariable = manualMappings.get(variableName);
            for (String biobank : biobankNames) {
              Object cell = row.get(biobank);
              if (cell != null) {
                String mappingString = cell.toString();
                String biobankKey = biobank.toLowerCase();
                if (!mappingsOfVariable.containsKey(biobankKey)) {
                  mappingsOfVariable.put(biobankKey, new ArrayList<String>());
                }
                mappingsOfVariable.get(biobankKey).addAll(Arrays.asList(mappingString.split(",")));
              }
            }
          }

          List<String> lowerCaseBiobankNames = new ArrayList<String>();
          for (String element : biobankNames) {
            lowerCaseBiobankNames.add(element.toLowerCase());
          }

          List<DataSet> dataSets =
              dataService.findAllAsList(
                  DataSet.ENTITY_NAME, new QueryImpl().in(DataSet.NAME, lowerCaseBiobankNames));

          // The result sheet has the variable column first, followed by one column per biobank.
          lowerCaseBiobankNames.add(0, firstColumn.toLowerCase());
          sheetWriterRank = excelWriterRanks.createWritable("result", lowerCaseBiobankNames);

          // feature description -> lower-cased biobank name -> ranks of the manual mappings
          Map<String, Map<String, List<Integer>>> rankCollection =
              new HashMap<String, Map<String, List<Integer>>>();
          // Holds Integer ranks and "Not mapped" markers, in the order they were produced.
          List<Object> allRanks = new ArrayList<Object>();

          for (Entry<String, Map<String, List<String>>> entry : manualMappings.entrySet()) {
            String variableName = entry.getKey();
            Map<String, List<String>> mappingDetail = entry.getValue();
            List<ObservableFeature> features =
                dataService.findAllAsList(
                    ObservableFeature.ENTITY_NAME,
                    new QueryImpl().eq(ObservableFeature.NAME, variableName));

            // Skip unknown variables. This check must precede features.get(0): previously the
            // first element was read before the emptiness check, which threw
            // IndexOutOfBoundsException for variables without a matching feature.
            if (features.isEmpty()) {
              continue;
            }

            String description = features.get(0).getDescription();
            if (!rankCollection.containsKey(description)) {
              rankCollection.put(description, new HashMap<String, List<Integer>>());
            }

            Entity row = new MapEntity();
            row.set(firstColumn.toLowerCase(), description);

            for (DataSet dataSet : dataSets) {
              List<Integer> ranksBiobank = new ArrayList<Integer>();
              if (mappingDetail.containsKey(dataSet.getName().toLowerCase())) {
                Map<String, Hit> mappedFeatureIds =
                    findFeaturesFromIndex(
                        "name",
                        mappingDetail.get(dataSet.getName().toLowerCase()),
                        dataSet.getId());

                String mappingDataSetIdentifier =
                    SecurityUtils.getCurrentUsername()
                        + "-"
                        + selectedDataSetId
                        + "-"
                        + dataSet.getId();

                Query q =
                    new QueryImpl()
                        .eq("store_mapping_feature", features.get(0).getId())
                        .pageSize(50)
                        .sort(new Sort(Direction.DESC, "store_mapping_score"));

                SearchRequest searchRequest = new SearchRequest(mappingDataSetIdentifier, q, null);

                SearchResult result = searchService.search(searchRequest);

                if (mappedFeatureIds.size() == 0) {
                  row.set(dataSet.getName().toLowerCase(), "N/A2");
                  // Note: this also skips registering the biobank in rankCollection below.
                  continue;
                }

                List<String> ids = new ArrayList<String>();
                for (Hit hit : result.getSearchHits()) {
                  Map<String, Object> columnValueMap = hit.getColumnValueMap();
                  ids.add(columnValueMap.get("store_mapping_mapped_feature").toString());
                }
                Map<String, Hit> featureInfos = findFeaturesFromIndex("id", ids, dataSet.getId());

                String previousDescription = null;
                int rank = 0;
                for (Hit hit : result.getSearchHits()) {
                  Map<String, Object> columnValueMap = hit.getColumnValueMap();
                  String mappedFeatureId =
                      columnValueMap.get("store_mapping_mapped_feature").toString();
                  String mappedFeatureDescription =
                      featureInfos
                          .get(mappedFeatureId)
                          .getColumnValueMap()
                          .get("description")
                          .toString()
                          .replaceAll("[^0-9a-zA-Z ]", " ");

                  // Consecutive hits with the same (normalised) description share one rank.
                  rank++;
                  if (previousDescription != null
                      && previousDescription.equalsIgnoreCase(mappedFeatureDescription)) {
                    rank--;
                  }

                  if (mappedFeatureIds.containsKey(mappedFeatureId)) {
                    ranksBiobank.add(rank);
                    allRanks.add(rank);
                    mappedFeatureIds.remove(mappedFeatureId);
                  }
                  previousDescription = mappedFeatureDescription;
                }

                // Every manual mapping found among the hits has been removed from the map, so an
                // empty map means all manual mappings were ranked.
                if (mappedFeatureIds.size() == 0) {
                  String output = StringUtils.join(ranksBiobank, ',');
                  if (ranksBiobank.size() > 1) {
                    output += " (" + averageRank(ranksBiobank) + ")";
                  }
                  row.set(dataSet.getName().toLowerCase(), output);
                } else {
                  for (int i = 0; i < mappedFeatureIds.size(); i++) {
                    allRanks.add("Not mapped");
                  }
                  row.set(dataSet.getName().toLowerCase(), "Not mapped");
                  ranksBiobank.clear();
                }
              } else {
                row.set(dataSet.getName().toLowerCase(), "N/A1");
              }

              rankCollection.get(description).put(dataSet.getName().toLowerCase(), ranksBiobank);
            }
            sheetWriterRank.add(row);
          }

          Map<String, List<Integer>> rankCollectionPerBiobank =
              new HashMap<String, List<Integer>>();
          {
            sheetWriterRankStatistics =
                excelWriterRanks.createWritable(
                    "rank statistics",
                    Arrays.asList(
                        firstColumn.toLowerCase(),
                        "average rank",
                        "round-up rank",
                        "median rank",
                        "minium",
                        "maximum"));

            for (Entry<String, Map<String, List<Integer>>> entry : rankCollection.entrySet()) {
              String variableName = entry.getKey();
              Entity row = new MapEntity();
              row.set(firstColumn.toLowerCase(), variableName);
              List<Integer> rankAllBiobanks = new ArrayList<Integer>();
              for (Entry<String, List<Integer>> rankBiobanks : entry.getValue().entrySet()) {
                if (!rankCollectionPerBiobank.containsKey(rankBiobanks.getKey())) {
                  rankCollectionPerBiobank.put(rankBiobanks.getKey(), new ArrayList<Integer>());
                }
                rankCollectionPerBiobank.get(rankBiobanks.getKey()).addAll(rankBiobanks.getValue());
                rankAllBiobanks.addAll(rankBiobanks.getValue());
              }

              row.set("average rank", averageRank(rankAllBiobanks));
              row.set("round-up rank", Math.ceil(averageRank(rankAllBiobanks)));
              Collections.sort(rankAllBiobanks);
              if (!rankAllBiobanks.isEmpty()) {
                row.set("minium", rankAllBiobanks.get(0));
                row.set("maximum", rankAllBiobanks.get(rankAllBiobanks.size() - 1));

                double medianRank = 0;
                if (rankAllBiobanks.size() % 2 == 0) {
                  // Even count: mean of the two middle values of the sorted list.
                  medianRank =
                      (double)
                              (rankAllBiobanks.get(rankAllBiobanks.size() / 2 - 1)
                                  + rankAllBiobanks.get(rankAllBiobanks.size() / 2))
                          / 2;
                } else {
                  medianRank = rankAllBiobanks.get(rankAllBiobanks.size() / 2);
                }
                row.set("median rank", medianRank);
              }

              sheetWriterRankStatistics.add(row);
            }
          }

          {
            // Drop the variable column again; this sheet only has one column per biobank.
            lowerCaseBiobankNames.remove(0);
            sheetWriteBiobankRanks =
                excelWriterRanks.createWritable("biobank average ranks", lowerCaseBiobankNames);
            Entity entity = new MapEntity();
            for (Entry<String, List<Integer>> entry : rankCollectionPerBiobank.entrySet()) {
              entity.set(entry.getKey(), averageRank(entry.getValue()));
            }
            sheetWriteBiobankRanks.add(entity);
          }

          {
            sheetWriteSpssInput =
                excelWriterRanks.createWritable("spss ranks", Arrays.asList("rank"));
            for (Object rank : allRanks) {
              Entity entity = new MapEntity("rank", rank);
              sheetWriteSpssInput.add(entity);
            }
          }
        }
      }
    } finally {
      // Close the writer even when closing the reader fails.
      try {
        if (reader != null) reader.close();
      } finally {
        if (excelWriterRanks != null) IOUtils.closeQuietly(excelWriterRanks);
      }
    }
  }
  /**
   * Imports the entities and then the data sheets of the given repository collection.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>import all entities through {@code entitiesImporter};
   *   <li>verify that no feature belongs to more than one protocol of a data set (issue #1136);
   *   <li>for every repository whose name starts with {@code DATASET_SHEET_PREFIX}: register an
   *       OmxRepository (and a lookup-table repository per categorical feature) if none is
   *       registered yet, check that every column of the sheet is a known attribute, and add
   *       the sheet rows to the data service.
   * </ol>
   *
   * <p>Repositories registered during this call are unregistered again before a
   * MolgenisValidationException raised by the column check or by adding the rows is propagated.
   *
   * @param repositories source repositories to import
   * @param databaseAction action passed on to the entities importer
   * @return the import report, including the row count of every imported data sheet
   * @throws IOException declared by the interface; triggers a transaction rollback
   * @throws MolgenisValidationException if a rule, column-name or row validation fails
   */
  @Override
  @Transactional(rollbackFor = IOException.class)
  public EntityImportReport doImport(
      RepositoryCollection repositories, DatabaseAction databaseAction) throws IOException {
    // All new repository identifiers
    List<String> newRepoIdentifiers = new ArrayList<String>();

    // First import entities, the data sheets are ignored in the entitiesimporter
    EntityImportReport importReport = entitiesImporter.importEntities(repositories, databaseAction);

    // RULE: Feature can only belong to one Protocol in a DataSet. Check it (see issue #1136)
    checkFeatureCanOnlyBelongToOneProtocolForOneDataSet();

    // Import data sheets
    for (String name : repositories.getEntityNames()) {
      Repository repository = repositories.getRepositoryByEntityName(name);

      if (repository.getName().startsWith(DATASET_SHEET_PREFIX)) {
        // Import DataSet sheet, create new OmxRepository
        String identifier = repository.getName().substring(DATASET_SHEET_PREFIX.length());

        if (!dataService.hasRepository(identifier)) {

          dataService.addRepository(
              new AggregateableCrudRepositorySecurityDecorator(
                  new OmxRepository(dataService, searchService, identifier, entityValidator)));
          newRepoIdentifiers.add(identifier);

          DataSet dataSet =
              dataService.findOne(
                  DataSet.ENTITY_NAME,
                  new QueryImpl().eq(DataSet.IDENTIFIER, identifier),
                  DataSet.class);

          // Collect the categorical features of the data set's protocols; each of them gets
          // its own lookup-table repository below.
          List<Protocol> protocols =
              ProtocolUtils.getProtocolDescendants(dataSet.getProtocolUsed());
          List<ObservableFeature> categoricalFeatures = new ArrayList<ObservableFeature>();
          for (Protocol protocol : protocols) {
            List<ObservableFeature> observableFeatures = protocol.getFeatures();
            if (observableFeatures != null) {
              for (ObservableFeature observableFeature : observableFeatures) {
                String dataType = observableFeature.getDataType();
                FieldType type = MolgenisFieldTypes.getType(dataType);
                if (type.getEnumType() == FieldTypeEnum.CATEGORICAL) {
                  categoricalFeatures.add(observableFeature);
                }
              }
            }
          }
          for (ObservableFeature categoricalFeature : categoricalFeatures) {
            String lookupTableName =
                OmxLookupTableEntityMetaData.createOmxLookupTableEntityMetaDataName(
                    categoricalFeature.getIdentifier());
            if (!dataService.hasRepository(lookupTableName)) {
              dataService.addRepository(
                  new OmxLookupTableRepository(
                      dataService, categoricalFeature.getIdentifier(), queryResolver));
              newRepoIdentifiers.add(lookupTableName);
            }
          }
        }

        // Check if all column names in the excel sheet exist as attributes of the entity
        Set<ConstraintViolation> violations = Sets.newLinkedHashSet();
        EntityMetaData meta = dataService.getEntityMetaData(identifier);
        for (AttributeMetaData attr : repository.getEntityMetaData().getAttributes()) {
          if (meta.getAttribute(attr.getName()) == null) {
            String message =
                String.format(
                    "Unknown attributename '%s' for entity '%s'. Sheet: '%s'",
                    attr.getName(), meta.getName(), repository.getName());
            violations.add(new ConstraintViolation(message, attr.getName(), null, null, meta, 0));
          }
        }

        if (!violations.isEmpty()) {
          // Unregister the repositories added by this import before failing, exactly as is done
          // when adding the rows fails below; otherwise they would stay registered.
          for (String newRepoIdentifier : newRepoIdentifiers) {
            dataService.removeRepository(newRepoIdentifier);
          }

          throw new MolgenisValidationException(violations);
        }

        // Import data into new OmxRepository
        try {
          dataService.add(identifier, repository);
        } catch (MolgenisValidationException e) {
          // Add sheet info
          for (ConstraintViolation violation : e.getViolations()) {
            if (violation.getRownr() > 0) {

              // Rownr +1 for header
              violation.setImportInfo(
                  String.format(
                      "Sheet: '%s', row: %d", repository.getName(), violation.getRownr() + 1));
            } else {
              violation.setImportInfo(String.format("Sheet: '%s'", repository.getName()));
            }
          }

          for (String newRepoIdentifier : newRepoIdentifiers) {
            dataService.removeRepository(newRepoIdentifier);
          }

          throw e;
        }

        int count = (int) RepositoryUtils.count(repository);
        importReport.addEntityCount(identifier, count);
        importReport.addNrImported(count);
      }
    }

    return importReport;
  }