private List<Sample> getFileSamples( Study study, File file, ObjectMap indexFileModifyParams, boolean simulate, QueryOptions options, String sessionId) throws CatalogException { List<Sample> sampleList; QueryOptions queryOptions = new QueryOptions( "include", Arrays.asList("projects.studies.samples.id", "projects.studies.samples.name")); if (file.getSampleIds() == null || file.getSampleIds().isEmpty()) { // Read samples from file List<String> sampleNames = null; switch (file.getBioformat()) { case VARIANT: { if (file.getAttributes().containsKey("variantSource")) { Object variantSource = file.getAttributes().get("variantSource"); if (variantSource instanceof VariantSource) { sampleNames = ((VariantSource) variantSource).getSamples(); } else if (variantSource instanceof Map) { sampleNames = new ObjectMap((Map) variantSource).getAsStringList("samples"); } else { logger.warn( "Unexpected object type of variantSource ({}) in file attributes. Expected {} or {}", variantSource.getClass(), VariantSource.class, Map.class); } } if (sampleNames == null) { VariantSource variantSource = readVariantSource(catalogManager, study, file); indexFileModifyParams .get("attributes", ObjectMap.class) .put("variantSource", variantSource); sampleNames = variantSource.getSamples(); } } break; default: return new LinkedList<>(); // throw new CatalogException("Unknown to get samples names from // bioformat " + file.getBioformat()); } // Find matching samples in catalog with the sampleName from the VariantSource. queryOptions.add("name", sampleNames); sampleList = catalogManager.getAllSamples(study.getId(), queryOptions, sessionId).getResult(); // check if all file samples exists on Catalog if (sampleList.size() != sampleNames.size()) { // Size does not match. Find the missing samples. Set<String> set = new HashSet<>(sampleNames); for (Sample sample : sampleList) { set.remove(sample.getName()); } logger.warn("Missing samples: m{}", set); if (options.getBoolean(CREATE_MISSING_SAMPLES, true)) { for (String sampleName : set) { if (simulate) { sampleList.add(new Sample(-1, sampleName, file.getName(), null, null)); } else { sampleList.add( catalogManager .createSample( study.getId(), sampleName, file.getName(), null, null, null, sessionId) .first()); } } } else { throw new CatalogException( "Can not find samples " + set + " in catalog"); // FIXME: Create missing samples?? } } } else { // Get samples from file.sampleIds queryOptions.add("id", file.getSampleIds()); sampleList = catalogManager.getAllSamples(study.getId(), queryOptions, sessionId).getResult(); } List<Integer> sampleIdsList = new ArrayList<>(sampleList.size()); for (Sample sample : sampleList) { sampleIdsList.add(sample.getId()); // // sampleIdsString.append(sample.getName()).append(":").append(sample.getId()).append(","); } indexFileModifyParams.put("sampleIds", sampleIdsList); return sampleList; }
/** * @param study Study where file is located * @param file File to be indexed * @param indexFile Generated index file * @param sampleList * @param storageEngine StorageEngine to be used * @param outDirUri Index outdir * @param indexFileModifyParams This map will be used to modify the indexFile * @param dbName * @return CommandLine * @throws org.opencb.opencga.catalog.db.CatalogDBException * @throws CatalogIOManagerException */ private String createCommandLine( Study study, File file, File indexFile, List<Sample> sampleList, String storageEngine, URI outDirUri, final ObjectMap indexFileModifyParams, final String dbName, QueryOptions options) throws CatalogException { // Create command line String userId = file.getOwnerId(); String name = file.getName(); String commandLine; ObjectMap indexAttributes = indexFileModifyParams.get("attributes", ObjectMap.class); String opencgaStorageBin = Paths.get(Config.getOpenCGAHome(), "bin", OPENCGA_STORAGE_BIN_NAME).toString(); if (file.getBioformat() == File.Bioformat.ALIGNMENT || name.endsWith(".bam") || name.endsWith(".sam")) { int chunkSize = 200; // TODO: Read from properties. commandLine = new StringBuilder(opencgaStorageBin) .append(" --storage-engine ") .append(storageEngine) .append(" index-alignments ") .append(" --file-id ") .append(indexFile.getId()) .append(" --database ") .append(dbName) .append(" --input ") .append(catalogManager.getFileUri(file)) .append(" --calculate-coverage ") .append(chunkSize) .append(" --mean-coverage ") .append(chunkSize) .append(" --outdir ") .append(outDirUri) // .append(" --credentials ") .toString(); indexAttributes.put("chunkSize", chunkSize); } else if (name.endsWith(".fasta") || name.endsWith(".fasta.gz")) { throw new UnsupportedOperationException(); } else if (file.getBioformat() == File.Bioformat.VARIANT || name.contains(".vcf") || name.contains(".vcf.gz")) { StringBuilder sampleIdsString = new StringBuilder(); for (Sample sample : sampleList) { sampleIdsString.append(sample.getName()).append(":").append(sample.getId()).append(","); } StringBuilder sb = new StringBuilder(opencgaStorageBin) .append(" --storage-engine ") .append(storageEngine) .append(" index-variants ") .append(" --file-id ") .append(indexFile.getId()) .append(" --study-name \'") .append(study.getName()) .append("\'") .append(" --study-id ") .append(study.getId()) // .append(" --study-type ").append(study.getType()) .append(" --database ") .append(dbName) .append(" --input ") .append(catalogManager.getFileUri(file)) .append(" --outdir ") .append(outDirUri) .append(" --include-genotypes ") .append(" --compress-genotypes ") .append(" --include-stats ") // .append(" --sample-ids ").append(sampleIdsString) // .append(" --credentials ") ; if (options.getBoolean(VariantStorageManager.ANNOTATE, true)) { sb.append(" --annotate "); } if (options.getBoolean(VariantStorageManager.INCLUDE_SRC, false)) { sb.append(" --include-src "); } commandLine = sb.toString(); } else { return null; } indexAttributes.put(INDEXED_FILE, file.getId()); indexAttributes.put(DB_NAME, dbName); indexAttributes.put(STORAGE_ENGINE, storageEngine); return commandLine; }