@Override public QueryResult<Variant> getAllVariantsByRegionAndStudy( Region region, String sourceId, QueryOptions options) { Long start, end, dbstart, dbend; start = System.currentTimeMillis(); QueryResult<Variant> queryResult = new QueryResult<>( String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd())); List<Variant> results = new LinkedList<>(); boolean includeSamples; boolean includeStats; boolean includeEffects; if (!options.containsKey("samples") && !options.containsKey("stats") && !options.containsKey("effects")) { includeSamples = true; includeStats = true; includeEffects = true; } else { includeSamples = options.containsKey("samples") && options.getBoolean("samples"); includeStats = options.containsKey("stats") && options.getBoolean("stats"); includeEffects = options.containsKey("effects") && options.getBoolean("effects"); } try { String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart())); String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd())); HTable table = new HTable(admin.getConfiguration(), tableName); dbstart = System.currentTimeMillis(); Scan regionScan = new Scan(startRow.getBytes(), stopRow.getBytes()); ResultScanner scanres = table.getScanner(regionScan); dbend = System.currentTimeMillis(); queryResult.setDbTime(dbend - dbstart); // Iterate over results and, optionally, their samples and statistics for (Result result : scanres) { String[] rowkeyParts = new String(result.getRow(), CHARSET_UTF_8).split("_"); String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", ""); int position = Integer.parseInt(rowkeyParts[1]); // Get basic result fields from Protocol Buffers message NavigableMap<byte[], byte[]> infoMap = result.getFamilyMap("i".getBytes()); byte[] byteInfo = infoMap.get((sourceId + "_data").getBytes()); VariantFieldsProtos.VariantInfo protoInfo = VariantFieldsProtos.VariantInfo.parseFrom(byteInfo); String reference = protoInfo.getReference(); String alternate = StringUtils.join(protoInfo.getAlternateList(), ","); String format = StringUtils.join(protoInfo.getFormatList(), ":"); Variant variant = new Variant(chromosome, position, position, reference, alternate); // Set samples if requested if (includeSamples) { NavigableMap<byte[], byte[]> sampleMap = result.getFamilyMap("d".getBytes()); Map<String, Map<String, String>> resultSampleMap = new HashMap<>(); // Set samples for (byte[] s : sampleMap.keySet()) { String sampleName = (new String(s, CHARSET_UTF_8)).replaceAll(sourceId + "_", ""); VariantFieldsProtos.VariantSample sample = VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s)); String sample1 = sample.getSample(); String[] values = sample1.split(":"); String[] fields = format.split(":"); Map<String, String> singleSampleMap = new HashMap<>(); for (int i = 0; i < fields.length; i++) { singleSampleMap.put(fields[i], values[i]); } // TODO // variant.addSampleData(sampleName, singleSampleMap); } } // Set stats if requested if (includeStats) { byte[] byteStats = infoMap.get((sourceId + "_stats").getBytes()); VariantFieldsProtos.VariantStats protoStats = VariantFieldsProtos.VariantStats.parseFrom(byteStats); VariantStats variantStats = new VariantStats( chromosome, position, reference, alternate, protoStats.getMaf(), protoStats.getMgf(), protoStats.getMafAllele(), protoStats.getMgfGenotype(), protoStats.getMissingAlleles(), protoStats.getMissingGenotypes(), protoStats.getMendelianErrors(), protoStats.getIsIndel(), protoStats.getCasesPercentDominant(), protoStats.getControlsPercentDominant(), protoStats.getCasesPercentRecessive(), protoStats.getControlsPercentRecessive()); variant.setStats(variantStats); } if (includeEffects) { QueryResult<VariantEffect> queryEffects = getEffectsByVariant(variant, options); variant.setEffect(queryEffects.getResult()); } results.add(variant); } } catch (IOException e) { System.err.println(e.getClass().getName() + ": " + e.getMessage()); } queryResult.setResult(results); queryResult.setNumResults(results.size()); end = System.currentTimeMillis(); queryResult.setTime(end - start); return queryResult; }
public QueryResult<File> index( int fileId, int outDirId, String storageEngine, String sessionId, QueryOptions options) throws IOException, CatalogException, AnalysisExecutionException { if (options == null) { options = new QueryOptions(); } final boolean execute = options.getBoolean(AnalysisJobExecuter.EXECUTE); final boolean simulate = options.getBoolean(AnalysisJobExecuter.SIMULATE); final boolean recordOutput = options.getBoolean(AnalysisJobExecuter.RECORD_OUTPUT); final long start = System.currentTimeMillis(); /** Query catalog for user data. * */ String userId = catalogManager.getUserIdBySessionId(sessionId); File file = catalogManager.getFile(fileId, sessionId).first(); File outDir = catalogManager.getFile(outDirId, sessionId).first(); int studyIdByOutDirId = catalogManager.getStudyIdByFileId(outDirId); Study study = catalogManager.getStudy(studyIdByOutDirId, sessionId).getResult().get(0); if (file.getType() != File.Type.FILE) { throw new CatalogException( "Expected file type = " + File.Type.FILE + " instead of " + file.getType()); } final String dbName; if (options.containsKey(DB_NAME)) { dbName = options.getString(DB_NAME); } else { if (study.getAttributes().containsKey(DB_NAME) && study.getAttributes().get(DB_NAME) != null) { dbName = study.getAttributes().get(DB_NAME).toString(); } else { int projectId = catalogManager.getProjectIdByStudyId(study.getId()); String alias = catalogManager .getProject(projectId, new QueryOptions("include", "alias"), sessionId) .first() .getAlias(); dbName = Config.getAnalysisProperties() .getProperty(OPENCGA_ANALYSIS_STORAGE_DATABASE_PREFIX, "opencga_") + userId + "_" + alias; } } // TODO: Check if file can be indexed // ObjectMap to fill with modifications over the indexed file (like new attributes or jobId) ObjectMap indexFileModifyParams = new ObjectMap("attributes", new ObjectMap()); /** Create temporal Job Outdir * */ final URI temporalOutDirUri; final String randomString = "I_" + StringUtils.randomString(10); if (simulate) { temporalOutDirUri = createSimulatedOutDirUri(randomString); } else { temporalOutDirUri = catalogManager.createJobOutDir(studyIdByOutDirId, randomString, sessionId); } List<Sample> sampleList; /** Create index file* */ final File index; if (options.containsKey(INDEX_FILE_ID)) { logger.debug("Using an existing indexedFile."); int indexFileId = options.getInt(INDEX_FILE_ID); index = catalogManager.getFile(indexFileId, sessionId).first(); if (index.getType() != File.Type.INDEX) { throw new CatalogException("Expected {type: INDEX} in IndexedFile " + indexFileId); } if (index.getStatus() != File.Status.READY) { throw new CatalogException("Expected {status: READY} in IndexedFile " + indexFileId); } if (simulate) { index.setStatus(File.Status.INDEXING); } else { ObjectMap parameters = new ObjectMap("status", File.Status.INDEXING); catalogManager.modifyFile(index.getId(), parameters, sessionId); } /** Get file samples * */ sampleList = catalogManager .getAllSamples(study.getId(), new QueryOptions("id", index.getSampleIds()), sessionId) .getResult(); } else { /** Get file samples * */ sampleList = getFileSamples(study, file, indexFileModifyParams, simulate, options, sessionId); String indexedFileDescription = "Indexation of " + file.getName() + " (" + fileId + ")"; String indexedFileName = file.getName() + "." + storageEngine; String indexedFilePath = Paths.get(outDir.getPath(), indexedFileName).toString(); if (simulate) { index = new File( -10, indexedFileName, File.Type.INDEX, file.getFormat(), file.getBioformat(), indexedFilePath, userId, TimeUtils.getTime(), indexedFileDescription, File.Status.INDEXING, -1, -1, null, -1, null, null, new HashMap<String, Object>()); } else { index = catalogManager .createFile( studyIdByOutDirId, File.Type.INDEX, file.getFormat(), file.getBioformat(), indexedFilePath, null, null, indexedFileDescription, File.Status.INDEXING, 0, -1, null, -1, null, null, false, null, sessionId) .first(); } } /** Create commandLine * */ String commandLine = createCommandLine( study, file, index, sampleList, storageEngine, temporalOutDirUri, indexFileModifyParams, dbName, options); if (options.containsKey(PARAMETERS)) { List<String> extraParams = options.getAsStringList(PARAMETERS); for (String extraParam : extraParams) { commandLine += " " + extraParam; } } /** Create job * */ ObjectMap jobResourceManagerAttributes = new ObjectMap(); jobResourceManagerAttributes.put(Job.TYPE, Job.Type.INDEX); jobResourceManagerAttributes.put(Job.INDEXED_FILE_ID, index.getId()); String jobName = "index"; String jobDescription = "Indexing file " + file.getName() + " (" + fileId + ")"; final Job job = AnalysisJobExecuter.createJob( catalogManager, studyIdByOutDirId, jobName, OPENCGA_STORAGE_BIN_NAME, jobDescription, outDir, Collections.<Integer>emptyList(), sessionId, randomString, temporalOutDirUri, commandLine, execute, simulate, recordOutput, jobResourceManagerAttributes) .first(); if (simulate) { index.getAttributes().put("job", job); // index.getAttributes().putAll(indexFileModifyParams.getMap("attributes")); index.setSampleIds(indexFileModifyParams.getAsIntegerList("sampleIds")); // VariantSource variantSource = (VariantSource) // index.getAttributes().get("variantSource"); // for (Map.Entry<String, Integer> entry : // variantSource.getSamplesPosition().entrySet()) { // System.out.println("entry.getKey() = " + entry.getKey()); // System.out.println("entry.getValue() = " + entry.getValue()); // } // for (String s : variantSource.getSamples()) { // System.out.println("sample = " + s); // } // variantSource.setSamplesPosition(new HashMap<String, Integer>()); return new QueryResult<>( "indexFile", (int) (System.currentTimeMillis() - start), 1, 1, "", "", Collections.singletonList(index)); } else { /** Update IndexFile to add extra information (jobId, sampleIds, attributes, ...) * */ indexFileModifyParams.put("jobId", job.getId()); Set<Integer> jobIds; try { jobIds = new HashSet<>(new ObjectMap(index.getAttributes()).getAsIntegerList("jobIds")); } catch (Exception ignore) { jobIds = new HashSet<>(1); } if (index.getJobId() > 0) { jobIds.add(index.getJobId()); } jobIds.add(job.getId()); indexFileModifyParams.getMap("attributes").put("jobIds", jobIds); catalogManager.modifyFile(index.getId(), indexFileModifyParams, sessionId).getResult(); return new QueryResult<>( "indexFile", (int) (System.currentTimeMillis() - start), 1, 1, "", "", catalogManager.getFile(index.getId(), sessionId).getResult()); } }
public QueryResult getSimpleVariantsByRegion( Region region, String sourceId, QueryOptions options) { Long start, end, dbstart, dbend; start = System.currentTimeMillis(); boolean includeStats; boolean includeEffects; if (!options.containsKey("stats") && !options.containsKey("effects")) { includeStats = true; includeEffects = true; } else { includeStats = options.containsKey("stats") && options.getBoolean("stats"); includeEffects = options.containsKey("effects") && options.getBoolean("effects"); } QueryResult<Variant> queryResult = new QueryResult<>( String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd())); List<Variant> results = new ArrayList<>(); String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart())); String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd())); BasicDBObject query = new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow)) .append("sources.sourceId", sourceId); DBCollection collection = db.getCollection("variants"); dbstart = System.currentTimeMillis(); DBCursor variantInStudies = collection.find(query); dbend = System.currentTimeMillis(); queryResult.setDbTime(dbend - dbstart); for (DBObject result : variantInStudies) { String[] rowkeyParts = result.get("position").toString().split("_"); String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", ""); int position = Integer.parseInt(rowkeyParts[1]); BasicDBList studies = (BasicDBList) result.get("sources"); BasicDBObject st = (BasicDBObject) studies.get(0); String ref = (String) st.get("ref"); String alt = StringUtils.join((ArrayList<String>) st.get("alt"), ","); // TODO Needs rework Variant variant = new Variant(chromosome, position, position, ref, alt); // Set stats informations if (includeStats) { VariantStats stats = new VariantStats(); BasicDBObject mongoStats = (BasicDBObject) st.get("stats"); stats.setMaf((float) (double) mongoStats.get("maf")); stats.setMafAllele((String) mongoStats.get("alleleMaf")); stats.setMissingGenotypes((int) mongoStats.get("missing")); List<Genotype> genotypeCount = new ArrayList<>(); for (BasicDBObject s : (List<BasicDBObject>) mongoStats.get("genotypeCount")) { for (Map.Entry<String, Object> entry : s.entrySet()) { Genotype genotype = new Genotype(entry.getKey()); genotype.setCount((Integer) entry.getValue()); genotypeCount.add(genotype); } } stats.setGenotypes(genotypeCount); variant.setStats(stats); } // TODO Set consequence type names if (includeEffects) { BasicDBList mongoEffects = (BasicDBList) st.get("effects"); if (mongoEffects != null) { for (Object e : mongoEffects) { String effectObo = e.toString(); VariantEffect effect = new VariantEffect(); effect.setConsequenceTypeObo(effectObo); variant.addEffect(effect); } } } results.add(variant); } queryResult.setResult(results); queryResult.setNumResults(results.size()); end = System.currentTimeMillis(); queryResult.setTime(end - start); return queryResult; }