@Override public List<QueryResult> getAllByRegionList(List<Region> regionList, QueryOptions options) { // db.regulatory_region.find({"chunkIds": {$in:["1_200", "1_300"]}, "start": 601156}) QueryBuilder builder = new QueryBuilder(); List<Object> featureType = options.getList("featureType", null); List<Object> featureClass = options.getList("featureClass", null); // options = addExcludeReturnFields("chunkIds", options); List<DBObject> queries = new ArrayList<>(); for (Region region : regionList) { int firstChunkId = getChunkId(region.getStart(), regulatoryRegionChunkSize); int lastChunkId = getChunkId(region.getEnd(), regulatoryRegionChunkSize); BasicDBList chunksId = new BasicDBList(); for (int j = firstChunkId; j <= lastChunkId; j++) { String chunkId = region.getChromosome() + "_" + j + "_" + regulatoryRegionChunkSize / 1000 + "k"; chunksId.add(chunkId); } // logger.info(chunksId.toString()); builder = builder .start("_chunkIds") .in(chunksId) .and("start") .lessThanEquals(region.getEnd()) .and("end") .greaterThanEquals(region.getStart()); if (featureType != null && featureType.size() > 0) { BasicDBList featureTypeDBList = new BasicDBList(); featureTypeDBList.addAll(featureType); builder = builder.and("featureType").in(featureTypeDBList); } if (featureClass != null && featureClass.size() > 0) { BasicDBList featureClassDBList = new BasicDBList(); featureClassDBList.addAll(featureClass); builder = builder.and("featureClass").in(featureClassDBList); } queries.add(builder.get()); } // System.out.println(">>"+regionList); // System.out.println(">>"+builder.get().toString()); return executeQueryList2(regionList, queries, options); }
public QueryResult getSimpleVariantsByRegion( Region region, String sourceId, QueryOptions options) { Long start, end, dbstart, dbend; start = System.currentTimeMillis(); boolean includeStats; boolean includeEffects; if (!options.containsKey("stats") && !options.containsKey("effects")) { includeStats = true; includeEffects = true; } else { includeStats = options.containsKey("stats") && options.getBoolean("stats"); includeEffects = options.containsKey("effects") && options.getBoolean("effects"); } QueryResult<Variant> queryResult = new QueryResult<>( String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd())); List<Variant> results = new ArrayList<>(); String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart())); String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd())); BasicDBObject query = new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow)) .append("sources.sourceId", sourceId); DBCollection collection = db.getCollection("variants"); dbstart = System.currentTimeMillis(); DBCursor variantInStudies = collection.find(query); dbend = System.currentTimeMillis(); queryResult.setDbTime(dbend - dbstart); for (DBObject result : variantInStudies) { String[] rowkeyParts = result.get("position").toString().split("_"); String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", ""); int position = Integer.parseInt(rowkeyParts[1]); BasicDBList studies = (BasicDBList) result.get("sources"); BasicDBObject st = (BasicDBObject) studies.get(0); String ref = (String) st.get("ref"); String alt = StringUtils.join((ArrayList<String>) st.get("alt"), ","); // TODO Needs rework Variant variant = new Variant(chromosome, position, position, ref, alt); // Set stats informations if (includeStats) { VariantStats stats = new VariantStats(); BasicDBObject mongoStats = (BasicDBObject) st.get("stats"); stats.setMaf((float) (double) mongoStats.get("maf")); stats.setMafAllele((String) mongoStats.get("alleleMaf")); stats.setMissingGenotypes((int) mongoStats.get("missing")); List<Genotype> genotypeCount = new ArrayList<>(); for (BasicDBObject s : (List<BasicDBObject>) mongoStats.get("genotypeCount")) { for (Map.Entry<String, Object> entry : s.entrySet()) { Genotype genotype = new Genotype(entry.getKey()); genotype.setCount((Integer) entry.getValue()); genotypeCount.add(genotype); } } stats.setGenotypes(genotypeCount); variant.setStats(stats); } // TODO Set consequence type names if (includeEffects) { BasicDBList mongoEffects = (BasicDBList) st.get("effects"); if (mongoEffects != null) { for (Object e : mongoEffects) { String effectObo = e.toString(); VariantEffect effect = new VariantEffect(); effect.setConsequenceTypeObo(effectObo); variant.addEffect(effect); } } } results.add(variant); } queryResult.setResult(results); queryResult.setNumResults(results.size()); end = System.currentTimeMillis(); queryResult.setTime(end - start); return queryResult; }
@Override public QueryResult<Variant> getAllVariantsByRegionAndStudy( Region region, String sourceId, QueryOptions options) { Long start, end, dbstart, dbend; start = System.currentTimeMillis(); QueryResult<Variant> queryResult = new QueryResult<>( String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd())); List<Variant> results = new LinkedList<>(); boolean includeSamples; boolean includeStats; boolean includeEffects; if (!options.containsKey("samples") && !options.containsKey("stats") && !options.containsKey("effects")) { includeSamples = true; includeStats = true; includeEffects = true; } else { includeSamples = options.containsKey("samples") && options.getBoolean("samples"); includeStats = options.containsKey("stats") && options.getBoolean("stats"); includeEffects = options.containsKey("effects") && options.getBoolean("effects"); } try { String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart())); String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd())); HTable table = new HTable(admin.getConfiguration(), tableName); dbstart = System.currentTimeMillis(); Scan regionScan = new Scan(startRow.getBytes(), stopRow.getBytes()); ResultScanner scanres = table.getScanner(regionScan); dbend = System.currentTimeMillis(); queryResult.setDbTime(dbend - dbstart); // Iterate over results and, optionally, their samples and statistics for (Result result : scanres) { String[] rowkeyParts = new String(result.getRow(), CHARSET_UTF_8).split("_"); String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", ""); int position = Integer.parseInt(rowkeyParts[1]); // Get basic result fields from Protocol Buffers message NavigableMap<byte[], byte[]> infoMap = result.getFamilyMap("i".getBytes()); byte[] byteInfo = infoMap.get((sourceId + "_data").getBytes()); VariantFieldsProtos.VariantInfo protoInfo = VariantFieldsProtos.VariantInfo.parseFrom(byteInfo); String reference = protoInfo.getReference(); String alternate = StringUtils.join(protoInfo.getAlternateList(), ","); String format = StringUtils.join(protoInfo.getFormatList(), ":"); Variant variant = new Variant(chromosome, position, position, reference, alternate); // Set samples if requested if (includeSamples) { NavigableMap<byte[], byte[]> sampleMap = result.getFamilyMap("d".getBytes()); Map<String, Map<String, String>> resultSampleMap = new HashMap<>(); // Set samples for (byte[] s : sampleMap.keySet()) { String sampleName = (new String(s, CHARSET_UTF_8)).replaceAll(sourceId + "_", ""); VariantFieldsProtos.VariantSample sample = VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s)); String sample1 = sample.getSample(); String[] values = sample1.split(":"); String[] fields = format.split(":"); Map<String, String> singleSampleMap = new HashMap<>(); for (int i = 0; i < fields.length; i++) { singleSampleMap.put(fields[i], values[i]); } // TODO // variant.addSampleData(sampleName, singleSampleMap); } } // Set stats if requested if (includeStats) { byte[] byteStats = infoMap.get((sourceId + "_stats").getBytes()); VariantFieldsProtos.VariantStats protoStats = VariantFieldsProtos.VariantStats.parseFrom(byteStats); VariantStats variantStats = new VariantStats( chromosome, position, reference, alternate, protoStats.getMaf(), protoStats.getMgf(), protoStats.getMafAllele(), protoStats.getMgfGenotype(), protoStats.getMissingAlleles(), protoStats.getMissingGenotypes(), protoStats.getMendelianErrors(), protoStats.getIsIndel(), protoStats.getCasesPercentDominant(), protoStats.getControlsPercentDominant(), protoStats.getCasesPercentRecessive(), protoStats.getControlsPercentRecessive()); variant.setStats(variantStats); } if (includeEffects) { QueryResult<VariantEffect> queryEffects = getEffectsByVariant(variant, options); variant.setEffect(queryEffects.getResult()); } results.add(variant); } } catch (IOException e) { System.err.println(e.getClass().getName() + ": " + e.getMessage()); } queryResult.setResult(results); queryResult.setNumResults(results.size()); end = System.currentTimeMillis(); queryResult.setTime(end - start); return queryResult; }
@Override public QueryResult getVariantsHistogramByRegion( Region region, String sourceId, boolean histogramLogarithm, int histogramMax) { QueryResult<ObjectMap> queryResult = new QueryResult<>( String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd())); List<ObjectMap> data = new ArrayList<>(); String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart())); String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd())); long startTime = System.currentTimeMillis(); long startDbTime = System.currentTimeMillis(); BasicDBObject query = new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow)) .append("studies.studyId", sourceId); DBCollection collection = db.getCollection("variants"); DBCursor queryResults = collection.find(query); queryResult.setDbTime(System.currentTimeMillis() - startDbTime); int resultSize = queryResults.size(); if (resultSize > histogramMax) { // Need to group results to fit maximum size of the histogram int sumChunkSize = resultSize / histogramMax; int i = 0, j = 0; int featuresCount = 0; ObjectMap item = null; for (DBObject result : queryResults) { // featuresCount += result.getInt("features_count"); // if (i == 0) { // item = new ObjectMap("chromosome", result.getString("chromosome")); // item.put("chunkId", result.getInt("chunk_id")); // item.put("start", result.getInt("start")); // } else if (i == sumChunkSize - 1 || j == resultSize - 1) { // if (histogramLogarithm) { // item.put("featuresCount", (featuresCount > 0) ? // Math.log(featuresCount) : 0); // } else { // item.put("featuresCount", featuresCount); // } // item.put("end", result.getInt("end")); // data.add(item); // i = -1; // featuresCount = 0; // } // j++; // i++; } } else { for (DBObject result : queryResults) { // ObjectMap item = new ObjectMap("chromosome", // result.getString("chromosome")); // item.put("chunkId", result.getInt("chunk_id")); // item.put("start", result.getInt("start")); // if (histogramLogarithm) { // int features_count = result.getInt("features_count"); // result.put("featuresCount", (features_count > 0) ? // Math.log(features_count) : 0); // } else { // item.put("featuresCount", result.getInt("features_count")); // } // item.put("end", result.getInt("end")); // data.add(item); } } queryResult.setResult(data); queryResult.setNumResults(data.size()); queryResult.setTime(System.currentTimeMillis() - startTime); return queryResult; }