/**
 * Looks up the given gene name(s) in the CellBase REST service and converts every gene entry of
 * the JSON response into a MongoDB region clause of the form
 * {@code {chr: <chromosome>, pos: {$gte: <start>, $lte: <end>}}}.
 *
 * <p>The response is walked two levels deep: only top-level elements that are JSON arrays are
 * inspected, and each element of such an array is treated as one gene entry. Entries lacking any
 * of the {@code chromosome}, {@code start} or {@code end} fields are skipped.
 *
 * @param genes value appended as a path segment to the CellBase gene endpoint
 * @return the region clauses found; empty when nothing usable was returned or the response could
 *     not be parsed
 */
private BasicDBList processGeneList(String genes) {
  BasicDBList list = new BasicDBList();

  Client wsRestClient = Client.create();
  try {
    WebResource webResource =
        wsRestClient.resource("http://ws.bioinfo.cipf.es/cellbase/rest/latest/hsa/feature/gene/");
    String response =
        webResource.path(genes).path("info").queryParam("of", "json").get(String.class);

    ObjectMapper mapper = new ObjectMapper();
    JsonNode root = mapper.readTree(response);

    for (JsonNode node : root) {
      if (!node.isArray()) {
        continue;
      }
      for (JsonNode gene : node) {
        JsonNode chromosome = gene.get("chromosome");
        JsonNode geneStart = gene.get("start");
        JsonNode geneEnd = gene.get("end");
        if (chromosome == null || geneStart == null || geneEnd == null) {
          // Incomplete entry: without all three fields no region clause can be built.
          continue;
        }

        DBObject regionClause = new BasicDBObject("chr", chromosome.asText());
        regionClause.put(
            "pos", new BasicDBObject("$gte", geneStart.asInt()).append("$lte", geneEnd.asInt()));
        list.add(regionClause);
      }
    }
  } catch (IOException e) {
    // Best effort: an unparsable response yields whatever was collected so far (normally an
    // empty list); the caller decides how to report that.
    e.printStackTrace();
  } finally {
    // A client is created for every call, so release it once the request is done.
    wsRestClient.destroy();
  }

  return list;
}
/**
 * Queries the {@code variants} collection for the study named by the {@code studyId} option and
 * converts each matching document into a {@link VariantInfo}.
 *
 * <p>Option keys read from {@code options}:
 *
 * <ul>
 *   <li>{@code studyId} - value matched against {@code sources.sourceId}.
 *   <li>{@code region} - comma-separated list of {@code chr:start-end} or bare {@code chr}
 *       entries; takes precedence over {@code genes}.
 *   <li>{@code genes} - resolved to regions through {@code processGeneList}; if nothing is
 *       resolved the method returns early with the warning {@code "Wrong gene name"}.
 *   <li>{@code conseq_type} - comma-separated values matched against {@code effects}.
 *   <li>{@code miss_gt} / {@code option_miss_gt} - filter on {@code stats.missGenotypes}, built by
 *       {@code getMongoOption}.
 *   <li>{@code maf_*_controls} - per-population thresholds on the {@code attributes.*_maf} fields.
 *   <li>{@code sort} - passed to {@code getQuerySort}.
 * </ul>
 *
 * Sample genotype filters are obtained from {@code processSamplesGT(options)}.
 *
 * @param page not used by this method
 * @param start number of documents to skip
 * @param limit maximum number of documents to fetch
 * @param count receives the value reported by {@code cursor.count()}
 * @param options filter and sort options as described above
 * @return the converted records together with elapsed and database times in milliseconds
 */
public QueryResult<VariantInfo> getRecordsMongo(
    int page, int start, int limit, MutableInt count, Map<String, String> options) {
  long startTime = System.currentTimeMillis();

  QueryResult<VariantInfo> queryResult = new QueryResult<>();
  List<VariantInfo> res = new ArrayList<>();

  String sourceId = options.get("studyId");
  DBCollection coll = db.getCollection("variants");

  // Conditions collected here must all hold for the same element of the "sources" array.
  BasicDBObject elemMatch = new BasicDBObject("sourceId", sourceId);
  DBObject query = new BasicDBObject();

  Map<String, List<String>> sampleGenotypes = processSamplesGT(options);

  System.out.println("map = " + options);

  if (options.containsKey("region") && !options.get("region").equals("")) {
    query.put("$or", buildRegionClauses(options.get("region")));
  } else if (options.containsKey("genes") && !options.get("genes").equals("")) {
    BasicDBList geneClauses = processGeneList(options.get("genes"));
    if (geneClauses.size() > 0) {
      query.put("$or", geneClauses);
    } else {
      queryResult.setWarningMsg("Wrong gene name");
      queryResult.setResult(res);
      queryResult.setNumResults(res.size());
      return queryResult;
    }
  }

  if (options.containsKey("conseq_type") && !options.get("conseq_type").equals("")) {
    BasicDBList ctList = new BasicDBList();
    for (String ct : options.get("conseq_type").split(",")) {
      ctList.add(ct);
    }
    elemMatch.put("effects", new BasicDBObject("$in", ctList));
  }

  for (Map.Entry<String, List<String>> entry : sampleGenotypes.entrySet()) {
    BasicDBList gtList = new BasicDBList();
    for (String gt : entry.getValue()) {
      gtList.add(gt);
    }
    elemMatch.put("samples." + entry.getKey() + ".GT", new BasicDBObject("$in", gtList));
  }

  if (options.containsKey("miss_gt") && !options.get("miss_gt").equalsIgnoreCase("")) {
    Integer val = Integer.valueOf(options.get("miss_gt"));
    Object missGt = getMongoOption(options.get("option_miss_gt"), val);
    elemMatch.put("stats.missGenotypes", missGt);
  }

  BasicDBList andControls = new BasicDBList();
  addMafControlClause(options, "maf_1000g_controls", "attributes.1000G_maf", andControls);
  addMafControlClause(options, "maf_1000g_afr_controls", "attributes.1000G_AFR_maf", andControls);
  addMafControlClause(options, "maf_1000g_asi_controls", "attributes.1000G_ASI_maf", andControls);
  addMafControlClause(options, "maf_1000g_eur_controls", "attributes.1000G_EUR_maf", andControls);
  addMafControlClause(options, "maf_1000g_ame_controls", "attributes.1000G_AME_maf", andControls);
  addMafControlClause(options, "maf_evs_controls", "attributes.EVS_maf", andControls);
  addMafControlClause(options, "maf_bier_controls", "attributes.BIER_maf", andControls);

  if (andControls.size() > 0) {
    elemMatch.append("$and", andControls);
  }

  query.put("sources", new BasicDBObject("$elemMatch", elemMatch));

  System.out.println("#############################");
  System.out.println(query);
  System.out.println("#############################");

  long dbStart = System.currentTimeMillis();

  DBCursor cursor;
  if (options.containsKey("sort")) {
    DBObject sort = getQuerySort(options.get("sort"));
    cursor = coll.find(query).sort(sort).skip(start).limit(limit);
  } else {
    cursor = coll.find(query).skip(start).limit(limit);
  }

  try {
    count.setValue(cursor.count());

    // Elapsed time is "now - start"; the reverse order would always yield a negative duration.
    queryResult.setDbTime(System.currentTimeMillis() - dbStart);

    for (DBObject obj : cursor) {
      res.add(mongoRecordToVariantInfo((BasicDBObject) obj, sourceId));
    }
  } finally {
    cursor.close();
  }

  queryResult.setResult(res);
  queryResult.setNumResults(res.size());
  queryResult.setTime(System.currentTimeMillis() - startTime);

  return queryResult;
}

/**
 * Parses a comma-separated region list into MongoDB clauses. An entry containing
 * {@code chr:start-end} becomes a chromosome plus position-range clause; any other entry
 * containing at least one word character becomes a chromosome-only clause; everything else is
 * ignored.
 */
private BasicDBList buildRegionClauses(String regionList) {
  // Compiled once per call instead of once per region entry.
  Pattern rangePattern = Pattern.compile("(\\w+):(\\d+)-(\\d+)");
  Pattern chromosomePattern = Pattern.compile("(\\w+)");

  BasicDBList clauses = new BasicDBList();
  for (String region : regionList.split(",")) {
    Matcher range = rangePattern.matcher(region);
    if (range.find()) {
      int from = Integer.parseInt(range.group(2));
      int to = Integer.parseInt(range.group(3));
      DBObject regionClause = new BasicDBObject("chr", range.group(1));
      regionClause.put("pos", new BasicDBObject("$gte", from).append("$lte", to));
      clauses.add(regionClause);
    } else {
      Matcher chromosome = chromosomePattern.matcher(region);
      if (chromosome.find()) {
        clauses.add(new BasicDBObject("chr", chromosome.group()));
      }
    }
  }
  return clauses;
}

/**
 * Appends "attribute is absent OR attribute <= threshold" to {@code andControls} when the option
 * {@code optionKey} is present and non-empty; does nothing otherwise.
 */
private void addMafControlClause(
    Map<String, String> options, String optionKey, String attribute, BasicDBList andControls) {
  String threshold = options.get(optionKey);
  if (threshold == null || threshold.isEmpty()) {
    return;
  }

  BasicDBList or = new BasicDBList();
  or.add(new BasicDBObject(attribute, new BasicDBObject("$exists", false)));
  // The threshold is sent as the raw option string, exactly as supplied by the caller.
  or.add(new BasicDBObject(attribute, new BasicDBObject("$lte", threshold)));
  andControls.add(new BasicDBObject("$or", or));
}

/**
 * Builds a {@link VariantInfo} from one document of the {@code variants} collection, taking the
 * per-study data from every {@code sources} element whose {@code sourceId} equals
 * {@code sourceId} ignoring case.
 */
private VariantInfo mongoRecordToVariantInfo(BasicDBObject elem, String sourceId) {
  VariantInfo vi = new VariantInfo();
  VariantStats vs = new VariantStats();

  vi.setChromosome(elem.getString("chr"));
  vi.setPosition(elem.getInt("pos"));

  BasicDBList studies = (BasicDBList) elem.get("sources");
  for (Object source : studies) {
    BasicDBObject study = (BasicDBObject) source;
    if (!study.getString("sourceId").equalsIgnoreCase(sourceId)) {
      continue;
    }

    BasicDBObject stats = (BasicDBObject) study.get("stats");
    String ref = study.getString("ref");
    BasicDBList alt = (BasicDBList) study.get("alt");

    vi.setRef(ref);
    vi.setAlt(Joiner.on(",").join(alt.toArray()));

    vs.setMaf((float) stats.getDouble("maf"));
    vs.setMgf((float) stats.getDouble("mgf"));
    vs.setMafAllele(stats.getString("alleleMaf"));
    vs.setMgfAllele(stats.getString("genotypeMaf"));
    vs.setMissingAlleles(stats.getInt("missAllele"));
    vs.setMissingGenotypes(stats.getInt("missGenotypes"));
    vs.setMendelinanErrors(stats.getInt("mendelErr"));
    vs.setCasesPercentDominant((float) stats.getDouble("casesPercentDominant"));
    vs.setControlsPercentDominant((float) stats.getDouble("controlsPercentDominant"));
    vs.setCasesPercentRecessive((float) stats.getDouble("casesPercentRecessive"));
    vs.setControlsPercentRecessive((float) stats.getDouble("controlsPercentRecessive"));

    BasicDBObject samples = (BasicDBObject) study.get("samples");
    for (String sampleName : samples.keySet()) {
      DBObject sample = (DBObject) samples.get(sampleName);
      if (sample.containsField("GT")) {
        vi.addSammpleGenotype(sampleName, (String) sample.get("GT"));
      }
    }

    vi.setSnpid((String) study.get("snpId"));

    if (study.containsField("effects")) {
      BasicDBList conseqTypes = (BasicDBList) study.get("effects");
      conseqTypes.remove("");
      vi.addConsequenceTypes(Joiner.on(",").join(conseqTypes.iterator()));
    }

    if (study.containsField("genes")) {
      BasicDBList genesList = (BasicDBList) study.get("genes");
      vi.addGenes(Joiner.on(",").join(genesList.iterator()));
    }

    if (study.containsField("attributes")) {
      BasicDBObject attr = (BasicDBObject) study.get("attributes");

      // Population-specific 1000G controls are exposed with a '-' after "1000G", while the
      // stored attribute names use '_'.
      copyControlAttributes(vi, attr, "1000G", "1000G");
      copyControlAttributes(vi, attr, "1000G_ASI", "1000G-ASI");
      copyControlAttributes(vi, attr, "1000G_AFR", "1000G-AFR");
      copyControlAttributes(vi, attr, "1000G_AME", "1000G-AME");
      copyControlAttributes(vi, attr, "1000G_EUR", "1000G-EUR");
      copyControlAttributes(vi, attr, "EVS", "EVS");
      copyControlAttributes(vi, attr, "BIER", "BIER");

      if (attr.containsField("PolyphenScore")) {
        vi.setPolyphen_score(Double.parseDouble(attr.getString("PolyphenScore")));
        vi.setPolyphen_effect(Integer.parseInt(attr.getString("PolyphenEffect")));
      }

      if (attr.containsField("SIFTScore")) {
        vi.setSift_score(Double.parseDouble(attr.getString("SIFTScore")));
        vi.setSift_effect(Integer.parseInt(attr.getString("SIFTEffect")));
      }
    }
  }

  vi.addStats(vs);
  return vi;
}

/**
 * Copies the {@code _maf}, {@code _amaf} and {@code _gt} attributes stored under
 * {@code attrPrefix} into {@code vi} as controls named with {@code controlPrefix}, provided the
 * {@code _maf} attribute exists.
 */
private void copyControlAttributes(
    VariantInfo vi, BasicDBObject attr, String attrPrefix, String controlPrefix) {
  if (!attr.containsField(attrPrefix + "_maf")) {
    return;
  }
  vi.addControl(controlPrefix + "_maf", (String) attr.get(attrPrefix + "_maf"));
  vi.addControl(controlPrefix + "_amaf", (String) attr.get(attrPrefix + "_amaf"));
  vi.addControl(controlPrefix + "_gt", (String) attr.get(attrPrefix + "_gt"));
}
public QueryResult getSimpleVariantsByRegion( Region region, String sourceId, QueryOptions options) { Long start, end, dbstart, dbend; start = System.currentTimeMillis(); boolean includeStats; boolean includeEffects; if (!options.containsKey("stats") && !options.containsKey("effects")) { includeStats = true; includeEffects = true; } else { includeStats = options.containsKey("stats") && options.getBoolean("stats"); includeEffects = options.containsKey("effects") && options.getBoolean("effects"); } QueryResult<Variant> queryResult = new QueryResult<>( String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd())); List<Variant> results = new ArrayList<>(); String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart())); String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd())); BasicDBObject query = new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow)) .append("sources.sourceId", sourceId); DBCollection collection = db.getCollection("variants"); dbstart = System.currentTimeMillis(); DBCursor variantInStudies = collection.find(query); dbend = System.currentTimeMillis(); queryResult.setDbTime(dbend - dbstart); for (DBObject result : variantInStudies) { String[] rowkeyParts = result.get("position").toString().split("_"); String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", ""); int position = Integer.parseInt(rowkeyParts[1]); BasicDBList studies = (BasicDBList) result.get("sources"); BasicDBObject st = (BasicDBObject) studies.get(0); String ref = (String) st.get("ref"); String alt = StringUtils.join((ArrayList<String>) st.get("alt"), ","); // TODO Needs rework Variant variant = new Variant(chromosome, position, position, ref, alt); // Set stats informations if (includeStats) { VariantStats stats = new VariantStats(); BasicDBObject mongoStats = (BasicDBObject) st.get("stats"); stats.setMaf((float) (double) mongoStats.get("maf")); 
stats.setMafAllele((String) mongoStats.get("alleleMaf")); stats.setMissingGenotypes((int) mongoStats.get("missing")); List<Genotype> genotypeCount = new ArrayList<>(); for (BasicDBObject s : (List<BasicDBObject>) mongoStats.get("genotypeCount")) { for (Map.Entry<String, Object> entry : s.entrySet()) { Genotype genotype = new Genotype(entry.getKey()); genotype.setCount((Integer) entry.getValue()); genotypeCount.add(genotype); } } stats.setGenotypes(genotypeCount); variant.setStats(stats); } // TODO Set consequence type names if (includeEffects) { BasicDBList mongoEffects = (BasicDBList) st.get("effects"); if (mongoEffects != null) { for (Object e : mongoEffects) { String effectObo = e.toString(); VariantEffect effect = new VariantEffect(); effect.setConsequenceTypeObo(effectObo); variant.addEffect(effect); } } } results.add(variant); } queryResult.setResult(results); queryResult.setNumResults(results.size()); end = System.currentTimeMillis(); queryResult.setTime(end - start); return queryResult; }