コード例 #1
0
  public QueryResult getSimpleVariantsByRegion(
      Region region, String sourceId, QueryOptions options) {
    Long start, end, dbstart, dbend;
    start = System.currentTimeMillis();
    boolean includeStats;
    boolean includeEffects;
    if (!options.containsKey("stats") && !options.containsKey("effects")) {
      includeStats = true;
      includeEffects = true;
    } else {
      includeStats = options.containsKey("stats") && options.getBoolean("stats");
      includeEffects = options.containsKey("effects") && options.getBoolean("effects");
    }

    QueryResult<Variant> queryResult =
        new QueryResult<>(
            String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd()));
    List<Variant> results = new ArrayList<>();
    String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart()));
    String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd()));
    BasicDBObject query =
        new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow))
            .append("sources.sourceId", sourceId);
    DBCollection collection = db.getCollection("variants");
    dbstart = System.currentTimeMillis();
    DBCursor variantInStudies = collection.find(query);
    dbend = System.currentTimeMillis();
    queryResult.setDbTime(dbend - dbstart);

    for (DBObject result : variantInStudies) {
      String[] rowkeyParts = result.get("position").toString().split("_");
      String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", "");
      int position = Integer.parseInt(rowkeyParts[1]);
      BasicDBList studies = (BasicDBList) result.get("sources");
      BasicDBObject st = (BasicDBObject) studies.get(0);
      String ref = (String) st.get("ref");
      String alt = StringUtils.join((ArrayList<String>) st.get("alt"), ",");

      // TODO Needs rework
      Variant variant = new Variant(chromosome, position, position, ref, alt);

      // Set stats informations
      if (includeStats) {
        VariantStats stats = new VariantStats();
        BasicDBObject mongoStats = (BasicDBObject) st.get("stats");
        stats.setMaf((float) (double) mongoStats.get("maf"));
        stats.setMafAllele((String) mongoStats.get("alleleMaf"));
        stats.setMissingGenotypes((int) mongoStats.get("missing"));
        List<Genotype> genotypeCount = new ArrayList<>();
        for (BasicDBObject s : (List<BasicDBObject>) mongoStats.get("genotypeCount")) {
          for (Map.Entry<String, Object> entry : s.entrySet()) {
            Genotype genotype = new Genotype(entry.getKey());
            genotype.setCount((Integer) entry.getValue());
            genotypeCount.add(genotype);
          }
        }
        stats.setGenotypes(genotypeCount);
        variant.setStats(stats);
      }

      // TODO Set consequence type names
      if (includeEffects) {
        BasicDBList mongoEffects = (BasicDBList) st.get("effects");
        if (mongoEffects != null) {
          for (Object e : mongoEffects) {
            String effectObo = e.toString();
            VariantEffect effect = new VariantEffect();
            effect.setConsequenceTypeObo(effectObo);
            variant.addEffect(effect);
          }
        }
      }

      results.add(variant);
    }

    queryResult.setResult(results);
    queryResult.setNumResults(results.size());
    end = System.currentTimeMillis();
    queryResult.setTime(end - start);
    return queryResult;
  }
コード例 #2
0
  public List<Variant> getRecordSimpleStats(
      String study, int missing_gt, float maf, String maf_allele) {
    BasicDBObject compare =
        new BasicDBObject("studies.stats.allele_maf", maf_allele)
            .append("studies.stats.MAF", maf)
            .append("studies.stats.missing", missing_gt);
    List<Get> hbaseQuery = new ArrayList<>();
    DBCollection collection = db.getCollection("variants");
    Iterator<DBObject> result = collection.find(compare);
    String chromosome = new String();
    while (result.hasNext()) {
      DBObject variant = result.next();
      String position = variant.get("_id").toString();
      // hbase query construction
      Get get = new Get(position.getBytes());
      hbaseQuery.add(get);
    }
    // Complete results, from HBase

    tableName = study;
    effectTableName = tableName + "effect";
    Map<String, Variant> resultsMap = new HashMap<>();

    try {
      HTable table = new HTable(admin.getConfiguration(), tableName);
      HTable effectTable = new HTable(admin.getConfiguration(), effectTableName);
      Result[] hbaseResultEffect = effectTable.get(hbaseQuery);
      Result[] hbaseResultStats = table.get(hbaseQuery);

      //            List<Variant> results = new LinkedList<>();
      for (Result r : hbaseResultStats) {
        String position = new String(r.getRow(), CHARSET_UTF_8);
        String[] aux = position.split("_");
        String inner_position = aux[1];
        String chr = aux[0];
        // position parsing
        if (chr.startsWith("0")) {
          chr = chr.substring(1);
        }
        while (inner_position.startsWith("0")) {
          inner_position = inner_position.substring(1);
        }
        List<VariantFieldsProtos.VariantSample> samples = new LinkedList<>();
        NavigableMap<byte[], byte[]> infoMap = r.getFamilyMap("i".getBytes());
        byte[] byteStats = infoMap.get((study + "_stats").getBytes());
        VariantFieldsProtos.VariantStats stats =
            VariantFieldsProtos.VariantStats.parseFrom(byteStats);
        byte[] byteInfo = infoMap.get((study + "_data").getBytes());
        VariantFieldsProtos.VariantInfo info = VariantFieldsProtos.VariantInfo.parseFrom(byteInfo);
        String alternate = StringUtils.join(info.getAlternateList(), ", ");
        String reference = info.getReference();
        Variant partialResult =
            new Variant(
                chr,
                Integer.parseInt(inner_position),
                Integer.parseInt(inner_position),
                reference,
                alternate);
        String format = StringUtils.join(info.getFormatList(), ":");
        NavigableMap<byte[], byte[]> sampleMap = r.getFamilyMap("d".getBytes());
        Map<String, Map<String, String>> resultSampleMap = new HashMap<>();
        //                StringBuilder sampleRaw = new StringBuilder();
        for (byte[] s : sampleMap.keySet()) {
          String qual = (new String(s, CHARSET_UTF_8)).replaceAll(study + "_", "");
          VariantFieldsProtos.VariantSample sample =
              VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s));
          String sample1 = sample.getSample();
          String[] values = sample1.split(":");
          String[] fields = format.split(":");
          Map<String, String> singleSampleMap = new HashMap<>();
          for (int i = 0; i < fields.length; i++) {
            singleSampleMap.put(fields[i], values[i]);
          }
          resultSampleMap.put(qual, singleSampleMap);
        }
        VariantStats variantStats =
            new VariantStats(
                chromosome,
                Integer.parseInt(inner_position),
                reference,
                alternate,
                stats.getMaf(),
                stats.getMgf(),
                stats.getMafAllele(),
                stats.getMgfGenotype(),
                stats.getMissingAlleles(),
                stats.getMissingGenotypes(),
                stats.getMendelianErrors(),
                stats.getIsIndel(),
                stats.getCasesPercentDominant(),
                stats.getControlsPercentDominant(),
                stats.getCasesPercentRecessive(),
                stats.getControlsPercentRecessive());
        partialResult.setStats(variantStats);
        resultsMap.put(new String(r.getRow(), CHARSET_UTF_8), partialResult);
      }

      for (Result r : hbaseResultEffect) {
        if (!r.isEmpty()) {
          NavigableMap<byte[], byte[]> effectMap = r.getFamilyMap("e".getBytes());
          Variant partialResult = resultsMap.get(new String(r.getRow(), CHARSET_UTF_8));
          System.out.println("Recuperado " + partialResult.toString());
          String s = partialResult.getReference() + "_" + partialResult.getAlternate();
          VariantEffectProtos.EffectInfo effectInfo =
              VariantEffectProtos.EffectInfo.parseFrom(effectMap.get(s.getBytes()));
          VariantEffect variantEffect =
              new VariantEffect(
                  partialResult.getChromosome(),
                  (int) partialResult.getStart(),
                  partialResult.getReference(),
                  partialResult.getAlternate(),
                  effectInfo.getFeatureId(),
                  effectInfo.getFeatureName(),
                  effectInfo.getFeatureType(),
                  effectInfo.getFeatureBiotype(),
                  effectInfo.getFeatureChromosome(),
                  effectInfo.getFeatureStart(),
                  effectInfo.getFeatureEnd(),
                  effectInfo.getFeatureStrand(),
                  effectInfo.getSnpId(),
                  effectInfo.getAncestral(),
                  effectInfo.getAlternative(),
                  effectInfo.getGeneId(),
                  effectInfo.getTranscriptId(),
                  effectInfo.getGeneName(),
                  effectInfo.getConsequenceType(),
                  effectInfo.getConsequenceTypeObo(),
                  effectInfo.getConsequenceTypeDesc(),
                  effectInfo.getConsequenceTypeType(),
                  effectInfo.getAaPosition(),
                  effectInfo.getAminoacidChange(),
                  effectInfo.getCodonChange());
          resultsMap.put(new String(r.getRow(), CHARSET_UTF_8), partialResult);
        }
      }
    } catch (InvalidProtocolBufferException e) {
      System.err.println(e.getClass().getName() + ": " + e.getMessage());
    } catch (IOException e) {
      System.err.println(e.getClass().getName() + ": " + e.getMessage());
    }

    List<Variant> results = new ArrayList<>(resultsMap.values());
    return results;
  }
コード例 #3
0
  @Override
  public QueryResult getVariantsHistogramByRegion(
      Region region, String sourceId, boolean histogramLogarithm, int histogramMax) {
    QueryResult<ObjectMap> queryResult =
        new QueryResult<>(
            String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd()));
    List<ObjectMap> data = new ArrayList<>();
    String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart()));
    String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd()));

    long startTime = System.currentTimeMillis();

    long startDbTime = System.currentTimeMillis();

    BasicDBObject query =
        new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow))
            .append("studies.studyId", sourceId);
    DBCollection collection = db.getCollection("variants");
    DBCursor queryResults = collection.find(query);
    queryResult.setDbTime(System.currentTimeMillis() - startDbTime);

    int resultSize = queryResults.size();

    if (resultSize > histogramMax) { // Need to group results to fit maximum size of the histogram
      int sumChunkSize = resultSize / histogramMax;
      int i = 0, j = 0;
      int featuresCount = 0;
      ObjectMap item = null;

      for (DBObject result : queryResults) {
        //                featuresCount += result.getInt("features_count");
        //                if (i == 0) {
        //                    item = new ObjectMap("chromosome", result.getString("chromosome"));
        //                    item.put("chunkId", result.getInt("chunk_id"));
        //                    item.put("start", result.getInt("start"));
        //                } else if (i == sumChunkSize - 1 || j == resultSize - 1) {
        //                    if (histogramLogarithm) {
        //                        item.put("featuresCount", (featuresCount > 0) ?
        // Math.log(featuresCount) : 0);
        //                    } else {
        //                        item.put("featuresCount", featuresCount);
        //                    }
        //                    item.put("end", result.getInt("end"));
        //                    data.add(item);
        //                    i = -1;
        //                    featuresCount = 0;
        //                }
        //                j++;
        //                i++;
      }
    } else {
      for (DBObject result : queryResults) {
        //                ObjectMap item = new ObjectMap("chromosome",
        // result.getString("chromosome"));
        //                item.put("chunkId", result.getInt("chunk_id"));
        //                item.put("start", result.getInt("start"));
        //                if (histogramLogarithm) {
        //                    int features_count = result.getInt("features_count");
        //                    result.put("featuresCount", (features_count > 0) ?
        // Math.log(features_count) : 0);
        //                } else {
        //                    item.put("featuresCount", result.getInt("features_count"));
        //                }
        //                item.put("end", result.getInt("end"));
        //                data.add(item);
      }
    }

    queryResult.setResult(data);
    queryResult.setNumResults(data.size());
    queryResult.setTime(System.currentTimeMillis() - startTime);

    return queryResult;
  }