private void removeIfPresent(Mutation m, byte[] family, byte[] qualifier) { Map<byte[], List<KeyValue>> familyMap = m.getFamilyMap(); List<KeyValue> kvs = familyMap.get(family); if (kvs != null) { Iterator<KeyValue> iterator = kvs.iterator(); while (iterator.hasNext()) { KeyValue kv = iterator.next(); if (Bytes.compareTo(kv.getQualifier(), qualifier) == 0) { iterator.remove(); } } } }
public HBaseStoreManager(org.apache.commons.configuration.Configuration config) throws StorageException { super(config, PORT_DEFAULT); this.tableName = config.getString(TABLE_NAME_KEY, TABLE_NAME_DEFAULT); this.hconf = HBaseConfiguration.create(); for (Map.Entry<String, String> confEntry : HBASE_CONFIGURATION.entrySet()) { if (config.containsKey(confEntry.getKey())) { hconf.set(confEntry.getValue(), config.getString(confEntry.getKey())); } } // Copy a subset of our commons config into a Hadoop config org.apache.commons.configuration.Configuration hbCommons = config.subset(HBASE_CONFIGURATION_NAMESPACE); @SuppressWarnings("unchecked") // I hope commons-config eventually fixes this Iterator<String> keys = hbCommons.getKeys(); int keysLoaded = 0; while (keys.hasNext()) { String key = keys.next(); String value = hbCommons.getString(key); logger.debug("HBase configuration: setting {}={}", key, value); hconf.set(key, value); keysLoaded++; } logger.debug("HBase configuration: set a total of {} configuration values", keysLoaded); connectionPool = new HTablePool(hconf, connectionPoolSize); openStores = new ConcurrentHashMap<String, HBaseKeyColumnValueStore>(); // TODO: allowing publicly mutate fields is bad, should be fixed features = new StoreFeatures(); features.supportsScan = true; features.supportsBatchMutation = true; features.supportsTransactions = false; features.supportsConsistentKeyOperations = true; features.supportsLocking = false; features.isKeyOrdered = false; features.isDistributed = true; features.hasLocalKeyPartition = false; }
public List<Variant> getRecordSimpleStats( String study, int missing_gt, float maf, String maf_allele) { BasicDBObject compare = new BasicDBObject("studies.stats.allele_maf", maf_allele) .append("studies.stats.MAF", maf) .append("studies.stats.missing", missing_gt); List<Get> hbaseQuery = new ArrayList<>(); DBCollection collection = db.getCollection("variants"); Iterator<DBObject> result = collection.find(compare); String chromosome = new String(); while (result.hasNext()) { DBObject variant = result.next(); String position = variant.get("_id").toString(); // hbase query construction Get get = new Get(position.getBytes()); hbaseQuery.add(get); } // Complete results, from HBase tableName = study; effectTableName = tableName + "effect"; Map<String, Variant> resultsMap = new HashMap<>(); try { HTable table = new HTable(admin.getConfiguration(), tableName); HTable effectTable = new HTable(admin.getConfiguration(), effectTableName); Result[] hbaseResultEffect = effectTable.get(hbaseQuery); Result[] hbaseResultStats = table.get(hbaseQuery); // List<Variant> results = new LinkedList<>(); for (Result r : hbaseResultStats) { String position = new String(r.getRow(), CHARSET_UTF_8); String[] aux = position.split("_"); String inner_position = aux[1]; String chr = aux[0]; // position parsing if (chr.startsWith("0")) { chr = chr.substring(1); } while (inner_position.startsWith("0")) { inner_position = inner_position.substring(1); } List<VariantFieldsProtos.VariantSample> samples = new LinkedList<>(); NavigableMap<byte[], byte[]> infoMap = r.getFamilyMap("i".getBytes()); byte[] byteStats = infoMap.get((study + "_stats").getBytes()); VariantFieldsProtos.VariantStats stats = VariantFieldsProtos.VariantStats.parseFrom(byteStats); byte[] byteInfo = infoMap.get((study + "_data").getBytes()); VariantFieldsProtos.VariantInfo info = VariantFieldsProtos.VariantInfo.parseFrom(byteInfo); String alternate = StringUtils.join(info.getAlternateList(), ", "); String reference = info.getReference(); Variant partialResult = new Variant( chr, Integer.parseInt(inner_position), Integer.parseInt(inner_position), reference, alternate); String format = StringUtils.join(info.getFormatList(), ":"); NavigableMap<byte[], byte[]> sampleMap = r.getFamilyMap("d".getBytes()); Map<String, Map<String, String>> resultSampleMap = new HashMap<>(); // StringBuilder sampleRaw = new StringBuilder(); for (byte[] s : sampleMap.keySet()) { String qual = (new String(s, CHARSET_UTF_8)).replaceAll(study + "_", ""); VariantFieldsProtos.VariantSample sample = VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s)); String sample1 = sample.getSample(); String[] values = sample1.split(":"); String[] fields = format.split(":"); Map<String, String> singleSampleMap = new HashMap<>(); for (int i = 0; i < fields.length; i++) { singleSampleMap.put(fields[i], values[i]); } resultSampleMap.put(qual, singleSampleMap); } VariantStats variantStats = new VariantStats( chromosome, Integer.parseInt(inner_position), reference, alternate, stats.getMaf(), stats.getMgf(), stats.getMafAllele(), stats.getMgfGenotype(), stats.getMissingAlleles(), stats.getMissingGenotypes(), stats.getMendelianErrors(), stats.getIsIndel(), stats.getCasesPercentDominant(), stats.getControlsPercentDominant(), stats.getCasesPercentRecessive(), stats.getControlsPercentRecessive()); partialResult.setStats(variantStats); resultsMap.put(new String(r.getRow(), CHARSET_UTF_8), partialResult); } for (Result r : hbaseResultEffect) { if (!r.isEmpty()) { NavigableMap<byte[], byte[]> effectMap = r.getFamilyMap("e".getBytes()); Variant partialResult = resultsMap.get(new String(r.getRow(), CHARSET_UTF_8)); System.out.println("Recuperado " + partialResult.toString()); String s = partialResult.getReference() + "_" + partialResult.getAlternate(); VariantEffectProtos.EffectInfo effectInfo = VariantEffectProtos.EffectInfo.parseFrom(effectMap.get(s.getBytes())); VariantEffect variantEffect = new VariantEffect( partialResult.getChromosome(), (int) partialResult.getStart(), partialResult.getReference(), partialResult.getAlternate(), effectInfo.getFeatureId(), effectInfo.getFeatureName(), effectInfo.getFeatureType(), effectInfo.getFeatureBiotype(), effectInfo.getFeatureChromosome(), effectInfo.getFeatureStart(), effectInfo.getFeatureEnd(), effectInfo.getFeatureStrand(), effectInfo.getSnpId(), effectInfo.getAncestral(), effectInfo.getAlternative(), effectInfo.getGeneId(), effectInfo.getTranscriptId(), effectInfo.getGeneName(), effectInfo.getConsequenceType(), effectInfo.getConsequenceTypeObo(), effectInfo.getConsequenceTypeDesc(), effectInfo.getConsequenceTypeType(), effectInfo.getAaPosition(), effectInfo.getAminoacidChange(), effectInfo.getCodonChange()); resultsMap.put(new String(r.getRow(), CHARSET_UTF_8), partialResult); } } } catch (InvalidProtocolBufferException e) { System.err.println(e.getClass().getName() + ": " + e.getMessage()); } catch (IOException e) { System.err.println(e.getClass().getName() + ": " + e.getMessage()); } List<Variant> results = new ArrayList<>(resultsMap.values()); return results; }