public int run(String[] args) throws Exception { Configuration argConf = getConf(); // JobConf conf = new JobConf(diffdb.class); Configuration config = HBaseConfiguration.create(); HBaseAdmin hbAdmin = new HBaseAdmin(config); dbutil db_util = new dbutil(config); HTable runTable = new HTable(config, "gestore_runs"); Get runGet = new Get(argConf.get("id").getBytes()); Result pipeline = runTable.get(runGet); NavigableMap<byte[], byte[]> pipeMap = pipeline.getFamilyMap("d".getBytes()); Map.Entry<byte[], byte[]> results = pipeMap.pollFirstEntry(); HashMap<String, HashMap<String, String>> resultMap = new HashMap<String, HashMap<String, String>>(); while (results != null) { String resultKey = new String(results.getKey()); String resultValue = new String(results.getValue()); String field = "type"; HashMap<String, String> tempMap = new HashMap<String, String>(); String entry = resultKey; if (resultKey.endsWith("_db_timestamp")) { field = "db_timestamp"; entry = resultKey.substring(0, resultKey.lastIndexOf("_db_timestamp")); } else if (resultKey.endsWith("_filename")) { field = "filename"; entry = resultKey.substring(0, resultKey.lastIndexOf("_filename")); } else if (resultKey.endsWith("_regex")) { field = "regex"; entry = resultKey.substring(0, resultKey.lastIndexOf("_regex")); } if (resultMap.containsKey(entry)) { tempMap = resultMap.get(entry); } tempMap.put(field, resultValue); resultMap.put(entry, tempMap); // System.out.println("Key: " + resultKey + " Value: " + resultValue); results = pipeMap.pollFirstEntry(); } for (String key : resultMap.keySet()) { System.out.println("File ID: " + key); for (String subKey : resultMap.get(key).keySet()) { // System.out.println("\t " + subKey + "\t\t" + resultMap.get(key).get(subKey)); System.out.format(" %1$-20s %2$s\n", subKey, resultMap.get(key).get(subKey)); } } return 0; }
public void putBatch(Optional<List<Request>> putRequests, boolean optimize) { if (!valid) { Logger.error("CANNOT PUT! NO VALID CONNECTION"); return; } List<Put> puts = new ArrayList<>(); if (putRequests.isPresent() && !putRequests.get().isEmpty()) { String tableName = putRequests.get().get(0).table; putRequests .get() .forEach( pr -> pr.getPut() .ifPresent( p -> { if (optimize) { p.setDurability(Durability.SKIP_WAL); } puts.add(p); })); try { final Table table = connection.getTable(TableName.valueOf(tableName)); if (optimize && table instanceof HTable) { ((HTable) table).setAutoFlush(false, true); } table.put(puts); table.close(); } catch (IOException e) { e.printStackTrace(); } } }
@Override public Set<Element> get(Object key) { Get get = new Get(ByteArraySerializer.fromObject(key)); Result result; try { result = backingTable.get(get); } catch (IOException e) { LOG.severe("Cannot get from backing table"); e.printStackTrace(); return null; } NavigableMap<byte[], byte[]> map = result.getFamilyMap(Bytes.toBytes(VALUES)); if (null == map) return null; HashSet<Element> elementHashSet = new HashSet<Element>(); for (byte[] byteArray : map.keySet()) { if (indexClass.equals(HVertex.class) || indexClass.equals(Vertex.class)) { HVertex hVertex = new HVertex(hGraph); hVertex.setId(byteArray); elementHashSet.add(hVertex); } else { final HEdge hEdge = new HEdge(hGraph); hEdge.setId(byteArray); elementHashSet.add(hEdge); } } return elementHashSet; }
public void removeSingleElement(Object key, Element value) { final Delete delete = new Delete(ByteArraySerializer.fromObject(key)); delete.deleteColumns(Bytes.toBytes(VALUES), (byte[]) value.getId()); try { backingTable.delete(delete); } catch (IOException e) { LOG.severe("Cannot delete from backing table"); e.printStackTrace(); } }
@Override public Set<Element> remove(Object key) { Delete del = new Delete(ByteArraySerializer.fromObject(key)); try { backingTable.delete(del); } catch (IOException e) { LOG.severe("Error while deleting from table"); e.printStackTrace(); } return null; }
/** * Deletes the specified table with all its columns. ATTENTION: Invoking this method will delete * the table if it exists and therefore causes data loss. */ @Override public void clearStorage() throws StorageException { HBaseAdmin adm = getAdminInterface(); try { // first of all, check if table exists, if not - we are done if (!adm.tableExists(tableName)) { logger.debug("clearStorage() called before table {} was created, skipping.", tableName); return; } } catch (IOException e) { throw new TemporaryStorageException(e); } HTable table = null; try { table = new HTable(hconf, tableName); Scan scan = new Scan(); scan.setBatch(100); scan.setCacheBlocks(false); scan.setCaching(2000); ResultScanner scanner = null; try { scanner = table.getScanner(scan); for (Result res : scanner) { table.delete(new Delete(res.getRow())); } } finally { IOUtils.closeQuietly(scanner); } } catch (IOException e) { throw new TemporaryStorageException(e); } finally { IOUtils.closeQuietly(table); } }
@Override public Set<Element> put(Object key, Set<Element> value) { Put put = new Put(ByteArraySerializer.fromObject(key)); for (Element val : value) put.add(Bytes.toBytes(VALUES), (byte[]) val.getId(), (byte[]) val.getId()); try { backingTable.put(put); } catch (IOException e) { LOG.severe("Cannot put into backing table"); e.printStackTrace(); return null; } return value; }
@Override public QueryResult<Variant> getAllVariantsByRegionAndStudy( Region region, String sourceId, QueryOptions options) { Long start, end, dbstart, dbend; start = System.currentTimeMillis(); QueryResult<Variant> queryResult = new QueryResult<>( String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd())); List<Variant> results = new LinkedList<>(); boolean includeSamples; boolean includeStats; boolean includeEffects; if (!options.containsKey("samples") && !options.containsKey("stats") && !options.containsKey("effects")) { includeSamples = true; includeStats = true; includeEffects = true; } else { includeSamples = options.containsKey("samples") && options.getBoolean("samples"); includeStats = options.containsKey("stats") && options.getBoolean("stats"); includeEffects = options.containsKey("effects") && options.getBoolean("effects"); } try { String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart())); String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd())); HTable table = new HTable(admin.getConfiguration(), tableName); dbstart = System.currentTimeMillis(); Scan regionScan = new Scan(startRow.getBytes(), stopRow.getBytes()); ResultScanner scanres = table.getScanner(regionScan); dbend = System.currentTimeMillis(); queryResult.setDbTime(dbend - dbstart); // Iterate over results and, optionally, their samples and statistics for (Result result : scanres) { String[] rowkeyParts = new String(result.getRow(), CHARSET_UTF_8).split("_"); String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", ""); int position = Integer.parseInt(rowkeyParts[1]); // Get basic result fields from Protocol Buffers message NavigableMap<byte[], byte[]> infoMap = result.getFamilyMap("i".getBytes()); byte[] byteInfo = infoMap.get((sourceId + "_data").getBytes()); VariantFieldsProtos.VariantInfo protoInfo = VariantFieldsProtos.VariantInfo.parseFrom(byteInfo); String reference = protoInfo.getReference(); String alternate = StringUtils.join(protoInfo.getAlternateList(), ","); String format = StringUtils.join(protoInfo.getFormatList(), ":"); Variant variant = new Variant(chromosome, position, position, reference, alternate); // Set samples if requested if (includeSamples) { NavigableMap<byte[], byte[]> sampleMap = result.getFamilyMap("d".getBytes()); Map<String, Map<String, String>> resultSampleMap = new HashMap<>(); // Set samples for (byte[] s : sampleMap.keySet()) { String sampleName = (new String(s, CHARSET_UTF_8)).replaceAll(sourceId + "_", ""); VariantFieldsProtos.VariantSample sample = VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s)); String sample1 = sample.getSample(); String[] values = sample1.split(":"); String[] fields = format.split(":"); Map<String, String> singleSampleMap = new HashMap<>(); for (int i = 0; i < fields.length; i++) { singleSampleMap.put(fields[i], values[i]); } // TODO // variant.addSampleData(sampleName, singleSampleMap); } } // Set stats if requested if (includeStats) { byte[] byteStats = infoMap.get((sourceId + "_stats").getBytes()); VariantFieldsProtos.VariantStats protoStats = VariantFieldsProtos.VariantStats.parseFrom(byteStats); VariantStats variantStats = new VariantStats( chromosome, position, reference, alternate, protoStats.getMaf(), protoStats.getMgf(), protoStats.getMafAllele(), protoStats.getMgfGenotype(), protoStats.getMissingAlleles(), protoStats.getMissingGenotypes(), protoStats.getMendelianErrors(), protoStats.getIsIndel(), protoStats.getCasesPercentDominant(), protoStats.getControlsPercentDominant(), protoStats.getCasesPercentRecessive(), protoStats.getControlsPercentRecessive()); variant.setStats(variantStats); } if (includeEffects) { QueryResult<VariantEffect> queryEffects = getEffectsByVariant(variant, options); variant.setEffect(queryEffects.getResult()); } results.add(variant); } } catch (IOException e) { System.err.println(e.getClass().getName() + ": " + e.getMessage()); } queryResult.setResult(results); queryResult.setNumResults(results.size()); end = System.currentTimeMillis(); queryResult.setTime(end - start); return queryResult; }
public List<Variant> getRecordSimpleStats( String study, int missing_gt, float maf, String maf_allele) { BasicDBObject compare = new BasicDBObject("studies.stats.allele_maf", maf_allele) .append("studies.stats.MAF", maf) .append("studies.stats.missing", missing_gt); List<Get> hbaseQuery = new ArrayList<>(); DBCollection collection = db.getCollection("variants"); Iterator<DBObject> result = collection.find(compare); String chromosome = new String(); while (result.hasNext()) { DBObject variant = result.next(); String position = variant.get("_id").toString(); // hbase query construction Get get = new Get(position.getBytes()); hbaseQuery.add(get); } // Complete results, from HBase tableName = study; effectTableName = tableName + "effect"; Map<String, Variant> resultsMap = new HashMap<>(); try { HTable table = new HTable(admin.getConfiguration(), tableName); HTable effectTable = new HTable(admin.getConfiguration(), effectTableName); Result[] hbaseResultEffect = effectTable.get(hbaseQuery); Result[] hbaseResultStats = table.get(hbaseQuery); // List<Variant> results = new LinkedList<>(); for (Result r : hbaseResultStats) { String position = new String(r.getRow(), CHARSET_UTF_8); String[] aux = position.split("_"); String inner_position = aux[1]; String chr = aux[0]; // position parsing if (chr.startsWith("0")) { chr = chr.substring(1); } while (inner_position.startsWith("0")) { inner_position = inner_position.substring(1); } List<VariantFieldsProtos.VariantSample> samples = new LinkedList<>(); NavigableMap<byte[], byte[]> infoMap = r.getFamilyMap("i".getBytes()); byte[] byteStats = infoMap.get((study + "_stats").getBytes()); VariantFieldsProtos.VariantStats stats = VariantFieldsProtos.VariantStats.parseFrom(byteStats); byte[] byteInfo = infoMap.get((study + "_data").getBytes()); VariantFieldsProtos.VariantInfo info = VariantFieldsProtos.VariantInfo.parseFrom(byteInfo); String alternate = StringUtils.join(info.getAlternateList(), ", "); String reference = info.getReference(); Variant partialResult = new Variant( chr, Integer.parseInt(inner_position), Integer.parseInt(inner_position), reference, alternate); String format = StringUtils.join(info.getFormatList(), ":"); NavigableMap<byte[], byte[]> sampleMap = r.getFamilyMap("d".getBytes()); Map<String, Map<String, String>> resultSampleMap = new HashMap<>(); // StringBuilder sampleRaw = new StringBuilder(); for (byte[] s : sampleMap.keySet()) { String qual = (new String(s, CHARSET_UTF_8)).replaceAll(study + "_", ""); VariantFieldsProtos.VariantSample sample = VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s)); String sample1 = sample.getSample(); String[] values = sample1.split(":"); String[] fields = format.split(":"); Map<String, String> singleSampleMap = new HashMap<>(); for (int i = 0; i < fields.length; i++) { singleSampleMap.put(fields[i], values[i]); } resultSampleMap.put(qual, singleSampleMap); } VariantStats variantStats = new VariantStats( chromosome, Integer.parseInt(inner_position), reference, alternate, stats.getMaf(), stats.getMgf(), stats.getMafAllele(), stats.getMgfGenotype(), stats.getMissingAlleles(), stats.getMissingGenotypes(), stats.getMendelianErrors(), stats.getIsIndel(), stats.getCasesPercentDominant(), stats.getControlsPercentDominant(), stats.getCasesPercentRecessive(), stats.getControlsPercentRecessive()); partialResult.setStats(variantStats); resultsMap.put(new String(r.getRow(), CHARSET_UTF_8), partialResult); } for (Result r : hbaseResultEffect) { if (!r.isEmpty()) { NavigableMap<byte[], byte[]> effectMap = r.getFamilyMap("e".getBytes()); Variant partialResult = resultsMap.get(new String(r.getRow(), CHARSET_UTF_8)); System.out.println("Recuperado " + partialResult.toString()); String s = partialResult.getReference() + "_" + partialResult.getAlternate(); VariantEffectProtos.EffectInfo effectInfo = VariantEffectProtos.EffectInfo.parseFrom(effectMap.get(s.getBytes())); VariantEffect variantEffect = new VariantEffect( partialResult.getChromosome(), (int) partialResult.getStart(), partialResult.getReference(), partialResult.getAlternate(), effectInfo.getFeatureId(), effectInfo.getFeatureName(), effectInfo.getFeatureType(), effectInfo.getFeatureBiotype(), effectInfo.getFeatureChromosome(), effectInfo.getFeatureStart(), effectInfo.getFeatureEnd(), effectInfo.getFeatureStrand(), effectInfo.getSnpId(), effectInfo.getAncestral(), effectInfo.getAlternative(), effectInfo.getGeneId(), effectInfo.getTranscriptId(), effectInfo.getGeneName(), effectInfo.getConsequenceType(), effectInfo.getConsequenceTypeObo(), effectInfo.getConsequenceTypeDesc(), effectInfo.getConsequenceTypeType(), effectInfo.getAaPosition(), effectInfo.getAminoacidChange(), effectInfo.getCodonChange()); resultsMap.put(new String(r.getRow(), CHARSET_UTF_8), partialResult); } } } catch (InvalidProtocolBufferException e) { System.err.println(e.getClass().getName() + ": " + e.getMessage()); } catch (IOException e) { System.err.println(e.getClass().getName() + ": " + e.getMessage()); } List<Variant> results = new ArrayList<>(resultsMap.values()); return results; }