/**
 * Returns the store file size of every region that belongs to the given table.
 *
 * <p>The table's region names are collected through a {@code RegionLocator} and then matched
 * against the region loads that the cluster status reports for each server. The cluster reports
 * store file sizes in whole megabytes; they are converted to bytes here, so every value is a
 * multiple of 1024 * 1024.
 *
 * <p>The table, region locator and admin handles obtained from the shared connection are closed
 * before returning; the connection itself is left open.
 *
 * @param tableName name of the table whose regions are inspected
 * @return map from region name (as string) to store file size in bytes; empty or partially
 *     filled when an {@link IOException} occurs, which is printed and otherwise ignored
 */
public Map<String, Long> getRegionSizes(String tableName) {
  Map<String, Long> regions = new HashMap<>();
  final long megaByte = 1024L * 1024L;

  // The fully qualified Admin type avoids depending on an import this block did not need before.
  try (Table table = connection.getTable(TableName.valueOf(tableName));
      RegionLocator regionLocator = connection.getRegionLocator(table.getName());
      org.apache.hadoop.hbase.client.Admin admin = connection.getAdmin()) {

    // byte[] has identity-based equals/hashCode, so a comparator-backed set is required
    // for the contains() lookups below.
    Set<byte[]> tableRegions = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
    List<HRegionLocation> tableRegionInfos = regionLocator.getAllRegionLocations();
    for (HRegionLocation regionInfo : tableRegionInfos) {
      tableRegions.add(regionInfo.getRegionInfo().getRegionName());
    }

    ClusterStatus clusterStatus = admin.getClusterStatus();
    Collection<ServerName> servers = clusterStatus.getServers();
    for (ServerName serverName : servers) {
      ServerLoad serverLoad = clusterStatus.getLoad(serverName);
      for (RegionLoad regionLoad : serverLoad.getRegionsLoad().values()) {
        byte[] regionId = regionLoad.getName();
        if (tableRegions.contains(regionId)) {
          long regionSizeBytes = regionLoad.getStorefileSizeMB() * megaByte;
          regions.put(regionLoad.getNameAsString(), regionSizeBytes);
        }
      }
    }
  } catch (IOException e) {
    // Best effort: report the failure and return whatever was collected so far.
    e.printStackTrace();
  }
  return regions;
}
/**
 * Rebuilds this table from its serialized form.
 *
 * <p>Fields are consumed from {@code input} in this exact order: table name bytes, table type
 * ordinal (vint), sequence number (vlong), time stamp (long), primary key name bytes (an empty
 * array means "no name"), the column count followed by each column, and finally the guide post
 * map (entry count, then for each entry a string key, an array length and that many byte
 * arrays). The decoded values are handed to {@code init}.
 *
 * @param input stream positioned at the start of a serialized table
 * @throws IOException if reading from {@code input} fails
 */
@Override
public void readFields(DataInput input) throws IOException {
  final PName name = new PNameImpl(Bytes.readByteArray(input));
  final PTableType type = PTableType.values()[WritableUtils.readVInt(input)];
  final long seqNum = WritableUtils.readVLong(input);
  final long ts = input.readLong();

  final byte[] rawPkName = Bytes.readByteArray(input);
  final String primaryKeyName;
  if (rawPkName.length == 0) {
    primaryKeyName = null;
  } else {
    primaryKeyName = Bytes.toString(rawPkName);
  }

  final int columnCount = WritableUtils.readVInt(input);
  final List<PColumn> columnList = Lists.newArrayListWithExpectedSize(columnCount);
  for (int remaining = columnCount; remaining > 0; remaining--) {
    PColumn col = new PColumnImpl();
    col.readFields(input);
    columnList.add(col);
  }

  final Map<String, byte[][]> guidePostsByKey = new HashMap<String, byte[][]>();
  final int entryCount = WritableUtils.readVInt(input);
  for (int entry = 0; entry < entryCount; entry++) {
    String entryKey = WritableUtils.readString(input);
    byte[][] posts = new byte[WritableUtils.readVInt(input)][];
    for (int idx = 0; idx < posts.length; idx++) {
      posts[idx] = Bytes.readByteArray(input);
    }
    guidePostsByKey.put(entryKey, posts);
  }

  init(name, type, ts, seqNum, primaryKeyName, columnList, new PTableStatsImpl(guidePostsByKey));
}
/**
 * Looks up a column family by its name.
 *
 * @param familyName name of the column family
 * @return the matching column family, never {@code null}
 * @throws ColumnFamilyNotFoundException if no family is registered under {@code familyName}
 */
@Override
public PColumnFamily getColumnFamily(String familyName) throws ColumnFamilyNotFoundException {
  final PColumnFamily match = familyByString.get(familyName);
  if (match != null) {
    return match;
  }
  throw new ColumnFamilyNotFoundException(familyName);
}
/**
 * Looks up a column family by the byte representation of its name.
 *
 * @param familyBytes column family name as bytes
 * @return the matching column family, never {@code null}
 * @throws ColumnFamilyNotFoundException if no family is registered under {@code familyBytes};
 *     the exception carries the name decoded with {@code Bytes.toString}
 */
@Override
public PColumnFamily getColumnFamily(byte[] familyBytes) throws ColumnFamilyNotFoundException {
  final PColumnFamily match = familyByBytes.get(familyBytes);
  if (match != null) {
    return match;
  }
  // Decode the name only on the failure path, where it is needed for the message.
  throw new ColumnFamilyNotFoundException(Bytes.toString(familyBytes));
}
@Override public QueryResult<Variant> getAllVariantsByRegionAndStudy( Region region, String sourceId, QueryOptions options) { Long start, end, dbstart, dbend; start = System.currentTimeMillis(); QueryResult<Variant> queryResult = new QueryResult<>( String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd())); List<Variant> results = new LinkedList<>(); boolean includeSamples; boolean includeStats; boolean includeEffects; if (!options.containsKey("samples") && !options.containsKey("stats") && !options.containsKey("effects")) { includeSamples = true; includeStats = true; includeEffects = true; } else { includeSamples = options.containsKey("samples") && options.getBoolean("samples"); includeStats = options.containsKey("stats") && options.getBoolean("stats"); includeEffects = options.containsKey("effects") && options.getBoolean("effects"); } try { String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart())); String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd())); HTable table = new HTable(admin.getConfiguration(), tableName); dbstart = System.currentTimeMillis(); Scan regionScan = new Scan(startRow.getBytes(), stopRow.getBytes()); ResultScanner scanres = table.getScanner(regionScan); dbend = System.currentTimeMillis(); queryResult.setDbTime(dbend - dbstart); // Iterate over results and, optionally, their samples and statistics for (Result result : scanres) { String[] rowkeyParts = new String(result.getRow(), CHARSET_UTF_8).split("_"); String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", ""); int position = Integer.parseInt(rowkeyParts[1]); // Get basic result fields from Protocol Buffers message NavigableMap<byte[], byte[]> infoMap = result.getFamilyMap("i".getBytes()); byte[] byteInfo = infoMap.get((sourceId + "_data").getBytes()); VariantFieldsProtos.VariantInfo protoInfo = VariantFieldsProtos.VariantInfo.parseFrom(byteInfo); String reference = protoInfo.getReference(); String 
alternate = StringUtils.join(protoInfo.getAlternateList(), ","); String format = StringUtils.join(protoInfo.getFormatList(), ":"); Variant variant = new Variant(chromosome, position, position, reference, alternate); // Set samples if requested if (includeSamples) { NavigableMap<byte[], byte[]> sampleMap = result.getFamilyMap("d".getBytes()); Map<String, Map<String, String>> resultSampleMap = new HashMap<>(); // Set samples for (byte[] s : sampleMap.keySet()) { String sampleName = (new String(s, CHARSET_UTF_8)).replaceAll(sourceId + "_", ""); VariantFieldsProtos.VariantSample sample = VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s)); String sample1 = sample.getSample(); String[] values = sample1.split(":"); String[] fields = format.split(":"); Map<String, String> singleSampleMap = new HashMap<>(); for (int i = 0; i < fields.length; i++) { singleSampleMap.put(fields[i], values[i]); } // TODO // variant.addSampleData(sampleName, singleSampleMap); } } // Set stats if requested if (includeStats) { byte[] byteStats = infoMap.get((sourceId + "_stats").getBytes()); VariantFieldsProtos.VariantStats protoStats = VariantFieldsProtos.VariantStats.parseFrom(byteStats); VariantStats variantStats = new VariantStats( chromosome, position, reference, alternate, protoStats.getMaf(), protoStats.getMgf(), protoStats.getMafAllele(), protoStats.getMgfGenotype(), protoStats.getMissingAlleles(), protoStats.getMissingGenotypes(), protoStats.getMendelianErrors(), protoStats.getIsIndel(), protoStats.getCasesPercentDominant(), protoStats.getControlsPercentDominant(), protoStats.getCasesPercentRecessive(), protoStats.getControlsPercentRecessive()); variant.setStats(variantStats); } if (includeEffects) { QueryResult<VariantEffect> queryEffects = getEffectsByVariant(variant, options); variant.setEffect(queryEffects.getResult()); } results.add(variant); } } catch (IOException e) { System.err.println(e.getClass().getName() + ": " + e.getMessage()); } queryResult.setResult(results); 
queryResult.setNumResults(results.size()); end = System.currentTimeMillis(); queryResult.setTime(end - start); return queryResult; }
public List<Variant> getRecordSimpleStats( String study, int missing_gt, float maf, String maf_allele) { BasicDBObject compare = new BasicDBObject("studies.stats.allele_maf", maf_allele) .append("studies.stats.MAF", maf) .append("studies.stats.missing", missing_gt); List<Get> hbaseQuery = new ArrayList<>(); DBCollection collection = db.getCollection("variants"); Iterator<DBObject> result = collection.find(compare); String chromosome = new String(); while (result.hasNext()) { DBObject variant = result.next(); String position = variant.get("_id").toString(); // hbase query construction Get get = new Get(position.getBytes()); hbaseQuery.add(get); } // Complete results, from HBase tableName = study; effectTableName = tableName + "effect"; Map<String, Variant> resultsMap = new HashMap<>(); try { HTable table = new HTable(admin.getConfiguration(), tableName); HTable effectTable = new HTable(admin.getConfiguration(), effectTableName); Result[] hbaseResultEffect = effectTable.get(hbaseQuery); Result[] hbaseResultStats = table.get(hbaseQuery); // List<Variant> results = new LinkedList<>(); for (Result r : hbaseResultStats) { String position = new String(r.getRow(), CHARSET_UTF_8); String[] aux = position.split("_"); String inner_position = aux[1]; String chr = aux[0]; // position parsing if (chr.startsWith("0")) { chr = chr.substring(1); } while (inner_position.startsWith("0")) { inner_position = inner_position.substring(1); } List<VariantFieldsProtos.VariantSample> samples = new LinkedList<>(); NavigableMap<byte[], byte[]> infoMap = r.getFamilyMap("i".getBytes()); byte[] byteStats = infoMap.get((study + "_stats").getBytes()); VariantFieldsProtos.VariantStats stats = VariantFieldsProtos.VariantStats.parseFrom(byteStats); byte[] byteInfo = infoMap.get((study + "_data").getBytes()); VariantFieldsProtos.VariantInfo info = VariantFieldsProtos.VariantInfo.parseFrom(byteInfo); String alternate = StringUtils.join(info.getAlternateList(), ", "); String reference = 
info.getReference(); Variant partialResult = new Variant( chr, Integer.parseInt(inner_position), Integer.parseInt(inner_position), reference, alternate); String format = StringUtils.join(info.getFormatList(), ":"); NavigableMap<byte[], byte[]> sampleMap = r.getFamilyMap("d".getBytes()); Map<String, Map<String, String>> resultSampleMap = new HashMap<>(); // StringBuilder sampleRaw = new StringBuilder(); for (byte[] s : sampleMap.keySet()) { String qual = (new String(s, CHARSET_UTF_8)).replaceAll(study + "_", ""); VariantFieldsProtos.VariantSample sample = VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s)); String sample1 = sample.getSample(); String[] values = sample1.split(":"); String[] fields = format.split(":"); Map<String, String> singleSampleMap = new HashMap<>(); for (int i = 0; i < fields.length; i++) { singleSampleMap.put(fields[i], values[i]); } resultSampleMap.put(qual, singleSampleMap); } VariantStats variantStats = new VariantStats( chromosome, Integer.parseInt(inner_position), reference, alternate, stats.getMaf(), stats.getMgf(), stats.getMafAllele(), stats.getMgfGenotype(), stats.getMissingAlleles(), stats.getMissingGenotypes(), stats.getMendelianErrors(), stats.getIsIndel(), stats.getCasesPercentDominant(), stats.getControlsPercentDominant(), stats.getCasesPercentRecessive(), stats.getControlsPercentRecessive()); partialResult.setStats(variantStats); resultsMap.put(new String(r.getRow(), CHARSET_UTF_8), partialResult); } for (Result r : hbaseResultEffect) { if (!r.isEmpty()) { NavigableMap<byte[], byte[]> effectMap = r.getFamilyMap("e".getBytes()); Variant partialResult = resultsMap.get(new String(r.getRow(), CHARSET_UTF_8)); System.out.println("Recuperado " + partialResult.toString()); String s = partialResult.getReference() + "_" + partialResult.getAlternate(); VariantEffectProtos.EffectInfo effectInfo = VariantEffectProtos.EffectInfo.parseFrom(effectMap.get(s.getBytes())); VariantEffect variantEffect = new VariantEffect( 
partialResult.getChromosome(), (int) partialResult.getStart(), partialResult.getReference(), partialResult.getAlternate(), effectInfo.getFeatureId(), effectInfo.getFeatureName(), effectInfo.getFeatureType(), effectInfo.getFeatureBiotype(), effectInfo.getFeatureChromosome(), effectInfo.getFeatureStart(), effectInfo.getFeatureEnd(), effectInfo.getFeatureStrand(), effectInfo.getSnpId(), effectInfo.getAncestral(), effectInfo.getAlternative(), effectInfo.getGeneId(), effectInfo.getTranscriptId(), effectInfo.getGeneName(), effectInfo.getConsequenceType(), effectInfo.getConsequenceTypeObo(), effectInfo.getConsequenceTypeDesc(), effectInfo.getConsequenceTypeType(), effectInfo.getAaPosition(), effectInfo.getAminoacidChange(), effectInfo.getCodonChange()); resultsMap.put(new String(r.getRow(), CHARSET_UTF_8), partialResult); } } } catch (InvalidProtocolBufferException e) { System.err.println(e.getClass().getName() + ": " + e.getMessage()); } catch (IOException e) { System.err.println(e.getClass().getName() + ": " + e.getMessage()); } List<Variant> results = new ArrayList<>(resultsMap.values()); return results; }
/**
 * Initializes the station code map and the list of station pairs that still need processing.
 *
 * <p>Steps:
 * <ol>
 *   <li>Sets {@code strConfig} to {@code file + ".process"}. If that file does not exist it is
 *       created; otherwise each of its lines is added to {@code processed}.</li>
 *   <li>Reads {@code file} line by line. Lines that split into exactly two tab-separated parts
 *       are taken as station name and station code and stored in {@code mapStationCode};
 *       all other lines are ignored.</li>
 *   <li>Adds every ordered pair of distinct stations whose {@code "start:end"} key is not in
 *       {@code processed} to {@code lstAllProcessStation}.</li>
 * </ol>
 *
 * <p>I/O errors are printed and otherwise ignored; both readers are always closed.
 *
 * @param file station code file path
 */
private static void initStationCode(String file) {
  List<String> lstStation = new ArrayList<>();
  strConfig = file + ".process";

  File fsP = new File(strConfig);
  if (!fsP.exists()) {
    try {
      fsP.createNewFile();
    } catch (IOException e) {
      e.printStackTrace();
    }
  } else {
    try (BufferedReader readp = new BufferedReader(new FileReader(fsP))) {
      String line;
      while ((line = readp.readLine()) != null) {
        processed.add(line);
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  // try-with-resources replaces a finally block whose null check was inverted: it called
  // close() on a null reader when the file was missing and never closed an open reader.
  try (BufferedReader reader = new BufferedReader(new FileReader(new File(file)))) {
    String line;
    while ((line = reader.readLine()) != null) {
      String[] splitLine = line.split("\t");
      if (splitLine.length != 2) {
        continue;
      }
      String stationName = splitLine[0];
      String stationCode = splitLine[1];
      mapStationCode.put(stationName, stationCode);
      lstStation.add(stationName);
    }
  } catch (IOException e) {
    // Also covers FileNotFoundException.
    e.printStackTrace();
  }

  for (String start : lstStation) {
    for (String end : lstStation) {
      if (start.equals(end) || processed.contains(start + ":" + end)) {
        continue;
      }
      lstAllProcessStation.add(new DefaultKeyValue(start, end));
    }
  }
}
public static List<KeyValue> getAllTrainInfo(Configuration config, String date) { List<KeyValue> result = new ArrayList<>(); String strJson = null; BufferedWriter writer = null; Table table = null; try (Connection connect = ConnectionFactory.createConnection(config); Admin admin = connect.getAdmin()) { TableName tablename = TableName.valueOf(TABLE_NAME); if (!admin.tableExists(tablename)) { System.out.println("Table does not exist."); return null; } table = connect.getTable(tablename); Put put = null; String start = null; String end = null; writer = new BufferedWriter(new FileWriter(new File(strConfig), true)); for (KeyValue item : lstAllProcessStation) { start = (String) item.getKey(); end = (String) item.getValue(); try { try { Thread.sleep(200); } catch (InterruptedException e1) { e1.printStackTrace(); } System.out.println("process : " + start + ":" + end); strJson = getFromAPIX(mapStationCode.get(start), mapStationCode.get(end), date); writer.write(start + ":" + end); writer.newLine(); } catch (Exception e) { System.out.println(start + ":" + end + "error"); e.printStackTrace(); break; } JSONObject jo = new JSONObject(strJson); if (jo.has("httpstatus") && (jo.getInt("httpstatus") == 200)) { JSONObject joData = jo.getJSONObject("data"); if (joData.has("flag") && joData.getBoolean("flag")) { result.add(new DefaultKeyValue(start, end)); // 插入到hbase String rowkey = start + ":" + end; put = new Put(rowkey.getBytes()); put.addColumn( CF_JSON.getBytes(), "json".getBytes(), joData.toString().getBytes("utf-8")); table.put(put); System.out.println("start " + start + "\t end " + end + "\t has ticket"); } } } } catch (IOException e) { e.printStackTrace(); } finally { if (writer != null) { try { writer.flush(); writer.close(); } catch (IOException e) { e.printStackTrace(); } } if (table != null) { try { table.close(); } catch (IOException e) { e.printStackTrace(); } } } return result; }