コード例 #1
0
ファイル: HBaseUtils.java プロジェクト: paul-weiss/Utils
 public Map<String, Long> getRegionSizes(String tableName) {
   Map<String, Long> regions = new HashMap<>();
   try {
     final Table table = connection.getTable(TableName.valueOf(tableName));
     RegionLocator regionLocator = connection.getRegionLocator(table.getName());
     List<HRegionLocation> tableRegionInfos = regionLocator.getAllRegionLocations();
     Set<byte[]> tableRegions = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
     for (HRegionLocation regionInfo : tableRegionInfos) {
       tableRegions.add(regionInfo.getRegionInfo().getRegionName());
     }
     ClusterStatus clusterStatus = connection.getAdmin().getClusterStatus();
     Collection<ServerName> servers = clusterStatus.getServers();
     final long megaByte = 1024L * 1024L;
     for (ServerName serverName : servers) {
       ServerLoad serverLoad = clusterStatus.getLoad(serverName);
       for (RegionLoad regionLoad : serverLoad.getRegionsLoad().values()) {
         byte[] regionId = regionLoad.getName();
         if (tableRegions.contains(regionId)) {
           long regionSizeBytes = regionLoad.getStorefileSizeMB() * megaByte;
           regions.put(regionLoad.getNameAsString(), regionSizeBytes);
         }
       }
     }
   } catch (IOException e) {
     e.printStackTrace();
   }
   return regions;
 }
コード例 #2
0
ファイル: PTableImpl.java プロジェクト: cclien/phoenix
 @Override
 public void readFields(DataInput input) throws IOException {
   byte[] tableNameBytes = Bytes.readByteArray(input);
   PName tableName = new PNameImpl(tableNameBytes);
   PTableType tableType = PTableType.values()[WritableUtils.readVInt(input)];
   long sequenceNumber = WritableUtils.readVLong(input);
   long timeStamp = input.readLong();
   byte[] pkNameBytes = Bytes.readByteArray(input);
   String pkName = pkNameBytes.length == 0 ? null : Bytes.toString(pkNameBytes);
   int nColumns = WritableUtils.readVInt(input);
   List<PColumn> columns = Lists.newArrayListWithExpectedSize(nColumns);
   for (int i = 0; i < nColumns; i++) {
     PColumn column = new PColumnImpl();
     column.readFields(input);
     columns.add(column);
   }
   Map<String, byte[][]> guidePosts = new HashMap<String, byte[][]>();
   int size = WritableUtils.readVInt(input);
   for (int i = 0; i < size; i++) {
     String key = WritableUtils.readString(input);
     int valueSize = WritableUtils.readVInt(input);
     byte[][] value = new byte[valueSize][];
     for (int j = 0; j < valueSize; j++) {
       value[j] = Bytes.readByteArray(input);
     }
     guidePosts.put(key, value);
   }
   PTableStats stats = new PTableStatsImpl(guidePosts);
   init(tableName, tableType, timeStamp, sequenceNumber, pkName, columns, stats);
 }
コード例 #3
0
ファイル: PTableImpl.java プロジェクト: cclien/phoenix
 @Override
 public PColumnFamily getColumnFamily(String familyName) throws ColumnFamilyNotFoundException {
   PColumnFamily family = familyByString.get(familyName);
   if (family == null) {
     throw new ColumnFamilyNotFoundException(familyName);
   }
   return family;
 }
コード例 #4
0
ファイル: PTableImpl.java プロジェクト: cclien/phoenix
 @Override
 public PColumnFamily getColumnFamily(byte[] familyBytes) throws ColumnFamilyNotFoundException {
   PColumnFamily family = familyByBytes.get(familyBytes);
   if (family == null) {
     String familyName = Bytes.toString(familyBytes);
     throw new ColumnFamilyNotFoundException(familyName);
   }
   return family;
 }
コード例 #5
0
  @Override
  public QueryResult<Variant> getAllVariantsByRegionAndStudy(
      Region region, String sourceId, QueryOptions options) {
    Long start, end, dbstart, dbend;
    start = System.currentTimeMillis();
    QueryResult<Variant> queryResult =
        new QueryResult<>(
            String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd()));
    List<Variant> results = new LinkedList<>();

    boolean includeSamples;
    boolean includeStats;
    boolean includeEffects;
    if (!options.containsKey("samples")
        && !options.containsKey("stats")
        && !options.containsKey("effects")) {
      includeSamples = true;
      includeStats = true;
      includeEffects = true;
    } else {
      includeSamples = options.containsKey("samples") && options.getBoolean("samples");
      includeStats = options.containsKey("stats") && options.getBoolean("stats");
      includeEffects = options.containsKey("effects") && options.getBoolean("effects");
    }

    try {
      String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart()));
      String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd()));
      HTable table = new HTable(admin.getConfiguration(), tableName);
      dbstart = System.currentTimeMillis();
      Scan regionScan = new Scan(startRow.getBytes(), stopRow.getBytes());
      ResultScanner scanres = table.getScanner(regionScan);
      dbend = System.currentTimeMillis();
      queryResult.setDbTime(dbend - dbstart);

      // Iterate over results and, optionally, their samples and statistics
      for (Result result : scanres) {
        String[] rowkeyParts = new String(result.getRow(), CHARSET_UTF_8).split("_");
        String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", "");
        int position = Integer.parseInt(rowkeyParts[1]);

        // Get basic result fields from Protocol Buffers message
        NavigableMap<byte[], byte[]> infoMap = result.getFamilyMap("i".getBytes());
        byte[] byteInfo = infoMap.get((sourceId + "_data").getBytes());
        VariantFieldsProtos.VariantInfo protoInfo =
            VariantFieldsProtos.VariantInfo.parseFrom(byteInfo);
        String reference = protoInfo.getReference();
        String alternate = StringUtils.join(protoInfo.getAlternateList(), ",");
        String format = StringUtils.join(protoInfo.getFormatList(), ":");
        Variant variant = new Variant(chromosome, position, position, reference, alternate);

        // Set samples if requested
        if (includeSamples) {
          NavigableMap<byte[], byte[]> sampleMap = result.getFamilyMap("d".getBytes());
          Map<String, Map<String, String>> resultSampleMap = new HashMap<>();

          // Set samples
          for (byte[] s : sampleMap.keySet()) {
            String sampleName = (new String(s, CHARSET_UTF_8)).replaceAll(sourceId + "_", "");
            VariantFieldsProtos.VariantSample sample =
                VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s));
            String sample1 = sample.getSample();
            String[] values = sample1.split(":");
            String[] fields = format.split(":");
            Map<String, String> singleSampleMap = new HashMap<>();
            for (int i = 0; i < fields.length; i++) {
              singleSampleMap.put(fields[i], values[i]);
            }
            // TODO
            //                        variant.addSampleData(sampleName, singleSampleMap);
          }
        }

        // Set stats if requested
        if (includeStats) {
          byte[] byteStats = infoMap.get((sourceId + "_stats").getBytes());
          VariantFieldsProtos.VariantStats protoStats =
              VariantFieldsProtos.VariantStats.parseFrom(byteStats);
          VariantStats variantStats =
              new VariantStats(
                  chromosome,
                  position,
                  reference,
                  alternate,
                  protoStats.getMaf(),
                  protoStats.getMgf(),
                  protoStats.getMafAllele(),
                  protoStats.getMgfGenotype(),
                  protoStats.getMissingAlleles(),
                  protoStats.getMissingGenotypes(),
                  protoStats.getMendelianErrors(),
                  protoStats.getIsIndel(),
                  protoStats.getCasesPercentDominant(),
                  protoStats.getControlsPercentDominant(),
                  protoStats.getCasesPercentRecessive(),
                  protoStats.getControlsPercentRecessive());
          variant.setStats(variantStats);
        }

        if (includeEffects) {
          QueryResult<VariantEffect> queryEffects = getEffectsByVariant(variant, options);
          variant.setEffect(queryEffects.getResult());
        }

        results.add(variant);
      }
    } catch (IOException e) {
      System.err.println(e.getClass().getName() + ": " + e.getMessage());
    }
    queryResult.setResult(results);
    queryResult.setNumResults(results.size());
    end = System.currentTimeMillis();
    queryResult.setTime(end - start);
    return queryResult;
  }
コード例 #6
0
  public List<Variant> getRecordSimpleStats(
      String study, int missing_gt, float maf, String maf_allele) {
    BasicDBObject compare =
        new BasicDBObject("studies.stats.allele_maf", maf_allele)
            .append("studies.stats.MAF", maf)
            .append("studies.stats.missing", missing_gt);
    List<Get> hbaseQuery = new ArrayList<>();
    DBCollection collection = db.getCollection("variants");
    Iterator<DBObject> result = collection.find(compare);
    String chromosome = new String();
    while (result.hasNext()) {
      DBObject variant = result.next();
      String position = variant.get("_id").toString();
      // hbase query construction
      Get get = new Get(position.getBytes());
      hbaseQuery.add(get);
    }
    // Complete results, from HBase

    tableName = study;
    effectTableName = tableName + "effect";
    Map<String, Variant> resultsMap = new HashMap<>();

    try {
      HTable table = new HTable(admin.getConfiguration(), tableName);
      HTable effectTable = new HTable(admin.getConfiguration(), effectTableName);
      Result[] hbaseResultEffect = effectTable.get(hbaseQuery);
      Result[] hbaseResultStats = table.get(hbaseQuery);

      //            List<Variant> results = new LinkedList<>();
      for (Result r : hbaseResultStats) {
        String position = new String(r.getRow(), CHARSET_UTF_8);
        String[] aux = position.split("_");
        String inner_position = aux[1];
        String chr = aux[0];
        // position parsing
        if (chr.startsWith("0")) {
          chr = chr.substring(1);
        }
        while (inner_position.startsWith("0")) {
          inner_position = inner_position.substring(1);
        }
        List<VariantFieldsProtos.VariantSample> samples = new LinkedList<>();
        NavigableMap<byte[], byte[]> infoMap = r.getFamilyMap("i".getBytes());
        byte[] byteStats = infoMap.get((study + "_stats").getBytes());
        VariantFieldsProtos.VariantStats stats =
            VariantFieldsProtos.VariantStats.parseFrom(byteStats);
        byte[] byteInfo = infoMap.get((study + "_data").getBytes());
        VariantFieldsProtos.VariantInfo info = VariantFieldsProtos.VariantInfo.parseFrom(byteInfo);
        String alternate = StringUtils.join(info.getAlternateList(), ", ");
        String reference = info.getReference();
        Variant partialResult =
            new Variant(
                chr,
                Integer.parseInt(inner_position),
                Integer.parseInt(inner_position),
                reference,
                alternate);
        String format = StringUtils.join(info.getFormatList(), ":");
        NavigableMap<byte[], byte[]> sampleMap = r.getFamilyMap("d".getBytes());
        Map<String, Map<String, String>> resultSampleMap = new HashMap<>();
        //                StringBuilder sampleRaw = new StringBuilder();
        for (byte[] s : sampleMap.keySet()) {
          String qual = (new String(s, CHARSET_UTF_8)).replaceAll(study + "_", "");
          VariantFieldsProtos.VariantSample sample =
              VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s));
          String sample1 = sample.getSample();
          String[] values = sample1.split(":");
          String[] fields = format.split(":");
          Map<String, String> singleSampleMap = new HashMap<>();
          for (int i = 0; i < fields.length; i++) {
            singleSampleMap.put(fields[i], values[i]);
          }
          resultSampleMap.put(qual, singleSampleMap);
        }
        VariantStats variantStats =
            new VariantStats(
                chromosome,
                Integer.parseInt(inner_position),
                reference,
                alternate,
                stats.getMaf(),
                stats.getMgf(),
                stats.getMafAllele(),
                stats.getMgfGenotype(),
                stats.getMissingAlleles(),
                stats.getMissingGenotypes(),
                stats.getMendelianErrors(),
                stats.getIsIndel(),
                stats.getCasesPercentDominant(),
                stats.getControlsPercentDominant(),
                stats.getCasesPercentRecessive(),
                stats.getControlsPercentRecessive());
        partialResult.setStats(variantStats);
        resultsMap.put(new String(r.getRow(), CHARSET_UTF_8), partialResult);
      }

      for (Result r : hbaseResultEffect) {
        if (!r.isEmpty()) {
          NavigableMap<byte[], byte[]> effectMap = r.getFamilyMap("e".getBytes());
          Variant partialResult = resultsMap.get(new String(r.getRow(), CHARSET_UTF_8));
          System.out.println("Recuperado " + partialResult.toString());
          String s = partialResult.getReference() + "_" + partialResult.getAlternate();
          VariantEffectProtos.EffectInfo effectInfo =
              VariantEffectProtos.EffectInfo.parseFrom(effectMap.get(s.getBytes()));
          VariantEffect variantEffect =
              new VariantEffect(
                  partialResult.getChromosome(),
                  (int) partialResult.getStart(),
                  partialResult.getReference(),
                  partialResult.getAlternate(),
                  effectInfo.getFeatureId(),
                  effectInfo.getFeatureName(),
                  effectInfo.getFeatureType(),
                  effectInfo.getFeatureBiotype(),
                  effectInfo.getFeatureChromosome(),
                  effectInfo.getFeatureStart(),
                  effectInfo.getFeatureEnd(),
                  effectInfo.getFeatureStrand(),
                  effectInfo.getSnpId(),
                  effectInfo.getAncestral(),
                  effectInfo.getAlternative(),
                  effectInfo.getGeneId(),
                  effectInfo.getTranscriptId(),
                  effectInfo.getGeneName(),
                  effectInfo.getConsequenceType(),
                  effectInfo.getConsequenceTypeObo(),
                  effectInfo.getConsequenceTypeDesc(),
                  effectInfo.getConsequenceTypeType(),
                  effectInfo.getAaPosition(),
                  effectInfo.getAminoacidChange(),
                  effectInfo.getCodonChange());
          resultsMap.put(new String(r.getRow(), CHARSET_UTF_8), partialResult);
        }
      }
    } catch (InvalidProtocolBufferException e) {
      System.err.println(e.getClass().getName() + ": " + e.getMessage());
    } catch (IOException e) {
      System.err.println(e.getClass().getName() + ": " + e.getMessage());
    }

    List<Variant> results = new ArrayList<>(resultsMap.values());
    return results;
  }
コード例 #7
0
ファイル: TrainRun.java プロジェクト: super-sponge/hbase-demo
  /**
   * init station code map
   *
   * @param file station code file path
   */
  private static void initStationCode(String file) {

    List<String> lstStation = new Vector<>();
    strConfig = file + ".process";
    File fsP = new File(strConfig);
    if (!fsP.exists()) {
      try {
        fsP.createNewFile();
      } catch (IOException e) {
        e.printStackTrace();
      }
    } else {
      BufferedReader readp = null;
      try {
        String line = null;
        readp = new BufferedReader(new FileReader(fsP));
        while ((line = readp.readLine()) != null) {
          processed.add(line);
        }
      } catch (IOException e) {
        e.printStackTrace();
      } finally {
        if (readp != null) {
          try {
            readp.close();
          } catch (IOException e) {
            e.printStackTrace();
          }
        }
      }
    }
    File fs = new File(file);
    BufferedReader reader = null;
    try {
      String stationName = null;
      String stationCode = null;
      String line = null;
      String[] splitLine = null;
      reader = new BufferedReader(new FileReader(fs));
      while ((line = reader.readLine()) != null) {
        splitLine = line.split("\t");
        if (splitLine.length != 2) {
          continue;
        }
        stationName = splitLine[0];
        stationCode = splitLine[1];
        mapStationCode.put(stationName, stationCode);
        lstStation.add(stationName);
      }

    } catch (FileNotFoundException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      if (reader == null) {
        try {
          reader.close();
        } catch (IOException e1) {
          e1.printStackTrace();
        }
      }
    }

    int len = lstStation.size();
    String start = null;
    for (int i = 0; i < len; i++) {
      start = lstStation.get(i);
      for (String end : lstStation) {
        if (start.equals(end) || processed.contains(start + ":" + end)) {
          continue;
        }

        lstAllProcessStation.add(new DefaultKeyValue(start, end));
      }
    }
  }
コード例 #8
0
ファイル: TrainRun.java プロジェクト: super-sponge/hbase-demo
  public static List<KeyValue> getAllTrainInfo(Configuration config, String date) {
    List<KeyValue> result = new ArrayList<>();
    String strJson = null;
    BufferedWriter writer = null;
    Table table = null;
    try (Connection connect = ConnectionFactory.createConnection(config);
        Admin admin = connect.getAdmin()) {
      TableName tablename = TableName.valueOf(TABLE_NAME);
      if (!admin.tableExists(tablename)) {
        System.out.println("Table does not exist.");
        return null;
      }

      table = connect.getTable(tablename);
      Put put = null;
      String start = null;
      String end = null;
      writer = new BufferedWriter(new FileWriter(new File(strConfig), true));
      for (KeyValue item : lstAllProcessStation) {
        start = (String) item.getKey();
        end = (String) item.getValue();
        try {
          try {
            Thread.sleep(200);
          } catch (InterruptedException e1) {
            e1.printStackTrace();
          }
          System.out.println("process : " + start + ":" + end);
          strJson = getFromAPIX(mapStationCode.get(start), mapStationCode.get(end), date);
          writer.write(start + ":" + end);
          writer.newLine();

        } catch (Exception e) {
          System.out.println(start + ":" + end + "error");
          e.printStackTrace();
          break;
        }
        JSONObject jo = new JSONObject(strJson);

        if (jo.has("httpstatus") && (jo.getInt("httpstatus") == 200)) {
          JSONObject joData = jo.getJSONObject("data");
          if (joData.has("flag") && joData.getBoolean("flag")) {
            result.add(new DefaultKeyValue(start, end));
            // 插入到hbase
            String rowkey = start + ":" + end;
            put = new Put(rowkey.getBytes());
            put.addColumn(
                CF_JSON.getBytes(), "json".getBytes(), joData.toString().getBytes("utf-8"));
            table.put(put);

            System.out.println("start " + start + "\t end " + end + "\t has ticket");
          }
        }
      }
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      if (writer != null) {
        try {
          writer.flush();
          writer.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
      if (table != null) {
        try {
          table.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }

    return result;
  }