Esempio n. 1
0
 public void putBatch(Optional<List<Request>> putRequests, boolean optimize) {
   if (!valid) {
     Logger.error("CANNOT PUT! NO VALID CONNECTION");
     return;
   }
   List<Put> puts = new ArrayList<>();
   if (putRequests.isPresent() && !putRequests.get().isEmpty()) {
     String tableName = putRequests.get().get(0).table;
     putRequests
         .get()
         .forEach(
             pr ->
                 pr.getPut()
                     .ifPresent(
                         p -> {
                           if (optimize) {
                             p.setDurability(Durability.SKIP_WAL);
                           }
                           puts.add(p);
                         }));
     try {
       final Table table = connection.getTable(TableName.valueOf(tableName));
       if (optimize && table instanceof HTable) {
         ((HTable) table).setAutoFlush(false, true);
       }
       table.put(puts);
       table.close();
     } catch (IOException e) {
       e.printStackTrace();
     }
   }
 }
Esempio n. 2
0
 @Override
 public void readFields(DataInput input) throws IOException {
   byte[] tableNameBytes = Bytes.readByteArray(input);
   PName tableName = new PNameImpl(tableNameBytes);
   PTableType tableType = PTableType.values()[WritableUtils.readVInt(input)];
   long sequenceNumber = WritableUtils.readVLong(input);
   long timeStamp = input.readLong();
   byte[] pkNameBytes = Bytes.readByteArray(input);
   String pkName = pkNameBytes.length == 0 ? null : Bytes.toString(pkNameBytes);
   int nColumns = WritableUtils.readVInt(input);
   List<PColumn> columns = Lists.newArrayListWithExpectedSize(nColumns);
   for (int i = 0; i < nColumns; i++) {
     PColumn column = new PColumnImpl();
     column.readFields(input);
     columns.add(column);
   }
   Map<String, byte[][]> guidePosts = new HashMap<String, byte[][]>();
   int size = WritableUtils.readVInt(input);
   for (int i = 0; i < size; i++) {
     String key = WritableUtils.readString(input);
     int valueSize = WritableUtils.readVInt(input);
     byte[][] value = new byte[valueSize][];
     for (int j = 0; j < valueSize; j++) {
       value[j] = Bytes.readByteArray(input);
     }
     guidePosts.put(key, value);
   }
   PTableStats stats = new PTableStatsImpl(guidePosts);
   init(tableName, tableType, timeStamp, sequenceNumber, pkName, columns, stats);
 }
Esempio n. 3
0
 @Override
 public List<Mutation>
     toRowMutations() { // TODO: change to List<Mutation> once it implements Row
   List<Mutation> mutations = new ArrayList<Mutation>(3);
   if (deleteRow != null) {
     // Include only deleteRow mutation if present because it takes precedence over all others
     mutations.add(deleteRow);
   } else {
     // Because we cannot enforce a not null constraint on a KV column (since we don't know if
     // the row exists when
     // we upsert it), se instead add a KV that is always emtpy. This allows us to imitate SQL
     // semantics given the
     // way HBase works.
     setValues.add(
         SchemaUtil.getEmptyColumnFamily(getColumnFamilies()),
         QueryConstants.EMPTY_COLUMN_BYTES,
         ts,
         ByteUtil.EMPTY_BYTE_ARRAY);
     mutations.add(setValues);
     if (!unsetValues.isEmpty()) {
       mutations.add(unsetValues);
     }
   }
   return mutations;
 }
Esempio n. 4
0
  @Override
  public void mutateMany(Map<String, Map<ByteBuffer, KCVMutation>> mutations, StoreTransaction txh)
      throws StorageException {
    final long delTS = System.currentTimeMillis();
    final long putTS = delTS + 1;

    Map<ByteBuffer, Pair<Put, Delete>> commandsPerKey = convertToCommands(mutations, putTS, delTS);
    List<Row> batch = new ArrayList<Row>(commandsPerKey.size()); // actual batch operation

    // convert sorted commands into representation required for 'batch' operation
    for (Pair<Put, Delete> commands : commandsPerKey.values()) {
      if (commands.getFirst() != null) batch.add(commands.getFirst());

      if (commands.getSecond() != null) batch.add(commands.getSecond());
    }

    try {
      HTableInterface table = null;

      try {
        table = connectionPool.getTable(tableName);
        table.batch(batch);
        table.flushCommits();
      } finally {
        IOUtils.closeQuietly(table);
      }
    } catch (IOException e) {
      throw new TemporaryStorageException(e);
    } catch (InterruptedException e) {
      throw new TemporaryStorageException(e);
    }

    waitUntil(putTS);
  }
 @Override
 public List<QueryResult> getAllVariantsByRegionList(
     List<Region> regions, String sourceId, QueryOptions options) {
   List<QueryResult> allResults = new LinkedList<>();
   for (Region r : regions) {
     QueryResult queryResult = getAllVariantsByRegionAndStudy(r, sourceId, options);
     allResults.add(queryResult);
   }
   return allResults;
 }
Esempio n. 6
0
 public Optional<List<Response>> getBatch(Optional<List<Request>> requests) {
   if (!valid) {
     Logger.error("CANNOT GET! NO VALID CONNECTION");
     return Optional.empty();
   }
   List<Response> responses = new ArrayList<>();
   requests.ifPresent(
       reqs ->
           reqs.forEach(r -> get(Optional.of(r)).ifPresent(response -> responses.add(response))));
   return Optional.of(responses);
 }
Esempio n. 7
0
 private void removeIfPresent(Mutation m, byte[] family, byte[] qualifier) {
   Map<byte[], List<KeyValue>> familyMap = m.getFamilyMap();
   List<KeyValue> kvs = familyMap.get(family);
   if (kvs != null) {
     Iterator<KeyValue> iterator = kvs.iterator();
     while (iterator.hasNext()) {
       KeyValue kv = iterator.next();
       if (Bytes.compareTo(kv.getQualifier(), qualifier) == 0) {
         iterator.remove();
       }
     }
   }
 }
Esempio n. 8
0
 @Override
 public void write(DataOutput output) throws IOException {
   Bytes.writeByteArray(output, name.getBytes());
   WritableUtils.writeVInt(output, type.ordinal());
   WritableUtils.writeVLong(output, sequenceNumber);
   output.writeLong(timeStamp);
   Bytes.writeByteArray(
       output, pkName == null ? ByteUtil.EMPTY_BYTE_ARRAY : Bytes.toBytes(pkName));
   WritableUtils.writeVInt(output, allColumns.size());
   for (int i = 0; i < allColumns.size(); i++) {
     PColumn column = allColumns.get(i);
     column.write(output);
   }
   stats.write(output);
 }
Esempio n. 9
0
 public Optional<Put> getPut() {
   if (valid()) {
     Put p = new Put(key);
     columns.forEach(c -> p.addColumn(c.family.getBytes(), c.qualifier.getBytes(), c.value));
     return Optional.of(p);
   }
   return Optional.empty();
 }
Esempio n. 10
0
 @Override
 public PColumn getColumn(String name) throws ColumnNotFoundException, AmbiguousColumnException {
   List<PColumn> columns = columnsByName.get(name);
   int size = columns.size();
   if (size == 0) {
     throw new ColumnNotFoundException(name);
   }
   if (size > 1) {
     for (PColumn column : columns) {
       if (QueryConstants.DEFAULT_COLUMN_FAMILY.equals(column.getFamilyName().getString())) {
         // Allow ambiguity with default column, since a user would not know how to prefix it.
         return column;
       }
     }
     throw new AmbiguousColumnException(name);
   }
   return columns.get(0);
 }
Esempio n. 11
0
 @Override
 public PColumn getPKColumn(String name) throws ColumnNotFoundException {
   List<PColumn> columns = columnsByName.get(name);
   int size = columns.size();
   if (size == 0) {
     throw new ColumnNotFoundException(name);
   }
   if (size > 1) {
     do {
       PColumn column = columns.get(--size);
       if (column.getFamilyName() == null) {
         return column;
       }
     } while (size > 0);
     throw new ColumnNotFoundException(name);
   }
   return columns.get(0);
 }
  @Override
  public QueryResult<Variant> getAllVariantsByRegionAndStudy(
      Region region, String sourceId, QueryOptions options) {
    Long start, end, dbstart, dbend;
    start = System.currentTimeMillis();
    QueryResult<Variant> queryResult =
        new QueryResult<>(
            String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd()));
    List<Variant> results = new LinkedList<>();

    boolean includeSamples;
    boolean includeStats;
    boolean includeEffects;
    if (!options.containsKey("samples")
        && !options.containsKey("stats")
        && !options.containsKey("effects")) {
      includeSamples = true;
      includeStats = true;
      includeEffects = true;
    } else {
      includeSamples = options.containsKey("samples") && options.getBoolean("samples");
      includeStats = options.containsKey("stats") && options.getBoolean("stats");
      includeEffects = options.containsKey("effects") && options.getBoolean("effects");
    }

    try {
      String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart()));
      String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd()));
      HTable table = new HTable(admin.getConfiguration(), tableName);
      dbstart = System.currentTimeMillis();
      Scan regionScan = new Scan(startRow.getBytes(), stopRow.getBytes());
      ResultScanner scanres = table.getScanner(regionScan);
      dbend = System.currentTimeMillis();
      queryResult.setDbTime(dbend - dbstart);

      // Iterate over results and, optionally, their samples and statistics
      for (Result result : scanres) {
        String[] rowkeyParts = new String(result.getRow(), CHARSET_UTF_8).split("_");
        String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", "");
        int position = Integer.parseInt(rowkeyParts[1]);

        // Get basic result fields from Protocol Buffers message
        NavigableMap<byte[], byte[]> infoMap = result.getFamilyMap("i".getBytes());
        byte[] byteInfo = infoMap.get((sourceId + "_data").getBytes());
        VariantFieldsProtos.VariantInfo protoInfo =
            VariantFieldsProtos.VariantInfo.parseFrom(byteInfo);
        String reference = protoInfo.getReference();
        String alternate = StringUtils.join(protoInfo.getAlternateList(), ",");
        String format = StringUtils.join(protoInfo.getFormatList(), ":");
        Variant variant = new Variant(chromosome, position, position, reference, alternate);

        // Set samples if requested
        if (includeSamples) {
          NavigableMap<byte[], byte[]> sampleMap = result.getFamilyMap("d".getBytes());
          Map<String, Map<String, String>> resultSampleMap = new HashMap<>();

          // Set samples
          for (byte[] s : sampleMap.keySet()) {
            String sampleName = (new String(s, CHARSET_UTF_8)).replaceAll(sourceId + "_", "");
            VariantFieldsProtos.VariantSample sample =
                VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s));
            String sample1 = sample.getSample();
            String[] values = sample1.split(":");
            String[] fields = format.split(":");
            Map<String, String> singleSampleMap = new HashMap<>();
            for (int i = 0; i < fields.length; i++) {
              singleSampleMap.put(fields[i], values[i]);
            }
            // TODO
            //                        variant.addSampleData(sampleName, singleSampleMap);
          }
        }

        // Set stats if requested
        if (includeStats) {
          byte[] byteStats = infoMap.get((sourceId + "_stats").getBytes());
          VariantFieldsProtos.VariantStats protoStats =
              VariantFieldsProtos.VariantStats.parseFrom(byteStats);
          VariantStats variantStats =
              new VariantStats(
                  chromosome,
                  position,
                  reference,
                  alternate,
                  protoStats.getMaf(),
                  protoStats.getMgf(),
                  protoStats.getMafAllele(),
                  protoStats.getMgfGenotype(),
                  protoStats.getMissingAlleles(),
                  protoStats.getMissingGenotypes(),
                  protoStats.getMendelianErrors(),
                  protoStats.getIsIndel(),
                  protoStats.getCasesPercentDominant(),
                  protoStats.getControlsPercentDominant(),
                  protoStats.getCasesPercentRecessive(),
                  protoStats.getControlsPercentRecessive());
          variant.setStats(variantStats);
        }

        if (includeEffects) {
          QueryResult<VariantEffect> queryEffects = getEffectsByVariant(variant, options);
          variant.setEffect(queryEffects.getResult());
        }

        results.add(variant);
      }
    } catch (IOException e) {
      System.err.println(e.getClass().getName() + ": " + e.getMessage());
    }
    queryResult.setResult(results);
    queryResult.setNumResults(results.size());
    end = System.currentTimeMillis();
    queryResult.setTime(end - start);
    return queryResult;
  }
Esempio n. 13
0
  private void init(
      PName name,
      PTableType type,
      long timeStamp,
      long sequenceNumber,
      String pkName,
      List<PColumn> columns,
      PTableStats stats) {
    this.name = name;
    this.type = type;
    this.timeStamp = timeStamp;
    this.sequenceNumber = sequenceNumber;
    this.columnsByName = ArrayListMultimap.create(columns.size(), 1);
    this.pkName = pkName;
    List<PColumn> pkColumns = Lists.newArrayListWithExpectedSize(columns.size() - 1);
    PColumn[] allColumns = new PColumn[columns.size()];
    RowKeySchemaBuilder builder = new RowKeySchemaBuilder();
    for (int i = 0; i < allColumns.length; i++) {
      PColumn column = columns.get(i);
      allColumns[column.getPosition()] = column;
      PName familyName = column.getFamilyName();
      if (familyName == null) {
        pkColumns.add(column);
        builder.addField(column);
      }
      columnsByName.put(column.getName().getString(), column);
    }
    this.pkColumns = ImmutableList.copyOf(pkColumns);
    this.rowKeySchema = builder.setMinNullable(pkColumns.size()).build();
    this.allColumns = ImmutableList.copyOf(allColumns);

    // Two pass so that column order in column families matches overall column order
    // and to ensure that column family order is constant
    int maxExpectedSize = allColumns.length - pkColumns.size();
    // Maintain iteration order so that column families are ordered as they are listed
    Map<PName, List<PColumn>> familyMap = Maps.newLinkedHashMap();
    for (PColumn column : allColumns) {
      PName familyName = column.getFamilyName();
      if (familyName != null) {
        List<PColumn> columnsInFamily = familyMap.get(familyName);
        if (columnsInFamily == null) {
          columnsInFamily = Lists.newArrayListWithExpectedSize(maxExpectedSize);
          familyMap.put(familyName, columnsInFamily);
        }
        columnsInFamily.add(column);
      }
    }

    Iterator<Map.Entry<PName, List<PColumn>>> iterator = familyMap.entrySet().iterator();
    PColumnFamily[] families = new PColumnFamily[familyMap.size()];
    ImmutableMap.Builder<String, PColumnFamily> familyByString = ImmutableMap.builder();
    ImmutableSortedMap.Builder<byte[], PColumnFamily> familyByBytes =
        ImmutableSortedMap.orderedBy(Bytes.BYTES_COMPARATOR);
    for (int i = 0; i < families.length; i++) {
      Map.Entry<PName, List<PColumn>> entry = iterator.next();
      PColumnFamily family = new PColumnFamilyImpl(entry.getKey(), entry.getValue());
      families[i] = family;
      familyByString.put(family.getName().getString(), family);
      familyByBytes.put(family.getName().getBytes(), family);
    }
    this.families = ImmutableList.copyOf(families);
    this.familyByBytes = familyByBytes.build();
    this.familyByString = familyByString.build();
    this.stats = stats;
  }
  public List<Variant> getRecordSimpleStats(
      String study, int missing_gt, float maf, String maf_allele) {
    BasicDBObject compare =
        new BasicDBObject("studies.stats.allele_maf", maf_allele)
            .append("studies.stats.MAF", maf)
            .append("studies.stats.missing", missing_gt);
    List<Get> hbaseQuery = new ArrayList<>();
    DBCollection collection = db.getCollection("variants");
    Iterator<DBObject> result = collection.find(compare);
    String chromosome = new String();
    while (result.hasNext()) {
      DBObject variant = result.next();
      String position = variant.get("_id").toString();
      // hbase query construction
      Get get = new Get(position.getBytes());
      hbaseQuery.add(get);
    }
    // Complete results, from HBase

    tableName = study;
    effectTableName = tableName + "effect";
    Map<String, Variant> resultsMap = new HashMap<>();

    try {
      HTable table = new HTable(admin.getConfiguration(), tableName);
      HTable effectTable = new HTable(admin.getConfiguration(), effectTableName);
      Result[] hbaseResultEffect = effectTable.get(hbaseQuery);
      Result[] hbaseResultStats = table.get(hbaseQuery);

      //            List<Variant> results = new LinkedList<>();
      for (Result r : hbaseResultStats) {
        String position = new String(r.getRow(), CHARSET_UTF_8);
        String[] aux = position.split("_");
        String inner_position = aux[1];
        String chr = aux[0];
        // position parsing
        if (chr.startsWith("0")) {
          chr = chr.substring(1);
        }
        while (inner_position.startsWith("0")) {
          inner_position = inner_position.substring(1);
        }
        List<VariantFieldsProtos.VariantSample> samples = new LinkedList<>();
        NavigableMap<byte[], byte[]> infoMap = r.getFamilyMap("i".getBytes());
        byte[] byteStats = infoMap.get((study + "_stats").getBytes());
        VariantFieldsProtos.VariantStats stats =
            VariantFieldsProtos.VariantStats.parseFrom(byteStats);
        byte[] byteInfo = infoMap.get((study + "_data").getBytes());
        VariantFieldsProtos.VariantInfo info = VariantFieldsProtos.VariantInfo.parseFrom(byteInfo);
        String alternate = StringUtils.join(info.getAlternateList(), ", ");
        String reference = info.getReference();
        Variant partialResult =
            new Variant(
                chr,
                Integer.parseInt(inner_position),
                Integer.parseInt(inner_position),
                reference,
                alternate);
        String format = StringUtils.join(info.getFormatList(), ":");
        NavigableMap<byte[], byte[]> sampleMap = r.getFamilyMap("d".getBytes());
        Map<String, Map<String, String>> resultSampleMap = new HashMap<>();
        //                StringBuilder sampleRaw = new StringBuilder();
        for (byte[] s : sampleMap.keySet()) {
          String qual = (new String(s, CHARSET_UTF_8)).replaceAll(study + "_", "");
          VariantFieldsProtos.VariantSample sample =
              VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s));
          String sample1 = sample.getSample();
          String[] values = sample1.split(":");
          String[] fields = format.split(":");
          Map<String, String> singleSampleMap = new HashMap<>();
          for (int i = 0; i < fields.length; i++) {
            singleSampleMap.put(fields[i], values[i]);
          }
          resultSampleMap.put(qual, singleSampleMap);
        }
        VariantStats variantStats =
            new VariantStats(
                chromosome,
                Integer.parseInt(inner_position),
                reference,
                alternate,
                stats.getMaf(),
                stats.getMgf(),
                stats.getMafAllele(),
                stats.getMgfGenotype(),
                stats.getMissingAlleles(),
                stats.getMissingGenotypes(),
                stats.getMendelianErrors(),
                stats.getIsIndel(),
                stats.getCasesPercentDominant(),
                stats.getControlsPercentDominant(),
                stats.getCasesPercentRecessive(),
                stats.getControlsPercentRecessive());
        partialResult.setStats(variantStats);
        resultsMap.put(new String(r.getRow(), CHARSET_UTF_8), partialResult);
      }

      for (Result r : hbaseResultEffect) {
        if (!r.isEmpty()) {
          NavigableMap<byte[], byte[]> effectMap = r.getFamilyMap("e".getBytes());
          Variant partialResult = resultsMap.get(new String(r.getRow(), CHARSET_UTF_8));
          System.out.println("Recuperado " + partialResult.toString());
          String s = partialResult.getReference() + "_" + partialResult.getAlternate();
          VariantEffectProtos.EffectInfo effectInfo =
              VariantEffectProtos.EffectInfo.parseFrom(effectMap.get(s.getBytes()));
          VariantEffect variantEffect =
              new VariantEffect(
                  partialResult.getChromosome(),
                  (int) partialResult.getStart(),
                  partialResult.getReference(),
                  partialResult.getAlternate(),
                  effectInfo.getFeatureId(),
                  effectInfo.getFeatureName(),
                  effectInfo.getFeatureType(),
                  effectInfo.getFeatureBiotype(),
                  effectInfo.getFeatureChromosome(),
                  effectInfo.getFeatureStart(),
                  effectInfo.getFeatureEnd(),
                  effectInfo.getFeatureStrand(),
                  effectInfo.getSnpId(),
                  effectInfo.getAncestral(),
                  effectInfo.getAlternative(),
                  effectInfo.getGeneId(),
                  effectInfo.getTranscriptId(),
                  effectInfo.getGeneName(),
                  effectInfo.getConsequenceType(),
                  effectInfo.getConsequenceTypeObo(),
                  effectInfo.getConsequenceTypeDesc(),
                  effectInfo.getConsequenceTypeType(),
                  effectInfo.getAaPosition(),
                  effectInfo.getAminoacidChange(),
                  effectInfo.getCodonChange());
          resultsMap.put(new String(r.getRow(), CHARSET_UTF_8), partialResult);
        }
      }
    } catch (InvalidProtocolBufferException e) {
      System.err.println(e.getClass().getName() + ": " + e.getMessage());
    } catch (IOException e) {
      System.err.println(e.getClass().getName() + ": " + e.getMessage());
    }

    List<Variant> results = new ArrayList<>(resultsMap.values());
    return results;
  }
  public QueryResult getSimpleVariantsByRegion(
      Region region, String sourceId, QueryOptions options) {
    Long start, end, dbstart, dbend;
    start = System.currentTimeMillis();
    boolean includeStats;
    boolean includeEffects;
    if (!options.containsKey("stats") && !options.containsKey("effects")) {
      includeStats = true;
      includeEffects = true;
    } else {
      includeStats = options.containsKey("stats") && options.getBoolean("stats");
      includeEffects = options.containsKey("effects") && options.getBoolean("effects");
    }

    QueryResult<Variant> queryResult =
        new QueryResult<>(
            String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd()));
    List<Variant> results = new ArrayList<>();
    String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart()));
    String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd()));
    BasicDBObject query =
        new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow))
            .append("sources.sourceId", sourceId);
    DBCollection collection = db.getCollection("variants");
    dbstart = System.currentTimeMillis();
    DBCursor variantInStudies = collection.find(query);
    dbend = System.currentTimeMillis();
    queryResult.setDbTime(dbend - dbstart);

    for (DBObject result : variantInStudies) {
      String[] rowkeyParts = result.get("position").toString().split("_");
      String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", "");
      int position = Integer.parseInt(rowkeyParts[1]);
      BasicDBList studies = (BasicDBList) result.get("sources");
      BasicDBObject st = (BasicDBObject) studies.get(0);
      String ref = (String) st.get("ref");
      String alt = StringUtils.join((ArrayList<String>) st.get("alt"), ",");

      // TODO Needs rework
      Variant variant = new Variant(chromosome, position, position, ref, alt);

      // Set stats informations
      if (includeStats) {
        VariantStats stats = new VariantStats();
        BasicDBObject mongoStats = (BasicDBObject) st.get("stats");
        stats.setMaf((float) (double) mongoStats.get("maf"));
        stats.setMafAllele((String) mongoStats.get("alleleMaf"));
        stats.setMissingGenotypes((int) mongoStats.get("missing"));
        List<Genotype> genotypeCount = new ArrayList<>();
        for (BasicDBObject s : (List<BasicDBObject>) mongoStats.get("genotypeCount")) {
          for (Map.Entry<String, Object> entry : s.entrySet()) {
            Genotype genotype = new Genotype(entry.getKey());
            genotype.setCount((Integer) entry.getValue());
            genotypeCount.add(genotype);
          }
        }
        stats.setGenotypes(genotypeCount);
        variant.setStats(stats);
      }

      // TODO Set consequence type names
      if (includeEffects) {
        BasicDBList mongoEffects = (BasicDBList) st.get("effects");
        if (mongoEffects != null) {
          for (Object e : mongoEffects) {
            String effectObo = e.toString();
            VariantEffect effect = new VariantEffect();
            effect.setConsequenceTypeObo(effectObo);
            variant.addEffect(effect);
          }
        }
      }

      results.add(variant);
    }

    queryResult.setResult(results);
    queryResult.setNumResults(results.size());
    end = System.currentTimeMillis();
    queryResult.setTime(end - start);
    return queryResult;
  }
  @Override
  public QueryResult getVariantsHistogramByRegion(
      Region region, String sourceId, boolean histogramLogarithm, int histogramMax) {
    QueryResult<ObjectMap> queryResult =
        new QueryResult<>(
            String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd()));
    List<ObjectMap> data = new ArrayList<>();
    String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart()));
    String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd()));

    long startTime = System.currentTimeMillis();

    long startDbTime = System.currentTimeMillis();

    BasicDBObject query =
        new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow))
            .append("studies.studyId", sourceId);
    DBCollection collection = db.getCollection("variants");
    DBCursor queryResults = collection.find(query);
    queryResult.setDbTime(System.currentTimeMillis() - startDbTime);

    int resultSize = queryResults.size();

    if (resultSize > histogramMax) { // Need to group results to fit maximum size of the histogram
      int sumChunkSize = resultSize / histogramMax;
      int i = 0, j = 0;
      int featuresCount = 0;
      ObjectMap item = null;

      for (DBObject result : queryResults) {
        //                featuresCount += result.getInt("features_count");
        //                if (i == 0) {
        //                    item = new ObjectMap("chromosome", result.getString("chromosome"));
        //                    item.put("chunkId", result.getInt("chunk_id"));
        //                    item.put("start", result.getInt("start"));
        //                } else if (i == sumChunkSize - 1 || j == resultSize - 1) {
        //                    if (histogramLogarithm) {
        //                        item.put("featuresCount", (featuresCount > 0) ?
        // Math.log(featuresCount) : 0);
        //                    } else {
        //                        item.put("featuresCount", featuresCount);
        //                    }
        //                    item.put("end", result.getInt("end"));
        //                    data.add(item);
        //                    i = -1;
        //                    featuresCount = 0;
        //                }
        //                j++;
        //                i++;
      }
    } else {
      for (DBObject result : queryResults) {
        //                ObjectMap item = new ObjectMap("chromosome",
        // result.getString("chromosome"));
        //                item.put("chunkId", result.getInt("chunk_id"));
        //                item.put("start", result.getInt("start"));
        //                if (histogramLogarithm) {
        //                    int features_count = result.getInt("features_count");
        //                    result.put("featuresCount", (features_count > 0) ?
        // Math.log(features_count) : 0);
        //                } else {
        //                    item.put("featuresCount", result.getInt("features_count"));
        //                }
        //                item.put("end", result.getInt("end"));
        //                data.add(item);
      }
    }

    queryResult.setResult(data);
    queryResult.setNumResults(data.size());
    queryResult.setTime(System.currentTimeMillis() - startTime);

    return queryResult;
  }
Esempio n. 17
0
 @Override
 public int newKey(ImmutableBytesWritable key, byte[][] values) {
   int i = 0;
   TrustedByteArrayOutputStream os =
       new TrustedByteArrayOutputStream(SchemaUtil.estimateKeyLength(this));
   try {
     List<PColumn> columns = getPKColumns();
     int nColumns = columns.size();
     PColumn lastPKColumn = columns.get(nColumns - 1);
     while (i < values.length && i < nColumns) {
       PColumn column = columns.get(i);
       PDataType type = column.getDataType();
       // This will throw if the value is null and the type doesn't allow null
       byte[] byteValue = values[i++];
       if (byteValue == null) {
         byteValue = ByteUtil.EMPTY_BYTE_ARRAY;
       }
       // An empty byte array return value means null. Do this,
       // since a type may have muliple representations of null.
       // For example, VARCHAR treats both null and an empty string
       // as null. This way we don't need to leak that part of the
       // implementation outside of PDataType by checking the value
       // here.
       if (byteValue.length == 0 && !column.isNullable()) {
         throw new ConstraintViolationException(
             name.getString() + "." + column.getName().getString() + " may not be null");
       }
       Integer byteSize = column.getByteSize();
       if (type.isFixedWidth()) { // TODO: handle multi-byte characters
         if (byteValue.length != byteSize) {
           throw new ConstraintViolationException(
               name.getString()
                   + "."
                   + column.getName().getString()
                   + " must be "
                   + byteSize
                   + " bytes ("
                   + SchemaUtil.toString(type, byteValue)
                   + ")");
         }
       } else if (byteSize != null && byteValue.length > byteSize) {
         throw new ConstraintViolationException(
             name.getString()
                 + "."
                 + column.getName().getString()
                 + " may not exceed "
                 + byteSize
                 + " bytes ("
                 + SchemaUtil.toString(type, byteValue)
                 + ")");
       }
       os.write(byteValue, 0, byteValue.length);
       // Separate variable length column values in key with zero byte
       if (!type.isFixedWidth() && column != lastPKColumn) {
         os.write(SEPARATOR_BYTE);
       }
     }
     // If some non null pk values aren't set, then throw
     if (i < nColumns) {
       PColumn column = columns.get(i);
       PDataType type = column.getDataType();
       if (type.isFixedWidth() || !column.isNullable()) {
         throw new ConstraintViolationException(
             name.getString() + "." + column.getName().getString() + " may not be null");
       }
       // Separate variable length column values in key with zero byte
       if (column != lastPKColumn) {
         os.write(SEPARATOR_BYTE);
       }
     }
     key.set(os.getBuffer(), 0, os.size());
     return i;
   } finally {
     try {
       os.close();
     } catch (IOException e) {
       throw new RuntimeException(e); // Impossible
     }
   }
 }
Esempio n. 18
0
  /**
   * init station code map
   *
   * @param file station code file path
   */
  private static void initStationCode(String file) {

    List<String> lstStation = new Vector<>();
    strConfig = file + ".process";
    File fsP = new File(strConfig);
    if (!fsP.exists()) {
      try {
        fsP.createNewFile();
      } catch (IOException e) {
        e.printStackTrace();
      }
    } else {
      BufferedReader readp = null;
      try {
        String line = null;
        readp = new BufferedReader(new FileReader(fsP));
        while ((line = readp.readLine()) != null) {
          processed.add(line);
        }
      } catch (IOException e) {
        e.printStackTrace();
      } finally {
        if (readp != null) {
          try {
            readp.close();
          } catch (IOException e) {
            e.printStackTrace();
          }
        }
      }
    }
    File fs = new File(file);
    BufferedReader reader = null;
    try {
      String stationName = null;
      String stationCode = null;
      String line = null;
      String[] splitLine = null;
      reader = new BufferedReader(new FileReader(fs));
      while ((line = reader.readLine()) != null) {
        splitLine = line.split("\t");
        if (splitLine.length != 2) {
          continue;
        }
        stationName = splitLine[0];
        stationCode = splitLine[1];
        mapStationCode.put(stationName, stationCode);
        lstStation.add(stationName);
      }

    } catch (FileNotFoundException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      if (reader == null) {
        try {
          reader.close();
        } catch (IOException e1) {
          e1.printStackTrace();
        }
      }
    }

    int len = lstStation.size();
    String start = null;
    for (int i = 0; i < len; i++) {
      start = lstStation.get(i);
      for (String end : lstStation) {
        if (start.equals(end) || processed.contains(start + ":" + end)) {
          continue;
        }

        lstAllProcessStation.add(new DefaultKeyValue(start, end));
      }
    }
  }
Esempio n. 19
0
 public void createTable(String tableName, String columnFamily) {
   List<String> families = new ArrayList<>();
   families.add(columnFamily);
   createTable(tableName, families);
 }
Esempio n. 20
0
  public int run(String[] args) throws Exception {
    // printUsage();
    /*
     * SETUP
     */
    Configuration argConf = getConf();
    Hashtable<String, String> confArg = new Hashtable<String, String>();
    setup(confArg, argConf);
    Date currentTime = new Date();
    Date endDate = new Date(new Long(confArg.get("timestamp_stop")));
    Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*");
    Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*");
    logger.info("Running GeStore");

    // ZooKeeper setup
    Configuration config = HBaseConfiguration.create();
    zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config));
    zkInstance =
        new ZooKeeper(
            ZKConfig.getZKQuorumServersString(config),
            config.getInt("zookeeper.session.timeout", -1),
            zkWatcher);

    if (!confArg.get("task_id").isEmpty()) {
      confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id"));
    }

    String lockRequest = confArg.get("file_id");
    if (!confArg.get("run_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("run_id") + "_";
    if (!confArg.get("task_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("task_id") + "_";

    // Get type of movement
    toFrom type_move = checkArgs(confArg);
    if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) {
      List<String> arguments = new ArrayList<String>();
      arguments.add("-Dinput=" + confArg.get("local_path"));
      arguments.add("-Dtable=" + confArg.get("file_id"));
      arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop"));
      arguments.add("-Dtype=" + confArg.get("format"));
      arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id"));
      arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path"));
      arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id"));
      if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add"));
      String lockName = lock(lockRequest);
      String[] argumentString = arguments.toArray(new String[arguments.size()]);
      adddb.main(argumentString);
      unlock(lockName);
      System.exit(0);
    }

    // Database registration

    dbutil db_util = new dbutil(config);
    db_util.register_database(confArg.get("db_name_files"), true);
    db_util.register_database(confArg.get("db_name_runs"), true);
    db_util.register_database(confArg.get("db_name_updates"), true);
    FileSystem hdfs = FileSystem.get(config);
    FileSystem localFS = FileSystem.getLocal(config);

    // Get source type
    confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    confArg.put(
        "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    if (!confArg.get("source").equals("local")
        && type_move == toFrom.REMOTE2LOCAL
        && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) {
      confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util)));
    }

    /*
     * Get previous timestamp
     */
    Get run_id_get = new Get(confArg.get("run_id").getBytes());
    Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get);
    KeyValue run_file_prev =
        run_get.getColumnLatest(
            "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes());
    String last_timestamp = new String("0");
    if (null != run_file_prev && !confArg.get("source").equals("local")) {
      long last_timestamp_real = run_file_prev.getTimestamp();
      Long current_timestamp = new Long(confArg.get("timestamp_real"));
      if ((current_timestamp - last_timestamp_real) > 36000) {
        last_timestamp = new String(run_file_prev.getValue());
        Integer lastTimestamp = new Integer(last_timestamp);
        lastTimestamp += 1;
        last_timestamp = lastTimestamp.toString();
        logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate);
        Date last_run = new Date(run_file_prev.getTimestamp());
        if (last_run.before(endDate) && !full_run) {
          confArg.put("timestamp_start", last_timestamp);
        }
      }
    }

    Integer tse = new Integer(confArg.get("timestamp_stop"));
    Integer tss = new Integer(confArg.get("timestamp_start"));
    if (tss > tse) {
      logger.info("No new version of requested file.");
      return 0;
    }

    /*
     * Generate file
     */

    String lockName = lock(lockRequest);

    Get file_id_get = new Get(confArg.get("file_id").getBytes());
    Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get);
    if (!file_get.isEmpty()) {
      boolean found =
          hasFile(
              db_util,
              hdfs,
              confArg.get("db_name_files"),
              confArg.get("file_id"),
              getFullPath(confArg));
      if (confArg.get("source").equals("fullfile")) {
        found = false;
      }
      String filenames_put =
          getFileNames(
              db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg));
      // Filename not found in file database
      if (!found && type_move == toFrom.REMOTE2LOCAL) {
        if (!confArg.get("source").equals("local")) {
          // Generate intermediate file
          if (getFile(hdfs, confArg, db_util) == null) {
            unlock(lockName);
            return 1;
          }
          // Put generated file into file database
          if (!confArg.get("format").equals("fullfile")) {
            putFileEntry(
                db_util,
                hdfs,
                confArg.get("db_name_files"),
                confArg.get("file_id"),
                confArg.get("full_file_name"),
                confArg.get("source"));
          }
        } else {
          logger.warn("Remote file not found, and cannot be generated! File: " + confArg);
          unlock(lockName);
          return 1;
        }
      }
    } else {
      if (type_move == toFrom.REMOTE2LOCAL) {
        logger.warn("Remote file not found, and cannot be generated.");
        unlock(lockName);
        return 1;
      }
    }

    /*
     * Copy file
     * Update tables
     */

    if (type_move == toFrom.LOCAL2REMOTE) {
      if (!confArg.get("format").equals("fullfile")) {
        putFileEntry(
            db_util,
            hdfs,
            confArg.get("db_name_files"),
            confArg.get("file_id"),
            getFullPath(confArg),
            confArg.get("source"));
      }
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg)));
    } else if (type_move == toFrom.REMOTE2LOCAL) {
      FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*"));
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      unlock(lockName);
      for (FileStatus file : files) {
        Path cur_file = file.getPath();
        Path cur_local_path =
            new Path(new String(confArg.get("local_path") + confArg.get("file_id")));
        String suffix = getSuffix(getFileName(confArg), cur_file.getName());
        if (suffix.length() > 0) {
          cur_local_path = cur_local_path.suffix(new String("." + suffix));
        }
        if (confArg.get("copy").equals("true")) {
          String crc = hdfs.getFileChecksum(cur_file).toString();
          if (checksumLocalTest(cur_local_path, crc)) {
            continue;
          } else {
            hdfs.copyToLocalFile(cur_file, cur_local_path);
            writeChecksum(cur_local_path, crc);
          }
        } else {
          System.out.println(cur_local_path + "\t" + cur_file);
        }
      }
    }
    unlock(lockName);
    return 0;
  }
Esempio n. 21
0
 public boolean valid() {
   return !table.isEmpty()
       && key != null
       && key.length > 0
       && columns.stream().filter(c -> !c.valid()).count() == 0;
 }
Esempio n. 22
0
  public static List<KeyValue> getAllTrainInfo(Configuration config, String date) {
    List<KeyValue> result = new ArrayList<>();
    String strJson = null;
    BufferedWriter writer = null;
    Table table = null;
    try (Connection connect = ConnectionFactory.createConnection(config);
        Admin admin = connect.getAdmin()) {
      TableName tablename = TableName.valueOf(TABLE_NAME);
      if (!admin.tableExists(tablename)) {
        System.out.println("Table does not exist.");
        return null;
      }

      table = connect.getTable(tablename);
      Put put = null;
      String start = null;
      String end = null;
      writer = new BufferedWriter(new FileWriter(new File(strConfig), true));
      for (KeyValue item : lstAllProcessStation) {
        start = (String) item.getKey();
        end = (String) item.getValue();
        try {
          try {
            Thread.sleep(200);
          } catch (InterruptedException e1) {
            e1.printStackTrace();
          }
          System.out.println("process : " + start + ":" + end);
          strJson = getFromAPIX(mapStationCode.get(start), mapStationCode.get(end), date);
          writer.write(start + ":" + end);
          writer.newLine();

        } catch (Exception e) {
          System.out.println(start + ":" + end + "error");
          e.printStackTrace();
          break;
        }
        JSONObject jo = new JSONObject(strJson);

        if (jo.has("httpstatus") && (jo.getInt("httpstatus") == 200)) {
          JSONObject joData = jo.getJSONObject("data");
          if (joData.has("flag") && joData.getBoolean("flag")) {
            result.add(new DefaultKeyValue(start, end));
            // 插入到hbase
            String rowkey = start + ":" + end;
            put = new Put(rowkey.getBytes());
            put.addColumn(
                CF_JSON.getBytes(), "json".getBytes(), joData.toString().getBytes("utf-8"));
            table.put(put);

            System.out.println("start " + start + "\t end " + end + "\t has ticket");
          }
        }
      }
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      if (writer != null) {
        try {
          writer.flush();
          writer.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
      if (table != null) {
        try {
          table.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }

    return result;
  }