Example #1
0
  @Test
  public void insert_rowkey_prefix_date() throws IOException {

    System.out.println(errorTable);
    errorTable.setAutoFlushTo(false);
    List<Put> puts = new ArrayList<Put>();
    long t1 = System.currentTimeMillis();
    for (int i = 0; i < 10000000; i++) {
      String uuid = UUID.randomUUID().toString().replaceAll("-", "").substring(0, 8);
      Put put = new Put(Bytes.toBytes("20150705" + "_" + uuid));
      put.add(
          fBytes,
          Bytes.toBytes("stacktrace"),
          Bytes.toBytes("java.io.IOException:file not found" + UUID.randomUUID().toString()));
      //            puts.add(put);
      errorTable.put(put);
      if (i % 10000 == 0) {
        errorTable.flushCommits();
      }
    }
    errorTable.flushCommits();
    long t2 = System.currentTimeMillis();
    System.out.println("count=" + puts.size() + ",t2-t1=" + (t2 - t1));
    //        errorTable.close();
  }
 public void doScan() throws IOException {
   Table tableRef = connection.getTable(TableName.valueOf(table));
   Scan scan = new Scan();
   ResultScanner scanner = tableRef.getScanner(scan);
   long now = System.currentTimeMillis();
   if (verbose) System.out.println("Starting scan");
   for (Result res : scanner) {
     if (verbose) System.out.println(res);
   }
   if (verbose) System.out.printf("Scan finished: %d ms\n\n", System.currentTimeMillis() - now);
   tableRef.close();
 }
Example #3
0
  private static void waitUntil(long until) {
    long now = System.currentTimeMillis();

    while (now <= until) {
      try {
        Thread.sleep(1L);
        now = System.currentTimeMillis();
      } catch (InterruptedException e) {
        throw new RuntimeException(e);
      }
    }
  }
Example #4
0
  @Override
  public void mutateMany(Map<String, Map<ByteBuffer, KCVMutation>> mutations, StoreTransaction txh)
      throws StorageException {
    final long delTS = System.currentTimeMillis();
    final long putTS = delTS + 1;

    Map<ByteBuffer, Pair<Put, Delete>> commandsPerKey = convertToCommands(mutations, putTS, delTS);
    List<Row> batch = new ArrayList<Row>(commandsPerKey.size()); // actual batch operation

    // convert sorted commands into representation required for 'batch' operation
    for (Pair<Put, Delete> commands : commandsPerKey.values()) {
      if (commands.getFirst() != null) batch.add(commands.getFirst());

      if (commands.getSecond() != null) batch.add(commands.getSecond());
    }

    try {
      HTableInterface table = null;

      try {
        table = connectionPool.getTable(tableName);
        table.batch(batch);
        table.flushCommits();
      } finally {
        IOUtils.closeQuietly(table);
      }
    } catch (IOException e) {
      throw new TemporaryStorageException(e);
    } catch (InterruptedException e) {
      throw new TemporaryStorageException(e);
    }

    waitUntil(putTS);
  }
Example #5
0
  @Override
  public void tweet(User user, String tweetText) throws IOException {
    final long epoch = System.currentTimeMillis();
    final long tranposeEpoch = Long.MAX_VALUE - epoch;
    final byte[] epochBytes = Bytes.toBytes(epoch);

    final byte[] tweetBytes = Bytes.toBytes(tweetText);
    byte[] nameBytes = Bytes.toBytes(user.getName());

    /** put tweet into tweets */
    Put tweetRowPut = new Put(generateTweetId(user));

    tweetRowPut.add(_DEFAULT, _NAME, nameBytes);
    tweetRowPut.add(_DEFAULT, _MAIL, Bytes.toBytes(user.getEmail()));
    tweetRowPut.add(_DEFAULT, _TWEET, tweetBytes);
    tweetRowPut.add(_DEFAULT, _TIME, epochBytes);

    tweetsTable.put(tweetRowPut);

    /** put tweets for followers */
    Scan followerScan = new Scan();
    followerScan.setStartRow(Bytes.toBytes(user.getUserId() + "-"));
    followerScan.setStopRow(Bytes.toBytes((user.getUserId() + 1) + "-"));

    ResultScanner followerRS = followersTable.getScanner(followerScan);

    /** put users on tweet to her own tweetline */
    Put put =
        new Put(Bytes.toBytes(user.getUserId() + "-" + tranposeEpoch + "-" + user.getUserId()));
    put.add(_DEFAULT, _NAME, nameBytes);
    put.add(_DEFAULT, _TWEET, tweetBytes);
    put.add(_DEFAULT, _TIME, epochBytes);

    List<Row> puts = new ArrayList<Row>();
    puts.add(put);
    for (Result result : followerRS) {

      Long followerid = Bytes.toLong(result.getColumnLatest(_DEFAULT, _USERID).getValue());

      put = new Put(Bytes.toBytes(followerid + "-" + tranposeEpoch + "-" + user.getUserId()));
      put.add(_DEFAULT, _NAME, nameBytes);
      put.add(_DEFAULT, _TWEET, tweetBytes);
      put.add(_DEFAULT, _TIME, epochBytes);

      puts.add(put);
    }
    followerRS.close();
    try {
      tweetlineTable.batch(puts);
    } catch (InterruptedException e) {
      e.printStackTrace(); // @TODO log and handle properly.
    }
  }
Example #6
0
  @Test
  public void scan_by_prefix_date() throws IOException {
    FilterList fl = new FilterList();
    Filter filter =
        new RowFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("20150903"));
    fl.addFilter(filter);
    Scan scan = new Scan();
    scan.setFilter(fl);

    long t1 = System.currentTimeMillis();
    ResultScanner rs = errorTable.getScanner(scan);
    Result result;
    int count = 0;
    while ((result = rs.next()) != null) {
      System.out.println("rowkey=" + new String(result.getRow()));
      System.out.println(
          "value=" + new String(result.getValue(fBytes, Bytes.toBytes("stacktrace"))));
      System.out.println();
      count++;
    }
    long t2 = System.currentTimeMillis();
    System.out.println("count=" + count + ",t2 - t1=" + ((t2 - t1) / 1000));
  }
  public static void main(String[] args) throws Exception {
    if (args.length < 5) {
      System.err.printf(
          "Usage: %s <conf> <keytab> <user> <table> <reps> [-v]\n", ExampleJavaClient.class);
      System.exit(-1);
    }

    ExampleJavaClient exampleJavaClient = new ExampleJavaClient(args[0], args[1], args[2], args[3]);
    exampleJavaClient.initialise();
    if (args.length == 6 && args[5].equals("-v")) {
      exampleJavaClient.verbose(true);
    }

    int reps = Integer.parseInt(args[4]);

    long start = System.currentTimeMillis();
    System.out.println("Beginning " + reps + " scans");
    for (int i = 0; i < reps; ++i) {
      exampleJavaClient.doScan();
    }
    System.out.println("Runtime: " + (System.currentTimeMillis() - start) + "ms");
    System.out.println("Ended scans for " + args[2]);
  }
Example #8
0
 private byte[] generateTweetId(User user) {
   return Bytes.toBytes(user.getUserId() + "-" + (Long.MAX_VALUE - System.currentTimeMillis()));
 }
  @Test
  public void testRun() throws Exception {

    TableName tn = TableName.valueOf(tableName);
    byte[] mobValueBytes = new byte[100];

    // get the path where mob files lie in
    Path mobFamilyPath = MobUtils.getMobFamilyPath(TEST_UTIL.getConfiguration(), tn, family);

    Put put = new Put(Bytes.toBytes(row));
    put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qf), 1, mobValueBytes);
    Put put2 = new Put(Bytes.toBytes(row + "ignore"));
    put2.addColumn(Bytes.toBytes(family), Bytes.toBytes(qf), 1, mobValueBytes);
    table.mutate(put);
    table.mutate(put2);
    table.flush();
    admin.flush(tn);

    FileStatus[] fileStatuses = TEST_UTIL.getTestFileSystem().listStatus(mobFamilyPath);
    // check the generation of a mob file
    assertEquals(1, fileStatuses.length);

    String mobFile1 = fileStatuses[0].getPath().getName();

    Configuration configuration = new Configuration(TEST_UTIL.getConfiguration());
    configuration.setFloat(MobConstants.MOB_SWEEP_TOOL_COMPACTION_RATIO, 0.6f);
    configuration.setStrings(TableInputFormat.INPUT_TABLE, tableName);
    configuration.setStrings(TableInputFormat.SCAN_COLUMN_FAMILY, family);
    configuration.setStrings(SweepJob.WORKING_VISITED_DIR_KEY, "jobWorkingNamesDir");
    configuration.setStrings(SweepJob.WORKING_FILES_DIR_KEY, "compactionFileDir");
    configuration.setStrings(
        CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, JavaSerialization.class.getName());
    configuration.set(SweepJob.WORKING_VISITED_DIR_KEY, "compactionVisitedDir");
    configuration.setLong(
        MobConstants.MOB_SWEEP_TOOL_COMPACTION_START_DATE,
        System.currentTimeMillis() + 24 * 3600 * 1000);

    ZooKeeperWatcher zkw = new ZooKeeperWatcher(configuration, "1", new DummyMobAbortable());
    TableName lockName = MobUtils.getTableLockName(tn);
    String znode = ZKUtil.joinZNode(zkw.tableLockZNode, lockName.getNameAsString());
    configuration.set(SweepJob.SWEEP_JOB_ID, "1");
    configuration.set(SweepJob.SWEEP_JOB_TABLE_NODE, znode);
    ServerName serverName = SweepJob.getCurrentServerName(configuration);
    configuration.set(SweepJob.SWEEP_JOB_SERVERNAME, serverName.toString());

    TableLockManager tableLockManager =
        TableLockManager.createTableLockManager(configuration, zkw, serverName);
    TableLock lock = tableLockManager.writeLock(lockName, "Run sweep tool");
    lock.acquire();
    try {
      // use the same counter when mocking
      Counter counter = new GenericCounter();
      Reducer<Text, KeyValue, Writable, Writable>.Context ctx = mock(Reducer.Context.class);
      when(ctx.getConfiguration()).thenReturn(configuration);
      when(ctx.getCounter(Matchers.any(SweepCounter.class))).thenReturn(counter);
      when(ctx.nextKey()).thenReturn(true).thenReturn(false);
      when(ctx.getCurrentKey()).thenReturn(new Text(mobFile1));

      byte[] refBytes = Bytes.toBytes(mobFile1);
      long valueLength = refBytes.length;
      byte[] newValue = Bytes.add(Bytes.toBytes(valueLength), refBytes);
      KeyValue kv2 =
          new KeyValue(
              Bytes.toBytes(row),
              Bytes.toBytes(family),
              Bytes.toBytes(qf),
              1,
              KeyValue.Type.Put,
              newValue);
      List<KeyValue> list = new ArrayList<KeyValue>();
      list.add(kv2);

      when(ctx.getValues()).thenReturn(list);

      SweepReducer reducer = new SweepReducer();
      reducer.run(ctx);
    } finally {
      lock.release();
    }
    FileStatus[] filsStatuses2 = TEST_UTIL.getTestFileSystem().listStatus(mobFamilyPath);
    String mobFile2 = filsStatuses2[0].getPath().getName();
    // new mob file is generated, old one has been archived
    assertEquals(1, filsStatuses2.length);
    assertEquals(false, mobFile2.equalsIgnoreCase(mobFile1));

    // test sequence file
    String workingPath = configuration.get(SweepJob.WORKING_VISITED_DIR_KEY);
    FileStatus[] statuses = TEST_UTIL.getTestFileSystem().listStatus(new Path(workingPath));
    Set<String> files = new TreeSet<String>();
    for (FileStatus st : statuses) {
      files.addAll(
          getKeyFromSequenceFile(TEST_UTIL.getTestFileSystem(), st.getPath(), configuration));
    }
    assertEquals(1, files.size());
    assertEquals(true, files.contains(mobFile1));
  }
  @Override
  public QueryResult<Variant> getAllVariantsByRegionAndStudy(
      Region region, String sourceId, QueryOptions options) {
    Long start, end, dbstart, dbend;
    start = System.currentTimeMillis();
    QueryResult<Variant> queryResult =
        new QueryResult<>(
            String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd()));
    List<Variant> results = new LinkedList<>();

    boolean includeSamples;
    boolean includeStats;
    boolean includeEffects;
    if (!options.containsKey("samples")
        && !options.containsKey("stats")
        && !options.containsKey("effects")) {
      includeSamples = true;
      includeStats = true;
      includeEffects = true;
    } else {
      includeSamples = options.containsKey("samples") && options.getBoolean("samples");
      includeStats = options.containsKey("stats") && options.getBoolean("stats");
      includeEffects = options.containsKey("effects") && options.getBoolean("effects");
    }

    try {
      String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart()));
      String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd()));
      HTable table = new HTable(admin.getConfiguration(), tableName);
      dbstart = System.currentTimeMillis();
      Scan regionScan = new Scan(startRow.getBytes(), stopRow.getBytes());
      ResultScanner scanres = table.getScanner(regionScan);
      dbend = System.currentTimeMillis();
      queryResult.setDbTime(dbend - dbstart);

      // Iterate over results and, optionally, their samples and statistics
      for (Result result : scanres) {
        String[] rowkeyParts = new String(result.getRow(), CHARSET_UTF_8).split("_");
        String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", "");
        int position = Integer.parseInt(rowkeyParts[1]);

        // Get basic result fields from Protocol Buffers message
        NavigableMap<byte[], byte[]> infoMap = result.getFamilyMap("i".getBytes());
        byte[] byteInfo = infoMap.get((sourceId + "_data").getBytes());
        VariantFieldsProtos.VariantInfo protoInfo =
            VariantFieldsProtos.VariantInfo.parseFrom(byteInfo);
        String reference = protoInfo.getReference();
        String alternate = StringUtils.join(protoInfo.getAlternateList(), ",");
        String format = StringUtils.join(protoInfo.getFormatList(), ":");
        Variant variant = new Variant(chromosome, position, position, reference, alternate);

        // Set samples if requested
        if (includeSamples) {
          NavigableMap<byte[], byte[]> sampleMap = result.getFamilyMap("d".getBytes());
          Map<String, Map<String, String>> resultSampleMap = new HashMap<>();

          // Set samples
          for (byte[] s : sampleMap.keySet()) {
            String sampleName = (new String(s, CHARSET_UTF_8)).replaceAll(sourceId + "_", "");
            VariantFieldsProtos.VariantSample sample =
                VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s));
            String sample1 = sample.getSample();
            String[] values = sample1.split(":");
            String[] fields = format.split(":");
            Map<String, String> singleSampleMap = new HashMap<>();
            for (int i = 0; i < fields.length; i++) {
              singleSampleMap.put(fields[i], values[i]);
            }
            // TODO
            //                        variant.addSampleData(sampleName, singleSampleMap);
          }
        }

        // Set stats if requested
        if (includeStats) {
          byte[] byteStats = infoMap.get((sourceId + "_stats").getBytes());
          VariantFieldsProtos.VariantStats protoStats =
              VariantFieldsProtos.VariantStats.parseFrom(byteStats);
          VariantStats variantStats =
              new VariantStats(
                  chromosome,
                  position,
                  reference,
                  alternate,
                  protoStats.getMaf(),
                  protoStats.getMgf(),
                  protoStats.getMafAllele(),
                  protoStats.getMgfGenotype(),
                  protoStats.getMissingAlleles(),
                  protoStats.getMissingGenotypes(),
                  protoStats.getMendelianErrors(),
                  protoStats.getIsIndel(),
                  protoStats.getCasesPercentDominant(),
                  protoStats.getControlsPercentDominant(),
                  protoStats.getCasesPercentRecessive(),
                  protoStats.getControlsPercentRecessive());
          variant.setStats(variantStats);
        }

        if (includeEffects) {
          QueryResult<VariantEffect> queryEffects = getEffectsByVariant(variant, options);
          variant.setEffect(queryEffects.getResult());
        }

        results.add(variant);
      }
    } catch (IOException e) {
      System.err.println(e.getClass().getName() + ": " + e.getMessage());
    }
    queryResult.setResult(results);
    queryResult.setNumResults(results.size());
    end = System.currentTimeMillis();
    queryResult.setTime(end - start);
    return queryResult;
  }
  public QueryResult getSimpleVariantsByRegion(
      Region region, String sourceId, QueryOptions options) {
    Long start, end, dbstart, dbend;
    start = System.currentTimeMillis();
    boolean includeStats;
    boolean includeEffects;
    if (!options.containsKey("stats") && !options.containsKey("effects")) {
      includeStats = true;
      includeEffects = true;
    } else {
      includeStats = options.containsKey("stats") && options.getBoolean("stats");
      includeEffects = options.containsKey("effects") && options.getBoolean("effects");
    }

    QueryResult<Variant> queryResult =
        new QueryResult<>(
            String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd()));
    List<Variant> results = new ArrayList<>();
    String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart()));
    String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd()));
    BasicDBObject query =
        new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow))
            .append("sources.sourceId", sourceId);
    DBCollection collection = db.getCollection("variants");
    dbstart = System.currentTimeMillis();
    DBCursor variantInStudies = collection.find(query);
    dbend = System.currentTimeMillis();
    queryResult.setDbTime(dbend - dbstart);

    for (DBObject result : variantInStudies) {
      String[] rowkeyParts = result.get("position").toString().split("_");
      String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", "");
      int position = Integer.parseInt(rowkeyParts[1]);
      BasicDBList studies = (BasicDBList) result.get("sources");
      BasicDBObject st = (BasicDBObject) studies.get(0);
      String ref = (String) st.get("ref");
      String alt = StringUtils.join((ArrayList<String>) st.get("alt"), ",");

      // TODO Needs rework
      Variant variant = new Variant(chromosome, position, position, ref, alt);

      // Set stats informations
      if (includeStats) {
        VariantStats stats = new VariantStats();
        BasicDBObject mongoStats = (BasicDBObject) st.get("stats");
        stats.setMaf((float) (double) mongoStats.get("maf"));
        stats.setMafAllele((String) mongoStats.get("alleleMaf"));
        stats.setMissingGenotypes((int) mongoStats.get("missing"));
        List<Genotype> genotypeCount = new ArrayList<>();
        for (BasicDBObject s : (List<BasicDBObject>) mongoStats.get("genotypeCount")) {
          for (Map.Entry<String, Object> entry : s.entrySet()) {
            Genotype genotype = new Genotype(entry.getKey());
            genotype.setCount((Integer) entry.getValue());
            genotypeCount.add(genotype);
          }
        }
        stats.setGenotypes(genotypeCount);
        variant.setStats(stats);
      }

      // TODO Set consequence type names
      if (includeEffects) {
        BasicDBList mongoEffects = (BasicDBList) st.get("effects");
        if (mongoEffects != null) {
          for (Object e : mongoEffects) {
            String effectObo = e.toString();
            VariantEffect effect = new VariantEffect();
            effect.setConsequenceTypeObo(effectObo);
            variant.addEffect(effect);
          }
        }
      }

      results.add(variant);
    }

    queryResult.setResult(results);
    queryResult.setNumResults(results.size());
    end = System.currentTimeMillis();
    queryResult.setTime(end - start);
    return queryResult;
  }
  @Override
  public QueryResult getVariantsHistogramByRegion(
      Region region, String sourceId, boolean histogramLogarithm, int histogramMax) {
    QueryResult<ObjectMap> queryResult =
        new QueryResult<>(
            String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd()));
    List<ObjectMap> data = new ArrayList<>();
    String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart()));
    String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd()));

    long startTime = System.currentTimeMillis();

    long startDbTime = System.currentTimeMillis();

    BasicDBObject query =
        new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow))
            .append("studies.studyId", sourceId);
    DBCollection collection = db.getCollection("variants");
    DBCursor queryResults = collection.find(query);
    queryResult.setDbTime(System.currentTimeMillis() - startDbTime);

    int resultSize = queryResults.size();

    if (resultSize > histogramMax) { // Need to group results to fit maximum size of the histogram
      int sumChunkSize = resultSize / histogramMax;
      int i = 0, j = 0;
      int featuresCount = 0;
      ObjectMap item = null;

      for (DBObject result : queryResults) {
        //                featuresCount += result.getInt("features_count");
        //                if (i == 0) {
        //                    item = new ObjectMap("chromosome", result.getString("chromosome"));
        //                    item.put("chunkId", result.getInt("chunk_id"));
        //                    item.put("start", result.getInt("start"));
        //                } else if (i == sumChunkSize - 1 || j == resultSize - 1) {
        //                    if (histogramLogarithm) {
        //                        item.put("featuresCount", (featuresCount > 0) ?
        // Math.log(featuresCount) : 0);
        //                    } else {
        //                        item.put("featuresCount", featuresCount);
        //                    }
        //                    item.put("end", result.getInt("end"));
        //                    data.add(item);
        //                    i = -1;
        //                    featuresCount = 0;
        //                }
        //                j++;
        //                i++;
      }
    } else {
      for (DBObject result : queryResults) {
        //                ObjectMap item = new ObjectMap("chromosome",
        // result.getString("chromosome"));
        //                item.put("chunkId", result.getInt("chunk_id"));
        //                item.put("start", result.getInt("start"));
        //                if (histogramLogarithm) {
        //                    int features_count = result.getInt("features_count");
        //                    result.put("featuresCount", (features_count > 0) ?
        // Math.log(features_count) : 0);
        //                } else {
        //                    item.put("featuresCount", result.getInt("features_count"));
        //                }
        //                item.put("end", result.getInt("end"));
        //                data.add(item);
      }
    }

    queryResult.setResult(data);
    queryResult.setNumResults(data.size());
    queryResult.setTime(System.currentTimeMillis() - startTime);

    return queryResult;
  }