@Test public void insert_rowkey_prefix_date() throws IOException { System.out.println(errorTable); errorTable.setAutoFlushTo(false); List<Put> puts = new ArrayList<Put>(); long t1 = System.currentTimeMillis(); for (int i = 0; i < 10000000; i++) { String uuid = UUID.randomUUID().toString().replaceAll("-", "").substring(0, 8); Put put = new Put(Bytes.toBytes("20150705" + "_" + uuid)); put.add( fBytes, Bytes.toBytes("stacktrace"), Bytes.toBytes("java.io.IOException:file not found" + UUID.randomUUID().toString())); // puts.add(put); errorTable.put(put); if (i % 10000 == 0) { errorTable.flushCommits(); } } errorTable.flushCommits(); long t2 = System.currentTimeMillis(); System.out.println("count=" + puts.size() + ",t2-t1=" + (t2 - t1)); // errorTable.close(); }
public void doScan() throws IOException { Table tableRef = connection.getTable(TableName.valueOf(table)); Scan scan = new Scan(); ResultScanner scanner = tableRef.getScanner(scan); long now = System.currentTimeMillis(); if (verbose) System.out.println("Starting scan"); for (Result res : scanner) { if (verbose) System.out.println(res); } if (verbose) System.out.printf("Scan finished: %d ms\n\n", System.currentTimeMillis() - now); tableRef.close(); }
private static void waitUntil(long until) { long now = System.currentTimeMillis(); while (now <= until) { try { Thread.sleep(1L); now = System.currentTimeMillis(); } catch (InterruptedException e) { throw new RuntimeException(e); } } }
@Override public void mutateMany(Map<String, Map<ByteBuffer, KCVMutation>> mutations, StoreTransaction txh) throws StorageException { final long delTS = System.currentTimeMillis(); final long putTS = delTS + 1; Map<ByteBuffer, Pair<Put, Delete>> commandsPerKey = convertToCommands(mutations, putTS, delTS); List<Row> batch = new ArrayList<Row>(commandsPerKey.size()); // actual batch operation // convert sorted commands into representation required for 'batch' operation for (Pair<Put, Delete> commands : commandsPerKey.values()) { if (commands.getFirst() != null) batch.add(commands.getFirst()); if (commands.getSecond() != null) batch.add(commands.getSecond()); } try { HTableInterface table = null; try { table = connectionPool.getTable(tableName); table.batch(batch); table.flushCommits(); } finally { IOUtils.closeQuietly(table); } } catch (IOException e) { throw new TemporaryStorageException(e); } catch (InterruptedException e) { throw new TemporaryStorageException(e); } waitUntil(putTS); }
@Override public void tweet(User user, String tweetText) throws IOException { final long epoch = System.currentTimeMillis(); final long tranposeEpoch = Long.MAX_VALUE - epoch; final byte[] epochBytes = Bytes.toBytes(epoch); final byte[] tweetBytes = Bytes.toBytes(tweetText); byte[] nameBytes = Bytes.toBytes(user.getName()); /** put tweet into tweets */ Put tweetRowPut = new Put(generateTweetId(user)); tweetRowPut.add(_DEFAULT, _NAME, nameBytes); tweetRowPut.add(_DEFAULT, _MAIL, Bytes.toBytes(user.getEmail())); tweetRowPut.add(_DEFAULT, _TWEET, tweetBytes); tweetRowPut.add(_DEFAULT, _TIME, epochBytes); tweetsTable.put(tweetRowPut); /** put tweets for followers */ Scan followerScan = new Scan(); followerScan.setStartRow(Bytes.toBytes(user.getUserId() + "-")); followerScan.setStopRow(Bytes.toBytes((user.getUserId() + 1) + "-")); ResultScanner followerRS = followersTable.getScanner(followerScan); /** put users on tweet to her own tweetline */ Put put = new Put(Bytes.toBytes(user.getUserId() + "-" + tranposeEpoch + "-" + user.getUserId())); put.add(_DEFAULT, _NAME, nameBytes); put.add(_DEFAULT, _TWEET, tweetBytes); put.add(_DEFAULT, _TIME, epochBytes); List<Row> puts = new ArrayList<Row>(); puts.add(put); for (Result result : followerRS) { Long followerid = Bytes.toLong(result.getColumnLatest(_DEFAULT, _USERID).getValue()); put = new Put(Bytes.toBytes(followerid + "-" + tranposeEpoch + "-" + user.getUserId())); put.add(_DEFAULT, _NAME, nameBytes); put.add(_DEFAULT, _TWEET, tweetBytes); put.add(_DEFAULT, _TIME, epochBytes); puts.add(put); } followerRS.close(); try { tweetlineTable.batch(puts); } catch (InterruptedException e) { e.printStackTrace(); // @TODO log and handle properly. } }
@Test public void scan_by_prefix_date() throws IOException { FilterList fl = new FilterList(); Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("20150903")); fl.addFilter(filter); Scan scan = new Scan(); scan.setFilter(fl); long t1 = System.currentTimeMillis(); ResultScanner rs = errorTable.getScanner(scan); Result result; int count = 0; while ((result = rs.next()) != null) { System.out.println("rowkey=" + new String(result.getRow())); System.out.println( "value=" + new String(result.getValue(fBytes, Bytes.toBytes("stacktrace")))); System.out.println(); count++; } long t2 = System.currentTimeMillis(); System.out.println("count=" + count + ",t2 - t1=" + ((t2 - t1) / 1000)); }
public static void main(String[] args) throws Exception { if (args.length < 5) { System.err.printf( "Usage: %s <conf> <keytab> <user> <table> <reps> [-v]\n", ExampleJavaClient.class); System.exit(-1); } ExampleJavaClient exampleJavaClient = new ExampleJavaClient(args[0], args[1], args[2], args[3]); exampleJavaClient.initialise(); if (args.length == 6 && args[5].equals("-v")) { exampleJavaClient.verbose(true); } int reps = Integer.parseInt(args[4]); long start = System.currentTimeMillis(); System.out.println("Beginning " + reps + " scans"); for (int i = 0; i < reps; ++i) { exampleJavaClient.doScan(); } System.out.println("Runtime: " + (System.currentTimeMillis() - start) + "ms"); System.out.println("Ended scans for " + args[2]); }
private byte[] generateTweetId(User user) { return Bytes.toBytes(user.getUserId() + "-" + (Long.MAX_VALUE - System.currentTimeMillis())); }
@Test public void testRun() throws Exception { TableName tn = TableName.valueOf(tableName); byte[] mobValueBytes = new byte[100]; // get the path where mob files lie in Path mobFamilyPath = MobUtils.getMobFamilyPath(TEST_UTIL.getConfiguration(), tn, family); Put put = new Put(Bytes.toBytes(row)); put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qf), 1, mobValueBytes); Put put2 = new Put(Bytes.toBytes(row + "ignore")); put2.addColumn(Bytes.toBytes(family), Bytes.toBytes(qf), 1, mobValueBytes); table.mutate(put); table.mutate(put2); table.flush(); admin.flush(tn); FileStatus[] fileStatuses = TEST_UTIL.getTestFileSystem().listStatus(mobFamilyPath); // check the generation of a mob file assertEquals(1, fileStatuses.length); String mobFile1 = fileStatuses[0].getPath().getName(); Configuration configuration = new Configuration(TEST_UTIL.getConfiguration()); configuration.setFloat(MobConstants.MOB_SWEEP_TOOL_COMPACTION_RATIO, 0.6f); configuration.setStrings(TableInputFormat.INPUT_TABLE, tableName); configuration.setStrings(TableInputFormat.SCAN_COLUMN_FAMILY, family); configuration.setStrings(SweepJob.WORKING_VISITED_DIR_KEY, "jobWorkingNamesDir"); configuration.setStrings(SweepJob.WORKING_FILES_DIR_KEY, "compactionFileDir"); configuration.setStrings( CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, JavaSerialization.class.getName()); configuration.set(SweepJob.WORKING_VISITED_DIR_KEY, "compactionVisitedDir"); configuration.setLong( MobConstants.MOB_SWEEP_TOOL_COMPACTION_START_DATE, System.currentTimeMillis() + 24 * 3600 * 1000); ZooKeeperWatcher zkw = new ZooKeeperWatcher(configuration, "1", new DummyMobAbortable()); TableName lockName = MobUtils.getTableLockName(tn); String znode = ZKUtil.joinZNode(zkw.tableLockZNode, lockName.getNameAsString()); configuration.set(SweepJob.SWEEP_JOB_ID, "1"); configuration.set(SweepJob.SWEEP_JOB_TABLE_NODE, znode); ServerName serverName = SweepJob.getCurrentServerName(configuration); configuration.set(SweepJob.SWEEP_JOB_SERVERNAME, serverName.toString()); TableLockManager tableLockManager = TableLockManager.createTableLockManager(configuration, zkw, serverName); TableLock lock = tableLockManager.writeLock(lockName, "Run sweep tool"); lock.acquire(); try { // use the same counter when mocking Counter counter = new GenericCounter(); Reducer<Text, KeyValue, Writable, Writable>.Context ctx = mock(Reducer.Context.class); when(ctx.getConfiguration()).thenReturn(configuration); when(ctx.getCounter(Matchers.any(SweepCounter.class))).thenReturn(counter); when(ctx.nextKey()).thenReturn(true).thenReturn(false); when(ctx.getCurrentKey()).thenReturn(new Text(mobFile1)); byte[] refBytes = Bytes.toBytes(mobFile1); long valueLength = refBytes.length; byte[] newValue = Bytes.add(Bytes.toBytes(valueLength), refBytes); KeyValue kv2 = new KeyValue( Bytes.toBytes(row), Bytes.toBytes(family), Bytes.toBytes(qf), 1, KeyValue.Type.Put, newValue); List<KeyValue> list = new ArrayList<KeyValue>(); list.add(kv2); when(ctx.getValues()).thenReturn(list); SweepReducer reducer = new SweepReducer(); reducer.run(ctx); } finally { lock.release(); } FileStatus[] filsStatuses2 = TEST_UTIL.getTestFileSystem().listStatus(mobFamilyPath); String mobFile2 = filsStatuses2[0].getPath().getName(); // new mob file is generated, old one has been archived assertEquals(1, filsStatuses2.length); assertEquals(false, mobFile2.equalsIgnoreCase(mobFile1)); // test sequence file String workingPath = configuration.get(SweepJob.WORKING_VISITED_DIR_KEY); FileStatus[] statuses = TEST_UTIL.getTestFileSystem().listStatus(new Path(workingPath)); Set<String> files = new TreeSet<String>(); for (FileStatus st : statuses) { files.addAll( getKeyFromSequenceFile(TEST_UTIL.getTestFileSystem(), st.getPath(), configuration)); } assertEquals(1, files.size()); assertEquals(true, files.contains(mobFile1)); }
@Override public QueryResult<Variant> getAllVariantsByRegionAndStudy( Region region, String sourceId, QueryOptions options) { Long start, end, dbstart, dbend; start = System.currentTimeMillis(); QueryResult<Variant> queryResult = new QueryResult<>( String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd())); List<Variant> results = new LinkedList<>(); boolean includeSamples; boolean includeStats; boolean includeEffects; if (!options.containsKey("samples") && !options.containsKey("stats") && !options.containsKey("effects")) { includeSamples = true; includeStats = true; includeEffects = true; } else { includeSamples = options.containsKey("samples") && options.getBoolean("samples"); includeStats = options.containsKey("stats") && options.getBoolean("stats"); includeEffects = options.containsKey("effects") && options.getBoolean("effects"); } try { String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart())); String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd())); HTable table = new HTable(admin.getConfiguration(), tableName); dbstart = System.currentTimeMillis(); Scan regionScan = new Scan(startRow.getBytes(), stopRow.getBytes()); ResultScanner scanres = table.getScanner(regionScan); dbend = System.currentTimeMillis(); queryResult.setDbTime(dbend - dbstart); // Iterate over results and, optionally, their samples and statistics for (Result result : scanres) { String[] rowkeyParts = new String(result.getRow(), CHARSET_UTF_8).split("_"); String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", ""); int position = Integer.parseInt(rowkeyParts[1]); // Get basic result fields from Protocol Buffers message NavigableMap<byte[], byte[]> infoMap = result.getFamilyMap("i".getBytes()); byte[] byteInfo = infoMap.get((sourceId + "_data").getBytes()); VariantFieldsProtos.VariantInfo protoInfo = VariantFieldsProtos.VariantInfo.parseFrom(byteInfo); String reference = protoInfo.getReference(); String alternate = StringUtils.join(protoInfo.getAlternateList(), ","); String format = StringUtils.join(protoInfo.getFormatList(), ":"); Variant variant = new Variant(chromosome, position, position, reference, alternate); // Set samples if requested if (includeSamples) { NavigableMap<byte[], byte[]> sampleMap = result.getFamilyMap("d".getBytes()); Map<String, Map<String, String>> resultSampleMap = new HashMap<>(); // Set samples for (byte[] s : sampleMap.keySet()) { String sampleName = (new String(s, CHARSET_UTF_8)).replaceAll(sourceId + "_", ""); VariantFieldsProtos.VariantSample sample = VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s)); String sample1 = sample.getSample(); String[] values = sample1.split(":"); String[] fields = format.split(":"); Map<String, String> singleSampleMap = new HashMap<>(); for (int i = 0; i < fields.length; i++) { singleSampleMap.put(fields[i], values[i]); } // TODO // variant.addSampleData(sampleName, singleSampleMap); } } // Set stats if requested if (includeStats) { byte[] byteStats = infoMap.get((sourceId + "_stats").getBytes()); VariantFieldsProtos.VariantStats protoStats = VariantFieldsProtos.VariantStats.parseFrom(byteStats); VariantStats variantStats = new VariantStats( chromosome, position, reference, alternate, protoStats.getMaf(), protoStats.getMgf(), protoStats.getMafAllele(), protoStats.getMgfGenotype(), protoStats.getMissingAlleles(), protoStats.getMissingGenotypes(), protoStats.getMendelianErrors(), protoStats.getIsIndel(), protoStats.getCasesPercentDominant(), protoStats.getControlsPercentDominant(), protoStats.getCasesPercentRecessive(), protoStats.getControlsPercentRecessive()); variant.setStats(variantStats); } if (includeEffects) { QueryResult<VariantEffect> queryEffects = getEffectsByVariant(variant, options); variant.setEffect(queryEffects.getResult()); } results.add(variant); } } catch (IOException e) { System.err.println(e.getClass().getName() + ": " + e.getMessage()); } queryResult.setResult(results); queryResult.setNumResults(results.size()); end = System.currentTimeMillis(); queryResult.setTime(end - start); return queryResult; }
public QueryResult getSimpleVariantsByRegion( Region region, String sourceId, QueryOptions options) { Long start, end, dbstart, dbend; start = System.currentTimeMillis(); boolean includeStats; boolean includeEffects; if (!options.containsKey("stats") && !options.containsKey("effects")) { includeStats = true; includeEffects = true; } else { includeStats = options.containsKey("stats") && options.getBoolean("stats"); includeEffects = options.containsKey("effects") && options.getBoolean("effects"); } QueryResult<Variant> queryResult = new QueryResult<>( String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd())); List<Variant> results = new ArrayList<>(); String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart())); String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd())); BasicDBObject query = new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow)) .append("sources.sourceId", sourceId); DBCollection collection = db.getCollection("variants"); dbstart = System.currentTimeMillis(); DBCursor variantInStudies = collection.find(query); dbend = System.currentTimeMillis(); queryResult.setDbTime(dbend - dbstart); for (DBObject result : variantInStudies) { String[] rowkeyParts = result.get("position").toString().split("_"); String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", ""); int position = Integer.parseInt(rowkeyParts[1]); BasicDBList studies = (BasicDBList) result.get("sources"); BasicDBObject st = (BasicDBObject) studies.get(0); String ref = (String) st.get("ref"); String alt = StringUtils.join((ArrayList<String>) st.get("alt"), ","); // TODO Needs rework Variant variant = new Variant(chromosome, position, position, ref, alt); // Set stats informations if (includeStats) { VariantStats stats = new VariantStats(); BasicDBObject mongoStats = (BasicDBObject) st.get("stats"); stats.setMaf((float) (double) mongoStats.get("maf")); stats.setMafAllele((String) mongoStats.get("alleleMaf")); stats.setMissingGenotypes((int) mongoStats.get("missing")); List<Genotype> genotypeCount = new ArrayList<>(); for (BasicDBObject s : (List<BasicDBObject>) mongoStats.get("genotypeCount")) { for (Map.Entry<String, Object> entry : s.entrySet()) { Genotype genotype = new Genotype(entry.getKey()); genotype.setCount((Integer) entry.getValue()); genotypeCount.add(genotype); } } stats.setGenotypes(genotypeCount); variant.setStats(stats); } // TODO Set consequence type names if (includeEffects) { BasicDBList mongoEffects = (BasicDBList) st.get("effects"); if (mongoEffects != null) { for (Object e : mongoEffects) { String effectObo = e.toString(); VariantEffect effect = new VariantEffect(); effect.setConsequenceTypeObo(effectObo); variant.addEffect(effect); } } } results.add(variant); } queryResult.setResult(results); queryResult.setNumResults(results.size()); end = System.currentTimeMillis(); queryResult.setTime(end - start); return queryResult; }
@Override public QueryResult getVariantsHistogramByRegion( Region region, String sourceId, boolean histogramLogarithm, int histogramMax) { QueryResult<ObjectMap> queryResult = new QueryResult<>( String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd())); List<ObjectMap> data = new ArrayList<>(); String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart())); String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd())); long startTime = System.currentTimeMillis(); long startDbTime = System.currentTimeMillis(); BasicDBObject query = new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow)) .append("studies.studyId", sourceId); DBCollection collection = db.getCollection("variants"); DBCursor queryResults = collection.find(query); queryResult.setDbTime(System.currentTimeMillis() - startDbTime); int resultSize = queryResults.size(); if (resultSize > histogramMax) { // Need to group results to fit maximum size of the histogram int sumChunkSize = resultSize / histogramMax; int i = 0, j = 0; int featuresCount = 0; ObjectMap item = null; for (DBObject result : queryResults) { // featuresCount += result.getInt("features_count"); // if (i == 0) { // item = new ObjectMap("chromosome", result.getString("chromosome")); // item.put("chunkId", result.getInt("chunk_id")); // item.put("start", result.getInt("start")); // } else if (i == sumChunkSize - 1 || j == resultSize - 1) { // if (histogramLogarithm) { // item.put("featuresCount", (featuresCount > 0) ? // Math.log(featuresCount) : 0); // } else { // item.put("featuresCount", featuresCount); // } // item.put("end", result.getInt("end")); // data.add(item); // i = -1; // featuresCount = 0; // } // j++; // i++; } } else { for (DBObject result : queryResults) { // ObjectMap item = new ObjectMap("chromosome", // result.getString("chromosome")); // item.put("chunkId", result.getInt("chunk_id")); // item.put("start", result.getInt("start")); // if (histogramLogarithm) { // int features_count = result.getInt("features_count"); // result.put("featuresCount", (features_count > 0) ? // Math.log(features_count) : 0); // } else { // item.put("featuresCount", result.getInt("features_count")); // } // item.put("end", result.getInt("end")); // data.add(item); } } queryResult.setResult(data); queryResult.setNumResults(data.size()); queryResult.setTime(System.currentTimeMillis() - startTime); return queryResult; }