@Test
public void insert_rowkey_prefix_date() throws IOException {
  System.out.println(errorTable);
  errorTable.setAutoFlushTo(false);
  long t1 = System.currentTimeMillis();
  int count = 0;
  for (int i = 0; i < 10000000; i++) {
    String uuid = UUID.randomUUID().toString().replaceAll("-", "").substring(0, 8);
    Put put = new Put(Bytes.toBytes("20150705" + "_" + uuid));
    put.add(
        fBytes,
        Bytes.toBytes("stacktrace"),
        Bytes.toBytes("java.io.IOException:file not found" + UUID.randomUUID().toString()));
    errorTable.put(put);
    count++;
    // Flush the client-side write buffer every 10,000 puts.
    if (i % 10000 == 0) {
      errorTable.flushCommits();
    }
  }
  errorTable.flushCommits();
  long t2 = System.currentTimeMillis();
  System.out.println("count=" + count + ",t2-t1=" + (t2 - t1) + "ms");
  // errorTable.close();
}
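// A minimal alternative sketch (assumptions: a Connection field, an "error_table" table
// name, and the same fBytes family as above) using BufferedMutator, the HBase 1.x
// replacement for the deprecated setAutoFlushTo(false)/flushCommits() buffering pattern.
@Test
public void insert_with_buffered_mutator() throws IOException {
  long t1 = System.currentTimeMillis();
  int count = 0;
  // BufferedMutator buffers puts client-side and flushes full batches automatically.
  try (BufferedMutator mutator =
      connection.getBufferedMutator(TableName.valueOf("error_table"))) {
    for (int i = 0; i < 10000; i++) {
      String uuid = UUID.randomUUID().toString().replaceAll("-", "").substring(0, 8);
      Put put = new Put(Bytes.toBytes("20150705" + "_" + uuid));
      put.addColumn(fBytes, Bytes.toBytes("stacktrace"), Bytes.toBytes("sample value " + i));
      mutator.mutate(put);
      count++;
    }
    // Any mutations still buffered are flushed when the mutator is closed.
  }
  long t2 = System.currentTimeMillis();
  System.out.println("count=" + count + ",t2-t1=" + (t2 - t1) + "ms");
}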
/** Sets up configuration based on params */
private static boolean setup(Hashtable<String, String> curConf, Configuration argConf) {
  if (argConf.get("file") == null) {
    logger.fatal("Missing file parameter");
    System.exit(1);
  }
  if (argConf.get("hdfs_base_path") == null) {
    logger.fatal("Missing HDFS base path, check gestore-conf.xml");
    System.exit(1);
  }
  if (argConf.get("hdfs_temp_path") == null) {
    logger.fatal("Missing HDFS temp path, check gestore-conf.xml");
    System.exit(1);
  }
  if (argConf.get("local_temp_path") == null) {
    logger.fatal("Missing local temp path, check gestore-conf.xml");
    System.exit(1);
  }

  // Input parameters
  curConf.put("run_id", argConf.get("run", ""));
  curConf.put("task_id", argConf.get("task", ""));
  curConf.put("file_id", argConf.get("file"));
  curConf.put("local_path", argConf.get("path", ""));
  curConf.put("type", argConf.get("type", "l2r"));
  curConf.put("timestamp_start", argConf.get("timestamp_start", "1"));
  curConf.put(
      "timestamp_stop", argConf.get("timestamp_stop", Integer.toString(Integer.MAX_VALUE)));
  curConf.put("delimiter", argConf.get("regex", "ID=.*"));
  curConf.put("taxon", argConf.get("taxon", "all"));
  curConf.put("intermediate", argConf.get("full_run", "false"));
  curConf.put("quick_add", argConf.get("quick_add", "false"));
  Boolean full_run = curConf.get("intermediate").matches("(?i).*true.*");
  curConf.put("format", argConf.get("format", "unknown"));
  curConf.put("split", argConf.get("split", "1"));
  curConf.put("copy", argConf.get("copy", "true"));

  // Constants
  curConf.put("base_path", argConf.get("hdfs_base_path"));
  curConf.put("temp_path", argConf.get("hdfs_temp_path"));
  curConf.put("local_temp_path", argConf.get("local_temp_path"));
  curConf.put("db_name_files", argConf.get("hbase_file_table"));
  curConf.put("db_name_runs", argConf.get("hbase_run_table"));
  curConf.put("db_name_updates", argConf.get("hbase_db_update_table"));

  // Timestamps
  Date currentTime = new Date();
  Date endDate = new Date(Long.parseLong(curConf.get("timestamp_stop")));
  curConf.put("timestamp_real", Long.toString(currentTime.getTime()));

  return true;
}
public void doScan() throws IOException {
  Table tableRef = connection.getTable(TableName.valueOf(table));
  Scan scan = new Scan();
  ResultScanner scanner = tableRef.getScanner(scan);
  long now = System.currentTimeMillis();
  if (verbose) System.out.println("Starting scan");
  for (Result res : scanner) {
    if (verbose) System.out.println(res);
  }
  if (verbose)
    System.out.printf("Scan finished: %d ms\n\n", System.currentTimeMillis() - now);
  tableRef.close();
}
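// A minimal tuning sketch (assuming the same connection/table/verbose fields as doScan
// above). Scanner caching sets how many rows each region-server RPC returns, and
// disabling block caching keeps a one-off full scan from evicting hot data; both are
// standard HBase 1.x Scan options.
public void doTunedScan() throws IOException {
  Table tableRef = connection.getTable(TableName.valueOf(table));
  Scan scan = new Scan();
  scan.setCaching(500);       // rows fetched per RPC round trip
  scan.setCacheBlocks(false); // don't churn the block cache during a full scan
  try (ResultScanner scanner = tableRef.getScanner(scan)) {
    for (Result res : scanner) {
      if (verbose) System.out.println(res);
    }
  }
  tableRef.close();
}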
private static void waitUntil(long until) {
  long now = System.currentTimeMillis();
  while (now <= until) {
    try {
      Thread.sleep(1L);
      now = System.currentTimeMillis();
    } catch (InterruptedException e) {
      throw new RuntimeException(e);
    }
  }
}
@Override
public void mutateMany(Map<String, Map<ByteBuffer, KCVMutation>> mutations, StoreTransaction txh)
    throws StorageException {
  final long delTS = System.currentTimeMillis();
  final long putTS = delTS + 1;

  Map<ByteBuffer, Pair<Put, Delete>> commandsPerKey = convertToCommands(mutations, putTS, delTS);
  List<Row> batch = new ArrayList<Row>(commandsPerKey.size()); // actual batch operation

  // convert sorted commands into representation required for 'batch' operation
  for (Pair<Put, Delete> commands : commandsPerKey.values()) {
    if (commands.getFirst() != null) batch.add(commands.getFirst());
    if (commands.getSecond() != null) batch.add(commands.getSecond());
  }

  try {
    HTableInterface table = null;
    try {
      table = connectionPool.getTable(tableName);
      table.batch(batch);
      table.flushCommits();
    } finally {
      IOUtils.closeQuietly(table);
    }
  } catch (IOException e) {
    throw new TemporaryStorageException(e);
  } catch (InterruptedException e) {
    throw new TemporaryStorageException(e);
  }

  // Block until the wall clock passes putTS so that subsequent mutations are guaranteed
  // strictly newer timestamps than this batch.
  waitUntil(putTS);
}
static {
  try {
    props.load(ClassLoader.getSystemResource(Constants.PROPERTIES).openStream());
  } catch (IOException e) {
    logger.error("Failed to load properties file.");
    System.exit(1);
  }
}
@Override
public void tweet(User user, String tweetText) throws IOException {
  final long epoch = System.currentTimeMillis();
  final long transposeEpoch = Long.MAX_VALUE - epoch;
  final byte[] epochBytes = Bytes.toBytes(epoch);
  final byte[] tweetBytes = Bytes.toBytes(tweetText);
  byte[] nameBytes = Bytes.toBytes(user.getName());

  // put tweet into tweets
  Put tweetRowPut = new Put(generateTweetId(user));
  tweetRowPut.add(_DEFAULT, _NAME, nameBytes);
  tweetRowPut.add(_DEFAULT, _MAIL, Bytes.toBytes(user.getEmail()));
  tweetRowPut.add(_DEFAULT, _TWEET, tweetBytes);
  tweetRowPut.add(_DEFAULT, _TIME, epochBytes);
  tweetsTable.put(tweetRowPut);

  // find this user's followers
  Scan followerScan = new Scan();
  followerScan.setStartRow(Bytes.toBytes(user.getUserId() + "-"));
  followerScan.setStopRow(Bytes.toBytes((user.getUserId() + 1) + "-"));
  ResultScanner followerRS = followersTable.getScanner(followerScan);

  // put the user's own tweet into her own tweetline
  Put put =
      new Put(Bytes.toBytes(user.getUserId() + "-" + transposeEpoch + "-" + user.getUserId()));
  put.add(_DEFAULT, _NAME, nameBytes);
  put.add(_DEFAULT, _TWEET, tweetBytes);
  put.add(_DEFAULT, _TIME, epochBytes);
  List<Row> puts = new ArrayList<Row>();
  puts.add(put);

  // fan the tweet out to every follower's tweetline
  for (Result result : followerRS) {
    Long followerid = Bytes.toLong(result.getColumnLatest(_DEFAULT, _USERID).getValue());
    put = new Put(Bytes.toBytes(followerid + "-" + transposeEpoch + "-" + user.getUserId()));
    put.add(_DEFAULT, _NAME, nameBytes);
    put.add(_DEFAULT, _TWEET, tweetBytes);
    put.add(_DEFAULT, _TIME, epochBytes);
    puts.add(put);
  }
  followerRS.close();
  try {
    tweetlineTable.batch(puts);
  } catch (InterruptedException e) {
    e.printStackTrace(); // @TODO log and handle properly.
  }
}
@Test
public void scan_by_prefix_date() throws IOException {
  FilterList fl = new FilterList();
  Filter filter =
      new RowFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("20150903"));
  fl.addFilter(filter);

  Scan scan = new Scan();
  scan.setFilter(fl);
  long t1 = System.currentTimeMillis();
  ResultScanner rs = errorTable.getScanner(scan);
  Result result;
  int count = 0;
  while ((result = rs.next()) != null) {
    System.out.println("rowkey=" + new String(result.getRow()));
    System.out.println(
        "value=" + new String(result.getValue(fBytes, Bytes.toBytes("stacktrace"))));
    System.out.println();
    count++;
  }
  long t2 = System.currentTimeMillis();
  // (t2 - t1) is in milliseconds; report seconds.
  System.out.println("count=" + count + ",seconds=" + ((t2 - t1) / 1000));
}
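// A minimal sketch (assuming the same errorTable fixture as the test above) of the same
// date query as a bounded scan. Because the date is the leading part of the rowkey
// ("yyyyMMdd_uuid"), start/stop rows let HBase skip every other row range, whereas the
// RowFilter/SubstringComparator above still examines all rows in the table.
@Test
public void scan_by_rowkey_range() throws IOException {
  Scan scan = new Scan();
  scan.setStartRow(Bytes.toBytes("20150903")); // first possible key for the day
  scan.setStopRow(Bytes.toBytes("20150904"));  // stop row is exclusive
  int count = 0;
  try (ResultScanner rs = errorTable.getScanner(scan)) {
    for (Result result : rs) {
      count++;
    }
  }
  System.out.println("count=" + count);
}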
public static void main(String[] args) throws Exception {
  if (args.length < 5) {
    System.err.printf(
        "Usage: %s <conf> <keytab> <user> <table> <reps> [-v]\n",
        ExampleJavaClient.class.getName());
    System.exit(-1);
  }

  ExampleJavaClient exampleJavaClient =
      new ExampleJavaClient(args[0], args[1], args[2], args[3]);
  exampleJavaClient.initialise();
  if (args.length == 6 && args[5].equals("-v")) {
    exampleJavaClient.verbose(true);
  }

  int reps = Integer.parseInt(args[4]);
  long start = System.currentTimeMillis();
  System.out.println("Beginning " + reps + " scans");
  for (int i = 0; i < reps; ++i) {
    exampleJavaClient.doScan();
  }
  System.out.println("Runtime: " + (System.currentTimeMillis() - start) + "ms");
  System.out.println("Ended scans for " + args[2]);
}
public HostRankHBaseTest() {
  super(HostRankHBaseTest.class.getName());

  // Let's set up the hbase root directory.
  Configuration conf = HBaseConfiguration.create();
  try {
    FileSystem fs = FileSystem.get(conf);
    String randomStr = UUID.randomUUID().toString();
    String tmpdir = System.getProperty("java.io.tmpdir") + "/" + randomStr + "/";
    hbaseRootdir = fs.makeQualified(new Path(tmpdir));
    conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
    fs.mkdirs(hbaseRootdir);
  } catch (IOException ioe) {
    fail("Could not create hbase root directory.");
  }

  // Start the test utility.
  testUtil = new HBaseTestingUtility(conf);
}
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  IntegrationTestingUtility.setUseDistributedCluster(conf);
  IntegrationTestDDLMasterFailover masterFailover = new IntegrationTestDDLMasterFailover();
  Connection connection = null;
  int ret = 1;
  try {
    // Initialize connection once, then pass to Actions
    LOG.debug("Setting up connection ...");
    connection = ConnectionFactory.createConnection(conf);
    masterFailover.setConnection(connection);
    ret = ToolRunner.run(conf, masterFailover, args);
  } catch (IOException e) {
    LOG.fatal("Failed to establish connection. Aborting test ...", e);
  } finally {
    connection = masterFailover.getConnection();
    if (connection != null) {
      connection.close();
    }
    System.exit(ret);
  }
}
@Override
public QueryResult getVariantsHistogramByRegion(
    Region region, String sourceId, boolean histogramLogarithm, int histogramMax) {
  QueryResult<ObjectMap> queryResult =
      new QueryResult<>(
          String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd()));
  List<ObjectMap> data = new ArrayList<>();

  String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart()));
  String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd()));

  long startTime = System.currentTimeMillis();
  long startDbTime = System.currentTimeMillis();

  BasicDBObject query =
      new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow))
          .append("studies.studyId", sourceId);
  DBCollection collection = db.getCollection("variants");
  DBCursor queryResults = collection.find(query);
  queryResult.setDbTime(System.currentTimeMillis() - startDbTime);

  int resultSize = queryResults.size();

  if (resultSize > histogramMax) {
    // Need to group results to fit maximum size of the histogram
    int sumChunkSize = resultSize / histogramMax;
    int i = 0, j = 0;
    int featuresCount = 0;
    ObjectMap item = null;

    for (DBObject result : queryResults) {
      //      featuresCount += result.getInt("features_count");
      //      if (i == 0) {
      //        item = new ObjectMap("chromosome", result.getString("chromosome"));
      //        item.put("chunkId", result.getInt("chunk_id"));
      //        item.put("start", result.getInt("start"));
      //      } else if (i == sumChunkSize - 1 || j == resultSize - 1) {
      //        if (histogramLogarithm) {
      //          item.put("featuresCount", (featuresCount > 0) ? Math.log(featuresCount) : 0);
      //        } else {
      //          item.put("featuresCount", featuresCount);
      //        }
      //        item.put("end", result.getInt("end"));
      //        data.add(item);
      //        i = -1;
      //        featuresCount = 0;
      //      }
      //      j++;
      //      i++;
    }
  } else {
    for (DBObject result : queryResults) {
      //      ObjectMap item = new ObjectMap("chromosome", result.getString("chromosome"));
      //      item.put("chunkId", result.getInt("chunk_id"));
      //      item.put("start", result.getInt("start"));
      //      if (histogramLogarithm) {
      //        int features_count = result.getInt("features_count");
      //        result.put("featuresCount", (features_count > 0) ? Math.log(features_count) : 0);
      //      } else {
      //        item.put("featuresCount", result.getInt("features_count"));
      //      }
      //      item.put("end", result.getInt("end"));
      //      data.add(item);
    }
  }

  queryResult.setResult(data);
  queryResult.setNumResults(data.size());
  queryResult.setTime(System.currentTimeMillis() - startTime);
  return queryResult;
}
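// The grouping logic above is commented out in the original. A standalone sketch of the
// same idea it describes — summing runs of sumChunkSize consecutive per-chunk counts so
// at most roughly histogramMax buckets remain — written over plain ints so it is easy to
// test in isolation (the method name is illustrative, not from the original):
static int[] collapseCounts(int[] counts, int histogramMax) {
  if (counts.length <= histogramMax) {
    return counts; // already fits in the histogram
  }
  int sumChunkSize = counts.length / histogramMax;        // results summed per bucket
  int buckets = (counts.length + sumChunkSize - 1) / sumChunkSize; // ceiling division
  int[] collapsed = new int[buckets];
  for (int i = 0; i < counts.length; i++) {
    collapsed[i / sumChunkSize] += counts[i];
  }
  return collapsed;
}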
@Test
public void testRun() throws Exception {
  TableName tn = TableName.valueOf(tableName);
  byte[] mobValueBytes = new byte[100];

  // get the path where mob files lie in
  Path mobFamilyPath = MobUtils.getMobFamilyPath(TEST_UTIL.getConfiguration(), tn, family);

  Put put = new Put(Bytes.toBytes(row));
  put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qf), 1, mobValueBytes);
  Put put2 = new Put(Bytes.toBytes(row + "ignore"));
  put2.addColumn(Bytes.toBytes(family), Bytes.toBytes(qf), 1, mobValueBytes);
  table.mutate(put);
  table.mutate(put2);
  table.flush();
  admin.flush(tn);

  FileStatus[] fileStatuses = TEST_UTIL.getTestFileSystem().listStatus(mobFamilyPath);
  // check the generation of a mob file
  assertEquals(1, fileStatuses.length);
  String mobFile1 = fileStatuses[0].getPath().getName();

  Configuration configuration = new Configuration(TEST_UTIL.getConfiguration());
  configuration.setFloat(MobConstants.MOB_SWEEP_TOOL_COMPACTION_RATIO, 0.6f);
  configuration.setStrings(TableInputFormat.INPUT_TABLE, tableName);
  configuration.setStrings(TableInputFormat.SCAN_COLUMN_FAMILY, family);
  configuration.setStrings(SweepJob.WORKING_VISITED_DIR_KEY, "jobWorkingNamesDir");
  configuration.setStrings(SweepJob.WORKING_FILES_DIR_KEY, "compactionFileDir");
  configuration.setStrings(
      CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, JavaSerialization.class.getName());
  configuration.set(SweepJob.WORKING_VISITED_DIR_KEY, "compactionVisitedDir");
  configuration.setLong(
      MobConstants.MOB_SWEEP_TOOL_COMPACTION_START_DATE,
      System.currentTimeMillis() + 24 * 3600 * 1000);

  ZooKeeperWatcher zkw = new ZooKeeperWatcher(configuration, "1", new DummyMobAbortable());
  TableName lockName = MobUtils.getTableLockName(tn);
  String znode = ZKUtil.joinZNode(zkw.tableLockZNode, lockName.getNameAsString());
  configuration.set(SweepJob.SWEEP_JOB_ID, "1");
  configuration.set(SweepJob.SWEEP_JOB_TABLE_NODE, znode);
  ServerName serverName = SweepJob.getCurrentServerName(configuration);
  configuration.set(SweepJob.SWEEP_JOB_SERVERNAME, serverName.toString());

  TableLockManager tableLockManager =
      TableLockManager.createTableLockManager(configuration, zkw, serverName);
  TableLock lock = tableLockManager.writeLock(lockName, "Run sweep tool");
  lock.acquire();
  try {
    // use the same counter when mocking
    Counter counter = new GenericCounter();
    Reducer<Text, KeyValue, Writable, Writable>.Context ctx = mock(Reducer.Context.class);
    when(ctx.getConfiguration()).thenReturn(configuration);
    when(ctx.getCounter(Matchers.any(SweepCounter.class))).thenReturn(counter);
    when(ctx.nextKey()).thenReturn(true).thenReturn(false);
    when(ctx.getCurrentKey()).thenReturn(new Text(mobFile1));

    byte[] refBytes = Bytes.toBytes(mobFile1);
    long valueLength = refBytes.length;
    byte[] newValue = Bytes.add(Bytes.toBytes(valueLength), refBytes);
    KeyValue kv2 =
        new KeyValue(
            Bytes.toBytes(row),
            Bytes.toBytes(family),
            Bytes.toBytes(qf),
            1,
            KeyValue.Type.Put,
            newValue);
    List<KeyValue> list = new ArrayList<KeyValue>();
    list.add(kv2);
    when(ctx.getValues()).thenReturn(list);

    SweepReducer reducer = new SweepReducer();
    reducer.run(ctx);
  } finally {
    lock.release();
  }

  FileStatus[] fileStatuses2 = TEST_UTIL.getTestFileSystem().listStatus(mobFamilyPath);
  String mobFile2 = fileStatuses2[0].getPath().getName();
  // new mob file is generated, old one has been archived
  assertEquals(1, fileStatuses2.length);
  assertFalse(mobFile2.equalsIgnoreCase(mobFile1));

  // test sequence file
  String workingPath = configuration.get(SweepJob.WORKING_VISITED_DIR_KEY);
  FileStatus[] statuses = TEST_UTIL.getTestFileSystem().listStatus(new Path(workingPath));
  Set<String> files = new TreeSet<String>();
  for (FileStatus st : statuses) {
    files.addAll(
        getKeyFromSequenceFile(TEST_UTIL.getTestFileSystem(), st.getPath(), configuration));
  }
  assertEquals(1, files.size());
  assertTrue(files.contains(mobFile1));
}
public static void main(String[] args) throws Exception {
  int result = ToolRunner.run(new Configuration(), new move(), args);
  System.exit(result);
}
@Override
public QueryResult<Variant> getAllVariantsByRegionAndStudy(
    Region region, String sourceId, QueryOptions options) {
  Long start, end, dbstart, dbend;
  start = System.currentTimeMillis();
  QueryResult<Variant> queryResult =
      new QueryResult<>(
          String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd()));
  List<Variant> results = new LinkedList<>();

  boolean includeSamples;
  boolean includeStats;
  boolean includeEffects;
  if (!options.containsKey("samples")
      && !options.containsKey("stats")
      && !options.containsKey("effects")) {
    includeSamples = true;
    includeStats = true;
    includeEffects = true;
  } else {
    includeSamples = options.containsKey("samples") && options.getBoolean("samples");
    includeStats = options.containsKey("stats") && options.getBoolean("stats");
    includeEffects = options.containsKey("effects") && options.getBoolean("effects");
  }

  try {
    String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart()));
    String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd()));
    HTable table = new HTable(admin.getConfiguration(), tableName);
    dbstart = System.currentTimeMillis();
    Scan regionScan = new Scan(startRow.getBytes(), stopRow.getBytes());
    ResultScanner scanres = table.getScanner(regionScan);
    dbend = System.currentTimeMillis();
    queryResult.setDbTime(dbend - dbstart);

    // Iterate over results and, optionally, their samples and statistics
    for (Result result : scanres) {
      String[] rowkeyParts = new String(result.getRow(), CHARSET_UTF_8).split("_");
      String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", "");
      int position = Integer.parseInt(rowkeyParts[1]);

      // Get basic result fields from Protocol Buffers message
      NavigableMap<byte[], byte[]> infoMap = result.getFamilyMap("i".getBytes());
      byte[] byteInfo = infoMap.get((sourceId + "_data").getBytes());
      VariantFieldsProtos.VariantInfo protoInfo =
          VariantFieldsProtos.VariantInfo.parseFrom(byteInfo);
      String reference = protoInfo.getReference();
      String alternate = StringUtils.join(protoInfo.getAlternateList(), ",");
      String format = StringUtils.join(protoInfo.getFormatList(), ":");
      Variant variant = new Variant(chromosome, position, position, reference, alternate);

      // Set samples if requested
      if (includeSamples) {
        NavigableMap<byte[], byte[]> sampleMap = result.getFamilyMap("d".getBytes());
        Map<String, Map<String, String>> resultSampleMap = new HashMap<>();

        // Set samples
        for (byte[] s : sampleMap.keySet()) {
          String sampleName = (new String(s, CHARSET_UTF_8)).replaceAll(sourceId + "_", "");
          VariantFieldsProtos.VariantSample sample =
              VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s));
          String sample1 = sample.getSample();
          String[] values = sample1.split(":");
          String[] fields = format.split(":");
          Map<String, String> singleSampleMap = new HashMap<>();
          for (int i = 0; i < fields.length; i++) {
            singleSampleMap.put(fields[i], values[i]);
          }
          // TODO
          // variant.addSampleData(sampleName, singleSampleMap);
        }
      }

      // Set stats if requested
      if (includeStats) {
        byte[] byteStats = infoMap.get((sourceId + "_stats").getBytes());
        VariantFieldsProtos.VariantStats protoStats =
            VariantFieldsProtos.VariantStats.parseFrom(byteStats);
        VariantStats variantStats =
            new VariantStats(
                chromosome,
                position,
                reference,
                alternate,
                protoStats.getMaf(),
                protoStats.getMgf(),
                protoStats.getMafAllele(),
                protoStats.getMgfGenotype(),
                protoStats.getMissingAlleles(),
                protoStats.getMissingGenotypes(),
                protoStats.getMendelianErrors(),
                protoStats.getIsIndel(),
                protoStats.getCasesPercentDominant(),
                protoStats.getControlsPercentDominant(),
                protoStats.getCasesPercentRecessive(),
                protoStats.getControlsPercentRecessive());
        variant.setStats(variantStats);
      }

      if (includeEffects) {
        QueryResult<VariantEffect> queryEffects = getEffectsByVariant(variant, options);
        variant.setEffect(queryEffects.getResult());
      }

      results.add(variant);
    }
    // Release the scanner and table handle once iteration is done.
    scanres.close();
    table.close();
  } catch (IOException e) {
    System.err.println(e.getClass().getName() + ": " + e.getMessage());
  }
  queryResult.setResult(results);
  queryResult.setNumResults(results.size());
  end = System.currentTimeMillis();
  queryResult.setTime(end - start);
  return queryResult;
}
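// buildRowkey(chromosome, position) is used above but not shown in this listing. A
// hypothetical sketch of what such a builder typically looks like for this schema:
// zero-padding makes lexicographic key order match genomic order, which is consistent
// with the parsing above that strips leading zeros ("^0+(?!$)"). The padding widths and
// the treatment of non-numeric chromosomes are assumptions, not the original code.
private static String buildRowkeySketch(String chromosome, String position) {
  // Pad numeric chromosomes to two characters so "2" sorts before "10"; X/Y/MT pass through.
  String paddedChromosome =
      chromosome.matches("\\d+")
          ? String.format("%02d", Integer.parseInt(chromosome))
          : chromosome;
  // Pad position to a fixed width so positions compare correctly as strings.
  String paddedPosition = String.format("%012d", Long.parseLong(position));
  return paddedChromosome + "_" + paddedPosition;
}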
public int run(String[] args) throws Exception {
  // printUsage();
  /*
   * SETUP
   */
  Configuration argConf = getConf();
  Hashtable<String, String> confArg = new Hashtable<String, String>();
  setup(confArg, argConf);
  Date currentTime = new Date();
  Date endDate = new Date(Long.parseLong(confArg.get("timestamp_stop")));
  Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*");
  Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*");
  logger.info("Running GeStore");

  // ZooKeeper setup
  Configuration config = HBaseConfiguration.create();
  zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config));
  zkInstance =
      new ZooKeeper(
          ZKConfig.getZKQuorumServersString(config),
          config.getInt("zookeeper.session.timeout", -1),
          zkWatcher);

  if (!confArg.get("task_id").isEmpty()) {
    confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id"));
  }

  String lockRequest = confArg.get("file_id");
  if (!confArg.get("run_id").isEmpty())
    lockRequest = lockRequest + "_" + confArg.get("run_id") + "_";
  if (!confArg.get("task_id").isEmpty())
    lockRequest = lockRequest + "_" + confArg.get("task_id") + "_";

  // Get type of movement
  toFrom type_move = checkArgs(confArg);
  if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) {
    List<String> arguments = new ArrayList<String>();
    arguments.add("-Dinput=" + confArg.get("local_path"));
    arguments.add("-Dtable=" + confArg.get("file_id"));
    arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop"));
    arguments.add("-Dtype=" + confArg.get("format"));
    arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id"));
    arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path"));
    if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id"));
    if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id"));
    if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add"));
    String lockName = lock(lockRequest);
    String[] argumentString = arguments.toArray(new String[arguments.size()]);
    adddb.main(argumentString);
    unlock(lockName);
    System.exit(0);
  }

  // Database registration
  dbutil db_util = new dbutil(config);
  db_util.register_database(confArg.get("db_name_files"), true);
  db_util.register_database(confArg.get("db_name_runs"), true);
  db_util.register_database(confArg.get("db_name_updates"), true);
  FileSystem hdfs = FileSystem.get(config);
  FileSystem localFS = FileSystem.getLocal(config);

  // Get source type
  confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
  confArg.put(
      "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
  if (!confArg.get("source").equals("local")
      && type_move == toFrom.REMOTE2LOCAL
      && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) {
    confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util)));
  }

  /*
   * Get previous timestamp
   */
  Get run_id_get = new Get(confArg.get("run_id").getBytes());
  Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get);
  KeyValue run_file_prev =
      run_get.getColumnLatest(
          "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes());
  String last_timestamp = "0";
  if (null != run_file_prev && !confArg.get("source").equals("local")) {
    long last_timestamp_real = run_file_prev.getTimestamp();
    Long current_timestamp = new Long(confArg.get("timestamp_real"));
    // Only reuse the previous timestamp when the last run entry is more than
    // 36000 ms (36 seconds) older than this run.
    if ((current_timestamp - last_timestamp_real) > 36000) {
      last_timestamp = new String(run_file_prev.getValue());
      Integer lastTimestamp = new Integer(last_timestamp);
      lastTimestamp += 1;
      last_timestamp = lastTimestamp.toString();
      logger.info("Last timestamp: " + last_timestamp + " End date: " + endDate);
      Date last_run = new Date(run_file_prev.getTimestamp());
      if (last_run.before(endDate) && !full_run) {
        confArg.put("timestamp_start", last_timestamp);
      }
    }
  }

  Integer tse = new Integer(confArg.get("timestamp_stop"));
  Integer tss = new Integer(confArg.get("timestamp_start"));
  if (tss > tse) {
    logger.info("No new version of requested file.");
    return 0;
  }

  /*
   * Generate file
   */
  String lockName = lock(lockRequest);

  Get file_id_get = new Get(confArg.get("file_id").getBytes());
  Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get);
  if (!file_get.isEmpty()) {
    boolean found =
        hasFile(
            db_util,
            hdfs,
            confArg.get("db_name_files"),
            confArg.get("file_id"),
            getFullPath(confArg));
    if (confArg.get("source").equals("fullfile")) {
      found = false;
    }
    String filenames_put =
        getFileNames(
            db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg));
    // Filename not found in file database
    if (!found && type_move == toFrom.REMOTE2LOCAL) {
      if (!confArg.get("source").equals("local")) {
        // Generate intermediate file
        if (getFile(hdfs, confArg, db_util) == null) {
          unlock(lockName);
          return 1;
        }
        // Put generated file into file database
        if (!confArg.get("format").equals("fullfile")) {
          putFileEntry(
              db_util,
              hdfs,
              confArg.get("db_name_files"),
              confArg.get("file_id"),
              confArg.get("full_file_name"),
              confArg.get("source"));
        }
      } else {
        logger.warn("Remote file not found, and cannot be generated! File: " + confArg);
        unlock(lockName);
        return 1;
      }
    }
  } else {
    if (type_move == toFrom.REMOTE2LOCAL) {
      logger.warn("Remote file not found, and cannot be generated.");
      unlock(lockName);
      return 1;
    }
  }

  /*
   * Copy file
   * Update tables
   */
  if (type_move == toFrom.LOCAL2REMOTE) {
    if (!confArg.get("format").equals("fullfile")) {
      putFileEntry(
          db_util,
          hdfs,
          confArg.get("db_name_files"),
          confArg.get("file_id"),
          getFullPath(confArg),
          confArg.get("source"));
    }
    putRunEntry(
        db_util,
        confArg.get("db_name_runs"),
        confArg.get("run_id"),
        confArg.get("file_id"),
        confArg.get("type"),
        confArg.get("timestamp_real"),
        confArg.get("timestamp_stop"),
        getFullPath(confArg),
        confArg.get("delimiter"));
    hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg)));
  } else if (type_move == toFrom.REMOTE2LOCAL) {
    FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*"));
    putRunEntry(
        db_util,
        confArg.get("db_name_runs"),
        confArg.get("run_id"),
        confArg.get("file_id"),
        confArg.get("type"),
        confArg.get("timestamp_real"),
        confArg.get("timestamp_stop"),
        getFullPath(confArg),
        confArg.get("delimiter"));
    unlock(lockName);
    for (FileStatus file : files) {
      Path cur_file = file.getPath();
      Path cur_local_path = new Path(confArg.get("local_path") + confArg.get("file_id"));
      String suffix = getSuffix(getFileName(confArg), cur_file.getName());
      if (suffix.length() > 0) {
        cur_local_path = cur_local_path.suffix("." + suffix);
      }
      if (confArg.get("copy").equals("true")) {
        // Skip the copy when the local checksum already matches the remote file.
        String crc = hdfs.getFileChecksum(cur_file).toString();
        if (checksumLocalTest(cur_local_path, crc)) {
          continue;
        } else {
          hdfs.copyToLocalFile(cur_file, cur_local_path);
          writeChecksum(cur_local_path, crc);
        }
      } else {
        System.out.println(cur_local_path + "\t" + cur_file);
      }
    }
  }
  unlock(lockName);
  return 0;
}
private byte[] generateTweetId(User user) {
  // Reverse the timestamp so newer tweets sort first in HBase's ascending rowkey order.
  return Bytes.toBytes(user.getUserId() + "-" + (Long.MAX_VALUE - System.currentTimeMillis()));
}
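// A hypothetical companion sketch (not part of the original class) that inverts
// generateTweetId: splitting on "-" recovers the transposed epoch, and subtracting it
// from Long.MAX_VALUE yields the original wall-clock time. It assumes the numeric user
// id itself contains no "-".
private static long extractEpochFromTweetId(byte[] tweetId) {
  String[] parts = Bytes.toString(tweetId).split("-");
  long transposed = Long.parseLong(parts[1]);
  return Long.MAX_VALUE - transposed; // undo the reverse-timestamp transform
}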
@Test
public void testHostRank() throws Exception {
  if (System.getProperty("prop.mapred.job.tracker") != null) {
    if (LOG.isInfoEnabled())
      LOG.info("testHBaseInputOutput: Ignore this test if not local mode.");
    return;
  }

  File jarTest = new File(System.getProperty("prop.jarLocation"));
  if (!jarTest.exists()) {
    fail(
        "Could not find Giraph jar at "
            + "location specified by 'prop.jarLocation'. "
            + "Make sure you built the main Giraph artifact.");
  }

  MiniHBaseCluster cluster = null;
  MiniZooKeeperCluster zkCluster = null;
  FileSystem fs = null;

  try {
    // using the restart method allows us to avoid having the hbase
    // root directory overwritten by /home/$username
    zkCluster = testUtil.startMiniZKCluster();
    testUtil.restartHBaseCluster(2);
    cluster = testUtil.getMiniHBaseCluster();

    final byte[] OL_BYTES = Bytes.toBytes("ol");
    final byte[] S_BYTES = Bytes.toBytes("s");
    final byte[] METADATA_BYTES = Bytes.toBytes("mtdt");
    final byte[] HR_BYTES = Bytes.toBytes("_hr_");
    final byte[] TAB = Bytes.toBytes(TABLE_NAME);

    Configuration conf = cluster.getConfiguration();
    fs = FileSystem.get(conf); // remember the filesystem so cleanup can delete the root dir
    HTableDescriptor desc = new HTableDescriptor(TAB);
    desc.addFamily(new HColumnDescriptor(OL_BYTES));
    desc.addFamily(new HColumnDescriptor(S_BYTES));
    desc.addFamily(new HColumnDescriptor(METADATA_BYTES));
    HBaseAdmin hbaseAdmin = new HBaseAdmin(conf);
    if (hbaseAdmin.isTableAvailable(TABLE_NAME)) {
      hbaseAdmin.disableTable(TABLE_NAME);
      hbaseAdmin.deleteTable(TABLE_NAME);
    }
    hbaseAdmin.createTable(desc);

    /**
     * Enter the initial data: edges (a,b), (b,c), (a,c)
     * a = 1.0 - google
     * b = 1.0 - yahoo
     * c = 1.0 - bing
     */
    HTable table = new HTable(conf, TABLE_NAME);

    Put p1 = new Put(Bytes.toBytes("com.google.www"));
    p1.add(OL_BYTES, Bytes.toBytes("www.yahoo.com"), Bytes.toBytes("ab"));

    Put p2 = new Put(Bytes.toBytes("com.google.www"));
    p2.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("ac"));
    p2.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("invalid1"));
    p2.add(OL_BYTES, Bytes.toBytes("www.google.com"), Bytes.toBytes("invalid2"));

    Put p3 = new Put(Bytes.toBytes("com.yahoo.www"));
    p3.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("bc"));
    // p3.add(OL_BYTES, Bytes.toBytes(""), Bytes.toBytes("invalid4"));

    Put p4 = new Put(Bytes.toBytes("com.bing.www"));
    // TODO: Handle below case. use apache isValid method.
    p4.add(OL_BYTES, Bytes.toBytes("http://invalidurl"), Bytes.toBytes("invalid5"));
    p4.add(S_BYTES, S_BYTES, Bytes.toBytes(10.0d));

    Put p5 = new Put(Bytes.toBytes("dummy"));
    p5.add(S_BYTES, S_BYTES, Bytes.toBytes(10.0d));

    table.put(p1);
    table.put(p2);
    table.put(p3);
    table.put(p4);
    table.put(p5);

    // Set Giraph configuration
    // now operate over HBase using Vertex I/O formats
    conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME);
    conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE_NAME);

    // Start the giraph job
    GiraphJob giraphJob = new GiraphJob(conf, BspCase.getCallingMethodName());
    GiraphConfiguration giraphConf = giraphJob.getConfiguration();
    giraphConf.setZooKeeperConfiguration(cluster.getMaster().getZooKeeper().getQuorum());
    setupConfiguration(giraphJob);
    giraphConf.setComputationClass(LinkRankComputation.class);
    giraphConf.setMasterComputeClass(LinkRankVertexMasterCompute.class);
    giraphConf.setOutEdgesClass(ByteArrayEdges.class);
    giraphConf.setVertexInputFormatClass(Nutch2HostInputFormat.class);
    giraphConf.setVertexOutputFormatClass(Nutch2HostOutputFormat.class);
    giraphConf.setInt("giraph.linkRank.superstepCount", 10);
    giraphConf.setInt("giraph.linkRank.scale", 10);
    giraphConf.set("giraph.linkRank.family", "mtdt");
    giraphConf.set("giraph.linkRank.qualifier", "_hr_");
    giraphConf.setVertexInputFilterClass(HostRankVertexFilter.class);
    assertTrue(giraphJob.run(true));

    if (LOG.isInfoEnabled()) LOG.info("Giraph job successful. Checking output qualifier.");

    // Check the results
    Result result;
    byte[] calculatedScoreByte;
    Map<String, Double> expectedValues = new HashMap<String, Double>();
    expectedValues.put("com.google.www", 1.3515060339386287d);
    expectedValues.put("com.yahoo.www", 4.144902009567587d);
    expectedValues.put("com.bing.www", 9.063893290511482d);

    for (String key : expectedValues.keySet()) {
      result = table.get(new Get(key.getBytes()));
      calculatedScoreByte = result.getValue(METADATA_BYTES, HR_BYTES);
      assertNotNull(calculatedScoreByte);
      assertTrue(calculatedScoreByte.length > 0);
      Assert.assertEquals(
          "Scores are not the same",
          expectedValues.get(key),
          Bytes.toDouble(calculatedScoreByte),
          DELTA);
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
    if (zkCluster != null) {
      zkCluster.shutdown();
    }
    // clean test files
    if (fs != null) {
      fs.delete(hbaseRootdir, true);
    }
  }
}
public QueryResult getSimpleVariantsByRegion(
    Region region, String sourceId, QueryOptions options) {
  Long start, end, dbstart, dbend;
  start = System.currentTimeMillis();
  boolean includeStats;
  boolean includeEffects;
  if (!options.containsKey("stats") && !options.containsKey("effects")) {
    includeStats = true;
    includeEffects = true;
  } else {
    includeStats = options.containsKey("stats") && options.getBoolean("stats");
    includeEffects = options.containsKey("effects") && options.getBoolean("effects");
  }

  QueryResult<Variant> queryResult =
      new QueryResult<>(
          String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd()));
  List<Variant> results = new ArrayList<>();
  String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart()));
  String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd()));
  BasicDBObject query =
      new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow))
          .append("sources.sourceId", sourceId);
  DBCollection collection = db.getCollection("variants");
  dbstart = System.currentTimeMillis();
  DBCursor variantInStudies = collection.find(query);
  dbend = System.currentTimeMillis();
  queryResult.setDbTime(dbend - dbstart);

  for (DBObject result : variantInStudies) {
    String[] rowkeyParts = result.get("position").toString().split("_");
    String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", "");
    int position = Integer.parseInt(rowkeyParts[1]);
    BasicDBList studies = (BasicDBList) result.get("sources");
    BasicDBObject st = (BasicDBObject) studies.get(0);
    String ref = (String) st.get("ref");
    String alt = StringUtils.join((ArrayList<String>) st.get("alt"), ",");

    // TODO Needs rework
    Variant variant = new Variant(chromosome, position, position, ref, alt);

    // Set stats information
    if (includeStats) {
      VariantStats stats = new VariantStats();
      BasicDBObject mongoStats = (BasicDBObject) st.get("stats");
      stats.setMaf((float) (double) mongoStats.get("maf"));
      stats.setMafAllele((String) mongoStats.get("alleleMaf"));
      stats.setMissingGenotypes((int) mongoStats.get("missing"));
      List<Genotype> genotypeCount = new ArrayList<>();
      for (BasicDBObject s : (List<BasicDBObject>) mongoStats.get("genotypeCount")) {
        for (Map.Entry<String, Object> entry : s.entrySet()) {
          Genotype genotype = new Genotype(entry.getKey());
          genotype.setCount((Integer) entry.getValue());
          genotypeCount.add(genotype);
        }
      }
      stats.setGenotypes(genotypeCount);
      variant.setStats(stats);
    }

    // TODO Set consequence type names
    if (includeEffects) {
      BasicDBList mongoEffects = (BasicDBList) st.get("effects");
      if (mongoEffects != null) {
        for (Object e : mongoEffects) {
          String effectObo = e.toString();
          VariantEffect effect = new VariantEffect();
          effect.setConsequenceTypeObo(effectObo);
          variant.addEffect(effect);
        }
      }
    }

    results.add(variant);
  }

  queryResult.setResult(results);
  queryResult.setNumResults(results.size());
  end = System.currentTimeMillis();
  queryResult.setTime(end - start);
  return queryResult;
}
public static void main(final String[] args) throws Exception {
  final int res = ToolRunner.run(new Configuration(), new CrunchStockDateInserter(), args);
  System.exit(res);
}