Code example #1
File: HbaseTest.java  Project: calm4wei/bitmap4hadoop
  @Test
  public void insert_rowkey_prefix_date() throws IOException {

    System.out.println(errorTable);
    // Disable auto-flush so puts are buffered client-side and sent in batches.
    errorTable.setAutoFlushTo(false);
    int count = 0;
    long t1 = System.currentTimeMillis();
    for (int i = 0; i < 10000000; i++) {
      // Rowkey: fixed date prefix plus a random 8-character suffix.
      String uuid = UUID.randomUUID().toString().replaceAll("-", "").substring(0, 8);
      Put put = new Put(Bytes.toBytes("20150705" + "_" + uuid));
      put.add(
          fBytes,
          Bytes.toBytes("stacktrace"),
          Bytes.toBytes("java.io.IOException:file not found" + UUID.randomUUID().toString()));
      errorTable.put(put);
      count++;
      // Flush the client-side write buffer every 10,000 puts.
      if (i % 10000 == 0) {
        errorTable.flushCommits();
      }
    }
    errorTable.flushCommits();
    long t2 = System.currentTimeMillis();
    System.out.println("count=" + count + ",t2-t1=" + (t2 - t1));
  }
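Note: the buffered-write calls used above (setAutoFlushTo/flushCommits) were deprecated in the HBase 1.x client and removed in 2.x. A minimal sketch of the same batched-insert pattern with the replacement BufferedMutator API; the open Connection named connection and the table name "error_table" are assumptions, fBytes is the column family from the test:

  // Sketch only, assuming `connection` is an open HBase Connection.
  try (BufferedMutator mutator =
      connection.getBufferedMutator(TableName.valueOf("error_table"))) { // table name assumed
    for (int i = 0; i < 10000; i++) {
      String uuid = UUID.randomUUID().toString().replaceAll("-", "").substring(0, 8);
      Put put = new Put(Bytes.toBytes("20150705" + "_" + uuid));
      put.addColumn(fBytes, Bytes.toBytes("stacktrace"), Bytes.toBytes("stub value " + i));
      mutator.mutate(put); // buffered client-side, flushed in batches
    }
    mutator.flush(); // close() also flushes anything still buffered
  }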
Code example #2
File: move.java  Project: EdvardPedersen/GeStore
  /** Sets up configuration based on params */
  private static boolean setup(Hashtable<String, String> curConf, Configuration argConf) {

    if (argConf.get("file") == null) {
      logger.fatal("Missing file parameter");
      System.exit(1);
    }

    if (argConf.get("hdfs_base_path") == null) {
      logger.fatal("Missing HDFS base path, check gestore-conf.xml");
      System.exit(1);
    }

    if (argConf.get("hdfs_temp_path") == null) {
      logger.fatal("Missing HDFS temp path, check gestore-conf.xml");
      System.exit(1);
    }

    if (argConf.get("local_temp_path") == null) {
      logger.fatal("Missing local temp path, check gestore-conf.xml");
      System.exit(1);
    }

    // Input parameters
    curConf.put("run_id", argConf.get("run", ""));
    curConf.put("task_id", argConf.get("task", ""));
    curConf.put("file_id", argConf.get("file"));
    curConf.put("local_path", argConf.get("path", ""));
    curConf.put("type", argConf.get("type", "l2r"));
    curConf.put("timestamp_start", argConf.get("timestamp_start", "1"));
    curConf.put(
        "timestamp_stop", argConf.get("timestamp_stop", Integer.toString(Integer.MAX_VALUE)));
    curConf.put("delimiter", argConf.get("regex", "ID=.*"));
    curConf.put("taxon", argConf.get("taxon", "all"));
    curConf.put("intermediate", argConf.get("full_run", "false"));
    curConf.put("quick_add", argConf.get("quick_add", "false"));
    Boolean full_run = curConf.get("intermediate").matches("(?i).*true.*");
    curConf.put("format", argConf.get("format", "unknown"));
    curConf.put("split", argConf.get("split", "1"));
    curConf.put("copy", argConf.get("copy", "true"));

    // Constants
    curConf.put("base_path", argConf.get("hdfs_base_path"));
    curConf.put("temp_path", argConf.get("hdfs_temp_path"));
    curConf.put("local_temp_path", argConf.get("local_temp_path"));
    curConf.put("db_name_files", argConf.get("hbase_file_table"));
    curConf.put("db_name_runs", argConf.get("hbase_run_table"));
    curConf.put("db_name_updates", argConf.get("hbase_db_update_table"));

    // Timestamps
    Date currentTime = new Date();
    Date endDate = new Date(Long.parseLong(curConf.get("timestamp_stop")));
    curConf.put("timestamp_real", Long.toString(currentTime.getTime()));

    return true;
  }
Code example #3
 public void doScan() throws IOException {
   Table tableRef = connection.getTable(TableName.valueOf(table));
   Scan scan = new Scan();
   ResultScanner scanner = tableRef.getScanner(scan);
   long now = System.currentTimeMillis();
   if (verbose) System.out.println("Starting scan");
   for (Result res : scanner) {
     if (verbose) System.out.println(res);
   }
   if (verbose) System.out.printf("Scan finished: %d ms\n\n", System.currentTimeMillis() - now);
   tableRef.close();
 }
Code example #4
File: HBaseStoreManager.java  Project: Hyacinth/titan
  private static void waitUntil(long until) {
    long now = System.currentTimeMillis();

    while (now <= until) {
      try {
        Thread.sleep(1L);
        now = System.currentTimeMillis();
      } catch (InterruptedException e) {
        throw new RuntimeException(e);
      }
    }
  }
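Note: the loop above returns only once the wall clock has strictly passed until (the condition is now <= until). mutateMany in example #5 calls waitUntil(putTS) before returning, so any mutation issued afterwards gets a timestamp strictly greater than the puts it just wrote.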
Code example #5
File: HBaseStoreManager.java  Project: Hyacinth/titan
  @Override
  public void mutateMany(Map<String, Map<ByteBuffer, KCVMutation>> mutations, StoreTransaction txh)
      throws StorageException {
    // Deletes are stamped at delTS and puts one millisecond later (putTS), so a
    // put in this batch always supersedes a delete of the same cell.
    final long delTS = System.currentTimeMillis();
    final long putTS = delTS + 1;

    Map<ByteBuffer, Pair<Put, Delete>> commandsPerKey = convertToCommands(mutations, putTS, delTS);
    List<Row> batch = new ArrayList<Row>(commandsPerKey.size()); // actual batch operation

    // convert sorted commands into representation required for 'batch' operation
    for (Pair<Put, Delete> commands : commandsPerKey.values()) {
      if (commands.getFirst() != null) batch.add(commands.getFirst());

      if (commands.getSecond() != null) batch.add(commands.getSecond());
    }

    try {
      HTableInterface table = null;

      try {
        table = connectionPool.getTable(tableName);
        table.batch(batch);
        table.flushCommits();
      } finally {
        IOUtils.closeQuietly(table);
      }
    } catch (IOException e) {
      throw new TemporaryStorageException(e);
    } catch (InterruptedException e) {
      throw new TemporaryStorageException(e);
    }

    waitUntil(putTS);
  }
Code example #6
File: Utils.java  Project: cloudlander/archive
 static {
   try {
     props.load(ClassLoader.getSystemResource(Constants.PROPERTIES).openStream());
   } catch (IOException e) {
     logger.error("Fail to locate properties file.");
     System.exit(1);
   }
 }
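For context, the static block refers to fields declared outside the snippet. A hypothetical reconstruction of what it assumes (the names come from the snippet; the types and the logging framework are guesses):

  // Assumed to be declared elsewhere in Utils.java:
  private static final Properties props = new Properties();
  private static final Logger logger = Logger.getLogger(Utils.class); // e.g. log4j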
Code example #7
  @Override
  public void tweet(User user, String tweetText) throws IOException {
    final long epoch = System.currentTimeMillis();
    final long transposeEpoch = Long.MAX_VALUE - epoch;
    final byte[] epochBytes = Bytes.toBytes(epoch);

    final byte[] tweetBytes = Bytes.toBytes(tweetText);
    byte[] nameBytes = Bytes.toBytes(user.getName());

    /** put tweet into tweets */
    Put tweetRowPut = new Put(generateTweetId(user));

    tweetRowPut.add(_DEFAULT, _NAME, nameBytes);
    tweetRowPut.add(_DEFAULT, _MAIL, Bytes.toBytes(user.getEmail()));
    tweetRowPut.add(_DEFAULT, _TWEET, tweetBytes);
    tweetRowPut.add(_DEFAULT, _TIME, epochBytes);

    tweetsTable.put(tweetRowPut);

    /** put tweets for followers */
    Scan followerScan = new Scan();
    followerScan.setStartRow(Bytes.toBytes(user.getUserId() + "-"));
    followerScan.setStopRow(Bytes.toBytes((user.getUserId() + 1) + "-"));

    ResultScanner followerRS = followersTable.getScanner(followerScan);

    /** put the user's own tweet into her tweetline */
    Put put =
        new Put(Bytes.toBytes(user.getUserId() + "-" + transposeEpoch + "-" + user.getUserId()));
    put.add(_DEFAULT, _NAME, nameBytes);
    put.add(_DEFAULT, _TWEET, tweetBytes);
    put.add(_DEFAULT, _TIME, epochBytes);

    List<Row> puts = new ArrayList<Row>();
    puts.add(put);
    for (Result result : followerRS) {

      Long followerid = Bytes.toLong(result.getColumnLatest(_DEFAULT, _USERID).getValue());

      put = new Put(Bytes.toBytes(followerid + "-" + transposeEpoch + "-" + user.getUserId()));
      put.add(_DEFAULT, _NAME, nameBytes);
      put.add(_DEFAULT, _TWEET, tweetBytes);
      put.add(_DEFAULT, _TIME, epochBytes);

      puts.add(put);
    }
    followerRS.close();
    try {
      tweetlineTable.batch(puts);
    } catch (InterruptedException e) {
      e.printStackTrace(); // @TODO log and handle properly.
    }
  }
Code example #8
File: HbaseTest.java  Project: calm4wei/bitmap4hadoop
  @Test
  public void scan_by_prefix_date() throws IOException {
    FilterList fl = new FilterList();
    Filter filter =
        new RowFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("20150903"));
    fl.addFilter(filter);
    Scan scan = new Scan();
    scan.setFilter(fl);

    long t1 = System.currentTimeMillis();
    ResultScanner rs = errorTable.getScanner(scan);
    Result result;
    int count = 0;
    while ((result = rs.next()) != null) {
      System.out.println("rowkey=" + new String(result.getRow()));
      System.out.println(
          "value=" + new String(result.getValue(fBytes, Bytes.toBytes("stacktrace"))));
      System.out.println();
      count++;
    }
    long t2 = System.currentTimeMillis();
    System.out.println("count=" + count + ",t2 - t1=" + ((t2 - t1) / 1000));
  }
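Note: SubstringComparator accepts "20150903" anywhere in the rowkey, and the scan still visits every row. Since the keys written in example #1 begin with the date, a prefix match states the intent more directly; a minimal sketch against the same errorTable (setRowPrefixFilter assumes an HBase 1.1+ client):

    // Sketch only: match on the rowkey prefix instead of an arbitrary substring.
    Scan scan = new Scan();
    scan.setRowPrefixFilter(Bytes.toBytes("20150903")); // also narrows the start/stop rows
    ResultScanner rs = errorTable.getScanner(scan);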
Code example #9
  public static void main(String[] args) throws Exception {
    if (args.length < 5) {
      System.err.printf(
          "Usage: %s <conf> <keytab> <user> <table> <reps> [-v]\n",
          ExampleJavaClient.class.getName());
      System.exit(-1);
    }

    ExampleJavaClient exampleJavaClient = new ExampleJavaClient(args[0], args[1], args[2], args[3]);
    exampleJavaClient.initialise();
    if (args.length == 6 && args[5].equals("-v")) {
      exampleJavaClient.verbose(true);
    }

    int reps = Integer.parseInt(args[4]);

    long start = System.currentTimeMillis();
    System.out.println("Beginning " + reps + " scans");
    for (int i = 0; i < reps; ++i) {
      exampleJavaClient.doScan();
    }
    System.out.println("Runtime: " + (System.currentTimeMillis() - start) + "ms");
    System.out.println("Ended scans for " + args[2]);
  }
Code example #10
  public HostRankHBaseTest() {
    super(HostRankHBaseTest.class.getName());

    // Let's set up the hbase root directory.
    Configuration conf = HBaseConfiguration.create();
    try {
      FileSystem fs = FileSystem.get(conf);
      String randomStr = UUID.randomUUID().toString();
      String tmpdir = System.getProperty("java.io.tmpdir") + "/" + randomStr + "/";
      hbaseRootdir = fs.makeQualified(new Path(tmpdir));
      conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
      fs.mkdirs(hbaseRootdir);
    } catch (IOException ioe) {
      fail("Could not create hbase root directory.");
    }

    // Start the test utility.
    testUtil = new HBaseTestingUtility(conf);
  }
Code example #11
 public static void main(String[] args) throws Exception {
   Configuration conf = HBaseConfiguration.create();
   IntegrationTestingUtility.setUseDistributedCluster(conf);
   IntegrationTestDDLMasterFailover masterFailover = new IntegrationTestDDLMasterFailover();
   Connection connection = null;
   int ret = 1;
   try {
     // Initialize connection once, then pass to Actions
     LOG.debug("Setting up connection ...");
     connection = ConnectionFactory.createConnection(conf);
     masterFailover.setConnection(connection);
     ret = ToolRunner.run(conf, masterFailover, args);
   } catch (IOException e) {
     LOG.fatal("Failed to establish connection. Aborting test ...", e);
   } finally {
     connection = masterFailover.getConnection();
     if (connection != null) {
       connection.close();
     }
     System.exit(ret);
   }
 }
Code example #12
  @Override
  public QueryResult getVariantsHistogramByRegion(
      Region region, String sourceId, boolean histogramLogarithm, int histogramMax) {
    QueryResult<ObjectMap> queryResult =
        new QueryResult<>(
            String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd()));
    List<ObjectMap> data = new ArrayList<>();
    String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart()));
    String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd()));

    long startTime = System.currentTimeMillis();

    long startDbTime = System.currentTimeMillis();

    BasicDBObject query =
        new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow))
            .append("studies.studyId", sourceId);
    DBCollection collection = db.getCollection("variants");
    DBCursor queryResults = collection.find(query);
    queryResult.setDbTime(System.currentTimeMillis() - startDbTime);

    int resultSize = queryResults.size();

    if (resultSize > histogramMax) { // Need to group results to fit maximum size of the histogram
      int sumChunkSize = resultSize / histogramMax;
      int i = 0, j = 0;
      int featuresCount = 0;
      ObjectMap item = null;

      for (DBObject result : queryResults) {
        //                featuresCount += result.getInt("features_count");
        //                if (i == 0) {
        //                    item = new ObjectMap("chromosome", result.getString("chromosome"));
        //                    item.put("chunkId", result.getInt("chunk_id"));
        //                    item.put("start", result.getInt("start"));
        //                } else if (i == sumChunkSize - 1 || j == resultSize - 1) {
        //                    if (histogramLogarithm) {
        //                        item.put("featuresCount", (featuresCount > 0) ? Math.log(featuresCount) : 0);
        //                    } else {
        //                        item.put("featuresCount", featuresCount);
        //                    }
        //                    item.put("end", result.getInt("end"));
        //                    data.add(item);
        //                    i = -1;
        //                    featuresCount = 0;
        //                }
        //                j++;
        //                i++;
      }
    } else {
      for (DBObject result : queryResults) {
        //                ObjectMap item = new ObjectMap("chromosome", result.getString("chromosome"));
        //                item.put("chunkId", result.getInt("chunk_id"));
        //                item.put("start", result.getInt("start"));
        //                if (histogramLogarithm) {
        //                    int features_count = result.getInt("features_count");
        //                    result.put("featuresCount", (features_count > 0) ? Math.log(features_count) : 0);
        //                } else {
        //                    item.put("featuresCount", result.getInt("features_count"));
        //                }
        //                item.put("end", result.getInt("end"));
        //                data.add(item);
      }
    }

    queryResult.setResult(data);
    queryResult.setNumResults(data.size());
    queryResult.setTime(System.currentTimeMillis() - startTime);

    return queryResult;
  }
Code example #13
  @Test
  public void testRun() throws Exception {

    TableName tn = TableName.valueOf(tableName);
    byte[] mobValueBytes = new byte[100];

    // get the path where mob files lie in
    Path mobFamilyPath = MobUtils.getMobFamilyPath(TEST_UTIL.getConfiguration(), tn, family);

    Put put = new Put(Bytes.toBytes(row));
    put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qf), 1, mobValueBytes);
    Put put2 = new Put(Bytes.toBytes(row + "ignore"));
    put2.addColumn(Bytes.toBytes(family), Bytes.toBytes(qf), 1, mobValueBytes);
    table.mutate(put);
    table.mutate(put2);
    table.flush();
    admin.flush(tn);

    FileStatus[] fileStatuses = TEST_UTIL.getTestFileSystem().listStatus(mobFamilyPath);
    // check the generation of a mob file
    assertEquals(1, fileStatuses.length);

    String mobFile1 = fileStatuses[0].getPath().getName();

    Configuration configuration = new Configuration(TEST_UTIL.getConfiguration());
    configuration.setFloat(MobConstants.MOB_SWEEP_TOOL_COMPACTION_RATIO, 0.6f);
    configuration.setStrings(TableInputFormat.INPUT_TABLE, tableName);
    configuration.setStrings(TableInputFormat.SCAN_COLUMN_FAMILY, family);
    configuration.setStrings(SweepJob.WORKING_VISITED_DIR_KEY, "jobWorkingNamesDir");
    configuration.setStrings(SweepJob.WORKING_FILES_DIR_KEY, "compactionFileDir");
    configuration.setStrings(
        CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, JavaSerialization.class.getName());
    configuration.set(SweepJob.WORKING_VISITED_DIR_KEY, "compactionVisitedDir");
    configuration.setLong(
        MobConstants.MOB_SWEEP_TOOL_COMPACTION_START_DATE,
        System.currentTimeMillis() + 24 * 3600 * 1000);

    ZooKeeperWatcher zkw = new ZooKeeperWatcher(configuration, "1", new DummyMobAbortable());
    TableName lockName = MobUtils.getTableLockName(tn);
    String znode = ZKUtil.joinZNode(zkw.tableLockZNode, lockName.getNameAsString());
    configuration.set(SweepJob.SWEEP_JOB_ID, "1");
    configuration.set(SweepJob.SWEEP_JOB_TABLE_NODE, znode);
    ServerName serverName = SweepJob.getCurrentServerName(configuration);
    configuration.set(SweepJob.SWEEP_JOB_SERVERNAME, serverName.toString());

    TableLockManager tableLockManager =
        TableLockManager.createTableLockManager(configuration, zkw, serverName);
    TableLock lock = tableLockManager.writeLock(lockName, "Run sweep tool");
    lock.acquire();
    try {
      // use the same counter when mocking
      Counter counter = new GenericCounter();
      Reducer<Text, KeyValue, Writable, Writable>.Context ctx = mock(Reducer.Context.class);
      when(ctx.getConfiguration()).thenReturn(configuration);
      when(ctx.getCounter(Matchers.any(SweepCounter.class))).thenReturn(counter);
      when(ctx.nextKey()).thenReturn(true).thenReturn(false);
      when(ctx.getCurrentKey()).thenReturn(new Text(mobFile1));

      byte[] refBytes = Bytes.toBytes(mobFile1);
      long valueLength = refBytes.length;
      byte[] newValue = Bytes.add(Bytes.toBytes(valueLength), refBytes);
      KeyValue kv2 =
          new KeyValue(
              Bytes.toBytes(row),
              Bytes.toBytes(family),
              Bytes.toBytes(qf),
              1,
              KeyValue.Type.Put,
              newValue);
      List<KeyValue> list = new ArrayList<KeyValue>();
      list.add(kv2);

      when(ctx.getValues()).thenReturn(list);

      SweepReducer reducer = new SweepReducer();
      reducer.run(ctx);
    } finally {
      lock.release();
    }
    FileStatus[] fileStatuses2 = TEST_UTIL.getTestFileSystem().listStatus(mobFamilyPath);
    String mobFile2 = fileStatuses2[0].getPath().getName();
    // a new mob file has been generated and the old one archived
    assertEquals(1, fileStatuses2.length);
    assertFalse(mobFile2.equalsIgnoreCase(mobFile1));

    // test sequence file
    String workingPath = configuration.get(SweepJob.WORKING_VISITED_DIR_KEY);
    FileStatus[] statuses = TEST_UTIL.getTestFileSystem().listStatus(new Path(workingPath));
    Set<String> files = new TreeSet<String>();
    for (FileStatus st : statuses) {
      files.addAll(
          getKeyFromSequenceFile(TEST_UTIL.getTestFileSystem(), st.getPath(), configuration));
    }
    assertEquals(1, files.size());
    assertTrue(files.contains(mobFile1));
  }
Code example #14
File: move.java  Project: EdvardPedersen/GeStore
 public static void main(String[] args) throws Exception {
   int result = ToolRunner.run(new Configuration(), new move(), args);
   System.exit(result);
 }
Code example #15
  @Override
  public QueryResult<Variant> getAllVariantsByRegionAndStudy(
      Region region, String sourceId, QueryOptions options) {
    Long start, end, dbstart, dbend;
    start = System.currentTimeMillis();
    QueryResult<Variant> queryResult =
        new QueryResult<>(
            String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd()));
    List<Variant> results = new LinkedList<>();

    boolean includeSamples;
    boolean includeStats;
    boolean includeEffects;
    if (!options.containsKey("samples")
        && !options.containsKey("stats")
        && !options.containsKey("effects")) {
      includeSamples = true;
      includeStats = true;
      includeEffects = true;
    } else {
      includeSamples = options.containsKey("samples") && options.getBoolean("samples");
      includeStats = options.containsKey("stats") && options.getBoolean("stats");
      includeEffects = options.containsKey("effects") && options.getBoolean("effects");
    }

    try {
      String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart()));
      String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd()));
      HTable table = new HTable(admin.getConfiguration(), tableName);
      dbstart = System.currentTimeMillis();
      Scan regionScan = new Scan(startRow.getBytes(), stopRow.getBytes());
      ResultScanner scanres = table.getScanner(regionScan);
      dbend = System.currentTimeMillis();
      queryResult.setDbTime(dbend - dbstart);

      // Iterate over results and, optionally, their samples and statistics
      for (Result result : scanres) {
        String[] rowkeyParts = new String(result.getRow(), CHARSET_UTF_8).split("_");
        String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", "");
        int position = Integer.parseInt(rowkeyParts[1]);

        // Get basic result fields from Protocol Buffers message
        NavigableMap<byte[], byte[]> infoMap = result.getFamilyMap("i".getBytes());
        byte[] byteInfo = infoMap.get((sourceId + "_data").getBytes());
        VariantFieldsProtos.VariantInfo protoInfo =
            VariantFieldsProtos.VariantInfo.parseFrom(byteInfo);
        String reference = protoInfo.getReference();
        String alternate = StringUtils.join(protoInfo.getAlternateList(), ",");
        String format = StringUtils.join(protoInfo.getFormatList(), ":");
        Variant variant = new Variant(chromosome, position, position, reference, alternate);

        // Set samples if requested
        if (includeSamples) {
          NavigableMap<byte[], byte[]> sampleMap = result.getFamilyMap("d".getBytes());
          Map<String, Map<String, String>> resultSampleMap = new HashMap<>();

          // Set samples
          for (byte[] s : sampleMap.keySet()) {
            String sampleName = (new String(s, CHARSET_UTF_8)).replaceAll(sourceId + "_", "");
            VariantFieldsProtos.VariantSample sample =
                VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s));
            String sample1 = sample.getSample();
            String[] values = sample1.split(":");
            String[] fields = format.split(":");
            Map<String, String> singleSampleMap = new HashMap<>();
            for (int i = 0; i < fields.length; i++) {
              singleSampleMap.put(fields[i], values[i]);
            }
            // TODO
            //                        variant.addSampleData(sampleName, singleSampleMap);
          }
        }

        // Set stats if requested
        if (includeStats) {
          byte[] byteStats = infoMap.get((sourceId + "_stats").getBytes());
          VariantFieldsProtos.VariantStats protoStats =
              VariantFieldsProtos.VariantStats.parseFrom(byteStats);
          VariantStats variantStats =
              new VariantStats(
                  chromosome,
                  position,
                  reference,
                  alternate,
                  protoStats.getMaf(),
                  protoStats.getMgf(),
                  protoStats.getMafAllele(),
                  protoStats.getMgfGenotype(),
                  protoStats.getMissingAlleles(),
                  protoStats.getMissingGenotypes(),
                  protoStats.getMendelianErrors(),
                  protoStats.getIsIndel(),
                  protoStats.getCasesPercentDominant(),
                  protoStats.getControlsPercentDominant(),
                  protoStats.getCasesPercentRecessive(),
                  protoStats.getControlsPercentRecessive());
          variant.setStats(variantStats);
        }

        if (includeEffects) {
          QueryResult<VariantEffect> queryEffects = getEffectsByVariant(variant, options);
          variant.setEffect(queryEffects.getResult());
        }

        results.add(variant);
      }
    } catch (IOException e) {
      System.err.println(e.getClass().getName() + ": " + e.getMessage());
    }
    queryResult.setResult(results);
    queryResult.setNumResults(results.size());
    end = System.currentTimeMillis();
    queryResult.setTime(end - start);
    return queryResult;
  }
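A side note: constructing HTable directly from a Configuration, as this method does, was deprecated in the HBase 1.0 client; the Connection-based lookup shown in example #3, connection.getTable(TableName.valueOf(tableName)), is the replacement.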
Code example #16
File: move.java  Project: EdvardPedersen/GeStore
  public int run(String[] args) throws Exception {
    // printUsage();
    /*
     * SETUP
     */
    Configuration argConf = getConf();
    Hashtable<String, String> confArg = new Hashtable<String, String>();
    setup(confArg, argConf);
    Date currentTime = new Date();
    Date endDate = new Date(Long.parseLong(confArg.get("timestamp_stop")));
    Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*");
    Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*");
    logger.info("Running GeStore");

    // ZooKeeper setup
    Configuration config = HBaseConfiguration.create();
    zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config));
    zkInstance =
        new ZooKeeper(
            ZKConfig.getZKQuorumServersString(config),
            config.getInt("zookeeper.session.timeout", -1),
            zkWatcher);

    if (!confArg.get("task_id").isEmpty()) {
      confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id"));
    }

    String lockRequest = confArg.get("file_id");
    if (!confArg.get("run_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("run_id") + "_";
    if (!confArg.get("task_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("task_id") + "_";

    // Get type of movement
    toFrom type_move = checkArgs(confArg);
    if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) {
      List<String> arguments = new ArrayList<String>();
      arguments.add("-Dinput=" + confArg.get("local_path"));
      arguments.add("-Dtable=" + confArg.get("file_id"));
      arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop"));
      arguments.add("-Dtype=" + confArg.get("format"));
      arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id"));
      arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path"));
      arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id"));
      if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add"));
      String lockName = lock(lockRequest);
      String[] argumentString = arguments.toArray(new String[arguments.size()]);
      adddb.main(argumentString);
      unlock(lockName);
      System.exit(0);
    }

    // Database registration

    dbutil db_util = new dbutil(config);
    db_util.register_database(confArg.get("db_name_files"), true);
    db_util.register_database(confArg.get("db_name_runs"), true);
    db_util.register_database(confArg.get("db_name_updates"), true);
    FileSystem hdfs = FileSystem.get(config);
    FileSystem localFS = FileSystem.getLocal(config);

    // Get source type
    confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    confArg.put(
        "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    if (!confArg.get("source").equals("local")
        && type_move == toFrom.REMOTE2LOCAL
        && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) {
      confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util)));
    }

    /*
     * Get previous timestamp
     */
    Get run_id_get = new Get(confArg.get("run_id").getBytes());
    Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get);
    KeyValue run_file_prev =
        run_get.getColumnLatest(
            "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes());
    String last_timestamp = new String("0");
    if (null != run_file_prev && !confArg.get("source").equals("local")) {
      long last_timestamp_real = run_file_prev.getTimestamp();
      long current_timestamp = Long.parseLong(confArg.get("timestamp_real"));
      if ((current_timestamp - last_timestamp_real) > 36000) {
        last_timestamp = new String(run_file_prev.getValue());
        last_timestamp = Integer.toString(Integer.parseInt(last_timestamp) + 1);
        logger.info("Last timestamp: " + last_timestamp + " End date: " + endDate);
        Date last_run = new Date(run_file_prev.getTimestamp());
        if (last_run.before(endDate) && !full_run) {
          confArg.put("timestamp_start", last_timestamp);
        }
      }
    }

    int tse = Integer.parseInt(confArg.get("timestamp_stop"));
    int tss = Integer.parseInt(confArg.get("timestamp_start"));
    if (tss > tse) {
      logger.info("No new version of requested file.");
      return 0;
    }

    /*
     * Generate file
     */

    String lockName = lock(lockRequest);

    Get file_id_get = new Get(confArg.get("file_id").getBytes());
    Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get);
    if (!file_get.isEmpty()) {
      boolean found =
          hasFile(
              db_util,
              hdfs,
              confArg.get("db_name_files"),
              confArg.get("file_id"),
              getFullPath(confArg));
      if (confArg.get("source").equals("fullfile")) {
        found = false;
      }
      String filenames_put =
          getFileNames(
              db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg));
      // Filename not found in file database
      if (!found && type_move == toFrom.REMOTE2LOCAL) {
        if (!confArg.get("source").equals("local")) {
          // Generate intermediate file
          if (getFile(hdfs, confArg, db_util) == null) {
            unlock(lockName);
            return 1;
          }
          // Put generated file into file database
          if (!confArg.get("format").equals("fullfile")) {
            putFileEntry(
                db_util,
                hdfs,
                confArg.get("db_name_files"),
                confArg.get("file_id"),
                confArg.get("full_file_name"),
                confArg.get("source"));
          }
        } else {
          logger.warn("Remote file not found, and cannot be generated! File: " + confArg);
          unlock(lockName);
          return 1;
        }
      }
    } else {
      if (type_move == toFrom.REMOTE2LOCAL) {
        logger.warn("Remote file not found, and cannot be generated.");
        unlock(lockName);
        return 1;
      }
    }

    /*
     * Copy file
     * Update tables
     */

    if (type_move == toFrom.LOCAL2REMOTE) {
      if (!confArg.get("format").equals("fullfile")) {
        putFileEntry(
            db_util,
            hdfs,
            confArg.get("db_name_files"),
            confArg.get("file_id"),
            getFullPath(confArg),
            confArg.get("source"));
      }
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg)));
    } else if (type_move == toFrom.REMOTE2LOCAL) {
      FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*"));
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      unlock(lockName);
      for (FileStatus file : files) {
        Path cur_file = file.getPath();
        Path cur_local_path = new Path(confArg.get("local_path") + confArg.get("file_id"));
        String suffix = getSuffix(getFileName(confArg), cur_file.getName());
        if (suffix.length() > 0) {
          cur_local_path = cur_local_path.suffix("." + suffix);
        }
        if (confArg.get("copy").equals("true")) {
          String crc = hdfs.getFileChecksum(cur_file).toString();
          if (checksumLocalTest(cur_local_path, crc)) {
            continue;
          } else {
            hdfs.copyToLocalFile(cur_file, cur_local_path);
            writeChecksum(cur_local_path, crc);
          }
        } else {
          System.out.println(cur_local_path + "\t" + cur_file);
        }
      }
    }
    unlock(lockName);
    return 0;
  }
Code example #17
 private byte[] generateTweetId(User user) {
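   // Transposed timestamp: Long.MAX_VALUE - now shrinks as time advances, and all
   // such values have 19 decimal digits, so newer tweets sort first lexicographically.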
   return Bytes.toBytes(user.getUserId() + "-" + (Long.MAX_VALUE - System.currentTimeMillis()));
 }
Code example #18
  @Test
  public void testHostRank() throws Exception {

    if (System.getProperty("prop.mapred.job.tracker") != null) {
      if (LOG.isInfoEnabled())
        LOG.info("testHBaseInputOutput: Ignore this test if not local mode.");
      return;
    }

    File jarTest = new File(System.getProperty("prop.jarLocation"));
    if (!jarTest.exists()) {
      fail(
          "Could not find Giraph jar at "
              + "location specified by 'prop.jarLocation'. "
              + "Make sure you built the main Giraph artifact.");
    }

    MiniHBaseCluster cluster = null;
    MiniZooKeeperCluster zkCluster = null;
    FileSystem fs = null;

    try {
      // using the restart method allows us to avoid having the hbase
      // root directory overwritten by /home/$username
      zkCluster = testUtil.startMiniZKCluster();
      testUtil.restartHBaseCluster(2);
      cluster = testUtil.getMiniHBaseCluster();

      final byte[] OL_BYTES = Bytes.toBytes("ol");
      final byte[] S_BYTES = Bytes.toBytes("s");
      final byte[] METADATA_BYTES = Bytes.toBytes("mtdt");
      final byte[] HR_BYTES = Bytes.toBytes("_hr_");
      final byte[] TAB = Bytes.toBytes(TABLE_NAME);

      Configuration conf = cluster.getConfiguration();
      HTableDescriptor desc = new HTableDescriptor(TAB);
      desc.addFamily(new HColumnDescriptor(OL_BYTES));
      desc.addFamily(new HColumnDescriptor(S_BYTES));
      desc.addFamily(new HColumnDescriptor(METADATA_BYTES));
      HBaseAdmin hbaseAdmin = new HBaseAdmin(conf);
      if (hbaseAdmin.isTableAvailable(TABLE_NAME)) {
        hbaseAdmin.disableTable(TABLE_NAME);
        hbaseAdmin.deleteTable(TABLE_NAME);
      }
      hbaseAdmin.createTable(desc);

      /**
       * Enter the initial data: edges (a,b), (b,c), (a,c), where a = google,
       * b = yahoo, c = bing, each starting with score 1.0.
       */
      HTable table = new HTable(conf, TABLE_NAME);

      Put p1 = new Put(Bytes.toBytes("com.google.www"));
      p1.add(OL_BYTES, Bytes.toBytes("www.yahoo.com"), Bytes.toBytes("ab"));

      Put p2 = new Put(Bytes.toBytes("com.google.www"));
      p2.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("ac"));
      p2.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("invalid1"));
      p2.add(OL_BYTES, Bytes.toBytes("www.google.com"), Bytes.toBytes("invalid2"));

      Put p3 = new Put(Bytes.toBytes("com.yahoo.www"));
      p3.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("bc"));
      // p3.add(OL_BYTES, Bytes.toBytes(""), Bytes.toBytes("invalid4"));

      Put p4 = new Put(Bytes.toBytes("com.bing.www"));
      // TODO: Handle below case. use apache isValid method.
      p4.add(OL_BYTES, Bytes.toBytes("http://invalidurl"), Bytes.toBytes("invalid5"));
      p4.add(S_BYTES, S_BYTES, Bytes.toBytes(10.0d));

      Put p5 = new Put(Bytes.toBytes("dummy"));
      p5.add(S_BYTES, S_BYTES, Bytes.toBytes(10.0d));

      table.put(p1);
      table.put(p2);
      table.put(p3);
      table.put(p4);
      table.put(p5);

      // Set Giraph configuration
      // now operate over HBase using Vertex I/O formats
      conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME);
      conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE_NAME);

      // Start the giraph job
      GiraphJob giraphJob = new GiraphJob(conf, BspCase.getCallingMethodName());
      GiraphConfiguration giraphConf = giraphJob.getConfiguration();
      giraphConf.setZooKeeperConfiguration(cluster.getMaster().getZooKeeper().getQuorum());
      setupConfiguration(giraphJob);
      giraphConf.setComputationClass(LinkRankComputation.class);
      giraphConf.setMasterComputeClass(LinkRankVertexMasterCompute.class);
      giraphConf.setOutEdgesClass(ByteArrayEdges.class);
      giraphConf.setVertexInputFormatClass(Nutch2HostInputFormat.class);
      giraphConf.setVertexOutputFormatClass(Nutch2HostOutputFormat.class);
      giraphConf.setInt("giraph.linkRank.superstepCount", 10);
      giraphConf.setInt("giraph.linkRank.scale", 10);
      giraphConf.set("giraph.linkRank.family", "mtdt");
      giraphConf.set("giraph.linkRank.qualifier", "_hr_");
      giraphConf.setVertexInputFilterClass(HostRankVertexFilter.class);
      assertTrue(giraphJob.run(true));

      if (LOG.isInfoEnabled()) LOG.info("Giraph job successful. Checking output qualifier.");

      /** Check the results */
      Result result;
      String key;
      byte[] calculatedScoreByte;
      HashMap<String, Double> expectedValues = new HashMap<String, Double>();
      expectedValues.put("com.google.www", 1.3515060339386287d);
      expectedValues.put("com.yahoo.www", 4.144902009567587d);
      expectedValues.put("com.bing.www", 9.063893290511482d);

      for (Object keyObject : expectedValues.keySet()) {
        key = keyObject.toString();
        result = table.get(new Get(key.getBytes()));
        calculatedScoreByte = result.getValue(METADATA_BYTES, HR_BYTES);
        assertNotNull(calculatedScoreByte);
        assertTrue(calculatedScoreByte.length > 0);
        Assert.assertEquals(
            "Scores are not the same",
            (Double) expectedValues.get(key),
            Bytes.toDouble(calculatedScoreByte),
            DELTA);
      }
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
      if (zkCluster != null) {
        zkCluster.shutdown();
      }
      // clean test files
      if (fs != null) {
        fs.delete(hbaseRootdir);
      }
    }
  }
Code example #19
  public QueryResult getSimpleVariantsByRegion(
      Region region, String sourceId, QueryOptions options) {
    Long start, end, dbstart, dbend;
    start = System.currentTimeMillis();
    boolean includeStats;
    boolean includeEffects;
    if (!options.containsKey("stats") && !options.containsKey("effects")) {
      includeStats = true;
      includeEffects = true;
    } else {
      includeStats = options.containsKey("stats") && options.getBoolean("stats");
      includeEffects = options.containsKey("effects") && options.getBoolean("effects");
    }

    QueryResult<Variant> queryResult =
        new QueryResult<>(
            String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd()));
    List<Variant> results = new ArrayList<>();
    String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart()));
    String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd()));
    BasicDBObject query =
        new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow))
            .append("sources.sourceId", sourceId);
    DBCollection collection = db.getCollection("variants");
    dbstart = System.currentTimeMillis();
    DBCursor variantInStudies = collection.find(query);
    dbend = System.currentTimeMillis();
    queryResult.setDbTime(dbend - dbstart);

    for (DBObject result : variantInStudies) {
      String[] rowkeyParts = result.get("position").toString().split("_");
      String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", "");
      int position = Integer.parseInt(rowkeyParts[1]);
      BasicDBList studies = (BasicDBList) result.get("sources");
      BasicDBObject st = (BasicDBObject) studies.get(0);
      String ref = (String) st.get("ref");
      String alt = StringUtils.join((ArrayList<String>) st.get("alt"), ",");

      // TODO Needs rework
      Variant variant = new Variant(chromosome, position, position, ref, alt);

      // Set stats information
      if (includeStats) {
        VariantStats stats = new VariantStats();
        BasicDBObject mongoStats = (BasicDBObject) st.get("stats");
        stats.setMaf((float) (double) mongoStats.get("maf"));
        stats.setMafAllele((String) mongoStats.get("alleleMaf"));
        stats.setMissingGenotypes((int) mongoStats.get("missing"));
        List<Genotype> genotypeCount = new ArrayList<>();
        for (BasicDBObject s : (List<BasicDBObject>) mongoStats.get("genotypeCount")) {
          for (Map.Entry<String, Object> entry : s.entrySet()) {
            Genotype genotype = new Genotype(entry.getKey());
            genotype.setCount((Integer) entry.getValue());
            genotypeCount.add(genotype);
          }
        }
        stats.setGenotypes(genotypeCount);
        variant.setStats(stats);
      }

      // TODO Set consequence type names
      if (includeEffects) {
        BasicDBList mongoEffects = (BasicDBList) st.get("effects");
        if (mongoEffects != null) {
          for (Object e : mongoEffects) {
            String effectObo = e.toString();
            VariantEffect effect = new VariantEffect();
            effect.setConsequenceTypeObo(effectObo);
            variant.addEffect(effect);
          }
        }
      }

      results.add(variant);
    }

    queryResult.setResult(results);
    queryResult.setNumResults(results.size());
    end = System.currentTimeMillis();
    queryResult.setTime(end - start);
    return queryResult;
  }
Code example #20
 public static void main(final String[] args) throws Exception {
   final int res = ToolRunner.run(new Configuration(), new CrunchStockDateInserter(), args);
   System.exit(res);
 }