Example 1
  @Override
  public Set<Element> get(Object key) {
    Get get = new Get(ByteArraySerializer.fromObject(key));
    Result result;
    try {
      result = backingTable.get(get);
    } catch (IOException e) {
      LOG.log(Level.SEVERE, "Cannot get from backing table", e);
      return null;
    }

    NavigableMap<byte[], byte[]> map = result.getFamilyMap(Bytes.toBytes(VALUES));
    if (null == map) return null;

    HashSet<Element> elementHashSet = new HashSet<Element>();

    for (byte[] byteArray : map.keySet()) {
      // Each qualifier in the VALUES family is a serialized element id;
      // rebuild it as a vertex or an edge depending on the indexed class.
      if (indexClass.equals(HVertex.class) || indexClass.equals(Vertex.class)) {
        HVertex hVertex = new HVertex(hGraph);
        hVertex.setId(byteArray);
        elementHashSet.add(hVertex);
      } else {
        final HEdge hEdge = new HEdge(hGraph);
        hEdge.setId(byteArray);
        elementHashSet.add(hEdge);
      }
    }

    return elementHashSet;
  }
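For contrast, a minimal sketch of the matching write path, assuming the same backingTable, VALUES family, and ByteArraySerializer as the method above; the put method itself is hypothetical, not part of the original class, and element ids are assumed to be byte[] as in the get above.

  public void put(Object key, Element element) {
    // Hypothetical counterpart: store the element id as a qualifier under
    // the VALUES family, keyed by the serialized index key.
    Put put = new Put(ByteArraySerializer.fromObject(key));
    put.add(Bytes.toBytes(VALUES), (byte[]) element.getId(), new byte[0]);
    try {
      backingTable.put(put);
    } catch (IOException e) {
      LOG.log(Level.SEVERE, "Cannot put to backing table", e);
    }
  }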
Example 2
  public int run(String[] args) throws Exception {
    Configuration argConf = getConf();

    // JobConf conf = new JobConf(diffdb.class);
    Configuration config = HBaseConfiguration.create();
    HBaseAdmin hbAdmin = new HBaseAdmin(config);
    dbutil db_util = new dbutil(config);

    HTable runTable = new HTable(config, "gestore_runs");
    Get runGet = new Get(argConf.get("id").getBytes());
    Result pipeline = runTable.get(runGet);

    NavigableMap<byte[], byte[]> pipeMap = pipeline.getFamilyMap("d".getBytes());

    Map.Entry<byte[], byte[]> results = pipeMap.pollFirstEntry();

    HashMap<String, HashMap<String, String>> resultMap =
        new HashMap<String, HashMap<String, String>>();

    // Drain the family map destructively; each qualifier is either a bare
    // file id or "<fileId>_<suffix>", where the suffix names the output field.
    while (results != null) {
      String resultKey = new String(results.getKey());
      String resultValue = new String(results.getValue());
      String field = "type";
      HashMap<String, String> tempMap = new HashMap<String, String>();
      String entry = resultKey;

      if (resultKey.endsWith("_db_timestamp")) {
        field = "db_timestamp";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_db_timestamp"));
      } else if (resultKey.endsWith("_filename")) {
        field = "filename";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_filename"));
      } else if (resultKey.endsWith("_regex")) {
        field = "regex";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_regex"));
      }

      if (resultMap.containsKey(entry)) {
        tempMap = resultMap.get(entry);
      }

      tempMap.put(field, resultValue);
      resultMap.put(entry, tempMap);

      // System.out.println("Key: " + resultKey + " Value: " + resultValue);
      results = pipeMap.pollFirstEntry();
    }

    for (String key : resultMap.keySet()) {
      System.out.println("File ID: " + key);
      for (String subKey : resultMap.get(key).keySet()) {
        // System.out.println("\t " + subKey + "\t\t" + resultMap.get(key).get(subKey));
        System.out.format("  %1$-20s  %2$s\n", subKey, resultMap.get(key).get(subKey));
      }
    }

    runTable.close();
    return 0;
  }
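Because run(String[]) reads its id parameter from getConf(), the enclosing class is presumably a Hadoop Tool driven by ToolRunner. A minimal launcher sketch; the class name diffdb comes from the commented-out JobConf line above, and the assumption that it extends Configured and implements Tool is ours:

  public static void main(String[] args) throws Exception {
    // Hypothetical launcher; ToolRunner parses generic options such as
    // -D id=<runId> into the Configuration later returned by getConf().
    System.exit(ToolRunner.run(new Configuration(), new diffdb(), args));
  }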
Example 3
  @Test
  public void testDisableAndEnableTable() throws IOException {
    final byte[] row = Bytes.toBytes("row");
    final byte[] qualifier = Bytes.toBytes("qualifier");
    final byte[] value = Bytes.toBytes("value");
    final byte[] table = Bytes.toBytes("testDisableAndEnableTable");
    HTable ht = TEST_UTIL.createTable(table, HConstants.CATALOG_FAMILY);
    Put put = new Put(row);
    put.add(HConstants.CATALOG_FAMILY, qualifier, value);
    ht.put(put);
    Get get = new Get(row);
    get.addColumn(HConstants.CATALOG_FAMILY, qualifier);
    ht.get(get);

    this.admin.disableTable(table);

    // Test that table is disabled
    get = new Get(row);
    get.addColumn(HConstants.CATALOG_FAMILY, qualifier);
    boolean ok = false;
    try {
      ht.get(get);
    } catch (NotServingRegionException | RetriesExhaustedException e) {
      // Expected: client reads fail while the table is disabled.
      ok = true;
    }
    assertTrue(ok);
    this.admin.enableTable(table);

    // Test that table is enabled
    try {
      ht.get(get);
    } catch (RetriesExhaustedException e) {
      ok = false;
    }
    assertTrue(ok);
  }
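The disabled-table check above works by catching a client-side exception; the same state can also be asserted directly through the admin handle. A minimal sketch, assuming the byte[]-based HBaseAdmin API used throughout this page:

    // Assert the table state via the admin API instead of provoking an exception.
    this.admin.disableTable(table);
    assertTrue(this.admin.isTableDisabled(table));
    this.admin.enableTable(table);
    assertTrue(this.admin.isTableEnabled(table));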
Example 4
  public List<Variant> getRecordSimpleStats(
      String study, int missing_gt, float maf, String maf_allele) {
    BasicDBObject compare =
        new BasicDBObject("studies.stats.allele_maf", maf_allele)
            .append("studies.stats.MAF", maf)
            .append("studies.stats.missing", missing_gt);
    List<Get> hbaseQuery = new ArrayList<>();
    DBCollection collection = db.getCollection("variants");
    Iterator<DBObject> result = collection.find(compare);
    while (result.hasNext()) {
      DBObject variant = result.next();
      String position = variant.get("_id").toString();
      // hbase query construction
      Get get = new Get(position.getBytes(CHARSET_UTF_8));
      hbaseQuery.add(get);
    }
    // Complete results, from HBase

    tableName = study;
    effectTableName = tableName + "effect";
    Map<String, Variant> resultsMap = new HashMap<>();

    try {
      HTable table = new HTable(admin.getConfiguration(), tableName);
      HTable effectTable = new HTable(admin.getConfiguration(), effectTableName);
      Result[] hbaseResultEffect = effectTable.get(hbaseQuery);
      Result[] hbaseResultStats = table.get(hbaseQuery);

      //            List<Variant> results = new LinkedList<>();
      for (Result r : hbaseResultStats) {
        String position = new String(r.getRow(), CHARSET_UTF_8);
        String[] aux = position.split("_");
        String inner_position = aux[1];
        String chr = aux[0];
        // position parsing
        if (chr.startsWith("0")) {
          chr = chr.substring(1);
        }
        while (inner_position.startsWith("0")) {
          inner_position = inner_position.substring(1);
        }
        NavigableMap<byte[], byte[]> infoMap = r.getFamilyMap("i".getBytes());
        byte[] byteStats = infoMap.get((study + "_stats").getBytes());
        VariantFieldsProtos.VariantStats stats =
            VariantFieldsProtos.VariantStats.parseFrom(byteStats);
        byte[] byteInfo = infoMap.get((study + "_data").getBytes());
        VariantFieldsProtos.VariantInfo info = VariantFieldsProtos.VariantInfo.parseFrom(byteInfo);
        String alternate = StringUtils.join(info.getAlternateList(), ", ");
        String reference = info.getReference();
        Variant partialResult =
            new Variant(
                chr,
                Integer.parseInt(inner_position),
                Integer.parseInt(inner_position),
                reference,
                alternate);
        String format = StringUtils.join(info.getFormatList(), ":");
        NavigableMap<byte[], byte[]> sampleMap = r.getFamilyMap("d".getBytes());
        Map<String, Map<String, String>> resultSampleMap = new HashMap<>();
        //                StringBuilder sampleRaw = new StringBuilder();
        for (byte[] s : sampleMap.keySet()) {
          String qual = (new String(s, CHARSET_UTF_8)).replaceAll(study + "_", "");
          VariantFieldsProtos.VariantSample sample =
              VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s));
          String sample1 = sample.getSample();
          String[] values = sample1.split(":");
          String[] fields = format.split(":");
          Map<String, String> singleSampleMap = new HashMap<>();
          for (int i = 0; i < fields.length; i++) {
            singleSampleMap.put(fields[i], values[i]);
          }
          resultSampleMap.put(qual, singleSampleMap);
        }
        VariantStats variantStats =
            new VariantStats(
                chr,
                Integer.parseInt(inner_position),
                reference,
                alternate,
                stats.getMaf(),
                stats.getMgf(),
                stats.getMafAllele(),
                stats.getMgfGenotype(),
                stats.getMissingAlleles(),
                stats.getMissingGenotypes(),
                stats.getMendelianErrors(),
                stats.getIsIndel(),
                stats.getCasesPercentDominant(),
                stats.getControlsPercentDominant(),
                stats.getCasesPercentRecessive(),
                stats.getControlsPercentRecessive());
        partialResult.setStats(variantStats);
        resultsMap.put(new String(r.getRow(), CHARSET_UTF_8), partialResult);
      }

      for (Result r : hbaseResultEffect) {
        if (!r.isEmpty()) {
          NavigableMap<byte[], byte[]> effectMap = r.getFamilyMap("e".getBytes());
          Variant partialResult = resultsMap.get(new String(r.getRow(), CHARSET_UTF_8));
          System.out.println("Recuperado " + partialResult.toString());
          String s = partialResult.getReference() + "_" + partialResult.getAlternate();
          VariantEffectProtos.EffectInfo effectInfo =
              VariantEffectProtos.EffectInfo.parseFrom(effectMap.get(s.getBytes()));
          VariantEffect variantEffect =
              new VariantEffect(
                  partialResult.getChromosome(),
                  (int) partialResult.getStart(),
                  partialResult.getReference(),
                  partialResult.getAlternate(),
                  effectInfo.getFeatureId(),
                  effectInfo.getFeatureName(),
                  effectInfo.getFeatureType(),
                  effectInfo.getFeatureBiotype(),
                  effectInfo.getFeatureChromosome(),
                  effectInfo.getFeatureStart(),
                  effectInfo.getFeatureEnd(),
                  effectInfo.getFeatureStrand(),
                  effectInfo.getSnpId(),
                  effectInfo.getAncestral(),
                  effectInfo.getAlternative(),
                  effectInfo.getGeneId(),
                  effectInfo.getTranscriptId(),
                  effectInfo.getGeneName(),
                  effectInfo.getConsequenceType(),
                  effectInfo.getConsequenceTypeObo(),
                  effectInfo.getConsequenceTypeDesc(),
                  effectInfo.getConsequenceTypeType(),
                  effectInfo.getAaPosition(),
                  effectInfo.getAminoacidChange(),
                  effectInfo.getCodonChange());
          resultsMap.put(new String(r.getRow(), CHARSET_UTF_8), partialResult);
        }
      }
      table.close();
      effectTable.close();
    } catch (IOException e) {
      // InvalidProtocolBufferException extends IOException, so one handler covers both.
      System.err.println(e.getClass().getName() + ": " + e.getMessage());
    }

    List<Variant> results = new ArrayList<>(resultsMap.values());
    return results;
  }
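A brief usage sketch for the method above; the instance name queryManager and all argument values are hypothetical:

  // Hypothetical call: variants of study "study1" with MAF 0.01 on allele "A"
  // and at most 10 missing genotypes.
  List<Variant> variants = queryManager.getRecordSimpleStats("study1", 10, 0.01f, "A");
  for (Variant v : variants) {
    System.out.println(
        v.getChromosome() + ":" + v.getStart() + " " + v.getReference() + ">" + v.getAlternate());
  }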
Example 5
  @Test
  public void testHostRank() throws Exception {

    if (System.getProperty("prop.mapred.job.tracker") != null) {
      if (LOG.isInfoEnabled())
        LOG.info("testHostRank: skipping; this test only runs in local mode.");
      return;
    }

    File jarTest = new File(System.getProperty("prop.jarLocation"));
    if (!jarTest.exists()) {
      fail(
          "Could not find Giraph jar at "
              + "location specified by 'prop.jarLocation'. "
              + "Make sure you built the main Giraph artifact?.");
    }

    MiniHBaseCluster cluster = null;
    MiniZooKeeperCluster zkCluster = null;
    FileSystem fs = null;

    try {
      // using the restart method allows us to avoid having the hbase
      // root directory overwritten by /home/$username
      zkCluster = testUtil.startMiniZKCluster();
      testUtil.restartHBaseCluster(2);
      cluster = testUtil.getMiniHBaseCluster();

      final byte[] OL_BYTES = Bytes.toBytes("ol");
      final byte[] S_BYTES = Bytes.toBytes("s");
      final byte[] METADATA_BYTES = Bytes.toBytes("mtdt");
      final byte[] HR_BYTES = Bytes.toBytes("_hr_");
      final byte[] TAB = Bytes.toBytes(TABLE_NAME);

      Configuration conf = cluster.getConfiguration();
      HTableDescriptor desc = new HTableDescriptor(TAB);
      desc.addFamily(new HColumnDescriptor(OL_BYTES));
      desc.addFamily(new HColumnDescriptor(S_BYTES));
      desc.addFamily(new HColumnDescriptor(METADATA_BYTES));
      HBaseAdmin hbaseAdmin = new HBaseAdmin(conf);
      if (hbaseAdmin.isTableAvailable(TABLE_NAME)) {
        hbaseAdmin.disableTable(TABLE_NAME);
        hbaseAdmin.deleteTable(TABLE_NAME);
      }
      hbaseAdmin.createTable(desc);

      /*
       * Enter the initial data: edges (a,b), (b,c), (a,c), with initial scores
       * a = 1.0 (google), b = 1.0 (yahoo), c = 1.0 (bing).
       */
      HTable table = new HTable(conf, TABLE_NAME);

      Put p1 = new Put(Bytes.toBytes("com.google.www"));
      p1.add(OL_BYTES, Bytes.toBytes("www.yahoo.com"), Bytes.toBytes("ab"));

      Put p2 = new Put(Bytes.toBytes("com.google.www"));
      p2.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("ac"));
      p2.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("invalid1"));
      p2.add(OL_BYTES, Bytes.toBytes("www.google.com"), Bytes.toBytes("invalid2"));

      Put p3 = new Put(Bytes.toBytes("com.yahoo.www"));
      p3.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("bc"));
      // p3.add(OL_BYTES, Bytes.toBytes(""), Bytes.toBytes("invalid4"));

      Put p4 = new Put(Bytes.toBytes("com.bing.www"));
      // TODO: Handle below case. use apache isValid method.
      p4.add(OL_BYTES, Bytes.toBytes("http://invalidurl"), Bytes.toBytes("invalid5"));
      p4.add(S_BYTES, S_BYTES, Bytes.toBytes(10.0d));

      Put p5 = new Put(Bytes.toBytes("dummy"));
      p5.add(S_BYTES, S_BYTES, Bytes.toBytes(10.0d));

      table.put(p1);
      table.put(p2);
      table.put(p3);
      table.put(p4);
      table.put(p5);

      // Set Giraph configuration
      // now operate over HBase using Vertex I/O formats
      conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME);
      conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE_NAME);

      // Start the giraph job
      GiraphJob giraphJob = new GiraphJob(conf, BspCase.getCallingMethodName());
      GiraphConfiguration giraphConf = giraphJob.getConfiguration();
      giraphConf.setZooKeeperConfiguration(cluster.getMaster().getZooKeeper().getQuorum());
      setupConfiguration(giraphJob);
      giraphConf.setComputationClass(LinkRankComputation.class);
      giraphConf.setMasterComputeClass(LinkRankVertexMasterCompute.class);
      giraphConf.setOutEdgesClass(ByteArrayEdges.class);
      giraphConf.setVertexInputFormatClass(Nutch2HostInputFormat.class);
      giraphConf.setVertexOutputFormatClass(Nutch2HostOutputFormat.class);
      giraphConf.setInt("giraph.linkRank.superstepCount", 10);
      giraphConf.setInt("giraph.linkRank.scale", 10);
      giraphConf.set("giraph.linkRank.family", "mtdt");
      giraphConf.set("giraph.linkRank.qualifier", "_hr_");
      giraphConf.setVertexInputFilterClass(HostRankVertexFilter.class);
      assertTrue(giraphJob.run(true));

      if (LOG.isInfoEnabled()) LOG.info("Giraph job successful. Checking output qualifier.");

      // Check the results
      HashMap<String, Double> expectedValues = new HashMap<String, Double>();
      expectedValues.put("com.google.www", 1.3515060339386287d);
      expectedValues.put("com.yahoo.www", 4.144902009567587d);
      expectedValues.put("com.bing.www", 9.063893290511482d);

      for (String key : expectedValues.keySet()) {
        Result result = table.get(new Get(Bytes.toBytes(key)));
        byte[] calculatedScoreByte = result.getValue(METADATA_BYTES, HR_BYTES);
        assertNotNull(calculatedScoreByte);
        assertTrue(calculatedScoreByte.length > 0);
        Assert.assertEquals(
            "Scores are not the same",
            expectedValues.get(key),
            Bytes.toDouble(calculatedScoreByte),
            DELTA);
      }
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
      if (zkCluster != null) {
        zkCluster.shutdown();
      }
      // clean test files
      if (fs != null) {
        fs.delete(hbaseRootdir, true);
      }
    }
  }
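Note that the row keys in this test store host names with their labels reversed (www.google.com becomes com.google.www), while outlink qualifiers keep the plain form. A hypothetical helper illustrating that convention:

  // Hypothetical helper: reverse the dot-separated labels of a host name,
  // e.g. "www.google.com" -> "com.google.www".
  static String reverseHost(String host) {
    String[] labels = host.split("\\.");
    StringBuilder reversed = new StringBuilder();
    for (int i = labels.length - 1; i >= 0; i--) {
      reversed.append(labels[i]);
      if (i > 0) reversed.append('.');
    }
    return reversed.toString();
  }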