/**
 * Emits a {@code dataset.size.mb} gauge for every non-system table in the given statistics.
 *
 * <p>Tables whose namespace equals {@code Constants.SYSTEM_NAMESPACE} are skipped. For each
 * remaining table, the dataset instances of the table's namespace are scanned; the first
 * instance whose specification reports the table via {@code isParent} receives the gauge, tagged
 * with the namespace and the dataset name. The size is the table's disk size in bytes divided by
 * {@code BYTES_IN_MB} and cast to {@code int}.
 *
 * @param datasetStat per-table statistics keyed by table id
 * @throws DatasetManagementException propagated from the dataset framework lookups
 */
private void report(Map<TableId, LevelDBTableService.TableStats> datasetStat)
      throws DatasetManagementException {
    for (Map.Entry<TableId, LevelDBTableService.TableStats> statEntry : datasetStat.entrySet()) {
      String namespace = statEntry.getKey().getNamespace().getId();
      // emit metrics for only user datasets, tables in system namespace are ignored
      if (namespace.equals(Constants.SYSTEM_NAMESPACE)) {
        continue;
      }
      String tableName = statEntry.getKey().getTableName();

      Collection<DatasetSpecificationSummary> instances =
          dsFramework.getInstances(Id.Namespace.from(namespace));
      for (DatasetSpecificationSummary spec : instances) {
        DatasetSpecification specification =
            dsFramework.getDatasetSpec(Id.DatasetInstance.from(namespace, spec.getName()));
        // The spec lookup can come back empty (for example when the instance was removed after
        // it was listed); skip it rather than failing the whole report with an NPE.
        if (specification == null || !specification.isParent(tableName)) {
          continue;
        }
        MetricsCollector collector =
            metricsService.getCollector(
                ImmutableMap.of(
                    Constants.Metrics.Tag.NAMESPACE,
                    namespace,
                    Constants.Metrics.Tag.DATASET,
                    spec.getName()));
        int sizeInMb = (int) (statEntry.getValue().getDiskSizeBytes() / BYTES_IN_MB);
        collector.gauge("dataset.size.mb", sizeInMb);
        // only the first owning dataset is reported for a table
        break;
      }
    }
  }
Example #2
0
  /**
   * Collects the dataset specifications of a namespace whose type is one of the given names.
   *
   * @param namespaceId namespace whose specifications are inspected (passed to {@code getAll})
   * @param typeNames type names to keep
   * @return a new list holding the matching specifications, in the order {@code getAll} yields
   *     them
   */
  public Collection<DatasetSpecification> getByTypes(
      Id.Namespace namespaceId, Set<String> typeNames) {
    List<DatasetSpecification> matches = Lists.newArrayList();
    for (DatasetSpecification candidate : getAll(namespaceId)) {
      if (!typeNames.contains(candidate.getType())) {
        continue;
      }
      matches.add(candidate);
    }
    return matches;
  }
 /**
  * Creates a {@link KeyStructValueTable} on top of the embedded "key-value-table" dataset.
  *
  * @param datasetContext context forwarded to the embedded table definition
  * @param spec specification of this dataset; supplies the dataset name and the embedded spec
  * @param arguments runtime arguments forwarded to the embedded table definition
  * @param classLoader class loader forwarded to the embedded table definition
  * @return a new table wrapper named after {@code spec}
  * @throws IOException declared for failures while obtaining the embedded table
  */
 @Override
 public KeyStructValueTable getDataset(
     DatasetContext datasetContext,
     DatasetSpecification spec,
     Map<String, String> arguments,
     ClassLoader classLoader)
     throws IOException {
   DatasetSpecification embeddedSpec = spec.getSpecification("key-value-table");
   Table backingTable = tableDef.getDataset(datasetContext, embeddedSpec, arguments, classLoader);
   return new KeyStructValueTable(spec.getName(), backingTable);
 }
Example #4
0
  /**
   * Resolves the schema of a dataset.
   *
   * <p>If the dataset specification can be found and has a {@code "schema"} property, that
   * property is parsed as JSON and returned. In every other case the schema is generated from the
   * dataset's record type.
   *
   * @param conf Configuration that contains RecordScannable name to load, CDAP and HBase
   *     configurations.
   * @param id the dataset instance whose schema is requested
   * @return Schema of the dataset
   * @throws IOException in case the dataset does not contain a valid schema
   * @throws UnsupportedTypeException in case the record type generates an unsupported schema
   */
  public static Schema getRecordSchema(Configuration conf, Id.DatasetInstance id)
      throws UnsupportedTypeException, IOException {
    // an explicitly configured schema takes precedence over the derived one
    DatasetSpecification spec = getDatasetSpec(conf, id);
    String configuredSchema = (spec == null) ? null : spec.getProperty("schema");
    if (configuredSchema != null) {
      return Schema.parseJson(configuredSchema);
    }

    // nothing configured: fall back to the schema generated from the record type
    Type recordType = getRecordType(conf, id);
    return schemaGenerator.generate(recordType);
  }
  /**
   * Creates a time-partitioned file set, delegating to the superclass with the fixed {@code
   * PARTITIONING}.
   *
   * <p>After the superclass is initialized, the spec's properties are checked for a partitioning;
   * if none is present the dataset is rejected.
   *
   * @param datasetContext context forwarded to the superclass
   * @param name dataset name, also used in the error message
   * @param fileSet file set forwarded to the superclass
   * @param partitionTable partition table forwarded to the superclass
   * @param spec dataset specification whose properties must contain the partitioning
   * @param arguments runtime arguments forwarded to the superclass
   * @param exploreFacadeProvider provider forwarded to the superclass
   * @throws DataSetException if the spec's properties do not contain a partitioning
   */
  public TimePartitionedFileSetDataset(
      DatasetContext datasetContext,
      String name,
      FileSet fileSet,
      IndexedTable partitionTable,
      DatasetSpecification spec,
      Map<String, String> arguments,
      Provider<ExploreFacade> exploreFacadeProvider) {
    super(
        datasetContext,
        name,
        PARTITIONING,
        fileSet,
        partitionTable,
        spec,
        arguments,
        exploreFacadeProvider);

    if (PartitionedFileSetProperties.getPartitioning(spec.getProperties()) != null) {
      return;
    }

    // The first TPFS version (CDAP 2.7) did not store the partitioning in the properties; such
    // datasets are not supported.
    String message =
        String.format(
            "Unsupported version of TimePartitionedFileSet. Dataset '%s' is missing "
                + "the partitioning property. This probably means that it was created in CDAP 2.7, "
                + "which is not supported any longer.",
            name);
    throw new DataSetException(message);
  }
 /**
  * Builds the specification for an instance of this dataset type.
  *
  * <p>The spec is created for {@code instanceName} with the type returned by {@code getName()},
  * carries the given properties, and embeds one dataset configured by {@code tableDef} under the
  * name "key-value-table".
  *
  * @param instanceName name of the dataset instance
  * @param properties properties copied onto the spec and passed to the embedded table's
  *     configuration
  * @return the built specification
  */
 @Override
 public DatasetSpecification configure(String instanceName, DatasetProperties properties) {
   DatasetSpecification embeddedTableSpec = tableDef.configure("key-value-table", properties);
   return DatasetSpecification.builder(instanceName, getName())
       .properties(properties.getProperties())
       .datasets(embeddedTableSpec)
       .build();
 }
Example #7
0
 /**
  * Creates an {@link HBaseTable} for the given name with readless increments enabled and the
  * requested conflict detection level.
  *
  * @param datasetContext context handed to the table
  * @param name name of the table
  * @param conflictLevel conflict detection level; its enum name is stored as a spec property
  * @return the newly constructed table
  * @throws Exception declared by the overridden method
  */
 @Override
 protected BufferingTable getTable(
     DatasetContext datasetContext, String name, ConflictDetection conflictLevel)
     throws Exception {
   // No TTL property is put on the spec here; only readless increments and the conflict level
   // are configured. (The original note at this point read: ttl=-1 means "keep data forever".)
   String conflictLevelName = conflictLevel.name();
   DatasetSpecification tableSpec =
       DatasetSpecification.builder(name, "foo")
           .property(Table.PROPERTY_READLESS_INCREMENT, "true")
           .property(Table.PROPERTY_CONFLICT_LEVEL, conflictLevelName)
           .build();
   HBaseTable hbaseTable =
       new HBaseTable(
           datasetContext, tableSpec, cConf, testHBase.getConfiguration(), hBaseTableUtil);
   return hbaseTable;
 }
Example #8
0
 /**
  * Writes a dataset instance specification under the key computed by {@code getInstanceKey} from
  * the namespace and the instance's name, by delegating to the keyed {@code write} overload.
  *
  * @param namespaceId namespace the instance belongs to
  * @param instanceSpec specification to write; its name is part of the key
  */
 public void write(Id.Namespace namespaceId, DatasetSpecification instanceSpec) {
   String instanceName = instanceSpec.getName();
   write(getInstanceKey(namespaceId, instanceName), instanceSpec);
 }
 /**
  * Returns the admin of the embedded "key-value-table" dataset as the admin of this dataset.
  *
  * @param datasetContext context forwarded to the embedded table definition
  * @param spec specification of this dataset; the embedded spec is looked up in it
  * @param classLoader class loader forwarded to the embedded table definition
  * @return the admin produced by the embedded table definition
  * @throws IOException declared for failures while obtaining the embedded admin
  */
 @Override
 public DatasetAdmin getAdmin(
     DatasetContext datasetContext, DatasetSpecification spec, ClassLoader classLoader)
     throws IOException {
   DatasetSpecification embeddedSpec = spec.getSpecification("key-value-table");
   return tableDef.getAdmin(datasetContext, embeddedSpec, classLoader);
 }
Example #10
0
 /**
  * Creates the dataset by handing the spec's name and the embedded table to the superclass.
  *
  * @param spec dataset specification; only its name is used here
  * @param embedded the embedded dataset declared under the name "t"
  */
 public PedanticTxAware(DatasetSpecification spec, @EmbeddedDataset("t") Table embedded) {
   super(spec.getName(), embedded);
 }
Example #11
0
 /**
  * Creates the dataset by handing the spec's name and the embedded table to the superclass, and
  * keeps a reference to the table in this instance.
  *
  * @param spec dataset specification; only its name is used here
  * @param table the embedded dataset declared under the name "word_assoc"
  */
 public AssociationTable(DatasetSpecification spec, @EmbeddedDataset("word_assoc") Table table) {
   super(spec.getName(), table);
   // retained in addition to being passed to the superclass
   this.table = table;
 }
Example #12
0
  /**
   * Checks TTL handling on reads for two tables: one created with a TTL property of 1000 and one
   * created with {@code Tables.NO_TTL}.
   *
   * <p>In both cases a first cell is written, the test sleeps for two seconds, and a second cell
   * is written. The TTL table is expected to return only the second cell; the table without TTL
   * is expected to return both.
   */
  @Test
  public void testTTL() throws Exception {
    // Per the original note: for this test it is fine not to configure the TTL when creating the
    // table, because what is verified is that the TTL is applied on reading.
    int ttl = 1000;
    String ttlTableName = "ttl";
    String noTtlTableName = "nottl";

    // --- table with a TTL ---
    DatasetProperties ttlProps =
        DatasetProperties.builder().add(Table.PROPERTY_TTL, Integer.toString(ttl)).build();
    getTableAdmin(CONTEXT1, ttlTableName, ttlProps).create();
    DatasetSpecification ttlSpec =
        DatasetSpecification.builder(ttlTableName, HBaseTable.class.getName())
            .properties(ttlProps.getProperties())
            .build();
    HBaseTable expiringTable =
        new HBaseTable(CONTEXT1, ttlSpec, cConf, testHBase.getConfiguration(), hBaseTableUtil);

    DetachedTxSystemClient txClient = new DetachedTxSystemClient();

    // first write, then wait so that it ages past the TTL
    Transaction currentTx = txClient.startShort();
    expiringTable.startTx(currentTx);
    expiringTable.put(b("row1"), b("col1"), b("val1"));
    expiringTable.commitTx();

    TimeUnit.SECONDS.sleep(2);

    // second write, which is still fresh when read below
    currentTx = txClient.startShort();
    expiringTable.startTx(currentTx);
    expiringTable.put(b("row2"), b("col2"), b("val2"));
    expiringTable.commitTx();

    // the first cell should be gone by now, the second one should still be readable
    currentTx = txClient.startShort();
    expiringTable.startTx(currentTx);
    byte[] expiredValue = expiringTable.get(b("row1"), b("col1"));
    if (expiredValue != null) {
      LOG.info("Unexpected value " + Bytes.toStringBinary(expiredValue));
    }
    Assert.assertNull(expiredValue);
    Assert.assertArrayEquals(b("val2"), expiringTable.get(b("row2"), b("col2")));

    // --- table without a TTL ---
    DatasetProperties noTtlProps =
        DatasetProperties.builder().add(Table.PROPERTY_TTL, String.valueOf(Tables.NO_TTL)).build();
    getTableAdmin(CONTEXT1, noTtlTableName, noTtlProps).create();
    DatasetSpecification noTtlSpec =
        DatasetSpecification.builder(noTtlTableName, HBaseTable.class.getName())
            .properties(noTtlProps.getProperties())
            .build();
    HBaseTable permanentTable =
        new HBaseTable(CONTEXT1, noTtlSpec, cConf, testHBase.getConfiguration(), hBaseTableUtil);

    currentTx = txClient.startShort();
    permanentTable.startTx(currentTx);
    permanentTable.put(b("row1"), b("col1"), b("val1"));
    permanentTable.commitTx();

    TimeUnit.SECONDS.sleep(2);

    currentTx = txClient.startShort();
    permanentTable.startTx(currentTx);
    permanentTable.put(b("row2"), b("col2"), b("val2"));
    permanentTable.commitTx();

    // with no TTL configured, both cells must still be visible
    currentTx = txClient.startShort();
    permanentTable.startTx(currentTx);
    Assert.assertArrayEquals(b("val1"), permanentTable.get(b("row1"), b("col1")));
    Assert.assertArrayEquals(b("val2"), permanentTable.get(b("row2"), b("col2")));
  }