コード例 #1
0
  /** Checks if a table is empty. */
  public boolean isEmpty() throws IOException {
    Bigquery.Tabledata.List list =
        client.tabledata().list(ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
    list.setMaxResults(1L);
    TableDataList dataList = list.execute();

    return dataList.getRows() == null || dataList.getRows().isEmpty();
  }
コード例 #2
0
  /**
   * Tries to create the BigQuery table. If a table with the same name already exists in the
   * dataset, the table creation fails, and the function returns null. In such a case, the existing
   * table doesn't necessarily have the same schema as specified by the parameter.
   *
   * @param schema Schema of the new BigQuery table.
   * @return The newly created BigQuery table information, or null if the table with the same name
   *     already exists.
   * @throws IOException if other error than already existing table occurs.
   */
  @Nullable
  public Table tryCreateTable(TableSchema schema) throws IOException {
    LOG.info("Trying to create BigQuery table: {}", BigQueryIO.toTableSpec(ref));

    Table content = new Table();
    content.setTableReference(ref);
    content.setSchema(schema);

    try {
      return client.tables().insert(ref.getProjectId(), ref.getDatasetId(), content).execute();
    } catch (IOException e) {
      if (new ApiErrorExtractor().itemAlreadyExists(e)) {
        LOG.info("The BigQuery table already exists.");
        return null;
      }
      throw e;
    }
  }
コード例 #3
0
  /**
   * Retrieves or creates the table.
   *
   * <p>The table is checked to conform to insertion requirements as specified by WriteDisposition
   * and CreateDisposition.
   *
   * <p>If table truncation is requested (WriteDisposition.WRITE_TRUNCATE), then this will re-create
   * the table if necessary to ensure it is empty.
   *
   * <p>If an empty table is required (WriteDisposition.WRITE_EMPTY), then this will fail if the
   * table exists and is not empty.
   *
   * <p>When constructing a table, a {@code TableSchema} must be available. If a schema is provided,
   * then it will be used. If no schema is provided, but an existing table is being cleared
   * (WRITE_TRUNCATE option above), then the existing schema will be re-used. If no schema is
   * available, then an {@code IOException} is thrown.
   */
  public Table getOrCreateTable(
      WriteDisposition writeDisposition,
      CreateDisposition createDisposition,
      @Nullable TableSchema schema)
      throws IOException {
    // Check if table already exists.
    Bigquery.Tables.Get get =
        client.tables().get(ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
    Table table = null;
    try {
      table = get.execute();
    } catch (IOException e) {
      ApiErrorExtractor errorExtractor = new ApiErrorExtractor();
      if (!errorExtractor.itemNotFound(e)
          || createDisposition != CreateDisposition.CREATE_IF_NEEDED) {
        // Rethrow.
        throw e;
      }
    }

    // If we want an empty table, and it isn't, then delete it first.
    if (table != null) {
      if (writeDisposition == WriteDisposition.WRITE_APPEND) {
        return table;
      }

      boolean empty = isEmpty();
      if (empty) {
        if (writeDisposition == WriteDisposition.WRITE_TRUNCATE) {
          LOG.info("Empty table found, not removing {}", BigQueryIO.toTableSpec(ref));
        }
        return table;

      } else if (writeDisposition == WriteDisposition.WRITE_EMPTY) {
        throw new IOException("WriteDisposition is WRITE_EMPTY, " + "but table is not empty");
      }

      // Reuse the existing schema if none was provided.
      if (schema == null) {
        schema = table.getSchema();
      }

      // Delete table and fall through to re-creating it below.
      LOG.info("Deleting table {}", BigQueryIO.toTableSpec(ref));
      Bigquery.Tables.Delete delete =
          client.tables().delete(ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
      delete.execute();
    }

    if (schema == null) {
      throw new IllegalArgumentException("Table schema required for new table.");
    }

    // Create the table.
    return tryCreateTable(schema);
  }
コード例 #4
0
  /** Insert all rows from the given list using specified insertIds if not null. */
  public void insertAll(List<TableRow> rowList, @Nullable List<String> insertIdList)
      throws IOException {
    if (insertIdList != null && rowList.size() != insertIdList.size()) {
      throw new AssertionError(
          "If insertIdList is not null it needs to have at least " + "as many elements as rowList");
    }

    AttemptBoundedExponentialBackOff backoff =
        new AttemptBoundedExponentialBackOff(
            MAX_INSERT_ATTEMPTS, INITIAL_INSERT_BACKOFF_INTERVAL_MS);

    final List<TableDataInsertAllResponse.InsertErrors> allErrors = new ArrayList<>();
    // These lists contain the rows to publish. Initially the contain the entire list. If there are
    // failures, they will contain only the failed rows to be retried.
    List<TableRow> rowsToPublish = rowList;
    List<String> idsToPublish = insertIdList;
    while (true) {
      final List<TableRow> retryRows = new ArrayList<>();
      final List<String> retryIds = (idsToPublish != null) ? new ArrayList<String>() : null;

      int strideIndex = 0;
      // Upload in batches.
      List<TableDataInsertAllRequest.Rows> rows = new LinkedList<>();
      int dataSize = 0;

      List<Future<?>> futures = new ArrayList<>();

      for (int i = 0; i < rowsToPublish.size(); ++i) {
        TableRow row = rowsToPublish.get(i);
        TableDataInsertAllRequest.Rows out = new TableDataInsertAllRequest.Rows();
        if (idsToPublish != null) {
          out.setInsertId(idsToPublish.get(i));
        }
        out.setJson(row.getUnknownKeys());
        rows.add(out);

        dataSize += row.toString().length();
        if (dataSize >= UPLOAD_BATCH_SIZE_BYTES
            || rows.size() >= maxRowsPerBatch
            || i == rowsToPublish.size() - 1) {
          TableDataInsertAllRequest content = new TableDataInsertAllRequest();
          content.setRows(rows);

          final Bigquery.Tabledata.InsertAll insert =
              client
                  .tabledata()
                  .insertAll(ref.getProjectId(), ref.getDatasetId(), ref.getTableId(), content);

          final int finalStrideIndex = strideIndex;
          final List<TableRow> finalRowsToPublish = rowsToPublish;
          final List<String> finalIdsToPublish = idsToPublish;

          futures.add(
              executor.submit(
                  new Runnable() {
                    @Override
                    public void run() {
                      try {
                        TableDataInsertAllResponse response = insert.execute();

                        List<TableDataInsertAllResponse.InsertErrors> errors =
                            response.getInsertErrors();
                        if (errors != null) {
                          synchronized (this) {
                            allErrors.addAll(errors);
                            for (TableDataInsertAllResponse.InsertErrors error : errors) {
                              if (error.getIndex() == null) {
                                throw new IOException("Insert failed: " + allErrors);
                              }

                              int errorIndex = error.getIndex().intValue() + finalStrideIndex;
                              retryRows.add(finalRowsToPublish.get(errorIndex));
                              if (retryIds != null) {
                                retryIds.add(finalIdsToPublish.get(errorIndex));
                              }
                            }
                          }
                        }
                      } catch (IOException e) {
                        throw new RuntimeException(e);
                      }
                    }
                  }));

          dataSize = 0;
          strideIndex = i + 1;
          rows = new LinkedList<>();
        }
      }

      try {
        for (Future<?> future : futures) {
          future.get();
        }
      } catch (InterruptedException e) {
      } catch (ExecutionException e) {
        Throwables.propagate(e.getCause());
      }

      if (!allErrors.isEmpty() && !backoff.atMaxAttempts()) {
        try {
          Thread.sleep(backoff.nextBackOffMillis());
        } catch (InterruptedException e) {
          // ignore.
        }
        LOG.info("Retrying failed inserts to BigQuery");
        rowsToPublish = retryRows;
        idsToPublish = retryIds;
        allErrors.clear();
      } else {
        break;
      }
    }
    if (!allErrors.isEmpty()) {
      throw new IOException("Insert failed: " + allErrors);
    }
  }