/**
 * Reports whether the referenced table currently holds no rows.
 *
 * <p>Asks the table-data list endpoint for at most one row and treats a missing or empty row
 * list in the response as "empty".
 *
 * @return {@code true} if the response carries no rows, {@code false} otherwise.
 * @throws IOException if building or executing the list request throws it.
 */
public boolean isEmpty() throws IOException {
  Bigquery.Tabledata.List request =
      client.tabledata().list(ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
  // One row is enough to decide whether the table has any data.
  request.setMaxResults(1L);

  List<TableRow> rows = request.execute().getRows();
  if (rows == null) {
    return true;
  }
  return rows.isEmpty();
}
/**
 * Attempts to create the BigQuery table identified by {@code ref} with the given schema.
 *
 * <p>If the insert fails because an item with the same name already exists, the failure is
 * logged and {@code null} is returned. In that case nothing here verifies that the existing
 * table's schema matches {@code schema}.
 *
 * @param schema Schema to assign to the new BigQuery table.
 * @return The table information returned by the insert call, or {@code null} if the table
 *     already exists.
 * @throws IOException if the insert fails for any reason other than "already exists".
 */
@Nullable
public Table tryCreateTable(TableSchema schema) throws IOException {
  LOG.info("Trying to create BigQuery table: {}", BigQueryIO.toTableSpec(ref));

  Table newTable = new Table();
  newTable.setTableReference(ref);
  newTable.setSchema(schema);

  Table created;
  try {
    created = client.tables().insert(ref.getProjectId(), ref.getDatasetId(), newTable).execute();
  } catch (IOException e) {
    boolean alreadyExists = new ApiErrorExtractor().itemAlreadyExists(e);
    if (!alreadyExists) {
      // Any other failure is the caller's problem.
      throw e;
    }
    LOG.info("The BigQuery table already exists.");
    return null;
  }
  return created;
}
/** * Retrieves or creates the table. * * <p>The table is checked to conform to insertion requirements as specified by WriteDisposition * and CreateDisposition. * * <p>If table truncation is requested (WriteDisposition.WRITE_TRUNCATE), then this will re-create * the table if necessary to ensure it is empty. * * <p>If an empty table is required (WriteDisposition.WRITE_EMPTY), then this will fail if the * table exists and is not empty. * * <p>When constructing a table, a {@code TableSchema} must be available. If a schema is provided, * then it will be used. If no schema is provided, but an existing table is being cleared * (WRITE_TRUNCATE option above), then the existing schema will be re-used. If no schema is * available, then an {@code IOException} is thrown. */ public Table getOrCreateTable( WriteDisposition writeDisposition, CreateDisposition createDisposition, @Nullable TableSchema schema) throws IOException { // Check if table already exists. Bigquery.Tables.Get get = client.tables().get(ref.getProjectId(), ref.getDatasetId(), ref.getTableId()); Table table = null; try { table = get.execute(); } catch (IOException e) { ApiErrorExtractor errorExtractor = new ApiErrorExtractor(); if (!errorExtractor.itemNotFound(e) || createDisposition != CreateDisposition.CREATE_IF_NEEDED) { // Rethrow. throw e; } } // If we want an empty table, and it isn't, then delete it first. if (table != null) { if (writeDisposition == WriteDisposition.WRITE_APPEND) { return table; } boolean empty = isEmpty(); if (empty) { if (writeDisposition == WriteDisposition.WRITE_TRUNCATE) { LOG.info("Empty table found, not removing {}", BigQueryIO.toTableSpec(ref)); } return table; } else if (writeDisposition == WriteDisposition.WRITE_EMPTY) { throw new IOException("WriteDisposition is WRITE_EMPTY, " + "but table is not empty"); } // Reuse the existing schema if none was provided. if (schema == null) { schema = table.getSchema(); } // Delete table and fall through to re-creating it below. 
LOG.info("Deleting table {}", BigQueryIO.toTableSpec(ref)); Bigquery.Tables.Delete delete = client.tables().delete(ref.getProjectId(), ref.getDatasetId(), ref.getTableId()); delete.execute(); } if (schema == null) { throw new IllegalArgumentException("Table schema required for new table."); } // Create the table. return tryCreateTable(schema); }
/** Insert all rows from the given list using specified insertIds if not null. */ public void insertAll(List<TableRow> rowList, @Nullable List<String> insertIdList) throws IOException { if (insertIdList != null && rowList.size() != insertIdList.size()) { throw new AssertionError( "If insertIdList is not null it needs to have at least " + "as many elements as rowList"); } AttemptBoundedExponentialBackOff backoff = new AttemptBoundedExponentialBackOff( MAX_INSERT_ATTEMPTS, INITIAL_INSERT_BACKOFF_INTERVAL_MS); final List<TableDataInsertAllResponse.InsertErrors> allErrors = new ArrayList<>(); // These lists contain the rows to publish. Initially the contain the entire list. If there are // failures, they will contain only the failed rows to be retried. List<TableRow> rowsToPublish = rowList; List<String> idsToPublish = insertIdList; while (true) { final List<TableRow> retryRows = new ArrayList<>(); final List<String> retryIds = (idsToPublish != null) ? new ArrayList<String>() : null; int strideIndex = 0; // Upload in batches. 
List<TableDataInsertAllRequest.Rows> rows = new LinkedList<>(); int dataSize = 0; List<Future<?>> futures = new ArrayList<>(); for (int i = 0; i < rowsToPublish.size(); ++i) { TableRow row = rowsToPublish.get(i); TableDataInsertAllRequest.Rows out = new TableDataInsertAllRequest.Rows(); if (idsToPublish != null) { out.setInsertId(idsToPublish.get(i)); } out.setJson(row.getUnknownKeys()); rows.add(out); dataSize += row.toString().length(); if (dataSize >= UPLOAD_BATCH_SIZE_BYTES || rows.size() >= maxRowsPerBatch || i == rowsToPublish.size() - 1) { TableDataInsertAllRequest content = new TableDataInsertAllRequest(); content.setRows(rows); final Bigquery.Tabledata.InsertAll insert = client .tabledata() .insertAll(ref.getProjectId(), ref.getDatasetId(), ref.getTableId(), content); final int finalStrideIndex = strideIndex; final List<TableRow> finalRowsToPublish = rowsToPublish; final List<String> finalIdsToPublish = idsToPublish; futures.add( executor.submit( new Runnable() { @Override public void run() { try { TableDataInsertAllResponse response = insert.execute(); List<TableDataInsertAllResponse.InsertErrors> errors = response.getInsertErrors(); if (errors != null) { synchronized (this) { allErrors.addAll(errors); for (TableDataInsertAllResponse.InsertErrors error : errors) { if (error.getIndex() == null) { throw new IOException("Insert failed: " + allErrors); } int errorIndex = error.getIndex().intValue() + finalStrideIndex; retryRows.add(finalRowsToPublish.get(errorIndex)); if (retryIds != null) { retryIds.add(finalIdsToPublish.get(errorIndex)); } } } } } catch (IOException e) { throw new RuntimeException(e); } } })); dataSize = 0; strideIndex = i + 1; rows = new LinkedList<>(); } } try { for (Future<?> future : futures) { future.get(); } } catch (InterruptedException e) { } catch (ExecutionException e) { Throwables.propagate(e.getCause()); } if (!allErrors.isEmpty() && !backoff.atMaxAttempts()) { try { Thread.sleep(backoff.nextBackOffMillis()); } catch 
(InterruptedException e) { // ignore. } LOG.info("Retrying failed inserts to BigQuery"); rowsToPublish = retryRows; idsToPublish = retryIds; allErrors.clear(); } else { break; } } if (!allErrors.isEmpty()) { throw new IOException("Insert failed: " + allErrors); } }