/**
 * Makes sure the given dataset and table exist, creating whichever one is missing.
 *
 * <p>When the table already exists its schema must equal {@code schema}; otherwise a {@code
 * RuntimeException} describing both schemas is thrown.
 *
 * @param projectId project that owns the dataset
 * @param datasetId dataset that should contain the table
 * @param tableId table to look up or create
 * @param schema schema used for a new table and compared against an existing one
 * @throws IOException if one of the BigQuery requests fails
 */
private void setupBigQueryTable(
      String projectId, String datasetId, String tableId, TableSchema schema) throws IOException {
    // Build the client lazily, the first time it is needed.
    if (bigQueryClient == null) {
      bigQueryClient = Transport.newBigQueryClient(options.as(BigQueryOptions.class)).build();
    }

    // Create the dataset when the lookup reports it as missing.
    Datasets datasets = bigQueryClient.datasets();
    if (executeNullIfNotFound(datasets.get(projectId, datasetId)) == null) {
      DatasetReference datasetRef =
          new DatasetReference().setProjectId(projectId).setDatasetId(datasetId);
      Dataset datasetToCreate = new Dataset().setDatasetReference(datasetRef);
      datasets.insert(projectId, datasetToCreate).execute();
    }

    Tables tables = bigQueryClient.tables();
    Table existing = executeNullIfNotFound(tables.get(projectId, datasetId, tableId));
    if (existing == null) {
      // No such table yet: create it with the requested schema and finish.
      TableReference tableRef =
          new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId);
      Table tableToCreate = new Table().setSchema(schema).setTableReference(tableRef);
      tables.insert(projectId, datasetId, tableToCreate).execute();
      return;
    }

    if (existing.getSchema().equals(schema)) {
      // The existing table already has the expected schema.
      return;
    }

    throw new RuntimeException(
        "Table exists and schemas do not match, expecting: "
            + schema.toPrettyString()
            + ", actual: "
            + existing.getSchema().toPrettyString());
  }
  /**
   * Retrieves or creates the table.
   *
   * <p>The table is checked to conform to insertion requirements as specified by WriteDisposition
   * and CreateDisposition.
   *
   * <p>If the table exists and appending is requested (WriteDisposition.WRITE_APPEND), the
   * existing table is returned as is, without checking whether it is empty.
   *
   * <p>If table truncation is requested (WriteDisposition.WRITE_TRUNCATE), then this will delete
   * and re-create the table if it is not empty.
   *
   * <p>If an empty table is required (WriteDisposition.WRITE_EMPTY), then this will fail if the
   * table exists and is not empty.
   *
   * <p>When constructing a table, a {@code TableSchema} must be available. If a schema is provided,
   * then it will be used. If no schema is provided, but an existing table is being cleared
   * (WRITE_TRUNCATE option above), then the existing schema will be re-used. If no schema is
   * available, then an {@code IllegalArgumentException} is thrown.
   *
   * @param writeDisposition how existing table contents are to be treated
   * @param createDisposition whether a missing table may be created
   * @param schema schema for a newly created table, or null to reuse the schema of a table that is
   *     being re-created
   * @return the existing table, or the result of {@code tryCreateTable}, which is documented to
   *     return null when a table with the same name already exists at creation time
   * @throws IOException if the table lookup fails (a not-found error is tolerated only with
   *     CreateDisposition.CREATE_IF_NEEDED), if WRITE_EMPTY is requested but the table is not
   *     empty, or if deleting or creating the table fails
   * @throws IllegalArgumentException if a table has to be created and no schema is available
   */
  public Table getOrCreateTable(
      WriteDisposition writeDisposition,
      CreateDisposition createDisposition,
      @Nullable TableSchema schema)
      throws IOException {
    // Check if table already exists.
    Bigquery.Tables.Get get =
        client.tables().get(ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
    Table table = null;
    try {
      table = get.execute();
    } catch (IOException e) {
      // A missing table is acceptable only when we are allowed to create it; every other
      // failure is passed on to the caller unchanged.
      boolean mayCreateMissingTable =
          new ApiErrorExtractor().itemNotFound(e)
              && createDisposition == CreateDisposition.CREATE_IF_NEEDED;
      if (!mayCreateMissingTable) {
        throw e;
      }
    }

    // If we want an empty table, and it isn't, then delete it first.
    if (table != null) {
      if (writeDisposition == WriteDisposition.WRITE_APPEND) {
        return table;
      }

      if (isEmpty()) {
        // An empty table satisfies both WRITE_EMPTY and WRITE_TRUNCATE as it stands.
        if (writeDisposition == WriteDisposition.WRITE_TRUNCATE) {
          LOG.info("Empty table found, not removing {}", BigQueryIO.toTableSpec(ref));
        }
        return table;
      }

      if (writeDisposition == WriteDisposition.WRITE_EMPTY) {
        throw new IOException("WriteDisposition is WRITE_EMPTY, but table is not empty");
      }

      // Reuse the existing schema if none was provided.
      if (schema == null) {
        schema = table.getSchema();
      }

      // Delete table and fall through to re-creating it below.
      LOG.info("Deleting table {}", BigQueryIO.toTableSpec(ref));
      Bigquery.Tables.Delete delete =
          client.tables().delete(ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
      delete.execute();
    }

    if (schema == null) {
      throw new IllegalArgumentException("Table schema required for new table.");
    }

    // Create the table.
    return tryCreateTable(schema);
  }
  /**
   * Stubs the mocked client so that successive {@code insertAll(...).execute()} calls return one
   * prepared response per entry of {@code errorIndicesSequence}.
   *
   * <p>Each inner list holds the row indices that the corresponding response reports as insert
   * errors; an empty inner list produces a response with an empty error list.
   *
   * @param errorIndicesSequence error indices for each successive response; must contain at least
   *     one entry because the first response is looked up unconditionally
   */
  private void onInsertAll(List<List<Long>> errorIndicesSequence) throws Exception {
    when(mockClient.tabledata()).thenReturn(mockTabledata);

    List<TableDataInsertAllResponse> responses = new ArrayList<>();
    for (List<Long> errorIndices : errorIndicesSequence) {
      List<TableDataInsertAllResponse.InsertErrors> errors = new ArrayList<>();
      for (long i : errorIndices) {
        TableDataInsertAllResponse.InsertErrors error =
            new TableDataInsertAllResponse.InsertErrors();
        error.setIndex(i);
        // Without this the error was built and then dropped, so every response looked clean.
        errors.add(error);
      }
      TableDataInsertAllResponse response = new TableDataInsertAllResponse();
      response.setInsertErrors(errors);
      responses.add(response);
    }

    when(mockTabledata.insertAll(
            anyString(), anyString(), anyString(), any(TableDataInsertAllRequest.class)))
        .thenReturn(mockInsertAll);
    // The first response goes in the fixed slot, the remaining ones in the varargs slot.
    when(mockInsertAll.execute())
        .thenReturn(
            responses.get(0),
            responses
                .subList(1, responses.size())
                .toArray(new TableDataInsertAllResponse[responses.size() - 1]));
  }
  /** Reads a table whose rows are spread over two result pages and checks both pages are used. */
  @Test
  public void testReadMultiPage() throws IOException {
    onTableGet(basicTableSchema());

    // The first page points at a follow-up page; the second page ends the listing.
    TableDataList firstPage = rawDataList(rawRow("Row1", 1)).setPageToken("page2");
    TableDataList secondPage = rawDataList(rawRow("Row2", 2)).setTotalRows(2L);

    when(mockClient.tabledata()).thenReturn(mockTabledata);
    when(mockTabledata.list(anyString(), anyString(), anyString())).thenReturn(mockTabledataList);
    when(mockTabledataList.execute()).thenReturn(firstPage, secondPage);

    // Maps each row to the value stored under its "name" key.
    Function<TableRow, String> nameOf =
        new Function<TableRow, String>() {
          @Override
          public String apply(TableRow input) {
            return (String) input.get("name");
          }
        };

    try (BigQueryTableRowIterator iterator =
        BigQueryTableRowIterator.of(
            mockClient, BigQueryIO.parseTableSpec("project:dataset.table"))) {

      List<String> names = new LinkedList<>();
      Iterators.addAll(names, Iterators.transform(iterator, nameOf));

      Assert.assertThat(names, Matchers.hasItems("Row1", "Row2"));

      verifyTableGet();
      verifyTabledataList();
      // The second call should have used a page token.
      verify(mockTabledataList).setPageToken("page2");
    }
  }
  /**
   * Reports whether the referenced table currently has no rows.
   *
   * <p>At most one row is requested; the table counts as empty when the response carries no row
   * list or an empty one.
   *
   * @throws IOException if the list request fails
   */
  public boolean isEmpty() throws IOException {
    Bigquery.Tabledata.List request =
        client.tabledata().list(ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
    // A single row is enough to decide emptiness.
    request.setMaxResults(1L);

    List<TableRow> firstRows = request.execute().getRows();
    if (firstRows == null) {
      return true;
    }
    return firstRows.isEmpty();
  }
  /** Expects a {@code RuntimeException} when the table lookup behind the iterator fails. */
  @Test
  public void testReadOpenFailure() throws IOException {
    thrown.expect(RuntimeException.class);

    // Make the table lookup fail as soon as it is executed.
    when(mockTablesGet.execute()).thenThrow(new IOException("No such table"));
    when(mockTables.get(anyString(), anyString(), anyString())).thenReturn(mockTablesGet);
    when(mockClient.tables()).thenReturn(mockTables);

    try (BigQueryTableRowIterator rows =
        BigQueryTableRowIterator.of(
            mockClient, BigQueryIO.parseTableSpec("project:dataset.table"))) {
      try {
        Assert.assertFalse(rows.hasNext()); // throws.
      } finally {
        // The lookup must have been attempted even though it failed.
        verifyTableGet();
      }
    }
  }
  /**
   * Attempts to create the BigQuery table with the given schema.
   *
   * <p>When the insert fails because a table with the same name already exists, nothing is created
   * and null is returned. The existing table does not necessarily have the schema passed in here.
   *
   * @param schema Schema of the new BigQuery table.
   * @return The newly created BigQuery table information, or null if the table with the same name
   *     already exists.
   * @throws IOException if an error other than an already existing table occurs.
   */
  @Nullable
  public Table tryCreateTable(TableSchema schema) throws IOException {
    LOG.info("Trying to create BigQuery table: {}", BigQueryIO.toTableSpec(ref));

    Table tableToCreate = new Table();
    tableToCreate.setSchema(schema);
    tableToCreate.setTableReference(ref);

    try {
      return client
          .tables()
          .insert(ref.getProjectId(), ref.getDatasetId(), tableToCreate)
          .execute();
    } catch (IOException e) {
      // Only an "already exists" failure is tolerated; anything else goes to the caller.
      boolean alreadyExists = new ApiErrorExtractor().itemAlreadyExists(e);
      if (!alreadyExists) {
        throw e;
      }
      LOG.info("The BigQuery table already exists.");
      return null;
    }
  }
// Example #8
// 0
 /**
  * Executes the query, retrying on {@code IOException} for as long as the back-off allows.
  *
  * <p>Every failed attempt is logged together with its exception. Once {@code
  * BackOffUtils.next(sleeper, backOff)} reports that no further attempt should be made, an {@code
  * IOException} wrapping the last failure is thrown.
  *
  * @param bigqueryClient client used to issue the query
  * @param queryContent query request to execute
  * @param sleeper sleeper handed to the back-off helper between attempts
  * @param backOff back-off policy that decides whether another attempt is made
  * @return the response of the first successful attempt
  * @throws IOException if no attempt succeeds; the last failure is attached as the cause
  * @throws InterruptedException declared for the back-off wait between attempts
  */
 @VisibleForTesting
 QueryResponse queryWithRetries(
     Bigquery bigqueryClient, QueryRequest queryContent, Sleeper sleeper, BackOff backOff)
     throws IOException, InterruptedException {
   IOException lastException = null;
   do {
     try {
       return bigqueryClient.jobs().query(projectId, queryContent).execute();
     } catch (IOException e) {
       // Keep going, but log the cause so failed attempts stay diagnosable.
       LOG.warn("Ignore the error and retry the query.", e);
       lastException = e;
     }
   } while (BackOffUtils.next(sleeper, backOff));
   throw new IOException(
       String.format("Unable to get BigQuery response after retrying %d times", MAX_QUERY_RETRIES),
       lastException);
 }
 /**
  * Stubs the mocked client so that listing table data for any table yields {@code result}.
  *
  * @param result page returned when the mocked list request is executed
  */
 private void onTableList(TableDataList result) throws IOException {
   // The three stubs sit on different mocks, so they are wired from the innermost call outwards.
   when(mockTabledataList.execute()).thenReturn(result);
   when(mockTabledata.list(anyString(), anyString(), anyString())).thenReturn(mockTabledataList);
   when(mockClient.tabledata()).thenReturn(mockTabledata);
 }
 /**
  * Stubs the mocked client so that looking up any table yields {@code table}.
  *
  * @param table value returned when the mocked get request is executed
  */
 private void onTableGet(Table table) throws IOException {
   // The three stubs sit on different mocks, so they are wired from the innermost call outwards.
   when(mockTablesGet.execute()).thenReturn(table);
   when(mockTables.get(anyString(), anyString(), anyString())).thenReturn(mockTablesGet);
   when(mockClient.tables()).thenReturn(mockTables);
 }
  /**
   * Inserts all rows from the given list, using the specified insert ids if not null.
   *
   * <p>Rows are grouped into batches; a batch is closed once its accumulated size reaches {@code
   * UPLOAD_BATCH_SIZE_BYTES}, its row count reaches {@code maxRowsPerBatch}, or the last row has
   * been added. Each batch is submitted to {@code executor}. Rows that BigQuery reports as failed
   * are collected and re-sent after a back-off pause until either no errors remain or the back-off
   * reaches its maximum number of attempts.
   *
   * @param rowList rows to insert
   * @param insertIdList insert ids matching {@code rowList} position by position, or null to send
   *     the rows without insert ids
   * @throws IOException if rows still fail after the final attempt, or if the calling thread is
   *     interrupted while waiting for batches to complete or while backing off
   * @throws AssertionError if {@code insertIdList} is not null and its size differs from that of
   *     {@code rowList}
   * @throws RuntimeException if a batch request itself fails or reports an error without a row
   *     index; the underlying exception is attached as the cause
   */
  public void insertAll(List<TableRow> rowList, @Nullable List<String> insertIdList)
      throws IOException {
    if (insertIdList != null && rowList.size() != insertIdList.size()) {
      throw new AssertionError(
          "If insertIdList is not null it needs to have at least " + "as many elements as rowList");
    }

    AttemptBoundedExponentialBackOff backoff =
        new AttemptBoundedExponentialBackOff(
            MAX_INSERT_ATTEMPTS, INITIAL_INSERT_BACKOFF_INTERVAL_MS);

    final List<TableDataInsertAllResponse.InsertErrors> allErrors = new ArrayList<>();
    // Single lock shared by every batch task. Locking on the task itself would give each task
    // its own monitor and leave the shared lists below unprotected.
    final Object resultLock = new Object();
    // These lists contain the rows to publish. Initially they contain the entire list. If there
    // are failures, they will contain only the failed rows to be retried.
    List<TableRow> rowsToPublish = rowList;
    List<String> idsToPublish = insertIdList;
    while (true) {
      final List<TableRow> retryRows = new ArrayList<>();
      final List<String> retryIds = (idsToPublish != null) ? new ArrayList<String>() : null;

      // Index within rowsToPublish of the first row of the batch being assembled.
      int strideIndex = 0;
      // Upload in batches.
      List<TableDataInsertAllRequest.Rows> rows = new ArrayList<>();
      int dataSize = 0;

      List<Future<?>> futures = new ArrayList<>();

      for (int i = 0; i < rowsToPublish.size(); ++i) {
        TableRow row = rowsToPublish.get(i);
        TableDataInsertAllRequest.Rows out = new TableDataInsertAllRequest.Rows();
        if (idsToPublish != null) {
          out.setInsertId(idsToPublish.get(i));
        }
        out.setJson(row.getUnknownKeys());
        rows.add(out);

        dataSize += row.toString().length();
        if (dataSize >= UPLOAD_BATCH_SIZE_BYTES
            || rows.size() >= maxRowsPerBatch
            || i == rowsToPublish.size() - 1) {
          TableDataInsertAllRequest content = new TableDataInsertAllRequest();
          content.setRows(rows);

          final Bigquery.Tabledata.InsertAll insert =
              client
                  .tabledata()
                  .insertAll(ref.getProjectId(), ref.getDatasetId(), ref.getTableId(), content);

          final int finalStrideIndex = strideIndex;
          final List<TableRow> finalRowsToPublish = rowsToPublish;
          final List<String> finalIdsToPublish = idsToPublish;

          futures.add(
              executor.submit(
                  new Runnable() {
                    @Override
                    public void run() {
                      try {
                        TableDataInsertAllResponse response = insert.execute();

                        List<TableDataInsertAllResponse.InsertErrors> errors =
                            response.getInsertErrors();
                        if (errors != null) {
                          synchronized (resultLock) {
                            allErrors.addAll(errors);
                            for (TableDataInsertAllResponse.InsertErrors error : errors) {
                              if (error.getIndex() == null) {
                                throw new IOException("Insert failed: " + allErrors);
                              }

                              // Error indices are relative to the batch; shift them back to
                              // positions in the list this round was built from.
                              int errorIndex = error.getIndex().intValue() + finalStrideIndex;
                              retryRows.add(finalRowsToPublish.get(errorIndex));
                              if (retryIds != null) {
                                retryIds.add(finalIdsToPublish.get(errorIndex));
                              }
                            }
                          }
                        }
                      } catch (IOException e) {
                        throw new RuntimeException(e);
                      }
                    }
                  }));

          dataSize = 0;
          strideIndex = i + 1;
          rows = new ArrayList<>();
        }
      }

      try {
        for (Future<?> future : futures) {
          future.get();
        }
      } catch (InterruptedException e) {
        // Results of this round are incomplete; restore the flag and stop instead of
        // continuing as if every batch had finished.
        Thread.currentThread().interrupt();
        throw new IOException("Interrupted while waiting for BigQuery inserts to complete", e);
      } catch (ExecutionException e) {
        throw Throwables.propagate(e.getCause());
      }

      if (!allErrors.isEmpty() && !backoff.atMaxAttempts()) {
        try {
          Thread.sleep(backoff.nextBackOffMillis());
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          throw new IOException(
              "Interrupted while waiting before retrying failed BigQuery inserts", e);
        }
        LOG.info("Retrying failed inserts to BigQuery");
        rowsToPublish = retryRows;
        idsToPublish = retryIds;
        allErrors.clear();
      } else {
        break;
      }
    }
    if (!allErrors.isEmpty()) {
      throw new IOException("Insert failed: " + allErrors);
    }
  }