|
65 | 65 | import com.google.cloud.bigquery.ExternalTableDefinition;
|
66 | 66 | import com.google.cloud.bigquery.ExtractJobConfiguration;
|
67 | 67 | import com.google.cloud.bigquery.Field;
|
| 68 | +import com.google.cloud.bigquery.Field.Mode; |
68 | 69 | import com.google.cloud.bigquery.FieldList;
|
69 | 70 | import com.google.cloud.bigquery.FieldValue;
|
70 | 71 | import com.google.cloud.bigquery.FieldValue.Attribute;
|
@@ -4586,4 +4587,205 @@ public void testPreserveAsciiControlCharacters()
|
4586 | 4587 | assertEquals("\u0000", row.get(0).getStringValue());
|
4587 | 4588 | assertTrue(bigquery.delete(tableId));
|
4588 | 4589 | }
|
| 4590 | + |
| 4591 | + @Test |
| 4592 | + public void testReferenceFileSchemaUriForAvro() { |
| 4593 | + try { |
| 4594 | + String destinationTableName = "test_reference_file_schema_avro"; |
| 4595 | + TableId tableId = TableId.of(DATASET, destinationTableName); |
| 4596 | + Schema expectedSchema = |
| 4597 | + Schema.of( |
| 4598 | + Field.newBuilder("username", StandardSQLTypeName.STRING) |
| 4599 | + .setMode(Mode.NULLABLE) |
| 4600 | + .build(), |
| 4601 | + Field.newBuilder("tweet", StandardSQLTypeName.STRING).setMode(Mode.NULLABLE).build(), |
| 4602 | + Field.newBuilder("timestamp", StandardSQLTypeName.STRING) |
| 4603 | + .setMode(Mode.NULLABLE) |
| 4604 | + .build(), |
| 4605 | + Field.newBuilder("likes", StandardSQLTypeName.INT64).setMode(Mode.NULLABLE).build()); |
| 4606 | + |
| 4607 | + // By default, the table should have c-twitter schema because it is lexicographically last. |
| 4608 | + // a-twitter schema (username, tweet, timestamp, likes) |
| 4609 | + // b-twitter schema (username, tweet, timestamp) |
| 4610 | + // c-twitter schema (username, tweet) |
| 4611 | + List<String> SOURCE_URIS = |
| 4612 | + ImmutableList.of( |
| 4613 | + "gs://" |
| 4614 | + + CLOUD_SAMPLES_DATA |
| 4615 | + + "/bigquery/federated-formats-reference-file-schema/a-twitter.avro", |
| 4616 | + "gs://" |
| 4617 | + + CLOUD_SAMPLES_DATA |
| 4618 | + + "/bigquery/federated-formats-reference-file-schema/b-twitter.avro", |
| 4619 | + "gs://" |
| 4620 | + + CLOUD_SAMPLES_DATA |
| 4621 | + + "/bigquery/federated-formats-reference-file-schema/c-twitter.avro"); |
| 4622 | + |
| 4623 | + // Because referenceFileSchemaUri is set as a-twitter, the table will have a-twitter schema |
| 4624 | + String referenceFileSchema = |
| 4625 | + "gs://" |
| 4626 | + + CLOUD_SAMPLES_DATA |
| 4627 | + + "/bigquery/federated-formats-reference-file-schema/a-twitter.avro"; |
| 4628 | + |
| 4629 | + LoadJobConfiguration loadJobConfiguration = |
| 4630 | + LoadJobConfiguration.newBuilder(tableId, SOURCE_URIS, FormatOptions.avro()) |
| 4631 | + .setReferenceFileSchemaUri(referenceFileSchema) |
| 4632 | + .build(); |
| 4633 | + |
| 4634 | + Job job = bigquery.create(JobInfo.of(loadJobConfiguration)); |
| 4635 | + // Blocks until this load table job completes its execution, either failing or succeeding. |
| 4636 | + job = job.waitFor(); |
| 4637 | + assertEquals(true, job.isDone()); |
| 4638 | + |
| 4639 | + LoadJobConfiguration actualLoadJobConfiguration = job.getConfiguration(); |
| 4640 | + Table generatedTable = bigquery.getTable(actualLoadJobConfiguration.getDestinationTable()); |
| 4641 | + |
| 4642 | + assertEquals(expectedSchema, generatedTable.getDefinition().getSchema()); |
| 4643 | + // clean up after test to avoid conflict with other tests |
| 4644 | + boolean success = bigquery.delete(tableId); |
| 4645 | + assertEquals(true, success); |
| 4646 | + } catch (BigQueryException | InterruptedException e) { |
| 4647 | + System.out.println("Column not added during load append \n" + e.toString()); |
| 4648 | + } |
| 4649 | + } |
| 4650 | + |
| 4651 | + @Test |
| 4652 | + public void testReferenceFileSchemaUriForParquet() { |
| 4653 | + try { |
| 4654 | + String destinationTableName = "test_reference_file_schema_parquet"; |
| 4655 | + TableId tableId = TableId.of(DATASET, destinationTableName); |
| 4656 | + Schema expectedSchema = |
| 4657 | + Schema.of( |
| 4658 | + Field.newBuilder("username", StandardSQLTypeName.STRING) |
| 4659 | + .setMode(Mode.NULLABLE) |
| 4660 | + .build(), |
| 4661 | + Field.newBuilder("tweet", StandardSQLTypeName.STRING).setMode(Mode.NULLABLE).build(), |
| 4662 | + Field.newBuilder("timestamp", StandardSQLTypeName.STRING) |
| 4663 | + .setMode(Mode.NULLABLE) |
| 4664 | + .build(), |
| 4665 | + Field.newBuilder("likes", StandardSQLTypeName.INT64).setMode(Mode.NULLABLE).build()); |
| 4666 | + |
| 4667 | + // By default, the table should have c-twitter schema because it is lexicographically last. |
| 4668 | + // a-twitter schema (username, tweet, timestamp, likes) |
| 4669 | + // b-twitter schema (username, tweet, timestamp) |
| 4670 | + // c-twitter schema (username, tweet) |
| 4671 | + List<String> SOURCE_URIS = |
| 4672 | + ImmutableList.of( |
| 4673 | + "gs://" |
| 4674 | + + CLOUD_SAMPLES_DATA |
| 4675 | + + "/bigquery/federated-formats-reference-file-schema/a-twitter.parquet", |
| 4676 | + "gs://" |
| 4677 | + + CLOUD_SAMPLES_DATA |
| 4678 | + + "/bigquery/federated-formats-reference-file-schema/b-twitter.parquet", |
| 4679 | + "gs://" |
| 4680 | + + CLOUD_SAMPLES_DATA |
| 4681 | + + "/bigquery/federated-formats-reference-file-schema/c-twitter.parquet"); |
| 4682 | + |
| 4683 | + // Because referenceFileSchemaUri is set as a-twitter, the table will have a-twitter schema |
| 4684 | + String referenceFileSchema = |
| 4685 | + "gs://" |
| 4686 | + + CLOUD_SAMPLES_DATA |
| 4687 | + + "/bigquery/federated-formats-reference-file-schema/a-twitter.parquet"; |
| 4688 | + |
| 4689 | + LoadJobConfiguration loadJobConfiguration = |
| 4690 | + LoadJobConfiguration.newBuilder(tableId, SOURCE_URIS, FormatOptions.parquet()) |
| 4691 | + .setReferenceFileSchemaUri(referenceFileSchema) |
| 4692 | + .build(); |
| 4693 | + |
| 4694 | + Job job = bigquery.create(JobInfo.of(loadJobConfiguration)); |
| 4695 | + // Blocks until this load table job completes its execution, either failing or succeeding. |
| 4696 | + job = job.waitFor(); |
| 4697 | + assertEquals(true, job.isDone()); |
| 4698 | + LoadJobConfiguration actualLoadJobConfiguration = job.getConfiguration(); |
| 4699 | + Table generatedTable = bigquery.getTable(actualLoadJobConfiguration.getDestinationTable()); |
| 4700 | + |
| 4701 | + assertEquals(expectedSchema, generatedTable.getDefinition().getSchema()); |
| 4702 | + // clean up after test to avoid conflict with other tests |
| 4703 | + boolean success = bigquery.delete(tableId); |
| 4704 | + assertEquals(true, success); |
| 4705 | + } catch (BigQueryException | InterruptedException e) { |
| 4706 | + System.out.println("Column not added during load append \n" + e.toString()); |
| 4707 | + } |
| 4708 | + } |
| 4709 | + |
| 4710 | + @Test |
| 4711 | + public void testCreateExternalTableWithReferenceFileSchemaAvro() { |
| 4712 | + String destinationTableName = "test_create_external_table_reference_file_schema_avro"; |
| 4713 | + TableId tableId = TableId.of(DATASET, destinationTableName); |
| 4714 | + Schema expectedSchema = |
| 4715 | + Schema.of( |
| 4716 | + Field.newBuilder("username", StandardSQLTypeName.STRING).setMode(Mode.NULLABLE).build(), |
| 4717 | + Field.newBuilder("tweet", StandardSQLTypeName.STRING).setMode(Mode.NULLABLE).build(), |
| 4718 | + Field.newBuilder("timestamp", StandardSQLTypeName.STRING) |
| 4719 | + .setMode(Mode.NULLABLE) |
| 4720 | + .build(), |
| 4721 | + Field.newBuilder("likes", StandardSQLTypeName.INT64).setMode(Mode.NULLABLE).build()); |
| 4722 | + String CLOUD_SAMPLES_DATA = "cloud-samples-data"; |
| 4723 | + |
| 4724 | + // By default, the table should have c-twitter schema because it is lexicographically last. |
| 4725 | + // a-twitter schema (username, tweet, timestamp, likes) |
| 4726 | + // b-twitter schema (username, tweet, timestamp) |
| 4727 | + // c-twitter schema (username, tweet) |
| 4728 | + String SOURCE_URI = |
| 4729 | + "gs://" + CLOUD_SAMPLES_DATA + "/bigquery/federated-formats-reference-file-schema/*.avro"; |
| 4730 | + |
| 4731 | + // Because referenceFileSchemaUri is set as a-twitter, the table will have a-twitter schema |
| 4732 | + String referenceFileSchema = |
| 4733 | + "gs://" |
| 4734 | + + CLOUD_SAMPLES_DATA |
| 4735 | + + "/bigquery/federated-formats-reference-file-schema/a-twitter.avro"; |
| 4736 | + |
| 4737 | + ExternalTableDefinition externalTableDefinition = |
| 4738 | + ExternalTableDefinition.newBuilder(SOURCE_URI, FormatOptions.avro()) |
| 4739 | + .setReferenceFileSchemaUri(referenceFileSchema) |
| 4740 | + .build(); |
| 4741 | + TableInfo tableInfo = TableInfo.of(tableId, externalTableDefinition); |
| 4742 | + Table createdTable = bigquery.create(tableInfo); |
| 4743 | + Table generatedTable = bigquery.getTable(createdTable.getTableId()); |
| 4744 | + assertEquals(expectedSchema, generatedTable.getDefinition().getSchema()); |
| 4745 | + // clean up after test to avoid conflict with other tests |
| 4746 | + boolean success = bigquery.delete(tableId); |
| 4747 | + assertEquals(true, success); |
| 4748 | + } |
| 4749 | + |
| 4750 | + @Test |
| 4751 | + public void testCreateExternalTableWithReferenceFileSchemaParquet() { |
| 4752 | + String destinationTableName = "test_create_external_table_reference_file_schema_parquet"; |
| 4753 | + TableId tableId = TableId.of(DATASET, destinationTableName); |
| 4754 | + Schema expectedSchema = |
| 4755 | + Schema.of( |
| 4756 | + Field.newBuilder("username", StandardSQLTypeName.STRING).setMode(Mode.NULLABLE).build(), |
| 4757 | + Field.newBuilder("tweet", StandardSQLTypeName.STRING).setMode(Mode.NULLABLE).build(), |
| 4758 | + Field.newBuilder("timestamp", StandardSQLTypeName.STRING) |
| 4759 | + .setMode(Mode.NULLABLE) |
| 4760 | + .build(), |
| 4761 | + Field.newBuilder("likes", StandardSQLTypeName.INT64).setMode(Mode.NULLABLE).build()); |
| 4762 | + String CLOUD_SAMPLES_DATA = "cloud-samples-data"; |
| 4763 | + |
| 4764 | + // By default, the table should have c-twitter schema because it is lexicographically last. |
| 4765 | + // a-twitter schema (username, tweet, timestamp, likes) |
| 4766 | + // b-twitter schema (username, tweet, timestamp) |
| 4767 | + // c-twitter schema (username, tweet) |
| 4768 | + String SOURCE_URI = |
| 4769 | + "gs://" |
| 4770 | + + CLOUD_SAMPLES_DATA |
| 4771 | + + "/bigquery/federated-formats-reference-file-schema/*.parquet"; |
| 4772 | + |
| 4773 | + // Because referenceFileSchemaUri is set as a-twitter, the table will have a-twitter schema |
| 4774 | + String referenceFileSchema = |
| 4775 | + "gs://" |
| 4776 | + + CLOUD_SAMPLES_DATA |
| 4777 | + + "/bigquery/federated-formats-reference-file-schema/a-twitter.parquet"; |
| 4778 | + |
| 4779 | + ExternalTableDefinition externalTableDefinition = |
| 4780 | + ExternalTableDefinition.newBuilder(SOURCE_URI, FormatOptions.parquet()) |
| 4781 | + .setReferenceFileSchemaUri(referenceFileSchema) |
| 4782 | + .build(); |
| 4783 | + TableInfo tableInfo = TableInfo.of(tableId, externalTableDefinition); |
| 4784 | + Table createdTable = bigquery.create(tableInfo); |
| 4785 | + Table generatedTable = bigquery.getTable(createdTable.getTableId()); |
| 4786 | + assertEquals(expectedSchema, generatedTable.getDefinition().getSchema()); |
| 4787 | + // clean up after test to avoid conflict with other tests |
| 4788 | + boolean success = bigquery.delete(tableId); |
| 4789 | + assertEquals(true, success); |
| 4790 | + } |
4589 | 4791 | }
|
0 commit comments