Skip to content

Commit 7c48e6a

Browse files
feat: support extractive content in search (#11408)
Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: Anthonios Partheniou <partheniou@google.com>
1 parent 1ec86ce commit 7c48e6a

File tree

12 files changed

+104
-27
lines changed

12 files changed

+104
-27
lines changed

packages/google-cloud-discoveryengine/google/cloud/discoveryengine/gapic_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
#
16-
__version__ = "0.8.1" # {x-release-please-version}
16+
__version__ = "0.1.0" # {x-release-please-version}

packages/google-cloud-discoveryengine/google/cloud/discoveryengine_v1/gapic_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
#
16-
__version__ = "0.8.1" # {x-release-please-version}
16+
__version__ = "0.1.0" # {x-release-please-version}

packages/google-cloud-discoveryengine/google/cloud/discoveryengine_v1beta/gapic_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
#
16-
__version__ = "0.8.1" # {x-release-please-version}
16+
__version__ = "0.1.0" # {x-release-please-version}

packages/google-cloud-discoveryengine/google/cloud/discoveryengine_v1beta/types/completion_service.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ class CompleteQueryRequest(proto.Message):
5454
Search API.
5555
- ``user-event`` - Using suggestions generated from
5656
user-imported search events.
57+
- ``document-completable`` - Using suggestions taken
58+
directly from user-imported document fields marked as
59+
completable.
5760
5861
Default values:
5962

packages/google-cloud-discoveryengine/google/cloud/discoveryengine_v1beta/types/document.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,14 @@ class Content(proto.Message):
117117
mime_type (str):
118118
The MIME type of the content. Supported types:
119119
120-
- ``application/pdf`` (PDF)
120+
- ``application/pdf`` (PDF, only native PDFs are supported
121+
for now)
121122
- ``text/html`` (HTML)
123+
- ``application/vnd.openxmlformats-officedocument.wordprocessingml.document``
124+
(DOCX)
125+
- ``application/vnd.openxmlformats-officedocument.presentationml.presentation``
126+
(PPTX)
127+
- ``text/plain`` (TXT)
122128
123129
See
124130
https://www.iana.org/assignments/media-types/media-types.xhtml.

packages/google-cloud-discoveryengine/google/cloud/discoveryengine_v1beta/types/import_config.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ class GcsSource(proto.Message):
7272
[Schema][google.cloud.discoveryengine.v1beta.Schema] of
7373
the data store. This can only be used by the GENERIC Data
7474
Store vertical.
75+
- ``csv``: A CSV file with header conforming to the defined
76+
[Schema][google.cloud.discoveryengine.v1beta.Schema] of
77+
the data store. Each entry after the header will be
78+
imported as a Document. This can only be used by the
79+
GENERIC Data Store vertical.
7580
7681
Supported values for user event imports:
7782
@@ -207,17 +212,16 @@ class ImportUserEventsRequest(proto.Message):
207212
208213
Attributes:
209214
inline_source (google.cloud.discoveryengine_v1beta.types.ImportUserEventsRequest.InlineSource):
210-
Required. The Inline source for the input
211-
content for UserEvents.
215+
The Inline source for the input content for
216+
UserEvents.
212217
213218
This field is a member of `oneof`_ ``source``.
214219
gcs_source (google.cloud.discoveryengine_v1beta.types.GcsSource):
215-
Required. Cloud Storage location for the
216-
input content.
220+
Cloud Storage location for the input content.
217221
218222
This field is a member of `oneof`_ ``source``.
219223
bigquery_source (google.cloud.discoveryengine_v1beta.types.BigQuerySource):
220-
Required. BigQuery input source.
224+
BigQuery input source.
221225
222226
This field is a member of `oneof`_ ``source``.
223227
parent (str):
@@ -454,8 +458,8 @@ class ImportDocumentsRequest(proto.Message):
454458
[GcsSource.data_schema][google.cloud.discoveryengine.v1beta.GcsSource.data_schema]
455459
or
456460
[BigQuerySource.data_schema][google.cloud.discoveryengine.v1beta.BigQuerySource.data_schema]
457-
is ``custom``. Otherwise, an INVALID_ARGUMENT error is
458-
thrown.
461+
is ``custom`` or ``csv``. Otherwise, an INVALID_ARGUMENT
462+
error is thrown.
459463
id_field (str):
460464
The field in the Cloud Storage and BigQuery sources that
461465
indicates the unique IDs of the documents.

packages/google-cloud-discoveryengine/google/cloud/discoveryengine_v1beta/types/search_service.py

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,9 @@ class ContentSearchSpec(proto.Message):
526526
summary_spec (google.cloud.discoveryengine_v1beta.types.SearchRequest.ContentSearchSpec.SummarySpec):
527527
If there is no summary spec provided, there
528528
will be no summary in the search response.
529+
extractive_content_spec (google.cloud.discoveryengine_v1beta.types.SearchRequest.ContentSearchSpec.ExtractiveContentSpec):
530+
If there is no extractive_content_spec provided, there will
531+
be no extractive answer in the search response.
529532
"""
530533

531534
class SnippetSpec(proto.Message):
@@ -534,8 +537,12 @@ class SnippetSpec(proto.Message):
534537
535538
Attributes:
536539
max_snippet_count (int):
537-
Max number of snippets returned in each search result. If
538-
the matching snippets is less than the max_snippet_count,
540+
Max number of snippets returned in each search result.
541+
542+
A snippet is an informative summary of a content with
543+
highlighting for UI rendering.
544+
545+
If the number of matching snippets is less than the max_snippet_count,
539546
return all of the snippets; otherwise, return the
540547
max_snippet_count.
541548
@@ -573,6 +580,51 @@ class SummarySpec(proto.Message):
573580
number=1,
574581
)
575582

583+
class ExtractiveContentSpec(proto.Message):
584+
r"""The specification that configures the extractive content in
585+
search results.
586+
587+
Attributes:
588+
max_extractive_answer_count (int):
589+
The max number of extractive answers returned in each search
590+
result.
591+
592+
An extractive answer is a verbatim answer extracted from the
593+
original document, which provides precise and contextually
594+
relevant answer to the search query.
595+
596+
If the number of matching answers is less than the
597+
max_extractive_answer_count, return all of the answers;
598+
otherwise, return the max_extractive_answer_count.
599+
600+
At most 5 answers will be returned for each SearchResult.
601+
max_extractive_segment_count (int):
602+
The max number of extractive segments returned in each
603+
search result.
604+
605+
An extractive segment is a text segment extracted from the
606+
original document which is relevant to the search query and
607+
in general more verbose than an extractive answer. The
608+
segment could then be used as input for LLMs to generate
609+
summaries and answers.
610+
611+
If the number of matching segments is less than the
612+
max_extractive_segment_count, return all of the segments;
613+
otherwise, return the max_extractive_segment_count.
614+
615+
Currently one segment will be returned for each
616+
SearchResult.
617+
"""
618+
619+
max_extractive_answer_count: int = proto.Field(
620+
proto.INT32,
621+
number=1,
622+
)
623+
max_extractive_segment_count: int = proto.Field(
624+
proto.INT32,
625+
number=2,
626+
)
627+
576628
snippet_spec: "SearchRequest.ContentSearchSpec.SnippetSpec" = proto.Field(
577629
proto.MESSAGE,
578630
number=1,
@@ -583,6 +635,11 @@ class SummarySpec(proto.Message):
583635
number=2,
584636
message="SearchRequest.ContentSearchSpec.SummarySpec",
585637
)
638+
extractive_content_spec: "SearchRequest.ContentSearchSpec.ExtractiveContentSpec" = proto.Field(
639+
proto.MESSAGE,
640+
number=3,
641+
message="SearchRequest.ContentSearchSpec.ExtractiveContentSpec",
642+
)
586643

587644
serving_config: str = proto.Field(
588645
proto.STRING,
@@ -692,6 +749,13 @@ class SearchResponse(proto.Message):
692749
[UserEvent][google.cloud.discoveryengine.v1beta.UserEvent]
693750
logs resulting from this search, which enables accurate
694751
attribution of search model performance.
752+
redirect_uri (str):
753+
The URI of a customer-defined redirect page. If redirect
754+
action is triggered, no search is performed, and only
755+
[redirect_uri][google.cloud.discoveryengine.v1beta.SearchResponse.redirect_uri]
756+
and
757+
[attribution_token][google.cloud.discoveryengine.v1beta.SearchResponse.attribution_token]
758+
are set in the response.
695759
next_page_token (str):
696760
A token that can be sent as
697761
[SearchRequest.page_token][google.cloud.discoveryengine.v1beta.SearchRequest.page_token]
@@ -887,6 +951,10 @@ def raw_page(self):
887951
proto.STRING,
888952
number=4,
889953
)
954+
redirect_uri: str = proto.Field(
955+
proto.STRING,
956+
number=12,
957+
)
890958
next_page_token: str = proto.Field(
891959
proto.STRING,
892960
number=5,

packages/google-cloud-discoveryengine/samples/generated_samples/snippet_metadata_google.cloud.discoveryengine.v1.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
],
99
"language": "PYTHON",
1010
"name": "google-cloud-discoveryengine",
11-
"version": "0.8.1"
11+
"version": "0.1.0"
1212
},
1313
"snippets": [
1414
{

packages/google-cloud-discoveryengine/samples/generated_samples/snippet_metadata_google.cloud.discoveryengine.v1beta.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
],
99
"language": "PYTHON",
1010
"name": "google-cloud-discoveryengine",
11-
"version": "0.8.1"
11+
"version": "0.1.0"
1212
},
1313
"snippets": [
1414
{

packages/google-cloud-discoveryengine/scripts/fixup_discoveryengine_v1beta_keywords.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ class discoveryengineCallTransformer(cst.CSTTransformer):
4848
'get_document': ('name', ),
4949
'get_schema': ('name', ),
5050
'import_documents': ('parent', 'inline_source', 'gcs_source', 'bigquery_source', 'error_config', 'reconciliation_mode', 'auto_generate_ids', 'id_field', ),
51-
'import_user_events': ('inline_source', 'gcs_source', 'bigquery_source', 'parent', 'error_config', ),
51+
'import_user_events': ('parent', 'inline_source', 'gcs_source', 'bigquery_source', 'error_config', ),
5252
'list_documents': ('parent', 'page_size', 'page_token', ),
5353
'list_schemas': ('parent', 'page_size', 'page_token', ),
5454
'purge_documents': ('parent', 'filter', 'force', ),

packages/google-cloud-discoveryengine/tests/unit/gapic/discoveryengine_v1beta/test_search_service.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,7 @@ def test_search(request_type, transport: str = "grpc"):
727727
call.return_value = search_service.SearchResponse(
728728
total_size=1086,
729729
attribution_token="attribution_token_value",
730+
redirect_uri="redirect_uri_value",
730731
next_page_token="next_page_token_value",
731732
corrected_query="corrected_query_value",
732733
applied_controls=["applied_controls_value"],
@@ -742,6 +743,7 @@ def test_search(request_type, transport: str = "grpc"):
742743
assert isinstance(response, pagers.SearchPager)
743744
assert response.total_size == 1086
744745
assert response.attribution_token == "attribution_token_value"
746+
assert response.redirect_uri == "redirect_uri_value"
745747
assert response.next_page_token == "next_page_token_value"
746748
assert response.corrected_query == "corrected_query_value"
747749
assert response.applied_controls == ["applied_controls_value"]
@@ -783,6 +785,7 @@ async def test_search_async(
783785
search_service.SearchResponse(
784786
total_size=1086,
785787
attribution_token="attribution_token_value",
788+
redirect_uri="redirect_uri_value",
786789
next_page_token="next_page_token_value",
787790
corrected_query="corrected_query_value",
788791
applied_controls=["applied_controls_value"],
@@ -799,6 +802,7 @@ async def test_search_async(
799802
assert isinstance(response, pagers.SearchAsyncPager)
800803
assert response.total_size == 1086
801804
assert response.attribution_token == "attribution_token_value"
805+
assert response.redirect_uri == "redirect_uri_value"
802806
assert response.next_page_token == "next_page_token_value"
803807
assert response.corrected_query == "corrected_query_value"
804808
assert response.applied_controls == ["applied_controls_value"]
@@ -1085,6 +1089,7 @@ def test_search_rest(request_type):
10851089
return_value = search_service.SearchResponse(
10861090
total_size=1086,
10871091
attribution_token="attribution_token_value",
1092+
redirect_uri="redirect_uri_value",
10881093
next_page_token="next_page_token_value",
10891094
corrected_query="corrected_query_value",
10901095
applied_controls=["applied_controls_value"],
@@ -1104,6 +1109,7 @@ def test_search_rest(request_type):
11041109
assert isinstance(response, pagers.SearchPager)
11051110
assert response.total_size == 1086
11061111
assert response.attribution_token == "attribution_token_value"
1112+
assert response.redirect_uri == "redirect_uri_value"
11071113
assert response.next_page_token == "next_page_token_value"
11081114
assert response.corrected_query == "corrected_query_value"
11091115
assert response.applied_controls == ["applied_controls_value"]

packages/google-cloud-discoveryengine/tests/unit/gapic/discoveryengine_v1beta/test_user_event_service.py

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1954,17 +1954,7 @@ def test_import_user_events_rest_unset_required_fields():
19541954
)
19551955

19561956
unset_fields = transport.import_user_events._get_unset_required_fields({})
1957-
assert set(unset_fields) == (
1958-
set(())
1959-
& set(
1960-
(
1961-
"inlineSource",
1962-
"gcsSource",
1963-
"bigquerySource",
1964-
"parent",
1965-
)
1966-
)
1967-
)
1957+
assert set(unset_fields) == (set(()) & set(("parent",)))
19681958

19691959

19701960
@pytest.mark.parametrize("null_interceptor", [True, False])

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy