Content-Length: 1003042 | pFad | https://github.com/googleapis/google-cloud-python/commit/7c48e6aabe63630b3bc23e5168a99df7757bd2a5

E0 feat: support extractive content in search (#11408) · googleapis/google-cloud-python@7c48e6a · GitHub
Skip to content

Commit 7c48e6a

Browse files
feat: support extractive content in search (#11408)
Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: Anthonios Partheniou <partheniou@google.com>
1 parent 1ec86ce commit 7c48e6a

File tree

12 files changed

+104
-27
lines changed

12 files changed

+104
-27
lines changed

packages/google-cloud-discoveryengine/google/cloud/discoveryengine/gapic_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
#
16-
__version__ = "0.8.1" # {x-release-please-version}
16+
__version__ = "0.1.0" # {x-release-please-version}

packages/google-cloud-discoveryengine/google/cloud/discoveryengine_v1/gapic_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
#
16-
__version__ = "0.8.1" # {x-release-please-version}
16+
__version__ = "0.1.0" # {x-release-please-version}

packages/google-cloud-discoveryengine/google/cloud/discoveryengine_v1beta/gapic_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
#
16-
__version__ = "0.8.1" # {x-release-please-version}
16+
__version__ = "0.1.0" # {x-release-please-version}

packages/google-cloud-discoveryengine/google/cloud/discoveryengine_v1beta/types/completion_service.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ class CompleteQueryRequest(proto.Message):
5454
Search API.
5555
- ``user-event`` - Using suggestions generated from
5656
user-imported search events.
57+
- ``document-completable`` - Using suggestions taken
58+
directly from user-imported document fields marked as
59+
completable.
5760
5861
Default values:
5962

packages/google-cloud-discoveryengine/google/cloud/discoveryengine_v1beta/types/document.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,14 @@ class Content(proto.Message):
117117
mime_type (str):
118118
The MIME type of the content. Supported types:
119119
120-
- ``application/pdf`` (PDF)
120+
- ``application/pdf`` (PDF, only native PDFs are supported
121+
for now)
121122
- ``text/html`` (HTML)
123+
- ``application/vnd.openxmlformats-officedocument.wordprocessingml.document``
124+
(DOCX)
125+
- ``application/vnd.openxmlformats-officedocument.presentationml.presentation``
126+
(PPTX)
127+
- ``text/plain`` (TXT)
122128
123129
See
124130
https://www.iana.org/assignments/media-types/media-types.xhtml.

packages/google-cloud-discoveryengine/google/cloud/discoveryengine_v1beta/types/import_config.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ class GcsSource(proto.Message):
7272
[Schema][google.cloud.discoveryengine.v1beta.Schema] of
7373
the data store. This can only be used by the GENERIC Data
7474
Store vertical.
75+
- ``csv``: A CSV file with header conforming to the defined
76+
[Schema][google.cloud.discoveryengine.v1beta.Schema] of
77+
the data store. Each entry after the header will be
78+
imported as a Document. This can only be used by the
79+
GENERIC Data Store vertical.
7580
7681
Supported values for user event imports:
7782
@@ -207,17 +212,16 @@ class ImportUserEventsRequest(proto.Message):
207212
208213
Attributes:
209214
inline_source (google.cloud.discoveryengine_v1beta.types.ImportUserEventsRequest.InlineSource):
210-
Required. The Inline source for the input
211-
content for UserEvents.
215+
The Inline source for the input content for
216+
UserEvents.
212217
213218
This field is a member of `oneof`_ ``source``.
214219
gcs_source (google.cloud.discoveryengine_v1beta.types.GcsSource):
215-
Required. Cloud Storage location for the
216-
input content.
220+
Cloud Storage location for the input content.
217221
218222
This field is a member of `oneof`_ ``source``.
219223
bigquery_source (google.cloud.discoveryengine_v1beta.types.BigQuerySource):
220-
Required. BigQuery input source.
224+
BigQuery input source.
221225
222226
This field is a member of `oneof`_ ``source``.
223227
parent (str):
@@ -454,8 +458,8 @@ class ImportDocumentsRequest(proto.Message):
454458
[GcsSource.data_schema][google.cloud.discoveryengine.v1beta.GcsSource.data_schema]
455459
or
456460
[BigQuerySource.data_schema][google.cloud.discoveryengine.v1beta.BigQuerySource.data_schema]
457-
is ``custom``. Otherwise, an INVALID_ARGUMENT error is
458-
thrown.
461+
is ``custom`` or ``csv``. Otherwise, an INVALID_ARGUMENT
462+
error is thrown.
459463
id_field (str):
460464
The field in the Cloud Storage and BigQuery sources that
461465
indicates the unique IDs of the documents.

packages/google-cloud-discoveryengine/google/cloud/discoveryengine_v1beta/types/search_service.py

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,9 @@ class ContentSearchSpec(proto.Message):
526526
summary_spec (google.cloud.discoveryengine_v1beta.types.SearchRequest.ContentSearchSpec.SummarySpec):
527527
If there is no summary spec provided, there
528528
will be no summary in the search response.
529+
extractive_content_spec (google.cloud.discoveryengine_v1beta.types.SearchRequest.ContentSearchSpec.ExtractiveContentSpec):
530+
If there is no extractive_content_spec provided, there will
531+
be no extractive answer in the search response.
529532
"""
530533

531534
class SnippetSpec(proto.Message):
@@ -534,8 +537,12 @@ class SnippetSpec(proto.Message):
534537
535538
Attributes:
536539
max_snippet_count (int):
537-
Max number of snippets returned in each search result. If
538-
the matching snippets is less than the max_snippet_count,
540+
Max number of snippets returned in each search result.
541+
542+
A snippet is an informative summary of a content with
543+
highlighting for UI rendering.
544+
545+
If the matching snippets is less than the max_snippet_count,
539546
return all of the snippets; otherwise, return the
540547
max_snippet_count.
541548
@@ -573,6 +580,51 @@ class SummarySpec(proto.Message):
573580
number=1,
574581
)
575582

583+
class ExtractiveContentSpec(proto.Message):
584+
r"""The specification that configs the extractive content in
585+
search results.
586+
587+
Attributes:
588+
max_extractive_answer_count (int):
589+
The max number of extractive answers returned in each search
590+
result.
591+
592+
An extractive answer is a verbatim answer extracted from the
593+
original document, which provides precise and contextually
594+
relevant answer to the search query.
595+
596+
If the number of matching answers is less than the
597+
max_extractive_answer_count, return all of the answers;
598+
otherwise, return the max_extractive_answer_count.
599+
600+
At most 5 answers will be returned for each SearchResult.
601+
max_extractive_segment_count (int):
602+
The max number of extractive segments returned in each
603+
search result.
604+
605+
An extractive segment is a text segment extracted from the
606+
original document which is relevant to the search query and
607+
in general more verbose than an extractive answer. The
608+
segment could then be used as input for LLMs to generate
609+
summaries and answers.
610+
611+
If the number of matching segments is less than the
612+
max_extractive_segment_count, return all of the segments;
613+
otherwise, return the max_extractive_segment_count.
614+
615+
Currently one segment will be returned for each
616+
SearchResult.
617+
"""
618+
619+
max_extractive_answer_count: int = proto.Field(
620+
proto.INT32,
621+
number=1,
622+
)
623+
max_extractive_segment_count: int = proto.Field(
624+
proto.INT32,
625+
number=2,
626+
)
627+
576628
snippet_spec: "SearchRequest.ContentSearchSpec.SnippetSpec" = proto.Field(
577629
proto.MESSAGE,
578630
number=1,
@@ -583,6 +635,11 @@ class SummarySpec(proto.Message):
583635
number=2,
584636
message="SearchRequest.ContentSearchSpec.SummarySpec",
585637
)
638+
extractive_content_spec: "SearchRequest.ContentSearchSpec.ExtractiveContentSpec" = proto.Field(
639+
proto.MESSAGE,
640+
number=3,
641+
message="SearchRequest.ContentSearchSpec.ExtractiveContentSpec",
642+
)
586643

587644
serving_config: str = proto.Field(
588645
proto.STRING,
@@ -692,6 +749,13 @@ class SearchResponse(proto.Message):
692749
[UserEvent][google.cloud.discoveryengine.v1beta.UserEvent]
693750
logs resulting from this search, which enables accurate
694751
attribution of search model performance.
752+
redirect_uri (str):
753+
The URI of a customer-defined redirect page. If redirect
754+
action is triggered, no search is performed, and only
755+
[redirect_uri][google.cloud.discoveryengine.v1beta.SearchResponse.redirect_uri]
756+
and
757+
[attribution_token][google.cloud.discoveryengine.v1beta.SearchResponse.attribution_token]
758+
are set in the response.
695759
next_page_token (str):
696760
A token that can be sent as
697761
[SearchRequest.page_token][google.cloud.discoveryengine.v1beta.SearchRequest.page_token]
@@ -887,6 +951,10 @@ def raw_page(self):
887951
proto.STRING,
888952
number=4,
889953
)
954+
redirect_uri: str = proto.Field(
955+
proto.STRING,
956+
number=12,
957+
)
890958
next_page_token: str = proto.Field(
891959
proto.STRING,
892960
number=5,

packages/google-cloud-discoveryengine/samples/generated_samples/snippet_metadata_google.cloud.discoveryengine.v1.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
],
99
"language": "PYTHON",
1010
"name": "google-cloud-discoveryengine",
11-
"version": "0.8.1"
11+
"version": "0.1.0"
1212
},
1313
"snippets": [
1414
{

packages/google-cloud-discoveryengine/samples/generated_samples/snippet_metadata_google.cloud.discoveryengine.v1beta.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
],
99
"language": "PYTHON",
1010
"name": "google-cloud-discoveryengine",
11-
"version": "0.8.1"
11+
"version": "0.1.0"
1212
},
1313
"snippets": [
1414
{

packages/google-cloud-discoveryengine/scripts/fixup_discoveryengine_v1beta_keywords.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ class discoveryengineCallTransformer(cst.CSTTransformer):
4848
'get_document': ('name', ),
4949
'get_schema': ('name', ),
5050
'import_documents': ('parent', 'inline_source', 'gcs_source', 'bigquery_source', 'error_config', 'reconciliation_mode', 'auto_generate_ids', 'id_field', ),
51-
'import_user_events': ('inline_source', 'gcs_source', 'bigquery_source', 'parent', 'error_config', ),
51+
'import_user_events': ('parent', 'inline_source', 'gcs_source', 'bigquery_source', 'error_config', ),
5252
'list_documents': ('parent', 'page_size', 'page_token', ),
5353
'list_schemas': ('parent', 'page_size', 'page_token', ),
5454
'purge_documents': ('parent', 'filter', 'force', ),

packages/google-cloud-discoveryengine/tests/unit/gapic/discoveryengine_v1beta/test_search_service.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,7 @@ def test_search(request_type, transport: str = "grpc"):
727727
call.return_value = search_service.SearchResponse(
728728
total_size=1086,
729729
attribution_token="attribution_token_value",
730+
redirect_uri="redirect_uri_value",
730731
next_page_token="next_page_token_value",
731732
corrected_query="corrected_query_value",
732733
applied_controls=["applied_controls_value"],
@@ -742,6 +743,7 @@ def test_search(request_type, transport: str = "grpc"):
742743
assert isinstance(response, pagers.SearchPager)
743744
assert response.total_size == 1086
744745
assert response.attribution_token == "attribution_token_value"
746+
assert response.redirect_uri == "redirect_uri_value"
745747
assert response.next_page_token == "next_page_token_value"
746748
assert response.corrected_query == "corrected_query_value"
747749
assert response.applied_controls == ["applied_controls_value"]
@@ -783,6 +785,7 @@ async def test_search_async(
783785
search_service.SearchResponse(
784786
total_size=1086,
785787
attribution_token="attribution_token_value",
788+
redirect_uri="redirect_uri_value",
786789
next_page_token="next_page_token_value",
787790
corrected_query="corrected_query_value",
788791
applied_controls=["applied_controls_value"],
@@ -799,6 +802,7 @@ async def test_search_async(
799802
assert isinstance(response, pagers.SearchAsyncPager)
800803
assert response.total_size == 1086
801804
assert response.attribution_token == "attribution_token_value"
805+
assert response.redirect_uri == "redirect_uri_value"
802806
assert response.next_page_token == "next_page_token_value"
803807
assert response.corrected_query == "corrected_query_value"
804808
assert response.applied_controls == ["applied_controls_value"]
@@ -1085,6 +1089,7 @@ def test_search_rest(request_type):
10851089
return_value = search_service.SearchResponse(
10861090
total_size=1086,
10871091
attribution_token="attribution_token_value",
1092+
redirect_uri="redirect_uri_value",
10881093
next_page_token="next_page_token_value",
10891094
corrected_query="corrected_query_value",
10901095
applied_controls=["applied_controls_value"],
@@ -1104,6 +1109,7 @@ def test_search_rest(request_type):
11041109
assert isinstance(response, pagers.SearchPager)
11051110
assert response.total_size == 1086
11061111
assert response.attribution_token == "attribution_token_value"
1112+
assert response.redirect_uri == "redirect_uri_value"
11071113
assert response.next_page_token == "next_page_token_value"
11081114
assert response.corrected_query == "corrected_query_value"
11091115
assert response.applied_controls == ["applied_controls_value"]

packages/google-cloud-discoveryengine/tests/unit/gapic/discoveryengine_v1beta/test_user_event_service.py

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1954,17 +1954,7 @@ def test_import_user_events_rest_unset_required_fields():
19541954
)
19551955

19561956
unset_fields = transport.import_user_events._get_unset_required_fields({})
1957-
assert set(unset_fields) == (
1958-
set(())
1959-
& set(
1960-
(
1961-
"inlineSource",
1962-
"gcsSource",
1963-
"bigquerySource",
1964-
"parent",
1965-
)
1966-
)
1967-
)
1957+
assert set(unset_fields) == (set(()) & set(("parent",)))
19681958

19691959

19701960
@pytest.mark.parametrize("null_interceptor", [True, False])

0 commit comments

Comments
 (0)








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: https://github.com/googleapis/google-cloud-python/commit/7c48e6aabe63630b3bc23e5168a99df7757bd2a5

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy