Content-Length: 877191 | pFad | https://github.com/googleapis/googleapis/commit/1b650d6c6ee9e50fe122562550a47ec258151498

28 feat: add `PredictionService.ServerStreamingPredict` method · googleapis/googleapis@1b650d6 · GitHub
Skip to content

Commit 1b650d6

Browse files
Google APIscopybara-github
Google APIs
authored and committed
feat: add PredictionService.ServerStreamingPredict method
feat: add `StreamingPredictRequest` type
feat: add `StreamingPredictResponse` type
feat: add `Tensor` type
PiperOrigin-RevId: 551672526
1 parent 149aec4 commit 1b650d6

File tree

4 files changed

+283
-0
lines changed

4 files changed

+283
-0
lines changed

google/cloud/aiplatform/v1/prediction_service.proto

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import "google/api/field_behavior.proto";
2222
import "google/api/httpbody.proto";
2323
import "google/api/resource.proto";
2424
import "google/cloud/aiplatform/v1/explanation.proto";
25+
import "google/cloud/aiplatform/v1/types.proto";
2526
import "google/protobuf/struct.proto";
2627

2728
option csharp_namespace = "Google.Cloud.AIPlatform.V1";
@@ -74,6 +75,20 @@ service PredictionService {
7475
option (google.api.method_signature) = "endpoint,http_body";
7576
}
7677

78+
// Performs a server-side streaming online prediction request for Vertex
79+
// LLM streaming.
//
// The caller sends a single StreamingPredictRequest and receives a stream of
// StreamingPredictResponse messages.
//
// Over HTTP this is a POST whose URL path supplies the request's `endpoint`
// field. The primary binding matches an Endpoint resource path; the
// additional binding matches a publisher model path
// (`projects/*/locations/*/publishers/*/models/*`). All remaining request
// fields are carried in the HTTP body (`body: "*"`).
80+
rpc ServerStreamingPredict(StreamingPredictRequest)
81+
returns (stream StreamingPredictResponse) {
82+
option (google.api.http) = {
83+
post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:serverStreamingPredict"
84+
body: "*"
85+
additional_bindings {
86+
post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:serverStreamingPredict"
87+
body: "*"
88+
}
89+
};
90+
}
91+
7792
// Perform an online explanation.
7893
//
7994
// If
@@ -158,6 +173,11 @@ message PredictResponse {
158173
// name][google.cloud.aiplatform.v1.Model.display_name] of the Model which is
159174
// deployed as the DeployedModel that this prediction hits.
160175
string model_display_name = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
176+
177+
// Output only. Request-level metadata returned by the model. The metadata
178+
// type will be dependent upon the model implementation.
179+
google.protobuf.Value metadata = 6
180+
[(google.api.field_behavior) = OUTPUT_ONLY];
161181
}
162182

163183
// Request message for
@@ -191,6 +211,40 @@ message RawPredictRequest {
191211
google.api.HttpBody http_body = 2;
192212
}
193213

214+
// Request message for
215+
// [PredictionService.ServerStreamingPredict][google.cloud.aiplatform.v1.PredictionService.ServerStreamingPredict].
216+
//
217+
// The first message must contain
218+
// [endpoint][google.cloud.aiplatform.v1.StreamingPredictRequest.endpoint] field
219+
// and optionally
// [inputs][google.cloud.aiplatform.v1.StreamingPredictRequest.inputs]. The
// subsequent messages must contain
// [inputs][google.cloud.aiplatform.v1.StreamingPredictRequest.inputs].
//
// NOTE(review): ServerStreamingPredict accepts a single request message, so
// the first/subsequent-message rule presumably only applies if this type is
// also used by a client-streaming method - confirm.
220+
message StreamingPredictRequest {
221+
// Required. The name of the Endpoint requested to serve the prediction.
222+
// Format:
223+
// `projects/{project}/locations/{location}/endpoints/{endpoint}`
224+
string endpoint = 1 [
225+
(google.api.field_behavior) = REQUIRED,
226+
(google.api.resource_reference) = {
227+
type: "aiplatform.googleapis.com/Endpoint"
228+
}
229+
];
230+
231+
// The prediction input, as a list of tensors.
232+
repeated Tensor inputs = 2;
233+
234+
// The parameters that govern the prediction, encoded as a single tensor.
235+
Tensor parameters = 3;
236+
}
237+
238+
// Response message for
239+
// [PredictionService.ServerStreamingPredict][google.cloud.aiplatform.v1.PredictionService.ServerStreamingPredict].
//
// ServerStreamingPredict returns a stream of these messages for one request.
240+
message StreamingPredictResponse {
241+
// The prediction output, as a list of tensors.
242+
repeated Tensor outputs = 1;
243+
244+
// The parameters that govern the prediction, encoded as a single tensor.
245+
Tensor parameters = 2;
246+
}
247+
194248
// Request message for
195249
// [PredictionService.Explain][google.cloud.aiplatform.v1.PredictionService.Explain].
196250
message ExplainRequest {

google/cloud/aiplatform/v1/types.proto

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,90 @@ message StringArray {
4747
// A list of string values.
4848
repeated string values = 1;
4949
}
50+
51+
// A tensor value type.
//
// A Tensor carries a data type, a shape, and exactly one value
// representation. Besides flat scalar lists it can also hold nested tensors
// (`list_val`, `struct_val`) or a serialized blob (`tensor_val`).
52+
message Tensor {
53+
// Data type of the tensor.
54+
enum DataType {
55+
// Not a legal value for DataType. Used to indicate a DataType field has not
56+
// been set.
57+
DATA_TYPE_UNSPECIFIED = 0;
58+
59+
// Data types that all computation devices are expected to be
60+
// capable of supporting.
61+
BOOL = 1;
62+
63+
STRING = 2;
64+
65+
FLOAT = 3;
66+
67+
DOUBLE = 4;
68+
69+
INT8 = 5;
70+
71+
INT16 = 6;
72+
73+
INT32 = 7;
74+
75+
INT64 = 8;
76+
77+
UINT8 = 9;
78+
79+
UINT16 = 10;
80+
81+
UINT32 = 11;
82+
83+
UINT64 = 12;
84+
}
85+
86+
// The data type of the tensor.
87+
DataType dtype = 1;
88+
89+
// Shape of the tensor.
90+
repeated int64 shape = 2;
91+
92+
// Type specific representations that make it easy to create tensor protos in
93+
// all languages. Only the representation corresponding to "dtype" can
94+
// be set. The values hold the flattened representation of the tensor in
95+
// row major order.
96+
//
97+
// [BOOL][google.cloud.aiplatform.v1.Tensor.DataType.BOOL]
98+
repeated bool bool_val = 3;
99+
100+
// [STRING][google.cloud.aiplatform.v1.Tensor.DataType.STRING]
101+
repeated string string_val = 14;
102+
103+
// [STRING][google.cloud.aiplatform.v1.Tensor.DataType.STRING]
104+
repeated bytes bytes_val = 15;
105+
106+
// [FLOAT][google.cloud.aiplatform.v1.Tensor.DataType.FLOAT]
107+
repeated float float_val = 5;
108+
109+
// [DOUBLE][google.cloud.aiplatform.v1.Tensor.DataType.DOUBLE]
110+
repeated double double_val = 6;
111+
112+
// [INT8][google.cloud.aiplatform.v1.Tensor.DataType.INT8]
113+
// [INT16][google.cloud.aiplatform.v1.Tensor.DataType.INT16]
114+
// [INT32][google.cloud.aiplatform.v1.Tensor.DataType.INT32]
//
// All three signed types share this int32 representation.
115+
repeated int32 int_val = 7;
116+
117+
// [INT64][google.cloud.aiplatform.v1.Tensor.DataType.INT64]
118+
repeated int64 int64_val = 8;
119+
120+
// [UINT8][google.cloud.aiplatform.v1.Tensor.DataType.UINT8]
121+
// [UINT16][google.cloud.aiplatform.v1.Tensor.DataType.UINT16]
122+
// [UINT32][google.cloud.aiplatform.v1.Tensor.DataType.UINT32]
//
// All three unsigned types share this uint32 representation.
123+
repeated uint32 uint_val = 9;
124+
125+
// [UINT64][google.cloud.aiplatform.v1.Tensor.DataType.UINT64]
126+
repeated uint64 uint64_val = 10;
127+
128+
// A list of tensor values. Tensors can nest recursively through this field.
129+
repeated Tensor list_val = 11;
130+
131+
// A map of string to tensor. Tensors can nest recursively through this
// field.
132+
map<string, Tensor> struct_val = 12;
133+
134+
// Serialized raw tensor content.
135+
bytes tensor_val = 13;
136+
}

google/cloud/aiplatform/v1beta1/prediction_service.proto

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import "google/api/field_behavior.proto";
2222
import "google/api/httpbody.proto";
2323
import "google/api/resource.proto";
2424
import "google/cloud/aiplatform/v1beta1/explanation.proto";
25+
import "google/cloud/aiplatform/v1beta1/types.proto";
2526
import "google/protobuf/struct.proto";
2627

2728
option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
@@ -74,6 +75,20 @@ service PredictionService {
7475
option (google.api.method_signature) = "endpoint,http_body";
7576
}
7677

78+
// Performs a server-side streaming online prediction request for Vertex
79+
// LLM streaming.
//
// The caller sends a single StreamingPredictRequest and receives a stream of
// StreamingPredictResponse messages.
//
// Over HTTP this is a POST whose URL path supplies the request's `endpoint`
// field. The primary binding matches an Endpoint resource path; the
// additional binding matches a publisher model path
// (`projects/*/locations/*/publishers/*/models/*`). All remaining request
// fields are carried in the HTTP body (`body: "*"`).
80+
rpc ServerStreamingPredict(StreamingPredictRequest)
81+
returns (stream StreamingPredictResponse) {
82+
option (google.api.http) = {
83+
post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:serverStreamingPredict"
84+
body: "*"
85+
additional_bindings {
86+
post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:serverStreamingPredict"
87+
body: "*"
88+
}
89+
};
90+
}
91+
7792
// Perform an online explanation.
7893
//
7994
// If
@@ -160,6 +175,11 @@ message PredictResponse {
160175
// name][google.cloud.aiplatform.v1beta1.Model.display_name] of the Model
161176
// which is deployed as the DeployedModel that this prediction hits.
162177
string model_display_name = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
178+
179+
// Output only. Request-level metadata returned by the model. The metadata
180+
// type will be dependent upon the model implementation.
181+
google.protobuf.Value metadata = 6
182+
[(google.api.field_behavior) = OUTPUT_ONLY];
163183
}
164184

165185
// Request message for
@@ -193,6 +213,41 @@ message RawPredictRequest {
193213
google.api.HttpBody http_body = 2;
194214
}
195215

216+
// Request message for
217+
// [PredictionService.ServerStreamingPredict][google.cloud.aiplatform.v1beta1.PredictionService.ServerStreamingPredict].
218+
//
219+
// The first message must contain
220+
// [endpoint][google.cloud.aiplatform.v1beta1.StreamingPredictRequest.endpoint]
221+
// field and optionally
// [inputs][google.cloud.aiplatform.v1beta1.StreamingPredictRequest.inputs].
// The subsequent messages must contain
222+
// [inputs][google.cloud.aiplatform.v1beta1.StreamingPredictRequest.inputs].
//
// NOTE(review): ServerStreamingPredict accepts a single request message, so
// the first/subsequent-message rule presumably only applies if this type is
// also used by a client-streaming method - confirm.
223+
message StreamingPredictRequest {
224+
// Required. The name of the Endpoint requested to serve the prediction.
225+
// Format:
226+
// `projects/{project}/locations/{location}/endpoints/{endpoint}`
227+
string endpoint = 1 [
228+
(google.api.field_behavior) = REQUIRED,
229+
(google.api.resource_reference) = {
230+
type: "aiplatform.googleapis.com/Endpoint"
231+
}
232+
];
233+
234+
// The prediction input, as a list of tensors.
235+
repeated Tensor inputs = 2;
236+
237+
// The parameters that govern the prediction, encoded as a single tensor.
238+
Tensor parameters = 3;
239+
}
240+
241+
// Response message for
242+
// [PredictionService.ServerStreamingPredict][google.cloud.aiplatform.v1beta1.PredictionService.ServerStreamingPredict].
//
// ServerStreamingPredict returns a stream of these messages for one request.
243+
message StreamingPredictResponse {
244+
// The prediction output, as a list of tensors.
245+
repeated Tensor outputs = 1;
246+
247+
// The parameters that govern the prediction, encoded as a single tensor.
248+
Tensor parameters = 2;
249+
}
250+
196251
// Request message for
197252
// [PredictionService.Explain][google.cloud.aiplatform.v1beta1.PredictionService.Explain].
198253
message ExplainRequest {

google/cloud/aiplatform/v1beta1/types.proto

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,90 @@ message StringArray {
4747
// A list of string values.
4848
repeated string values = 1;
4949
}
50+
51+
// A tensor value type.
//
// A Tensor carries a data type, a shape, and exactly one value
// representation. Besides flat scalar lists it can also hold nested tensors
// (`list_val`, `struct_val`) or a serialized blob (`tensor_val`).
52+
message Tensor {
53+
// Data type of the tensor.
54+
enum DataType {
55+
// Not a legal value for DataType. Used to indicate a DataType field has not
56+
// been set.
57+
DATA_TYPE_UNSPECIFIED = 0;
58+
59+
// Data types that all computation devices are expected to be
60+
// capable of supporting.
61+
BOOL = 1;
62+
63+
STRING = 2;
64+
65+
FLOAT = 3;
66+
67+
DOUBLE = 4;
68+
69+
INT8 = 5;
70+
71+
INT16 = 6;
72+
73+
INT32 = 7;
74+
75+
INT64 = 8;
76+
77+
UINT8 = 9;
78+
79+
UINT16 = 10;
80+
81+
UINT32 = 11;
82+
83+
UINT64 = 12;
84+
}
85+
86+
// The data type of the tensor.
87+
DataType dtype = 1;
88+
89+
// Shape of the tensor.
90+
repeated int64 shape = 2;
91+
92+
// Type specific representations that make it easy to create tensor protos in
93+
// all languages. Only the representation corresponding to "dtype" can
94+
// be set. The values hold the flattened representation of the tensor in
95+
// row major order.
96+
//
97+
// [BOOL][google.cloud.aiplatform.v1beta1.Tensor.DataType.BOOL]
98+
repeated bool bool_val = 3;
99+
100+
// [STRING][google.cloud.aiplatform.v1beta1.Tensor.DataType.STRING]
101+
repeated string string_val = 14;
102+
103+
// [STRING][google.cloud.aiplatform.v1beta1.Tensor.DataType.STRING]
104+
repeated bytes bytes_val = 15;
105+
106+
// [FLOAT][google.cloud.aiplatform.v1beta1.Tensor.DataType.FLOAT]
107+
repeated float float_val = 5;
108+
109+
// [DOUBLE][google.cloud.aiplatform.v1beta1.Tensor.DataType.DOUBLE]
110+
repeated double double_val = 6;
111+
112+
// [INT8][google.cloud.aiplatform.v1beta1.Tensor.DataType.INT8]
113+
// [INT16][google.cloud.aiplatform.v1beta1.Tensor.DataType.INT16]
114+
// [INT32][google.cloud.aiplatform.v1beta1.Tensor.DataType.INT32]
//
// All three signed types share this int32 representation.
115+
repeated int32 int_val = 7;
116+
117+
// [INT64][google.cloud.aiplatform.v1beta1.Tensor.DataType.INT64]
118+
repeated int64 int64_val = 8;
119+
120+
// [UINT8][google.cloud.aiplatform.v1beta1.Tensor.DataType.UINT8]
121+
// [UINT16][google.cloud.aiplatform.v1beta1.Tensor.DataType.UINT16]
122+
// [UINT32][google.cloud.aiplatform.v1beta1.Tensor.DataType.UINT32]
//
// All three unsigned types share this uint32 representation.
123+
repeated uint32 uint_val = 9;
124+
125+
// [UINT64][google.cloud.aiplatform.v1beta1.Tensor.DataType.UINT64]
126+
repeated uint64 uint64_val = 10;
127+
128+
// A list of tensor values. Tensors can nest recursively through this field.
129+
repeated Tensor list_val = 11;
130+
131+
// A map of string to tensor. Tensors can nest recursively through this
// field.
132+
map<string, Tensor> struct_val = 12;
133+
134+
// Serialized raw tensor content.
135+
bytes tensor_val = 13;
136+
}

0 commit comments

Comments
 (0)








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: https://github.com/googleapis/googleapis/commit/1b650d6c6ee9e50fe122562550a47ec258151498

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy