feat: add PredictionService.ServerStreamingPredict method

Google APIs · copybara-github · commit 1b650d6c6ee9 · 2023-07-27T16:28:14.000-07:00
feat: add `StreamingPredictRequest` type
feat: add `StreamingPredictResponse` type
feat: add `Tensor` type

PiperOrigin-RevId: 551672526
diff --git a/google/cloud/aiplatform/v1/prediction_service.proto b/google/cloud/aiplatform/v1/prediction_service.proto
@@ -22,6 +22,7 @@ import "google/api/field_behavior.proto";
 import "google/api/httpbody.proto";
 import "google/api/resource.proto";
 import "google/cloud/aiplatform/v1/explanation.proto";
+import "google/cloud/aiplatform/v1/types.proto";
 import "google/protobuf/struct.proto";
 
 option csharp_namespace = "Google.Cloud.AIPlatform.V1";
@@ -74,6 +75,20 @@ service PredictionService {
     option (google.api.method_signature) = "endpoint,http_body";
   }
 
+  // Perform a server-side streaming online prediction for Vertex LLM
+  // streaming: a single request is answered by a stream of responses.
+  rpc ServerStreamingPredict(StreamingPredictRequest)
+      returns (stream StreamingPredictResponse) {
+    option (google.api.http) = {
+      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:serverStreamingPredict"
+      body: "*"
+      additional_bindings {
+        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:serverStreamingPredict"
+        body: "*"
+      }
+    };
+  }
+
   // Perform an online explanation.
   //
   // If
@@ -158,6 +173,11 @@ message PredictResponse {
   // name][google.cloud.aiplatform.v1.Model.display_name] of the Model which is
   // deployed as the DeployedModel that this prediction hits.
   string model_display_name = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. Request-level metadata returned by the model. The metadata
+  // type will be dependent upon the model implementation.
+  google.protobuf.Value metadata = 6
+      [(google.api.field_behavior) = OUTPUT_ONLY];
 }
 
 // Request message for
@@ -191,6 +211,40 @@ message RawPredictRequest {
   google.api.HttpBody http_body = 2;
 }
 
+// Request message for
+// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1.PredictionService.StreamingPredict].
+//
+// The first message must contain the
+// [endpoint][google.cloud.aiplatform.v1.StreamingPredictRequest.endpoint] field
+// and optionally `inputs`. Subsequent messages must contain `inputs`.
+message StreamingPredictRequest {
+  // Required. The name of the Endpoint requested to serve the prediction.
+  // Format:
+  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
+  string endpoint = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = {
+      type: "aiplatform.googleapis.com/Endpoint"
+    }
+  ];
+
+  // The prediction input tensors.
+  repeated Tensor inputs = 2;
+
+  // The parameters that govern the prediction.
+  Tensor parameters = 3;
+}
+
+// Response message for
+// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1.PredictionService.StreamingPredict].
+message StreamingPredictResponse {
+  // The prediction output tensors.
+  repeated Tensor outputs = 1;
+
+  // The parameters that govern the prediction.
+  Tensor parameters = 2;
+}
+
 // Request message for
 // [PredictionService.Explain][google.cloud.aiplatform.v1.PredictionService.Explain].
 message ExplainRequest {
diff --git a/google/cloud/aiplatform/v1/types.proto b/google/cloud/aiplatform/v1/types.proto
@@ -47,3 +47,90 @@ message StringArray {
   // A list of string values.
   repeated string values = 1;
 }
+
+// A tensor value type: a dtype, a shape, and dtype-specific value fields.
+message Tensor {
+  // Data type of the tensor.
+  enum DataType {
+    // Not a legal value for DataType. Used to indicate a DataType field has not
+    // been set.
+    DATA_TYPE_UNSPECIFIED = 0;
+
+    // Data types that all computation devices are expected to be
+    // capable of supporting.
+    BOOL = 1;
+
+    STRING = 2;
+
+    FLOAT = 3;
+
+    DOUBLE = 4;
+
+    INT8 = 5;
+
+    INT16 = 6;
+
+    INT32 = 7;
+
+    INT64 = 8;
+
+    UINT8 = 9;
+
+    UINT16 = 10;
+
+    UINT32 = 11;
+
+    UINT64 = 12;
+  }
+
+  // The data type of the tensor.
+  DataType dtype = 1;
+
+  // Shape of the tensor.
+  repeated int64 shape = 2;
+
+  // Type-specific representations that make it easy to create tensor protos in
+  // all languages. Only the representation corresponding to "dtype" can
+  // be set. The values hold the flattened representation of the tensor in
+  // row-major order.
+  //
+  // [BOOL][google.cloud.aiplatform.v1.Tensor.DataType.BOOL]
+  repeated bool bool_val = 3;
+
+  // [STRING][google.cloud.aiplatform.v1.Tensor.DataType.STRING]
+  repeated string string_val = 14;
+
+  // [STRING][google.cloud.aiplatform.v1.Tensor.DataType.STRING]
+  repeated bytes bytes_val = 15;
+
+  // [FLOAT][google.cloud.aiplatform.v1.Tensor.DataType.FLOAT]
+  repeated float float_val = 5;
+
+  // [DOUBLE][google.cloud.aiplatform.v1.Tensor.DataType.DOUBLE]
+  repeated double double_val = 6;
+
+  // [INT8][google.cloud.aiplatform.v1.Tensor.DataType.INT8]
+  // [INT16][google.cloud.aiplatform.v1.Tensor.DataType.INT16]
+  // [INT32][google.cloud.aiplatform.v1.Tensor.DataType.INT32]
+  repeated int32 int_val = 7;
+
+  // [INT64][google.cloud.aiplatform.v1.Tensor.DataType.INT64]
+  repeated int64 int64_val = 8;
+
+  // [UINT8][google.cloud.aiplatform.v1.Tensor.DataType.UINT8]
+  // [UINT16][google.cloud.aiplatform.v1.Tensor.DataType.UINT16]
+  // [UINT32][google.cloud.aiplatform.v1.Tensor.DataType.UINT32]
+  repeated uint32 uint_val = 9;
+
+  // [UINT64][google.cloud.aiplatform.v1.Tensor.DataType.UINT64]
+  repeated uint64 uint64_val = 10;
+
+  // A list of tensor values.
+  repeated Tensor list_val = 11;
+
+  // A map of string to tensor.
+  map<string, Tensor> struct_val = 12;
+
+  // Serialized raw tensor content.
+  bytes tensor_val = 13;
+}
diff --git a/google/cloud/aiplatform/v1beta1/prediction_service.proto b/google/cloud/aiplatform/v1beta1/prediction_service.proto
@@ -22,6 +22,7 @@ import "google/api/field_behavior.proto";
 import "google/api/httpbody.proto";
 import "google/api/resource.proto";
 import "google/cloud/aiplatform/v1beta1/explanation.proto";
+import "google/cloud/aiplatform/v1beta1/types.proto";
 import "google/protobuf/struct.proto";
 
 option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
@@ -74,6 +75,20 @@ service PredictionService {
     option (google.api.method_signature) = "endpoint,http_body";
   }
 
+  // Perform a server-side streaming online prediction for Vertex LLM
+  // streaming: a single request is answered by a stream of responses.
+  rpc ServerStreamingPredict(StreamingPredictRequest)
+      returns (stream StreamingPredictResponse) {
+    option (google.api.http) = {
+      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:serverStreamingPredict"
+      body: "*"
+      additional_bindings {
+        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:serverStreamingPredict"
+        body: "*"
+      }
+    };
+  }
+
   // Perform an online explanation.
   //
   // If
@@ -160,6 +175,11 @@ message PredictResponse {
   // name][google.cloud.aiplatform.v1beta1.Model.display_name] of the Model
   // which is deployed as the DeployedModel that this prediction hits.
   string model_display_name = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. Request-level metadata returned by the model. The metadata
+  // type will be dependent upon the model implementation.
+  google.protobuf.Value metadata = 6
+      [(google.api.field_behavior) = OUTPUT_ONLY];
 }
 
 // Request message for
@@ -193,6 +213,41 @@ message RawPredictRequest {
   google.api.HttpBody http_body = 2;
 }
 
+// Request message for
+// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingPredict].
+//
+// The first message must contain the
+// [endpoint][google.cloud.aiplatform.v1beta1.StreamingPredictRequest.endpoint]
+// field and optionally `inputs`. Subsequent messages must contain
+// `inputs`.
+message StreamingPredictRequest {
+  // Required. The name of the Endpoint requested to serve the prediction.
+  // Format:
+  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
+  string endpoint = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = {
+      type: "aiplatform.googleapis.com/Endpoint"
+    }
+  ];
+
+  // The prediction input tensors.
+  repeated Tensor inputs = 2;
+
+  // The parameters that govern the prediction.
+  Tensor parameters = 3;
+}
+
+// Response message for
+// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingPredict].
+message StreamingPredictResponse {
+  // The prediction output tensors.
+  repeated Tensor outputs = 1;
+
+  // The parameters that govern the prediction.
+  Tensor parameters = 2;
+}
+
 // Request message for
 // [PredictionService.Explain][google.cloud.aiplatform.v1beta1.PredictionService.Explain].
 message ExplainRequest {
diff --git a/google/cloud/aiplatform/v1beta1/types.proto b/google/cloud/aiplatform/v1beta1/types.proto
@@ -47,3 +47,90 @@ message StringArray {
   // A list of string values.
   repeated string values = 1;
 }
+
+// A tensor value type: a dtype, a shape, and dtype-specific value fields.
+message Tensor {
+  // Data type of the tensor.
+  enum DataType {
+    // Not a legal value for DataType. Used to indicate a DataType field has not
+    // been set.
+    DATA_TYPE_UNSPECIFIED = 0;
+
+    // Data types that all computation devices are expected to be
+    // capable of supporting.
+    BOOL = 1;
+
+    STRING = 2;
+
+    FLOAT = 3;
+
+    DOUBLE = 4;
+
+    INT8 = 5;
+
+    INT16 = 6;
+
+    INT32 = 7;
+
+    INT64 = 8;
+
+    UINT8 = 9;
+
+    UINT16 = 10;
+
+    UINT32 = 11;
+
+    UINT64 = 12;
+  }
+
+  // The data type of the tensor.
+  DataType dtype = 1;
+
+  // Shape of the tensor.
+  repeated int64 shape = 2;
+
+  // Type-specific representations that make it easy to create tensor protos in
+  // all languages. Only the representation corresponding to "dtype" can
+  // be set. The values hold the flattened representation of the tensor in
+  // row-major order.
+  //
+  // [BOOL][google.cloud.aiplatform.v1beta1.Tensor.DataType.BOOL]
+  repeated bool bool_val = 3;
+
+  // [STRING][google.cloud.aiplatform.v1beta1.Tensor.DataType.STRING]
+  repeated string string_val = 14;
+
+  // [STRING][google.cloud.aiplatform.v1beta1.Tensor.DataType.STRING]
+  repeated bytes bytes_val = 15;
+
+  // [FLOAT][google.cloud.aiplatform.v1beta1.Tensor.DataType.FLOAT]
+  repeated float float_val = 5;
+
+  // [DOUBLE][google.cloud.aiplatform.v1beta1.Tensor.DataType.DOUBLE]
+  repeated double double_val = 6;
+
+  // [INT8][google.cloud.aiplatform.v1beta1.Tensor.DataType.INT8]
+  // [INT16][google.cloud.aiplatform.v1beta1.Tensor.DataType.INT16]
+  // [INT32][google.cloud.aiplatform.v1beta1.Tensor.DataType.INT32]
+  repeated int32 int_val = 7;
+
+  // [INT64][google.cloud.aiplatform.v1beta1.Tensor.DataType.INT64]
+  repeated int64 int64_val = 8;
+
+  // [UINT8][google.cloud.aiplatform.v1beta1.Tensor.DataType.UINT8]
+  // [UINT16][google.cloud.aiplatform.v1beta1.Tensor.DataType.UINT16]
+  // [UINT32][google.cloud.aiplatform.v1beta1.Tensor.DataType.UINT32]
+  repeated uint32 uint_val = 9;
+
+  // [UINT64][google.cloud.aiplatform.v1beta1.Tensor.DataType.UINT64]
+  repeated uint64 uint64_val = 10;
+
+  // A list of tensor values.
+  repeated Tensor list_val = 11;
+
+  // A map of string to tensor.
+  map<string, Tensor> struct_val = 12;
+
+  // Serialized raw tensor content.
+  bytes tensor_val = 13;
+}