@@ -978,6 +978,10 @@ def predict(
         grounding_source: Optional[
             Union[GroundingSource.WebSearch, GroundingSource.VertexAISearch]
         ] = None,
+        logprobs: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> "MultiCandidateTextGenerationResponse":
         """Gets model response for a single prompt.
@@ -990,6 +994,26 @@ def predict(
             stop_sequences: Customized stop sequences to stop the decoding process.
             candidate_count: Number of response candidates to return.
             grounding_source: If specified, grounding feature will be enabled using the grounding source. Default: None.
+            logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+                at each generation step. The chosen tokens and their log probabilities at each step are always
+                returned. The chosen token may or may not be in the top `logprobs` most likely candidates.
+                The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+                probabilities are returned.
+                The maximum value for `logprobs` is 5.
+            presence_penalty:
+                Positive values penalize tokens that have appeared in the generated text,
+                thus increasing the likelihood of generating more diverse topics.
+                Range: [-2.0, 2.0]
+            frequency_penalty:
+                Positive values penalize tokens that repeatedly appear in the generated
+                text, thus decreasing the likelihood of repeating the same content.
+                Range: [-2.0, 2.0]
+            logit_bias:
+                Mapping from token IDs (integers) to their bias values (floats).
+                The bias values are added to the logits before sampling.
+                A larger positive bias increases the probability of choosing the token.
+                A larger negative bias decreases the probability of choosing the token.
+                Range: [-100.0, 100.0]

         Returns:
             A `MultiCandidateTextGenerationResponse` object that contains the text produced by the model.
@@ -1003,6 +1027,10 @@ def predict(
             stop_sequences=stop_sequences,
             candidate_count=candidate_count,
             grounding_source=grounding_source,
+            logprobs=logprobs,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
         )

         prediction_response = self._endpoint.predict(
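Taken together, these hunks thread the four new sampling controls from the public `predict` signature down to the endpoint request. A minimal usage sketch, not part of the diff: the model name, prompt, and token IDs below are placeholders, and where the per-step log probabilities surface in the response payload is not shown in this change.

    from vertexai.language_models import TextGenerationModel

    # Placeholder model name; any text generation model that accepts these
    # parameters is called the same way.
    model = TextGenerationModel.from_pretrained("text-bison")

    response = model.predict(
        "List three uses of log probabilities.",
        max_output_tokens=128,
        temperature=0.7,
        logprobs=3,               # return the top-3 alternatives per step
        presence_penalty=0.6,     # nudge the model toward new topics
        frequency_penalty=0.3,    # discourage verbatim repetition
        logit_bias={25996: 5.0},  # placeholder token ID, boosted
    )

    print(response.text)
    # Per-step log probabilities, if returned, travel in the raw prediction
    # payload rather than in the typed fields used above.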
@@ -1027,6 +1055,10 @@ async def predict_async(
         grounding_source: Optional[
             Union[GroundingSource.WebSearch, GroundingSource.VertexAISearch]
         ] = None,
+        logprobs: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> "MultiCandidateTextGenerationResponse":
         """Asynchronously gets model response for a single prompt.
@@ -1039,6 +1071,26 @@ async def predict_async(
             stop_sequences: Customized stop sequences to stop the decoding process.
             candidate_count: Number of response candidates to return.
             grounding_source: If specified, grounding feature will be enabled using the grounding source. Default: None.
+            logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+                at each generation step. The chosen tokens and their log probabilities at each step are always
+                returned. The chosen token may or may not be in the top `logprobs` most likely candidates.
+                The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+                probabilities are returned.
+                The maximum value for `logprobs` is 5.
+            presence_penalty:
+                Positive values penalize tokens that have appeared in the generated text,
+                thus increasing the likelihood of generating more diverse topics.
+                Range: [-2.0, 2.0]
+            frequency_penalty:
+                Positive values penalize tokens that repeatedly appear in the generated
+                text, thus decreasing the likelihood of repeating the same content.
+                Range: [-2.0, 2.0]
+            logit_bias:
+                Mapping from token IDs (integers) to their bias values (floats).
+                The bias values are added to the logits before sampling.
+                A larger positive bias increases the probability of choosing the token.
+                A larger negative bias decreases the probability of choosing the token.
+                Range: [-100.0, 100.0]

         Returns:
             A `MultiCandidateTextGenerationResponse` object that contains the text produced by the model.
@@ -1052,6 +1104,10 @@ async def predict_async(
             stop_sequences=stop_sequences,
             candidate_count=candidate_count,
             grounding_source=grounding_source,
+            logprobs=logprobs,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
         )

         prediction_response = await self._endpoint.predict_async(
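The asynchronous variant accepts the same four parameters. A short sketch under the same placeholder assumptions (model name, prompt, token ID):

    import asyncio

    from vertexai.language_models import TextGenerationModel


    async def main() -> None:
        model = TextGenerationModel.from_pretrained("text-bison")  # placeholder model
        response = await model.predict_async(
            "Summarize the plot of Hamlet in two sentences.",
            logprobs=2,
            presence_penalty=0.5,
            frequency_penalty=0.2,
            logit_bias={1234: -10.0},  # placeholder token ID, suppressed
        )
        print(response.text)


    asyncio.run(main())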
@@ -1072,6 +1128,10 @@ def predict_streaming(
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
         stop_sequences: Optional[List[str]] = None,
+        logprobs: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> Iterator[TextGenerationResponse]:
         """Gets a streaming model response for a single prompt.
@@ -1084,6 +1144,26 @@ def predict_streaming(
             top_k: The number of highest probability vocabulary tokens to keep for top-k-filtering. Range: [1, 40]. Default: 40.
             top_p: The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling. Range: [0, 1]. Default: 0.95.
             stop_sequences: Customized stop sequences to stop the decoding process.
+            logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+                at each generation step. The chosen tokens and their log probabilities at each step are always
+                returned. The chosen token may or may not be in the top `logprobs` most likely candidates.
+                The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+                probabilities are returned.
+                The maximum value for `logprobs` is 5.
+            presence_penalty:
+                Positive values penalize tokens that have appeared in the generated text,
+                thus increasing the likelihood of generating more diverse topics.
+                Range: [-2.0, 2.0]
+            frequency_penalty:
+                Positive values penalize tokens that repeatedly appear in the generated
+                text, thus decreasing the likelihood of repeating the same content.
+                Range: [-2.0, 2.0]
+            logit_bias:
+                Mapping from token IDs (integers) to their bias values (floats).
+                The bias values are added to the logits before sampling.
+                A larger positive bias increases the probability of choosing the token.
+                A larger negative bias decreases the probability of choosing the token.
+                Range: [-100.0, 100.0]

         Yields:
             A stream of `TextGenerationResponse` objects that contain partial
@@ -1096,6 +1176,10 @@ def predict_streaming(
             top_k=top_k,
             top_p=top_p,
             stop_sequences=stop_sequences,
+            logprobs=logprobs,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
         )

         prediction_service_client = self._endpoint._prediction_client
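For streaming, the new controls apply to the whole stream; each yielded `TextGenerationResponse` carries a partial piece of the output. A brief sketch with the same placeholder model name and prompt:

    from vertexai.language_models import TextGenerationModel

    model = TextGenerationModel.from_pretrained("text-bison")  # placeholder model

    for chunk in model.predict_streaming(
        "Write a limerick about log probabilities.",
        logprobs=1,
        presence_penalty=0.4,
        frequency_penalty=0.4,
    ):
        print(chunk.text, end="")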
@@ -1122,6 +1206,10 @@ async def predict_streaming_async(
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
         stop_sequences: Optional[List[str]] = None,
+        logprobs: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> AsyncIterator[TextGenerationResponse]:
         """Asynchronously gets a streaming model response for a single prompt.
@@ -1134,6 +1222,26 @@ async def predict_streaming_async(
             top_k: The number of highest probability vocabulary tokens to keep for top-k-filtering. Range: [1, 40]. Default: 40.
             top_p: The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling. Range: [0, 1]. Default: 0.95.
             stop_sequences: Customized stop sequences to stop the decoding process.
+            logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+                at each generation step. The chosen tokens and their log probabilities at each step are always
+                returned. The chosen token may or may not be in the top `logprobs` most likely candidates.
+                The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+                probabilities are returned.
+                The maximum value for `logprobs` is 5.
+            presence_penalty:
+                Positive values penalize tokens that have appeared in the generated text,
+                thus increasing the likelihood of generating more diverse topics.
+                Range: [-2.0, 2.0]
+            frequency_penalty:
+                Positive values penalize tokens that repeatedly appear in the generated
+                text, thus decreasing the likelihood of repeating the same content.
+                Range: [-2.0, 2.0]
+            logit_bias:
+                Mapping from token IDs (integers) to their bias values (floats).
+                The bias values are added to the logits before sampling.
+                A larger positive bias increases the probability of choosing the token.
+                A larger negative bias decreases the probability of choosing the token.
+                Range: [-100.0, 100.0]

         Yields:
             A stream of `TextGenerationResponse` objects that contain partial
@@ -1146,6 +1254,10 @@ async def predict_streaming_async(
             top_k=top_k,
             top_p=top_p,
             stop_sequences=stop_sequences,
+            logprobs=logprobs,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
        )

         prediction_service_async_client = self._endpoint._prediction_async_client
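The async streaming method differs only in consumption style (`async for`). A sketch with the same placeholder assumptions:

    import asyncio

    from vertexai.language_models import TextGenerationModel


    async def stream() -> None:
        model = TextGenerationModel.from_pretrained("text-bison")  # placeholder model
        async for chunk in model.predict_streaming_async(
            "Explain frequency penalties in one paragraph.",
            frequency_penalty=1.0,
            logit_bias={42: 2.5},  # placeholder token ID
        ):
            print(chunk.text, end="")


    asyncio.run(stream())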
@@ -1174,6 +1286,10 @@ def _create_text_generation_prediction_request(
     grounding_source: Optional[
         Union[GroundingSource.WebSearch, GroundingSource.VertexAISearch]
     ] = None,
+    logprobs: Optional[int] = None,
+    presence_penalty: Optional[float] = None,
+    frequency_penalty: Optional[float] = None,
+    logit_bias: Optional[Dict[int, float]] = None,
 ) -> "_PredictionRequest":
     """Prepares the text generation request for a single prompt.
@@ -1186,7 +1302,26 @@ def _create_text_generation_prediction_request(
         stop_sequences: Customized stop sequences to stop the decoding process.
         candidate_count: Number of candidates to return.
         grounding_source: If specified, grounding feature will be enabled using the grounding source. Default: None.
-
+        logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+            at each generation step. The chosen tokens and their log probabilities at each step are always
+            returned. The chosen token may or may not be in the top `logprobs` most likely candidates.
+            The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+            probabilities are returned.
+            The maximum value for `logprobs` is 5.
+        presence_penalty:
+            Positive values penalize tokens that have appeared in the generated text,
+            thus increasing the likelihood of generating more diverse topics.
+            Range: [-2.0, 2.0]
+        frequency_penalty:
+            Positive values penalize tokens that repeatedly appear in the generated
+            text, thus decreasing the likelihood of repeating the same content.
+            Range: [-2.0, 2.0]
+        logit_bias:
+            Mapping from token IDs (integers) to their bias values (floats).
+            The bias values are added to the logits before sampling.
+            A larger positive bias increases the probability of choosing the token.
+            A larger negative bias decreases the probability of choosing the token.
+            Range: [-100.0, 100.0]

     Returns:
         A `_PredictionRequest` object that contains prediction instance and parameters.
@@ -1221,6 +1356,18 @@ def _create_text_generation_prediction_request(
             "groundingConfig"
         ] = grounding_source._to_grounding_source_dict()

+    if logprobs is not None:
+        prediction_parameters["logprobs"] = logprobs
+
+    if presence_penalty is not None:
+        prediction_parameters["presencePenalty"] = presence_penalty
+
+    if frequency_penalty is not None:
+        prediction_parameters["frequencyPenalty"] = frequency_penalty
+
+    if logit_bias is not None:
+        prediction_parameters["logitBias"] = logit_bias
+
     return _PredictionRequest(
         instance=instance,
         parameters=prediction_parameters,
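The request builder maps the snake_case keyword arguments onto the camelCase keys the prediction service expects and skips any parameter left as `None`. A standalone sketch of that mapping (illustrative only, not the SDK code itself), handy for checking what ends up in the request payload:

    from typing import Dict, Optional


    def build_sampling_parameters(
        logprobs: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        logit_bias: Optional[Dict[int, float]] = None,
    ) -> Dict[str, object]:
        """Mirrors the camelCase mapping added in this diff, omitting unset values."""
        parameters: Dict[str, object] = {}
        if logprobs is not None:
            parameters["logprobs"] = logprobs
        if presence_penalty is not None:
            parameters["presencePenalty"] = presence_penalty
        if frequency_penalty is not None:
            parameters["frequencyPenalty"] = frequency_penalty
        if logit_bias is not None:
            parameters["logitBias"] = logit_bias
        return parameters


    # {'logprobs': 2, 'presencePenalty': 0.5, 'logitBias': {7: -1.5}}
    print(build_sampling_parameters(logprobs=2, presence_penalty=0.5, logit_bias={7: -1.5}))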