
Commit 1449344

Ark-kun authored and copybara-github committed
feat: LLM - Added support for the `logprobs`, `presence_penalty`, `frequency_penalty`, and `logit_bias` generation parameters
PiperOrigin-RevId: 589026949
1 parent 10c6ad2 commit 1449344

File tree

tests/unit/aiplatform/test_language_models.py
vertexai/language_models/_language_models.py

2 files changed: +156 -1 lines changed

tests/unit/aiplatform/test_language_models.py

Lines changed: 8 additions & 0 deletions
@@ -1483,6 +1483,10 @@ def test_text_generation_ga(self):
             top_p=1.0,
             top_k=5,
             stop_sequences=["\n"],
+            logprobs=3,
+            presence_penalty=1.0,
+            frequency_penalty=1.0,
+            logit_bias={1: 100.0, 2: -100.0},
         )

         expected_errors = (100,)
@@ -1492,6 +1496,10 @@ def test_text_generation_ga(self):
         assert prediction_parameters["topP"] == 1.0
         assert prediction_parameters["topK"] == 5
         assert prediction_parameters["stopSequences"] == ["\n"]
+        assert prediction_parameters["logprobs"] == 3
+        assert prediction_parameters["presencePenalty"] == 1.0
+        assert prediction_parameters["frequencyPenalty"] == 1.0
+        assert prediction_parameters["logitBias"] == {1: 100.0, 2: -100.0}
         assert response.text == _TEST_TEXT_GENERATION_PREDICTION["content"]
         assert response.errors == expected_errors
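
For context, a minimal sketch of how a caller might pass the new parameters once this change is released, mirroring the values exercised by the test above. The project, location, prompt, and model version are placeholders, not part of this commit:

import vertexai
from vertexai.language_models import TextGenerationModel

vertexai.init(project="my-project", location="us-central1")

model = TextGenerationModel.from_pretrained("text-bison@001")
response = model.predict(
    "Give me ten interview questions for the role of program manager.",
    logprobs=3,                         # return the top-3 candidate tokens per step
    presence_penalty=1.0,               # discourage tokens that already appeared
    frequency_penalty=1.0,              # discourage frequently repeated tokens
    logit_bias={1: 100.0, 2: -100.0},   # favor token ID 1, suppress token ID 2
)
print(response.text)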

vertexai/language_models/_language_models.py

Lines changed: 148 additions & 1 deletion
@@ -978,6 +978,10 @@ def predict(
         grounding_source: Optional[
             Union[GroundingSource.WebSearch, GroundingSource.VertexAISearch]
         ] = None,
+        logprobs: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> "MultiCandidateTextGenerationResponse":
         """Gets model response for a single prompt.
@@ -990,6 +994,26 @@ def predict(
             stop_sequences: Customized stop sequences to stop the decoding process.
             candidate_count: Number of response candidates to return.
             grounding_source: If specified, grounding feature will be enabled using the grounding source. Default: None.
+            logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+                at each generation step. The chosen tokens and their log probabilities at each step are always
+                returned. The chosen token may or may not be in the top `logprobs` most likely candidates.
+                The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+                probabilities are returned.
+                The maximum value for `logprobs` is 5.
+            presence_penalty:
+                Positive values penalize tokens that have already appeared in the generated text,
+                thus increasing the likelihood of generating more diverse topics.
+                Range: [-2.0, 2.0]
+            frequency_penalty:
+                Positive values penalize tokens that repeatedly appear in the generated
+                text, thus decreasing the likelihood of repeating the same content.
+                Range: [-2.0, 2.0]
+            logit_bias:
+                Mapping from token IDs (integers) to their bias values (floats).
+                The bias values are added to the logits before sampling.
+                A positive bias increases the probability of choosing the token;
+                a negative bias decreases it.
+                Range: [-100.0, 100.0]

         Returns:
             A `MultiCandidateTextGenerationResponse` object that contains the text produced by the model.
@@ -1003,6 +1027,10 @@ def predict(
             stop_sequences=stop_sequences,
             candidate_count=candidate_count,
             grounding_source=grounding_source,
+            logprobs=logprobs,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
         )

         prediction_response = self._endpoint.predict(
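
The docstring above describes `logit_bias` as a value added to a token's logit before sampling. As a back-of-the-envelope illustration of the effect (a local sketch, not the service's implementation, which applies the bias server-side), adding a bias b to a logit multiplies that token's unnormalized probability by e**b:

import math

def softmax(logits):
    # Turn raw logits into a probability distribution.
    exps = [math.exp(x) for x in logits]
    total = sum(exps)
    return [e / total for e in exps]

logits = [2.0, 1.0, 0.5]            # hypothetical logits for token IDs 0, 1, 2
print(softmax(logits))              # ~[0.63, 0.23, 0.14]

bias = {2: 4.0}                     # logit_bias-style mapping: boost token ID 2
biased = [x + bias.get(i, 0.0) for i, x in enumerate(logits)]
print(softmax(biased))              # ~[0.07, 0.03, 0.90]; token 2 now dominates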
@@ -1027,6 +1055,10 @@ async def predict_async(
         grounding_source: Optional[
             Union[GroundingSource.WebSearch, GroundingSource.VertexAISearch]
         ] = None,
+        logprobs: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> "MultiCandidateTextGenerationResponse":
         """Asynchronously gets model response for a single prompt.
@@ -1039,6 +1071,26 @@ async def predict_async(
             stop_sequences: Customized stop sequences to stop the decoding process.
             candidate_count: Number of response candidates to return.
             grounding_source: If specified, grounding feature will be enabled using the grounding source. Default: None.
+            logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+                at each generation step. The chosen tokens and their log probabilities at each step are always
+                returned. The chosen token may or may not be in the top `logprobs` most likely candidates.
+                The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+                probabilities are returned.
+                The maximum value for `logprobs` is 5.
+            presence_penalty:
+                Positive values penalize tokens that have already appeared in the generated text,
+                thus increasing the likelihood of generating more diverse topics.
+                Range: [-2.0, 2.0]
+            frequency_penalty:
+                Positive values penalize tokens that repeatedly appear in the generated
+                text, thus decreasing the likelihood of repeating the same content.
+                Range: [-2.0, 2.0]
+            logit_bias:
+                Mapping from token IDs (integers) to their bias values (floats).
+                The bias values are added to the logits before sampling.
+                A positive bias increases the probability of choosing the token;
+                a negative bias decreases it.
+                Range: [-100.0, 100.0]

         Returns:
             A `MultiCandidateTextGenerationResponse` object that contains the text produced by the model.
@@ -1052,6 +1104,10 @@ async def predict_async(
             stop_sequences=stop_sequences,
             candidate_count=candidate_count,
             grounding_source=grounding_source,
+            logprobs=logprobs,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
         )

         prediction_response = await self._endpoint.predict_async(
@@ -1072,6 +1128,10 @@ def predict_streaming(
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
         stop_sequences: Optional[List[str]] = None,
+        logprobs: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> Iterator[TextGenerationResponse]:
         """Gets a streaming model response for a single prompt.
@@ -1084,6 +1144,26 @@ def predict_streaming(
             top_k: The number of highest probability vocabulary tokens to keep for top-k-filtering. Range: [1, 40]. Default: 40.
             top_p: The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling. Range: [0, 1]. Default: 0.95.
             stop_sequences: Customized stop sequences to stop the decoding process.
+            logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+                at each generation step. The chosen tokens and their log probabilities at each step are always
+                returned. The chosen token may or may not be in the top `logprobs` most likely candidates.
+                The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+                probabilities are returned.
+                The maximum value for `logprobs` is 5.
+            presence_penalty:
+                Positive values penalize tokens that have already appeared in the generated text,
+                thus increasing the likelihood of generating more diverse topics.
+                Range: [-2.0, 2.0]
+            frequency_penalty:
+                Positive values penalize tokens that repeatedly appear in the generated
+                text, thus decreasing the likelihood of repeating the same content.
+                Range: [-2.0, 2.0]
+            logit_bias:
+                Mapping from token IDs (integers) to their bias values (floats).
+                The bias values are added to the logits before sampling.
+                A positive bias increases the probability of choosing the token;
+                a negative bias decreases it.
+                Range: [-100.0, 100.0]

         Yields:
             A stream of `TextGenerationResponse` objects that contain partial
@@ -1096,6 +1176,10 @@ def predict_streaming(
             top_k=top_k,
             top_p=top_p,
             stop_sequences=stop_sequences,
+            logprobs=logprobs,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
         )

         prediction_service_client = self._endpoint._prediction_client
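
The streaming variant forwards the same sampling controls on the request. A rough usage sketch; the model version and prompt are placeholders rather than part of this commit:

from vertexai.language_models import TextGenerationModel

model = TextGenerationModel.from_pretrained("text-bison@001")

# Stream partial responses; the new sampling controls apply to the whole request.
for chunk in model.predict_streaming(
    "Summarize the plot of Hamlet in three sentences.",
    max_output_tokens=256,
    logprobs=2,
    presence_penalty=0.5,
    frequency_penalty=0.5,
):
    print(chunk.text, end="")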
@@ -1122,6 +1206,10 @@ async def predict_streaming_async(
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
         stop_sequences: Optional[List[str]] = None,
+        logprobs: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> AsyncIterator[TextGenerationResponse]:
         """Asynchronously gets a streaming model response for a single prompt.
@@ -1134,6 +1222,26 @@ async def predict_streaming_async(
             top_k: The number of highest probability vocabulary tokens to keep for top-k-filtering. Range: [1, 40]. Default: 40.
             top_p: The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling. Range: [0, 1]. Default: 0.95.
             stop_sequences: Customized stop sequences to stop the decoding process.
+            logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+                at each generation step. The chosen tokens and their log probabilities at each step are always
+                returned. The chosen token may or may not be in the top `logprobs` most likely candidates.
+                The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+                probabilities are returned.
+                The maximum value for `logprobs` is 5.
+            presence_penalty:
+                Positive values penalize tokens that have already appeared in the generated text,
+                thus increasing the likelihood of generating more diverse topics.
+                Range: [-2.0, 2.0]
+            frequency_penalty:
+                Positive values penalize tokens that repeatedly appear in the generated
+                text, thus decreasing the likelihood of repeating the same content.
+                Range: [-2.0, 2.0]
+            logit_bias:
+                Mapping from token IDs (integers) to their bias values (floats).
+                The bias values are added to the logits before sampling.
+                A positive bias increases the probability of choosing the token;
+                a negative bias decreases it.
+                Range: [-100.0, 100.0]

         Yields:
             A stream of `TextGenerationResponse` objects that contain partial
@@ -1146,6 +1254,10 @@ async def predict_streaming_async(
             top_k=top_k,
             top_p=top_p,
             stop_sequences=stop_sequences,
+            logprobs=logprobs,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
        )

         prediction_service_async_client = self._endpoint._prediction_async_client
@@ -1174,6 +1286,10 @@ def _create_text_generation_prediction_request(
         grounding_source: Optional[
             Union[GroundingSource.WebSearch, GroundingSource.VertexAISearch]
         ] = None,
+        logprobs: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> "_PredictionRequest":
         """Prepares the text generation request for a single prompt.
@@ -1186,7 +1302,26 @@ def _create_text_generation_prediction_request(
             stop_sequences: Customized stop sequences to stop the decoding process.
             candidate_count: Number of candidates to return.
             grounding_source: If specified, grounding feature will be enabled using the grounding source. Default: None.
-
+            logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+                at each generation step. The chosen tokens and their log probabilities at each step are always
+                returned. The chosen token may or may not be in the top `logprobs` most likely candidates.
+                The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+                probabilities are returned.
+                The maximum value for `logprobs` is 5.
+            presence_penalty:
+                Positive values penalize tokens that have already appeared in the generated text,
+                thus increasing the likelihood of generating more diverse topics.
+                Range: [-2.0, 2.0]
+            frequency_penalty:
+                Positive values penalize tokens that repeatedly appear in the generated
+                text, thus decreasing the likelihood of repeating the same content.
+                Range: [-2.0, 2.0]
+            logit_bias:
+                Mapping from token IDs (integers) to their bias values (floats).
+                The bias values are added to the logits before sampling.
+                A positive bias increases the probability of choosing the token;
+                a negative bias decreases it.
+                Range: [-100.0, 100.0]

         Returns:
             A `_PredictionRequest` object that contains prediction instance and parameters.
@@ -1221,6 +1356,18 @@ def _create_text_generation_prediction_request(
                 "groundingConfig"
             ] = grounding_source._to_grounding_source_dict()

+        if logprobs is not None:
+            prediction_parameters["logprobs"] = logprobs
+
+        if presence_penalty is not None:
+            prediction_parameters["presencePenalty"] = presence_penalty
+
+        if frequency_penalty is not None:
+            prediction_parameters["frequencyPenalty"] = frequency_penalty
+
+        if logit_bias is not None:
+            prediction_parameters["logitBias"] = logit_bias
+
         return _PredictionRequest(
             instance=instance,
             parameters=prediction_parameters,
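
Putting the request builder together with the unit test above: each keyword argument is only set when the caller provides it, and the keys are the camelCase names of the Python parameters (with `logprobs` passed through unchanged). For the test's inputs, the relevant part of the resulting parameters dict looks like this (only keys exercised in this diff are shown):

prediction_parameters = {
    "topP": 1.0,                         # from top_p
    "topK": 5,                           # from top_k
    "stopSequences": ["\n"],             # from stop_sequences
    "logprobs": 3,                       # passed through as-is
    "presencePenalty": 1.0,              # from presence_penalty
    "frequencyPenalty": 1.0,             # from frequency_penalty
    "logitBias": {1: 100.0, 2: -100.0},  # from logit_bias
}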

0 commit comments
