Skip to content

Commit 0825d52

Browse files
1993heqiang authored and markpollack committed
Add validation and rename parameter in TokenCountBatchingStrategy
This commit renames the thresholdFactor parameter to reservePercentage to better reflect its purpose in the TokenCountBatchingStrategy class. It also adds validation of the constructor arguments for safer initialization: maxInputTokenCount must be greater than 0 and reservePercentage must be in the range [0, 1). Validating these parameters when a TokenCountBatchingStrategy is created prevents runtime errors caused by invalid inputs.
1 parent 83f7164 commit 0825d52

File tree

1 file changed

+11
-4
lines changed

1 file changed

+11
-4
lines changed

spring-ai-core/src/main/java/org/springframework/ai/embedding/TokenCountBatchingStrategy.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,13 @@ public TokenCountBatchingStrategy() {
7878

7979
/**
8080
* @param encodingType {@link EncodingType}
81-
* @param thresholdFactor the threshold factor to use on top of the max input token
82-
* count
81+
* @param reservePercentage the percentage of tokens to reserve from the max input
82+
* token count to create a buffer.
8383
* @param maxInputTokenCount upper limit for input tokens
8484
*/
85-
public TokenCountBatchingStrategy(EncodingType encodingType, int maxInputTokenCount, double thresholdFactor) {
86-
this(encodingType, maxInputTokenCount, thresholdFactor, Document.DEFAULT_CONTENT_FORMATTER, MetadataMode.NONE);
85+
public TokenCountBatchingStrategy(EncodingType encodingType, int maxInputTokenCount, double reservePercentage) {
86+
this(encodingType, maxInputTokenCount, reservePercentage, Document.DEFAULT_CONTENT_FORMATTER,
87+
MetadataMode.NONE);
8788
}
8889

8990
/**
@@ -99,6 +100,8 @@ public TokenCountBatchingStrategy(EncodingType encodingType, int maxInputTokenCo
99100
public TokenCountBatchingStrategy(EncodingType encodingType, int maxInputTokenCount, double reservePercentage,
100101
ContentFormatter contentFormatter, MetadataMode metadataMode) {
101102
Assert.notNull(encodingType, "EncodingType must not be null");
103+
Assert.isTrue(maxInputTokenCount > 0, "MaxInputTokenCount must be greater than 0");
104+
Assert.isTrue(reservePercentage >= 0 && reservePercentage < 1, "ReservePercentage must be in range [0, 1)");
102105
Assert.notNull(contentFormatter, "ContentFormatter must not be null");
103106
Assert.notNull(metadataMode, "MetadataMode must not be null");
104107
this.tokenCountEstimator = new JTokkitTokenCountEstimator(encodingType);
@@ -120,6 +123,10 @@ public TokenCountBatchingStrategy(EncodingType encodingType, int maxInputTokenCo
120123
public TokenCountBatchingStrategy(TokenCountEstimator tokenCountEstimator, int maxInputTokenCount,
121124
double reservePercentage, ContentFormatter contentFormatter, MetadataMode metadataMode) {
122125
Assert.notNull(tokenCountEstimator, "TokenCountEstimator must not be null");
126+
Assert.isTrue(maxInputTokenCount > 0, "MaxInputTokenCount must be greater than 0");
127+
Assert.isTrue(reservePercentage >= 0 && reservePercentage < 1, "ReservePercentage must be in range [0, 1)");
128+
Assert.notNull(contentFormatter, "ContentFormatter must not be null");
129+
Assert.notNull(metadataMode, "MetadataMode must not be null");
123130
this.tokenCountEstimator = tokenCountEstimator;
124131
this.maxInputTokenCount = (int) Math.round(maxInputTokenCount * (1 - reservePercentage));
125132
this.contentFormater = contentFormatter;

0 commit comments

Comments
 (0)