49
49
* @author Soby Chacko
50
50
* @author Mark Pollack
51
51
* @author Laura Trotta
52
+ * @author Jihoon Kim
52
53
* @since 1.0.0
53
54
*/
54
55
public class TokenCountBatchingStrategy implements BatchingStrategy {
@@ -68,7 +69,7 @@ public class TokenCountBatchingStrategy implements BatchingStrategy {
68
69
69
70
private final int maxInputTokenCount ;
70
71
71
- private final ContentFormatter contentFormater ;
72
+ private final ContentFormatter contentFormatter ;
72
73
73
74
private final MetadataMode metadataMode ;
74
75
@@ -78,9 +79,9 @@ public TokenCountBatchingStrategy() {
78
79
79
80
/**
80
81
* @param encodingType {@link EncodingType}
82
+ * @param maxInputTokenCount upper limit for input tokens
81
83
* @param reservePercentage the percentage of tokens to reserve from the max input
82
84
* token count to create a buffer.
83
- * @param maxInputTokenCount upper limit for input tokens
84
85
*/
85
86
public TokenCountBatchingStrategy (EncodingType encodingType , int maxInputTokenCount , double reservePercentage ) {
86
87
this (encodingType , maxInputTokenCount , reservePercentage , Document .DEFAULT_CONTENT_FORMATTER ,
@@ -106,7 +107,7 @@ public TokenCountBatchingStrategy(EncodingType encodingType, int maxInputTokenCo
106
107
Assert .notNull (metadataMode , "MetadataMode must not be null" );
107
108
this .tokenCountEstimator = new JTokkitTokenCountEstimator (encodingType );
108
109
this .maxInputTokenCount = (int ) Math .round (maxInputTokenCount * (1 - reservePercentage ));
109
- this .contentFormater = contentFormatter ;
110
+ this .contentFormatter = contentFormatter ;
110
111
this .metadataMode = metadataMode ;
111
112
}
112
113
@@ -129,7 +130,7 @@ public TokenCountBatchingStrategy(TokenCountEstimator tokenCountEstimator, int m
129
130
Assert .notNull (metadataMode , "MetadataMode must not be null" );
130
131
this .tokenCountEstimator = tokenCountEstimator ;
131
132
this .maxInputTokenCount = (int ) Math .round (maxInputTokenCount * (1 - reservePercentage ));
132
- this .contentFormater = contentFormatter ;
133
+ this .contentFormatter = contentFormatter ;
133
134
this .metadataMode = metadataMode ;
134
135
}
135
136
@@ -142,7 +143,7 @@ public List<List<Document>> batch(List<Document> documents) {
142
143
143
144
for (Document document : documents ) {
144
145
int tokenCount = this .tokenCountEstimator
145
- .estimate (document .getFormattedContent (this .contentFormater , this .metadataMode ));
146
+ .estimate (document .getFormattedContent (this .contentFormatter , this .metadataMode ));
146
147
if (tokenCount > this .maxInputTokenCount ) {
147
148
throw new IllegalArgumentException (
148
149
"Tokens in a single document exceeds the maximum number of allowed input tokens" );
0 commit comments