Skip to content

Commit 769a258

Browse files
committed
refactor(embedding): improve OpenAiEncoding encode and decode methods
Refactored the encode method to use IntArrayList for better performance and modified the decode method to handle IntArrayList instead of List<Int>.
1 parent 2ffb207 commit 769a258

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

cocoa-core/src/main/kotlin/cc/unitmesh/nlp/embedding/OpenAiEncoding.kt

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,15 +4,20 @@ import com.knuddels.jtokkit.Encodings
44
import com.knuddels.jtokkit.api.Encoding
55
import com.knuddels.jtokkit.api.EncodingRegistry
66
import com.knuddels.jtokkit.api.EncodingType
7+
import com.knuddels.jtokkit.api.IntArrayList
78

89
/**
 * [EncodingTokenizer] implementation that delegates to jtokkit's `CL100K_BASE` encoding.
 *
 * jtokkit's [Encoding] exchanges tokens as [IntArrayList], while this tokenizer's interface
 * uses `List<Int>`, so both methods convert at the boundary.
 */
class OpenAiEncoding : EncodingTokenizer {
    private val registry: EncodingRegistry = Encodings.newLazyEncodingRegistry()
    private val encoding: Encoding = registry.getEncoding(EncodingType.CL100K_BASE)

    /**
     * Encodes [text] into token ids.
     *
     * @param text the text to tokenize
     * @return the token ids, boxed from jtokkit's [IntArrayList] into a `List<Int>`
     */
    override fun encode(text: String): List<Int> = encoding.encode(text).boxed()

    /**
     * Decodes [tokens] back into text.
     *
     * @param tokens the token ids to decode
     * @return the decoded string
     */
    override fun decode(tokens: List<Int>): String {
        // Encoding.decode takes an IntArrayList, so copy the ids over in their original order.
        val ids = IntArrayList(tokens.size)
        tokens.forEach { ids.add(it) }
        return encoding.decode(ids)
    }
}

0 commit comments

Comments
 (0)