From 78070b64d73ea126aa39e22f2d504d7f9b0b72c8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 May 2026 06:41:26 +0000 Subject: [PATCH] deps(cargo): bump tokenizers from 0.22.2 to 0.23.1 Bumps [tokenizers](https://github.com/huggingface/tokenizers) from 0.22.2 to 0.23.1. - [Release notes](https://github.com/huggingface/tokenizers/releases) - [Changelog](https://github.com/huggingface/tokenizers/blob/main/RELEASE.md) - [Commits](https://github.com/huggingface/tokenizers/compare/v0.22.2...v0.23.1) --- updated-dependencies: - dependency-name: tokenizers dependency-version: 0.23.1 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Cargo.lock | 12 +++++++++--- Cargo.toml | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0476cd9..6bc285c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1108,6 +1108,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "daachorse" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f55d7153ba3b507595872a3874803f07a8a81d1e888abed8e5db7da0597d6e2" + [[package]] name = "darling" version = "0.20.11" @@ -5742,13 +5748,13 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokenizers" -version = "0.22.2" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b238e22d44a15349529690fb07bd645cf58149a1b1e44d6cb5bd1641ff1a6223" +checksum = "44e5bea67576e04b6ff8564c5d9e09c2ef0cf476502245f2f120e497769d3112" dependencies = [ "ahash", - "aho-corasick", "compact_str", + "daachorse", "dary_heap", "derive_builder", "esaxx-rs", diff --git a/Cargo.toml b/Cargo.toml index bcd5eb7..e7b4afc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -144,7 +144,7 @@ arc-swap = "1" memmap2 = "0.9" # HuggingFace tokenizer for subword tokenization across all embedding backends -tokenizers = "0.22" +tokenizers = "0.23" # PDF text extraction (pure Rust, the default backend) pdf-extract = "0.10"