
Commit f1faec1

Fix typos in strings and comments (#1770)
1 parent: 67db0cd

File tree

15 files changed: +16 -16 lines changed


bindings/node/lib/bindings/encoding.test.ts

Lines changed: 1 addition & 1 deletion

@@ -122,7 +122,7 @@ describe('Encoding', () => {
     expect(indexes).toEqual([3, 5])
   })

-  it('returns the corrent indexes with pair sequences', () => {
+  it('returns the correct indexes with pair sequences', () => {
    expect(encodingDual.wordToTokens(3, 0)).toEqual([3, 5])
    expect(encodingDual.wordToTokens(3, 1)).toEqual([8, 9])
  })

bindings/python/Cargo.toml

Lines changed: 1 addition & 1 deletion

@@ -27,4 +27,4 @@ tempfile = "3.10"
 pyo3 = { version = "0.23", features = ["auto-initialize"] }

 [features]
-defaut = ["pyo3/extension-module"]
+default = ["pyo3/extension-module"]

bindings/python/scripts/convert.py

Lines changed: 1 addition & 1 deletion

@@ -397,7 +397,7 @@ def main():
         "--models",
         type=lambda s: s.split(","),
         default=pretraineds,
-        help=f"The pretrained tokenizers you want to test agains, (default: {pretraineds})",
+        help=f"The pretrained tokenizers you want to test against, (default: {pretraineds})",
     )
     args = parser.parse_args()

bindings/python/src/decoders.rs

Lines changed: 1 addition & 1 deletion

@@ -404,7 +404,7 @@ impl PyMetaspaceDec {
 ///
 /// Args:
 ///     suffix (:obj:`str`, `optional`, defaults to :obj:`</w>`):
-///         The suffix that was used to caracterize an end-of-word. This suffix will
+///         The suffix that was used to characterize an end-of-word. This suffix will
 ///         be replaced by whitespaces during the decoding
 #[pyclass(extends=PyDecoder, module = "tokenizers.decoders", name = "BPEDecoder")]
 pub struct PyBPEDecoder {}
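
Since this doc comment describes runtime behavior, a minimal sketch of exercising it from the Rust crate may help; it assumes `BPEDecoder::new` and the `Decoder` trait's `decode` method as exposed by recent `tokenizers` releases:

use tokenizers::decoders::bpe::BPEDecoder;
use tokenizers::Decoder;

fn main() -> tokenizers::Result<()> {
    // Tokens from a BPE model that marks word endings with "</w>".
    let decoder = BPEDecoder::new("</w>".into());
    let tokens: Vec<String> = vec!["hello</w>".into(), "wor".into(), "ld</w>".into()];
    // Each "</w>" becomes whitespace during decoding, recovering "hello world".
    println!("{}", decoder.decode(tokens)?);
    Ok(())
}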

tokenizers/src/models/bpe/model.rs

Lines changed: 1 addition & 1 deletion

@@ -221,7 +221,7 @@ pub struct BPE {
     pub unk_token: Option<String>,
     /// An optional prefix to use on any subword that exist only behind another one
     pub continuing_subword_prefix: Option<String>,
-    /// An optional suffix to caracterize and end-of-word subword
+    /// An optional suffix to characterize and end-of-word subword
     pub end_of_word_suffix: Option<String>,
    /// Do multiple unk tokens get fused
    pub fuse_unk: bool,
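
To make the two fields above concrete, here is a rough sketch (not part of the commit) of setting them through the model's builder; the `BpeBuilder` method names are assumptions taken from the crate's public API:

use tokenizers::models::bpe::BPE;

fn main() -> tokenizers::Result<()> {
    // "##" marks a subword that continues a word (e.g. "token", "##izer"),
    // while "</w>" marks a subword that closes a word (e.g. "token", "izer</w>").
    let bpe = BPE::builder()
        .unk_token("[UNK]".into())
        .continuing_subword_prefix("##".into())
        .end_of_word_suffix("</w>".into())
        .build()?;
    let _ = bpe;
    Ok(())
}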

tokenizers/src/models/bpe/trainer.rs

Lines changed: 1 addition & 1 deletion

@@ -190,7 +190,7 @@ pub struct BpeTrainer {
     pub initial_alphabet: HashSet<char>,
     /// An optional prefix to use on any subword that exist only behind another one
     pub continuing_subword_prefix: Option<String>,
-    /// An optional suffix to caracterize and end-of-word subword
+    /// An optional suffix to characterize and end-of-word subword
     pub end_of_word_suffix: Option<String>,
    /// An optional parameter to limit the max length of any single token
    pub max_token_length: Option<usize>,

tokenizers/src/models/unigram/trainer.rs

Lines changed: 1 addition & 1 deletion

@@ -401,7 +401,7 @@ impl UnigramTrainer {

         let logsum_alt = (sum + freq[id] * (alternatives.len() - 1) as f64).ln();

-        // The frequencies of altenatives are increased by freq[i].
+        // The frequencies of alternatives are increased by freq[i].
         let mut logprob_alt = 0.0;
         for n in &alternatives[id] {
             logprob_alt += (freq[*n] + freq[id]).ln() - logsum_alt;
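
To unpack the arithmetic in this hunk: if piece `id` were pruned, its frequency would be folded into each piece of its alternative segmentation, which also inflates the normalizer. A simplified, self-contained restatement (illustrative only, not the trainer's exact code):

/// Log-probability of the alternative segmentation of piece `id`, assuming
/// the piece is removed and its frequency `freq[id]` is added to each piece
/// in `alternatives[id]`. `sum` is the current total frequency mass.
fn logprob_alternatives(freq: &[f64], alternatives: &[Vec<usize>], id: usize, sum: f64) -> f64 {
    // Redistributing freq[id] over k pieces adds a net (k - 1) * freq[id]
    // to the normalizer, since the piece's own mass is reused k times.
    let k = alternatives[id].len();
    let logsum_alt = (sum + freq[id] * (k - 1) as f64).ln();
    alternatives[id]
        .iter()
        .map(|&n| (freq[n] + freq[id]).ln() - logsum_alt)
        .sum()
}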

tokenizers/src/models/wordlevel/mod.rs

Lines changed: 1 addition & 1 deletion

@@ -73,7 +73,7 @@ impl WordLevelBuilder {
         self
     }

-    /// Contructs a `WordLevel` model that uses the `WordLevelBuilder`'s configuration.
+    /// Constructs a `WordLevel` model that uses the `WordLevelBuilder`'s configuration.
     pub fn build(mut self) -> Result<WordLevel> {
         if let Some(vocab) = self.config.files {
             self.config.vocab = WordLevel::read_file(&vocab)?;
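
For reference, a minimal usage sketch for this builder; it assumes the `vocab` setter accepts a `HashMap<String, u32>`, which may differ across releases:

use std::collections::HashMap;
use tokenizers::models::wordlevel::WordLevel;

fn main() -> tokenizers::Result<()> {
    // A tiny in-memory vocabulary; out-of-vocabulary words map to "<unk>".
    let vocab: HashMap<String, u32> = [("<unk>", 0), ("hello", 1), ("world", 2)]
        .into_iter()
        .map(|(w, id)| (w.to_string(), id))
        .collect();
    let model = WordLevel::builder()
        .vocab(vocab)
        .unk_token("<unk>".into())
        .build()?;
    let _ = model;
    Ok(())
}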

tokenizers/src/models/wordpiece/mod.rs

Lines changed: 1 addition & 1 deletion

@@ -93,7 +93,7 @@ impl WordPieceBuilder {
         self
     }

-    /// Contructs a `WordPiece` model that uses the `WordPieceBuilder`'s configuration.
+    /// Constructs a `WordPiece` model that uses the `WordPieceBuilder`'s configuration.
     pub fn build(mut self) -> Result<WordPiece> {
         if let Some(vocab) = self.config.files {
             self.config.vocab = WordPiece::read_file(&vocab)?;

tokenizers/src/models/wordpiece/trainer.rs

Lines changed: 1 addition & 1 deletion

@@ -170,7 +170,7 @@ impl WordPieceTrainer {
         // Transfer the vocab
         model.vocab = new_wordpiece.vocab;
         model.vocab_r = new_wordpiece.vocab_r;
-        // The continuing_subword_prefix is the only other option to be overriden by the trainer
+        // The continuing_subword_prefix is the only other option to be overridden by the trainer
        model.continuing_subword_prefix = new_wordpiece.continuing_subword_prefix;

        Ok(special_tokens)

tokenizers/src/normalizers/precompiled.rs

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@ fn replace(transformations: &mut Vec<(char, isize)>, old_part: &str, new_part: &
     transformations.extend(new_part.chars().map(|c| (c, 0)));

     match diff.cmp(&0) {
-        // If we are adding some characters, the last DIFF characters shoud be == 1
+        // If we are adding some characters, the last DIFF characters should be == 1
         Ordering::Greater => {
             transformations
                 .iter_mut()

tokenizers/src/tokenizer/added_vocabulary.rs

Lines changed: 1 addition & 1 deletion

@@ -29,7 +29,7 @@ pub struct AddedToken {
 }

 impl AddedToken {
-    /// Build this token from the given content, specifying if it is intented to be a
+    /// Build this token from the given content, specifying if it is intended to be a
     /// special token. Special tokens are not normalized by default.
    pub fn from<S: Into<String>>(content: S, special: bool) -> Self {
        Self {
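
A short usage sketch for this constructor; the chained setters such as `single_word` are assumptions taken from the crate's public `AddedToken` API:

use tokenizers::AddedToken;

fn main() {
    // A special token: not normalized by default, as the doc comment notes.
    let pad = AddedToken::from("[PAD]", true);
    // A regular added token, restricted to whole-word matches.
    let word = AddedToken::from("hello", false).single_word(true);
    // Both would then be registered via the tokenizer's add_tokens /
    // add_special_tokens methods.
    let _ = (pad, word);
}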

tokenizers/src/tokenizer/mod.rs

Lines changed: 1 addition & 1 deletion

@@ -389,7 +389,7 @@ where
         self
     }

-    /// Set the trunaction parameters.
+    /// Set the truncation parameters.
     #[must_use]
     pub fn with_truncation(mut self, trunc: Option<TruncationParams>) -> Self {
         self.truncation = trunc;
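
For context, a sketch of the parameters this setter receives; the `TruncationParams` field names are assumptions based on current releases:

use tokenizers::{TruncationParams, TruncationStrategy};

fn main() {
    // Truncate every encoding to at most 512 tokens, trimming the longest
    // sequence of a pair first; remaining fields keep their defaults.
    let trunc = TruncationParams {
        max_length: 512,
        strategy: TruncationStrategy::LongestFirst,
        ..Default::default()
    };
    // Passed as Some(trunc) to with_truncation on the builder.
    let _ = trunc;
}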

tokenizers/src/tokenizer/normalizer.rs

Lines changed: 2 additions & 2 deletions

@@ -201,9 +201,9 @@ impl NormalizedString {
         });

         match (start, end) {
-            // Targeting inexistant beginning
+            // Targeting inexistent beginning
             (Some(s), None) => Some(s..s),
-            // Targeting inexistant end
+            // Targeting inexistent end
             (None, Some(e)) => Some(e..e),
            // Found the range
            (Some(s), Some(e)) => Some(s..e),

tokenizers/src/tokenizer/pattern.rs

Lines changed: 1 addition & 1 deletion

@@ -122,7 +122,7 @@ where
     }
 }

-/// Invert the `is_match` flags for the wrapped Pattern. This is usefull
+/// Invert the `is_match` flags for the wrapped Pattern. This is useful
 /// for example when we use a regex that matches words instead of a delimiter,
 /// and we want to match the delimiter.
 pub struct Invert<P: Pattern>(pub P);
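
To make the doc comment concrete, here is a self-contained sketch of the idea (a simplified stand-in, not the crate's actual `Pattern` trait): a pattern splits the input into `((start, end), is_match)` spans, and `Invert` flips each flag so that delimiter spans become the matches.

// Simplified stand-in for the crate's Pattern machinery.
type Span = ((usize, usize), bool);

/// Split `s` into alternating word / non-word spans, flagging word spans.
fn word_spans(s: &str) -> Vec<Span> {
    let mut spans = Vec::new();
    let mut start = 0;
    let mut prev: Option<bool> = None;
    for (i, c) in s.char_indices() {
        let is_word = c.is_alphanumeric();
        if prev.map_or(false, |p| p != is_word) {
            spans.push(((start, i), prev.unwrap()));
            start = i;
        }
        prev = Some(is_word);
    }
    if let Some(p) = prev {
        spans.push(((start, s.len()), p));
    }
    spans
}

/// What `Invert` does: keep the spans, flip the `is_match` flags.
fn invert(spans: Vec<Span>) -> Vec<Span> {
    spans.into_iter().map(|(r, m)| (r, !m)).collect()
}

fn main() {
    // "hello world": the word spans are the matches...
    let spans = word_spans("hello world");
    // ...after inversion, the delimiter (the space) is the match instead.
    println!("{:?}", invert(spans));
}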
