Skip to content

Commit 8217267

Browse files
committed
add individual .pyi
1 parent 85691df commit 8217267

8 files changed

Lines changed: 733 additions & 1756 deletions

File tree

bindings/python/py_src/tokenizers/__init__.pyi

Lines changed: 150 additions & 1748 deletions
Large diffs are not rendered by default.
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import tokenizers
2+
import tokenizers.decoders
3+
import typing
4+
5+
class BPEDecoder:
    """Stub for the BPE decoder; configured by a single ``suffix`` string."""

    def __new__(cls, /, suffix: str = ...) -> None: ...
    @property
    def suffix(self, /) -> str: ...
    @suffix.setter
    def suffix(self, /, suffix: str) -> None: ...
11+
12+
class ByteFallback:
    """Stub for the ByteFallback decoder; takes no constructor arguments."""

    def __new__(cls, /) -> None: ...


class ByteLevel:
    """Stub for the ByteLevel decoder.

    Accepts arbitrary keyword arguments; the ``_kwargs`` name suggests they
    are unused by the binding — confirm against the Rust implementation.
    """

    def __new__(cls, /, **_kwargs) -> None: ...
17+
18+
class CTC:
    """Stub for the CTC decoder.

    Configured by a ``pad_token``, a ``word_delimiter_token``, and a
    ``cleanup`` flag (default ``True``); all three are exposed as
    read/write properties.
    """

    def __new__(cls, /, pad_token: str = ..., word_delimiter_token: str = ..., cleanup: bool = True) -> None: ...
    @property
    def cleanup(self, /) -> bool: ...
    @cleanup.setter
    def cleanup(self, /, cleanup: bool) -> None: ...
    @property
    def pad_token(self, /) -> str: ...
    @pad_token.setter
    def pad_token(self, /, pad_token: str) -> None: ...
    @property
    def word_delimiter_token(self, /) -> str: ...
    @word_delimiter_token.setter
    def word_delimiter_token(self, /, word_delimiter_token: str) -> None: ...
32+
33+
class DecodeStream:
    """Stub for a streaming decode helper.

    ``step`` feeds one ``id`` through the given ``tokenizer``; the return
    type is opaque here — presumably the incrementally decoded text, but
    that is not visible from this stub.
    """

    def __new__(cls, /, ids: typing.Any | None = None, skip_special_tokens: bool | None = False) -> None: ...
    def step(self, /, tokenizer: tokenizers.Tokenizer, id: typing.Any) -> typing.Any: ...
36+
37+
class Decoder:
    """Base stub for decoders: pickle support, repr/str, and token-list decoding."""

    def __getstate__(self, /) -> typing.Any: ...
    def __repr__(self, /) -> str: ...
    def __setstate__(self, /, state: typing.Any) -> typing.Any: ...
    def __str__(self, /) -> str: ...
    @staticmethod
    def custom(decoder: typing.Any) -> tokenizers.decoders.Decoder: ...  # wraps a Python object as a Decoder
    def decode(self, /, tokens: typing.Any) -> str: ...
45+
46+
class Fuse:
    """Stub for the Fuse decoder; takes no constructor arguments."""

    def __new__(cls, /) -> None: ...
48+
49+
class Metaspace:
    """Stub for the Metaspace decoder.

    Configured by a ``replacement`` character (default ``'▁'``), a
    ``prepend_scheme`` string, and a ``split`` flag; all exposed as
    read/write properties.
    """

    def __new__(cls, /, replacement: str = '▁', prepend_scheme: str = ..., split: bool = True) -> None: ...
    @property
    def prepend_scheme(self, /) -> str: ...
    @prepend_scheme.setter
    # Fixed: was annotated `-> typing.Any`; every other setter in this file
    # returns None, and property setters always return None at runtime.
    def prepend_scheme(self, /, prepend_scheme: str) -> None: ...
    @property
    def replacement(self, /) -> str: ...
    @replacement.setter
    def replacement(self, /, replacement: str) -> None: ...
    @property
    def split(self, /) -> bool: ...
    @split.setter
    def split(self, /, split: bool) -> None: ...
63+
64+
class Replace:
    """Stub for the Replace decoder: substitutes ``pattern`` matches with ``content``."""

    def __new__(cls, /, pattern: str | tokenizers.Regex, content: str) -> None: ...


class Sequence:
    """Stub for a decoder chain built from a sequence of decoders."""

    def __getnewargs__(self, /) -> typing.Any: ...  # pickle support
    def __new__(cls, /, decoders_py: typing.Any) -> None: ...
70+
71+
class Strip:
    """Stub for the Strip decoder.

    NOTE(review): ``__new__`` takes ``left``/``right`` counts, but the
    exposed properties are named ``start``/``stop``. This mirrors the
    binding as generated — confirm the naming mismatch against the Rust
    implementation before relying on it.
    """

    def __new__(cls, /, content: str = ' ', left: int = 0, right: int = 0) -> None: ...
    @property
    def content(self, /) -> str: ...
    @content.setter
    def content(self, /, content: str) -> None: ...
    @property
    def start(self, /) -> int: ...
    @start.setter
    def start(self, /, start: int) -> None: ...
    @property
    def stop(self, /) -> int: ...
    @stop.setter
    def stop(self, /, stop: int) -> None: ...
85+
86+
class WordPiece:
    """Stub for the WordPiece decoder.

    Configured by a subword ``prefix`` and a ``cleanup`` flag
    (default ``True``); both are read/write properties.
    """

    def __new__(cls, /, prefix: str = ..., cleanup: bool = True) -> None: ...
    @property
    def cleanup(self, /) -> bool: ...
    @cleanup.setter
    def cleanup(self, /, cleanup: bool) -> None: ...
    @property
    def prefix(self, /) -> str: ...
    @prefix.setter
    def prefix(self, /, prefix: str) -> None: ...
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import typing
2+
3+
class BPE:
    """Stub for the BPE model.

    ``vocab`` and ``merges`` each accept an in-memory object, a path
    string, or ``None``; remaining options pass through ``**kwargs``.
    """

    def __new__(cls, /, vocab: typing.Any | str | None = None, merges: typing.Any | str | None = None, **kwargs) -> None: ...
    # Private cache-management helpers exposed by the binding.
    def _clear_cache(self, /) -> typing.Any: ...
    def _resize_cache(self, /, capacity: int) -> typing.Any: ...
    @property
    def byte_fallback(self, /) -> bool: ...
    @byte_fallback.setter
    def byte_fallback(self, /, byte_fallback: bool) -> None: ...
    @property
    def continuing_subword_prefix(self, /) -> typing.Any: ...
    @continuing_subword_prefix.setter
    def continuing_subword_prefix(self, /, continuing_subword_prefix: str | None) -> None: ...
    @property
    def dropout(self, /) -> typing.Any: ...
    @dropout.setter
    def dropout(self, /, dropout: float | None) -> None: ...
    @property
    def end_of_word_suffix(self, /) -> typing.Any: ...
    @end_of_word_suffix.setter
    def end_of_word_suffix(self, /, end_of_word_suffix: str | None) -> None: ...
    @classmethod
    def from_file(cls, /, vocab: str, merges: str, **kwargs) -> BPE: ...  # alternate constructor from vocab/merges files
    @property
    def fuse_unk(self, /) -> bool: ...
    @fuse_unk.setter
    def fuse_unk(self, /, fuse_unk: bool) -> None: ...
    @property
    def ignore_merges(self, /) -> bool: ...
    @ignore_merges.setter
    def ignore_merges(self, /, ignore_merges: bool) -> None: ...
    @staticmethod
    def read_file(vocab: str, merges: str) -> typing.Any: ...  # parses files without building a model
    @property
    def unk_token(self, /) -> typing.Any: ...
    @unk_token.setter
    def unk_token(self, /, unk_token: str | None) -> None: ...
39+
40+
class Model:
    """Base stub for tokenization models.

    Provides pickle support, id/token lookup in both directions,
    single-sequence tokenization, trainer retrieval, and ``save`` into a
    folder with an optional filename prefix.
    """

    def __getstate__(self, /) -> typing.Any: ...
    def __new__(cls, /) -> None: ...
    def __repr__(self, /) -> str: ...
    def __setstate__(self, /, state: typing.Any) -> typing.Any: ...
    def __str__(self, /) -> str: ...
    def get_trainer(self, /) -> typing.Any: ...
    def id_to_token(self, /, id: int) -> typing.Any: ...
    def save(self, /, folder: str, prefix: str | None = None, name: str | None = None) -> typing.Any: ...
    def token_to_id(self, /, token: str) -> typing.Any: ...
    def tokenize(self, /, sequence: str) -> typing.Any: ...
51+
52+
class Unigram:
    """Stub for the Unigram model; takes a vocab, an unknown-token id, and a byte-fallback flag."""

    def __new__(cls, /, vocab: typing.Any | None = None, unk_id: int | None = None, byte_fallback: bool | None = None) -> None: ...
    # Private cache-management helpers exposed by the binding.
    def _clear_cache(self, /) -> typing.Any: ...
    def _resize_cache(self, /, capacity: int) -> typing.Any: ...
56+
57+
class WordLevel:
    """Stub for the WordLevel model.

    ``vocab`` accepts an in-memory object, a path string, or ``None``;
    ``unk_token`` is exposed as a read/write property.
    """

    def __new__(cls, /, vocab: typing.Any | str | None = None, unk_token: str | None = None) -> None: ...
    @classmethod
    def from_file(cls, /, vocab: str, unk_token: str | None = None) -> WordLevel: ...  # alternate constructor from a vocab file
    @staticmethod
    def read_file(vocab: str) -> typing.Any: ...
    @property
    def unk_token(self, /) -> str: ...
    @unk_token.setter
    def unk_token(self, /, unk_token: str) -> None: ...
67+
68+
class WordPiece:
    """Stub for the WordPiece model.

    ``vocab`` accepts an in-memory object, a path string, or ``None``;
    further options pass through ``**kwargs``.
    """

    def __new__(cls, /, vocab: typing.Any | str | None = None, **kwargs) -> None: ...
    @property
    def continuing_subword_prefix(self, /) -> str: ...
    @continuing_subword_prefix.setter
    def continuing_subword_prefix(self, /, continuing_subword_prefix: str) -> None: ...
    @classmethod
    def from_file(cls, /, vocab: str, **kwargs) -> WordPiece: ...  # alternate constructor from a vocab file
    @property
    def max_input_chars_per_word(self, /) -> int: ...
    @max_input_chars_per_word.setter
    def max_input_chars_per_word(self, /, max: int) -> None: ...
    @staticmethod
    def read_file(vocab: str) -> typing.Any: ...
    @property
    def unk_token(self, /) -> str: ...
    @unk_token.setter
    def unk_token(self, /, unk_token: str) -> None: ...
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import tokenizers
2+
import tokenizers.normalizers
3+
import typing
4+
5+
class BertNormalizer:
    """Stub for the BERT normalizer.

    Four options: ``clean_text``, ``handle_chinese_chars``, ``lowercase``
    (all default ``True``) and tri-state ``strip_accents`` (``None`` by
    default); all exposed as read/write properties.
    """

    def __new__(cls, /, clean_text: bool = True, handle_chinese_chars: bool = True, strip_accents: bool | None = None, lowercase: bool = True) -> None: ...
    @property
    def clean_text(self, /) -> bool: ...
    @clean_text.setter
    def clean_text(self, /, clean_text: bool) -> None: ...
    @property
    def handle_chinese_chars(self, /) -> bool: ...
    @handle_chinese_chars.setter
    def handle_chinese_chars(self, /, handle_chinese_chars: bool) -> None: ...
    @property
    def lowercase(self, /) -> bool: ...
    @lowercase.setter
    def lowercase(self, /, lowercase: bool) -> None: ...
    @property
    def strip_accents(self, /) -> typing.Any: ...
    @strip_accents.setter
    def strip_accents(self, /, strip_accents: bool | None) -> None: ...
23+
24+
class ByteLevel:
    """Stub for the ByteLevel normalizer; takes no constructor arguments."""

    def __new__(cls, /) -> None: ...


class Lowercase:
    """Stub for the Lowercase normalizer; takes no constructor arguments."""

    def __new__(cls, /) -> None: ...


class NFC:
    """Stub for the NFC Unicode-normalization normalizer."""

    def __new__(cls, /) -> None: ...


class NFD:
    """Stub for the NFD Unicode-normalization normalizer."""

    def __new__(cls, /) -> None: ...


class NFKC:
    """Stub for the NFKC Unicode-normalization normalizer."""

    def __new__(cls, /) -> None: ...


class NFKD:
    """Stub for the NFKD Unicode-normalization normalizer."""

    def __new__(cls, /) -> None: ...


class Nmt:
    """Stub for the Nmt normalizer; takes no constructor arguments."""

    def __new__(cls, /) -> None: ...
44+
45+
class Normalizer:
    """Base stub for normalizers: pickle support, repr/str, and normalization entry points."""

    def __getstate__(self, /) -> typing.Any: ...
    def __repr__(self, /) -> str: ...
    def __setstate__(self, /, state: typing.Any) -> typing.Any: ...
    def __str__(self, /) -> str: ...
    @staticmethod
    def custom(obj: typing.Any) -> tokenizers.normalizers.Normalizer: ...  # wraps a Python object as a Normalizer
    def normalize(self, /, normalized: tokenizers.NormalizedString | tokenizers.NormalizedStringRefMut) -> typing.Any: ...  # in-place on a NormalizedString
    def normalize_str(self, /, sequence: str) -> str: ...  # plain str → str convenience form
54+
55+
class Precompiled:
    """Stub for the Precompiled normalizer, built from a precompiled charsmap blob."""

    def __new__(cls, /, precompiled_charsmap: typing.Any) -> None: ...


class Prepend:
    """Stub for the Prepend normalizer; prepends a fixed string, exposed as a property."""

    def __new__(cls, /, prepend: str = ...) -> None: ...
    @property
    def prepend(self, /) -> str: ...
    @prepend.setter
    def prepend(self, /, prepend: str) -> None: ...
64+
65+
class Replace:
    """Stub for the Replace normalizer: substitutes ``pattern`` matches with ``content``.

    Both ``pattern`` and ``content`` are exposed as read/write properties.
    """

    def __new__(cls, /, pattern: str | tokenizers.Regex, content: str) -> None: ...
    @property
    def content(self, /) -> str: ...
    @content.setter
    def content(self, /, content: str) -> None: ...
    @property
    def pattern(self, /) -> typing.Any: ...
    @pattern.setter
    # Fixed: was annotated `-> typing.Any`; every other setter in this file
    # returns None, and property setters always return None at runtime.
    def pattern(self, /, _pattern: str | tokenizers.Regex) -> None: ...
75+
76+
class Sequence:
    """Stub for a normalizer chain; supports len() plus indexed get/set of members."""

    def __getitem__(self, /, index: int) -> typing.Any: ...
    def __getnewargs__(self, /) -> typing.Any: ...  # pickle support
    def __len__(self, /) -> int: ...
    def __new__(cls, /, normalizers: typing.Any) -> None: ...
    def __setitem__(self, /, index: int, value: typing.Any) -> typing.Any: ...
82+
83+
class Strip:
    """Stub for the Strip normalizer; ``left``/``right`` flags control which side is stripped."""

    def __new__(cls, /, left: bool = True, right: bool = True) -> None: ...
    @property
    def left(self, /) -> bool: ...
    @left.setter
    def left(self, /, left: bool) -> None: ...
    @property
    def right(self, /) -> bool: ...
    @right.setter
    def right(self, /, right: bool) -> None: ...


class StripAccents:
    """Stub for the StripAccents normalizer; takes no constructor arguments."""

    def __new__(cls, /) -> None: ...
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
import tokenizers
2+
import tokenizers.pre_tokenizers
3+
import typing
4+
5+
class BertPreTokenizer:
    """Stub for the BERT pre-tokenizer; takes no constructor arguments."""

    def __new__(cls, /) -> None: ...


class ByteLevel:
    """Stub for the ByteLevel pre-tokenizer.

    Options ``add_prefix_space``, ``trim_offsets``, and ``use_regex``
    (all default ``True``) are read/write properties; extra keyword
    arguments go to ``**_kwargs`` (apparently unused — note the name).
    """

    def __new__(cls, /, add_prefix_space: bool = True, trim_offsets: bool = True, use_regex: bool = True, **_kwargs) -> None: ...
    @property
    def add_prefix_space(self, /) -> bool: ...
    @add_prefix_space.setter
    def add_prefix_space(self, /, add_prefix_space: bool) -> None: ...
    @staticmethod
    def alphabet() -> typing.Any: ...  # presumably the byte-level alphabet; not visible from this stub
    @property
    def trim_offsets(self, /) -> bool: ...
    @trim_offsets.setter
    def trim_offsets(self, /, trim_offsets: bool) -> None: ...
    @property
    def use_regex(self, /) -> bool: ...
    @use_regex.setter
    def use_regex(self, /, use_regex: bool) -> None: ...
24+
25+
class CharDelimiterSplit:
    """Stub for the CharDelimiterSplit pre-tokenizer; splits on a single delimiter string."""

    def __getnewargs__(self, /) -> typing.Any: ...  # pickle support
    def __new__(cls, /, delimiter: str) -> None: ...
    @property
    def delimiter(self, /) -> str: ...
    @delimiter.setter
    def delimiter(self, /, delimiter: str) -> None: ...


class Digits:
    """Stub for the Digits pre-tokenizer; ``individual_digits`` toggles per-digit splitting."""

    def __new__(cls, /, individual_digits: bool = False) -> None: ...
    @property
    def individual_digits(self, /) -> bool: ...
    @individual_digits.setter
    def individual_digits(self, /, individual_digits: bool) -> None: ...
39+
40+
class FixedLength:
    """Stub for the FixedLength pre-tokenizer; splits into chunks of ``length`` (default 5)."""

    def __new__(cls, /, length: int = 5) -> None: ...
    @property
    def length(self, /) -> int: ...
    @length.setter
    def length(self, /, length: int) -> None: ...
46+
47+
class Metaspace:
    """Stub for the Metaspace pre-tokenizer.

    Configured by a ``replacement`` character (default ``'▁'``), a
    ``prepend_scheme`` string, and a ``split`` flag; all exposed as
    read/write properties.
    """

    def __new__(cls, /, replacement: str = '▁', prepend_scheme: str = ..., split: bool = True) -> None: ...
    @property
    def prepend_scheme(self, /) -> str: ...
    @prepend_scheme.setter
    # Fixed: was annotated `-> typing.Any`; every other setter in this file
    # returns None, and property setters always return None at runtime.
    def prepend_scheme(self, /, prepend_scheme: str) -> None: ...
    @property
    def replacement(self, /) -> str: ...
    @replacement.setter
    def replacement(self, /, replacement: str) -> None: ...
    @property
    def split(self, /) -> bool: ...
    @split.setter
    def split(self, /, split: bool) -> None: ...
61+
62+
class PreTokenizer:
    """Base stub for pre-tokenizers: pickle support, repr/str, and pre-tokenization entry points."""

    def __getstate__(self, /) -> typing.Any: ...
    def __repr__(self, /) -> str: ...
    def __setstate__(self, /, state: typing.Any) -> typing.Any: ...
    def __str__(self, /) -> str: ...
    @staticmethod
    def custom(pretok: typing.Any) -> tokenizers.pre_tokenizers.PreTokenizer: ...  # wraps a Python object as a PreTokenizer
    def pre_tokenize(self, /, pretok: tokenizers.PreTokenizedString) -> typing.Any: ...  # in-place on a PreTokenizedString
    def pre_tokenize_str(self, /, s: str) -> typing.Any: ...  # plain str convenience form
71+
72+
class Punctuation:
    """Stub for the Punctuation pre-tokenizer.

    ``behavior`` selects the split behavior; it reads back as a ``str``
    and is settable.
    """

    def __new__(cls, /, behavior: typing.Any = ...) -> None: ...
    @property
    def behavior(self, /) -> str: ...
    @behavior.setter
    # Fixed: was annotated `-> typing.Any`; every other setter in this file
    # returns None, and property setters always return None at runtime.
    def behavior(self, /, behavior: str) -> None: ...
78+
79+
class Sequence:
    """Stub for a pre-tokenizer chain; supports indexed get/set of members."""

    def __getitem__(self, /, index: int) -> typing.Any: ...
    def __getnewargs__(self, /) -> typing.Any: ...  # pickle support
    def __new__(cls, /, pre_tokenizers: typing.Any) -> None: ...
    def __setitem__(self, /, index: int, value: typing.Any) -> typing.Any: ...
84+
85+
class Split:
    """Stub for the Split pre-tokenizer.

    Splits on ``pattern`` (a str or ``tokenizers.Regex``) according to
    ``behavior``; ``invert`` flips which side of the match is kept —
    semantics not visible from this stub, confirm against the Rust docs.
    """

    def __getnewargs__(self, /) -> typing.Any: ...  # pickle support
    def __new__(cls, /, pattern: str | tokenizers.Regex, behavior: typing.Any, invert: bool = False) -> None: ...
    @property
    def behavior(self, /) -> str: ...
    @behavior.setter
    # Fixed: was annotated `-> typing.Any`; every other setter in this file
    # returns None, and property setters always return None at runtime.
    def behavior(self, /, behavior: str) -> None: ...
    @property
    def invert(self, /) -> bool: ...
    @invert.setter
    def invert(self, /, invert: bool) -> None: ...
    @property
    def pattern(self, /) -> typing.Any: ...
    @pattern.setter
    # Fixed: same `typing.Any` → `None` normalization as `behavior` above.
    def pattern(self, /, _pattern: str | tokenizers.Regex) -> None: ...
100+
101+
class UnicodeScripts:
    """Stub for the UnicodeScripts pre-tokenizer; takes no constructor arguments."""

    def __new__(cls, /) -> None: ...


class Whitespace:
    """Stub for the Whitespace pre-tokenizer; takes no constructor arguments."""

    def __new__(cls, /) -> None: ...


class WhitespaceSplit:
    """Stub for the WhitespaceSplit pre-tokenizer; takes no constructor arguments."""

    def __new__(cls, /) -> None: ...

0 commit comments

Comments
 (0)