File tree 2 files changed +8
-8
lines changed
2 files changed +8
-8
lines changed Original file line number Diff line number Diff line change 34
34
35
35
pool = None
36
36
37
- re_userdict = re.compile('^(.+?)( [0-9]+)?( [a-z]+)?$', re.U)
37
+ re_userdict = re.compile(r'^(.+?)( [0-9]+)?( [a-z]+)?$', re.U)
38
38
39
- re_eng = re.compile('[a-zA-Z0-9]', re.U)
39
+ re_eng = re.compile(r'[a-zA-Z0-9]', re.U)
40
40
41
41
# \u4E00-\u9FD5a-zA-Z0-9+#&\._ : All non-space characters. Will be handled with re_han
42
42
# \r\n|\s : whitespace characters. Will not be handled.
43
43
# re_han_default = re.compile("([\u4E00-\u9FD5a-zA-Z0-9+#&\._%]+)", re.U)
44
44
# Adding "-" symbol in re_han_default
45
- re_han_default = re.compile("([\u4E00-\u9FD5a-zA-Z0-9+#&\._%\-]+)", re.U)
45
+ re_han_default = re.compile(r'([\u4E00-\u9FD5a-zA-Z0-9+#&\._%\-]+)', re.U)
46
46
47
- re_skip_default = re.compile("(\r\n|\s)", re.U)
48
- re_han_cut_all = re.compile("([\u4E00-\u9FD5]+)", re.U)
49
- re_skip_cut_all = re.compile("[^a-zA-Z0-9+#\n]", re.U)
47
+ re_skip_default = re.compile(r'(\r\n|\s)', re.U)
48
+ re_han_cut_all = re.compile(r'([\u4E00-\u9FD5]+)', re.U)
49
+ re_skip_cut_all = re.compile(r'[^a-zA-Z0-9+#\n]', re.U)
50
50
51
51
def setLogLevel(log_level):
52
52
global logger
Original file line number Diff line number Diff line change @@ -74,8 +74,8 @@ def __cut(sentence):
74
74
if nexti < len(sentence):
75
75
yield sentence[nexti:]
76
76
77
- re_han = re.compile("([\u4E00-\u9FD5]+)")
78
- re_skip = re.compile("([a-zA-Z0-9]+(?:\.\d+)?%?)")
77
+ re_han = re.compile(r'([\u4E00-\u9FD5]+)')
78
+ re_skip = re.compile(r'([a-zA-Z0-9]+(?:\.\d+)?%?)')
79
79
80
80
81
81
def add_force_split(word):
You can’t perform that action at this time.
0 commit comments