-
Notifications
You must be signed in to change notification settings - Fork 2.8k
Commit
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# -*- coding: cp936 -*- | ||
import re | ||
fin=open("example.txt","r") | ||
fout=open("result.txt","w") | ||
str=fin.read() | ||
#匹配正则表达式 | ||
reObj=re.compile("\b?([a-zA-Z]+)\b?") | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong. |
||
words=reObj.findall(str) | ||
#建立空字典 | ||
word_dict={} | ||
#以单词的小写作为键值进行统计,同时要 | ||
for word in words: | ||
if(word_dict.has_key(word)): | ||
word_dict[word.lower()]=max(word_dict[word.lower()],words.count(word.lower())+words.count(word.upper())+words.count(word)) | ||
else: | ||
word_dict[word.lower()]=max(0,words.count(word.lower())+words.count(word.upper())+words.count(word)) | ||
for(word,number) in word_dict.items(): | ||
fout.write(word+":%d\n"%number) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
In the latest move to support the economy, Shanghai, Beijing, Chongqing and six other provinces and municipalities will allow banks to refinance high-quality credit assets rated by the People's Bank of China, said the central bank, as the program was first introduced in Guangdong and Shandong provinces last year. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
and:6 | ||
beijing:1 | ||
shandong:1 | ||
six:2 | ||
people:1 | ||
move:2 | ||
year:2 | ||
high:2 | ||
as:2 | ||
program:2 | ||
in:2 | ||
guangdong:1 | ||
quality:2 | ||
provinces:4 | ||
rated:2 | ||
support:2 | ||
shanghai:1 | ||
to:4 | ||
other:2 | ||
was:2 | ||
economy:2 | ||
municipalities:2 | ||
refinance:2 | ||
said:2 | ||
china:1 | ||
last:2 | ||
by:2 | ||
bank:2 | ||
chongqing:1 | ||
introduced:2 | ||
central:2 | ||
assets:2 | ||
of:2 | ||
will:2 | ||
credit:2 | ||
s:2 | ||
allow:2 | ||
banks:2 | ||
the:10 | ||
first:2 | ||
latest:2 |
2 comments
on commit b46e227
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
你的代码不对
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In [1]: data = open('data.txt', 'r').read().replace('\n', ' ')
In [2]: import re
In [3]: pattern = re.compile("[A-Za-z0-9\-\']+")
In [4]: words = pattern.findall(data)
In [5]: count = {}
In [6]: for word in words:
.............:word = word.lower()
.................:if word in count.keys():
.....................:count[word] += 1
.....................:continue
.................:count[word] = 1
...:
In [7]: count
Out[7]:
{'allow': 1,
'and': 3,
'as': 1,
'assets': 1,
'bank': 2,
'banks': 1,
'beijing': 1,
'by': 1,
'central': 1,
'china': 1,
'chongqing': 1,
'credit': 1,
'economy': 1,
'first': 1,
'guangdong': 1,
'high-quality': 1,
'in': 2,
'introduced': 1,
'last': 1,
'latest': 1,
'move': 1,
'municipalities': 1,
'of': 1,
'other': 1,
"people's": 1,
'program': 1,
'provinces': 2,
'rated': 1,
'refinance': 1,
'said': 1,
'shandong': 1,
'shanghai': 1,
'six': 1,
'support': 1,
'the': 5,
'to': 2,
'was': 1,
'will': 1,
'year': 1}
这句正则表达式里面为什么要加问号呢?