Skip to content

Commit bc0a145

Browse files
committed
yes
1 parent 0201dba commit bc0a145

File tree

3 files changed

+19
-15
lines changed

3 files changed

+19
-15
lines changed
Lines changed: 15 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import scrapy
22
import scrapy.http.response.html
3+
from scrapy.spidermiddlewares.httperror import HttpError
34

45
from v2ex_scrapy import v2ex_parser
56
from v2ex_scrapy.DB import DB
@@ -9,11 +10,11 @@
910
class V2exTopicSpider(scrapy.Spider):
1011
name = "v2ex-member"
1112

12-
def __init__(self, name=None, **kwargs):
13-
super().__init__(name, **kwargs)
13+
def __init__(self, start_id=1, end_id=635000, *args, **kwargs):
14+
super().__init__(*args, **kwargs)
1415
self.db = DB()
15-
self.start_id = 1
16-
self.end_id = 635000
16+
self.start_id = start_id
17+
self.end_id = end_id
1718
self.logger.info(f"start from topic id {self.start_id}, end at {self.end_id}")
1819

1920
def start_requests(self):
@@ -25,17 +26,20 @@ def start_requests(self):
2526
errback=self.member_err,
2627
cb_kwargs={"uid": i},
2728
)
29+
else:
30+
self.logger.info(f"skip member id:{i}, because it exists")
2831

2932
def parse(self, response: scrapy.http.response.html.HtmlResponse, uid: int):
3033
for i in v2ex_parser.parse_member(response):
3134
i.uid = uid
3235
yield i
3336

3437
def member_err(self, failure):
35-
yield MemberItem(
36-
username="",
37-
avatar_url="",
38-
create_at=0,
39-
social_link=[],
40-
uid=failure.request.cb_kwargs["uid"],
41-
)
38+
if failure.check(HttpError):
39+
yield MemberItem(
40+
username="",
41+
avatar_url="",
42+
create_at=0,
43+
social_link=[],
44+
uid=failure.request.cb_kwargs["uid"],
45+
)

v2ex_scrapy/spiders/V2exNodeTopicSpider.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
from v2ex_scrapy import utils
1111

1212

13-
class V2exTopicSpider(scrapy.Spider):
13+
class V2exNodeTopicSpider(scrapy.Spider):
1414
name = "v2ex-node"
1515

1616
UPDATE_TOPIC_WHEN_REPLY_CHANGE = True

v2ex_scrapy/spiders/V2exSpider.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,13 +6,13 @@
66
from v2ex_scrapy.spiders.CommonSpider import CommonSpider
77

88

9-
class V2exTopicSpider(scrapy.Spider):
9+
class V2exSpider(scrapy.Spider):
1010
name = "v2ex"
1111
FORCE_UPDATE_TOPIC = False
1212
UPDATE_COMMENT = True
1313

14-
def __init__(self, name=None, **kwargs):
15-
super().__init__(name, **kwargs)
14+
def __init__(self, *args, **kwargs):
15+
super().__init__(*args, **kwargs)
1616
self.db = DB()
1717
self.start_id = 1
1818
self.end_id = 1000000

0 commit comments

Comments (0)