This repository has been archived by the owner on Oct 19, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 42
/
Copy pathquestion.go
309 lines (260 loc) · 7.53 KB
/
question.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
package zhihu
import (
"encoding/json"
"errors"
"fmt"
"net/url"
"strconv"
"strings"
"github.com/PuerkitoBio/goquery"
)
// Question represents a single Zhihu question. It can be used to fetch the
// question's title, detail, answers and related information.
type Question struct {
	*Page
	// title is the title of this question.
	title string
}
// NewQuestion builds a Question for the given link. The title may be passed
// in when it is already known; an empty title is looked up lazily by
// GetTitle. It panics when validQuestionURL rejects the link.
func NewQuestion(link string, title string) *Question {
	if validQuestionURL(link) {
		question := &Question{
			Page:  newZhihuPage(link),
			title: title,
		}
		return question
	}
	panic("问题链接不正确: " + link)
}
// GetTitle returns the question title. When no title is stored yet it is
// read from the first "h2.zm-item-title" element of the question page and
// remembered on the receiver.
func (q *Question) GetTitle() string {
	if q.title == "" {
		heading := q.Doc().Find("h2.zm-item-title").First()
		q.title = strip(heading.Text())
	}
	return q.title
}
// GetDetail returns the question description. A value previously stored
// under the "detail" field is reused; otherwise the text of the first
// "div#zh-question-detail" element is extracted and stored.
func (q *Question) GetDetail() string {
	cached, hit := q.getStringField("detail")
	if hit {
		return cached
	}

	node := q.Doc().Find("div#zh-question-detail").First()
	text := strip(node.Text())
	q.setField("detail", text)
	return text
}
// GetAnswersNum returns the number of answers to the question. The value is
// read from the "data-num" attribute of "h3#zh-question-answer-num" and
// stored under the "answers-num" field; it is 0 when the attribute is
// missing or is not a valid integer.
func (q *Question) GetAnswersNum() int {
	const field = "answers-num"
	if cached, hit := q.getIntField(field); hit {
		return cached
	}

	var count int
	if raw, found := q.Doc().Find("h3#zh-question-answer-num").Attr("data-num"); found {
		// A malformed attribute is deliberately treated as "no answers":
		// Atoi yields 0 alongside the discarded error.
		count, _ = strconv.Atoi(raw)
	}
	q.setField(field, count)
	return count
}
// GetFollowersNum returns the number of users following the question. The
// value is parsed from the text of "div.zg-gray-normal>a>strong" and stored
// under the "followers-num" field; text that is not a valid integer gives 0.
func (q *Question) GetFollowersNum() int {
	const field = "followers-num"
	if cached, hit := q.getIntField(field); hit {
		return cached
	}

	raw := q.Doc().Find("div.zg-gray-normal>a>strong").Text()
	// The parse error is intentionally dropped; Atoi returns 0 in that case.
	count, _ := strconv.Atoi(raw)
	q.setField(field, count)
	return count
}
// GetTopics returns the topics attached to the question, one Topic per
// "a.zm-item-tag" element on the page. The result is nil when the page has
// no such element.
func (q *Question) GetTopics() []*Topic {
	var result []*Topic
	collect := func(_ int, tag *goquery.Selection) {
		label := strip(tag.Text())
		// A missing href yields an empty string, which is still passed on.
		target, _ := tag.Attr("href")
		result = append(result, NewTopic(makeZhihuLink(target), label))
	}
	q.Doc().Find("a.zm-item-tag").Each(collect)
	return result
}
// GetFollowersN requests n followers of the question through
// ajaxGetFollowers and returns nil when that call fails. Per the original
// note, a negative n is meant to return every follower; that handling lives
// in ajaxGetFollowers and is not visible here.
func (q *Question) GetFollowersN(n int) []*User {
	followersURL := urlJoin(q.Link, "/followers")
	token := q.GetXSRF()

	if followers, err := ajaxGetFollowers(followersURL, token, n); err == nil {
		return followers
	}
	return nil
}
// GetFollowers returns the users following the question, asking for as many
// followers as GetFollowersNum reports.
func (q *Question) GetFollowers() []*User {
	total := q.GetFollowersNum()
	return q.GetFollowersN(total)
}
// GetAllAnswers returns the answers to the question, asking for as many
// answers as GetAnswersNum reports.
func (q *Question) GetAllAnswers() []*Answer {
	total := q.GetAnswersNum()
	return q.GetTopXAnswers(total)
}
// GetTopXAnswers returns at most the top x answers to the question.
//
// A negative x, or an x larger than the number of answers reported by
// GetAnswersNum, is clamped to that number. Answers rendered on the question
// page are used first; when x exceeds pageSize the remainder is loaded
// through the paged Ajax endpoint. Pages that fail to load are skipped by
// getMoreAnswers, so the result may hold fewer than x answers.
func (q *Question) GetTopXAnswers(x int) []*Answer {
	total := q.GetAnswersNum()
	if x < 0 || x > total {
		x = total
	}

	// 1. Answers embedded in the question page itself.
	answers := q.getAnswersOnIndex()
	if x < len(answers) {
		return answers[:x]
	}

	// 2. "More" answers, loaded through the Ajax endpoint.
	if moreCount := x - pageSize; moreCount > 0 {
		answers = append(answers, q.getMoreAnswers(moreCount)...)
	}

	// The Ajax endpoint is consumed one whole page at a time, so the combined
	// slice can overshoot x; trim it so callers never get more than they
	// asked for.
	if len(answers) > x {
		answers = answers[:x]
	}
	return answers
}
// GetTopAnswer returns the first-ranked answer to the question, or nil when
// GetTopXAnswers(1) yields nothing.
func (q *Question) GetTopAnswer() *Answer {
	best := q.GetTopXAnswers(1)
	if len(best) == 0 {
		return nil
	}
	return best[0]
}
// GetCommentsNum returns the number of comments on the question. The value
// is extracted with reMatchInt from the text of
// "div.zm-meta-panel a.toggle-comment" and stored under the "comment-num"
// field.
func (q *Question) GetCommentsNum() int {
	cached, hit := q.getIntField("comment-num")
	if hit {
		return cached
	}

	toggle := q.Doc().Find("div.zm-meta-panel a.toggle-comment")
	count := reMatchInt(strip(toggle.Text()))
	q.setField("comment-num", count)
	return count
}
// GetVisitTimes returns how many times the question has been visited. The
// value is read from the "content" attribute of the
// meta[itemprop="visitsCount"] element and stored under the "visit-times"
// field; it is 0 when the attribute is missing or is not a valid integer.
func (q *Question) GetVisitTimes() int {
	const field = "visit-times"
	if cached, hit := q.getIntField(field); hit {
		return cached
	}

	var visits int
	if raw, found := q.Doc().Find(`meta[itemprop="visitsCount"]`).Attr("content"); found {
		// The parse error is intentionally dropped; Atoi returns 0 then.
		visits, _ = strconv.Atoi(raw)
	}
	q.setField(field, visits)
	return visits
}
// String renders the question as "<Question: TITLE - LINK>". Note that it
// calls GetTitle, which may need to load the question page.
func (q *Question) String() string {
	title := q.GetTitle()
	link := q.Link
	return fmt.Sprintf("<Question: %s - %s>", title, link)
}
// getAnswersOnIndex parses the question page and returns the answers it
// contains, one per "div.zm-item-answer" element (the first page of answers).
func (q *Question) getAnswersOnIndex() []*Answer {
	// Capacity hint only: at most one page, or fewer if the question has
	// fewer answers in total.
	capacity := minInt(pageSize, q.GetAnswersNum())
	parsed := make([]*Answer, 0, capacity)

	q.Doc().Find("div.zm-item-answer").Each(func(_ int, item *goquery.Selection) {
		parsed = append(parsed, q.processSingleAnswer(item))
	})
	return parsed
}
// getAnswersByAjax loads one page of "more" answers through the Ajax
// endpoint /node/QuestionAnswerListV2.
//
// page selects the page to load; the request offset is page * pageSize. An
// error is returned when the offset lies beyond the number of answers, when
// the question link does not end in a numeric ID, when the request fails, or
// when the response or one of its HTML fragments cannot be decoded.
func (q *Question) getAnswersByAjax(page int) ([]*Answer, error) {
	offset := page * pageSize
	if offset > q.GetAnswersNum() {
		return nil, errors.New("No more answers.")
	}

	// The url_token is the last path segment of the question link: for
	// https://www.zhihu.com/question/23759686 it is 23759686. It is parsed by
	// segment rather than by a fixed width so that IDs of any length and a
	// trailing slash are handled, and a malformed link is reported instead of
	// silently sending url_token 0.
	trimmed := strings.TrimRight(q.Link, "/")
	segment := trimmed[strings.LastIndex(trimmed, "/")+1:]
	urlToken, err := strconv.Atoi(segment)
	if err != nil {
		return nil, fmt.Errorf("parsing url_token from question link %q: %v", q.Link, err)
	}

	form := url.Values{}
	form.Set("_xsrf", q.GetXSRF())
	form.Set("method", "next")
	form.Set("params", fmt.Sprintf(`{"url_token":%d,"pagesize":%d,"offset":%d}`, urlToken, pageSize, offset))

	link := makeZhihuLink("/node/QuestionAnswerListV2")
	body := strings.NewReader(form.Encode())
	resp, err := gSession.Ajax(link, body, q.Link)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	result := nodeListResult{}
	if err = json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, err
	}

	// Each entry of result.Msg is parsed as the HTML fragment of one answer.
	answers := make([]*Answer, 0, len(result.Msg))
	for _, answerHTML := range result.Msg {
		doc, err := goquery.NewDocumentFromReader(strings.NewReader(answerHTML))
		if err != nil {
			return nil, err
		}
		answers = append(answers, q.processSingleAnswer(doc.Selection))
	}
	return answers, nil
}
// getMoreAnswers performs the "more" request repeatedly, loading enough
// whole pages to cover limit answers. A page that fails to load is logged and
// skipped; the remaining pages are still requested.
func (q *Question) getMoreAnswers(limit int) []*Answer {
	collected := make([]*Answer, 0, limit)

	// Round up so that a partially needed last page is still requested.
	pages := (limit + pageSize - 1) / pageSize
	for page := 1; page <= pages; page++ {
		batch, err := q.getAnswersByAjax(page)
		if err != nil {
			logger.Error("加载第 %d 页回答失败，问题：%s，错误：%s", page, q.Link, err.Error())
			continue
		}
		collected = append(collected, batch...)
	}
	return collected
}
// processSingleAnswer turns the HTML fragment of one answer into an Answer.
// The fragment may come from the question page or from the Ajax endpoint.
func (q *Question) processSingleAnswer(sel *goquery.Selection) *Answer {
	// 1. Answer link.
	href, _ := sel.Find("a.answer-date-link").Attr("href")
	link := makeZhihuLink(href)

	// 2. Author: a fragment without an author link is an anonymous answer.
	var author *User
	if authorLink := sel.Find("div.zm-item-answer-author-info").Find("a.author-link"); authorLink.Size() > 0 {
		name := strip(authorLink.Text())
		profile, _ := authorLink.Attr("href")
		author = NewUser(makeZhihuLink(profile), name)
	} else {
		author = ANONYMOUS
	}

	answer := NewAnswer(link, q, author)

	// 3. Upvote count: the count is read from a different element when the
	// fragment is flagged data-isowner="1" (the viewer's own answer).
	var voteSel *goquery.Selection
	if ownerFlag, _ := sel.Attr("data-isowner"); ownerFlag == "1" {
		voteSel = sel.Find("a.zm-item-vote-count")
	} else {
		voteSel = sel.Find("div.zm-votebar").Find("span.count")
	}
	answer.setUpvote(upvoteTextToNum(strip(voteSel.Text())))

	// 4. Content; a conversion error is ignored and whatever content was
	// returned is stored.
	html, _ := answerSelectionToHtml(sel.Find("div.zm-editable-content"))
	answer.setContent(html)

	return answer
}
// setFollowersNum stores value under the "followers-num" field, the same
// field GetFollowersNum reads.
func (q *Question) setFollowersNum(value int) {
	const field = "followers-num"
	q.setField(field, value)
}
// setAnswersNum stores value under the "answers-num" field, the same field
// GetAnswersNum reads.
func (q *Question) setAnswersNum(value int) {
	const field = "answers-num"
	q.setField(field, value)
}
// setVisitTimes stores value under the "visit-times" field, the same field
// GetVisitTimes reads.
func (q *Question) setVisitTimes(value int) {
	const field = "visit-times"
	q.setField(field, value)
}