Skip to content

Commit 02570f1

Browse files
committed
Fix more cases of pages redirecting to themselves
This was "fixed" in b4ca6a7 (#763), but the fix turned out to be incomplete. That fix only allowed redirects leading to the same URL as the original destination, and didn't take into account more complicated cases. Such as, for example: * www.example.com * example.com * (set cookie) * example.com
1 parent c8b9cba commit 02570f1

File tree

2 files changed

+43
-5
lines changed

2 files changed

+43
-5
lines changed

colly.go

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1343,12 +1343,26 @@ func (c *Collector) checkRedirectFunc() func(req *http.Request, via []*http.Requ
13431343
return fmt.Errorf("Not following redirect to %q: %w", req.URL, err)
13441344
}
13451345

1346-
// allow redirects to the original destination
1347-
// to support websites redirecting to the same page while setting
1348-
// session cookies
1349-
samePageRedirect := normalizeURL(req.URL.String()) == normalizeURL(via[0].URL.String())
1346+
// Page may set cookies and respond with a redirect to itself.
1347+
// Some example of such redirect "cycles":
1348+
//
1349+
// example.com -(set cookie)-> example.com
1350+
// example.com -> auth.example.com -(set cookie)-> example.com
1351+
// www.example.com -> example.com -(set cookie)-> example.com
1352+
//
1353+
// We must not return "already visited" error in such cases.
1354+
// So ignore redirect cycles when checking for URL revisit.
1355+
redirectCycle := false
1356+
normalizedURL := normalizeURL(req.URL.String())
1357+
for _, viaReq := range via {
1358+
viaURL := normalizeURL(viaReq.URL.String())
1359+
if viaURL == normalizedURL {
1360+
redirectCycle = true
1361+
break
1362+
}
1363+
}
13501364

1351-
if !c.AllowURLRevisit && !samePageRedirect {
1365+
if !c.AllowURLRevisit && !redirectCycle {
13521366
var body io.ReadCloser
13531367
if req.GetBody != nil {
13541368
var err error

colly_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,30 @@ func TestSetCookieRedirect(t *testing.T) {
783783
}
784784
}
785785

786+
func TestSetCookieComplexRedirectCycle(t *testing.T) {
787+
// server2 -> server1 -(set cookie)-> server1
788+
ts1 := newUnstartedTestServer()
789+
ts1.Config.Handler = requireSessionCookieSimple(ts1.Config.Handler)
790+
ts1.Start()
791+
defer ts1.Close()
792+
793+
ts2 := httptest.NewServer(http.RedirectHandler(ts1.URL, http.StatusMovedPermanently))
794+
defer ts2.Close()
795+
796+
c := NewCollector()
797+
c.OnResponse(func(r *Response) {
798+
if got, want := r.Body, serverIndexResponse; !bytes.Equal(got, want) {
799+
t.Errorf("bad response body got=%q want=%q", got, want)
800+
}
801+
if got, want := r.StatusCode, http.StatusOK; got != want {
802+
t.Errorf("bad response code got=%d want=%d", got, want)
803+
}
804+
})
805+
if err := c.Visit(ts2.URL); err != nil {
806+
t.Fatal(err)
807+
}
808+
}
809+
786810
func TestCollectorPostURLRevisitCheck(t *testing.T) {
787811
ts := newTestServer()
788812
defer ts.Close()

0 commit comments

Comments
 (0)